diff --git a/.github/labeler.yml b/.github/labeler.yml
index 4ff1d48beabed..23e0950d448a5 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -14,3 +14,7 @@ area/documentation:
 CDK:
   - airbyte-cdk/*
   - airbyte-cdk/**/*
+
+normalization:
+  - airbyte-integrations/bases/base-normalization/*
+  - airbyte-integrations/bases/base-normalization/**/*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 6d87ae30cb2d4..1170125df75e8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,6 +6,8 @@ exclude: |
     ^.*?/node_modules/.*$|
     ^.*?/charts/.*$|
+    ^airbyte-integrations/bases/base-normalization/.*$|
+    ^.*?/normalization_test_output/.*$|
     ^.*?/pnpm-lock\.yaml$|
     ^.*?/source-amplitude/unit_tests/api_data/zipped\.json$|
diff --git a/airbyte-integrations/bases/base-java/.dockerignore b/airbyte-integrations/bases/base-java/.dockerignore
new file mode 100644
index 0000000000000..70cd13cb50b78
--- /dev/null
+++ b/airbyte-integrations/bases/base-java/.dockerignore
@@ -0,0 +1,5 @@
+*
+!Dockerfile
+!build
+!javabase.sh
+!run_with_normalization.sh
diff --git a/airbyte-integrations/bases/base-java/Dockerfile b/airbyte-integrations/bases/base-java/Dockerfile
new file mode 100644
index 0000000000000..d19438eab3f01
--- /dev/null
+++ b/airbyte-integrations/bases/base-java/Dockerfile
@@ -0,0 +1,34 @@
+### WARNING ###
+# The Java connector Dockerfiles will soon be deprecated.
+# This Dockerfile is not used to build the connector image we publish to DockerHub.
+# The new logic to build the connector image is declared with Dagger here:
+# https://github.com/airbytehq/airbyte/blob/master/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/environments.py#L649
+
+# If you need to add custom logic to build your connector image, you can do it by adding a finalize_build.sh or finalize_build.py script in the connector folder.
+# Please reach out to the Connectors Operations team if you have any questions.
+ARG JDK_VERSION=17.0.8
+FROM amazoncorretto:${JDK_VERSION}
+COPY --from=airbyte/integration-base:dev /airbyte /airbyte
+
+RUN yum update -y && yum install -y tar openssl && yum clean all
+
+WORKDIR /airbyte
+
+# Add the Datadog Java APM agent
+ADD https://dtdg.co/latest-java-tracer dd-java-agent.jar
+
+COPY javabase.sh .
+COPY run_with_normalization.sh .
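+# Illustrative sketch (hypothetical connector name): an image built on this base adds
+# its distribution and sets APPLICATION, which javabase.sh resolves at run time:
+#   FROM airbyte/integration-base-java:dev
+#   ADD build/distributions/destination-example.tar /airbyte
+#   ENV APPLICATION destination-example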
+
+# airbyte base commands
+ENV AIRBYTE_SPEC_CMD "/airbyte/javabase.sh --spec"
+ENV AIRBYTE_CHECK_CMD "/airbyte/javabase.sh --check"
+ENV AIRBYTE_DISCOVER_CMD "/airbyte/javabase.sh --discover"
+ENV AIRBYTE_READ_CMD "/airbyte/javabase.sh --read"
+ENV AIRBYTE_WRITE_CMD "/airbyte/javabase.sh --write"
+
+ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh"
+ENTRYPOINT ["/airbyte/base.sh"]
+
+LABEL io.airbyte.version=0.1.2
+LABEL io.airbyte.name=airbyte/integration-base-java
diff --git a/airbyte-integrations/bases/base-java/build.gradle b/airbyte-integrations/bases/base-java/build.gradle
new file mode 100644
index 0000000000000..0c2de175e2cc9
--- /dev/null
+++ b/airbyte-integrations/bases/base-java/build.gradle
@@ -0,0 +1,3 @@
+plugins {
+    id 'airbyte-docker-legacy'
+}
diff --git a/airbyte-integrations/bases/base-java/javabase.sh b/airbyte-integrations/bases/base-java/javabase.sh
new file mode 100755
index 0000000000000..b5fc9ab7166c2
--- /dev/null
+++ b/airbyte-integrations/bases/base-java/javabase.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+
+set -e
+
+# if IS_CAPTURE_HEAP_DUMP_ON_ERROR is set to true, a heap dump will be captured on OutOfMemoryError
+if [[ $IS_CAPTURE_HEAP_DUMP_ON_ERROR = true ]]; then
+
+  arrayOfSupportedConnectors=("source-postgres" "source-mssql" "source-mysql" )
+
+  # The heap dump is only captured when a Java-based connector fails with an OutOfMemoryError
+  if [[ " ${arrayOfSupportedConnectors[*]} " =~ " $APPLICATION " ]]; then
+    JAVA_OPTS=$JAVA_OPTS" -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/data/dump.hprof"
+    export JAVA_OPTS
+    echo "Added JAVA_OPTS=$JAVA_OPTS"
+    echo "APPLICATION=$APPLICATION"
+  fi
+fi
+# 30781 - Allocate 32KB for the log4j appender buffer to ensure that each line is logged in a single println
+JAVA_OPTS=$JAVA_OPTS" -Dlog4j.encoder.byteBufferSize=32768 -Dlog4j2.configurationFile=log4j2.xml"
+# needed because we make ThreadLocal.get(Thread) accessible in IntegrationRunner.stopOrphanedThreads
+JAVA_OPTS=$JAVA_OPTS" --add-opens=java.base/java.lang=ALL-UNNAMED"
+# tell jooq to be quiet (https://stackoverflow.com/questions/28272284/how-to-disable-jooqs-self-ad-message-in-3-4)
+JAVA_OPTS=$JAVA_OPTS" -Dorg.jooq.no-logo=true -Dorg.jooq.no-tips=true"
+export JAVA_OPTS
+
+# Wrap the run script in a script so that we can lazily evaluate the value of APPLICATION. APPLICATION is
+# set by the dockerfile that inherits base-java, so it cannot be evaluated when base-java is built.
+# We also need to make sure that stdin of the script is piped to the stdin of the java application.
+if [[ $1 = --write ]]; then
+  cat <&0 | /airbyte/bin/"$APPLICATION" "$@"
+else
+  /airbyte/bin/"$APPLICATION" "$@"
+fi
diff --git a/airbyte-integrations/bases/base-java/run_with_normalization.sh b/airbyte-integrations/bases/base-java/run_with_normalization.sh
new file mode 100755
index 0000000000000..669763021803c
--- /dev/null
+++ b/airbyte-integrations/bases/base-java/run_with_normalization.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# Intentionally no set -e, because we want to run normalization even if the destination fails
+set -o pipefail
+
+/airbyte/base.sh $@
+destination_exit_code=$?
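+# Because of `set -o pipefail` (and the deliberate absence of `set -e`), $? above
+# captures the destination's exit status without aborting the script. E.g. a call like
+#   run_with_normalization.sh write --catalog catalog.json --config config.json
+# (illustrative file names) still reaches the normalization step and exit-code handling below.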
+echo '{"type": "LOG","log":{"level":"INFO","message":"Destination process done (exit code '"$destination_exit_code"')"}}' + +# store original args +args=$@ + +while [ $# -ne 0 ]; do + case "$1" in + --config) + CONFIG_FILE="$2" + shift 2 + ;; + *) + # move on + shift + ;; + esac +done + +# restore original args after shifts +set -- $args + +USE_1S1T_FORMAT="false" +if [[ -s "$CONFIG_FILE" ]]; then + USE_1S1T_FORMAT=$(jq -r '.use_1s1t_format' "$CONFIG_FILE") +fi + +if test "$1" != 'write' +then + normalization_exit_code=0 +elif test "$NORMALIZATION_TECHNIQUE" = 'LEGACY' && test "$USE_1S1T_FORMAT" != "true" +then + echo '{"type": "LOG","log":{"level":"INFO","message":"Starting in-connector normalization"}}' + # Normalization tries to create this file from the connector config and crashes if it already exists + # so just nuke it and let normalization recreate it. + # Use -f to avoid error if it doesn't exist, since it's only created for certain SSL modes. + rm -f ca.crt + # the args in a write command are `write --catalog foo.json --config bar.json` + # so if we remove the `write`, we can just pass the rest directly into normalization + /airbyte/entrypoint.sh run ${@:2} --integration-type $AIRBYTE_NORMALIZATION_INTEGRATION | java -cp "/airbyte/lib/*" io.airbyte.cdk.integrations.destination.normalization.NormalizationLogParser + normalization_exit_code=$? + echo '{"type": "LOG","log":{"level":"INFO","message":"In-connector normalization done (exit code '"$normalization_exit_code"')"}}' +else + echo '{"type": "LOG","log":{"level":"INFO","message":"Skipping in-connector normalization"}}' + normalization_exit_code=0 +fi + +if test $destination_exit_code -ne 0 +then + exit $destination_exit_code +elif test $normalization_exit_code -ne 0 +then + exit $normalization_exit_code +else + exit 0 +fi diff --git a/airbyte-integrations/bases/base-normalization/.dockerignore b/airbyte-integrations/bases/base-normalization/.dockerignore new file mode 100644 index 0000000000000..1af2d8606be8f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/.dockerignore @@ -0,0 +1,13 @@ +* +!Dockerfile +!entrypoint.sh +!build/sshtunneling.sh +!setup.py +!normalization +!dbt-project-template +!dbt-project-template-mssql +!dbt-project-template-mysql +!dbt-project-template-oracle +!dbt-project-template-clickhouse +!dbt-project-template-snowflake +!dbt-project-template-redshift diff --git a/airbyte-integrations/bases/base-normalization/.gitignore b/airbyte-integrations/bases/base-normalization/.gitignore new file mode 100644 index 0000000000000..7994f50ee6bea --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/.gitignore @@ -0,0 +1,51 @@ +build/ +logs/ +dbt-project-template/models/generated/ +dbt-project-template/test_output.log +dbt_modules/ +secrets/ +dist/ + +integration_tests/normalization_test_output/*/*/macros +integration_tests/normalization_test_output/*/*/tests +integration_tests/normalization_test_output/**/*.json +integration_tests/normalization_test_output/**/*.log +integration_tests/normalization_test_output/**/*.md +integration_tests/normalization_test_output/**/*.sql +integration_tests/normalization_test_output/**/*.yml +!integration_tests/normalization_test_output/**/*dbt_project.yml +!integration_tests/normalization_test_output/**/generated/sources.yml + +# We keep a minimal/restricted subset of sql files for all destinations to avoid noise in diff +# Simple Streams +!integration_tests/normalization_test_output/**/dedup_exchange_rate*.sql 
+!integration_tests/normalization_test_output/**/DEDUP_EXCHANGE_RATE*.sql +!integration_tests/normalization_test_output/**/exchange_rate.sql +!integration_tests/normalization_test_output/**/EXCHANGE_RATE.sql +!integration_tests/normalization_test_output/**/test_simple_streams/first_output/airbyte_views/**/multiple_column_names_conflicts_stg.sql +# Nested Streams +# Parent table +!integration_tests/normalization_test_output/**/nested_stream_with*_names_ab*.sql +!integration_tests/normalization_test_output/**/nested_stream_with*_names_scd.sql +!integration_tests/normalization_test_output/**/nested_stream_with*_names.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES_AB*.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES_SCD.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_NAMES.sql +# Nested table +!integration_tests/normalization_test_output/**/nested_stream_with_*_partition_ab1.sql +!integration_tests/normalization_test_output/**/nested_stream_with_*_data_ab1.sql +!integration_tests/normalization_test_output/**/nested_stream_with*_partition_scd.sql +!integration_tests/normalization_test_output/**/nested_stream_with*_data_scd.sql +!integration_tests/normalization_test_output/**/nested_stream_with*_partition.sql +!integration_tests/normalization_test_output/**/nested_stream_with*_data.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH_*_PARTITION_AB1.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH_*_DATA_AB1.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_PARTITION_SCD.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_DATA_SCD.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_PARTITION.sql +!integration_tests/normalization_test_output/**/NESTED_STREAM_WITH*_DATA.sql + +# but we keep all sql files for Postgres +!integration_tests/normalization_test_output/postgres/**/*.sql +integration_tests/normalization_test_output/postgres/**/dbt_data_tests +integration_tests/normalization_test_output/postgres/**/dbt_schema_tests diff --git a/airbyte-integrations/bases/base-normalization/Dockerfile b/airbyte-integrations/bases/base-normalization/Dockerfile new file mode 100644 index 0000000000000..c0ee635f30459 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/Dockerfile @@ -0,0 +1,37 @@ +FROM fishtownanalytics/dbt:1.0.0 +COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte + +# Install SSH Tunneling dependencies +RUN apt-get update && apt-get install -y jq sshpass + +WORKDIR /airbyte +COPY entrypoint.sh . +COPY build/sshtunneling.sh . + +WORKDIR /airbyte/normalization_code +COPY normalization ./normalization +COPY setup.py . +COPY dbt-project-template/ ./dbt-template/ + +# Install python dependencies +WORKDIR /airbyte/base_python_structs + +# workaround for https://github.com/yaml/pyyaml/issues/601 +# this should be fixed in the airbyte/base-airbyte-protocol-python image +RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation + +RUN pip install . + +WORKDIR /airbyte/normalization_code +RUN pip install . 
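+# (illustrative) the equivalent local setup, assuming a Python 3 virtualenv:
+#   pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation
+#   pip install -e .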
+
+WORKDIR /airbyte/normalization_code/dbt-template/
+# Download external dbt dependencies
+RUN dbt deps
+
+WORKDIR /airbyte
+ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
+ENTRYPOINT ["/airbyte/entrypoint.sh"]
+
+LABEL io.airbyte.version=0.4.3
+LABEL io.airbyte.name=airbyte/normalization
diff --git a/airbyte-integrations/bases/base-normalization/build.gradle b/airbyte-integrations/bases/base-normalization/build.gradle
new file mode 100644
index 0000000000000..4cc45316ef92d
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/build.gradle
@@ -0,0 +1,57 @@
+plugins {
+    id 'airbyte-docker-legacy'
+    id 'airbyte-python'
+}
+
+dependencies {
+    testFixtures(project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies'))
+}
+
+// we need to access the sshtunneling script from airbyte-workers for ssh support
+def copySshScript = tasks.register('copySshScript', Copy) {
+    from "${project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies').buildDir}/resources/testFixtures"
+    into "${buildDir}"
+    include "sshtunneling.sh"
+}
+copySshScript.configure {
+    dependsOn project(':airbyte-cdk:java:airbyte-cdk:airbyte-cdk-dependencies').tasks.named('processTestFixturesResources')
+}
+
+// make sure the copy task above worked (if it fails, it fails silently, which is annoying)
+def checkSshScriptCopy = tasks.register('checkSshScriptCopy') {
+    doFirst {
+        assert file("${buildDir}/sshtunneling.sh").exists() : "Copy of sshtunneling.sh failed."
+    }
+}
+checkSshScriptCopy.configure {
+    dependsOn copySshScript
+}
+
+def generate = tasks.register('generate')
+generate.configure {
+    dependsOn checkSshScriptCopy
+}
+
+tasks.named('check').configure {
+    dependsOn generate
+}
+
+tasks.named("jar").configure {
+    dependsOn copySshScript
+}
+
+[
+    'bigquery',
+    'mysql',
+    'postgres',
+    'redshift',
+    'snowflake',
+    'oracle',
+    'mssql',
+    'clickhouse',
+    'tidb',
+].each {destinationName ->
+    tasks.matching { it.name == 'integrationTestPython' }.configureEach {
+        dependsOn project(":airbyte-integrations:connectors:destination-$destinationName").tasks.named('assemble')
+    }
+}
diff --git a/airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile b/airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile
new file mode 100644
index 0000000000000..18005ea89872a
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/clickhouse.Dockerfile
@@ -0,0 +1,36 @@
+FROM ghcr.io/dbt-labs/dbt-core:1.3.1
+COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte
+
+# Install SSH Tunneling dependencies
+RUN apt-get update && apt-get install -y jq sshpass
+WORKDIR /airbyte
+COPY entrypoint.sh .
+COPY build/sshtunneling.sh .
+
+WORKDIR /airbyte/normalization_code
+COPY normalization ./normalization
+COPY setup.py .
+COPY dbt-project-template/ ./dbt-template/
+
+# Install python dependencies
+WORKDIR /airbyte/base_python_structs
+
+# workaround for https://github.com/yaml/pyyaml/issues/601
+# this should be fixed in the airbyte/base-airbyte-protocol-python image
+RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation
+
+RUN pip install .
+
+WORKDIR /airbyte/normalization_code
+RUN pip install .
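+# (illustrative) the Gradle wiring declared in build.gradle above can be exercised with
+#   ./gradlew :airbyte-integrations:bases:base-normalization:checkSshScriptCopy
+# which fails fast when build/sshtunneling.sh was not copied.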
+ +WORKDIR /airbyte/normalization_code/dbt-template/ +RUN pip install "dbt-clickhouse>=1.4.0" +# Download external dbt dependencies +RUN dbt deps + +WORKDIR /airbyte +ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" +ENTRYPOINT ["/airbyte/entrypoint.sh"] + +LABEL io.airbyte.name=airbyte/normalization-clickhouse diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml new file mode 100755 index 0000000000000..b6033fcb69544 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/dbt_project.yml @@ -0,0 +1,65 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: "airbyte_utils" +version: "1.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: "normalize" + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +packages-install-path: "/dbt" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true + # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) + # all schemas should be unquoted + schema: true + identifier: true + +# You can define configurations for models in the `model-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + # ephemeral materialization isn't supported in ClickHouse yet + +materialized: view + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + # schema change test isn't supported in ClickHouse yet + +on_schema_change: "ignore" + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +dispatch: + - macro_namespace: dbt_utils + search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml new file mode 100755 index 0000000000000..33b4edd58c8c6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-clickhouse/packages.yml @@ -0,0 +1,5 @@ +# add dependencies. these will get pulled during the `dbt deps` process. 
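+# (illustrative) to move to a newer dbt-utils, bump `revision` below and re-run
+# `dbt deps`; dbt resolves git packages by tag or commit.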
+ +packages: + - git: "https://github.com/fishtown-analytics/dbt-utils.git" + revision: 0.8.2 diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-duckdb/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-duckdb/dbt_project.yml new file mode 100755 index 0000000000000..7631ef356dc92 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-duckdb/dbt_project.yml @@ -0,0 +1,63 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: "airbyte_utils" +version: "1.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: "normalize" + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +packages-install-path: "/dbt" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true + # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) + # all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `model-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +dispatch: + - macro_namespace: dbt_utils + search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-duckdb/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-duckdb/packages.yml new file mode 100755 index 0000000000000..33b4edd58c8c6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-duckdb/packages.yml @@ -0,0 +1,5 @@ +# add dependencies. these will get pulled during the `dbt deps` process. 
+ +packages: + - git: "https://github.com/fishtown-analytics/dbt-utils.git" + revision: 0.8.2 diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-mssql/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-mssql/dbt_project.yml new file mode 100755 index 0000000000000..8ed082f367749 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-mssql/dbt_project.yml @@ -0,0 +1,61 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: "airbyte_utils" +version: "1.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: "normalize" + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +packages-install-path: "/dbt" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true + # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) + # all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `model-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +vars: + dbt_utils_dispatch_list: ["airbyte_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-mssql/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-mssql/packages.yml new file mode 100755 index 0000000000000..33b4edd58c8c6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-mssql/packages.yml @@ -0,0 +1,5 @@ +# add dependencies. these will get pulled during the `dbt deps` process. 
+
+packages:
+  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
+    revision: 0.8.2
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-mysql/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-mysql/dbt_project.yml
new file mode 100755
index 0000000000000..7116e6dc63d2e
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-mysql/dbt_project.yml
@@ -0,0 +1,63 @@
+# This file is necessary to install dbt-utils with dbt deps
+# the content will be overwritten by the transform function
+
+# Name your package! Package names should contain only lowercase characters
+# and underscores. A good package name should reflect your organization's
+# name or the intended use of these models
+name: "airbyte_utils"
+version: "1.0"
+config-version: 2
+
+# This setting configures which "profile" dbt uses for this project. Profiles contain
+# database connection information, and should be configured in the ~/.dbt/profiles.yml file
+profile: "normalize"
+
+# These configurations specify where dbt should look for different types of files.
+# The `model-paths` config, for example, states that source models can be found
+# in the "models/" directory. You probably won't need to change these!
+model-paths: ["models"]
+docs-paths: ["docs"]
+analysis-paths: ["analysis"]
+test-paths: ["tests"]
+seed-paths: ["data"]
+macro-paths: ["macros"]
+
+target-path: "../build" # directory which will store compiled SQL files
+log-path: "../logs" # directory which will store DBT logs
+packages-install-path: "/dbt" # directory which will store external DBT dependencies
+
+clean-targets: # directories to be removed by `dbt clean`
+  - "build"
+  - "dbt_modules"
+
+quoting:
+  database: true
+  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
+  # all schemas should be unquoted
+  schema: false
+  identifier: true
+
+# You can define configurations for models in the `model-paths` directory here.
+# Using these configurations, you can enable or disable models, change how they
+# are materialized, and more!
+models:
+  airbyte_utils:
+    +materialized: table
+    generated:
+      airbyte_ctes:
+        +tags: airbyte_internal_cte
+        +materialized: ephemeral
+      airbyte_incremental:
+        +tags: incremental_tables
+        # incremental is not enabled for MySQL yet
+        #+materialized: incremental
+        +materialized: table
+      airbyte_tables:
+        +tags: normalized_tables
+        +materialized: table
+      airbyte_views:
+        +tags: airbyte_internal_views
+        +materialized: view
+
+vars:
+  dbt_utils_dispatch_list: ["airbyte_utils"]
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-mysql/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-mysql/packages.yml
new file mode 100755
index 0000000000000..33b4edd58c8c6
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-mysql/packages.yml
@@ -0,0 +1,5 @@
+# add dependencies. these will get pulled during the `dbt deps` process.
+ +packages: + - git: "https://github.com/fishtown-analytics/dbt-utils.git" + revision: 0.8.2 diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-oracle/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-oracle/dbt_project.yml new file mode 100755 index 0000000000000..7ad95ea5f9414 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-oracle/dbt_project.yml @@ -0,0 +1,61 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: "airbyte_utils" +version: "1.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: "normalize" + +# These configurations specify where dbt should look for different types of files. +# The `source-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +source-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +data-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +modules-path: "/dbt" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: false + schema: false + identifier: false + +# You can define configurations for models in the `source-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + # incremental is not enabled for Oracle yet + #+materialized: incremental + +materialized: table + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +vars: + dbt_utils_dispatch_list: ["airbyte_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-oracle/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-oracle/packages.yml new file mode 100755 index 0000000000000..13d4e69a45cb7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-oracle/packages.yml @@ -0,0 +1,5 @@ +# add dependencies. these will get pulled during the `dbt deps` process. + +packages: + - git: "https://github.com/fishtown-analytics/dbt-utils.git" + revision: 0.6.4 diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-redshift/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-redshift/dbt_project.yml new file mode 100755 index 0000000000000..c17ac179bd600 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-redshift/dbt_project.yml @@ -0,0 +1,66 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! 
Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: "airbyte_utils" +version: "1.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: "normalize" + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +packages-install-path: "/dbt" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true + # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) + # all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `model-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! +models: + +transient: false + # https://docs.aws.amazon.com/redshift/latest/dg/super-configurations.html + +pre-hook: "SET enable_case_sensitive_identifier to TRUE" + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +dispatch: + - macro_namespace: dbt_utils + search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-snowflake/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-snowflake/dbt_project.yml new file mode 100644 index 0000000000000..2e807c5e19bae --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-snowflake/dbt_project.yml @@ -0,0 +1,64 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: "airbyte_utils" +version: "1.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: "normalize" + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! 
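+# (illustrative) the "normalize" profile named above is defined in profiles.yml, e.g.:
+#   normalize:
+#     target: prod
+#     outputs:
+#       prod:
+#         type: snowflake
+#         account: <account>
+# Airbyte generates that file into the job workspace at runtime.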
+model-paths: ["models"]
+docs-paths: ["docs"]
+analysis-paths: ["analysis"]
+test-paths: ["tests"]
+seed-paths: ["data"]
+macro-paths: ["macros"]
+
+target-path: "../build" # directory which will store compiled SQL files
+log-path: "../logs" # directory which will store DBT logs
+packages-install-path: "/dbt" # directory which will store external DBT dependencies
+
+clean-targets: # directories to be removed by `dbt clean`
+  - "build"
+  - "dbt_modules"
+
+quoting:
+  database: true
+  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
+  # all schemas should be unquoted
+  schema: false
+  identifier: true
+
+# You can define configurations for models in the `model-paths` directory here.
+# Using these configurations, you can enable or disable models, change how they
+# are materialized, and more!
+models:
+  +transient: false
+  airbyte_utils:
+    +materialized: table
+    generated:
+      airbyte_ctes:
+        +tags: airbyte_internal_cte
+        +materialized: ephemeral
+      airbyte_incremental:
+        +tags: incremental_tables
+        +materialized: incremental
+        +on_schema_change: sync_all_columns
+      airbyte_tables:
+        +tags: normalized_tables
+        +materialized: table
+      airbyte_views:
+        +tags: airbyte_internal_views
+        +materialized: view
+
+dispatch:
+  - macro_namespace: dbt_utils
+    search_order: ["airbyte_utils", "dbt_utils"]
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-tidb/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-tidb/dbt_project.yml
new file mode 100755
index 0000000000000..497a4f592e3f0
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-tidb/dbt_project.yml
@@ -0,0 +1,61 @@
+# This file is necessary to install dbt-utils with dbt deps
+# the content will be overwritten by the transform function
+
+# Name your package! Package names should contain only lowercase characters
+# and underscores. A good package name should reflect your organization's
+# name or the intended use of these models
+name: "airbyte_utils"
+version: "1.0"
+config-version: 2
+
+# This setting configures which "profile" dbt uses for this project. Profiles contain
+# database connection information, and should be configured in the ~/.dbt/profiles.yml file
+profile: "normalize"
+
+# These configurations specify where dbt should look for different types of files.
+# The `model-paths` config, for example, states that source models can be found
+# in the "models/" directory. You probably won't need to change these!
+model-paths: ["models"]
+docs-paths: ["docs"]
+analysis-paths: ["analysis"]
+test-paths: ["tests"]
+seed-paths: ["data"]
+macro-paths: ["macros"]
+
+target-path: "../build" # directory which will store compiled SQL files
+log-path: "../logs" # directory which will store DBT logs
+packages-install-path: "/dbt" # directory which will store external DBT dependencies
+
+clean-targets: # directories to be removed by `dbt clean`
+  - "build"
+  - "dbt_modules"
+
+quoting:
+  database: true
+  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
+  # all schemas should be unquoted
+  schema: false
+  identifier: true
+
+# You can define configurations for models in the `model-paths` directory here.
+# Using these configurations, you can enable or disable models, change how they
+# are materialized, and more!
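+# (illustrative) the tags declared below also allow selective runs, e.g.:
+#   dbt run --select tag:normalized_tables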
+models:
+  airbyte_utils:
+    +materialized: table
+    generated:
+      airbyte_ctes:
+        +tags: airbyte_internal_cte
+        +materialized: ephemeral
+      airbyte_incremental:
+        +tags: incremental_tables
+        +materialized: incremental
+      airbyte_tables:
+        +tags: normalized_tables
+        +materialized: table
+      airbyte_views:
+        +tags: airbyte_internal_views
+        +materialized: view
+
+vars:
+  dbt_utils_dispatch_list: ["airbyte_utils"]
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template-tidb/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template-tidb/packages.yml
new file mode 100755
index 0000000000000..33b4edd58c8c6
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template-tidb/packages.yml
@@ -0,0 +1,5 @@
+# add dependencies. these will get pulled during the `dbt deps` process.
+
+packages:
+  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
+    revision: 0.8.2
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/README.md b/airbyte-integrations/bases/base-normalization/dbt-project-template/README.md
new file mode 100644
index 0000000000000..13e812383e92d
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/README.md
@@ -0,0 +1,19 @@
+## Installing dbt
+
+1. Activate your venv and run `pip3 install dbt`
+1. Copy `airbyte-normalization/sample_files/profiles.yml` over to `~/.dbt/profiles.yml`
+1. Edit it to configure your profiles accordingly
+
+## Running dbt
+
+1. `cd airbyte-normalization`
+1. You can now run dbt commands; to check that the setup is fine: `dbt debug`
+1. To build the dbt tables in your warehouse: `dbt run`
+
+## Running dbt from Airbyte generated config
+
+1. You can also change directory (`cd /tmp/dev_root/workspace/1/0/normalize` for example) to one of the workspaces generated by Airbyte, inside one of the `normalize` folders.
+1. You should find `profiles.yml` and a bunch of other dbt files/folders created there.
+1. To check everything is set up properly: `dbt debug --profiles-dir=$(pwd) --project-dir=$(pwd)`
+1. You can modify the `.sql` files and run `dbt run --profiles-dir=$(pwd) --project-dir=$(pwd)` too
+1. You can inspect compiled dbt `.sql` files before they are run in the destination engine in the `normalize/build/compiled` or `normalize/build/run` folders
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/dbt_project.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template/dbt_project.yml
new file mode 100755
index 0000000000000..7631ef356dc92
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/dbt_project.yml
@@ -0,0 +1,63 @@
+# This file is necessary to install dbt-utils with dbt deps
+# the content will be overwritten by the transform function
+
+# Name your package! Package names should contain only lowercase characters
+# and underscores. A good package name should reflect your organization's
+# name or the intended use of these models
+name: "airbyte_utils"
+version: "1.0"
+config-version: 2
+
+# This setting configures which "profile" dbt uses for this project. Profiles contain
+# database connection information, and should be configured in the ~/.dbt/profiles.yml file
+profile: "normalize"
+
+# These configurations specify where dbt should look for different types of files.
+# The `model-paths` config, for example, states that source models can be found
+# in the "models/" directory. You probably won't need to change these!
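+# (illustrative) once Airbyte renders this template into a job workspace, the README
+# workflow above applies unchanged, e.g.:
+#   dbt debug --profiles-dir=$(pwd) --project-dir=$(pwd)
+#   dbt run --profiles-dir=$(pwd) --project-dir=$(pwd)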
+model-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +packages-install-path: "/dbt" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true + # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) + # all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `model-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +dispatch: + - macro_namespace: dbt_utils + search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/clean_tmp_tables.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/clean_tmp_tables.sql new file mode 100644 index 0000000000000..46e2328745f1a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/clean_tmp_tables.sql @@ -0,0 +1,19 @@ +{% macro clean_tmp_tables(schemas) -%} + {{ adapter.dispatch('clean_tmp_tables')(schemas) }} +{%- endmacro %} + +-- default +{% macro default__clean_tmp_tables(schemas) -%} + {% do exceptions.warn("\tINFO: CLEANING TEST LEFTOVERS IS NOT IMPLEMENTED FOR THIS DESTINATION. 
CONSIDER REMOVING TEST TABLES MANUALLY.\n") %}
+{%- endmacro %}
+
+-- for redshift
+{% macro redshift__clean_tmp_tables(schemas) %}
+  {%- for tmp_schema in schemas -%}
+    {% do log("\tDROP SCHEMA IF EXISTS " ~ tmp_schema, info=True) %}
+    {%- set drop_query -%}
+      drop schema if exists {{ tmp_schema }} cascade;
+    {%- endset -%}
+    {%- do run_query(drop_query) -%}
+  {%- endfor -%}
+{% endmacro %}
\ No newline at end of file
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/array.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/array.sql
new file mode 100644
index 0000000000000..6180675674b7f
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/array.sql
@@ -0,0 +1,173 @@
+{#
+    Adapter Macros for the following functions:
+    - Bigquery: unnest() -> https://cloud.google.com/bigquery/docs/reference/standard-sql/arrays#flattening-arrays-and-repeated-fields
+    - Snowflake: flatten() -> https://docs.snowflake.com/en/sql-reference/functions/flatten.html
+    - Redshift: -> https://blog.getdbt.com/how-to-unnest-arrays-in-redshift/
+    - postgres: unnest() -> https://www.postgresqltutorial.com/postgresql-array/
+    - MSSQL: openjson() -> https://docs.microsoft.com/en-us/sql/relational-databases/json/validate-query-and-change-json-data-with-built-in-functions-sql-server?view=sql-server-ver15
+    - ClickHouse: ARRAY JOIN -> https://clickhouse.com/docs/zh/sql-reference/statements/select/array-join/
+#}

+{# cross_join_unnest ------------------------------------------------- #}
+
+{% macro cross_join_unnest(stream_name, array_col) -%}
+  {{ adapter.dispatch('cross_join_unnest')(stream_name, array_col) }}
+{%- endmacro %}
+
+{% macro default__cross_join_unnest(stream_name, array_col) -%}
+    {% do exceptions.warn("Undefined macro cross_join_unnest for this destination engine") %}
+{%- endmacro %}
+
+{% macro bigquery__cross_join_unnest(stream_name, array_col) -%}
+    cross join unnest({{ array_col }}) as {{ array_col }}
+{%- endmacro %}
+
+{% macro clickhouse__cross_join_unnest(stream_name, array_col) -%}
+    ARRAY JOIN {{ array_col }}
+{%- endmacro %}
+
+{% macro oracle__cross_join_unnest(stream_name, array_col) -%}
+    {% do exceptions.warn("Normalization does not support unnesting for Oracle yet.") %}
+{%- endmacro %}
+
+{% macro postgres__cross_join_unnest(stream_name, array_col) -%}
+    cross join jsonb_array_elements(
+        case jsonb_typeof({{ array_col }})
+        when 'array' then {{ array_col }}
+        else '[]' end
+    ) as _airbyte_nested_data
+{%- endmacro %}
+
+{% macro mysql__cross_join_unnest(stream_name, array_col) -%}
+    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
+{%- endmacro %}
+
+{% macro tidb__cross_join_unnest(stream_name, array_col) -%}
+    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
+{%- endmacro %}
+
+{% macro duckdb__cross_join_unnest(stream_name, array_col) -%}
+    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
+{%- endmacro %}
+
+{% macro redshift__cross_join_unnest(stream_name, array_col) -%}
+    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
+{%- endmacro %}
+
+{% macro snowflake__cross_join_unnest(stream_name, array_col) -%}
+    cross join table(flatten({{ array_col }})) as {{ array_col }}
+{%- endmacro %}
+
+{% macro sqlserver__cross_join_unnest(stream_name, array_col) -%}
+{#
https://docs.microsoft.com/en-us/sql/relational-databases/json/convert-json-data-to-rows-and-columns-with-openjson-sql-server?view=sql-server-ver15#option-1---openjson-with-the-default-output #} + CROSS APPLY ( + SELECT [value] = CASE + WHEN [type] = 4 THEN (SELECT [value] FROM OPENJSON([value])) + WHEN [type] = 5 THEN [value] + END + FROM OPENJSON({{ array_col }}) + ) AS {{ array_col }} +{%- endmacro %} + +{# unnested_column_value -- this macro is related to unnest_cte #} + +{% macro unnested_column_value(column_col) -%} + {{ adapter.dispatch('unnested_column_value')(column_col) }} +{%- endmacro %} + +{% macro default__unnested_column_value(column_col) -%} + {{ column_col }} +{%- endmacro %} + +{% macro postgres__unnested_column_value(column_col) -%} + _airbyte_nested_data +{%- endmacro %} + +{% macro snowflake__unnested_column_value(column_col) -%} + {{ column_col }}.value +{%- endmacro %} + +{% macro redshift__unnested_column_value(column_col) -%} + _airbyte_nested_data +{%- endmacro %} + +{% macro mysql__unnested_column_value(column_col) -%} + _airbyte_nested_data +{%- endmacro %} + +{% macro tidb__unnested_column_value(column_col) -%} + _airbyte_nested_data +{%- endmacro %} + +{% macro duckdb__unnested_column_value(column_col) -%} + _airbyte_nested_data +{%- endmacro %} + +{% macro oracle__unnested_column_value(column_col) -%} + {{ column_col }} +{%- endmacro %} + +{% macro sqlserver__unnested_column_value(column_col) -%} + {# unnested array/sub_array will be located in `value` column afterwards, we need to address to it #} + {{ column_col }}.value +{%- endmacro %} + +{# unnest_cte ------------------------------------------------- #} + +{% macro unnest_cte(from_table, stream_name, column_col) -%} + {{ adapter.dispatch('unnest_cte')(from_table, stream_name, column_col) }} +{%- endmacro %} + +{% macro default__unnest_cte(from_table, stream_name, column_col) -%}{%- endmacro %} + +{% macro redshift__unnest_cte(from_table, stream_name, column_col) -%} + {# -- based on https://docs.aws.amazon.com/redshift/latest/dg/query-super.html #} + with joined as ( + select + table_alias._airbyte_{{ stream_name }}_hashid as _airbyte_hashid, + _airbyte_nested_data + from {{ from_table }} as table_alias, table_alias.{{ column_col }} as _airbyte_nested_data + ) +{%- endmacro %} + +{% macro mysql__unnest_cte(from_table, stream_name, column_col) -%} + {%- if not execute -%} + {{ return('') }} + {% endif %} + + {%- call statement('max_json_array_length', fetch_result=True) -%} + with max_value as ( + select max(json_length({{ column_col }})) as max_number_of_items + from {{ from_table }} + ) + select + case when max_number_of_items is not null and max_number_of_items > 1 + then max_number_of_items + else 1 end as max_number_of_items + from max_value + {%- endcall -%} + + {%- set max_length = load_result('max_json_array_length') -%} + with numbers as ( + {{ dbt_utils.generate_series(max_length["data"][0][0]) }} + ), + joined as ( + select + _airbyte_{{ stream_name }}_hashid as _airbyte_hashid, + {# -- json_extract(column_col, '$[i][0]') as _airbyte_nested_data #} + json_extract({{ column_col }}, concat("$[", numbers.generated_number - 1, "][0]")) as _airbyte_nested_data + from {{ from_table }} + cross join numbers + -- only generate the number of records in the cross join that corresponds + -- to the number of items in {{ from_table }}.{{ column_col }} + where numbers.generated_number <= json_length({{ column_col }}) + ) +{%- endmacro %} + +{% macro tidb__unnest_cte(from_table, stream_name, column_col) -%} + 
{{ mysql__unnest_cte(from_table, stream_name, column_col) }}
+{%- endmacro %}
+
+{% macro duckdb__unnest_cte(from_table, stream_name, column_col) -%}
+    {{ mysql__unnest_cte(from_table, stream_name, column_col) }}
+{%- endmacro %}
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/concat.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/concat.sql
new file mode 100644
index 0000000000000..aab42ca3b9640
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/concat.sql
@@ -0,0 +1,36 @@
+{#
+    concat in dbt 0.6.4 used to work fine for bigquery but the new implementation in 0.7.3 is less scalable (it cannot handle too many columns)
+    Therefore, we revert the implementation here and add versions for missing destinations
+#}
+
+{% macro concat(fields) -%}
+    {{ adapter.dispatch('concat')(fields) }}
+{%- endmacro %}
+
+{% macro bigquery__concat(fields) -%}
+    {#-- concat() in SQL bigquery scales better with number of columns than using the '||' operator --#}
+    concat({{ fields|join(', ') }})
+{%- endmacro %}
+
+{% macro mysql__concat(fields) -%}
+    {#-- MySQL doesn't support the '||' operator as concatenation by default --#}
+    concat({{ fields|join(', ') }})
+{%- endmacro %}
+
+{% macro sqlserver__concat(fields) -%}
+    {#-- CONCAT() in SQL SERVER accepts from 2 to 254 arguments, we use batches for the main concat, to overcome the limit. --#}
+    {% set concat_chunks = [] %}
+    {% for chunk in fields|batch(253) -%}
+        {% set _ = concat_chunks.append( "concat(" ~ chunk|join(', ') ~ ",'')" ) %}
+    {% endfor %}
+
+    concat({{ concat_chunks|join(', ') }}, '')
+{%- endmacro %}
+
+{% macro tidb__concat(fields) -%}
+    concat({{ fields|join(', ') }})
+{%- endmacro %}
+
+{% macro duckdb__concat(fields) -%}
+    concat({{ fields|join(', ') }})
+{%- endmacro %}
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/current_timestamp.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/current_timestamp.sql
new file mode 100644
index 0000000000000..a9df34c9e4979
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/current_timestamp.sql
@@ -0,0 +1,7 @@
+{% macro mysql__current_timestamp() %}
+    CURRENT_TIMESTAMP
+{% endmacro %}
+
+{% macro oracle__current_timestamp() %}
+    CURRENT_TIMESTAMP
+{% endmacro %}
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql
new file mode 100755
index 0000000000000..7f69c66f78df2
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql
@@ -0,0 +1,394 @@
+{# json ------------------------------------------------- #}
+
+{%- macro type_json() -%}
+  {{ adapter.dispatch('type_json')() }}
+{%- endmacro -%}
+
+{% macro default__type_json() %}
+    string
+{% endmacro %}
+
+{%- macro redshift__type_json() -%}
+    super
+{%- endmacro -%}
+
+{% macro postgres__type_json() %}
+    jsonb
+{% endmacro %}
+
+{%- macro oracle__type_json() -%}
+    varchar2(4000)
+{%- endmacro -%}
+
+{% macro snowflake__type_json() %}
+    variant
+{% endmacro %}
+
+{%- macro mysql__type_json() -%}
+    json
+{%- endmacro -%}
+
+{%- macro sqlserver__type_json() -%}
+    NVARCHAR(max)
+{%- endmacro -%}
+
+{% macro 
clickhouse__type_json() %} + String +{% endmacro %} + +{%- macro tidb__type_json() -%} + json +{%- endmacro -%} + +{%- macro duckdb__type_json() -%} + json +{%- endmacro -%} + +{# string ------------------------------------------------- #} + +{%- macro mysql__type_string() -%} + char +{%- endmacro -%} + +{%- macro oracle__type_string() -%} + varchar2(4000) +{%- endmacro -%} + +{% macro sqlserver__type_string() %} + NVARCHAR(max) +{%- endmacro -%} + +{%- macro clickhouse__type_string() -%} + String +{%- endmacro -%} + +{#-- TODO: Remove this macro when dbt issue regarding unlimited varchars on postgres is resolved (https://github.com/dbt-labs/dbt-core/issues/5238) and we've upgraded to the latest version of dbt --#} +{%- macro postgres__type_string() -%} + text +{%- endmacro -%} + +{%- macro tidb__type_string() -%} + char(1000) +{%- endmacro -%} + +{%- macro duckdb__type_string() -%} + VARCHAR +{%- endmacro -%} + +{# float ------------------------------------------------- #} +{% macro mysql__type_float() %} + float +{% endmacro %} + +{% macro oracle__type_float() %} + float +{% endmacro %} + +{% macro clickhouse__type_float() %} + Float64 +{% endmacro %} + +{% macro tidb__type_float() %} + float +{% endmacro %} + +{% macro duckdb__type_float() %} + DOUBLE +{% endmacro %} + +{# int ------------------------------------------------- #} +{% macro default__type_int() %} + int +{% endmacro %} + +{% macro mysql__type_int() %} + signed +{% endmacro %} + +{% macro oracle__type_int() %} + int +{% endmacro %} + +{% macro clickhouse__type_int() %} + INT +{% endmacro %} + +{% macro tidb__type_int() %} + signed +{% endmacro %} + +{% macro duckdb__type_int() %} + INTEGER +{% endmacro %} + +{# bigint ------------------------------------------------- #} +{% macro mysql__type_bigint() %} + signed +{% endmacro %} + +{% macro oracle__type_bigint() %} + numeric +{% endmacro %} + +{% macro clickhouse__type_bigint() %} + BIGINT +{% endmacro %} + +{% macro tidb__type_bigint() %} + signed +{% endmacro %} + +{% macro duckdb__type_bigint() %} + BIGINT +{% endmacro %} + +{# numeric ------------------------------------------------- --#} +{% macro mysql__type_numeric() %} + float +{% endmacro %} + +{% macro clickhouse__type_numeric() %} + Float64 +{% endmacro %} + +{% macro tidb__type_numeric() %} + float +{% endmacro %} + +{% macro duckdb__type_numeric() %} + DOUBLE +{% endmacro %} + +{# very_large_integer --------------------------------------- --#} +{# +Most databases don't have a true unbounded numeric datatype, so we use a really big numeric field. +Our type terminology unfortunately collides with DB terminology (i.e. "big_integer" means different things in different contexts) +so this macro needs to be called very_large_integer. 
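+For example (illustrative): a JSON integer such as 9223372036854775808 (2^63) overflows
+a signed 64-bit column but fits comfortably in the 38-digit decimal types declared below.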
+#}
+{%- macro type_very_large_integer() -%}
+    {{ adapter.dispatch('type_very_large_integer')() }}
+{%- endmacro -%}
+
+{% macro default__type_very_large_integer() %}
+    numeric
+{% endmacro %}
+
+{% macro snowflake__type_very_large_integer() %}
+    numeric
+{% endmacro %}
+
+{% macro mysql__type_very_large_integer() %}
+    decimal(38, 0)
+{% endmacro %}
+
+{% macro clickhouse__type_very_large_integer() %}
+    decimal128(0)
+{% endmacro %}
+
+{% macro tidb__type_very_large_integer() %}
+    decimal(38, 0)
+{% endmacro %}
+
+{% macro duckdb__type_very_large_integer() %}
+    DECIMAL(38, 0)
+{% endmacro %}
+
+{# timestamp ------------------------------------------------- --#}
+{% macro mysql__type_timestamp() %}
+    time
+{% endmacro %}
+
+{%- macro sqlserver__type_timestamp() -%}
+    {#-- in TSQL timestamp is really datetime --#}
+    {#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#}
+    datetime
+{%- endmacro -%}
+
+{% macro clickhouse__type_timestamp() %}
+    DateTime64
+{% endmacro %}
+
+{% macro tidb__type_timestamp() %}
+    time
+{% endmacro %}
+
+{% macro duckdb__type_timestamp() %}
+    TIMESTAMP
+{% endmacro %}
+
+{# timestamp with time zone ------------------------------------------------- #}
+
+{%- macro type_timestamp_with_timezone() -%}
+    {{ adapter.dispatch('type_timestamp_with_timezone')() }}
+{%- endmacro -%}
+
+{% macro default__type_timestamp_with_timezone() %}
+    timestamp with time zone
+{% endmacro %}
+
+{% macro bigquery__type_timestamp_with_timezone() %}
+    timestamp
+{% endmacro %}
+
+{#-- MySQL doesn't allow the cast with nullif to work on DATETIME and doesn't support storing timezones, so we have to use char --#}
+{#-- https://bugs.mysql.com/bug.php?id=77805 --#}
+{%- macro mysql__type_timestamp_with_timezone() -%}
+    char(1024)
+{%- endmacro -%}
+
+{% macro oracle__type_timestamp_with_timezone() %}
+    varchar2(4000)
+{% endmacro %}
+
+{%- macro sqlserver__type_timestamp_with_timezone() -%}
+    datetimeoffset
+{%- endmacro -%}
+
+{% macro redshift__type_timestamp_with_timezone() %}
+    TIMESTAMPTZ
+{% endmacro %}
+
+{% macro clickhouse__type_timestamp_with_timezone() %}
+    DateTime64
+{% endmacro %}
+
+{%- macro tidb__type_timestamp_with_timezone() -%}
+    char(1000)
+{%- endmacro -%}
+
+{%- macro duckdb__type_timestamp_with_timezone() -%}
+    TIMESTAMPTZ
+{%- endmacro -%}
+
+{# timestamp without time zone ------------------------------------------------- #}
+
+{%- macro type_timestamp_without_timezone() -%}
+    {{ adapter.dispatch('type_timestamp_without_timezone')() }}
+{%- endmacro -%}
+
+{% macro default__type_timestamp_without_timezone() %}
+    timestamp
+{% endmacro %}
+
+{%- macro sqlserver__type_timestamp_without_timezone() -%}
+    {#-- in TSQL timestamp is really datetime or datetime2 --#}
+    {#-- https://docs.microsoft.com/en-us/sql/t-sql/functions/date-and-time-data-types-and-functions-transact-sql?view=sql-server-ver15#DateandTimeDataTypes --#}
+    datetime2
+{%- endmacro -%}
+
+{% macro bigquery__type_timestamp_without_timezone() %}
+    datetime
+{% endmacro %}
+
+{% macro oracle__type_timestamp_without_timezone() %}
+    varchar2(4000)
+{% endmacro %}
+
+{% macro redshift__type_timestamp_without_timezone() %}
+    TIMESTAMP
+{% endmacro %}
+
+{% macro tidb__type_timestamp_without_timezone() %}
+    datetime
+{% endmacro %}
+
+{% macro duckdb__type_timestamp_without_timezone() %}
+    TIMESTAMP
+{% endmacro %}
+
+{# time without time zone ------------------------------------------------- #}
+
+{%- macro
type_time_without_timezone() -%} + {{ adapter.dispatch('type_time_without_timezone')() }} +{%- endmacro -%} + +{% macro default__type_time_without_timezone() %} + time +{% endmacro %} + +{% macro oracle__type_time_without_timezone() %} + varchar2(4000) +{% endmacro %} + +{% macro redshift__type_time_without_timezone() %} + TIME +{% endmacro %} + +{% macro clickhouse__type_time_without_timezone() %} + String +{% endmacro %} + +{% macro tidb__type_time_without_timezone() %} + time +{% endmacro %} + +{% macro duckdb__type_time_without_timezone() %} + TIMESTAMP +{% endmacro %} + +{# time with time zone ------------------------------------------------- #} + +{%- macro type_time_with_timezone() -%} + {{ adapter.dispatch('type_time_with_timezone')() }} +{%- endmacro -%} + +{% macro default__type_time_with_timezone() %} + time with time zone +{% endmacro %} + +{%- macro mysql__type_time_with_timezone() -%} + char(1024) +{%- endmacro -%} + +{%- macro sqlserver__type_time_with_timezone() -%} + NVARCHAR(max) +{%- endmacro -%} + +{% macro bigquery__type_time_with_timezone() %} + STRING +{% endmacro %} + +{% macro oracle__type_time_with_timezone() %} + varchar2(4000) +{% endmacro %} + +{% macro snowflake__type_time_with_timezone() %} + varchar +{% endmacro %} + +{% macro redshift__type_time_with_timezone() %} + TIMETZ +{% endmacro %} + +{% macro clickhouse__type_time_with_timezone() %} + String +{% endmacro %} + +{%- macro tidb__type_time_with_timezone() -%} + char(1000) +{%- endmacro -%} + +{%- macro duckdb__type_time_with_timezone() -%} + TIMESTAMPTZ +{%- endmacro -%} +{# date ------------------------------------------------- #} + +{%- macro type_date() -%} + {{ adapter.dispatch('type_date')() }} +{%- endmacro -%} + +{% macro default__type_date() %} + date +{% endmacro %} + +{% macro oracle__type_date() %} + varchar2(4000) +{% endmacro %} + +{%- macro sqlserver__type_date() -%} + date +{%- endmacro -%} + +{% macro clickhouse__type_date() %} + Date32 +{% endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/except.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/except.sql new file mode 100644 index 0000000000000..a0f0c159dc214 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/except.sql @@ -0,0 +1,7 @@ +{% macro mysql__except() %} + {% do exceptions.warn("MySQL does not support EXCEPT operator") %} +{% endmacro %} + +{% macro oracle__except() %} + minus +{% endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/hash.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/hash.sql new file mode 100644 index 0000000000000..184888794b9f0 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/hash.sql @@ -0,0 +1,5 @@ +{# converting hash in varchar _macro #} + +{% macro sqlserver__hash(field) -%} + convert(varchar(32), HashBytes('md5', coalesce(cast({{field}} as {{dbt_utils.type_string()}}), '')), 2) +{%- endmacro %} \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql new file mode 100644 index 0000000000000..cbbfbc4510196 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql
@@ -0,0 +1,317 @@
+{#
+    Adapter Macros for the following functions:
+    - Bigquery: JSON_EXTRACT(json_string_expr, json_path_format) -> https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions
+    - Snowflake: JSON_EXTRACT_PATH_TEXT( <column> , '<path>' ) -> https://docs.snowflake.com/en/sql-reference/functions/json_extract_path_text.html
+    - Redshift: json_extract_path_text('json_string', 'path_elem' [,'path_elem'[, ...] ] [, null_if_invalid ] ) -> https://docs.aws.amazon.com/redshift/latest/dg/JSON_EXTRACT_PATH_TEXT.html
+    - Postgres: json_extract_path_text(<json_field>, 'path' [, 'path' [, ...]]) -> https://www.postgresql.org/docs/12/functions-json.html
+    - MySQL: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html
+    - ClickHouse: JSONExtractString(json_doc, 'path' [, 'path'] ...) -> https://clickhouse.com/docs/en/sql-reference/functions/json-functions/
+    - TiDB: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://docs.pingcap.com/tidb/stable/json-functions
+    - DuckDB: json_extract(json, 'path') note: if path is a LIST, the result will be a LIST of JSON -> https://duckdb.org/docs/extensions/json
+#}
+
+{# format_json_path -------------------------------------------------- #}
+{% macro format_json_path(json_path_list) -%}
+    {{ adapter.dispatch('format_json_path')(json_path_list) }}
+{%- endmacro %}
+
+{% macro default__format_json_path(json_path_list) -%}
+    {{ '.' ~ json_path_list|join('.') }}
+{%- endmacro %}
+
+{% macro oracle__format_json_path(json_path_list) -%}
+    {{ '\'$."' ~ json_path_list|join('."') ~ '"\'' }}
+{%- endmacro %}
+
+{#
+    BigQuery has different JSONPath syntax depending on which function you call.
+    Most of our macros use the "legacy" JSON functions, so this function uses
+    the legacy syntax.
+
+    These paths look like: "$['foo']['bar']"
+#}
+{% macro bigquery__format_json_path(json_path_list) -%}
+    {%- set str_list = [] -%}
+    {%- for json_path in json_path_list -%}
+        {%- if str_list.append(json_path.replace('"', '\\"')) -%} {%- endif -%}
+    {%- endfor -%}
+    {{ '"$[\'' ~ str_list|join('\'][\'') ~ '\']"' }}
+{%- endmacro %}
+
+{#
+    For macros which use the newer JSON functions, define a new_format_json_path
+    macro which generates the correct path syntax.
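+    For example (illustrative only), json_path_list = ["foo", "bar"] renders as
+    "$['foo']['bar']" through the legacy bigquery__format_json_path above.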
+ + These paths look like: '$."foo"."bar"' +#} +{% macro bigquery_new_format_json_path(json_path_list) -%} + {%- set str_list = [] -%} + {%- for json_path in json_path_list -%} + {%- if str_list.append(json_path.replace('\'', '\\\'')) -%} {%- endif -%} + {%- endfor -%} + {{ '\'$."' ~ str_list|join('"."') ~ '"\'' }} +{%- endmacro %} + +{% macro postgres__format_json_path(json_path_list) -%} + {%- set str_list = [] -%} + {%- for json_path in json_path_list -%} + {%- if str_list.append(json_path.replace("'", "''")) -%} {%- endif -%} + {%- endfor -%} + {{ "'" ~ str_list|join("','") ~ "'" }} +{%- endmacro %} + +{% macro mysql__format_json_path(json_path_list) -%} + {# -- '$."x"."y"."z"' #} + {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }} +{%- endmacro %} + +{% macro redshift__format_json_path(json_path_list) -%} + {%- set quote = '"' -%} + {%- set str_list = [] -%} + {%- for json_path in json_path_list -%} + {%- if str_list.append(json_path.replace(quote, quote + quote)) -%} {%- endif -%} + {%- endfor -%} + {{ quote ~ str_list|join(quote + "," + quote) ~ quote }} +{%- endmacro %} + +{% macro snowflake__format_json_path(json_path_list) -%} + {%- set str_list = [] -%} + {%- for json_path in json_path_list -%} + {%- if str_list.append(json_path.replace("'", "''").replace('"', '""')) -%} {%- endif -%} + {%- endfor -%} + {{ "'\"" ~ str_list|join('"."') ~ "\"'" }} +{%- endmacro %} + +{% macro sqlserver__format_json_path(json_path_list) -%} + {# -- '$."x"."y"."z"' #} + {%- set str_list = [] -%} + {%- for json_path in json_path_list -%} + {%- if str_list.append(json_path.replace("'", "''").replace('"', '\\"')) -%} {%- endif -%} + {%- endfor -%} + {{ "'$.\"" ~ str_list|join(".") ~ "\"'" }} +{%- endmacro %} + +{% macro clickhouse__format_json_path(json_path_list) -%} + {%- set str_list = [] -%} + {%- for json_path in json_path_list -%} + {%- if str_list.append(json_path.replace("'", "''").replace('"', '\\"')) -%} {%- endif -%} + {%- endfor -%} + {{ "'" ~ str_list|join("','") ~ "'" }} +{%- endmacro %} + +{% macro tidb__format_json_path(json_path_list) -%} + {# -- '$."x"."y"."z"' #} + {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }} +{%- endmacro %} + +{% macro duckdb__format_json_path(json_path_list) -%} + {# -- '$."x"."y"."z"' #} + {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }} +{%- endmacro %} + +{# json_extract ------------------------------------------------- #} + +{% macro json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + {{ adapter.dispatch('json_extract')(from_table, json_column, json_path_list, normalized_json_path) }} +{%- endmacro %} + +{% macro default__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + json_extract({{ from_table}}.{{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} + +{% macro oracle__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }}) +{%- endmacro %} + +{% macro bigquery__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + {%- if from_table|string() == '' %} + json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) + {% else %} + json_extract({{ from_table}}.{{ json_column }}, {{ format_json_path(normalized_json_path) }}) + {% endif -%} +{%- endmacro %} + +{% macro postgres__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + {%- if from_table|string() == '' %} + jsonb_extract_path({{ json_column }}, 
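+    {#-- illustrative: with json_path_list = ['foo'], postgres__format_json_path renders 'foo', so this call becomes jsonb_extract_path(<column>, 'foo') --#}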
{{ format_json_path(json_path_list) }}) + {% else %} + jsonb_extract_path({{ from_table }}.{{ json_column }}, {{ format_json_path(json_path_list) }}) + {% endif -%} +{%- endmacro %} + +{% macro mysql__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + {%- if from_table|string() == '' %} + json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) + {% else %} + json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }}) + {% endif -%} +{%- endmacro %} + +{% macro redshift__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + {%- if from_table|string() != '' -%} + {%- set json_column = from_table|string() + "." + json_column|string() -%} + {%- endif -%} + case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end +{%- endmacro %} + +{% macro snowflake__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + {%- if from_table|string() == '' %} + get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }}) + {% else %} + get_path(parse_json({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }}) + {% endif -%} +{%- endmacro %} + +{% macro sqlserver__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + json_query({{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} + +{% macro clickhouse__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + {%- if from_table|string() == '' %} + JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }}) + {% else %} + JSONExtractRaw(assumeNotNull({{ from_table }}.{{ json_column }}), {{ format_json_path(json_path_list) }}) + {% endif -%} +{%- endmacro %} + +{% macro tidb__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + {%- if from_table|string() == '' %} + json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) + {% else %} + json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }}) + {% endif -%} +{%- endmacro %} + +{% macro duckdb__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%} + {%- if from_table|string() == '' %} + json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) + {% else %} + json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }}) + {% endif -%} +{%- endmacro %} + +{# json_extract_scalar ------------------------------------------------- #} + +{% macro json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + {{ adapter.dispatch('json_extract_scalar')(json_column, json_path_list, normalized_json_path) }} +{%- endmacro %} + +{% macro default__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + json_extract_scalar({{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} + +{% macro oracle__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }}) +{%- endmacro %} + +{% macro bigquery__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + json_extract_scalar({{ json_column }}, {{ format_json_path(normalized_json_path) }}) +{%- endmacro %} + +{% macro postgres__json_extract_scalar(json_column, json_path_list, 
normalized_json_path) -%} + jsonb_extract_path_text({{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} + +{% macro mysql__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }} RETURNING CHAR) +{%- endmacro %} + +{% macro redshift__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + case when {{ json_column }}.{{ format_json_path(json_path_list) }} != '' then {{ json_column }}.{{ format_json_path(json_path_list) }} end +{%- endmacro %} + +{% macro snowflake__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + to_varchar(get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }})) +{%- endmacro %} + +{% macro sqlserver__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + json_value({{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} + +{% macro clickhouse__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }}) +{%- endmacro %} + +{% macro tidb__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + IF( + JSON_UNQUOTE(JSON_EXTRACT({{ json_column }}, {{ format_json_path(normalized_json_path) }})) = 'null', + NULL, + JSON_UNQUOTE(JSON_EXTRACT({{ json_column }}, {{ format_json_path(normalized_json_path) }})) + ) +{%- endmacro %} + +{% macro duckdb__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%} + json_extract_string({{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} + +{# json_extract_array ------------------------------------------------- #} + +{% macro json_extract_array(json_column, json_path_list, normalized_json_path) -%} + {{ adapter.dispatch('json_extract_array')(json_column, json_path_list, normalized_json_path) }} +{%- endmacro %} + +{% macro default__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + json_extract_array({{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} + +{% macro oracle__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + json_value({{ json_column }}, {{ format_json_path(normalized_json_path) }}) +{%- endmacro %} + +{% macro bigquery__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + json_extract_array({{ json_column }}, {{ format_json_path(normalized_json_path) }}) +{%- endmacro %} + +{% macro postgres__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + jsonb_extract_path({{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} + +{% macro mysql__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) +{%- endmacro %} + +{% macro redshift__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + {{ json_column }}.{{ format_json_path(json_path_list) }} +{%- endmacro %} + +{% macro snowflake__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + get_path(parse_json({{ json_column }}), {{ format_json_path(json_path_list) }}) +{%- endmacro %} + +{% macro sqlserver__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + json_query({{ json_column }}, {{ format_json_path(json_path_list) }}) +{%- endmacro %} + +{% macro 
clickhouse__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + JSONExtractArrayRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }}) +{%- endmacro %} + +{% macro tidb__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) +{%- endmacro %} + +{% macro duckdb__json_extract_array(json_column, json_path_list, normalized_json_path) -%} + json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }}) +{%- endmacro %} + +{# json_extract_string_array ------------------------------------------------- #} + +{% macro json_extract_string_array(json_column, json_path_list, normalized_json_path) -%} + {{ adapter.dispatch('json_extract_string_array')(json_column, json_path_list, normalized_json_path) }} +{%- endmacro %} + +{% macro default__json_extract_string_array(json_column, json_path_list, normalized_json_path) -%} + {{ json_extract_array(json_column, json_path_list, normalized_json_path) }} +{%- endmacro %} + +{# +See https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_extract_string_array + +BigQuery does not allow NULL entries in REPEATED fields, so we replace those with literal "NULL" strings. +#} +{% macro bigquery__json_extract_string_array(json_column, json_path_list, normalized_json_path) -%} + array( + select ifnull(x, "NULL") + from unnest(json_value_array({{ json_column }}, {{ bigquery_new_format_json_path(normalized_json_path) }})) as x + ) +{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql new file mode 100644 index 0000000000000..87862498cfc5f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/quote.sql @@ -0,0 +1,16 @@ +{# quote ---------------------------------- #} +{% macro quote(column_name) -%} + {{ adapter.dispatch('quote')(column_name) }} +{%- endmacro %} + +{% macro default__quote(column_name) -%} + adapter.quote(column_name) +{%- endmacro %} + +{% macro oracle__quote(column_name) -%} + {{ '\"' ~ column_name ~ '\"'}} +{%- endmacro %} + +{% macro clickhouse__quote(column_name) -%} + {{ '\"' ~ column_name ~ '\"'}} +{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql new file mode 100644 index 0000000000000..9de2965409aad --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/surrogate_key.sql @@ -0,0 +1,25 @@ +{# surrogate_key ---------------------------------- #} + +{% macro oracle__surrogate_key(field_list) -%} + ora_hash( + {%- for field in field_list %} + {% if not loop.last %} + {{ field }} || '~' || + {% else %} + {{ field }} + {% endif %} + {%- endfor %} + ) +{%- endmacro %} + +{% macro clickhouse__surrogate_key(field_list) -%} + assumeNotNull(hex(MD5( + {%- for field in field_list %} + {% if not loop.last %} + toString({{ field }}) || '~' || + {% else %} + toString({{ field }}) + {% endif %} + {%- endfor %} + ))) +{%- endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/type_conversions.sql 
b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/type_conversions.sql new file mode 100644 index 0000000000000..90b2337ed3ba0 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/type_conversions.sql @@ -0,0 +1,105 @@ + +{# boolean_to_string ------------------------------------------------- #} +{% macro boolean_to_string(boolean_column) -%} + {{ adapter.dispatch('boolean_to_string')(boolean_column) }} +{%- endmacro %} + +{% macro default__boolean_to_string(boolean_column) -%} + {{ boolean_column }} +{%- endmacro %} + +{% macro redshift__boolean_to_string(boolean_column) -%} + case when {{ boolean_column }} then 'true' else 'false' end +{%- endmacro %} + +{# array_to_string ------------------------------------------------- #} +{% macro array_to_string(array_column) -%} + {{ adapter.dispatch('array_to_string')(array_column) }} +{%- endmacro %} + +{% macro default__array_to_string(array_column) -%} + {{ array_column }} +{%- endmacro %} + +{% macro bigquery__array_to_string(array_column) -%} + array_to_string({{ array_column }}, "|", "") +{%- endmacro %} + +{% macro oracle__array_to_string(array_column) -%} + cast({{ array_column }} as varchar2(4000)) +{%- endmacro %} + +{% macro sqlserver__array_to_string(array_column) -%} + cast({{ array_column }} as {{dbt_utils.type_string()}}) +{%- endmacro %} + +{% macro redshift__array_to_string(array_column) -%} + json_serialize({{array_column}}) +{%- endmacro %} + +{# object_to_string ------------------------------------------------- #} +{% macro object_to_string(object_column) -%} + {{ adapter.dispatch('object_to_string')(object_column) }} +{%- endmacro %} + +{% macro default__object_to_string(object_column) -%} + {{ object_column }} +{%- endmacro %} + +{% macro redshift__object_to_string(object_column) -%} + json_serialize({{object_column}}) +{%- endmacro %} + +{# cast_to_boolean ------------------------------------------------- #} +{% macro cast_to_boolean(field) -%} + {{ adapter.dispatch('cast_to_boolean')(field) }} +{%- endmacro %} + +{% macro default__cast_to_boolean(field) -%} + cast({{ field }} as boolean) +{%- endmacro %} + +{# -- MySQL does not support cast function converting string directly to boolean (an alias of tinyint(1), https://dev.mysql.com/doc/refman/8.0/en/cast-functions.html#function_cast #} +{% macro mysql__cast_to_boolean(field) -%} + IF(lower({{ field }}) = 'true', true, false) +{%- endmacro %} + +{# TiDB does not support cast string to boolean #} +{% macro tidb__cast_to_boolean(field) -%} + IF(lower({{ field }}) = 'true', true, false) +{%- endmacro %} + +{% macro duckdb__cast_to_boolean(field) -%} + cast({{ field }} as boolean) +{%- endmacro %} + +{% macro redshift__cast_to_boolean(field) -%} + cast({{ field }} as boolean) +{%- endmacro %} + +{# -- MS SQL Server does not support converting string directly to boolean, it must be casted as bit #} +{% macro sqlserver__cast_to_boolean(field) -%} + cast({{ field }} as bit) +{%- endmacro %} + +{# -- ClickHouse does not support converting string directly to Int8, it must go through int first #} +{% macro clickhouse__cast_to_boolean(field) -%} + IF(lower({{ field }}) = 'true', 1, 0) +{%- endmacro %} + +{# empty_string_to_null ------------------------------------------------- #} +{% macro empty_string_to_null(field) -%} + {{ return(adapter.dispatch('empty_string_to_null')(field)) }} +{%- endmacro %} + +{%- macro default__empty_string_to_null(field) -%} + nullif({{ field }}, '') +{%- 
endmacro %}
+
+{%- macro duckdb__empty_string_to_null(field) -%}
+    nullif(nullif({{ field }}, 'null'), '')
+{%- endmacro -%}
+
+{%- macro redshift__empty_string_to_null(field) -%}
+    nullif({{ field }}::varchar, '')
+{%- endmacro -%}
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/get_custom_schema.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/get_custom_schema.sql
new file mode 100644
index 0000000000000..77e83c7acd48f
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/get_custom_schema.sql
@@ -0,0 +1,4 @@
+-- see https://docs.getdbt.com/docs/building-a-dbt-project/building-models/using-custom-schemas/#an-alternative-pattern-for-generating-schema-names
+{% macro generate_schema_name(custom_schema_name, node) -%}
+    {{ generate_schema_name_for_env(custom_schema_name, node) }}
+{%- endmacro %}
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql
new file mode 100644
index 0000000000000..f3f4c12d75df6
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/incremental.sql
@@ -0,0 +1,61 @@
+{#
+    These macros control how incremental models are updated in Airbyte's normalization step
+    - get_max_normalized_cursor retrieves the maximum cursor value already normalized into the target table
+    - incremental_clause controls the predicate used to filter the new data to process incrementally
+#}
+
+{% macro incremental_clause(col_emitted_at, tablename) -%}
+    {{ adapter.dispatch('incremental_clause')(col_emitted_at, tablename) }}
+{%- endmacro %}
+
+{%- macro default__incremental_clause(col_emitted_at, tablename) -%}
+{% if is_incremental() %}
+and coalesce(
+    cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) > (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}),
+    {# -- if {{ col_emitted_at }} is NULL in either table, the previous comparison would evaluate to NULL, #}
+    {# -- so we coalesce and make sure the row is always returned for incremental processing instead #}
+    true)
+{% endif %}
+{%- endmacro -%}
+
+{# -- see https://on-systems.tech/113-beware-dbt-incremental-updates-against-snowflake-external-tables/ #}
+{%- macro snowflake__incremental_clause(col_emitted_at, tablename) -%}
+{% if is_incremental() %}
+    {% if get_max_normalized_cursor(col_emitted_at, tablename) %}
+and cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >
+    cast('{{ get_max_normalized_cursor(col_emitted_at, tablename) }}' as {{ type_timestamp_with_timezone() }})
+    {% endif %}
+{% endif %}
+{%- endmacro -%}
+
+{# -- see https://cloud.google.com/bigquery/docs/querying-partitioned-tables#best_practices_for_partition_pruning #}
+{%- macro bigquery__incremental_clause(col_emitted_at, tablename) -%}
+{% if is_incremental() %}
+    {% if get_max_normalized_cursor(col_emitted_at, tablename) %}
+and cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >
+    cast('{{ get_max_normalized_cursor(col_emitted_at, tablename) }}' as {{ type_timestamp_with_timezone() }})
+    {% endif %}
+{% endif %}
+{%- endmacro -%}
+
+{%- macro sqlserver__incremental_clause(col_emitted_at, tablename) -%}
+{% if is_incremental() %}
+and ((select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }}) is null
+  or cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }}) >
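+    {#-- the "is null" guard above lets the very first incremental run pass through when the target table is still empty --#}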
+ (select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }})) +{% endif %} +{%- endmacro -%} + +{% macro get_max_normalized_cursor(col_emitted_at, tablename) %} +{% if execute and is_incremental() %} + {% if env_var('INCREMENTAL_CURSOR', 'UNSET') == 'UNSET' %} + {% set query %} + select max(cast({{ col_emitted_at }} as {{ type_timestamp_with_timezone() }})) from {{ tablename }} + {% endset %} + {% set max_cursor = run_query(query).columns[0][0] %} + {% do return(max_cursor) %} + {% else %} + {% do return(env_var('INCREMENTAL_CURSOR')) %} + {% endif %} +{% endif %} +{% endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/schema_tests/equal_rowcount.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/schema_tests/equal_rowcount.sql new file mode 100644 index 0000000000000..0dd4dc62000e4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/schema_tests/equal_rowcount.sql @@ -0,0 +1,34 @@ +{% macro oracle__test_equal_rowcount(model, compare_model) %} + +{#-- Needs to be set at parse time, before we return '' below --#} +{{ config(fail_calc = 'coalesce(diff_count, 0)') }} + +{#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #} +{%- if not execute -%} + {{ return('') }} +{% endif %} + +with a as ( + + select count(*) as count_a from {{ model }} + +), +b as ( + + select count(*) as count_b from {{ compare_model }} + +), +final as ( + + select + count_a, + count_b, + abs(count_a - count_b) as diff_count + from a + cross join b + +) + +select diff_count from final + +{% endmacro %} diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/schema_tests/equality.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/schema_tests/equality.sql new file mode 100644 index 0000000000000..ef83a024f479f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/schema_tests/equality.sql @@ -0,0 +1,107 @@ +{# +-- Adapted from https://github.com/dbt-labs/dbt-utils/blob/0-19-0-updates/macros/schema_tests/equality.sql +-- dbt-utils version: 0.6.4 +-- This macro needs to be updated accordingly when dbt-utils is upgraded. +-- This is needed because MySQL does not support the EXCEPT operator! 
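+-- Illustrative usage from a schema.yml (an assumption, matching the dbt-utils 0.6.4
+-- test interface this macro overrides):
+--   tests:
+--     - dbt_utils.equality:
+--         compare_model: ref('expected_model')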
+#}
+
+{% macro mysql__test_equality(model, compare_model, compare_columns=None) %}
+
+    {%- if not execute -%}
+        {{ return('') }}
+    {% endif %}
+
+    {%- do dbt_utils._is_relation(model, 'test_equality') -%}
+
+    {%- if not compare_columns -%}
+        {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
+        {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%}
+    {%- endif -%}
+
+    {% set compare_cols_csv = compare_columns | join(', ') %}
+
+    with a as (
+        select * from {{ model }}
+    ),
+
+    b as (
+        select * from {{ compare_model }}
+    ),
+
+    a_minus_b as (
+        select {{ compare_cols_csv }} from a
+        where ({{ compare_cols_csv }}) not in
+            (select {{ compare_cols_csv }} from b)
+    ),
+
+    b_minus_a as (
+        select {{ compare_cols_csv }} from b
+        where ({{ compare_cols_csv }}) not in
+            (select {{ compare_cols_csv }} from a)
+    ),
+
+    unioned as (
+        select * from a_minus_b
+        union all
+        select * from b_minus_a
+    ),
+
+    final as (
+        select (select count(*) from unioned) +
+            (select abs(
+                (select count(*) from a_minus_b) -
+                (select count(*) from b_minus_a)
+            ))
+            as count
+    )
+
+    select count from final
+
+{% endmacro %}
+
+{% macro oracle__test_equality(model) %}
+    {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs. #}
+    {%- if not execute -%}
+        {{ return('') }}
+    {% endif %}
+
+    -- setup
+    {%- do dbt_utils._is_relation(model, 'test_equality') -%}
+
+    {#-
+        If the compare_cols arg is provided, we can run this test without querying the
+        information schema, which allows the model to be an ephemeral model
+    -#}
+    {%- set compare_columns = kwargs.get('compare_columns', None) -%}
+
+    {%- if not compare_columns -%}
+        {%- do dbt_utils._is_ephemeral(model, 'test_equality') -%}
+        {%- set compare_columns = adapter.get_columns_in_relation(model) | map(attribute='quoted') -%}
+    {%- endif -%}
+
+    {% set compare_model = kwargs.get('compare_model', kwargs.get('arg')) %}
+    {% set compare_cols_csv = compare_columns | join(', ') %}
+
+    with a as (
+        select * from {{ model }}
+    ),
+    b as (
+        select * from {{ compare_model }}
+    ),
+    a_minus_b as (
+        select {{compare_cols_csv}} from a
+        {{ dbt_utils.except() }}
+        select {{compare_cols_csv}} from b
+    ),
+    b_minus_a as (
+        select {{compare_cols_csv}} from b
+        {{ dbt_utils.except() }}
+        select {{compare_cols_csv}} from a
+    ),
+    unioned as (
+        select * from a_minus_b
+        union all
+        select * from b_minus_a
+    )
+    select count(*) from unioned
+{% endmacro %}
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/should_full_refresh.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/should_full_refresh.sql
new file mode 100644
index 0000000000000..ff2c6d54ecce3
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/should_full_refresh.sql
@@ -0,0 +1,51 @@
+{#
+    This overrides the behavior of the macro `should_full_refresh` so that full refreshes are triggered if:
+    - the dbt CLI is run with the --full-refresh flag, or the model is explicitly configured for full_refresh
+    - the column _airbyte_ab_id does not exist in the normalized tables, to make sure it ends up well populated.
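+
+    For example, `dbt run --full-refresh` sets flags.FULL_REFRESH for every model, and a
+    single model can opt in with {{ config(full_refresh = true) }}; the macros below also
+    force a rebuild whenever the _airbyte_ab_id column is missing from the target table.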
+#}
+
+{%- macro need_full_refresh(col_ab_id, target_table=this) -%}
+    {%- if not execute -%}
+        {{ return(false) }}
+    {%- endif -%}
+    {%- set found_column = [] %}
+    {%- set cols = adapter.get_columns_in_relation(target_table) -%}
+    {%- for col in cols -%}
+        {%- if col.column == col_ab_id -%}
+            {% do found_column.append(col.column) %}
+        {%- endif -%}
+    {%- endfor -%}
+    {%- if found_column -%}
+        {{ return(false) }}
+    {%- else -%}
+        {{ dbt_utils.log_info(target_table ~ "." ~ col_ab_id ~ " does not exist yet. The table will be created or rebuilt with dbt.full_refresh") }}
+        {{ return(true) }}
+    {%- endif -%}
+{%- endmacro -%}
+
+{%- macro should_full_refresh() -%}
+    {% set config_full_refresh = config.get('full_refresh') %}
+    {%- if config_full_refresh is none -%}
+        {% set config_full_refresh = flags.FULL_REFRESH %}
+    {%- endif -%}
+    {%- if not config_full_refresh -%}
+        {% set config_full_refresh = need_full_refresh(get_col_ab_id(), this) %}
+    {%- endif -%}
+    {% do return(config_full_refresh) %}
+{%- endmacro -%}
+
+{%- macro get_col_ab_id() -%}
+    {{ adapter.dispatch('get_col_ab_id')() }}
+{%- endmacro -%}
+
+{%- macro default__get_col_ab_id() -%}
+    _airbyte_ab_id
+{%- endmacro -%}
+
+{%- macro oracle__get_col_ab_id() -%}
+    "_AIRBYTE_AB_ID"
+{%- endmacro -%}
+
+{%- macro snowflake__get_col_ab_id() -%}
+    _AIRBYTE_AB_ID
+{%- endmacro -%}
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/star_intersect.sql b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/star_intersect.sql
new file mode 100644
index 0000000000000..3f3d06c4eb106
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/macros/star_intersect.sql
@@ -0,0 +1,46 @@
+{#
+    Similar to the star macro here: https://github.com/dbt-labs/dbt-utils/blob/main/macros/sql/star.sql
+
+    This star_intersect macro takes an additional 'intersect' relation as argument.
+    Its behavior is to select columns from both the 'intersect' and 'from' relations with the following rules:
+    - if a column exists in both the 'from' and 'intersect' relations, then the column from 'intersect' is used
+    - if a column exists only in the 'from' relation, then the column from 'from' is used
+#}
+{% macro star_intersect(from, intersect, from_alias=False, intersect_alias=False, except=[]) -%}
+    {%- do dbt_utils._is_relation(from, 'star_intersect') -%}
+    {%- do dbt_utils._is_ephemeral(from, 'star_intersect') -%}
+    {%- do dbt_utils._is_relation(intersect, 'star_intersect') -%}
+    {%- do dbt_utils._is_ephemeral(intersect, 'star_intersect') -%}
+
+    {#-- Prevent querying of db in parsing mode. This works because this macro does not create any new refs.
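+       (dbt parses this file before execution; returning '' during parsing avoids calling
+       adapter.get_columns_in_relation against a database that may not be reachable yet)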
#}
+    {%- if not execute -%}
+        {{ return('') }}
+    {% endif %}
+
+    {%- set include_cols = [] %}
+    {%- set cols = adapter.get_columns_in_relation(from) -%}
+    {%- set except = except | map("lower") | list %}
+    {%- for col in cols -%}
+        {%- if col.column|lower not in except -%}
+            {% do include_cols.append(col.column) %}
+        {%- endif %}
+    {%- endfor %}
+
+    {%- set include_intersect_cols = [] %}
+    {%- set intersect_cols = adapter.get_columns_in_relation(intersect) -%}
+    {%- for col in intersect_cols -%}
+        {%- if col.column|lower not in except -%}
+            {% do include_intersect_cols.append(col.column) %}
+        {%- endif %}
+    {%- endfor %}
+
+    {%- for col in include_cols %}
+        {%- if col in include_intersect_cols -%}
+            {%- if intersect_alias %}{{ intersect_alias }}.{% else %}{%- endif -%}{{ adapter.quote(col)|trim }}
+            {%- if not loop.last %},{{ '\n  ' }}{% endif %}
+        {%- else %}
+            {%- if from_alias %}{{ from_alias }}.{% else %}{{ from }}.{%- endif -%}{{ adapter.quote(col)|trim }} as {{ adapter.quote(col)|trim }}
+            {%- if not loop.last %},{{ '\n  ' }}{% endif %}
+        {%- endif %}
+    {%- endfor -%}
+{%- endmacro %}
diff --git a/airbyte-integrations/bases/base-normalization/dbt-project-template/packages.yml b/airbyte-integrations/bases/base-normalization/dbt-project-template/packages.yml
new file mode 100755
index 0000000000000..33b4edd58c8c6
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt-project-template/packages.yml
@@ -0,0 +1,5 @@
+# add dependencies. these will get pulled during the `dbt deps` process.
+
+packages:
+  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
+    revision: 0.8.2
diff --git a/airbyte-integrations/bases/base-normalization/dbt.Dockerfile b/airbyte-integrations/bases/base-normalization/dbt.Dockerfile
new file mode 100644
index 0000000000000..09b0e3c94064a
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/dbt.Dockerfile
@@ -0,0 +1,3 @@
+# This Dockerfile only exists to pull and re-export this image converted to the local arch of this machine.
+# It is then consumed by the Dockerfile in this directory as "fishtownanalytics/dbt:1.0.0-dev"
+FROM fishtownanalytics/dbt:1.0.0
\ No newline at end of file
diff --git a/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml b/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml
new file mode 100644
index 0000000000000..c9b9331f3e295
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/docker-compose.build.yaml
@@ -0,0 +1,66 @@
+version: "3.7"
+
+services:
+  normalization:
+    image: airbyte/normalization:${VERSION}
+    build:
+      dockerfile: Dockerfile
+      context: .
+      labels:
+        io.airbyte.git-revision: ${GIT_REVISION}
+  normalization-mssql:
+    image: airbyte/normalization-mssql:${VERSION}
+    build:
+      dockerfile: mssql.Dockerfile
+      context: .
+      labels:
+        io.airbyte.git-revision: ${GIT_REVISION}
+  normalization-mysql:
+    image: airbyte/normalization-mysql:${VERSION}
+    build:
+      dockerfile: mysql.Dockerfile
+      context: .
+      labels:
+        io.airbyte.git-revision: ${GIT_REVISION}
+  normalization-oracle:
+    image: airbyte/normalization-oracle:${VERSION}
+    build:
+      dockerfile: oracle.Dockerfile
+      context: .
+      labels:
+        io.airbyte.git-revision: ${GIT_REVISION}
+  normalization-clickhouse:
+    image: airbyte/normalization-clickhouse:${VERSION}
+    build:
+      dockerfile: clickhouse.Dockerfile
+      context: .
+      labels:
+        io.airbyte.git-revision: ${GIT_REVISION}
+  normalization-snowflake:
+    image: airbyte/normalization-snowflake:${VERSION}
+    build:
+      dockerfile: snowflake.Dockerfile
+      context: .
+ labels: + io.airbyte.git-revision: ${GIT_REVISION} + normalization-redshift: + image: airbyte/normalization-redshift:${VERSION} + build: + dockerfile: redshift.Dockerfile + context: . + labels: + io.airbyte.git-revision: ${GIT_REVISION} + normalization-tidb: + image: airbyte/normalization-tidb:${VERSION} + build: + dockerfile: tidb.Dockerfile + context: . + labels: + io.airbyte.git-revision: ${GIT_REVISION} + normalization-duckdb: + image: airbyte/normalization-duckdb:${VERSION} + build: + dockerfile: duckdb.Dockerfile + context: . + labels: + io.airbyte.git-revision: ${GIT_REVISION} diff --git a/airbyte-integrations/bases/base-normalization/docker-compose.yaml b/airbyte-integrations/bases/base-normalization/docker-compose.yaml new file mode 100644 index 0000000000000..3b85f9bf0e9ec --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/docker-compose.yaml @@ -0,0 +1,22 @@ +version: "3.7" + +# this file only exists so that we can easily check that all of these images exist in docker hub in check_images_exist.sh +services: + normalization: + image: airbyte/normalization:${VERSION} + normalization-mssql: + image: airbyte/normalization-mssql:${VERSION} + normalization-mysql: + image: airbyte/normalization-mysql:${VERSION} + normalization-oracle: + image: airbyte/normalization-oracle:${VERSION} + normalization-clickhouse: + image: airbyte/normalization-clickhouse:${VERSION} + normalization-snowflake: + image: airbyte/normalization-snowflake:${VERSION} + normalization-redshift: + image: airbyte/normalization-redshift:${VERSION} + normalization-tidb: + image: airbyte/normalization-tidb:${VERSION} + normalization-duckdb: + image: airbyte/normalization-duckdb:${VERSION} diff --git a/airbyte-integrations/bases/base-normalization/duckdb.Dockerfile b/airbyte-integrations/bases/base-normalization/duckdb.Dockerfile new file mode 100644 index 0000000000000..af039e7114ecd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/duckdb.Dockerfile @@ -0,0 +1,40 @@ +FROM fishtownanalytics/dbt:1.0.0 +COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte + +# Install SSH Tunneling dependencies +RUN apt-get update && apt-get install -y jq sshpass + +WORKDIR /airbyte +COPY entrypoint.sh . +COPY build/sshtunneling.sh . + +WORKDIR /airbyte/normalization_code +COPY normalization ./normalization +COPY setup.py . +COPY dbt-project-template/ ./dbt-template/ + +# Install python dependencies +WORKDIR /airbyte/base_python_structs + +# workaround for https://github.com/yaml/pyyaml/issues/601 +# this should be fixed in the airbyte/base-airbyte-protocol-python image +RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation + +RUN pip install . + +WORKDIR /airbyte/normalization_code +RUN pip install . 
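+
+# Pin the dbt adapter for DuckDB so normalization builds stay reproducible against
+# the dbt 1.0.0 base image above.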
+RUN pip install dbt-duckdb==1.0.1 + +#adding duckdb manually (outside of setup.py - lots of errors) +RUN pip install duckdb + +WORKDIR /airbyte/normalization_code/dbt-template/ +# Download external dbt dependencies +RUN dbt deps + +WORKDIR /airbyte +ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" +ENTRYPOINT ["/airbyte/entrypoint.sh"] + +LABEL io.airbyte.name=airbyte/normalization-duckdb diff --git a/airbyte-integrations/bases/base-normalization/entrypoint.sh b/airbyte-integrations/bases/base-normalization/entrypoint.sh new file mode 100755 index 0000000000000..a1df178483c27 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/entrypoint.sh @@ -0,0 +1,160 @@ +#!/usr/bin/env bash + +set -e # tells bash, in a script, to exit whenever anything returns a non-zero return value. + +function echo2() { + echo >&2 "$@" +} + +function error() { + echo2 "$@" + exit 1 +} + +function config_cleanup() { + # Remove config file as it might still contain sensitive credentials (for example, + # injected OAuth Parameters should not be visible to custom docker images running custom transformation operations) + rm -f "${CONFIG_FILE}" +} + +function check_dbt_event_buffer_size() { + ret=0 + dbt --help | grep -E -- '--event-buffer-size' && return + ret=1 +} + +PROJECT_DIR=$(pwd) + +# How many commits should be downloaded from git to view history of a branch +GIT_HISTORY_DEPTH=5 + +# This function produces a working DBT project folder at the $PROJECT_DIR path so that dbt commands can be run +# from it successfully with the proper credentials. This can be accomplished by providing different custom variables +# to tweak the final project structure. For example, we can either use a user-provided base folder (git repo) or +# use the standard/base template folder to generate normalization models from. +function configuredbt() { + # We first need to generate a workspace folder for a dbt project to run from: + if [[ -z "${GIT_REPO}" ]]; then + # No git repository provided, use the dbt-template folder (shipped inside normalization docker image) + # as the base folder for dbt workspace + cp -r /airbyte/normalization_code/dbt-template/* "${PROJECT_DIR}" + echo "Running: transform-config --config ${CONFIG_FILE} --integration-type ${INTEGRATION_TYPE} --out ${PROJECT_DIR}" + set +e # allow script to continue running even if next commands fail to run properly + # Generate a profiles.yml file for the selected destination/integration type + transform-config --config "${CONFIG_FILE}" --integration-type "${INTEGRATION_TYPE}" --out "${PROJECT_DIR}" + if [[ -n "${CATALOG_FILE}" ]]; then + # If catalog file is provided, generate normalization models, otherwise skip it + echo "Running: transform-catalog --integration-type ${INTEGRATION_TYPE} --profile-config-dir ${PROJECT_DIR} --catalog ${CATALOG_FILE} --out ${PROJECT_DIR}/models/generated/ --json-column _airbyte_data" + transform-catalog --integration-type "${INTEGRATION_TYPE}" --profile-config-dir "${PROJECT_DIR}" --catalog "${CATALOG_FILE}" --out "${PROJECT_DIR}/models/generated/" --json-column "_airbyte_data" + TRANSFORM_EXIT_CODE=$? + if [ ${TRANSFORM_EXIT_CODE} -ne 0 ]; then + echo -e "\nShowing destination_catalog.json to diagnose/debug errors (${TRANSFORM_EXIT_CODE}):\n" + cat "${CATALOG_FILE}" | jq + exit ${TRANSFORM_EXIT_CODE} + fi + fi + set -e # tells bash, in a script, to exit whenever anything returns a non-zero return value. 
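+    # Otherwise a custom git repository was provided: use it as the dbt project
+    # instead of the bundled dbt-template folder.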
+  else
+    trap config_cleanup EXIT
+    # Use git repository as a base workspace folder for dbt projects
+    if [[ -d git_repo ]]; then
+      rm -rf git_repo
+    fi
+    # Make a shallow clone of the latest git repository in the workspace folder
+    if [[ -z "${GIT_BRANCH}" ]]; then
+      # No git branch specified, use the default branch of the git repository
+      echo "Running: git clone --depth ${GIT_HISTORY_DEPTH} --single-branch \$GIT_REPO git_repo"
+      git clone --depth ${GIT_HISTORY_DEPTH} --single-branch "${GIT_REPO}" git_repo
+    else
+      # Checkout the requested branch from the git repository
+      echo "Running: git clone --depth ${GIT_HISTORY_DEPTH} -b ${GIT_BRANCH} --single-branch \$GIT_REPO git_repo"
+      git clone --depth ${GIT_HISTORY_DEPTH} -b "${GIT_BRANCH}" --single-branch "${GIT_REPO}" git_repo
+    fi
+    # Print a few history entries to make it easier for users to verify the right code version has been checked out from git
+    echo "Last 5 commits in git_repo:"
+    (cd git_repo; git log --oneline -${GIT_HISTORY_DEPTH}; cd -)
+    # Generate a profiles.yml file for the selected destination/integration type
+    echo "Running: transform-config --config ${CONFIG_FILE} --integration-type ${INTEGRATION_TYPE} --out ${PROJECT_DIR}"
+    transform-config --config "${CONFIG_FILE}" --integration-type "${INTEGRATION_TYPE}" --out "${PROJECT_DIR}"
+    config_cleanup
+  fi
+}
+
+## todo: make it easy to select source or destination and validate based on selection by adding an integration type env variable.
+function main() {
+  CMD="$1"
+  shift 1 || error "command not specified."
+
+  while [ $# -ne 0 ]; do
+    case "$1" in
+    --config)
+      CONFIG_FILE="$2"
+      shift 2
+      ;;
+    --catalog)
+      CATALOG_FILE="$2"
+      shift 2
+      ;;
+    --integration-type)
+      INTEGRATION_TYPE="$2"
+      shift 2
+      ;;
+    --git-repo)
+      GIT_REPO="$2"
+      shift 2
+      ;;
+    --git-branch)
+      GIT_BRANCH="$2"
+      shift 2
+      ;;
+    *)
+      error "Unknown option: $1"
+      ;;
+    esac
+  done
+
+  case "$CMD" in
+  run)
+    configuredbt
+    . /airbyte/sshtunneling.sh
+    openssh "${PROJECT_DIR}/ssh.json"
+    trap 'closessh' EXIT
+
+    set +e # allow script to continue running even if next commands fail to run properly
+    # We don't run dbt 1.0.x on all destinations (because their plugins don't support it yet)
+    # So we need to only pass `--event-buffer-size` if it's supported by DBT.
+    # Same goes for JSON formatted logging.
+    check_dbt_event_buffer_size
+    if [ "$ret" -eq 0 ]; then
+      echo -e "\nDBT >=1.0.0 detected; using 10K event buffer size\n"
+      dbt_additional_args="--event-buffer-size=10000 --log-format json"
+    else
+      dbt_additional_args=""
+    fi
+
+    # Run dbt to compile and execute the generated normalization models
+    dbt ${dbt_additional_args} run --profiles-dir "${PROJECT_DIR}" --project-dir "${PROJECT_DIR}"
+    DBT_EXIT_CODE=$?
+    if [ ${DBT_EXIT_CODE} -ne 0 ]; then
+      echo -e "\nRunning dbt debug to check if the destination is available for dbt and well configured (${DBT_EXIT_CODE}):\n"
+      dbt debug --profiles-dir "${PROJECT_DIR}" --project-dir "${PROJECT_DIR}"
+      DBT_DEBUG_EXIT_CODE=$?
+      if [ ${DBT_DEBUG_EXIT_CODE} -eq 0 ]; then
+        # dbt debug is successful, so the error must be somewhere else...
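+        # (most likely a failing normalization model), so surface dbt.log to show which step broke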
+ echo -e "\nForward dbt output logs to diagnose/debug errors (${DBT_DEBUG_EXIT_CODE}):\n" + cat "${PROJECT_DIR}/../logs/dbt.log" + fi + fi + closessh + exit ${DBT_EXIT_CODE} + ;; + configure-dbt) + configuredbt + ;; + *) + error "Unknown command: $CMD" + ;; + esac +} + +main "$@" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/__init__.py b/airbyte-integrations/bases/base-normalization/integration_tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py new file mode 100644 index 0000000000000..b70b9248eac19 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/dbt_integration_test.py @@ -0,0 +1,740 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import json +import os +import pathlib +import random +import re +import socket +import string +import subprocess +import sys +import threading +import time +from copy import copy +from typing import Any, Callable, Dict, List, Union + +import yaml +from normalization.destination_type import DestinationType +from normalization.transform_catalog.transform import read_yaml_config, write_yaml_config +from normalization.transform_config.transform import TransformConfig + +NORMALIZATION_TEST_TARGET = "NORMALIZATION_TEST_TARGET" +NORMALIZATION_TEST_MSSQL_DB_PORT = "NORMALIZATION_TEST_MSSQL_DB_PORT" +NORMALIZATION_TEST_MYSQL_DB_PORT = "NORMALIZATION_TEST_MYSQL_DB_PORT" +NORMALIZATION_TEST_POSTGRES_DB_PORT = "NORMALIZATION_TEST_POSTGRES_DB_PORT" +NORMALIZATION_TEST_CLICKHOUSE_DB_PORT = "NORMALIZATION_TEST_CLICKHOUSE_DB_PORT" +NORMALIZATION_TEST_TIDB_DB_PORT = "NORMALIZATION_TEST_TIDB_DB_PORT" +NORMALIZATION_TEST_DUCKDB_DESTINATION_PATH = "NORMALIZATION_TEST_DUCKDB_DESTINATION_PATH" + + +class DbtIntegrationTest(object): + def __init__(self): + self.target_schema = "test_normalization" + self.container_prefix = f"test_normalization_db_{self.random_string(3)}" + self.db_names = [] + + @staticmethod + def generate_random_string(prefix: str) -> str: + return prefix + DbtIntegrationTest.random_string(5) + + @staticmethod + def random_string(length: int) -> str: + return "".join(random.choice(string.ascii_lowercase) for i in range(length)) + + def set_target_schema(self, target_schema: str): + self.target_schema = target_schema + + def setup_db(self, destinations_to_test: List[str]): + if DestinationType.POSTGRES.value in destinations_to_test: + self.setup_postgres_db() + if DestinationType.MYSQL.value in destinations_to_test: + self.setup_mysql_db() + if DestinationType.MSSQL.value in destinations_to_test: + self.setup_mssql_db() + if DestinationType.CLICKHOUSE.value in destinations_to_test: + self.setup_clickhouse_db() + if DestinationType.TIDB.value in destinations_to_test: + self.setup_tidb_db() + + def setup_postgres_db(self): + start_db = True + if os.getenv(NORMALIZATION_TEST_POSTGRES_DB_PORT): + port = int(os.getenv(NORMALIZATION_TEST_POSTGRES_DB_PORT)) + start_db = False + else: + port = self.find_free_port() + config = { + "host": "localhost", + "username": "integration-tests", + "password": "integration-tests", + "port": port, + "database": "postgres", + "schema": self.target_schema, + } + if start_db: + self.db_names.append("postgres") + print("Starting localhost postgres container for tests") + commands = [ + "docker", + "run", + "--rm", + "--name", + 
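+                # prefixed container name so parallel test runs don't collide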
f"{self.container_prefix}_postgres", + "-e", + f"POSTGRES_USER={config['username']}", + "-e", + f"POSTGRES_PASSWORD={config['password']}", + "-p", + f"{config['port']}:5432", + "-d", + "marcosmarxm/postgres-ssl:dev", + "-c", + "ssl=on", + "-c", + "ssl_cert_file=/var/lib/postgresql/server.crt", + "-c", + "ssl_key_file=/var/lib/postgresql/server.key", + ] + print("Executing: ", " ".join(commands)) + subprocess.call(commands) + print("....Waiting for Postgres DB to start...15 sec") + time.sleep(15) + if not os.path.exists("../secrets"): + os.makedirs("../secrets") + with open("../secrets/postgres.json", "w") as fh: + fh.write(json.dumps(config)) + + def setup_mysql_db(self): + start_db = True + if os.getenv(NORMALIZATION_TEST_MYSQL_DB_PORT): + port = int(os.getenv(NORMALIZATION_TEST_MYSQL_DB_PORT)) + start_db = False + else: + port = self.find_free_port() + config = { + "host": "localhost", + "port": port, + "database": self.target_schema, + "username": "root", + "password": "", + } + if start_db: + self.db_names.append("mysql") + print("Starting localhost mysql container for tests") + commands = [ + "docker", + "run", + "--rm", + "--name", + f"{self.container_prefix}_mysql", + "-e", + "MYSQL_ALLOW_EMPTY_PASSWORD=yes", + "-e", + "MYSQL_INITDB_SKIP_TZINFO=yes", + "-e", + f"MYSQL_DATABASE={config['database']}", + "-e", + "MYSQL_ROOT_HOST=%", + "-p", + f"{config['port']}:3306", + "-d", + "mysql/mysql-server", + ] + print("Executing: ", " ".join(commands)) + subprocess.call(commands) + print("....Waiting for MySQL DB to start...15 sec") + time.sleep(15) + if not os.path.exists("../secrets"): + os.makedirs("../secrets") + with open("../secrets/mysql.json", "w") as fh: + fh.write(json.dumps(config)) + + def setup_mssql_db(self): + start_db = True + if os.getenv(NORMALIZATION_TEST_MSSQL_DB_PORT): + port = int(os.getenv(NORMALIZATION_TEST_MSSQL_DB_PORT)) + start_db = False + else: + port = self.find_free_port() + config = { + "host": "localhost", + "username": "SA", + "password": "MyStr0ngP@ssw0rd", + "port": port, + "database": self.target_schema, + "schema": self.target_schema, + } + if start_db: + self.db_names.append("mssql") + print("Starting localhost MS SQL Server container for tests") + command_start_container = [ + "docker", + "run", + "--rm", + "--name", + f"{self.container_prefix}_mssql", + "-h", + f"{self.container_prefix}_mssql", + "-e", + "ACCEPT_EULA='Y'", + "-e", + f"SA_PASSWORD='{config['password']}'", + "-e", + "MSSQL_PID='Standard'", + "-p", + f"{config['port']}:1433", + "-d", + "mcr.microsoft.com/mssql/server:2019-GA-ubuntu-16.04", + ] + # cmds & parameters + cmd_start_container = " ".join(command_start_container) + wait_sec = 30 + # run the docker container + print("Executing: ", cmd_start_container) + subprocess.check_call(cmd_start_container, shell=True) + # wait for service is available + print(f"....Waiting for MS SQL Server to start...{wait_sec} sec") + time.sleep(wait_sec) + # Run additional commands to prepare the table + command_create_db = [ + "docker", + "exec", + f"{self.container_prefix}_mssql", + "/opt/mssql-tools/bin/sqlcmd", + "-S", + config["host"], + "-U", + config["username"], + "-P", + config["password"], + "-Q", + f"CREATE DATABASE [{config['database']}]", + ] + # create test db + print("Executing: ", " ".join(command_create_db)) + subprocess.call(command_create_db) + if not os.path.exists("../secrets"): + os.makedirs("../secrets") + with open("../secrets/mssql.json", "w") as fh: + fh.write(json.dumps(config)) + + def setup_clickhouse_db(self): + """ + 
+        The official ClickHouse JDBC driver uses HTTP port 8123.
+
+        Ref: https://altinity.com/blog/2019/3/15/clickhouse-networking-part-1
+        """
+        start_db = True
+        port = 8123
+        if os.getenv(NORMALIZATION_TEST_CLICKHOUSE_DB_PORT):
+            port = int(os.getenv(NORMALIZATION_TEST_CLICKHOUSE_DB_PORT))
+            start_db = False
+        if start_db:
+            port = self.find_free_port()
+        config = {
+            "host": "localhost",
+            "port": port,
+            "database": self.target_schema,
+            "username": "default",
+            "password": "",
+            "ssl": False,
+        }
+        if start_db:
+            self.db_names.append("clickhouse")
+            print("Starting localhost clickhouse container for tests")
+            commands = [
+                "docker",
+                "run",
+                "--rm",
+                "--name",
+                f"{self.container_prefix}_clickhouse",
+                "--ulimit",
+                "nofile=262144:262144",
+                "-p",
+                f"{config['port']}:8123",  # the ClickHouse JDBC driver uses the HTTP port
+                "-d",
+                # so far, only the latest ClickHouse server image ships with window functions enabled
+                "clickhouse/clickhouse-server:latest",
+            ]
+            print("Executing: ", " ".join(commands))
+            subprocess.call(commands)
+            print("....Waiting for ClickHouse DB to start...15 sec")
+            time.sleep(15)
+            # Run additional commands to prepare the database
+            command_create_db = [
+                "docker",
+                "run",
+                "--rm",
+                "--link",
+                f"{self.container_prefix}_clickhouse:clickhouse-server",
+                "clickhouse/clickhouse-client:21.8.10.19",
+                "--host",
+                "clickhouse-server",
+                "--query",
+                f"CREATE DATABASE IF NOT EXISTS {config['database']}",
+            ]
+            # create the test database
+            print("Executing: ", " ".join(command_create_db))
+            subprocess.call(command_create_db)
+        if not os.path.exists("../secrets"):
+            os.makedirs("../secrets")
+        with open("../secrets/clickhouse.json", "w") as fh:
+            fh.write(json.dumps(config))
+
+    def setup_tidb_db(self):
+        start_db = True
+        if os.getenv(NORMALIZATION_TEST_TIDB_DB_PORT):
+            port = int(os.getenv(NORMALIZATION_TEST_TIDB_DB_PORT))
+            start_db = False
+        else:
+            port = self.find_free_port()
+        config = {
+            "host": "127.0.0.1",
+            "port": port,
+            "database": self.target_schema,
+            "schema": self.target_schema,
+            "username": "root",
+            "password": "",
+            "ssl": False,
+        }
+        if start_db:
+            self.db_names.append("tidb")
+            print("Starting tidb container for tests")
+            commands = [
+                "docker",
+                "run",
+                "--rm",
+                "--name",
+                f"{self.container_prefix}_tidb",
+                "-p",
+                f"{config['port']}:4000",
+                "-d",
+                "pingcap/tidb:v5.4.0",
+            ]
+            print("Executing: ", " ".join(commands))
+            subprocess.call(commands)
+            print("....Waiting for TiDB to start...15 sec")
+            time.sleep(15)
+            command_create_db = [
+                "docker",
+                "run",
+                "--rm",
+                "--link",
+                f"{self.container_prefix}_tidb:tidb",
+                "arey/mysql-client",
+                "--host=tidb",
+                "--user=root",
+                "--port=4000",
+                f"--execute=CREATE DATABASE IF NOT EXISTS {self.target_schema}",
+            ]
+            print("Executing: ", " ".join(command_create_db))
+            subprocess.call(command_create_db)
+        if not os.path.exists("../secrets"):
+            os.makedirs("../secrets")
+        with open("../secrets/tidb.json", "w") as fh:
+            fh.write(json.dumps(config))
+
+    @staticmethod
+    def find_free_port():
+        """
+        Find an unused localhost port on which a test database container can listen.
+        """
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        s.bind(("", 0))
+        addr = s.getsockname()
+        s.close()
+        return addr[1]
+
+    def tear_down_db(self):
+        for db_name in self.db_names:
+            print(f"Stopping localhost {db_name} container for tests")
+            try:
+                subprocess.call(["docker", "kill", f"{self.container_prefix}_{db_name}"])
+            except Exception as e:
+                print(f"WARN: Exception while shutting down {db_name}: {e}")
+
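+    # A minimal, hypothetical usage sketch (not part of this test suite): callers typically
+    # pair setup_db() with tear_down_db() around a test session, e.g. in a pytest fixture:
+    #
+    #   import pytest
+    #
+    #   @pytest.fixture(scope="session")
+    #   def dbt_test():
+    #       it = DbtIntegrationTest()
+    #       it.set_target_schema("test_normalization")
+    #       it.setup_db([DestinationType.POSTGRES.value])  # starts a disposable Postgres container
+    #       yield it
+    #       it.tear_down_db()  # docker-kills any containers started above
+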
@staticmethod + def change_current_test_dir(request): + # This makes the test run whether it is executed from the tests folder (with pytest/gradle) + # or from the base-normalization folder (through pycharm) + integration_tests_dir = os.path.join(request.fspath.dirname, "integration_tests") + if os.path.exists(integration_tests_dir): + os.chdir(integration_tests_dir) + else: + os.chdir(request.fspath.dirname) + + def generate_profile_yaml_file( + self, destination_type: DestinationType, test_root_dir: str, random_schema: bool = False + ) -> Dict[str, Any]: + """ + Each destination requires different settings to connect to. This step generates the adequate profiles.yml + as described here: https://docs.getdbt.com/reference/profiles.yml + """ + config_generator = TransformConfig() + profiles_config = config_generator.read_json_config(f"../secrets/{destination_type.value.lower()}.json") + # Adapt credential file to look like destination config.json + if destination_type.value == DestinationType.BIGQUERY.value: + credentials = profiles_config["basic_bigquery_config"] + profiles_config = { + "credentials_json": json.dumps(credentials), + "dataset_id": self.target_schema, + "project_id": credentials["project_id"], + "dataset_location": "US", + } + elif destination_type.value == DestinationType.MYSQL.value: + profiles_config["database"] = self.target_schema + elif destination_type.value == DestinationType.REDSHIFT.value: + profiles_config["schema"] = self.target_schema + if random_schema: + profiles_config["schema"] = self.target_schema + "_" + "".join(random.choices(string.ascii_lowercase, k=5)) + else: + profiles_config["schema"] = self.target_schema + if destination_type.value == DestinationType.CLICKHOUSE.value: + clickhouse_config = copy(profiles_config) + profiles_yaml = config_generator.transform(destination_type, clickhouse_config) + else: + profiles_yaml = config_generator.transform(destination_type, profiles_config) + config_generator.write_yaml_config(test_root_dir, profiles_yaml, "profiles.yml") + return profiles_config + + @staticmethod + def run_destination_process(message_file: str, test_root_dir: str, commands: List[str]): + print("Executing: ", " ".join(commands)) + with open(os.path.join(test_root_dir, "destination_output.log"), "ab") as f: + process = subprocess.Popen(commands, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + + def writer(): + if os.path.exists(message_file): + with open(message_file, "rb") as input_data: + while True: + line = input_data.readline() + if not line: + break + if not line.startswith(b"//"): + process.stdin.write(line) + process.stdin.close() + + thread = threading.Thread(target=writer) + thread.start() + for line in iter(process.stdout.readline, b""): + f.write(line) + sys.stdout.write(line.decode("utf-8")) + thread.join() + process.wait() + return process.returncode == 0 + + @staticmethod + def get_normalization_image(destination_type: DestinationType) -> str: + if DestinationType.MSSQL.value == destination_type.value: + return "airbyte/normalization-mssql:dev" + elif DestinationType.MYSQL.value == destination_type.value: + return "airbyte/normalization-mysql:dev" + elif DestinationType.ORACLE.value == destination_type.value: + return "airbyte/normalization-oracle:dev" + elif DestinationType.CLICKHOUSE.value == destination_type.value: + return "airbyte/normalization-clickhouse:dev" + elif DestinationType.SNOWFLAKE.value == destination_type.value: + return "airbyte/normalization-snowflake:dev" + elif 
DestinationType.REDSHIFT.value == destination_type.value: + return "airbyte/normalization-redshift:dev" + elif DestinationType.TIDB.value == destination_type.value: + return "airbyte/normalization-tidb:dev" + else: + return "airbyte/normalization:dev" + + def dbt_check(self, destination_type: DestinationType, test_root_dir: str): + """ + Run the dbt CLI to perform transformations on the test raw data in the destination + """ + normalization_image: str = self.get_normalization_image(destination_type) + # Perform sanity check on dbt project settings + assert self.run_check_dbt_command(normalization_image, "debug", test_root_dir) + assert self.run_check_dbt_command(normalization_image, "deps", test_root_dir) + + def dbt_run(self, destination_type: DestinationType, test_root_dir: str, force_full_refresh: bool = False): + """ + Run the dbt CLI to perform transformations on the test raw data in the destination + """ + normalization_image: str = self.get_normalization_image(destination_type) + # Compile dbt models files into destination sql dialect, then run the transformation queries + assert self.run_check_dbt_command(normalization_image, "run", test_root_dir, force_full_refresh) + + def dbt_run_macro(self, destination_type: DestinationType, test_root_dir: str, macro: str, macro_args: str = None): + """ + Run the dbt CLI to perform transformations on the test raw data in the destination, using independent macro. + """ + normalization_image: str = self.get_normalization_image(destination_type) + # Compile dbt models files into destination sql dialect, then run the transformation queries + assert self.run_dbt_run_operation(normalization_image, test_root_dir, macro, macro_args) + + def run_check_dbt_command(self, normalization_image: str, command: str, cwd: str, force_full_refresh: bool = False) -> bool: + """ + Run dbt subprocess while checking and counting for "ERROR", "FAIL" or "WARNING" printed in its outputs + """ + if any([normalization_image.startswith(x) for x in ["airbyte/normalization-oracle", "airbyte/normalization-clickhouse"]]): + dbtAdditionalArgs = [] + else: + dbtAdditionalArgs = ["--event-buffer-size=10000"] + + commands = ( + [ + "docker", + "run", + "--rm", + "--init", + "-v", + f"{cwd}:/workspace", + "-v", + f"{cwd}/build:/build", + "-v", + f"{cwd}/logs:/logs", + "-v", + f"{cwd}/build/dbt_packages:/dbt", + "--network", + "host", + "--entrypoint", + "/usr/local/bin/dbt", + "-i", + normalization_image, + ] + + dbtAdditionalArgs + + [ + command, + "--profiles-dir=/workspace", + "--project-dir=/workspace", + ] + ) + if force_full_refresh: + commands.append("--full-refresh") + command = f"{command} --full-refresh" + print("Executing: ", " ".join(commands)) + print(f"Equivalent to: dbt {command} --profiles-dir={cwd} --project-dir={cwd}") + return self.run_check_dbt_subprocess(commands, cwd) + + def run_dbt_run_operation(self, normalization_image: str, cwd: str, macro: str, macro_args: str = None) -> bool: + """ + Run dbt subprocess while checking and counting for "ERROR", "FAIL" or "WARNING" printed in its outputs + """ + args = ["--args", macro_args] if macro_args else [] + commands = ( + [ + "docker", + "run", + "--rm", + "--init", + "-v", + f"{cwd}:/workspace", + "-v", + f"{cwd}/build:/build", + "-v", + f"{cwd}/logs:/logs", + "-v", + f"{cwd}/build/dbt_packages:/dbt", + "--network", + "host", + "--entrypoint", + "/usr/local/bin/dbt", + "-i", + normalization_image, + ] + + ["run-operation", macro] + + args + + ["--profiles-dir=/workspace", "--project-dir=/workspace"] + ) + + 
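+        # Note: `dbt run-operation --args` takes a YAML/JSON-encoded dict string,
+        # e.g. '{"schemas": ["test_normalization"]}' as built by clean_tmp_tables() below.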
print("Executing: ", " ".join(commands)) + print(f"Equivalent to: dbt run-operation {macro} --args {macro_args} --profiles-dir={cwd} --project-dir={cwd}") + return self.run_check_dbt_subprocess(commands, cwd) + + def run_check_dbt_subprocess(self, commands: list, cwd: str): + error_count = 0 + with open(os.path.join(cwd, "dbt_output.log"), "ab") as f: + process = subprocess.Popen(commands, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, env=os.environ) + for line in iter(lambda: process.stdout.readline(), b""): + f.write(line) + str_line = line.decode("utf-8") + sys.stdout.write(str_line) + # keywords to match lines as signaling errors + if "ERROR" in str_line or "FAIL" in str_line or "WARNING" in str_line: + # exception keywords in lines to ignore as errors (such as summary or expected warnings) + is_exception = False + for except_clause in [ + "Done.", # DBT Summary + "PASS=", # DBT Summary + "Nothing to do.", # When no schema/data tests are setup + "Configuration paths exist in your dbt_project.yml", # When no cte / view are generated + "Error loading config file: .dockercfg: $HOME is not defined", # ignore warning + "depends on a node named 'disabled_test' which was not found", # Tests throwing warning because it is disabled + "The requested image's platform (linux/amd64) does not match the detected host platform " + + "(linux/arm64/v8) and no specific platform was requested", # temporary patch until we publish images for arm64 + ]: + if except_clause in str_line: + is_exception = True + break + if not is_exception: + # count lines signaling an error/failure/warning + error_count += 1 + process.wait() + message = ( + f"{' '.join(commands)}\n\tterminated with return code {process.returncode} " + f"with {error_count} 'Error/Warning/Fail' mention(s)." + ) + print(message) + assert error_count == 0, message + assert process.returncode == 0, message + if error_count > 0: + return False + return process.returncode == 0 + + @staticmethod + def copy_replace(src, dst, pattern=None, replace_value=None): + """ + Copies a file from src to dst replacing pattern by replace_value + Parameters + ---------- + src : string + Path to the source filename to copy from + dst : string + Path to the output filename to copy to + pattern + list of Patterns to replace inside the src file + replace_value + list of Values to replace by in the dst file + """ + file1 = open(src, "r") if isinstance(src, str) else src + file2 = open(dst, "w") if isinstance(dst, str) else dst + pattern = [pattern] if isinstance(pattern, str) else pattern + replace_value = [replace_value] if isinstance(replace_value, str) else replace_value + if replace_value and pattern: + if len(replace_value) != len(pattern): + raise Exception("Invalid parameters: pattern and replace_value" " have different sizes.") + rules = [(re.compile(regex, re.IGNORECASE), value) for regex, value in zip(pattern, replace_value)] + else: + rules = [] + for line in file1: + if rules: + for rule in rules: + line = re.sub(rule[0], rule[1], line) + file2.write(line) + if isinstance(src, str): + file1.close() + if isinstance(dst, str): + file2.close() + + @staticmethod + def get_test_targets() -> List[str]: + """ + Returns a list of destinations to run tests on. 
+
+        If the environment variable NORMALIZATION_TEST_TARGET is set to a comma-separated list of destination names,
+        then the tests are run only on that subset of destinations.
+        Otherwise, tests are run against all destinations.
+        """
+        if os.getenv(NORMALIZATION_TEST_TARGET):
+            target_str = os.getenv(NORMALIZATION_TEST_TARGET)
+            return [d.value for d in {DestinationType.from_string(s.strip()) for s in target_str.split(",")}]
+        else:
+            return [d.value for d in DestinationType]
+
+    @staticmethod
+    def update_yaml_file(filename: str, callback: Callable):
+        config = read_yaml_config(filename)
+        updated, config = callback(config)
+        if updated:
+            write_yaml_config(config, filename)
+
+    def clean_tmp_tables(
+        self,
+        destination_type: Union[DestinationType, List[DestinationType]],
+        test_type: str,
+        tmp_folders: list = None,
+        git_versioned_tests: list = None,
+    ):
+        """
+        Cleans up all temporary schemas created during the test session.
+        It parses the provided tmp_folders: List[str] or uses `git_versioned_tests` to find the sources.yml files generated for the tests.
+        It gets the target schemas created by the tests and removes them using the custom scenario specified in
+        the `dbt-project-template/macros/clean_tmp_tables.sql` macro.
+
+        REQUIREMENTS:
+        1) Ideally, the schemas should have unique names like: test_normalization_ to avoid conflicts.
+        2) The `clean_tmp_tables.sql` macro should contain a dedicated macro for the target destination to proceed.
+
+        INPUT ARGUMENTS:
+        :: destination_type : either a single destination or a list of destinations
+        :: test_type: either "ephemeral" or "normalization" should be supplied.
+        :: tmp_folders: should be supplied if test_type = "ephemeral", to get schemas from /build/normalization_test_output folders
+        :: git_versioned_tests: should be supplied if test_type = "normalization", to get schemas from integration_tests/normalization_test_output folders
+
+        EXAMPLE:
+            clean_up_args = {
+                "destination_type": [DestinationType.REDSHIFT, DestinationType.POSTGRES, ...],
+                "test_type": "normalization",
+                "git_versioned_tests": git_versioned_tests,
+            }
+        """
+
+        path_to_sources: str = "/models/generated/sources.yml"
+        test_folders: dict = {}
+        source_files: dict = {}
+        schemas_to_remove: dict = {}
+
+        # collect information about the tmp tables created by the tests for each destination
+        for destination in destination_type:
+            test_folders[destination.value] = []
+            source_files[destination.value] = []
+            schemas_to_remove[destination.value] = []
+
+            # based on test_type, select the path to the source files
+            if test_type == "ephemeral" or test_type == "test_reset_scd_overwrite":
+                if not tmp_folders:
+                    raise TypeError("`tmp_folders` arg is not provided.")
+                for folder in tmp_folders:
+                    if destination.value in folder:
+                        test_folders[destination.value].append(folder)
+                        source_files[destination.value].append(f"{folder}{path_to_sources}")
+            elif test_type == "normalization":
+                if not git_versioned_tests:
+                    raise TypeError("`git_versioned_tests` arg is not provided.")
+                base_path = f"{pathlib.Path().absolute()}/integration_tests/normalization_test_output"
+                for test in git_versioned_tests:
+                    test_root_dir: str = f"{base_path}/{destination.value}/{test}"
+                    test_folders[destination.value].append(test_root_dir)
+                    source_files[destination.value].append(f"{test_root_dir}{path_to_sources}")
+            else:
+                raise TypeError(f"\n`test_type`: {test_type} is not registered, use `ephemeral` or `normalization` instead.\n")
+
+            # parse the sources.yml files from the test folders to get the schemas and table names created for the tests
+            for file in source_files[destination.value]:
+                source_yml = {}
+                try:
+                    with open(file, "r") as source_file:
+                        source_yml = yaml.safe_load(source_file)
+                except FileNotFoundError:
+                    print(f"\n{destination.value}: {file} doesn't exist, consider removing any temp tables and schemas manually!\n")
+                test_sources: list = source_yml.get("sources", []) if source_yml else []
+
+                for source in test_sources:
+                    target_schema: str = source.get("name")
+                    if target_schema not in schemas_to_remove[destination.value]:
+                        schemas_to_remove[destination.value].append(target_schema)
+                        # also remove the corresponding _airbyte_* tmp schemas
+                        schemas_to_remove[destination.value].append(f"_airbyte_{target_schema}")
+
+        # clean up the tmp tables generated by the tests
+        for destination in destination_type:
+            if not schemas_to_remove[destination.value]:
+                print(f"\n\t{destination.value.upper()} DESTINATION: SKIP CLEANING, NOTHING TO REMOVE.\n")
+            else:
+                print(f"\n\t{destination.value.upper()} DESTINATION: CLEANING LEFTOVERS...\n")
+                print(f"\t{schemas_to_remove[destination.value]}\n")
+                test_root_folder = test_folders[destination.value][0]
+                args = json.dumps({"schemas": schemas_to_remove[destination.value]})
+                self.dbt_check(destination, test_root_folder)
+                self.dbt_run_macro(destination, test_root_folder, "clean_tmp_tables", args)
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml
new file mode 100755
index 0000000000000..474ab801dbf43
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/dbt_project.yml
@@ -0,0 +1,125 @@
+name: airbyte_utils
+version: '1.0'
+config-version: 2
+profile: normalize
+model-paths:
+- models
+docs-paths:
+- docs
+analysis-paths:
+- analysis
+test-paths:
+- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +dispatch: +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + nested_stream_with_complex_columns_resulting_into_long_names_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab2: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab3: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty + simple_stream_with_namespace_resulting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_namespace_resulting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_namespace_resulting_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_namespace_resulting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + 
conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array + unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias: test_normalization._airbyte_raw_unnest_alias + arrays_ab1: test_normalization._airbyte_raw_arrays + arrays_ab2: test_normalization._airbyte_raw_arrays + arrays_ab3: test_normalization._airbyte_raw_arrays + arrays: test_normalization._airbyte_raw_arrays + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children: test_normalization._airbyte_raw_unnest_alias + arrays_nested_array_parent_ab1: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent_ab2: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent_ab3: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent: test_normalization._airbyte_raw_arrays + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data: 
test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes: test_normalization._airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql new file mode 100644 index 0000000000000..59cf6d3a78044 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -0,0 +1,90 @@ + + + create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` + partition by range_bucket( + _airbyte_active_row, + generate_array(0, 1, 1) + ) + cluster by _airbyte_unique_key_scd, _airbyte_emitted_at + OPTIONS() + as ( + +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +with + +input_data as ( + select * + from `dataline-integration-testing`._airbyte_test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_stg` + -- nested_stream_with_complex_columns_resulting_into_long_names from 
`dataline-integration-testing`.test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + to_hex(md5(cast(concat(coalesce(cast(id as + string +), '')) as + string +))) as _airbyte_unique_key, + id, + date, + `partition`, + date as _airbyte_start_at, + lag(date) over ( + partition by id + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as + string +), ''), '-', coalesce(cast(_airbyte_start_at as + string +), ''), '-', coalesce(cast(_airbyte_emitted_at as + string +), '')) as + string +))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + date, + `partition`, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql new file mode 100644 index 0000000000000..39484347df2bd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -0,0 +1,26 @@ + + + create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names` + partition by timestamp_trunc(_airbyte_emitted_at, day) + cluster by _airbyte_unique_key, _airbyte_emitted_at + OPTIONS() + as ( + +-- Final base SQL model +-- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` +select + _airbyte_unique_key, + id, + date, + `partition`, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid +from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` +-- nested_stream_with_complex_columns_resulting_into_long_names 
from `dataline-integration-testing`.test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql new file mode 100644 index 0000000000000..bfd09f00f73a2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -0,0 +1,74 @@ + + + create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` + partition by timestamp_trunc(_airbyte_emitted_at, day) + cluster by _airbyte_emitted_at + OPTIONS() + as ( + +with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` +select + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, + json_extract_array(`partition`, "$['double_array_data']") as double_array_data, + json_extract_array(`partition`, "$['DATA']") as DATA, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` as table_alias +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and `partition` is not null + +), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 +select + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, + double_array_data, + DATA, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 +select + to_hex(md5(cast(concat(coalesce(cast(_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid as + string +), ''), '-', coalesce(cast(array_to_string(double_array_data, "|", "") as + string +), ''), '-', coalesce(cast(array_to_string(DATA, "|", "") as + string +), '')) as + string 
+))) as _airbyte_partition_hashid, + tmp.* +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 tmp +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 +select + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, + double_array_data, + DATA, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at, + _airbyte_partition_hashid +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql new file mode 100644 index 0000000000000..e1d9a01e02e8e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql @@ -0,0 +1,73 @@ + + + create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA` + partition by timestamp_trunc(_airbyte_emitted_at, day) + cluster by _airbyte_emitted_at + OPTIONS() + as ( + +with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` + +select + _airbyte_partition_hashid, + json_extract_scalar(DATA, "$['currency']") as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` as table_alias +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +cross join unnest(DATA) as DATA +where 1 = 1 +and DATA is not null + +), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1 +select + _airbyte_partition_hashid, + cast(currency as + string +) as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1 +-- DATA at 
nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2 +select + to_hex(md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as + string +), ''), '-', coalesce(cast(currency as + string +), '')) as + string +))) as _airbyte_DATA_hashid, + tmp.* +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab2 tmp +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3 +select + _airbyte_partition_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at, + _airbyte_DATA_hashid +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3 +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql new file mode 100644 index 0000000000000..f537df341d3a3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -0,0 +1,73 @@ + + + create or replace table `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data` + partition by timestamp_trunc(_airbyte_emitted_at, day) + cluster by _airbyte_emitted_at + OPTIONS() + as ( + +with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` + +select + _airbyte_partition_hashid, + json_extract_scalar(double_array_data, "$['id']") as id, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` as table_alias +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +cross join unnest(double_array_data) as double_array_data +where 1 = 1 +and double_array_data is not null 
+ +), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 +select + _airbyte_partition_hashid, + cast(id as + string +) as id, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 +select + to_hex(md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as + string +), ''), '-', coalesce(cast(id as + string +), '')) as + string +))) as _airbyte_double_array_data_hashid, + tmp.* +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 tmp +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 +select + _airbyte_partition_hashid, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at, + _airbyte_double_array_data_hashid +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql new file mode 100644 index 0000000000000..b988a169ef1f2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql @@ -0,0 +1,21 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} 
+select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, + {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias +-- nested_stream_with_complex_columns_resulting_into_long_names +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql new file mode 100644 index 0000000000000..3c6ed6e761a2b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql @@ -0,0 +1,21 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }} +select + cast(id as {{ dbt_utils.type_string() }}) as id, + cast(date as {{ dbt_utils.type_string() }}) as date, + cast({{ adapter.quote('partition') }} as {{ type_json() }}) as {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }} +-- nested_stream_with_complex_columns_resulting_into_long_names +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1.sql new file mode 100644 index 0000000000000..3ada03a427fe1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab1.sql @@ -0,0 +1,22 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse 
JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} +{{ unnest_cte(ref('nested_stream_with_complex_columns_resulting_into_long_names_partition'), 'partition', 'DATA') }} +select + _airbyte_partition_hashid, + {{ json_extract_scalar(unnested_column_value('DATA'), ['currency'], ['currency']) }} as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} as table_alias +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +{{ cross_join_unnest('partition', 'DATA') }} +where 1 = 1 +and DATA is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql new file mode 100644 index 0000000000000..0734951e51265 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql @@ -0,0 +1,21 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} +select + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, + {{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data, + {{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as DATA, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} as table_alias +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and {{ adapter.quote('partition') }} is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql new file mode 100644 index 0000000000000..912073c317273 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql @@ -0,0 +1,22 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} +{{ unnest_cte(ref('nested_stream_with_complex_columns_resulting_into_long_names_partition'), 'partition', 'double_array_data') }} +select + _airbyte_partition_hashid, + {{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} as table_alias +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +{{ cross_join_unnest('partition', 'double_array_data') }} +where 1 = 1 +and double_array_data is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql new file mode 100644 index 0000000000000..1df163184ca05 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -0,0 +1,164 @@ +{{ config( + cluster_by = ["_airbyte_unique_key_scd","_airbyte_emitted_at"], + partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}}, + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_complex_columns_resulting_into_long_names' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.nested_stream_with_complex_columns_resulting_into_long_names_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} + -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where 
_airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} + -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + date, + {{ adapter.quote('partition') }}, + date as _airbyte_start_at, + lag(date) over ( + partition by id + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + date, + {{ adapter.quote('partition') }}, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql new file mode 100644 index 0000000000000..c0bd55eeb61d0 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -0,0 +1,24 @@ +{{ config( + cluster_by = ["_airbyte_unique_key","_airbyte_emitted_at"], + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} +select + _airbyte_unique_key, + id, + date, + {{ 
adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} +-- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql new file mode 100644 index 0000000000000..f8cd174b2a5b7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -0,0 +1,21 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }} +select + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, + double_array_data, + DATA, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_partition_hashid +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }} +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql new file mode 100644 index 0000000000000..861e33d4859a1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql @@ -0,0 +1,20 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ 
ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3') }} +select + _airbyte_partition_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_DATA_hashid +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA_ab3') }} +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql new file mode 100644 index 0000000000000..c6b980124a5a6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -0,0 +1,20 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }} +select + _airbyte_partition_hashid, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_double_array_data_hashid +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }} +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..29bae1b4b5105 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/sources.yml @@ -0,0 +1,23 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_arrays + - name: _airbyte_raw_conflict_stream_array + - name: _airbyte_raw_conflict_stream_name + - name: _airbyte_raw_conflict_stream_scalar + - name: _airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + - name: _airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + - name: _airbyte_raw_some_stream_that_was_empty + - name: _airbyte_raw_unnest_alias +- name: 
test_normalization_namespace + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_simple_stream_with_namespace_resulting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql new file mode 100644 index 0000000000000..e2187e231d380 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -0,0 +1,27 @@ + + + + + + + + merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd` as DBT_INTERNAL_DEST + using ( + select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_scd__dbt_tmp` + ) as DBT_INTERNAL_SOURCE + on + DBT_INTERNAL_SOURCE._airbyte_unique_key_scd = DBT_INTERNAL_DEST._airbyte_unique_key_scd + + + + when matched then update set + `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`_airbyte_unique_key_scd` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key_scd`,`id` = DBT_INTERNAL_SOURCE.`id`,`date` = DBT_INTERNAL_SOURCE.`date`,`partition` = DBT_INTERNAL_SOURCE.`partition`,`_airbyte_start_at` = DBT_INTERNAL_SOURCE.`_airbyte_start_at`,`_airbyte_end_at` = DBT_INTERNAL_SOURCE.`_airbyte_end_at`,`_airbyte_active_row` = DBT_INTERNAL_SOURCE.`_airbyte_active_row`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid` + + + when not matched then insert + (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `date`, `partition`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`) + values + (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `date`, `partition`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`) + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql new file mode 100644 index 0000000000000..c2f7397d2c3bd --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -0,0 +1,27 @@ + + + + + + + + merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names` as DBT_INTERNAL_DEST + using ( + select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names__dbt_tmp` + ) as DBT_INTERNAL_SOURCE + on + DBT_INTERNAL_SOURCE._airbyte_unique_key = DBT_INTERNAL_DEST._airbyte_unique_key + + + + when matched then update set + `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`id` = DBT_INTERNAL_SOURCE.`id`,`date` = DBT_INTERNAL_SOURCE.`date`,`partition` = DBT_INTERNAL_SOURCE.`partition`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid` + + + when not matched then insert + (`_airbyte_unique_key`, `id`, `date`, `partition`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`) + values + (`_airbyte_unique_key`, `id`, `date`, `partition`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`) + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql new file mode 100644 index 0000000000000..2a9c82fbe4001 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -0,0 +1,21 @@ + + + + + + + merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition` as DBT_INTERNAL_DEST + using ( + select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition__dbt_tmp` + ) as DBT_INTERNAL_SOURCE + on FALSE + + + + when not matched then insert + (`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`, `double_array_data`, `DATA`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_partition_hashid`) + values + (`_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid`, `double_array_data`, `DATA`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_partition_hashid`) + + + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql new file mode 100644 index 0000000000000..da77d8e6172f0 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA.sql @@ -0,0 +1,21 @@ + + + + + + + merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA` as DBT_INTERNAL_DEST + using ( + select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA__dbt_tmp` + ) as DBT_INTERNAL_SOURCE + on FALSE + + + + when not matched then insert + (`_airbyte_partition_hashid`, `currency`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_DATA_hashid`) + values + (`_airbyte_partition_hashid`, `currency`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_DATA_hashid`) + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql new file mode 100644 index 0000000000000..a1198af2586c1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -0,0 +1,21 @@ + + + + + + + merge into `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data` as DBT_INTERNAL_DEST + using ( + select * from `dataline-integration-testing`.test_normalization.`nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data__dbt_tmp` + ) as DBT_INTERNAL_SOURCE + on FALSE + + + + when not matched then insert + (`_airbyte_partition_hashid`, `id`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_double_array_data_hashid`) + values + (`_airbyte_partition_hashid`, `id`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_double_array_data_hashid`) + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml new file mode 100755 
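
The three merge statements above illustrate the two write patterns dbt emits for these incremental models on BigQuery: top-level deduplicated tables merge on their unique key, while nested child tables merge on FALSE, a predicate that can never match, so every source row falls through to the insert branch and the merge degenerates into an append. A minimal sketch of both shapes, using hypothetical dst/src tables and unique_key/payload columns:

    -- Upsert: match on the unique key, update on match, insert otherwise
    merge into dst as d
    using (select * from src) as s
    on s.unique_key = d.unique_key
    when matched then update set payload = s.payload
    when not matched then insert (unique_key, payload) values (s.unique_key, s.payload);

    -- Append-only: ON FALSE never matches, so this only ever inserts
    merge into dst as d
    using (select * from src) as s
    on FALSE
    when not matched then insert (unique_key, payload) values (s.unique_key, s.payload);
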
index 0000000000000..013a446b320a5 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/dbt_project.yml @@ -0,0 +1,70 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- modified_models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +dispatch: +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml new file mode 100644 index 0000000000000..12745c37a1508 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_dbt_project.yml @@ -0,0 +1,90 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + 
identifier: true +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +dispatch: +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx + 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts + types_testing_ab1: test_normalization._airbyte_raw_types_testing + types_testing_ab2: test_normalization._airbyte_raw_types_testing + types_testing_stg: test_normalization._airbyte_raw_types_testing 
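
These vars wire the generated models back to their raw inputs: json_column names the column that holds the raw JSON blob, and models_to_source maps every generated model (including each intermediate _ab1/_ab2/_stg/_scd step) to the raw table it is derived from. A macro can then resolve a model by name; a hedged sketch, with an illustrative helper that is not part of this diff:

    {% macro source_table_for(model_name) %}
        {# e.g. 'dedup_exchange_rate' -> 'test_normalization._airbyte_raw_dedup_exchange_rate' #}
        {{ return(var('models_to_source')[model_name]) }}
    {% endmacro %}
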
+ types_testing_scd: test_normalization._airbyte_raw_types_testing + types_testing: test_normalization._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..d7fd59df15b5d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,108 @@ + + + create or replace table `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd` + partition by range_bucket( + _airbyte_active_row, + generate_array(0, 1, 1) + ) + cluster by _airbyte_unique_key_scd, _airbyte_emitted_at + OPTIONS() + as ( + +-- depends_on: ref('dedup_exchange_rate_stg') +with + +input_data as ( + select * + from `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` + -- dedup_exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + to_hex(md5(cast(concat(coalesce(cast(id as + string +), ''), '-', coalesce(cast(currency as + string +), ''), '-', coalesce(cast(NZD as + string +), '')) as + string +))) as _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, + date as _airbyte_start_at, + lag(date) over ( + partition by id, currency, cast(NZD as + string +) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id, currency, cast(NZD as + string +) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as + string +), ''), '-', coalesce(cast(_airbyte_start_at as + string +), ''), '-', coalesce(cast(_airbyte_emitted_at as + string +), '')) as + string +))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..d862d7ae1f082 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,31 @@ + + + create or replace table `dataline-integration-testing`.test_normalization.`dedup_exchange_rate` + partition by timestamp_trunc(_airbyte_emitted_at, day) + cluster by _airbyte_unique_key, _airbyte_emitted_at + OPTIONS() + as ( + +-- Final base SQL model +-- depends_on: `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd` +select + _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd` +-- dedup_exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..3d32bbb2838a9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,145 @@ + + + create or replace table `dataline-integration-testing`.test_normalization.`exchange_rate` + partition by timestamp_trunc(_airbyte_emitted_at, day) + cluster by _airbyte_emitted_at + OPTIONS() + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate +select + json_extract_scalar(_airbyte_data, "$['id']") as id, + json_extract_scalar(_airbyte_data, "$['currency']") as currency, + json_extract_scalar(_airbyte_data, "$['date']") as date, + json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col, + json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters, + json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1, + json_extract_scalar(_airbyte_data, "$['NZD']") as NZD, + json_extract_scalar(_airbyte_data, "$['USD']") as USD, + json_extract_scalar(_airbyte_data, "$['column___with__quotes']") as column___with__quotes, + json_extract_scalar(_airbyte_data, "$['datetime_tz']") as datetime_tz, + json_extract_scalar(_airbyte_data, "$['datetime_no_tz']") as datetime_no_tz, + 
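
Every projection in this staging CTE uses BigQuery's json_extract_scalar, which returns the value at the given JSON path as a STRING, or NULL when the path is absent; that is why the _ab2 model that follows still casts each column to its declared type. A self-contained example:

    select
      json_extract_scalar('{"id": 7, "currency": "USD"}', "$['currency']") as currency,  -- 'USD', typed STRING
      json_extract_scalar('{"id": 7, "currency": "USD"}', "$['missing']")  as absent     -- NULL
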
json_extract_scalar(_airbyte_data, "$['time_tz']") as time_tz, + json_extract_scalar(_airbyte_data, "$['time_no_tz']") as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + int64 +) as id, + cast(currency as + string +) as currency, + cast(nullif(date, '') as + date +) as date, + cast(nullif(timestamp_col, '') as + timestamp +) as timestamp_col, + cast(HKD_special___characters as + float64 +) as HKD_special___characters, + cast(HKD_special___characters_1 as + string +) as HKD_special___characters_1, + cast(NZD as + float64 +) as NZD, + cast(USD as + float64 +) as USD, + cast(column___with__quotes as + string +) as column___with__quotes, + cast(nullif(datetime_tz, '') as + timestamp +) as datetime_tz, + cast(nullif(datetime_no_tz, '') as + datetime +) as datetime_no_tz, + cast(nullif(time_tz, '') as + STRING +) as time_tz, + cast(nullif(time_no_tz, '') as + time +) as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + to_hex(md5(cast(concat(coalesce(cast(id as + string +), ''), '-', coalesce(cast(currency as + string +), ''), '-', coalesce(cast(date as + string +), ''), '-', coalesce(cast(timestamp_col as + string +), ''), '-', coalesce(cast(HKD_special___characters as + string +), ''), '-', coalesce(cast(HKD_special___characters_1 as + string +), ''), '-', coalesce(cast(NZD as + string +), ''), '-', coalesce(cast(USD as + string +), ''), '-', coalesce(cast(column___with__quotes as + string +), ''), '-', coalesce(cast(datetime_tz as + string +), ''), '-', coalesce(cast(datetime_no_tz as + string +), ''), '-', coalesce(cast(time_tz as + string +), ''), '-', coalesce(cast(time_no_tz as + string +), '')) as + string +))) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, + column___with__quotes, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..5f4138f62093a --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,89 @@ + + + create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` + OPTIONS() + as +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate +select + json_extract_scalar(_airbyte_data, "$['id']") as id, + json_extract_scalar(_airbyte_data, "$['currency']") as currency, + json_extract_scalar(_airbyte_data, "$['date']") as date, + json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col, + json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters, + json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1, + json_extract_scalar(_airbyte_data, "$['NZD']") as NZD, + json_extract_scalar(_airbyte_data, "$['USD']") as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + int64 +) as id, + cast(currency as + string +) as currency, + cast(nullif(date, '') as + date +) as date, + cast(nullif(timestamp_col, '') as + timestamp +) as timestamp_col, + cast(HKD_special___characters as + float64 +) as HKD_special___characters, + cast(HKD_special___characters_1 as + string +) as HKD_special___characters_1, + cast(NZD as + float64 +) as NZD, + cast(USD as + float64 +) as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + to_hex(md5(cast(concat(coalesce(cast(id as + string +), ''), '-', coalesce(cast(currency as + string +), ''), '-', coalesce(cast(date as + string +), ''), '-', coalesce(cast(timestamp_col as + string +), ''), '-', coalesce(cast(HKD_special___characters as + string +), ''), '-', coalesce(cast(HKD_special___characters_1 as + string +), ''), '-', coalesce(cast(NZD as + string +), ''), '-', coalesce(cast(USD as + string +), '')) as + string +))) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 +; + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..f5079fc4f3003 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,83 @@ + + + create or replace view `dataline-integration-testing`._airbyte_test_normalization.`multiple_column_names_conflicts_stg` + OPTIONS() + as +with __dbt__cte__multiple_column_names_conflicts_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts +select + json_extract_scalar(_airbyte_data, "$['id']") as id, + json_extract_scalar(_airbyte_data, "$['User Id']") as User_Id, + json_extract_scalar(_airbyte_data, "$['user_id']") as user_id_1, + json_extract_scalar(_airbyte_data, "$['User id']") as User_id_2, + json_extract_scalar(_airbyte_data, "$['user id']") as user_id_3, + json_extract_scalar(_airbyte_data, "$['User@Id']") as User_Id_4, + json_extract_scalar(_airbyte_data, "$['UserId']") as UserId, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__cte__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 +select + cast(id as + int64 +) as id, + cast(User_Id as + string +) as User_Id, + cast(user_id_1 as + float64 +) as user_id_1, + cast(User_id_2 as + float64 +) as User_id_2, + cast(user_id_3 as + float64 +) as user_id_3, + cast(User_Id_4 as + string +) as User_Id_4, + cast(UserId as + float64 +) as UserId, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from __dbt__cte__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 +select + to_hex(md5(cast(concat(coalesce(cast(id as + string +), ''), '-', coalesce(cast(User_Id as + string +), ''), '-', coalesce(cast(user_id_1 as + string +), ''), '-', coalesce(cast(User_id_2 as + string +), ''), '-', coalesce(cast(user_id_3 as + string +), ''), '-', coalesce(cast(User_Id_4 as + string +), ''), '-', coalesce(cast(UserId as + string +), '')) as + string +))) as _airbyte_multiple_column_names_conflicts_hashid, + tmp.* +from __dbt__cte__multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 +; + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..8ef08eb1d426d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,26 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + 
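
Note how the staging views above always wrap date and timestamp casts in nullif(column, ''): BigQuery raises an error when casting an empty string to DATE, DATETIME or TIMESTAMP, so empty values are turned into NULL before the cast. A small demonstration:

    select
      cast(nullif('', '') as date)           as empty_becomes_null,  -- NULL rather than a cast error
      cast(nullif('2021-01-01', '') as date) as parsed_date          -- DATE '2021-01-01'
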
partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as HKD_special___characters, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_special___characters_1, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..eb02cc4ecf859 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,26 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} +select + cast(id as {{ dbt_utils.type_bigint() }}) as id, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + cast({{ empty_string_to_null('date') }} as {{ type_date() }}) as date, + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast(HKD_special___characters as {{ dbt_utils.type_float() }}) as HKD_special___characters, + cast(HKD_special___characters_1 as {{ dbt_utils.type_string() }}) as HKD_special___characters_1, + cast(NZD as {{ dbt_utils.type_float() }}) as NZD, + cast(USD as {{ dbt_utils.type_float() }}) as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..ce21bef8c7221 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,178 @@ +{{ config( + cluster_by = ["_airbyte_unique_key_scd","_airbyte_emitted_at"], + partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}}, + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, + date as _airbyte_start_at, + lag(date) over ( + partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', 
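
The dbt_utils.surrogate_key calls in this model concatenate the listed columns, each cast to a string with NULL coalesced to '', joined by '-', and MD5-hash the result; the compiled form is visible in the first_output version of this SCD model earlier in the diff. For two columns it expands on BigQuery to roughly:

    to_hex(md5(cast(concat(
        coalesce(cast(id as string), ''), '-',
        coalesce(cast(currency as string), '')
    ) as string))) as _airbyte_unique_key
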
+ '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..eb3c93754b6b0 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,29 @@ +{{ config( + cluster_by = ["_airbyte_unique_key","_airbyte_emitted_at"], + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} +select + _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..61b42d20863c3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,31 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, + column___with__quotes, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization', 
'_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..45262775f20b1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,26 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'date', + 'timestamp_col', + 'HKD_special___characters', + 'HKD_special___characters_1', + 'NZD', + 'USD', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..f51802427655e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml @@ -0,0 +1,16 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_1_prefix_startwith_number + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts + - name: _airbyte_raw_pos_dedup_cdcx + - name: _airbyte_raw_renamed_dedup_cdc_excluded + - name: _airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..b86bc98fe997f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,26 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON 
blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as HKD_special___characters, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..09146ddd1c9f8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,26 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} +select + cast(id as {{ dbt_utils.type_float() }}) as id, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + cast(new_column as {{ dbt_utils.type_float() }}) as new_column, + cast({{ empty_string_to_null('date') }} as {{ type_date() }}) as date, + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast(HKD_special___characters as {{ dbt_utils.type_float() }}) as HKD_special___characters, + cast(NZD as {{ dbt_utils.type_float() }}) as NZD, + cast(USD as {{ dbt_utils.type_bigint() }}) as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..4f6b80934992c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,178 @@ +{{ config( + cluster_by = ["_airbyte_unique_key_scd","_airbyte_emitted_at"], + partition_by = {"field": "_airbyte_active_row", "data_type": "int64", "range": {"start": 0, "end": 1, "interval": 1}}, + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} final_table where final_table._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key, + id, + currency, + new_column, + date, + timestamp_col, + HKD_special___characters, + NZD, + USD, + date as _airbyte_start_at, + lag(date) over ( + partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(NZD as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(NZD as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as 
_airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + new_column, + date, + timestamp_col, + HKD_special___characters, + NZD, + USD, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..96601fc9d2873 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,29 @@ +{{ config( + cluster_by = ["_airbyte_unique_key","_airbyte_emitted_at"], + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} +select + _airbyte_unique_key, + id, + currency, + new_column, + date, + timestamp_col, + HKD_special___characters, + NZD, + USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..84cb4985e8c95 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,27 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + id, + currency, + new_column, + date, + timestamp_col, + HKD_special___characters, + NZD, + USD, + column___with__quotes, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization', 
'_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..da37e7dc7eaeb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,26 @@ +{{ config( + cluster_by = "_airbyte_emitted_at", + partition_by = {"field": "_airbyte_emitted_at", "data_type": "timestamp", "granularity": "day"}, + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'new_column', + 'date', + 'timestamp_col', + 'HKD_special___characters', + 'NZD', + 'USD', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/sources.yml new file mode 100644 index 0000000000000..6a5d7bdc09a16 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/sources.yml @@ -0,0 +1,12 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..591dfe0b4c344 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,27 @@ + + + + + + + + merge into `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd` as DBT_INTERNAL_DEST + using ( + select * from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd__dbt_tmp` + ) as DBT_INTERNAL_SOURCE + on + DBT_INTERNAL_SOURCE._airbyte_unique_key_scd = DBT_INTERNAL_DEST._airbyte_unique_key_scd + + + + when matched then update set + `_airbyte_unique_key` = 
DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`_airbyte_unique_key_scd` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key_scd`,`id` = DBT_INTERNAL_SOURCE.`id`,`currency` = DBT_INTERNAL_SOURCE.`currency`,`date` = DBT_INTERNAL_SOURCE.`date`,`timestamp_col` = DBT_INTERNAL_SOURCE.`timestamp_col`,`HKD_special___characters` = DBT_INTERNAL_SOURCE.`HKD_special___characters`,`HKD_special___characters_1` = DBT_INTERNAL_SOURCE.`HKD_special___characters_1`,`NZD` = DBT_INTERNAL_SOURCE.`NZD`,`USD` = DBT_INTERNAL_SOURCE.`USD`,`_airbyte_start_at` = DBT_INTERNAL_SOURCE.`_airbyte_start_at`,`_airbyte_end_at` = DBT_INTERNAL_SOURCE.`_airbyte_end_at`,`_airbyte_active_row` = DBT_INTERNAL_SOURCE.`_airbyte_active_row`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_dedup_exchange_rate_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_dedup_exchange_rate_hashid` + + + when not matched then insert + (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) + values + (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..0691294c98c3e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,27 @@ + + + + + + + + merge into `dataline-integration-testing`.test_normalization.`dedup_exchange_rate` as DBT_INTERNAL_DEST + using ( + select * from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate__dbt_tmp` + ) as DBT_INTERNAL_SOURCE + on + DBT_INTERNAL_SOURCE._airbyte_unique_key = DBT_INTERNAL_DEST._airbyte_unique_key + + + + when matched then update set + `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`id` = DBT_INTERNAL_SOURCE.`id`,`currency` = DBT_INTERNAL_SOURCE.`currency`,`date` = DBT_INTERNAL_SOURCE.`date`,`timestamp_col` = DBT_INTERNAL_SOURCE.`timestamp_col`,`HKD_special___characters` = DBT_INTERNAL_SOURCE.`HKD_special___characters`,`HKD_special___characters_1` = DBT_INTERNAL_SOURCE.`HKD_special___characters_1`,`NZD` = DBT_INTERNAL_SOURCE.`NZD`,`USD` = DBT_INTERNAL_SOURCE.`USD`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_dedup_exchange_rate_hashid` = 
DBT_INTERNAL_SOURCE.`_airbyte_dedup_exchange_rate_hashid` + + + when not matched then insert + (`_airbyte_unique_key`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) + values + (`_airbyte_unique_key`, `id`, `currency`, `date`, `timestamp_col`, `HKD_special___characters`, `HKD_special___characters_1`, `NZD`, `USD`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..3d32bbb2838a9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,145 @@ + + + create or replace table `dataline-integration-testing`.test_normalization.`exchange_rate` + partition by timestamp_trunc(_airbyte_emitted_at, day) + cluster by _airbyte_emitted_at + OPTIONS() + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate +select + json_extract_scalar(_airbyte_data, "$['id']") as id, + json_extract_scalar(_airbyte_data, "$['currency']") as currency, + json_extract_scalar(_airbyte_data, "$['date']") as date, + json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col, + json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters, + json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1, + json_extract_scalar(_airbyte_data, "$['NZD']") as NZD, + json_extract_scalar(_airbyte_data, "$['USD']") as USD, + json_extract_scalar(_airbyte_data, "$['column___with__quotes']") as column___with__quotes, + json_extract_scalar(_airbyte_data, "$['datetime_tz']") as datetime_tz, + json_extract_scalar(_airbyte_data, "$['datetime_no_tz']") as datetime_no_tz, + json_extract_scalar(_airbyte_data, "$['time_tz']") as time_tz, + json_extract_scalar(_airbyte_data, "$['time_no_tz']") as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + int64 +) as id, + cast(currency as + string +) as currency, + cast(nullif(date, '') as + date +) as date, + cast(nullif(timestamp_col, '') as + timestamp +) as timestamp_col, + cast(HKD_special___characters as + float64 +) as HKD_special___characters, + cast(HKD_special___characters_1 as + string +) as HKD_special___characters_1, + cast(NZD as + float64 +) as NZD, + cast(USD as + float64 +) as USD, 
+ cast(column___with__quotes as + string +) as column___with__quotes, + cast(nullif(datetime_tz, '') as + timestamp +) as datetime_tz, + cast(nullif(datetime_no_tz, '') as + datetime +) as datetime_no_tz, + cast(nullif(time_tz, '') as + STRING +) as time_tz, + cast(nullif(time_no_tz, '') as + time +) as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + to_hex(md5(cast(concat(coalesce(cast(id as + string +), ''), '-', coalesce(cast(currency as + string +), ''), '-', coalesce(cast(date as + string +), ''), '-', coalesce(cast(timestamp_col as + string +), ''), '-', coalesce(cast(HKD_special___characters as + string +), ''), '-', coalesce(cast(HKD_special___characters_1 as + string +), ''), '-', coalesce(cast(NZD as + string +), ''), '-', coalesce(cast(USD as + string +), ''), '-', coalesce(cast(column___with__quotes as + string +), ''), '-', coalesce(cast(datetime_tz as + string +), ''), '-', coalesce(cast(datetime_no_tz as + string +), ''), '-', coalesce(cast(time_tz as + string +), ''), '-', coalesce(cast(time_no_tz as + string +), '')) as + string +))) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, + column___with__quotes, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..5f4138f62093a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,89 @@ + + + create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` + OPTIONS() + as +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate +select + json_extract_scalar(_airbyte_data, "$['id']") as id, + json_extract_scalar(_airbyte_data, "$['currency']") as currency, + json_extract_scalar(_airbyte_data, "$['date']") as date, + json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col, + json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters, + 
json_extract_scalar(_airbyte_data, "$['HKD_special___characters']") as HKD_special___characters_1, + json_extract_scalar(_airbyte_data, "$['NZD']") as NZD, + json_extract_scalar(_airbyte_data, "$['USD']") as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + int64 +) as id, + cast(currency as + string +) as currency, + cast(nullif(date, '') as + date +) as date, + cast(nullif(timestamp_col, '') as + timestamp +) as timestamp_col, + cast(HKD_special___characters as + float64 +) as HKD_special___characters, + cast(HKD_special___characters_1 as + string +) as HKD_special___characters_1, + cast(NZD as + float64 +) as NZD, + cast(USD as + float64 +) as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + to_hex(md5(cast(concat(coalesce(cast(id as + string +), ''), '-', coalesce(cast(currency as + string +), ''), '-', coalesce(cast(date as + string +), ''), '-', coalesce(cast(timestamp_col as + string +), ''), '-', coalesce(cast(HKD_special___characters as + string +), ''), '-', coalesce(cast(HKD_special___characters_1 as + string +), ''), '-', coalesce(cast(NZD as + string +), ''), '-', coalesce(cast(USD as + string +), '')) as + string +))) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 +; + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..ac1136c84b7ae --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,27 @@ + + + + + + + + merge into `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd` as DBT_INTERNAL_DEST + using ( + select * from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate_scd__dbt_tmp` + ) as DBT_INTERNAL_SOURCE + on + DBT_INTERNAL_SOURCE._airbyte_unique_key_scd = DBT_INTERNAL_DEST._airbyte_unique_key_scd + + + + when matched then update set + `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`_airbyte_unique_key_scd` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key_scd`,`id` = DBT_INTERNAL_SOURCE.`id`,`currency` = DBT_INTERNAL_SOURCE.`currency`,`new_column` = DBT_INTERNAL_SOURCE.`new_column`,`date` = DBT_INTERNAL_SOURCE.`date`,`timestamp_col` = DBT_INTERNAL_SOURCE.`timestamp_col`,`HKD_special___characters` = DBT_INTERNAL_SOURCE.`HKD_special___characters`,`NZD` = DBT_INTERNAL_SOURCE.`NZD`,`USD` = 
DBT_INTERNAL_SOURCE.`USD`,`_airbyte_start_at` = DBT_INTERNAL_SOURCE.`_airbyte_start_at`,`_airbyte_end_at` = DBT_INTERNAL_SOURCE.`_airbyte_end_at`,`_airbyte_active_row` = DBT_INTERNAL_SOURCE.`_airbyte_active_row`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_dedup_exchange_rate_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_dedup_exchange_rate_hashid` + + + when not matched then insert + (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `new_column`, `date`, `timestamp_col`, `HKD_special___characters`, `NZD`, `USD`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) + values + (`_airbyte_unique_key`, `_airbyte_unique_key_scd`, `id`, `currency`, `new_column`, `date`, `timestamp_col`, `HKD_special___characters`, `NZD`, `USD`, `_airbyte_start_at`, `_airbyte_end_at`, `_airbyte_active_row`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..a36197a213f4e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,27 @@ + + + + + + + + merge into `dataline-integration-testing`.test_normalization.`dedup_exchange_rate` as DBT_INTERNAL_DEST + using ( + select * from `dataline-integration-testing`.test_normalization.`dedup_exchange_rate__dbt_tmp` + ) as DBT_INTERNAL_SOURCE + on + DBT_INTERNAL_SOURCE._airbyte_unique_key = DBT_INTERNAL_DEST._airbyte_unique_key + + + + when matched then update set + `_airbyte_unique_key` = DBT_INTERNAL_SOURCE.`_airbyte_unique_key`,`id` = DBT_INTERNAL_SOURCE.`id`,`currency` = DBT_INTERNAL_SOURCE.`currency`,`new_column` = DBT_INTERNAL_SOURCE.`new_column`,`date` = DBT_INTERNAL_SOURCE.`date`,`timestamp_col` = DBT_INTERNAL_SOURCE.`timestamp_col`,`HKD_special___characters` = DBT_INTERNAL_SOURCE.`HKD_special___characters`,`NZD` = DBT_INTERNAL_SOURCE.`NZD`,`USD` = DBT_INTERNAL_SOURCE.`USD`,`_airbyte_ab_id` = DBT_INTERNAL_SOURCE.`_airbyte_ab_id`,`_airbyte_emitted_at` = DBT_INTERNAL_SOURCE.`_airbyte_emitted_at`,`_airbyte_normalized_at` = DBT_INTERNAL_SOURCE.`_airbyte_normalized_at`,`_airbyte_dedup_exchange_rate_hashid` = DBT_INTERNAL_SOURCE.`_airbyte_dedup_exchange_rate_hashid` + + + when not matched then insert + (`_airbyte_unique_key`, `id`, `currency`, `new_column`, `date`, `timestamp_col`, `HKD_special___characters`, `NZD`, `USD`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) + values + (`_airbyte_unique_key`, `id`, `currency`, `new_column`, `date`, `timestamp_col`, `HKD_special___characters`, `NZD`, `USD`, `_airbyte_ab_id`, `_airbyte_emitted_at`, `_airbyte_normalized_at`, `_airbyte_dedup_exchange_rate_hashid`) + + + \ No newline at end of file 
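
The merge statements above are what dbt renders for the incremental SCD models: each run stages fresh rows into a __dbt_tmp relation, then merges on _airbyte_unique_key_scd so existing versions are updated in place and new versions are inserted. The models feeding these merges rely on a window-function pattern that stamps each record version with a start date, an end date, and a single active-row flag per primary key. The following is a minimal illustrative sketch of that pattern in BigQuery SQL, not the fixtures' actual schema; the table and column names (history, id, val, updated_at) are hypothetical stand-ins.

-- Illustrative sketch only: a reduced form of the Type 2 SCD pattern
-- produced by the generated models. All names here are hypothetical.
with history as (
  select * from unnest([
    struct(1 as id, 'EUR' as val, timestamp '2023-01-01' as updated_at),
    struct(1 as id, 'USD' as val, timestamp '2023-02-01' as updated_at),
    struct(2 as id, 'NZD' as val, timestamp '2023-01-15' as updated_at)
  ])
)
select
  id,
  val,
  updated_at as start_at,
  -- rows are scanned newest-first, so lag() returns the start date of the
  -- record that superseded this one, i.e. this version's end date
  lag(updated_at) over (
    partition by id
    order by updated_at is null asc, updated_at desc
  ) as end_at,
  -- exactly one row per key (the newest) is flagged as the active row
  case when row_number() over (
    partition by id
    order by updated_at is null asc, updated_at desc
  ) = 1 then 1 else 0 end as active_row
from history
order by id, start_at;

Because the merge joins on _airbyte_unique_key_scd, the generated models also deduplicate before merging (the dedup_data CTE with its row_number() filter, kept via where _airbyte_row_num = 1), which guarantees the staged source never carries two rows with the same SCD key.
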
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..49688da71ec1a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,117 @@ + + + create or replace table `dataline-integration-testing`.test_normalization.`exchange_rate` + partition by timestamp_trunc(_airbyte_emitted_at, day) + cluster by _airbyte_emitted_at + OPTIONS() + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate +select + json_extract_scalar(_airbyte_data, "$['id']") as id, + json_extract_scalar(_airbyte_data, "$['currency']") as currency, + json_extract_scalar(_airbyte_data, "$['new_column']") as new_column, + json_extract_scalar(_airbyte_data, "$['date']") as date, + json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col, + json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters, + json_extract_scalar(_airbyte_data, "$['NZD']") as NZD, + json_extract_scalar(_airbyte_data, "$['USD']") as USD, + json_extract_scalar(_airbyte_data, "$['column___with__quotes']") as column___with__quotes, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + float64 +) as id, + cast(currency as + string +) as currency, + cast(new_column as + float64 +) as new_column, + cast(nullif(date, '') as + date +) as date, + cast(nullif(timestamp_col, '') as + timestamp +) as timestamp_col, + cast(HKD_special___characters as + float64 +) as HKD_special___characters, + cast(NZD as + float64 +) as NZD, + cast(USD as + float64 +) as USD, + cast(column___with__quotes as + string +) as column___with__quotes, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + to_hex(md5(cast(concat(coalesce(cast(id as + string +), ''), '-', coalesce(cast(currency as + string +), ''), '-', coalesce(cast(new_column as + string +), ''), '-', coalesce(cast(date as + string +), ''), '-', coalesce(cast(timestamp_col as + string +), ''), '-', coalesce(cast(HKD_special___characters as + string +), ''), '-', coalesce(cast(NZD as + string +), ''), '-', coalesce(cast(USD as + string +), ''), '-', coalesce(cast(column___with__quotes as + string +), '')) as + string +))) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- 
exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + id, + currency, + new_column, + date, + timestamp_col, + HKD_special___characters, + NZD, + USD, + column___with__quotes, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from `dataline-integration-testing`.test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..4b1d0e917e33f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,89 @@ + + + create or replace view `dataline-integration-testing`._airbyte_test_normalization.`dedup_exchange_rate_stg` + OPTIONS() + as +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate +select + json_extract_scalar(_airbyte_data, "$['id']") as id, + json_extract_scalar(_airbyte_data, "$['currency']") as currency, + json_extract_scalar(_airbyte_data, "$['new_column']") as new_column, + json_extract_scalar(_airbyte_data, "$['date']") as date, + json_extract_scalar(_airbyte_data, "$['timestamp_col']") as timestamp_col, + json_extract_scalar(_airbyte_data, "$['HKD@spéçiäl & characters']") as HKD_special___characters, + json_extract_scalar(_airbyte_data, "$['NZD']") as NZD, + json_extract_scalar(_airbyte_data, "$['USD']") as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from `dataline-integration-testing`.test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + float64 +) as id, + cast(currency as + string +) as currency, + cast(new_column as + float64 +) as new_column, + cast(nullif(date, '') as + date +) as date, + cast(nullif(timestamp_col, '') as + timestamp +) as timestamp_col, + cast(HKD_special___characters as + float64 +) as HKD_special___characters, + cast(NZD as + float64 +) as NZD, + cast(USD as + int64 +) as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + CURRENT_TIMESTAMP() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + to_hex(md5(cast(concat(coalesce(cast(id as + string +), ''), '-', coalesce(cast(currency as + string +), ''), '-', coalesce(cast(new_column as + string +), ''), '-', coalesce(cast(date as + string +), ''), '-', coalesce(cast(timestamp_col as + 
string +), ''), '-', coalesce(cast(HKD_special___characters as + string +), ''), '-', coalesce(cast(NZD as + string +), ''), '-', coalesce(cast(USD as + string +), '')) as + string +))) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 +; + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml new file mode 100755 index 0000000000000..4028a91611828 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/dbt_project.yml @@ -0,0 +1,90 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: true + identifier: true +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: view + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: ignore + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +dispatch: +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: 
test_normalization._airbyte_raw_pos_dedup_cdcx + 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts + types_testing_ab1: test_normalization._airbyte_raw_types_testing + types_testing_ab2: test_normalization._airbyte_raw_types_testing + types_testing_stg: test_normalization._airbyte_raw_types_testing + types_testing_scd: test_normalization._airbyte_raw_types_testing + types_testing: test_normalization._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..2609c12f32d36 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,25 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_ab1__dbt_tmp + + as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_dedup_exchange_rate +select + JSONExtractRaw(assumeNotNull(_airbyte_data), 'id') as id, + JSONExtractRaw(assumeNotNull(_airbyte_data), 'currency') as currency, + JSONExtractRaw(assumeNotNull(_airbyte_data), 'date') as date, + JSONExtractRaw(assumeNotNull(_airbyte_data), 'timestamp_col') as timestamp_col, + JSONExtractRaw(assumeNotNull(_airbyte_data), 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + JSONExtractRaw(assumeNotNull(_airbyte_data), 'HKD_special___characters') as HKD_special___characters, + JSONExtractRaw(assumeNotNull(_airbyte_data), 'NZD') as NZD, + JSONExtractRaw(assumeNotNull(_airbyte_data), 'USD') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..07778080d6faa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,33 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_ab2__dbt_tmp + + as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: _airbyte_test_normalization.dedup_exchange_rate_ab1 +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, + toDate(parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, '')))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, + accurateCastOrNull("HKD@spéçiäl & characters", ' + Float64 +') as "HKD@spéçiäl & characters", + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, ' + Float64 +') as NZD, + accurateCastOrNull(USD, ' + Float64 +') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from _airbyte_test_normalization.dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..7dac7b7d793f6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,104 @@ + + + + + insert into test_normalization.dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") + +-- depends_on: ref('dedup_cdc_excluded_stg') +with + +input_data as ( + select * + from _airbyte_test_normalization.dedup_cdc_excluded_stg + -- dedup_cdc_excluded from test_normalization._airbyte_raw_dedup_cdc_excluded +), + +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by id + order by + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) as _airbyte_active_row_num + from input_data +), +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + assumeNotNull(hex(MD5( + + toString(id) + + ))) as _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, + case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 
else 0 end as _airbyte_active_row, + anyOrNull(_ab_cdc_lsn) over ( + partition by id + order by + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_cdc_excluded_hashid + from input_data_with_active_row_num +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String') + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + assumeNotNull(hex(MD5( + + toString(_airbyte_unique_key) || '~' || + + + toString(_airbyte_start_at) || '~' || + + + toString(_airbyte_emitted_at) || '~' || + + + toString(_ab_cdc_deleted_at) || '~' || + + + toString(_ab_cdc_updated_at) + + ))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..a3527b053dc31 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,108 @@ + + + + + insert into test_normalization.dedup_exchange_rate_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + +-- depends_on: ref('dedup_exchange_rate_stg') +with + +input_data as ( + select * + from _airbyte_test_normalization.dedup_exchange_rate_stg + -- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate +), + +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by id, currency, cast(NZD as String) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_active_row_num + from input_data +), +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(NZD) + + ))) as _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + date as _airbyte_start_at, + case when 
_airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, + anyOrNull(date) over ( + partition by id, currency, cast(NZD as String) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data_with_active_row_num +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + assumeNotNull(hex(MD5( + + toString(_airbyte_unique_key) || '~' || + + + toString(_airbyte_start_at) || '~' || + + + toString(_airbyte_emitted_at) + + ))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..cf48610f8b82c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,90 @@ + + + + + insert into test_normalization.renamed_dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') +with + +input_data as ( + select * + from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg + -- renamed_dedup_cdc_excluded from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded +), + +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by id + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) as _airbyte_active_row_num + from input_data +), +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + assumeNotNull(hex(MD5( + + toString(id) + + ))) as _airbyte_unique_key, + id, + _ab_cdc_updated_at, + _ab_cdc_updated_at as _airbyte_start_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, + anyOrNull(_ab_cdc_updated_at) over ( + partition by id + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, + 
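+                -- anyOrNull(...) with a frame of ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING
+                -- aggregates over exactly one row (the previous row in the window order),
+                -- which emulates lag(), presumably because ClickHouse did not yet support
+                -- lag() as a window function. Since the ordering is newest-first, each SCD
+                -- version takes the start value of the next-newer version as its
+                -- _airbyte_end_at, and the newest version gets NULL (still open).
+                -- A hypothetical illustration with three versions of one id, using
+                -- _ab_cdc_updated_at values 30, 20, 10:
+                --   _airbyte_start_at = 30  ->  _airbyte_end_at = NULL   (active row)
+                --   _airbyte_start_at = 20  ->  _airbyte_end_at = 30
+                --   _airbyte_start_at = 10  ->  _airbyte_end_at = 20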
_airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_renamed_dedup_cdc_excluded_hashid + from input_data_with_active_row_num +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + assumeNotNull(hex(MD5( + + toString(_airbyte_unique_key) || '~' || + + + toString(_airbyte_start_at) || '~' || + + + toString(_airbyte_emitted_at) + + ))) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + _ab_cdc_updated_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..11d81fef34b9b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,29 @@ + + + + + insert into test_normalization.dedup_exchange_rate ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + +-- Final base SQL model +-- depends_on: test_normalization.dedup_exchange_rate_scd +select + _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from test_normalization.dedup_exchange_rate_scd +-- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate +where 1 = 1 +and _airbyte_active_row = 1 + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..b237171bc7fe8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,23 @@ + + + + + insert into test_normalization.renamed_dedup_cdc_excluded ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + +-- Final base SQL 
model +-- depends_on: test_normalization.renamed_dedup_cdc_excluded_scd +select + _airbyte_unique_key, + id, + _ab_cdc_updated_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from test_normalization.renamed_dedup_cdc_excluded_scd +-- renamed_dedup_cdc_excluded from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded +where 1 = 1 +and _airbyte_active_row = 1 + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..c2be71e63fc94 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,29 @@ + + + + insert into test_normalization.exchange_rate__dbt_tmp ("id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "column___with__quotes", "datetime_tz", "datetime_no_tz", "time_tz", "time_no_tz", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_exchange_rate_hashid") + +-- Final base SQL model +-- depends_on: _airbyte_test_normalization.exchange_rate_ab3 +select + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + "column___with__quotes", + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from _airbyte_test_normalization.exchange_rate_ab3 +-- exchange_rate from test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..9a932053975b7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,41 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_stg__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: _airbyte_test_normalization.dedup_exchange_rate_ab2 +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(date) || '~' || + + + toString(timestamp_col) || '~' || + + + toString("HKD@spéçiäl & characters") || '~' || + + + toString(HKD_special___characters) || '~' || + + + toString(NZD) || '~' || + + + toString(USD) + + ))) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from _airbyte_test_normalization.dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..5f10629995793 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,38 @@ + + + create view _airbyte_test_normalization.multiple_column_names_conflicts_stg__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: _airbyte_test_normalization.multiple_column_names_conflicts_ab2 +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString("User Id") || '~' || + + + toString(user_id) || '~' || + + + toString("User id") || '~' || + + + toString("user id") || '~' || + + + toString("User@Id") || '~' || + + + toString(UserId) + + ))) as _airbyte_multiple_co__ames_conflicts_hashid, + tmp.* +from _airbyte_test_normalization.multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..b0c2c4aa7fa33 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_special___characters, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff 
--git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..22f82153a5cd8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} +select + accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), '{{ dbt_utils.type_string() }}'), 'null') as currency, + toDate(parseDateTimeBestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('date') }}))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from {{ empty_string_to_null('timestamp_col') }})) as timestamp_col, + accurateCastOrNull({{ quote('HKD@spéçiäl & characters') }}, '{{ dbt_utils.type_float() }}') as {{ quote('HKD@spéçiäl & characters') }}, + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), '{{ dbt_utils.type_string() }}'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, '{{ dbt_utils.type_float() }}') as NZD, + accurateCastOrNull(USD, '{{ dbt_utils.type_float() }}') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql new file mode 100644 index 0000000000000..5d3e0d7f6abf0 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -0,0 +1,18 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ 
incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql new file mode 100644 index 0000000000000..c6885e98962eb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -0,0 +1,18 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} +select + accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_updated_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('renamed_dedup_cdc_excluded_ab1') }} +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..1570a1b5fddf3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,173 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_cdc_excluded' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. 
This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', quote(this.schema) + '.' + quote('dedup_cdc_excluded')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', quote(this.schema) + '.' + quote('dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + alter table {{ this }} delete where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_cdc_excluded_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_cdc_excluded_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_cdc_excluded_stg') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + --left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_cdc_excluded_stg') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +), +{% endif %} +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by id + order by + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) as _airbyte_active_row_num + from input_data +), +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, + case when _airbyte_active_row_num = 
1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, + anyOrNull(_ab_cdc_lsn) over ( + partition by id + order by + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_cdc_excluded_hashid + from input_data_with_active_row_num +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_string() }}') + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..e29cf7f7906c9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,181 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. 
This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + alter table {{ final_table_relation }} delete where _airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', quote(this.schema) + '.' + quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', quote(this.schema) + '.' + quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + alter table {{ this }} delete where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + --left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_active_row_num + from input_data +), +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'NZD', + ]) }} as _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & 
characters') }}, + HKD_special___characters, + NZD, + USD, + date as _airbyte_start_at, + case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, + anyOrNull(date) over ( + partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data_with_active_row_num +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..5b8ff875d3a3b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,27 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} +select + _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..4051dd3178c94 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,21 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_scd') }} +select + _airbyte_unique_key, + id, + _ab_cdc_updated_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from {{ ref('renamed_dedup_cdc_excluded_scd') }} +-- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..c66443b3a1501 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,29 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, + {{ quote('column___with__quotes') }}, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..beb710676cb02 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'date', + 'timestamp_col', + quote('HKD@spéçiäl & characters'), + 'HKD_special___characters', + 'NZD', + 'USD', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ 
incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..f51802427655e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/sources.yml @@ -0,0 +1,16 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_1_prefix_startwith_number + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts + - name: _airbyte_raw_pos_dedup_cdcx + - name: _airbyte_raw_renamed_dedup_cdc_excluded + - name: _airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..2609c12f32d36 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,25 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_ab1__dbt_tmp + + as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_dedup_exchange_rate +select + JSONExtractRaw(assumeNotNull(_airbyte_data), 'id') as id, + JSONExtractRaw(assumeNotNull(_airbyte_data), 'currency') as currency, + JSONExtractRaw(assumeNotNull(_airbyte_data), 'date') as date, + JSONExtractRaw(assumeNotNull(_airbyte_data), 'timestamp_col') as timestamp_col, + JSONExtractRaw(assumeNotNull(_airbyte_data), 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + JSONExtractRaw(assumeNotNull(_airbyte_data), 'HKD_special___characters') as HKD_special___characters, + JSONExtractRaw(assumeNotNull(_airbyte_data), 'NZD') as NZD, + JSONExtractRaw(assumeNotNull(_airbyte_data), 'USD') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..07778080d6faa --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,33 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_ab2__dbt_tmp + + as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: _airbyte_test_normalization.dedup_exchange_rate_ab1 +select + accurateCastOrNull(id, ' + BIGINT +') as id, + nullif(accurateCastOrNull(trim(BOTH '"' from currency), 'String'), 'null') as currency, + toDate(parseDateTimeBestEffortOrNull(trim(BOTH '"' from nullif(date, '')))) as date, + parseDateTime64BestEffortOrNull(trim(BOTH '"' from nullif(timestamp_col, ''))) as timestamp_col, + accurateCastOrNull("HKD@spéçiäl & characters", ' + Float64 +') as "HKD@spéçiäl & characters", + nullif(accurateCastOrNull(trim(BOTH '"' from HKD_special___characters), 'String'), 'null') as HKD_special___characters, + accurateCastOrNull(NZD, ' + Float64 +') as NZD, + accurateCastOrNull(USD, ' + Float64 +') as USD, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from _airbyte_test_normalization.dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..a793d7412e483 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,6 @@ + + insert into test_normalization.dedup_exchange_rate_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from dedup_exchange_rate_scd__dbt_tmp + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..8f84c4f3c1620 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,6 @@ + + insert into test_normalization.renamed_dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + from renamed_dedup_cdc_excluded_scd__dbt_tmp + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..4a895d6cf480a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,6 @@ + + insert into test_normalization.dedup_exchange_rate ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from dedup_exchange_rate__dbt_tmp + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..1b96d3f87152e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,6 @@ + + insert into test_normalization.renamed_dedup_cdc_excluded ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + from renamed_dedup_cdc_excluded__dbt_tmp + + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..c2be71e63fc94 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,29 @@ + + + + insert into test_normalization.exchange_rate__dbt_tmp ("id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "HKD_special___characters", "NZD", "USD", "column___with__quotes", "datetime_tz", "datetime_no_tz", "time_tz", "time_no_tz", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_exchange_rate_hashid") + +-- Final base SQL model +-- depends_on: _airbyte_test_normalization.exchange_rate_ab3 +select + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, + "column___with__quotes", + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from _airbyte_test_normalization.exchange_rate_ab3 +-- exchange_rate from test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..9a932053975b7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,41 @@ + + + create view _airbyte_test_normalization.dedup_exchange_rate_stg__dbt_tmp + + as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: _airbyte_test_normalization.dedup_exchange_rate_ab2 +select + assumeNotNull(hex(MD5( + + toString(id) || '~' || + + + toString(currency) || '~' || + + + toString(date) || '~' || + + + toString(timestamp_col) || '~' || + + + toString("HKD@spéçiäl & characters") || '~' || + + + toString(HKD_special___characters) || '~' || + + + toString(NZD) || '~' || + + + toString(USD) + + ))) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from _airbyte_test_normalization.dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ) \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/duckdb/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/duckdb/test_nested_streams/dbt_project.yml new file mode 100755 index 0000000000000..7631ef356dc92 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/duckdb/test_nested_streams/dbt_project.yml @@ -0,0 +1,63 @@ +# This file is necessary 
to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: "airbyte_utils" +version: "1.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: "normalize" + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +packages-install-path: "/dbt" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true + # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) + # all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `model-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +dispatch: + - macro_namespace: dbt_utils + search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/duckdb/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/duckdb/test_simple_streams/dbt_project.yml new file mode 100755 index 0000000000000..7631ef356dc92 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/duckdb/test_simple_streams/dbt_project.yml @@ -0,0 +1,63 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: "airbyte_utils" +version: "1.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: "normalize" + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! 
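+# For example: with the settings below, a generated model such as
+# models/generated/airbyte_tables/<schema>/<table>.sql is compiled into
+# ../build and materialized as a table, per the `models:` block further down.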
+model-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +packages-install-path: "/dbt" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true + # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) + # all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `model-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +dispatch: + - macro_namespace: dbt_utils + search_order: ["airbyte_utils", "dbt_utils"] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/dbt_project.yml new file mode 100755 index 0000000000000..8ed082f367749 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/dbt_project.yml @@ -0,0 +1,61 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: "airbyte_utils" +version: "1.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: "normalize" + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +packages-install-path: "/dbt" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true + # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) + # all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `model-paths` directory here. +# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! 
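+# The model settings below cascade by folder: everything in this package
+# defaults to `table`, and each generated/* subfolder then narrows that
+# (ephemeral CTEs, incremental tables, plain tables, views). A single model
+# may still override its folder's default from inside its own file, e.g.
+# (illustrative snippet, not part of this project):
+#   {{ config(materialized = "view") }}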
+models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +vars: + dbt_utils_dispatch_list: ["airbyte_utils"] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql new file mode 100644 index 0000000000000..3ea4e25cfc959 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql @@ -0,0 +1,127 @@ + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co__lting_into_long_names_scd_temp_view"','V') is not null + begin + drop view test_normalization."nested_stream_with_co__lting_into_long_names_scd_temp_view" + end + + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co__lting_into_long_names_scd"','U') is not null + begin + drop table test_normalization."nested_stream_with_co__lting_into_long_names_scd" + end + + + USE [test_normalization]; + EXEC('create view test_normalization."nested_stream_with_co__lting_into_long_names_scd_temp_view" as + +-- depends_on: ref(''nested_stream_with_co__lting_into_long_names_stg'') +with + +input_data as ( + select * + from "test_normalization"._airbyte_test_normalization."nested_stream_with_co__lting_into_long_names_stg" + -- nested_stream_with_co__lting_into_long_names from "test_normalization".test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(id as + NVARCHAR(max)), ''''),''''), '''') as + NVARCHAR(max)), '''')), 2) as _airbyte_unique_key, + id, + "date", + "partition", + "date" as _airbyte_start_at, + lag("date") over ( + partition by id + order by + "date" desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id + order by + "date" desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_nested_strea__nto_long_names_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(_airbyte_unique_key as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(_airbyte_start_at as + NVARCHAR(max)), ''''), ''-'', 
coalesce(cast(_airbyte_emitted_at as + NVARCHAR(max)), ''''),''''), '''') as + NVARCHAR(max)), '''')), 2) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + "date", + "partition", + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at, + _airbyte_nested_strea__nto_long_names_hashid +from dedup_data where _airbyte_row_num = 1 + '); + + SELECT * INTO "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" FROM + "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd_temp_view" + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co__lting_into_long_names_scd_temp_view"','V') is not null + begin + drop view test_normalization."nested_stream_with_co__lting_into_long_names_scd_temp_view" + end + + + use [test_normalization]; + if EXISTS ( + SELECT * FROM + sys.indexes WHERE name = 'test_normalization_nested_stream_with_co__lting_into_long_names_scd_cci' + AND object_id=object_id('test_normalization_nested_stream_with_co__lting_into_long_names_scd') + ) + DROP index test_normalization.nested_stream_with_co__lting_into_long_names_scd.test_normalization_nested_stream_with_co__lting_into_long_names_scd_cci + CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_nested_stream_with_co__lting_into_long_names_scd_cci + ON test_normalization.nested_stream_with_co__lting_into_long_names_scd + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql new file mode 100644 index 0000000000000..7cfc356688fbe --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql @@ -0,0 +1,116 @@ + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co___long_names_partition_temp_view"','V') is not null + begin + drop view test_normalization."nested_stream_with_co___long_names_partition_temp_view" + end + + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co___long_names_partition"','U') is not null + begin + drop table test_normalization."nested_stream_with_co___long_names_partition" + end + + + USE [test_normalization]; + EXEC('create view test_normalization."nested_stream_with_co___long_names_partition_temp_view" as + +with __dbt__cte__nested_stream_with_co___long_names_partition_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" +select + _airbyte_nested_strea__nto_long_names_hashid, + json_query("partition", ''$."double_array_data"'') as double_array_data, + json_query("partition", ''$."DATA"'') as "DATA", + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as 
_airbyte_normalized_at +from "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" as table_alias +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and "partition" is not null + +), __dbt__cte__nested_stream_with_co___long_names_partition_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_co___long_names_partition_ab1 +select + _airbyte_nested_strea__nto_long_names_hashid, + double_array_data, + "DATA", + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_co___long_names_partition_ab1 +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +), __dbt__cte__nested_stream_with_co___long_names_partition_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_co___long_names_partition_ab2 +select + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(_airbyte_nested_strea__nto_long_names_hashid as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(cast(double_array_data as + NVARCHAR(max)) as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(cast("DATA" as + NVARCHAR(max)) as + NVARCHAR(max)), ''''),''''), '''') as + NVARCHAR(max)), '''')), 2) as _airbyte_partition_hashid, + tmp.* +from __dbt__cte__nested_stream_with_co___long_names_partition_ab2 tmp +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_co___long_names_partition_ab3 +select + _airbyte_nested_strea__nto_long_names_hashid, + double_array_data, + "DATA", + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at, + _airbyte_partition_hashid +from __dbt__cte__nested_stream_with_co___long_names_partition_ab3 +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" +where 1 = 1 + + '); + + SELECT * INTO "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" FROM + "test_normalization".test_normalization."nested_stream_with_co___long_names_partition_temp_view" + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co___long_names_partition_temp_view"','V') is not null + begin + drop view test_normalization."nested_stream_with_co___long_names_partition_temp_view" + end + + + use [test_normalization]; + if EXISTS ( + SELECT * FROM + sys.indexes WHERE name = 'test_normalization_nested_stream_with_co___long_names_partition_cci' + AND object_id=object_id('test_normalization_nested_stream_with_co___long_names_partition') + ) + DROP index test_normalization.nested_stream_with_co___long_names_partition.test_normalization_nested_stream_with_co___long_names_partition_cci + CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_nested_stream_with_co___long_names_partition_cci + ON test_normalization.nested_stream_with_co___long_names_partition + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql new file mode 100644 index 0000000000000..d5b645c4e07e5 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql @@ -0,0 +1,121 @@ + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co___names_partition_data_temp_view"','V') is not null + begin + drop view test_normalization."nested_stream_with_co___names_partition_data_temp_view" + end + + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co___names_partition_data"','U') is not null + begin + drop table test_normalization."nested_stream_with_co___names_partition_data" + end + + + USE [test_normalization]; + EXEC('create view test_normalization."nested_stream_with_co___names_partition_data_temp_view" as + +with __dbt__cte__nested_stream_with_co___names_partition_data_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" + +select + _airbyte_partition_hashid, + json_value( + "DATA".value, ''$."currency"'') as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" as table_alias +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA + + CROSS APPLY ( + SELECT [value] = CASE + WHEN [type] = 4 THEN (SELECT [value] FROM OPENJSON([value])) + WHEN [type] = 5 THEN [value] + END + FROM OPENJSON("DATA") + ) AS "DATA" +where 1 = 1 +and "DATA" is not null + +), __dbt__cte__nested_stream_with_co___names_partition_data_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_co___names_partition_data_ab1 +select + _airbyte_partition_hashid, + cast(currency as + NVARCHAR(max)) as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_co___names_partition_data_ab1 +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +), __dbt__cte__nested_stream_with_co___names_partition_data_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_co___names_partition_data_ab2 +select + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(_airbyte_partition_hashid as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(currency as + NVARCHAR(max)), ''''),''''), '''') as + NVARCHAR(max)), '''')), 2) as _airbyte_data_hashid, + tmp.* +from __dbt__cte__nested_stream_with_co___names_partition_data_ab2 tmp +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_co___names_partition_data_ab3 +select + _airbyte_partition_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at, + 
_airbyte_data_hashid +from __dbt__cte__nested_stream_with_co___names_partition_data_ab3 +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" +where 1 = 1 + + '); + + SELECT * INTO "test_normalization".test_normalization."nested_stream_with_co___names_partition_data" FROM + "test_normalization".test_normalization."nested_stream_with_co___names_partition_data_temp_view" + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co___names_partition_data_temp_view"','V') is not null + begin + drop view test_normalization."nested_stream_with_co___names_partition_data_temp_view" + end + + + use [test_normalization]; + if EXISTS ( + SELECT * FROM + sys.indexes WHERE name = 'test_normalization_nested_stream_with_co___names_partition_data_cci' + AND object_id=object_id('test_normalization_nested_stream_with_co___names_partition_data') + ) + DROP index test_normalization.nested_stream_with_co___names_partition_data.test_normalization_nested_stream_with_co___names_partition_data_cci + CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_nested_stream_with_co___names_partition_data_cci + ON test_normalization.nested_stream_with_co___names_partition_data + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql new file mode 100644 index 0000000000000..6cb8120f52ca2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql @@ -0,0 +1,121 @@ + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co__ion_double_array_data_temp_view"','V') is not null + begin + drop view test_normalization."nested_stream_with_co__ion_double_array_data_temp_view" + end + + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co__ion_double_array_data"','U') is not null + begin + drop table test_normalization."nested_stream_with_co__ion_double_array_data" + end + + + USE [test_normalization]; + EXEC('create view test_normalization."nested_stream_with_co__ion_double_array_data_temp_view" as + +with __dbt__cte__nested_stream_with_co__ion_double_array_data_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" + +select + _airbyte_partition_hashid, + json_value( + double_array_data.value, ''$."id"'') as id, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" as table_alias +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data + + CROSS APPLY ( + SELECT [value] = CASE + WHEN [type] = 4 THEN (SELECT [value] FROM OPENJSON([value])) + WHEN 
[type] = 5 THEN [value] + END + FROM OPENJSON(double_array_data) + ) AS double_array_data +where 1 = 1 +and double_array_data is not null + +), __dbt__cte__nested_stream_with_co__ion_double_array_data_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_co__ion_double_array_data_ab1 +select + _airbyte_partition_hashid, + cast(id as + NVARCHAR(max)) as id, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_co__ion_double_array_data_ab1 +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +), __dbt__cte__nested_stream_with_co__ion_double_array_data_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_co__ion_double_array_data_ab2 +select + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(_airbyte_partition_hashid as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(id as + NVARCHAR(max)), ''''),''''), '''') as + NVARCHAR(max)), '''')), 2) as _airbyte_double_array_data_hashid, + tmp.* +from __dbt__cte__nested_stream_with_co__ion_double_array_data_ab2 tmp +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_co__ion_double_array_data_ab3 +select + _airbyte_partition_hashid, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at, + _airbyte_double_array_data_hashid +from __dbt__cte__nested_stream_with_co__ion_double_array_data_ab3 +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from "test_normalization".test_normalization."nested_stream_with_co___long_names_partition" +where 1 = 1 + + '); + + SELECT * INTO "test_normalization".test_normalization."nested_stream_with_co__ion_double_array_data" FROM + "test_normalization".test_normalization."nested_stream_with_co__ion_double_array_data_temp_view" + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co__ion_double_array_data_temp_view"','V') is not null + begin + drop view test_normalization."nested_stream_with_co__ion_double_array_data_temp_view" + end + + + use [test_normalization]; + if EXISTS ( + SELECT * FROM + sys.indexes WHERE name = 'test_normalization_nested_stream_with_co__ion_double_array_data_cci' + AND object_id=object_id('test_normalization_nested_stream_with_co__ion_double_array_data') + ) + DROP index test_normalization.nested_stream_with_co__ion_double_array_data.test_normalization_nested_stream_with_co__ion_double_array_data_cci + CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_nested_stream_with_co__ion_double_array_data_cci + ON test_normalization.nested_stream_with_co__ion_double_array_data + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql new file mode 100644 
index 0000000000000..492b941921216 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql @@ -0,0 +1,66 @@ + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co__lting_into_long_names_temp_view"','V') is not null + begin + drop view test_normalization."nested_stream_with_co__lting_into_long_names_temp_view" + end + + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co__lting_into_long_names"','U') is not null + begin + drop table test_normalization."nested_stream_with_co__lting_into_long_names" + end + + + USE [test_normalization]; + EXEC('create view test_normalization."nested_stream_with_co__lting_into_long_names_temp_view" as + +-- Final base SQL model +-- depends_on: "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" +select + _airbyte_unique_key, + id, + "date", + "partition", + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at, + _airbyte_nested_strea__nto_long_names_hashid +from "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" +-- nested_stream_with_co__lting_into_long_names from "test_normalization".test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names +where 1 = 1 +and _airbyte_active_row = 1 + + '); + + SELECT * INTO "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names" FROM + "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_temp_view" + + + + USE [test_normalization]; + if object_id ('test_normalization."nested_stream_with_co__lting_into_long_names_temp_view"','V') is not null + begin + drop view test_normalization."nested_stream_with_co__lting_into_long_names_temp_view" + end + + + use [test_normalization]; + if EXISTS ( + SELECT * FROM + sys.indexes WHERE name = 'test_normalization_nested_stream_with_co__lting_into_long_names_cci' + AND object_id=object_id('test_normalization_nested_stream_with_co__lting_into_long_names') + ) + DROP index test_normalization.nested_stream_with_co__lting_into_long_names.test_normalization_nested_stream_with_co__lting_into_long_names_cci + CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_nested_stream_with_co__lting_into_long_names_cci + ON test_normalization.nested_stream_with_co__lting_into_long_names + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co___long_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co___long_names_partition_ab1.sql new file mode 100644 index 0000000000000..35ebff8092c70 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co___long_names_partition_ab1.sql @@ -0,0 +1,19 @@ +{{ config( + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated 
field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_co__lting_into_long_names_scd') }} +select + _airbyte_nested_strea__nto_long_names_hashid, + {{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data, + {{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as {{ adapter.quote('DATA') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_co__lting_into_long_names_scd') }} as table_alias +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and {{ adapter.quote('partition') }} is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co___names_partition_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co___names_partition_data_ab1.sql new file mode 100644 index 0000000000000..cdf1151ee10d7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co___names_partition_data_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_co___long_names_partition') }} +{{ unnest_cte(ref('nested_stream_with_co___long_names_partition'), 'partition', adapter.quote('DATA')) }} +select + _airbyte_partition_hashid, + {{ json_extract_scalar(unnested_column_value(adapter.quote('DATA')), ['currency'], ['currency']) }} as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_co___long_names_partition') }} as table_alias +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +{{ cross_join_unnest('partition', adapter.quote('DATA')) }} +where 1 = 1 +and {{ adapter.quote('DATA') }} is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__ion_double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__ion_double_array_data_ab1.sql new file mode 100644 index 0000000000000..a8ca4bbb7d40f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__ion_double_array_data_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- 
depends_on: {{ ref('nested_stream_with_co___long_names_partition') }} +{{ unnest_cte(ref('nested_stream_with_co___long_names_partition'), 'partition', 'double_array_data') }} +select + _airbyte_partition_hashid, + {{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_co___long_names_partition') }} as table_alias +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +{{ cross_join_unnest('partition', 'double_array_data') }} +where 1 = 1 +and double_array_data is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__lting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__lting_into_long_names_ab1.sql new file mode 100644 index 0000000000000..3274f1fabcc1a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__lting_into_long_names_ab1.sql @@ -0,0 +1,19 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias +-- nested_stream_with_co__lting_into_long_names +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__lting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__lting_into_long_names_ab2.sql new file mode 100644 index 0000000000000..b810108779e79 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co__lting_into_long_names_ab2.sql @@ -0,0 +1,19 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- 
depends_on: {{ ref('nested_stream_with_co__lting_into_long_names_ab1') }} +select + cast(id as {{ dbt_utils.type_string() }}) as id, + cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('date') }}, + cast({{ adapter.quote('partition') }} as {{ type_json() }}) as {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_co__lting_into_long_names_ab1') }} +-- nested_stream_with_co__lting_into_long_names +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql new file mode 100644 index 0000000000000..b4683a3ea301c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql @@ -0,0 +1,160 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_co__lting_into_long_names' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.nested_stream_with_co__lting_into_long_names_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('nested_stream_with_co__lting_into_long_names_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('nested_stream_with_co__lting_into_long_names_stg') }} + -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('nested_stream_with_co__lting_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('nested_stream_with_co__lting_into_long_names_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('nested_stream_with_co__lting_into_long_names_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('nested_stream_with_co__lting_into_long_names_stg') }} + -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, + {{ adapter.quote('date') }} as _airbyte_start_at, + lag({{ adapter.quote('date') }}) over ( + partition by id + order by + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id + order by + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_nested_strea__nto_long_names_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + 
_airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_strea__nto_long_names_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql new file mode 100644 index 0000000000000..33830638af517 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql @@ -0,0 +1,19 @@ +{{ config( + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_co___long_names_partition_ab3') }} +select + _airbyte_nested_strea__nto_long_names_hashid, + double_array_data, + {{ adapter.quote('DATA') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_partition_hashid +from {{ ref('nested_stream_with_co___long_names_partition_ab3') }} +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_co__lting_into_long_names_scd') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql new file mode 100644 index 0000000000000..3a0dedfa076e5 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql @@ -0,0 +1,18 @@ +{{ config( + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_co___names_partition_data_ab3') }} +select + _airbyte_partition_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_data_hashid +from {{ ref('nested_stream_with_co___names_partition_data_ab3') }} +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_co___long_names_partition') }} +where 1 = 1 +{{ 
incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql new file mode 100644 index 0000000000000..74323fef10e6e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql @@ -0,0 +1,18 @@ +{{ config( + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_co__ion_double_array_data_ab3') }} +select + _airbyte_partition_hashid, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_double_array_data_hashid +from {{ ref('nested_stream_with_co__ion_double_array_data_ab3') }} +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_co___long_names_partition') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql new file mode 100644 index 0000000000000..aa2caf12165ce --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql @@ -0,0 +1,22 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_co__lting_into_long_names_scd') }} +select + _airbyte_unique_key, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_strea__nto_long_names_hashid +from {{ ref('nested_stream_with_co__lting_into_long_names_scd') }} +-- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..92fa4c9a2580e --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/sources.yml @@ -0,0 +1,22 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_conflict_stream_array + - name: _airbyte_raw_conflict_stream_name + - name: _airbyte_raw_conflict_stream_scalar + - name: _airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + - name: _airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + - name: _airbyte_raw_some_stream_that_was_empty + - name: _airbyte_raw_unnest_alias +- name: test_normalization_namespace + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_simple_stream_with_namespace_resulting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql new file mode 100644 index 0000000000000..22684ecf70c29 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql @@ -0,0 +1,17 @@ + + + + delete from "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "test_normalization".test_normalization."#nested_stream_with_co__lting_into_long_names_scd__dbt_tmp" + ); + + + insert into "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_strea__nto_long_names_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_strea__nto_long_names_hashid" + from "test_normalization".test_normalization."#nested_stream_with_co__lting_into_long_names_scd__dbt_tmp" + ); + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql new file mode 100644 index 0000000000000..8eacd04b88489 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql @@ -0,0 +1,11 @@ + + + + + insert into 
"test_normalization".test_normalization."nested_stream_with_co___long_names_partition" ("_airbyte_nested_strea__nto_long_names_hashid", "double_array_data", "DATA", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid") + ( + select "_airbyte_nested_strea__nto_long_names_hashid", "double_array_data", "DATA", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid" + from "test_normalization".test_normalization."#nested_stream_with_co___long_names_partition__dbt_tmp" + ); + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql new file mode 100644 index 0000000000000..3d7b97c0c96bf --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql @@ -0,0 +1,11 @@ + + + + + insert into "test_normalization".test_normalization."nested_stream_with_co___names_partition_data" ("_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid") + ( + select "_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid" + from "test_normalization".test_normalization."#nested_stream_with_co___names_partition_data__dbt_tmp" + ); + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql new file mode 100644 index 0000000000000..d3c525c77c34d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql @@ -0,0 +1,11 @@ + + + + + insert into "test_normalization".test_normalization."nested_stream_with_co__ion_double_array_data" ("_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid") + ( + select "_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid" + from "test_normalization".test_normalization."#nested_stream_with_co__ion_double_array_data__dbt_tmp" + ); + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql new file mode 100644 index 0000000000000..befc1e8d025db --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql @@ -0,0 +1,17 @@ + + + + delete from "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "test_normalization".test_normalization."#nested_stream_with_co__lting_into_long_names__dbt_tmp" + ); + + + insert into "test_normalization".test_normalization."nested_stream_with_co__lting_into_long_names" ("_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_strea__nto_long_names_hashid") + ( + select "_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_strea__nto_long_names_hashid" + from "test_normalization".test_normalization."#nested_stream_with_co__lting_into_long_names__dbt_tmp" + ); + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/dbt_project.yml new file mode 100755 index 0000000000000..8ed082f367749 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/dbt_project.yml @@ -0,0 +1,61 @@ +# This file is necessary to install dbt-utils with dbt deps +# the content will be overwritten by the transform function + +# Name your package! Package names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: "airbyte_utils" +version: "1.0" +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. Profiles contain +# database connection information, and should be configured in the ~/.dbt/profiles.yml file +profile: "normalize" + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that source models can be found +# in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +docs-paths: ["docs"] +analysis-paths: ["analysis"] +test-paths: ["tests"] +seed-paths: ["data"] +macro-paths: ["macros"] + +target-path: "../build" # directory which will store compiled SQL files +log-path: "../logs" # directory which will store DBT logs +packages-install-path: "/dbt" # directory which will store external DBT dependencies + +clean-targets: # directories to be removed by `dbt clean` + - "build" + - "dbt_modules" + +quoting: + database: true + # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785) + # all schemas should be unquoted + schema: false + identifier: true + +# You can define configurations for models in the `model-paths` directory here. 
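+# Tags assigned below (via +tags) also allow running a subset of the DAG by
+# tag, e.g. (hypothetical invocation) `dbt run --select tag:normalized_tables`.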
+# Using these configurations, you can enable or disable models, change how they +# are materialized, and more! +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view + +vars: + dbt_utils_dispatch_list: ["airbyte_utils"] diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..36e1fe2b4afb4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,141 @@ + + + + USE [test_normalization]; + if object_id ('test_normalization."dedup_exchange_rate_scd_temp_view"','V') is not null + begin + drop view test_normalization."dedup_exchange_rate_scd_temp_view" + end + + + + + USE [test_normalization]; + if object_id ('test_normalization."dedup_exchange_rate_scd"','U') is not null + begin + drop table test_normalization."dedup_exchange_rate_scd" + end + + + USE [test_normalization]; + EXEC('create view test_normalization."dedup_exchange_rate_scd_temp_view" as + +-- depends_on: ref(''dedup_exchange_rate_stg'') +with + +input_data as ( + select * + from "test_normalization"._airbyte_test_normalization."dedup_exchange_rate_stg" + -- dedup_exchange_rate from "test_normalization".test_normalization._airbyte_raw_dedup_exchange_rate +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(id as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(currency as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(nzd as + NVARCHAR(max)), ''''),''''), '''') as + NVARCHAR(max)), '''')), 2) as _airbyte_unique_key, + id, + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + "date" as _airbyte_start_at, + lag("date") over ( + partition by id, currency, cast(nzd as + NVARCHAR(max)) + order by + "date" desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id, currency, cast(nzd as + NVARCHAR(max)) + order by + "date" desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(_airbyte_unique_key as + NVARCHAR(max)), ''''), ''-'', 
coalesce(cast(_airbyte_start_at as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(_airbyte_emitted_at as + NVARCHAR(max)), ''''),''''), '''') as + NVARCHAR(max)), '''')), 2) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + '); + + SELECT * INTO "test_normalization".test_normalization."dedup_exchange_rate_scd" FROM + "test_normalization".test_normalization."dedup_exchange_rate_scd_temp_view" + + + + USE [test_normalization]; + if object_id ('test_normalization."dedup_exchange_rate_scd_temp_view"','V') is not null + begin + drop view test_normalization."dedup_exchange_rate_scd_temp_view" + end + + + use [test_normalization]; + if EXISTS ( + SELECT * FROM + sys.indexes WHERE name = 'test_normalization_dedup_exchange_rate_scd_cci' + AND object_id=object_id('test_normalization_dedup_exchange_rate_scd') + ) + DROP index test_normalization.dedup_exchange_rate_scd.test_normalization_dedup_exchange_rate_scd_cci + CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_dedup_exchange_rate_scd_cci + ON test_normalization.dedup_exchange_rate_scd + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..b1600851cf4bb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,71 @@ + + + + USE [test_normalization]; + if object_id ('test_normalization."dedup_exchange_rate_temp_view"','V') is not null + begin + drop view test_normalization."dedup_exchange_rate_temp_view" + end + + + + + USE [test_normalization]; + if object_id ('test_normalization."dedup_exchange_rate"','U') is not null + begin + drop table test_normalization."dedup_exchange_rate" + end + + + USE [test_normalization]; + EXEC('create view test_normalization."dedup_exchange_rate_temp_view" as + +-- Final base SQL model +-- depends_on: "test_normalization".test_normalization."dedup_exchange_rate_scd" +select + _airbyte_unique_key, + id, + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from "test_normalization".test_normalization."dedup_exchange_rate_scd" +-- dedup_exchange_rate from "test_normalization".test_normalization._airbyte_raw_dedup_exchange_rate +where 1 = 1 +and _airbyte_active_row = 1 + + '); + + SELECT * INTO "test_normalization".test_normalization."dedup_exchange_rate" FROM + "test_normalization".test_normalization."dedup_exchange_rate_temp_view" + + + + USE [test_normalization]; + if object_id ('test_normalization."dedup_exchange_rate_temp_view"','V') is not null + 
begin + drop view test_normalization."dedup_exchange_rate_temp_view" + end + + + use [test_normalization]; + if EXISTS ( + SELECT * FROM + sys.indexes WHERE name = 'test_normalization_dedup_exchange_rate_cci' + AND object_id=object_id('test_normalization_dedup_exchange_rate') + ) + DROP index test_normalization.dedup_exchange_rate.test_normalization_dedup_exchange_rate_cci + CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_dedup_exchange_rate_cci + ON test_normalization.dedup_exchange_rate + + + + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..830e76c6f0ef8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,159 @@ + + + USE [test_normalization]; + if object_id ('test_normalization."exchange_rate__dbt_tmp_temp_view"','V') is not null + begin + drop view test_normalization."exchange_rate__dbt_tmp_temp_view" + end + + + + + USE [test_normalization]; + if object_id ('test_normalization."exchange_rate__dbt_tmp"','U') is not null + begin + drop table test_normalization."exchange_rate__dbt_tmp" + end + + + USE [test_normalization]; + EXEC('create view test_normalization."exchange_rate__dbt_tmp_temp_view" as + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "test_normalization".test_normalization._airbyte_raw_exchange_rate +select + json_value(_airbyte_data, ''$."id"'') as id, + json_value(_airbyte_data, ''$."currency"'') as currency, + json_value(_airbyte_data, ''$."date"'') as "date", + json_value(_airbyte_data, ''$."timestamp_col"'') as timestamp_col, + json_value(_airbyte_data, ''$."HKD@spéçiäl & characters"'') as "HKD@spéçiäl & characters", + json_value(_airbyte_data, ''$."HKD_special___characters"'') as hkd_special___characters, + json_value(_airbyte_data, ''$."NZD"'') as nzd, + json_value(_airbyte_data, ''$."USD"'') as usd, + json_value(_airbyte_data, ''$."column`_''''with\"_quotes"'') as "column`_''with""_quotes", + json_value(_airbyte_data, ''$."datetime_tz"'') as datetime_tz, + json_value(_airbyte_data, ''$."datetime_no_tz"'') as datetime_no_tz, + json_value(_airbyte_data, ''$."time_tz"'') as time_tz, + json_value(_airbyte_data, ''$."time_no_tz"'') as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from "test_normalization".test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as + NVARCHAR(max)) as currency, + try_parse(nullif("date", '''') as date) as "date", + try_parse(nullif(timestamp_col, '''') as datetimeoffset) as timestamp_col, + cast("HKD@spéçiäl & characters" as + float +) as "HKD@spéçiäl & characters", + cast(hkd_special___characters as + NVARCHAR(max)) as 
hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast("column`_''with""_quotes" as + NVARCHAR(max)) as "column`_''with""_quotes", + try_parse(nullif(datetime_tz, '''') as datetimeoffset) as datetime_tz, + try_parse(nullif(datetime_no_tz, '''') as datetime2) as datetime_no_tz, + cast(nullif(time_tz, '''') as NVARCHAR(max)) as time_tz, + cast(nullif(time_no_tz, '''') as + time +) as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(id as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(currency as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("date" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(timestamp_col as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("HKD@spéçiäl & characters" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(hkd_special___characters as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(nzd as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(usd as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("column`_''with""_quotes" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(datetime_tz as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(datetime_no_tz as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(time_tz as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(time_no_tz as + NVARCHAR(max)), ''''),''''), '''') as + NVARCHAR(max)), '''')), 2) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + id, + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + "column`_''with""_quotes", + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from "test_normalization".test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + '); + + SELECT * INTO "test_normalization".test_normalization."exchange_rate__dbt_tmp" FROM + "test_normalization".test_normalization."exchange_rate__dbt_tmp_temp_view" + + + + USE [test_normalization]; + if object_id ('test_normalization."exchange_rate__dbt_tmp_temp_view"','V') is not null + begin + drop view test_normalization."exchange_rate__dbt_tmp_temp_view" + end + + + use [test_normalization]; + if EXISTS ( + SELECT * FROM + sys.indexes WHERE name = 'test_normalization_exchange_rate__dbt_tmp_cci' + AND object_id=object_id('test_normalization_exchange_rate__dbt_tmp') + ) + DROP index test_normalization.exchange_rate__dbt_tmp.test_normalization_exchange_rate__dbt_tmp_cci + CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_exchange_rate__dbt_tmp_cci + ON test_normalization.exchange_rate__dbt_tmp + + + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file 
mode 100644 index 0000000000000..ed018a2680b4c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,77 @@ +USE [test_normalization]; + execute('create view _airbyte_test_normalization."dedup_exchange_rate_stg__dbt_tmp" as + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "test_normalization".test_normalization._airbyte_raw_dedup_exchange_rate +select + json_value(_airbyte_data, ''$."id"'') as id, + json_value(_airbyte_data, ''$."currency"'') as currency, + json_value(_airbyte_data, ''$."date"'') as "date", + json_value(_airbyte_data, ''$."timestamp_col"'') as timestamp_col, + json_value(_airbyte_data, ''$."HKD@spéçiäl & characters"'') as "HKD@spéçiäl & characters", + json_value(_airbyte_data, ''$."HKD_special___characters"'') as hkd_special___characters, + json_value(_airbyte_data, ''$."NZD"'') as nzd, + json_value(_airbyte_data, ''$."USD"'') as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from "test_normalization".test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as + NVARCHAR(max)) as currency, + try_parse(nullif("date", '''') as date) as "date", + try_parse(nullif(timestamp_col, '''') as datetimeoffset) as timestamp_col, + cast("HKD@spéçiäl & characters" as + float +) as "HKD@spéçiäl & characters", + cast(hkd_special___characters as + NVARCHAR(max)) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(id as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(currency as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("date" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(timestamp_col as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("HKD@spéçiäl & characters" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(hkd_special___characters as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(nzd as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(usd as + NVARCHAR(max)), ''''),''''), '''') as + NVARCHAR(max)), '''')), 2) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + '); + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..b15582c5ec555 --- 
/dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,76 @@ +USE [test_normalization]; + execute('create view _airbyte_test_normalization."multiple_column_names_conflicts_stg__dbt_tmp" as + +with __dbt__cte__multiple_column_names_conflicts_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "test_normalization".test_normalization._airbyte_raw_multiple_column_names_conflicts +select + json_value(_airbyte_data, ''$."id"'') as id, + json_value(_airbyte_data, ''$."User Id"'') as "User Id", + json_value(_airbyte_data, ''$."user_id"'') as user_id, + json_value(_airbyte_data, ''$."User id"'') as "User id_1", + json_value(_airbyte_data, ''$."user id"'') as "user id_2", + json_value(_airbyte_data, ''$."User@Id"'') as "User@Id", + json_value(_airbyte_data, ''$."UserId"'') as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from "test_normalization".test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__cte__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 +select + cast(id as + bigint +) as id, + cast("User Id" as + NVARCHAR(max)) as "User Id", + cast(user_id as + float +) as user_id, + cast("User id_1" as + float +) as "User id_1", + cast("user id_2" as + float +) as "user id_2", + cast("User@Id" as + NVARCHAR(max)) as "User@Id", + cast(userid as + float +) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from __dbt__cte__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 +select + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(id as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("User Id" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(user_id as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("User id_1" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("user id_2" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("User@Id" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(userid as + NVARCHAR(max)), ''''),''''), '''') as + NVARCHAR(max)), '''')), 2) as _airbyte_multiple_col__ames_conflicts_hashid, + tmp.* +from __dbt__cte__multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 + + '); + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..670db0869ae22 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 
+1,24 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..556ece9aaaeaf --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} +select + cast(id as {{ dbt_utils.type_bigint() }}) as id, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + try_parse({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, + try_parse({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, + cast(hkd_special___characters as {{ dbt_utils.type_string() }}) as hkd_special___characters, + cast(nzd as {{ dbt_utils.type_float() }}) as nzd, + cast(usd as {{ dbt_utils.type_float() }}) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..8d96481142613 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,174 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + {{ adapter.quote('date') }} as _airbyte_start_at, + lag({{ adapter.quote('date') }}) over ( + partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, 
_airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..dd4432bd60a5e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,27 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} +select + _airbyte_unique_key, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..8a74de4c15332 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,29 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + {{ adapter.quote('column`_\'with""_quotes') }}, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..86ec2c9e8b1b7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + adapter.quote('date'), + 'timestamp_col', + adapter.quote('HKD@spéçiäl & characters'), + 'hkd_special___characters', + 'nzd', + 'usd', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..97bf0d05cbd40 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/sources.yml @@ -0,0 +1,15 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_1_prefix_startwith_number + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts + - name: _airbyte_raw_pos_dedup_cdcx + - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..1b22ba3f4a729 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,17 @@ + + + + delete from "test_normalization".test_normalization."dedup_exchange_rate_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "test_normalization".test_normalization."#dedup_exchange_rate_scd__dbt_tmp" + ); + + + insert into "test_normalization".test_normalization."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", 
"_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "test_normalization".test_normalization."#dedup_exchange_rate_scd__dbt_tmp" + ); + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..1315385a34459 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,17 @@ + + + + delete from "test_normalization".test_normalization."dedup_exchange_rate" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "test_normalization".test_normalization."#dedup_exchange_rate__dbt_tmp" + ); + + + insert into "test_normalization".test_normalization."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "test_normalization".test_normalization."#dedup_exchange_rate__dbt_tmp" + ); + + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..830e76c6f0ef8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,159 @@ + + + USE [test_normalization]; + if object_id ('test_normalization."exchange_rate__dbt_tmp_temp_view"','V') is not null + begin + drop view test_normalization."exchange_rate__dbt_tmp_temp_view" + end + + + + + USE [test_normalization]; + if object_id ('test_normalization."exchange_rate__dbt_tmp"','U') is not null + begin + drop table test_normalization."exchange_rate__dbt_tmp" + end + + + USE [test_normalization]; + EXEC('create view test_normalization."exchange_rate__dbt_tmp_temp_view" as + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema 
+-- depends_on: "test_normalization".test_normalization._airbyte_raw_exchange_rate +select + json_value(_airbyte_data, ''$."id"'') as id, + json_value(_airbyte_data, ''$."currency"'') as currency, + json_value(_airbyte_data, ''$."date"'') as "date", + json_value(_airbyte_data, ''$."timestamp_col"'') as timestamp_col, + json_value(_airbyte_data, ''$."HKD@spéçiäl & characters"'') as "HKD@spéçiäl & characters", + json_value(_airbyte_data, ''$."HKD_special___characters"'') as hkd_special___characters, + json_value(_airbyte_data, ''$."NZD"'') as nzd, + json_value(_airbyte_data, ''$."USD"'') as usd, + json_value(_airbyte_data, ''$."column`_''''with\"_quotes"'') as "column`_''with""_quotes", + json_value(_airbyte_data, ''$."datetime_tz"'') as datetime_tz, + json_value(_airbyte_data, ''$."datetime_no_tz"'') as datetime_no_tz, + json_value(_airbyte_data, ''$."time_tz"'') as time_tz, + json_value(_airbyte_data, ''$."time_no_tz"'') as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from "test_normalization".test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as + NVARCHAR(max)) as currency, + try_parse(nullif("date", '''') as date) as "date", + try_parse(nullif(timestamp_col, '''') as datetimeoffset) as timestamp_col, + cast("HKD@spéçiäl & characters" as + float +) as "HKD@spéçiäl & characters", + cast(hkd_special___characters as + NVARCHAR(max)) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast("column`_''with""_quotes" as + NVARCHAR(max)) as "column`_''with""_quotes", + try_parse(nullif(datetime_tz, '''') as datetimeoffset) as datetime_tz, + try_parse(nullif(datetime_no_tz, '''') as datetime2) as datetime_no_tz, + cast(nullif(time_tz, '''') as NVARCHAR(max)) as time_tz, + cast(nullif(time_no_tz, '''') as + time +) as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(id as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(currency as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("date" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(timestamp_col as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("HKD@spéçiäl & characters" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(hkd_special___characters as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(nzd as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(usd as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("column`_''with""_quotes" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(datetime_tz as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(datetime_no_tz as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(time_tz as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(time_no_tz as + NVARCHAR(max)), ''''),''''), '''') as + NVARCHAR(max)), '''')), 2) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 
+select + id, + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + "column`_''with""_quotes", + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from "test_normalization".test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + '); + + SELECT * INTO "test_normalization".test_normalization."exchange_rate__dbt_tmp" FROM + "test_normalization".test_normalization."exchange_rate__dbt_tmp_temp_view" + + + + USE [test_normalization]; + if object_id ('test_normalization."exchange_rate__dbt_tmp_temp_view"','V') is not null + begin + drop view test_normalization."exchange_rate__dbt_tmp_temp_view" + end + + + use [test_normalization]; + if EXISTS ( + SELECT * FROM + sys.indexes WHERE name = 'test_normalization_exchange_rate__dbt_tmp_cci' + AND object_id=object_id('test_normalization_exchange_rate__dbt_tmp') + ) + DROP index test_normalization.exchange_rate__dbt_tmp.test_normalization_exchange_rate__dbt_tmp_cci + CREATE CLUSTERED COLUMNSTORE INDEX test_normalization_exchange_rate__dbt_tmp_cci + ON test_normalization.exchange_rate__dbt_tmp + + + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..ed018a2680b4c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,77 @@ +USE [test_normalization]; + execute('create view _airbyte_test_normalization."dedup_exchange_rate_stg__dbt_tmp" as + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "test_normalization".test_normalization._airbyte_raw_dedup_exchange_rate +select + json_value(_airbyte_data, ''$."id"'') as id, + json_value(_airbyte_data, ''$."currency"'') as currency, + json_value(_airbyte_data, ''$."date"'') as "date", + json_value(_airbyte_data, ''$."timestamp_col"'') as timestamp_col, + json_value(_airbyte_data, ''$."HKD@spéçiäl & characters"'') as "HKD@spéçiäl & characters", + json_value(_airbyte_data, ''$."HKD_special___characters"'') as hkd_special___characters, + json_value(_airbyte_data, ''$."NZD"'') as nzd, + json_value(_airbyte_data, ''$."USD"'') as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from "test_normalization".test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as + NVARCHAR(max)) as currency, + try_parse(nullif("date", '''') as date) as "date", + try_parse(nullif(timestamp_col, '''') as datetimeoffset) as timestamp_col, + cast("HKD@spéçiäl & characters" as + float +) 
as "HKD@spéçiäl & characters", + cast(hkd_special___characters as + NVARCHAR(max)) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + SYSDATETIME() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + convert(varchar(32), HashBytes(''md5'', coalesce(cast( + + + + concat(concat(coalesce(cast(id as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(currency as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("date" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(timestamp_col as + NVARCHAR(max)), ''''), ''-'', coalesce(cast("HKD@spéçiäl & characters" as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(hkd_special___characters as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(nzd as + NVARCHAR(max)), ''''), ''-'', coalesce(cast(usd as + NVARCHAR(max)), ''''),''''), '''') as + NVARCHAR(max)), '''')), 2) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + '); + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml new file mode 100755 index 0000000000000..f187620c7c7c9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/dbt_project.yml @@ -0,0 +1,121 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: table + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +vars: + dbt_utils_dispatch_list: + - airbyte_utils + json_column: _airbyte_data + models_to_source: + nested_stream_with_co_1g_into_long_names_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_1g_into_long_names_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_1g_into_long_names_stg: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_1g_into_long_names_scd: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co__lting_into_long_names: test_normalization._airbyte_raw_nested_s__lting_into_long_names + non_nested_stream_wit_1g_into_long_names_ab1: test_normalization._airbyte_raw_non_nest__lting_into_long_names + non_nested_stream_wit_1g_into_long_names_ab2: test_normalization._airbyte_raw_non_nest__lting_into_long_names + non_nested_stream_wit_1g_into_long_names_ab3: test_normalization._airbyte_raw_non_nest__lting_into_long_names + non_nested_stream_wit__lting_into_long_names: 
test_normalization._airbyte_raw_non_nest__lting_into_long_names + some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty + simple_stream_with_na_1g_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names + simple_stream_with_na_1g_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names + simple_stream_with_na_1g_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names + simple_stream_with_na__lting_into_long_names: test_normalization_namespace._airbyte_raw_simple_s__lting_into_long_names + conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array + unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias: test_normalization._airbyte_raw_unnest_alias + arrays_ab1: test_normalization._airbyte_raw_arrays + arrays_ab2: test_normalization._airbyte_raw_arrays + arrays_ab3: test_normalization._airbyte_raw_arrays + arrays: test_normalization._airbyte_raw_arrays + nested_stream_with_co_2g_names_partition_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_2g_names_partition_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_2g_names_partition_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co___long_names_partition: test_normalization._airbyte_raw_nested_s__lting_into_long_names + conflict_stream_name__2flict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name__2flict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name__2flict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias + 
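+    # The models_to_source entries in this mapping tie each generated dbt model
+    # back to the schema-qualified _airbyte_raw_* table it is derived from,
+    # presumably so the normalization macros can resolve any model to its raw
+    # source. Illustrative shape (entry copied from this file):
+    #   arrays_ab1: test_normalization._airbyte_raw_arrays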
unnest_alias_children: test_normalization._airbyte_raw_unnest_alias + arrays_nested_array_parent_ab1: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent_ab2: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent_ab3: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent: test_normalization._airbyte_raw_arrays + nested_stream_with_co_3double_array_data_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3double_array_data_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3double_array_data_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co__ion_double_array_data: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3es_partition_data_ab1: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3es_partition_data_ab2: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co_3es_partition_data_ab3: test_normalization._airbyte_raw_nested_s__lting_into_long_names + nested_stream_with_co___names_partition_data: test_normalization._airbyte_raw_nested_s__lting_into_long_names + conflict_stream_name__3flict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name__3flict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name__3flict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name____conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_4mn___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_4mn___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_4mn___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children__column___with__quotes: test_normalization._airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql new file mode 100644 index 0000000000000..e5f3e4859deba --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -0,0 +1,74 @@ + + + create table + test_normalization.`nested_stream_with_co_1g_into_long_names_scd__dbt_tmp` + as ( + +-- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') +with + +input_data as ( + select * + from _airbyte_test_normalization.`nested_stream_with_co_1g_into_long_names_stg` + -- nested_stream_with_co__lting_into_long_names from test_normalization._airbyte_raw_nested_s__lting_into_long_names +), + +scd_data as ( + -- SQL 
model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(concat(coalesce(cast(id as char), '')) as char)) as _airbyte_unique_key, + id, + `date`, + `partition`, + `date` as _airbyte_start_at, + lag(`date`) over ( + partition by id + order by + `date` is null asc, + `date` desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id + order by + `date` is null asc, + `date` desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_nested_strea__nto_long_names_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + `date`, + `partition`, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_nested_strea__nto_long_names_hashid +from dedup_data where _airbyte_row_num = 1 + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql new file mode 100644 index 0000000000000..9d4975c21dac1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql @@ -0,0 +1,71 @@ + + + create table + test_normalization.`nested_stream_with_co___long_names_partition__dbt_tmp` + as ( + +with __dbt__cte__nested_stream_with_co_2g_names_partition_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization.`nested_stream_with_co_1g_into_long_names_scd` +select + _airbyte_nested_strea__nto_long_names_hashid, + json_extract(`partition`, + '$."double_array_data"') as double_array_data, + json_extract(`partition`, + '$."DATA"') as `DATA`, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from test_normalization.`nested_stream_with_co_1g_into_long_names_scd` as table_alias +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and `partition` is not null + +), __dbt__cte__nested_stream_with_co_2g_names_partition_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_co_2g_names_partition_ab1 +select + _airbyte_nested_strea__nto_long_names_hashid, + 
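    -- (ab2 stage) ab1 above parsed the raw JSON `partition` column into double_array_data
    -- and `DATA`; this ab2 stage is where scalar columns would be cast to their JSON-schema
    -- types (the JSON arrays pass through unchanged), and ab3 below folds every column into
    -- the md5 hash that becomes _airbyte_partition_hashid.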
double_array_data, + `DATA`, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_co_2g_names_partition_ab1 +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +), __dbt__cte__nested_stream_with_co_2g_names_partition_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_co_2g_names_partition_ab2 +select + md5(cast(concat(coalesce(cast(_airbyte_nested_strea__nto_long_names_hashid as char), ''), '-', coalesce(cast(double_array_data as char), ''), '-', coalesce(cast(`DATA` as char), '')) as char)) as _airbyte_partition_hashid, + tmp.* +from __dbt__cte__nested_stream_with_co_2g_names_partition_ab2 tmp +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_co_2g_names_partition_ab3 +select + _airbyte_nested_strea__nto_long_names_hashid, + double_array_data, + `DATA`, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_partition_hashid +from __dbt__cte__nested_stream_with_co_2g_names_partition_ab3 +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from test_normalization.`nested_stream_with_co_1g_into_long_names_scd` +where 1 = 1 + + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql new file mode 100644 index 0000000000000..e68283420cfdc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql @@ -0,0 +1,113 @@ + + + create table + test_normalization.`nested_stream_with_co___names_partition_data__dbt_tmp` + as ( + +with __dbt__cte__nested_stream_with_co_3es_partition_data_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization.`nested_stream_with_co___long_names_partition` +with numbers as ( + + + + + with p as ( + select 0 as generated_number union all select 1 + ), unioned as ( + + select + + + p0.generated_number * power(2, 0) + + + + 1 + as generated_number + + from + + + p as p0 + + + + ) + + select * + from unioned + where generated_number <= 1 + order by generated_number + + + ), + joined as ( + select + _airbyte_partition_hashid as _airbyte_hashid, + + json_extract(`DATA`, concat("$[", numbers.generated_number - 1, "][0]")) as _airbyte_nested_data + from test_normalization.`nested_stream_with_co___long_names_partition` + cross join numbers + -- only generate the number of records in the cross join that corresponds + -- to the number of items in test_normalization.`nested_stream_with_co___long_names_partition`.`DATA` + where numbers.generated_number <= json_length(`DATA`) + ) +select + _airbyte_partition_hashid, + json_value(_airbyte_nested_data, + '$."currency"' RETURNING CHAR) as currency, + 
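    -- _airbyte_nested_data carries one `DATA` array element per row of the `joined` CTE
    -- above; json_value(... RETURNING CHAR) lifts the scalar `currency` field out of that
    -- element as text, and the ab2 stage below casts it to char(1024).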
_airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from test_normalization.`nested_stream_with_co___long_names_partition` as table_alias +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +left join joined on _airbyte_partition_hashid = joined._airbyte_hashid +where 1 = 1 +and `DATA` is not null + +), __dbt__cte__nested_stream_with_co_3es_partition_data_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_co_3es_partition_data_ab1 +select + _airbyte_partition_hashid, + cast(currency as char(1024)) as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_co_3es_partition_data_ab1 +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +), __dbt__cte__nested_stream_with_co_3es_partition_data_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_co_3es_partition_data_ab2 +select + md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as char), ''), '-', coalesce(cast(currency as char), '')) as char)) as _airbyte_data_hashid, + tmp.* +from __dbt__cte__nested_stream_with_co_3es_partition_data_ab2 tmp +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_co_3es_partition_data_ab3 +select + _airbyte_partition_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_data_hashid +from __dbt__cte__nested_stream_with_co_3es_partition_data_ab3 +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from test_normalization.`nested_stream_with_co___long_names_partition` +where 1 = 1 + + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql new file mode 100644 index 0000000000000..4b276edcc316f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql @@ -0,0 +1,113 @@ + + + create table + test_normalization.`nested_stream_with_co__ion_double_array_data__dbt_tmp` + as ( + +with __dbt__cte__nested_stream_with_co_3double_array_data_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization.`nested_stream_with_co___long_names_partition` +with numbers as ( + + + + + with p as ( + select 0 as generated_number union all select 1 + ), unioned as ( + + select + + + p0.generated_number * power(2, 0) + + + + 1 + as generated_number + + from + + + p as p0 + + + + ) + + select * + from unioned + where generated_number <= 2 + order by generated_number + + + ), + joined as ( + select + _airbyte_partition_hashid as _airbyte_hashid, + + 
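      -- MySQL has no native UNNEST, so the `numbers` CTE above synthesizes an integer
      -- series (capped at 2 in this output) and the cross join below pairs each parent
      -- row with one array index. A minimal sketch of the same pattern, assuming a
      -- table t with a JSON array column arr:
      --
      --   select json_extract(t.arr, concat('$[', n.generated_number - 1, ']')) as element
      --   from t
      --   cross join numbers as n
      --   where n.generated_number <= json_length(t.arr);
      --
      -- the generated path below additionally appends "[0]" to drill into each element.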
json_extract(double_array_data, concat("$[", numbers.generated_number - 1, "][0]")) as _airbyte_nested_data + from test_normalization.`nested_stream_with_co___long_names_partition` + cross join numbers + -- only generate the number of records in the cross join that corresponds + -- to the number of items in test_normalization.`nested_stream_with_co___long_names_partition`.double_array_data + where numbers.generated_number <= json_length(double_array_data) + ) +select + _airbyte_partition_hashid, + json_value(_airbyte_nested_data, + '$."id"' RETURNING CHAR) as id, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from test_normalization.`nested_stream_with_co___long_names_partition` as table_alias +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +left join joined on _airbyte_partition_hashid = joined._airbyte_hashid +where 1 = 1 +and double_array_data is not null + +), __dbt__cte__nested_stream_with_co_3double_array_data_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_co_3double_array_data_ab1 +select + _airbyte_partition_hashid, + cast(id as char(1024)) as id, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_co_3double_array_data_ab1 +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +), __dbt__cte__nested_stream_with_co_3double_array_data_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_co_3double_array_data_ab2 +select + md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as char), ''), '-', coalesce(cast(id as char), '')) as char)) as _airbyte_double_array_data_hashid, + tmp.* +from __dbt__cte__nested_stream_with_co_3double_array_data_ab2 tmp +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_co_3double_array_data_ab3 +select + _airbyte_partition_hashid, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_double_array_data_hashid +from __dbt__cte__nested_stream_with_co_3double_array_data_ab3 +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from test_normalization.`nested_stream_with_co___long_names_partition` +where 1 = 1 + + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql new file mode 100644 index 0000000000000..8be6ef88d622a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql @@ -0,0 +1,25 @@ + + + create table + test_normalization.`nested_stream_with_co__lting_into_long_names__dbt_tmp` + as ( 
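-- The final table below is just the SCD table filtered to its live rows:
-- _airbyte_active_row = 1 keeps one current version per _airbyte_unique_key, since the
-- row_number() window in the SCD model marks exactly one row active per key partition.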
+ +-- Final base SQL model +-- depends_on: test_normalization.`nested_stream_with_co_1g_into_long_names_scd` +select + _airbyte_unique_key, + id, + `date`, + `partition`, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_nested_strea__nto_long_names_hashid +from test_normalization.`nested_stream_with_co_1g_into_long_names_scd` +-- nested_stream_with_co__lting_into_long_names from test_normalization._airbyte_raw_nested_s__lting_into_long_names +where 1 = 1 +and _airbyte_active_row = 1 + + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql new file mode 100644 index 0000000000000..d638e7a898ff3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab1.sql @@ -0,0 +1,19 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} as table_alias +-- nested_stream_with_co__lting_into_long_names +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql new file mode 100644 index 0000000000000..a86a84248a87c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_1g_into_long_names_ab2.sql @@ -0,0 +1,19 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('nested_stream_with_co_1g_into_long_names_ab1') }} +select + cast(id as {{ dbt_utils.type_string() }}(1024)) as id, + cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}(1024)) as {{ adapter.quote('date') }}, + cast({{ adapter.quote('partition') }} as {{ type_json() }}) 
as {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_co_1g_into_long_names_ab1') }} +-- nested_stream_with_co__lting_into_long_names +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql new file mode 100644 index 0000000000000..427a929211b27 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_2g_names_partition_ab1.sql @@ -0,0 +1,19 @@ +{{ config( + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} +select + _airbyte_nested_strea__nto_long_names_hashid, + {{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data, + {{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as {{ adapter.quote('DATA') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} as table_alias +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and {{ adapter.quote('partition') }} is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql new file mode 100644 index 0000000000000..a8ca4bbb7d40f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3double_array_data_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_co___long_names_partition') }} +{{ unnest_cte(ref('nested_stream_with_co___long_names_partition'), 'partition', 'double_array_data') }} +select + _airbyte_partition_hashid, + {{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_co___long_names_partition') }} as table_alias +-- double_array_data at 
nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +{{ cross_join_unnest('partition', 'double_array_data') }} +where 1 = 1 +and double_array_data is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql new file mode 100644 index 0000000000000..cdf1151ee10d7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_co_3es_partition_data_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_co___long_names_partition') }} +{{ unnest_cte(ref('nested_stream_with_co___long_names_partition'), 'partition', adapter.quote('DATA')) }} +select + _airbyte_partition_hashid, + {{ json_extract_scalar(unnested_column_value(adapter.quote('DATA')), ['currency'], ['currency']) }} as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_co___long_names_partition') }} as table_alias +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +{{ cross_join_unnest('partition', adapter.quote('DATA')) }} +where 1 = 1 +and {{ adapter.quote('DATA') }} is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql new file mode 100644 index 0000000000000..9ffb6bd5558cc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -0,0 +1,162 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_co__lting_into_long_names' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_co__lting_into_long_names')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.nested_stream_with_co_1g_into_long_names_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('nested_stream_with_co_1g_into_long_names_stg') }} + -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('nested_stream_with_co_1g_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('nested_stream_with_co_1g_into_long_names_stg')) }} from new_data + union all + select {{ 
dbt_utils.star(ref('nested_stream_with_co_1g_into_long_names_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('nested_stream_with_co_1g_into_long_names_stg') }} + -- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, + {{ adapter.quote('date') }} as _airbyte_start_at, + lag({{ adapter.quote('date') }}) over ( + partition by id + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_nested_strea__nto_long_names_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_strea__nto_long_names_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql new file mode 100644 index 0000000000000..0c8adc779de9f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql @@ -0,0 +1,19 @@ +{{ config( + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_co_2g_names_partition_ab3') }} +select + _airbyte_nested_strea__nto_long_names_hashid, + double_array_data, + {{ adapter.quote('DATA') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_partition_hashid +from {{ ref('nested_stream_with_co_2g_names_partition_ab3') }} +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} +where 1 = 1 +{{ 
incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql new file mode 100644 index 0000000000000..92e44abc92988 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql @@ -0,0 +1,18 @@ +{{ config( + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_co_3es_partition_data_ab3') }} +select + _airbyte_partition_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_data_hashid +from {{ ref('nested_stream_with_co_3es_partition_data_ab3') }} +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_co___long_names_partition') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql new file mode 100644 index 0000000000000..6a17d6258b3e6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql @@ -0,0 +1,18 @@ +{{ config( + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_co_3double_array_data_ab3') }} +select + _airbyte_partition_hashid, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_double_array_data_hashid +from {{ ref('nested_stream_with_co_3double_array_data_ab3') }} +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_co___long_names_partition') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql new file mode 100644 index 0000000000000..0ea84390902e9 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql @@ -0,0 +1,22 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} +select + _airbyte_unique_key, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_strea__nto_long_names_hashid +from {{ ref('nested_stream_with_co_1g_into_long_names_scd') }} +-- nested_stream_with_co__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_s__lting_into_long_names') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..50def309c8c44 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/sources.yml @@ -0,0 +1,23 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_arrays + - name: _airbyte_raw_conflict_stream_array + - name: _airbyte_raw_conflict_stream_name + - name: _airbyte_raw_conflict_stream_scalar + - name: _airbyte_raw_nested_s__lting_into_long_names + - name: _airbyte_raw_non_nest__lting_into_long_names + - name: _airbyte_raw_some_stream_that_was_empty + - name: _airbyte_raw_unnest_alias +- name: test_normalization_namespace + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_simple_s__lting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql new file mode 100644 index 0000000000000..e5f3e4859deba --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -0,0 +1,74 @@ + + + create table + test_normalization.`nested_stream_with_co_1g_into_long_names_scd__dbt_tmp` + as ( + +-- depends_on: ref('nested_stream_with_co_1g_into_long_names_stg') +with + +input_data as ( + select * + from _airbyte_test_normalization.`nested_stream_with_co_1g_into_long_names_stg` + -- nested_stream_with_co__lting_into_long_names from test_normalization._airbyte_raw_nested_s__lting_into_long_names +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + 
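    -- This md5 over the concatenated primary-key columns (just `id` here) is
    -- dbt_utils.surrogate_key(['id']) rendered for MySQL; the coalesce(..., '')
    -- keeps a null key component from nulling out the whole hash.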
md5(cast(concat(coalesce(cast(id as char), '')) as char)) as _airbyte_unique_key, + id, + `date`, + `partition`, + `date` as _airbyte_start_at, + lag(`date`) over ( + partition by id + order by + `date` is null asc, + `date` desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id + order by + `date` is null asc, + `date` desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_nested_strea__nto_long_names_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + `date`, + `partition`, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_nested_strea__nto_long_names_hashid +from dedup_data where _airbyte_row_num = 1 + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql new file mode 100644 index 0000000000000..9d4975c21dac1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___long_names_partition.sql @@ -0,0 +1,71 @@ + + + create table + test_normalization.`nested_stream_with_co___long_names_partition__dbt_tmp` + as ( + +with __dbt__cte__nested_stream_with_co_2g_names_partition_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization.`nested_stream_with_co_1g_into_long_names_scd` +select + _airbyte_nested_strea__nto_long_names_hashid, + json_extract(`partition`, + '$."double_array_data"') as double_array_data, + json_extract(`partition`, + '$."DATA"') as `DATA`, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from test_normalization.`nested_stream_with_co_1g_into_long_names_scd` as table_alias +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and `partition` is not null + +), __dbt__cte__nested_stream_with_co_2g_names_partition_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_co_2g_names_partition_ab1 +select + _airbyte_nested_strea__nto_long_names_hashid, + double_array_data, + `DATA`, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from 
__dbt__cte__nested_stream_with_co_2g_names_partition_ab1 +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +), __dbt__cte__nested_stream_with_co_2g_names_partition_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_co_2g_names_partition_ab2 +select + md5(cast(concat(coalesce(cast(_airbyte_nested_strea__nto_long_names_hashid as char), ''), '-', coalesce(cast(double_array_data as char), ''), '-', coalesce(cast(`DATA` as char), '')) as char)) as _airbyte_partition_hashid, + tmp.* +from __dbt__cte__nested_stream_with_co_2g_names_partition_ab2 tmp +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_co_2g_names_partition_ab3 +select + _airbyte_nested_strea__nto_long_names_hashid, + double_array_data, + `DATA`, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_partition_hashid +from __dbt__cte__nested_stream_with_co_2g_names_partition_ab3 +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from test_normalization.`nested_stream_with_co_1g_into_long_names_scd` +where 1 = 1 + + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql new file mode 100644 index 0000000000000..e68283420cfdc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co___names_partition_data.sql @@ -0,0 +1,113 @@ + + + create table + test_normalization.`nested_stream_with_co___names_partition_data__dbt_tmp` + as ( + +with __dbt__cte__nested_stream_with_co_3es_partition_data_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization.`nested_stream_with_co___long_names_partition` +with numbers as ( + + + + + with p as ( + select 0 as generated_number union all select 1 + ), unioned as ( + + select + + + p0.generated_number * power(2, 0) + + + + 1 + as generated_number + + from + + + p as p0 + + + + ) + + select * + from unioned + where generated_number <= 1 + order by generated_number + + + ), + joined as ( + select + _airbyte_partition_hashid as _airbyte_hashid, + + json_extract(`DATA`, concat("$[", numbers.generated_number - 1, "][0]")) as _airbyte_nested_data + from test_normalization.`nested_stream_with_co___long_names_partition` + cross join numbers + -- only generate the number of records in the cross join that corresponds + -- to the number of items in test_normalization.`nested_stream_with_co___long_names_partition`.`DATA` + where numbers.generated_number <= json_length(`DATA`) + ) +select + _airbyte_partition_hashid, + json_value(_airbyte_nested_data, + '$."currency"' RETURNING CHAR) as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from 
test_normalization.`nested_stream_with_co___long_names_partition` as table_alias +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +left join joined on _airbyte_partition_hashid = joined._airbyte_hashid +where 1 = 1 +and `DATA` is not null + +), __dbt__cte__nested_stream_with_co_3es_partition_data_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_co_3es_partition_data_ab1 +select + _airbyte_partition_hashid, + cast(currency as char(1024)) as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_co_3es_partition_data_ab1 +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +), __dbt__cte__nested_stream_with_co_3es_partition_data_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_co_3es_partition_data_ab2 +select + md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as char), ''), '-', coalesce(cast(currency as char), '')) as char)) as _airbyte_data_hashid, + tmp.* +from __dbt__cte__nested_stream_with_co_3es_partition_data_ab2 tmp +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_co_3es_partition_data_ab3 +select + _airbyte_partition_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_data_hashid +from __dbt__cte__nested_stream_with_co_3es_partition_data_ab3 +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from test_normalization.`nested_stream_with_co___long_names_partition` +where 1 = 1 + + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql new file mode 100644 index 0000000000000..4b276edcc316f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__ion_double_array_data.sql @@ -0,0 +1,113 @@ + + + create table + test_normalization.`nested_stream_with_co__ion_double_array_data__dbt_tmp` + as ( + +with __dbt__cte__nested_stream_with_co_3double_array_data_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization.`nested_stream_with_co___long_names_partition` +with numbers as ( + + + + + with p as ( + select 0 as generated_number union all select 1 + ), unioned as ( + + select + + + p0.generated_number * power(2, 0) + + + + 1 + as generated_number + + from + + + p as p0 + + + + ) + + select * + from unioned + where generated_number <= 2 + order by generated_number + + + ), + joined as ( + select + _airbyte_partition_hashid as _airbyte_hashid, + + json_extract(double_array_data, concat("$[", numbers.generated_number - 1, "][0]")) as 
_airbyte_nested_data + from test_normalization.`nested_stream_with_co___long_names_partition` + cross join numbers + -- only generate the number of records in the cross join that corresponds + -- to the number of items in test_normalization.`nested_stream_with_co___long_names_partition`.double_array_data + where numbers.generated_number <= json_length(double_array_data) + ) +select + _airbyte_partition_hashid, + json_value(_airbyte_nested_data, + '$."id"' RETURNING CHAR) as id, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from test_normalization.`nested_stream_with_co___long_names_partition` as table_alias +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +left join joined on _airbyte_partition_hashid = joined._airbyte_hashid +where 1 = 1 +and double_array_data is not null + +), __dbt__cte__nested_stream_with_co_3double_array_data_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_co_3double_array_data_ab1 +select + _airbyte_partition_hashid, + cast(id as char(1024)) as id, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_co_3double_array_data_ab1 +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +), __dbt__cte__nested_stream_with_co_3double_array_data_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_co_3double_array_data_ab2 +select + md5(cast(concat(coalesce(cast(_airbyte_partition_hashid as char), ''), '-', coalesce(cast(id as char), '')) as char)) as _airbyte_double_array_data_hashid, + tmp.* +from __dbt__cte__nested_stream_with_co_3double_array_data_ab2 tmp +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_co_3double_array_data_ab3 +select + _airbyte_partition_hashid, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_double_array_data_hashid +from __dbt__cte__nested_stream_with_co_3double_array_data_ab3 +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from test_normalization.`nested_stream_with_co___long_names_partition` +where 1 = 1 + + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql new file mode 100644 index 0000000000000..8be6ef88d622a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_co__lting_into_long_names.sql @@ -0,0 +1,25 @@ + + + create table + test_normalization.`nested_stream_with_co__lting_into_long_names__dbt_tmp` + as ( + +-- Final base SQL model +-- depends_on: 
test_normalization.`nested_stream_with_co_1g_into_long_names_scd` +select + _airbyte_unique_key, + id, + `date`, + `partition`, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_nested_strea__nto_long_names_hashid +from test_normalization.`nested_stream_with_co_1g_into_long_names_scd` +-- nested_stream_with_co__lting_into_long_names from test_normalization._airbyte_raw_nested_s__lting_into_long_names +where 1 = 1 +and _airbyte_active_row = 1 + + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml new file mode 100755 index 0000000000000..bc7fa6d501663 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/dbt_project.yml @@ -0,0 +1,86 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: table + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +vars: + dbt_utils_dispatch_list: + - airbyte_utils + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx + 
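    # models_to_source maps every generated model (ab1/ab2/stg/scd/final) back to the
    # raw _airbyte_raw_* table it is ultimately derived from, one entry per model.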
pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx + 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts + types_testing_ab1: test_normalization._airbyte_raw_types_testing + types_testing_ab2: test_normalization._airbyte_raw_types_testing + types_testing_stg: test_normalization._airbyte_raw_types_testing + types_testing_scd: test_normalization._airbyte_raw_types_testing + types_testing: test_normalization._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..59d722cb4f381 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,84 @@ + + + create table + test_normalization.`dedup_exchange_rate_scd__dbt_tmp` + as ( + +-- depends_on: ref('dedup_exchange_rate_stg') +with + +input_data as ( + select * + from _airbyte_test_normalization.`dedup_exchange_rate_stg` + -- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(nzd as char), '')) as char)) as _airbyte_unique_key, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, + `date` as _airbyte_start_at, + lag(`date`) over ( + partition by id, currency, cast(nzd as char) + order by + `date` is null asc, + `date` desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id, currency, cast(nzd as char) + order by + `date` is null asc, + `date` desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( 
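      -- rank 1 marks the preferred copy (active rows first, ties broken by
      -- _airbyte_ab_id); only rank 1 survives the final `where _airbyte_row_num = 1`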
+ partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..d6ab488f2f636 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,30 @@ + + + create table + test_normalization.`dedup_exchange_rate__dbt_tmp` + as ( + +-- Final base SQL model +-- depends_on: test_normalization.`dedup_exchange_rate_scd` +select + _airbyte_unique_key, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from test_normalization.`dedup_exchange_rate_scd` +-- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate +where 1 = 1 +and _airbyte_active_row = 1 + + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..540fc0e7911f6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,123 @@ + + + create table + test_normalization.`exchange_rate__dbt_tmp` + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_exchange_rate +select + json_value(_airbyte_data, + '$."id"' RETURNING CHAR) as id, + json_value(_airbyte_data, + '$."currency"' RETURNING CHAR) as currency, + json_value(_airbyte_data, + '$."date"' RETURNING CHAR) as `date`, + json_value(_airbyte_data, + '$."timestamp_col"' RETURNING CHAR) as timestamp_col, + json_value(_airbyte_data, + '$."HKD@spéçiäl & characters"' RETURNING CHAR) as `HKD@spéçiäl & characters`, + json_value(_airbyte_data, + '$."HKD_special___characters"' 
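-- These json_value(... RETURNING CHAR) calls are how the MySQL build of
-- normalization unpacks the raw _airbyte_data JSON blob into one text column
-- per field; concrete typing happens later in the _ab2 casting CTE that
-- follows. A short sketch of just the extraction step, assuming MySQL 8.0.21+
-- (where JSON_VALUE exists) and a hypothetical table
-- raw_events(_airbyte_data json):

select
  json_value(_airbyte_data, '$."id"' returning char)       as id,
  json_value(_airbyte_data, '$."currency"' returning char) as currency
from raw_events;  -- raw_events is hypothetical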
RETURNING CHAR) as hkd_special___characters, + json_value(_airbyte_data, + '$."NZD"' RETURNING CHAR) as nzd, + json_value(_airbyte_data, + '$."USD"' RETURNING CHAR) as usd, + json_value(_airbyte_data, + '$."column___with__quotes"' RETURNING CHAR) as `column__'with"_quotes`, + json_value(_airbyte_data, + '$."datetime_tz"' RETURNING CHAR) as datetime_tz, + json_value(_airbyte_data, + '$."datetime_no_tz"' RETURNING CHAR) as datetime_no_tz, + json_value(_airbyte_data, + '$."time_tz"' RETURNING CHAR) as time_tz, + json_value(_airbyte_data, + '$."time_no_tz"' RETURNING CHAR) as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + signed +) as id, + cast(currency as char(1024)) as currency, + case when `date` = '' then NULL + else cast(`date` as date) + end as `date` + , + cast(nullif(timestamp_col, '') as char(1024)) as timestamp_col, + cast(`HKD@spéçiäl & characters` as + float +) as `HKD@spéçiäl & characters`, + cast(hkd_special___characters as char(1024)) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast(`column__'with"_quotes` as char(1024)) as `column__'with"_quotes`, + cast(nullif(datetime_tz, '') as char(1024)) as datetime_tz, + case when datetime_no_tz regexp '\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.*' THEN STR_TO_DATE(SUBSTR(datetime_no_tz, 1, 19), '%Y-%m-%dT%H:%i:%S') + else cast(if(datetime_no_tz = '', NULL, datetime_no_tz) as datetime) + end as datetime_no_tz + , + nullif(cast(time_tz as char(1024)), "") as time_tz, + nullif(cast(time_no_tz as + time +), "") as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(`date` as char), ''), '-', coalesce(cast(timestamp_col as char), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char), ''), '-', coalesce(cast(hkd_special___characters as char), ''), '-', coalesce(cast(nzd as char), ''), '-', coalesce(cast(usd as char), ''), '-', coalesce(cast(`column__'with"_quotes` as char), ''), '-', coalesce(cast(datetime_tz as char), ''), '-', coalesce(cast(datetime_no_tz as char), ''), '-', coalesce(cast(time_tz as char), ''), '-', coalesce(cast(time_no_tz as char), '')) as char)) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, + `column__'with"_quotes`, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + ) diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..367544ad79b7b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,76 @@ + + create view _airbyte_test_normalization.`dedup_exchange_rate_stg__dbt_tmp` as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_dedup_exchange_rate +select + json_value(_airbyte_data, + '$."id"' RETURNING CHAR) as id, + json_value(_airbyte_data, + '$."currency"' RETURNING CHAR) as currency, + json_value(_airbyte_data, + '$."date"' RETURNING CHAR) as `date`, + json_value(_airbyte_data, + '$."timestamp_col"' RETURNING CHAR) as timestamp_col, + json_value(_airbyte_data, + '$."HKD@spéçiäl & characters"' RETURNING CHAR) as `HKD@spéçiäl & characters`, + json_value(_airbyte_data, + '$."HKD_special___characters"' RETURNING CHAR) as hkd_special___characters, + json_value(_airbyte_data, + '$."NZD"' RETURNING CHAR) as nzd, + json_value(_airbyte_data, + '$."USD"' RETURNING CHAR) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + signed +) as id, + cast(currency as char(1024)) as currency, + case when `date` = '' then NULL + else cast(`date` as date) + end as `date` + , + cast(nullif(timestamp_col, '') as char(1024)) as timestamp_col, + cast(`HKD@spéçiäl & characters` as + float +) as `HKD@spéçiäl & characters`, + cast(hkd_special___characters as char(1024)) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(`date` as char), ''), '-', coalesce(cast(timestamp_col as char), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char), ''), '-', coalesce(cast(hkd_special___characters as char), ''), '-', coalesce(cast(nzd as char), ''), '-', coalesce(cast(usd as char), '')) as char)) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ); \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..1bd990b39925d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,72 @@ + + create view _airbyte_test_normalization.`multiple_column_names_conflicts_stg__dbt_tmp` as ( + +with __dbt__cte__multiple_column_names_conflicts_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_multiple_column_names_conflicts +select + json_value(_airbyte_data, + '$."id"' RETURNING CHAR) as id, + json_value(_airbyte_data, + '$."User Id"' RETURNING CHAR) as `User Id`, + json_value(_airbyte_data, + '$."user_id"' RETURNING CHAR) as user_id, + json_value(_airbyte_data, + '$."User id"' RETURNING CHAR) as `User id_1`, + json_value(_airbyte_data, + '$."user id"' RETURNING CHAR) as `user id_2`, + json_value(_airbyte_data, + '$."User@Id"' RETURNING CHAR) as `User@Id`, + json_value(_airbyte_data, + '$."UserId"' RETURNING CHAR) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__cte__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 +select + cast(id as + signed +) as id, + cast(`User Id` as char(1024)) as `User Id`, + cast(user_id as + float +) as user_id, + cast(`User id_1` as + float +) as `User id_1`, + cast(`user id_2` as + float +) as `user id_2`, + cast(`User@Id` as char(1024)) as `User@Id`, + cast(userid as + float +) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from __dbt__cte__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 +select + md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(`User Id` as char), ''), '-', coalesce(cast(user_id as char), ''), '-', coalesce(cast(`User id_1` as char), ''), '-', coalesce(cast(`user id_2` as char), ''), '-', coalesce(cast(`User@Id` as char), ''), '-', coalesce(cast(userid as char), '')) as char)) as _airbyte_multiple_col__ames_conflicts_hashid, + tmp.* +from __dbt__cte__multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 + + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..670db0869ae22 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..11466fe3ffd5e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,27 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} +select + cast(id as {{ dbt_utils.type_bigint() }}) as id, + cast(currency as {{ dbt_utils.type_string() }}(1024)) as currency, + case when {{ adapter.quote('date') }} = '' then NULL + else cast({{ adapter.quote('date') }} as date) + end as {{ adapter.quote('date') }} + , + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, + cast(hkd_special___characters as {{ dbt_utils.type_string() }}(1024)) as hkd_special___characters, + cast(nzd as {{ dbt_utils.type_float() }}) as nzd, + 
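-- The casts in this _ab2 model are generated from per-adapter Jinja macros
-- (dbt_utils.type_bigint(), dbt_utils.type_string(), empty_string_to_null(),
-- ...), and the `case when ... = '' then NULL` guard above exists because
-- casting an empty string to date/datetime in MySQL yields an error or a NULL
-- plus a warning, depending on sql_mode. A rough standalone illustration of
-- both guards, with a hypothetical table raw_vals(d varchar(32)):

select
  case when d = '' then null else cast(d as date) end as d_as_date,
  cast(nullif(d, '') as datetime)                     as d_as_datetime
from raw_vals;  -- raw_vals is a hypothetical table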
cast(usd as {{ dbt_utils.type_float() }}) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..b1c2af62e4bf1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,176 @@ +{{ config( + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync). + So skip this deletion if the column doesn't exist. (In this case, the table is guaranteed to be empty anyway.) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.'
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization.dedup_exchange_rate_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + {{ adapter.quote('date') }} as _airbyte_start_at, + lag({{ adapter.quote('date') }}) over ( + partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + 
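-- The post_hook near the top of this model prunes final-table rows whose key
-- no longer has any active SCD entry; the left join against an aggregated
-- active-count is the faster equivalent of the commented-out
-- `in (...) and not in (...)` form. A condensed sketch of that anti-join
-- delete, assuming hypothetical tables final_t(uk) and scd_t(uk, active_row):

delete from final_t
where uk in (
  select recent.uk
  from (select distinct uk from scd_t) recent
  left join (
    select uk, count(*) as active_count
    from scd_t
    where active_row = 1
    group by uk
  ) act on recent.uk = act.uk
  -- keys with no active row at all are the deleted records
  where act.active_count is null or act.active_count = 0
);  -- final_t / scd_t are hypothetical stand-ins for the final and SCD tables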
_airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..dd4432bd60a5e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,27 @@ +{{ config( + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} +select + _airbyte_unique_key, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..3fe3205727b89 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,29 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + {{ adapter.quote('column__\'with"_quotes') }}, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ 
source('test_normalization', '_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..86ec2c9e8b1b7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + adapter.quote('date'), + 'timestamp_col', + adapter.quote('HKD@spéçiäl & characters'), + 'hkd_special___characters', + 'nzd', + 'usd', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..f51802427655e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/sources.yml @@ -0,0 +1,16 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_1_prefix_startwith_number + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts + - name: _airbyte_raw_pos_dedup_cdcx + - name: _airbyte_raw_renamed_dedup_cdc_excluded + - name: _airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..59d722cb4f381 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,84 @@ + + + create table + test_normalization.`dedup_exchange_rate_scd__dbt_tmp` + as ( + +-- depends_on: ref('dedup_exchange_rate_stg') +with + +input_data as ( + select * + from _airbyte_test_normalization.`dedup_exchange_rate_stg` + -- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for 
each record identified by their primary key + select + md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(nzd as char), '')) as char)) as _airbyte_unique_key, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, + `date` as _airbyte_start_at, + lag(`date`) over ( + partition by id, currency, cast(nzd as char) + order by + `date` is null asc, + `date` desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id, currency, cast(nzd as char) + order by + `date` is null asc, + `date` desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..d6ab488f2f636 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,30 @@ + + + create table + test_normalization.`dedup_exchange_rate__dbt_tmp` + as ( + +-- Final base SQL model +-- depends_on: test_normalization.`dedup_exchange_rate_scd` +select + _airbyte_unique_key, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from test_normalization.`dedup_exchange_rate_scd` +-- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate +where 1 = 1 +and _airbyte_active_row = 1 + + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 
0000000000000..540fc0e7911f6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,123 @@ + + + create table + test_normalization.`exchange_rate__dbt_tmp` + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_exchange_rate +select + json_value(_airbyte_data, + '$."id"' RETURNING CHAR) as id, + json_value(_airbyte_data, + '$."currency"' RETURNING CHAR) as currency, + json_value(_airbyte_data, + '$."date"' RETURNING CHAR) as `date`, + json_value(_airbyte_data, + '$."timestamp_col"' RETURNING CHAR) as timestamp_col, + json_value(_airbyte_data, + '$."HKD@spéçiäl & characters"' RETURNING CHAR) as `HKD@spéçiäl & characters`, + json_value(_airbyte_data, + '$."HKD_special___characters"' RETURNING CHAR) as hkd_special___characters, + json_value(_airbyte_data, + '$."NZD"' RETURNING CHAR) as nzd, + json_value(_airbyte_data, + '$."USD"' RETURNING CHAR) as usd, + json_value(_airbyte_data, + '$."column___with__quotes"' RETURNING CHAR) as `column__'with"_quotes`, + json_value(_airbyte_data, + '$."datetime_tz"' RETURNING CHAR) as datetime_tz, + json_value(_airbyte_data, + '$."datetime_no_tz"' RETURNING CHAR) as datetime_no_tz, + json_value(_airbyte_data, + '$."time_tz"' RETURNING CHAR) as time_tz, + json_value(_airbyte_data, + '$."time_no_tz"' RETURNING CHAR) as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + signed +) as id, + cast(currency as char(1024)) as currency, + case when `date` = '' then NULL + else cast(`date` as date) + end as `date` + , + cast(nullif(timestamp_col, '') as char(1024)) as timestamp_col, + cast(`HKD@spéçiäl & characters` as + float +) as `HKD@spéçiäl & characters`, + cast(hkd_special___characters as char(1024)) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast(`column__'with"_quotes` as char(1024)) as `column__'with"_quotes`, + cast(nullif(datetime_tz, '') as char(1024)) as datetime_tz, + case when datetime_no_tz regexp '\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.*' THEN STR_TO_DATE(SUBSTR(datetime_no_tz, 1, 19), '%Y-%m-%dT%H:%i:%S') + else cast(if(datetime_no_tz = '', NULL, datetime_no_tz) as datetime) + end as datetime_no_tz + , + nullif(cast(time_tz as char(1024)), "") as time_tz, + nullif(cast(time_no_tz as + time +), "") as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(`date` as char), ''), '-', coalesce(cast(timestamp_col as char), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char), ''), '-', coalesce(cast(hkd_special___characters as 
char), ''), '-', coalesce(cast(nzd as char), ''), '-', coalesce(cast(usd as char), ''), '-', coalesce(cast(`column__'with"_quotes` as char), ''), '-', coalesce(cast(datetime_tz as char), ''), '-', coalesce(cast(datetime_no_tz as char), ''), '-', coalesce(cast(time_tz as char), ''), '-', coalesce(cast(time_no_tz as char), '')) as char)) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, + `column__'with"_quotes`, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + ) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..367544ad79b7b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,76 @@ + + create view _airbyte_test_normalization.`dedup_exchange_rate_stg__dbt_tmp` as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization._airbyte_raw_dedup_exchange_rate +select + json_value(_airbyte_data, + '$."id"' RETURNING CHAR) as id, + json_value(_airbyte_data, + '$."currency"' RETURNING CHAR) as currency, + json_value(_airbyte_data, + '$."date"' RETURNING CHAR) as `date`, + json_value(_airbyte_data, + '$."timestamp_col"' RETURNING CHAR) as timestamp_col, + json_value(_airbyte_data, + '$."HKD@spéçiäl & characters"' RETURNING CHAR) as `HKD@spéçiäl & characters`, + json_value(_airbyte_data, + '$."HKD_special___characters"' RETURNING CHAR) as hkd_special___characters, + json_value(_airbyte_data, + '$."NZD"' RETURNING CHAR) as nzd, + json_value(_airbyte_data, + '$."USD"' RETURNING CHAR) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as _airbyte_normalized_at +from test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + signed +) as id, + cast(currency as char(1024)) as currency, + case when `date` = '' then NULL + else cast(`date` as date) + end as `date` + , + cast(nullif(timestamp_col, '') as char(1024)) as timestamp_col, + cast(`HKD@spéçiäl & characters` as + float +) as `HKD@spéçiäl & characters`, + cast(hkd_special___characters as char(1024)) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + + CURRENT_TIMESTAMP + as 
_airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(`date` as char), ''), '-', coalesce(cast(timestamp_col as char), ''), '-', coalesce(cast(`HKD@spéçiäl & characters` as char), ''), '-', coalesce(cast(hkd_special___characters as char), ''), '-', coalesce(cast(nzd as char), ''), '-', coalesce(cast(usd as char), '')) as char)) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml new file mode 100755 index 0000000000000..e8b2b254e10be --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/dbt_project.yml @@ -0,0 +1,86 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +source-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +data-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +modules-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: false + schema: false + identifier: false +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: table + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +vars: + dbt_utils_dispatch_list: + - airbyte_utils + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization.airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization.airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization.airbyte_raw_exchange_rate + exchange_rate: test_normalization.airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization.airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization.airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization.airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization.airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization.airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization.airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization.airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization.airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization.airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization.airbyte_raw_dedup_cdc_excluded + 
dedup_cdc_excluded: test_normalization.airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization.airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization.airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization.airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization.airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization.airbyte_raw_pos_dedup_cdcx + ab_1_prefix_startwith_number_ab1: test_normalization.airbyte_raw_1_prefix_startwith_number + ab_1_prefix_startwith_number_ab2: test_normalization.airbyte_raw_1_prefix_startwith_number + ab_1_prefix_startwith_number_stg: test_normalization.airbyte_raw_1_prefix_startwith_number + ab_1_prefix_startwith_number_scd: test_normalization.airbyte_raw_1_prefix_startwith_number + ab_1_prefix_startwith_number: test_normalization.airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization.airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization.airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization.airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization.airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization.airbyte_raw_multiple_column_names_conflicts + types_testing_ab1: test_normalization.airbyte_raw_types_testing + types_testing_ab2: test_normalization.airbyte_raw_types_testing + types_testing_stg: test_normalization.airbyte_raw_types_testing + types_testing_scd: test_normalization.airbyte_raw_types_testing + types_testing: test_normalization.airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..cfd186b006ae3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,101 @@ + + + create table test_normalization.dedup_exchange_rate_scd__dbt_tmp + + as + +-- depends_on: ref('dedup_exchange_rate_stg') +with + +input_data as ( + select * + from test_normalization.dedup_exchange_rate_stg + -- dedup_exchange_rate from test_normalization.airbyte_raw_dedup_exchange_rate +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + ora_hash( + + id || '~' || + + + currency || '~' || + + + nzd + + ) as "_AIRBYTE_UNIQUE_KEY", + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, + "DATE" as "_AIRBYTE_START_AT", + lag("DATE") over ( + partition by id, currency, cast(nzd as varchar2(4000)) + order by + "DATE" desc nulls last, + "_AIRBYTE_EMITTED_AT" desc + ) as "_AIRBYTE_END_AT", + case when row_number() over ( + partition by id, currency, cast(nzd as varchar2(4000)) + order by + "DATE" desc nulls last, + "_AIRBYTE_EMITTED_AT" desc + ) = 1 then 1 else 0 end as "_AIRBYTE_ACTIVE_ROW", + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + 
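-- Note the adapter swap here: where the MySQL output hashes with
-- md5(cast(concat(coalesce(...)) as char)), the Oracle output uses ora_hash
-- over '~'-joined values, and it needs no coalesce because Oracle's ||
-- operator treats NULL as an empty string. A one-line sketch runnable on
-- Oracle against the built-in dual table:

select ora_hash('1' || '~' || 'USD' || '~' || '0.12') as unique_key
from dual;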
"_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + "_AIRBYTE_UNIQUE_KEY", + "_AIRBYTE_START_AT", + "_AIRBYTE_EMITTED_AT" + order by "_AIRBYTE_ACTIVE_ROW" desc, "_AIRBYTE_AB_ID" + ) as "_AIRBYTE_ROW_NUM", + ora_hash( + + "_AIRBYTE_UNIQUE_KEY" || '~' || + + + "_AIRBYTE_START_AT" || '~' || + + + "_AIRBYTE_EMITTED_AT" + + ) as "_AIRBYTE_UNIQUE_KEY_SCD", + scd_data.* + from scd_data +) +select + "_AIRBYTE_UNIQUE_KEY", + "_AIRBYTE_UNIQUE_KEY_SCD", + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, + "_AIRBYTE_START_AT", + "_AIRBYTE_END_AT", + "_AIRBYTE_ACTIVE_ROW", + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT", + "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" +from dedup_data where "_AIRBYTE_ROW_NUM" = 1 \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..e8d34d5c48dfc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,28 @@ + + + create table test_normalization.dedup_exchange_rate__dbt_tmp + + as + +-- Final base SQL model +-- depends_on: test_normalization.dedup_exchange_rate_scd +select + "_AIRBYTE_UNIQUE_KEY", + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT", + "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" +from test_normalization.dedup_exchange_rate_scd +-- dedup_exchange_rate from test_normalization.airbyte_raw_dedup_exchange_rate +where 1 = 1 +and "_AIRBYTE_ACTIVE_ROW" = 1 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..4292befa848b8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,153 @@ + + + create table test_normalization.exchange_rate__dbt_tmp + + as + +with dbt__cte__exchange_rate_ab1__ as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization.airbyte_raw_exchange_rate +select + json_value("_AIRBYTE_DATA", '$."id"') as id, + json_value("_AIRBYTE_DATA", '$."currency"') as currency, + json_value("_AIRBYTE_DATA", '$."date"') as "DATE", + json_value("_AIRBYTE_DATA", '$."timestamp_col"') as timestamp_col, + 
json_value("_AIRBYTE_DATA", '$."HKD@spéçiäl & characters"') as hkd_special___characters, + json_value("_AIRBYTE_DATA", '$."HKD_special___characters"') as hkd_special___characters_1, + json_value("_AIRBYTE_DATA", '$."NZD"') as nzd, + json_value("_AIRBYTE_DATA", '$."USD"') as usd, + json_value("_AIRBYTE_DATA", '$."column___with__quotes"') as column___with__quotes, + json_value("_AIRBYTE_DATA", '$."datetime_tz"') as datetime_tz, + json_value("_AIRBYTE_DATA", '$."datetime_no_tz"') as datetime_no_tz, + json_value("_AIRBYTE_DATA", '$."time_tz"') as time_tz, + json_value("_AIRBYTE_DATA", '$."time_no_tz"') as time_no_tz, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT" +from test_normalization.airbyte_raw_exchange_rate +-- exchange_rate +where 1 = 1 +), dbt__cte__exchange_rate_ab2__ as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: dbt__cte__exchange_rate_ab1__ +select + cast(id as + numeric +) as id, + cast(currency as varchar2(4000)) as currency, + cast(nullif("DATE", '') as + varchar2(4000) +) as "DATE", + cast(nullif(timestamp_col, '') as + varchar2(4000) +) as timestamp_col, + cast(hkd_special___characters as + float +) as hkd_special___characters, + cast(hkd_special___characters_1 as varchar2(4000)) as hkd_special___characters_1, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast(column___with__quotes as varchar2(4000)) as column___with__quotes, + cast(nullif(datetime_tz, '') as + varchar2(4000) +) as datetime_tz, + cast(nullif(datetime_no_tz, '') as + varchar2(4000) +) as datetime_no_tz, + cast(nullif(time_tz, '') as + varchar2(4000) +) as time_tz, + cast(nullif(time_no_tz, '') as + varchar2(4000) +) as time_no_tz, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT" +from dbt__cte__exchange_rate_ab1__ +-- exchange_rate +where 1 = 1 +), dbt__cte__exchange_rate_ab3__ as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: dbt__cte__exchange_rate_ab2__ +select + ora_hash( + + id || '~' || + + + currency || '~' || + + + "DATE" || '~' || + + + timestamp_col || '~' || + + + hkd_special___characters || '~' || + + + hkd_special___characters_1 || '~' || + + + nzd || '~' || + + + usd || '~' || + + + column___with__quotes || '~' || + + + datetime_tz || '~' || + + + datetime_no_tz || '~' || + + + time_tz || '~' || + + + time_no_tz + + ) as "_AIRBYTE_EXCHANGE_RATE_HASHID", + tmp.* +from dbt__cte__exchange_rate_ab2__ tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: dbt__cte__exchange_rate_ab3__ +select + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, + column___with__quotes, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT", + "_AIRBYTE_EXCHANGE_RATE_HASHID" +from dbt__cte__exchange_rate_ab3__ +-- exchange_rate from test_normalization.airbyte_raw_exchange_rate +where 1 = 1 \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql 
new file mode 100644 index 0000000000000..e1ad3ce68244f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,94 @@ + + create view test_normalization.dedup_exchange_rate_stg__dbt_tmp as + +with dbt__cte__dedup_exchange_rate_ab1__ as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization.airbyte_raw_dedup_exchange_rate +select + json_value("_AIRBYTE_DATA", '$."id"') as id, + json_value("_AIRBYTE_DATA", '$."currency"') as currency, + json_value("_AIRBYTE_DATA", '$."date"') as "DATE", + json_value("_AIRBYTE_DATA", '$."timestamp_col"') as timestamp_col, + json_value("_AIRBYTE_DATA", '$."HKD@spéçiäl & characters"') as hkd_special___characters, + json_value("_AIRBYTE_DATA", '$."HKD_special___characters"') as hkd_special___characters_1, + json_value("_AIRBYTE_DATA", '$."NZD"') as nzd, + json_value("_AIRBYTE_DATA", '$."USD"') as usd, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT" +from test_normalization.airbyte_raw_dedup_exchange_rate +-- dedup_exchange_rate +where 1 = 1 + +), dbt__cte__dedup_exchange_rate_ab2__ as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: dbt__cte__dedup_exchange_rate_ab1__ +select + cast(id as + numeric +) as id, + cast(currency as varchar2(4000)) as currency, + cast(nullif("DATE", '') as + varchar2(4000) +) as "DATE", + cast(nullif(timestamp_col, '') as + varchar2(4000) +) as timestamp_col, + cast(hkd_special___characters as + float +) as hkd_special___characters, + cast(hkd_special___characters_1 as varchar2(4000)) as hkd_special___characters_1, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT" +from dbt__cte__dedup_exchange_rate_ab1__ +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: dbt__cte__dedup_exchange_rate_ab2__ +select + ora_hash( + + id || '~' || + + + currency || '~' || + + + "DATE" || '~' || + + + timestamp_col || '~' || + + + hkd_special___characters || '~' || + + + hkd_special___characters_1 || '~' || + + + nzd || '~' || + + + usd + + ) as "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID", + tmp.* +from dbt__cte__dedup_exchange_rate_ab2__ tmp +-- dedup_exchange_rate +where 1 = 1 + + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..5c34c11584562 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,87 @@ + + create view test_normalization.multiple_column_names_conflicts_stg__dbt_tmp as + +with dbt__cte__multiple_column_names_conflicts_ab1__ as ( + +-- SQL model to parse JSON blob stored in a single column 
and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization.airbyte_raw_multiple_column_names_conflicts +select + json_value("_AIRBYTE_DATA", '$."id"') as id, + json_value("_AIRBYTE_DATA", '$."User Id"') as user_id, + json_value("_AIRBYTE_DATA", '$."user_id"') as user_id_1, + json_value("_AIRBYTE_DATA", '$."User id"') as user_id_2, + json_value("_AIRBYTE_DATA", '$."user id"') as user_id_3, + json_value("_AIRBYTE_DATA", '$."User@Id"') as user_id_4, + json_value("_AIRBYTE_DATA", '$."UserId"') as userid, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT" +from test_normalization.airbyte_raw_multiple_column_names_conflicts +-- multiple_column_names_conflicts +where 1 = 1 + +), dbt__cte__multiple_column_names_conflicts_ab2__ as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: dbt__cte__multiple_column_names_conflicts_ab1__ +select + cast(id as + numeric +) as id, + cast(user_id as varchar2(4000)) as user_id, + cast(user_id_1 as + float +) as user_id_1, + cast(user_id_2 as + float +) as user_id_2, + cast(user_id_3 as + float +) as user_id_3, + cast(user_id_4 as varchar2(4000)) as user_id_4, + cast(userid as + float +) as userid, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT" +from dbt__cte__multiple_column_names_conflicts_ab1__ +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: dbt__cte__multiple_column_names_conflicts_ab2__ +select + ora_hash( + + id || '~' || + + + user_id || '~' || + + + user_id_1 || '~' || + + + user_id_2 || '~' || + + + user_id_3 || '~' || + + + user_id_4 || '~' || + + + userid + + ) as "_AIRBYTE_MULTIPLE_COLUMN_NAMES_CONFLICTS_HASHID", + tmp.* +from dbt__cte__multiple_column_names_conflicts_ab2__ tmp +-- multiple_column_names_conflicts +where 1 = 1 + + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..f6b2863d9c445 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = quote('_AIRBYTE_AB_ID'), + schema = "test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} +select + {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['id'], ['id']) }} as id, + {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['date'], ['date']) }} as {{ quote('DATE') }}, + {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as hkd_special___characters, 
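+    -- note: the JSON key on the next line collides with the one above after name normalization
+    -- (both 'HKD@spéçiäl & characters' and 'HKD_special___characters' become hkd_special___characters),
+    -- which is why the second occurrence is suffixed with _1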
+ {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters_1, + {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['NZD'], ['NZD']) }} as nzd, + {{ json_extract_scalar(quote('_AIRBYTE_DATA'), ['USD'], ['USD']) }} as usd, + {{ quote('_AIRBYTE_AB_ID') }}, + {{ quote('_AIRBYTE_EMITTED_AT') }}, + {{ current_timestamp() }} as {{ quote('_AIRBYTE_NORMALIZED_AT') }} +from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..f3158bc2e9193 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = quote('_AIRBYTE_AB_ID'), + schema = "test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} +select + cast(id as {{ dbt_utils.type_bigint() }}) as id, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + cast({{ empty_string_to_null(quote('DATE')) }} as {{ type_date() }}) as {{ quote('DATE') }}, + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast(hkd_special___characters as {{ dbt_utils.type_float() }}) as hkd_special___characters, + cast(hkd_special___characters_1 as {{ dbt_utils.type_string() }}) as hkd_special___characters_1, + cast(nzd as {{ dbt_utils.type_float() }}) as nzd, + cast(usd as {{ dbt_utils.type_float() }}) as usd, + {{ quote('_AIRBYTE_AB_ID') }}, + {{ quote('_AIRBYTE_EMITTED_AT') }}, + {{ current_timestamp() }} as {{ quote('_AIRBYTE_NORMALIZED_AT') }} +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..9320dbc51f60f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,174 @@ +{{ config( + unique_key = "{{ quote('_AIRBYTE_UNIQUE_KEY_SCD') }}", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete 
anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and quote('_AIRBYTE_UNIQUE_KEY') in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}.{{ quote('_AIRBYTE_UNIQUE_KEY') }} in ( + select recent_records.unique_key + from ( + select distinct {{ quote('_AIRBYTE_UNIQUE_KEY') }} as unique_key + from {{ this }} + where 1=1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select {{ quote('_AIRBYTE_UNIQUE_KEY') }} as unique_key, count({{ quote('_AIRBYTE_UNIQUE_KEY') }}) as active_count + from {{ this }} + where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 {{ incremental_clause(quote('_AIRBYTE_NORMALIZED_AT'), this.schema + '.' + quote('dedup_exchange_rate')) }} + group by {{ quote('_AIRBYTE_UNIQUE_KEY') }} + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view test_normalization.dedup_exchange_rate_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} +), +new_data_ids as ( + -- build a subset of {{ quote('_AIRBYTE_UNIQUE_KEY') }} from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY') }} + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data.{{ quote('_AIRBYTE_UNIQUE_KEY') }} = new_data_ids.{{ quote('_AIRBYTE_UNIQUE_KEY') }} + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data.{{ 
quote('_AIRBYTE_AB_ID') }} = inc_data.{{ quote('_AIRBYTE_AB_ID') }} + where {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY') }}, + id, + currency, + {{ quote('DATE') }}, + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, + {{ quote('DATE') }} as {{ quote('_AIRBYTE_START_AT') }}, + lag({{ quote('DATE') }}) over ( + partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ quote('DATE') }} desc nulls last, + {{ quote('_AIRBYTE_EMITTED_AT') }} desc + ) as {{ quote('_AIRBYTE_END_AT') }}, + case when row_number() over ( + partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ quote('DATE') }} desc nulls last, + {{ quote('_AIRBYTE_EMITTED_AT') }} desc + ) = 1 then 1 else 0 end as {{ quote('_AIRBYTE_ACTIVE_ROW') }}, + {{ quote('_AIRBYTE_AB_ID') }}, + {{ quote('_AIRBYTE_EMITTED_AT') }}, + {{ quote('_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID') }} + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + {{ quote('_AIRBYTE_UNIQUE_KEY') }}, + {{ quote('_AIRBYTE_START_AT') }}, + {{ quote('_AIRBYTE_EMITTED_AT') }} + order by {{ quote('_AIRBYTE_ACTIVE_ROW') }} desc, {{ quote('_AIRBYTE_AB_ID') }} + ) as {{ quote('_AIRBYTE_ROW_NUM') }}, + {{ dbt_utils.surrogate_key([ + quote('_AIRBYTE_UNIQUE_KEY'), + quote('_AIRBYTE_START_AT'), + quote('_AIRBYTE_EMITTED_AT') + ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY_SCD') }}, + scd_data.* + from scd_data +) +select + {{ quote('_AIRBYTE_UNIQUE_KEY') }}, + {{ quote('_AIRBYTE_UNIQUE_KEY_SCD') }}, + id, + currency, + {{ quote('DATE') }}, + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, + {{ quote('_AIRBYTE_START_AT') }}, + {{ quote('_AIRBYTE_END_AT') }}, + {{ quote('_AIRBYTE_ACTIVE_ROW') }}, + {{ quote('_AIRBYTE_AB_ID') }}, + {{ quote('_AIRBYTE_EMITTED_AT') }}, + {{ current_timestamp() }} as {{ quote('_AIRBYTE_NORMALIZED_AT') }}, + {{ quote('_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID') }} +from dedup_data where {{ quote('_AIRBYTE_ROW_NUM') }} = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..316e400418353 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,27 @@ +{{ config( + unique_key = "{{ 
quote('_AIRBYTE_UNIQUE_KEY') }}", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} +select + {{ quote('_AIRBYTE_UNIQUE_KEY') }}, + id, + currency, + {{ quote('DATE') }}, + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, + {{ quote('_AIRBYTE_AB_ID') }}, + {{ quote('_AIRBYTE_EMITTED_AT') }}, + {{ current_timestamp() }} as {{ quote('_AIRBYTE_NORMALIZED_AT') }}, + {{ quote('_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID') }} +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization', 'airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and {{ quote('_AIRBYTE_ACTIVE_ROW') }} = 1 +{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..2fa5061764670 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,29 @@ +{{ config( + unique_key = quote('_AIRBYTE_AB_ID'), + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + id, + currency, + {{ quote('DATE') }}, + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, + column___with__quotes, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + {{ quote('_AIRBYTE_AB_ID') }}, + {{ quote('_AIRBYTE_EMITTED_AT') }}, + {{ current_timestamp() }} as {{ quote('_AIRBYTE_NORMALIZED_AT') }}, + {{ quote('_AIRBYTE_EXCHANGE_RATE_HASHID') }} +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization', 'airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..15c9c07d71e9a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,24 @@ +{{ config( + unique_key = quote('_AIRBYTE_AB_ID'), + schema = "test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + quote('DATE'), + 'timestamp_col', + 'hkd_special___characters', + 'hkd_special___characters_1', + 'nzd', + 'usd', + ]) }} as {{ quote('_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID') }}, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause(quote('_AIRBYTE_EMITTED_AT'), this) }} + 
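Both the rendered Oracle output above and the Jinja templates that produce it follow the same two-step pattern: a staging model derives a surrogate hash id for each record (on Oracle, ora_hash over a '~'-joined concatenation of the column values; md5 on other warehouses), and the SCD model then keeps a single row per key with row_number(). The following is a minimal, standalone sketch of that pattern, not part of the generated fixtures; the sample_rows inline table and its columns are invented for illustration.

-- Illustrative sketch only: mimics the hash-and-dedup construction used by the
-- generated models above. sample_rows is a hypothetical inline table.
with sample_rows as (
  select 1 as id, 'USD' as currency from dual
  union all
  select 1 as id, 'USD' as currency from dual  -- exact duplicate, to be dropped
)
select id, currency
from (
  select s.*,
         -- same '~'-joined ora_hash construction as the generated _AIRBYTE_*_HASHID columns
         row_number() over (
           partition by ora_hash(id || '~' || currency)
           order by id
         ) as row_num
  from sample_rows s
)
where row_num = 1;

The generated models apply the same idea at scale, with the hash doubling as the _AIRBYTE_*_HASHID and _AIRBYTE_UNIQUE_KEY columns seen throughout this output.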
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..6fc61e6c97e1f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/sources.yml @@ -0,0 +1,16 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: airbyte_raw_1_prefix_startwith_number + - name: airbyte_raw_dedup_cdc_excluded + - name: airbyte_raw_dedup_exchange_rate + - name: airbyte_raw_exchange_rate + - name: airbyte_raw_multiple_column_names_conflicts + - name: airbyte_raw_pos_dedup_cdcx + - name: airbyte_raw_renamed_dedup_cdc_excluded + - name: airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..cfd186b006ae3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,101 @@ + + + create table test_normalization.dedup_exchange_rate_scd__dbt_tmp + + as + +-- depends_on: ref('dedup_exchange_rate_stg') +with + +input_data as ( + select * + from test_normalization.dedup_exchange_rate_stg + -- dedup_exchange_rate from test_normalization.airbyte_raw_dedup_exchange_rate +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + ora_hash( + + id || '~' || + + + currency || '~' || + + + nzd + + ) as "_AIRBYTE_UNIQUE_KEY", + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, + "DATE" as "_AIRBYTE_START_AT", + lag("DATE") over ( + partition by id, currency, cast(nzd as varchar2(4000)) + order by + "DATE" desc nulls last, + "_AIRBYTE_EMITTED_AT" desc + ) as "_AIRBYTE_END_AT", + case when row_number() over ( + partition by id, currency, cast(nzd as varchar2(4000)) + order by + "DATE" desc nulls last, + "_AIRBYTE_EMITTED_AT" desc + ) = 1 then 1 else 0 end as "_AIRBYTE_ACTIVE_ROW", + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + "_AIRBYTE_UNIQUE_KEY", + "_AIRBYTE_START_AT", + "_AIRBYTE_EMITTED_AT" + order by "_AIRBYTE_ACTIVE_ROW" desc, "_AIRBYTE_AB_ID" + ) as "_AIRBYTE_ROW_NUM", + ora_hash( + + "_AIRBYTE_UNIQUE_KEY" || '~' || + + + "_AIRBYTE_START_AT" || '~' || + + + "_AIRBYTE_EMITTED_AT" + + ) as "_AIRBYTE_UNIQUE_KEY_SCD", + scd_data.* + from scd_data +) +select + "_AIRBYTE_UNIQUE_KEY", + "_AIRBYTE_UNIQUE_KEY_SCD", + id, + currency, + "DATE", + timestamp_col, + 
hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, + "_AIRBYTE_START_AT", + "_AIRBYTE_END_AT", + "_AIRBYTE_ACTIVE_ROW", + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT", + "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" +from dedup_data where "_AIRBYTE_ROW_NUM" = 1 \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..e8d34d5c48dfc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,28 @@ + + + create table test_normalization.dedup_exchange_rate__dbt_tmp + + as + +-- Final base SQL model +-- depends_on: test_normalization.dedup_exchange_rate_scd +select + "_AIRBYTE_UNIQUE_KEY", + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT", + "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" +from test_normalization.dedup_exchange_rate_scd +-- dedup_exchange_rate from test_normalization.airbyte_raw_dedup_exchange_rate +where 1 = 1 +and "_AIRBYTE_ACTIVE_ROW" = 1 diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..4292befa848b8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,153 @@ + + + create table test_normalization.exchange_rate__dbt_tmp + + as + +with dbt__cte__exchange_rate_ab1__ as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization.airbyte_raw_exchange_rate +select + json_value("_AIRBYTE_DATA", '$."id"') as id, + json_value("_AIRBYTE_DATA", '$."currency"') as currency, + json_value("_AIRBYTE_DATA", '$."date"') as "DATE", + json_value("_AIRBYTE_DATA", '$."timestamp_col"') as timestamp_col, + json_value("_AIRBYTE_DATA", '$."HKD@spéçiäl & characters"') as hkd_special___characters, + json_value("_AIRBYTE_DATA", '$."HKD_special___characters"') as hkd_special___characters_1, + json_value("_AIRBYTE_DATA", '$."NZD"') as nzd, + json_value("_AIRBYTE_DATA", '$."USD"') as usd, + json_value("_AIRBYTE_DATA", '$."column___with__quotes"') as column___with__quotes, + json_value("_AIRBYTE_DATA", '$."datetime_tz"') as datetime_tz, + json_value("_AIRBYTE_DATA", '$."datetime_no_tz"') as datetime_no_tz, + json_value("_AIRBYTE_DATA", '$."time_tz"') as time_tz, + json_value("_AIRBYTE_DATA", '$."time_no_tz"') as time_no_tz, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + 
as "_AIRBYTE_NORMALIZED_AT" +from test_normalization.airbyte_raw_exchange_rate +-- exchange_rate +where 1 = 1 +), dbt__cte__exchange_rate_ab2__ as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: dbt__cte__exchange_rate_ab1__ +select + cast(id as + numeric +) as id, + cast(currency as varchar2(4000)) as currency, + cast(nullif("DATE", '') as + varchar2(4000) +) as "DATE", + cast(nullif(timestamp_col, '') as + varchar2(4000) +) as timestamp_col, + cast(hkd_special___characters as + float +) as hkd_special___characters, + cast(hkd_special___characters_1 as varchar2(4000)) as hkd_special___characters_1, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast(column___with__quotes as varchar2(4000)) as column___with__quotes, + cast(nullif(datetime_tz, '') as + varchar2(4000) +) as datetime_tz, + cast(nullif(datetime_no_tz, '') as + varchar2(4000) +) as datetime_no_tz, + cast(nullif(time_tz, '') as + varchar2(4000) +) as time_tz, + cast(nullif(time_no_tz, '') as + varchar2(4000) +) as time_no_tz, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT" +from dbt__cte__exchange_rate_ab1__ +-- exchange_rate +where 1 = 1 +), dbt__cte__exchange_rate_ab3__ as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: dbt__cte__exchange_rate_ab2__ +select + ora_hash( + + id || '~' || + + + currency || '~' || + + + "DATE" || '~' || + + + timestamp_col || '~' || + + + hkd_special___characters || '~' || + + + hkd_special___characters_1 || '~' || + + + nzd || '~' || + + + usd || '~' || + + + column___with__quotes || '~' || + + + datetime_tz || '~' || + + + datetime_no_tz || '~' || + + + time_tz || '~' || + + + time_no_tz + + ) as "_AIRBYTE_EXCHANGE_RATE_HASHID", + tmp.* +from dbt__cte__exchange_rate_ab2__ tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: dbt__cte__exchange_rate_ab3__ +select + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, + column___with__quotes, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT", + "_AIRBYTE_EXCHANGE_RATE_HASHID" +from dbt__cte__exchange_rate_ab3__ +-- exchange_rate from test_normalization.airbyte_raw_exchange_rate +where 1 = 1 \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..e1ad3ce68244f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,94 @@ + + create view test_normalization.dedup_exchange_rate_stg__dbt_tmp as + +with dbt__cte__dedup_exchange_rate_ab1__ as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: test_normalization.airbyte_raw_dedup_exchange_rate +select + json_value("_AIRBYTE_DATA", '$."id"') as id, + 
json_value("_AIRBYTE_DATA", '$."currency"') as currency, + json_value("_AIRBYTE_DATA", '$."date"') as "DATE", + json_value("_AIRBYTE_DATA", '$."timestamp_col"') as timestamp_col, + json_value("_AIRBYTE_DATA", '$."HKD@spéçiäl & characters"') as hkd_special___characters, + json_value("_AIRBYTE_DATA", '$."HKD_special___characters"') as hkd_special___characters_1, + json_value("_AIRBYTE_DATA", '$."NZD"') as nzd, + json_value("_AIRBYTE_DATA", '$."USD"') as usd, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT" +from test_normalization.airbyte_raw_dedup_exchange_rate +-- dedup_exchange_rate +where 1 = 1 + +), dbt__cte__dedup_exchange_rate_ab2__ as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: dbt__cte__dedup_exchange_rate_ab1__ +select + cast(id as + numeric +) as id, + cast(currency as varchar2(4000)) as currency, + cast(nullif("DATE", '') as + varchar2(4000) +) as "DATE", + cast(nullif(timestamp_col, '') as + varchar2(4000) +) as timestamp_col, + cast(hkd_special___characters as + float +) as hkd_special___characters, + cast(hkd_special___characters_1 as varchar2(4000)) as hkd_special___characters_1, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + "_AIRBYTE_AB_ID", + "_AIRBYTE_EMITTED_AT", + + CURRENT_TIMESTAMP + as "_AIRBYTE_NORMALIZED_AT" +from dbt__cte__dedup_exchange_rate_ab1__ +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: dbt__cte__dedup_exchange_rate_ab2__ +select + ora_hash( + + id || '~' || + + + currency || '~' || + + + "DATE" || '~' || + + + timestamp_col || '~' || + + + hkd_special___characters || '~' || + + + hkd_special___characters_1 || '~' || + + + nzd || '~' || + + + usd + + ) as "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID", + tmp.* +from dbt__cte__dedup_exchange_rate_ab2__ tmp +-- dedup_exchange_rate +where 1 = 1 + + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml new file mode 100755 index 0000000000000..6199d0a669d13 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/dbt_project.yml @@ -0,0 +1,125 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +dispatch: +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + nested_stream_with_c__lting_into_long_names_ab1: 
test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__lting_into_long_names_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__lting_into_long_names_stg: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__lting_into_long_names_scd: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__lting_into_long_names: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + non_nested_stream_wi__lting_into_long_names_ab1: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_wi__lting_into_long_names_ab2: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_wi__lting_into_long_names_ab3: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_wi__lting_into_long_names: test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + some_stream_that_was_empty_ab1: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_ab2: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_stg: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd: test_normalization._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty: test_normalization._airbyte_raw_some_stream_that_was_empty + simple_stream_with_n__lting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_n__lting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_n__lting_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_n__lting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_scalar_ab1: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab2: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab3: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar: test_normalization._airbyte_raw_conflict_stream_scalar + conflict_stream_array_ab1: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab2: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array_ab3: test_normalization._airbyte_raw_conflict_stream_array + conflict_stream_array: test_normalization._airbyte_raw_conflict_stream_array + unnest_alias_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias: test_normalization._airbyte_raw_unnest_alias + arrays_ab1: 
test_normalization._airbyte_raw_arrays + arrays_ab2: test_normalization._airbyte_raw_arrays + arrays_ab3: test_normalization._airbyte_raw_arrays + arrays: test_normalization._airbyte_raw_arrays + nested_stream_with_c___long_names_partition_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___long_names_partition_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___long_names_partition_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___long_names_partition: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children: test_normalization._airbyte_raw_unnest_alias + arrays_nested_array_parent_ab1: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent_ab2: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent_ab3: test_normalization._airbyte_raw_arrays + arrays_nested_array_parent: test_normalization._airbyte_raw_arrays + nested_stream_with_c__ion_double_array_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__ion_double_array_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__ion_double_array_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c__ion_double_array_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___names_partition_data_ab1: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___names_partition_data_ab2: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___names_partition_data_ab3: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_c___names_partition_data: test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name___conflict_stream_name_ab1: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name___conflict_stream_name_ab2: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name___conflict_stream_name_ab3: test_normalization._airbyte_raw_conflict_stream_name + conflict_stream_name___conflict_stream_name: test_normalization._airbyte_raw_conflict_stream_name + unnest_alias_children_owner_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab3: test_normalization._airbyte_raw_unnest_alias + 
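+    # the unnest_alias child models below all resolve to the same _airbyte_raw_unnest_alias source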
unnest_alias_children_owner: test_normalization._airbyte_raw_unnest_alias + unnest_alias_childre__column___with__quotes_ab1: test_normalization._airbyte_raw_unnest_alias + unnest_alias_childre__column___with__quotes_ab2: test_normalization._airbyte_raw_unnest_alias + unnest_alias_childre__column___with__quotes_ab3: test_normalization._airbyte_raw_unnest_alias + unnest_alias_childre__column___with__quotes: test_normalization._airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql new file mode 100644 index 0000000000000..150407b1fbdf5 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -0,0 +1,73 @@ + + + + create table "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" + as ( + +-- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') +with + +input_data as ( + select * + from "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_stg" + -- nested_stream_with_c__lting_into_long_names from "postgres".test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, + "id", + "date", + "partition", + "date" as _airbyte_start_at, + lag("date") over ( + partition by "id" + order by + "date" is null asc, + "date" desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by "id" + order by + "date" is null asc, + "date" desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_nested_stre__nto_long_names_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + "id", + "date", + "partition", + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_nested_stre__nto_long_names_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql new file mode 100644 index 0000000000000..885ba6546326a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -0,0 +1,71 @@ + + + + create table "postgres".test_normalization."some_stream_that_was_empty_scd" + as ( + +-- depends_on: ref('some_stream_that_was_empty_stg') +with + +input_data as ( + select * + from "postgres"._airbyte_test_normalization."some_stream_that_was_empty_stg" + -- some_stream_that_was_empty from "postgres".test_normalization._airbyte_raw_some_stream_that_was_empty +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, + "id", + "date", + "date" as _airbyte_start_at, + lag("date") over ( + partition by "id" + order by + "date" is null asc, + "date" desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by "id" + order by + "date" is null asc, + "date" desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_some_stream_that_was_empty_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + "id", + "date", + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_some_stream_that_was_empty_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql new file mode 100644 index 0000000000000..c2170eeb4df25 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql @@ -0,0 +1,64 @@ + + + + create table 
"postgres".test_normalization."nested_stream_with_c___long_names_partition" + as ( + +with __dbt__cte__nested_stream_with_c___long_names_partition_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" +select + _airbyte_nested_stre__nto_long_names_hashid, + jsonb_extract_path("partition", 'double_array_data') as double_array_data, + jsonb_extract_path("partition", 'DATA') as "DATA", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" as table_alias +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and "partition" is not null + +), __dbt__cte__nested_stream_with_c___long_names_partition_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_c___long_names_partition_ab1 +select + _airbyte_nested_stre__nto_long_names_hashid, + double_array_data, + "DATA", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_c___long_names_partition_ab1 +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +), __dbt__cte__nested_stream_with_c___long_names_partition_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_c___long_names_partition_ab2 +select + md5(cast(coalesce(cast(_airbyte_nested_stre__nto_long_names_hashid as text), '') || '-' || coalesce(cast(double_array_data as text), '') || '-' || coalesce(cast("DATA" as text), '') as text)) as _airbyte_partition_hashid, + tmp.* +from __dbt__cte__nested_stream_with_c___long_names_partition_ab2 tmp +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_c___long_names_partition_ab3 +select + _airbyte_nested_stre__nto_long_names_hashid, + double_array_data, + "DATA", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_partition_hashid +from __dbt__cte__nested_stream_with_c___long_names_partition_ab3 +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql new file mode 100644 index 0000000000000..36a8a151153a7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql @@ -0,0 +1,67 @@ + + + + create table "postgres".test_normalization."nested_stream_with_c___names_partition_data" + as ( + +with 
__dbt__cte__nested_stream_with_c___names_partition_data_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."nested_stream_with_c___long_names_partition" + +select + _airbyte_partition_hashid, + jsonb_extract_path_text(_airbyte_nested_data, 'currency') as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."nested_stream_with_c___long_names_partition" as table_alias +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +cross join jsonb_array_elements( + case jsonb_typeof("DATA") + when 'array' then "DATA" + else '[]' end + ) as _airbyte_nested_data +where 1 = 1 +and "DATA" is not null + +), __dbt__cte__nested_stream_with_c___names_partition_data_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_c___names_partition_data_ab1 +select + _airbyte_partition_hashid, + cast(currency as text) as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_c___names_partition_data_ab1 +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +), __dbt__cte__nested_stream_with_c___names_partition_data_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_c___names_partition_data_ab2 +select + md5(cast(coalesce(cast(_airbyte_partition_hashid as text), '') || '-' || coalesce(cast(currency as text), '') as text)) as _airbyte_data_hashid, + tmp.* +from __dbt__cte__nested_stream_with_c___names_partition_data_ab2 tmp +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_c___names_partition_data_ab3 +select + _airbyte_partition_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_data_hashid +from __dbt__cte__nested_stream_with_c___names_partition_data_ab3 +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from "postgres".test_normalization."nested_stream_with_c___long_names_partition" +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql new file mode 100644 index 0000000000000..4b6ec78084879 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql @@ -0,0 +1,67 @@ + + + + create table "postgres".test_normalization."nested_stream_with_c__ion_double_array_data" + as ( + +with __dbt__cte__nested_stream_with_c__ion_double_array_data_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON 
Schema +-- depends_on: "postgres".test_normalization."nested_stream_with_c___long_names_partition" + +select + _airbyte_partition_hashid, + jsonb_extract_path_text(_airbyte_nested_data, 'id') as "id", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."nested_stream_with_c___long_names_partition" as table_alias +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +cross join jsonb_array_elements( + case jsonb_typeof(double_array_data) + when 'array' then double_array_data + else '[]' end + ) as _airbyte_nested_data +where 1 = 1 +and double_array_data is not null + +), __dbt__cte__nested_stream_with_c__ion_double_array_data_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_c__ion_double_array_data_ab1 +select + _airbyte_partition_hashid, + cast("id" as text) as "id", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_c__ion_double_array_data_ab1 +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +), __dbt__cte__nested_stream_with_c__ion_double_array_data_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_c__ion_double_array_data_ab2 +select + md5(cast(coalesce(cast(_airbyte_partition_hashid as text), '') || '-' || coalesce(cast("id" as text), '') as text)) as _airbyte_double_array_data_hashid, + tmp.* +from __dbt__cte__nested_stream_with_c__ion_double_array_data_ab2 tmp +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_c__ion_double_array_data_ab3 +select + _airbyte_partition_hashid, + "id", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_double_array_data_hashid +from __dbt__cte__nested_stream_with_c__ion_double_array_data_ab3 +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from "postgres".test_normalization."nested_stream_with_c___long_names_partition" +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql new file mode 100644 index 0000000000000..a713c3b75e2f0 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql @@ -0,0 +1,24 @@ + + + + create table "postgres".test_normalization."nested_stream_with_c__lting_into_long_names" + as ( + +-- Final base SQL model +-- depends_on: "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" +select + _airbyte_unique_key, + "id", + "date", + "partition", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as 
_airbyte_normalized_at, + _airbyte_nested_stre__nto_long_names_hashid +from "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" +-- nested_stream_with_c__lting_into_long_names from "postgres".test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql new file mode 100644 index 0000000000000..9062ea955a071 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql @@ -0,0 +1,51 @@ + + + + create table "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_stg" + as ( + +with __dbt__cte__nested_stream_with_c__lting_into_long_names_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'date') as "date", + + jsonb_extract_path(table_alias._airbyte_data, 'partition') + as "partition", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names as table_alias +-- nested_stream_with_c__lting_into_long_names +where 1 = 1 + +), __dbt__cte__nested_stream_with_c__lting_into_long_names_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_c__lting_into_long_names_ab1 +select + cast("id" as text) as "id", + cast("date" as text) as "date", + cast("partition" as + jsonb +) as "partition", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_c__lting_into_long_names_ab1 +-- nested_stream_with_c__lting_into_long_names +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_c__lting_into_long_names_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast("partition" as text), '') as text)) as _airbyte_nested_stre__nto_long_names_hashid, + tmp.* +from __dbt__cte__nested_stream_with_c__lting_into_long_names_ab2 tmp +-- nested_stream_with_c__lting_into_long_names +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql new file mode 100644 index 0000000000000..1556a86262084 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql @@ -0,0 +1,23 @@ + + + + create table "postgres".test_normalization."some_stream_that_was_empty" + as ( + +-- Final base SQL model +-- depends_on: "postgres".test_normalization."some_stream_that_was_empty_scd" +select + _airbyte_unique_key, + "id", + "date", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_some_stream_that_was_empty_hashid +from "postgres".test_normalization."some_stream_that_was_empty_scd" +-- some_stream_that_was_empty from "postgres".test_normalization._airbyte_raw_some_stream_that_was_empty +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql new file mode 100644 index 0000000000000..e473519de41aa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql @@ -0,0 +1,45 @@ + + + + create table "postgres"._airbyte_test_normalization."some_stream_that_was_empty_stg" + as ( + +with __dbt__cte__some_stream_that_was_empty_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_some_stream_that_was_empty +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'date') as "date", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_some_stream_that_was_empty as table_alias +-- some_stream_that_was_empty +where 1 = 1 + +), __dbt__cte__some_stream_that_was_empty_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__some_stream_that_was_empty_ab1 +select + cast("id" as text) as "id", + cast("date" as text) as "date", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__some_stream_that_was_empty_ab1 +-- some_stream_that_was_empty +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__some_stream_that_was_empty_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') as text)) as _airbyte_some_stream_that_was_empty_hashid, + tmp.* +from __dbt__cte__some_stream_that_was_empty_ab2 tmp +-- some_stream_that_was_empty +where 1 = 1 + + ); + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql new file mode 100644 index 0000000000000..aea94f43825c1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql @@ -0,0 +1,60 @@ + + + + create table "postgres".test_normalization_namespace."simple_stream_with_n__lting_into_long_names" + as ( + +with __dbt__cte__simple_stream_with_n__lting_into_long_names_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'date') as "date", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names as table_alias +-- simple_stream_with_n__lting_into_long_names +where 1 = 1 + +), __dbt__cte__simple_stream_with_n__lting_into_long_names_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__simple_stream_with_n__lting_into_long_names_ab1 +select + cast("id" as text) as "id", + cast("date" as text) as "date", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__simple_stream_with_n__lting_into_long_names_ab1 +-- simple_stream_with_n__lting_into_long_names +where 1 = 1 + +), __dbt__cte__simple_stream_with_n__lting_into_long_names_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__simple_stream_with_n__lting_into_long_names_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') as text)) as _airbyte_simple_stre__nto_long_names_hashid, + tmp.* +from __dbt__cte__simple_stream_with_n__lting_into_long_names_ab2 tmp +-- simple_stream_with_n__lting_into_long_names +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__simple_stream_with_n__lting_into_long_names_ab3 +select + "id", + "date", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_simple_stre__nto_long_names_hashid +from __dbt__cte__simple_stream_with_n__lting_into_long_names_ab3 +-- simple_stream_with_n__lting_into_long_names from "postgres".test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/arrays.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/arrays.sql new 
file mode 100644 index 0000000000000..e10c4619e53a4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/arrays.sql @@ -0,0 +1,58 @@ + + + create table "postgres".test_normalization."arrays__dbt_tmp" + as ( + +with __dbt__cte__arrays_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_arrays +select + jsonb_extract_path(_airbyte_data, 'array_of_strings') as array_of_strings, + + jsonb_extract_path(table_alias._airbyte_data, 'nested_array_parent') + as nested_array_parent, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_arrays as table_alias +-- arrays +where 1 = 1 +), __dbt__cte__arrays_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__arrays_ab1 +select + array_of_strings, + cast(nested_array_parent as + jsonb +) as nested_array_parent, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__arrays_ab1 +-- arrays +where 1 = 1 +), __dbt__cte__arrays_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__arrays_ab2 +select + md5(cast(coalesce(cast(array_of_strings as text), '') || '-' || coalesce(cast(nested_array_parent as text), '') as text)) as _airbyte_arrays_hashid, + tmp.* +from __dbt__cte__arrays_ab2 tmp +-- arrays +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__arrays_ab3 +select + array_of_strings, + nested_array_parent, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_arrays_hashid +from __dbt__cte__arrays_ab3 +-- arrays from "postgres".test_normalization._airbyte_raw_arrays +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/arrays_nested_array_parent.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/arrays_nested_array_parent.sql new file mode 100644 index 0000000000000..09ad8fe3cd3f9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/arrays_nested_array_parent.sql @@ -0,0 +1,55 @@ + + + create table "postgres".test_normalization."arrays_nested_array_parent__dbt_tmp" + as ( + +with __dbt__cte__arrays_nested_array_parent_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."arrays" +select + _airbyte_arrays_hashid, + jsonb_extract_path(nested_array_parent, 'nested_array') as nested_array, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."arrays" as table_alias +-- nested_array_parent at arrays/nested_array_parent +where 1 = 1 +and nested_array_parent is not null +), __dbt__cte__arrays_nested_array_parent_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted 
from the JSON schema type +-- depends_on: __dbt__cte__arrays_nested_array_parent_ab1 +select + _airbyte_arrays_hashid, + nested_array, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__arrays_nested_array_parent_ab1 +-- nested_array_parent at arrays/nested_array_parent +where 1 = 1 +), __dbt__cte__arrays_nested_array_parent_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__arrays_nested_array_parent_ab2 +select + md5(cast(coalesce(cast(_airbyte_arrays_hashid as text), '') || '-' || coalesce(cast(nested_array as text), '') as text)) as _airbyte_nested_array_parent_hashid, + tmp.* +from __dbt__cte__arrays_nested_array_parent_ab2 tmp +-- nested_array_parent at arrays/nested_array_parent +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__arrays_nested_array_parent_ab3 +select + _airbyte_arrays_hashid, + nested_array, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_nested_array_parent_hashid +from __dbt__cte__arrays_nested_array_parent_ab3 +-- nested_array_parent at arrays/nested_array_parent from "postgres".test_normalization."arrays" +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_array.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_array.sql new file mode 100644 index 0000000000000..c1c6ab12a7b7c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_array.sql @@ -0,0 +1,54 @@ + + + create table "postgres".test_normalization."conflict_stream_array__dbt_tmp" + as ( + +with __dbt__cte__conflict_stream_array_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_conflict_stream_array +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path(_airbyte_data, 'conflict_stream_array') as conflict_stream_array, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_conflict_stream_array as table_alias +-- conflict_stream_array +where 1 = 1 +), __dbt__cte__conflict_stream_array_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__conflict_stream_array_ab1 +select + cast("id" as text) as "id", + conflict_stream_array, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__conflict_stream_array_ab1 +-- conflict_stream_array +where 1 = 1 +), __dbt__cte__conflict_stream_array_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__conflict_stream_array_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_array as text), '') as text)) as _airbyte_conflict_stream_array_hashid, + tmp.* +from __dbt__cte__conflict_stream_array_ab2 tmp +-- conflict_stream_array +where 1 = 1 +)-- Final base SQL model +-- depends_on: 
__dbt__cte__conflict_stream_array_ab3 +select + "id", + conflict_stream_array, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_conflict_stream_array_hashid +from __dbt__cte__conflict_stream_array_ab3 +-- conflict_stream_array from "postgres".test_normalization._airbyte_raw_conflict_stream_array +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name.sql new file mode 100644 index 0000000000000..ac5cffb8d00d9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name.sql @@ -0,0 +1,58 @@ + + + create table "postgres".test_normalization."conflict_stream_name__dbt_tmp" + as ( + +with __dbt__cte__conflict_stream_name_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_conflict_stream_name +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + + jsonb_extract_path(table_alias._airbyte_data, 'conflict_stream_name') + as conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_conflict_stream_name as table_alias +-- conflict_stream_name +where 1 = 1 +), __dbt__cte__conflict_stream_name_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__conflict_stream_name_ab1 +select + cast("id" as text) as "id", + cast(conflict_stream_name as + jsonb +) as conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__conflict_stream_name_ab1 +-- conflict_stream_name +where 1 = 1 +), __dbt__cte__conflict_stream_name_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__conflict_stream_name_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_name as text), '') as text)) as _airbyte_conflict_stream_name_hashid, + tmp.* +from __dbt__cte__conflict_stream_name_ab2 tmp +-- conflict_stream_name +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__conflict_stream_name_ab3 +select + "id", + conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_conflict_stream_name_hashid +from __dbt__cte__conflict_stream_name_ab3 +-- conflict_stream_name from "postgres".test_normalization._airbyte_raw_conflict_stream_name +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql new file mode 100644 index 
0000000000000..4aa2c420ed45d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql @@ -0,0 +1,55 @@ + + + create table "postgres".test_normalization."conflict_stream_name___conflict_stream_name__dbt_tmp" + as ( + +with __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."conflict_stream_name_conflict_stream_name" +select + _airbyte_conflict_stream_name_2_hashid, + jsonb_extract_path_text(conflict_stream_name, 'groups') as groups, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."conflict_stream_name_conflict_stream_name" as table_alias +-- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name +where 1 = 1 +and conflict_stream_name is not null +), __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 +select + _airbyte_conflict_stream_name_2_hashid, + cast(groups as text) as groups, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 +-- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name +where 1 = 1 +), __dbt__cte__conflict_stream_name___conflict_stream_name_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 +select + md5(cast(coalesce(cast(_airbyte_conflict_stream_name_2_hashid as text), '') || '-' || coalesce(cast(groups as text), '') as text)) as _airbyte_conflict_stream_name_3_hashid, + tmp.* +from __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 tmp +-- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab3 +select + _airbyte_conflict_stream_name_2_hashid, + groups, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_conflict_stream_name_3_hashid +from __dbt__cte__conflict_stream_name___conflict_stream_name_ab3 +-- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name from "postgres".test_normalization."conflict_stream_name_conflict_stream_name" +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql new file mode 100644 index 0000000000000..82dfb023674e5 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql @@ -0,0 +1,59 @@ + + + create table "postgres".test_normalization."conflict_stream_name_conflict_stream_name__dbt_tmp" + as ( + +with __dbt__cte__conflict_stream_name_conflict_stream_name_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."conflict_stream_name" +select + _airbyte_conflict_stream_name_hashid, + + jsonb_extract_path(table_alias.conflict_stream_name, 'conflict_stream_name') + as conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."conflict_stream_name" as table_alias +-- conflict_stream_name at conflict_stream_name/conflict_stream_name +where 1 = 1 +and conflict_stream_name is not null +), __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab1 +select + _airbyte_conflict_stream_name_hashid, + cast(conflict_stream_name as + jsonb +) as conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__conflict_stream_name_conflict_stream_name_ab1 +-- conflict_stream_name at conflict_stream_name/conflict_stream_name +where 1 = 1 +), __dbt__cte__conflict_stream_name_conflict_stream_name_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 +select + md5(cast(coalesce(cast(_airbyte_conflict_stream_name_hashid as text), '') || '-' || coalesce(cast(conflict_stream_name as text), '') as text)) as _airbyte_conflict_stream_name_2_hashid, + tmp.* +from __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 tmp +-- conflict_stream_name at conflict_stream_name/conflict_stream_name +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab3 +select + _airbyte_conflict_stream_name_hashid, + conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_conflict_stream_name_2_hashid +from __dbt__cte__conflict_stream_name_conflict_stream_name_ab3 +-- conflict_stream_name at conflict_stream_name/conflict_stream_name from "postgres".test_normalization."conflict_stream_name" +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql new file mode 100644 index 0000000000000..09a4fa01de977 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql @@ -0,0 +1,56 @@ + + + create table "postgres".test_normalization."conflict_stream_scalar__dbt_tmp" + as ( + +with __dbt__cte__conflict_stream_scalar_ab1 as ( + +-- SQL 
model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_conflict_stream_scalar +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'conflict_stream_scalar') as conflict_stream_scalar, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_conflict_stream_scalar as table_alias +-- conflict_stream_scalar +where 1 = 1 +), __dbt__cte__conflict_stream_scalar_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__conflict_stream_scalar_ab1 +select + cast("id" as text) as "id", + cast(conflict_stream_scalar as + bigint +) as conflict_stream_scalar, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__conflict_stream_scalar_ab1 +-- conflict_stream_scalar +where 1 = 1 +), __dbt__cte__conflict_stream_scalar_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__conflict_stream_scalar_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_scalar as text), '') as text)) as _airbyte_conflict_stream_scalar_hashid, + tmp.* +from __dbt__cte__conflict_stream_scalar_ab2 tmp +-- conflict_stream_scalar +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__conflict_stream_scalar_ab3 +select + "id", + conflict_stream_scalar, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_conflict_stream_scalar_hashid +from __dbt__cte__conflict_stream_scalar_ab3 +-- conflict_stream_scalar from "postgres".test_normalization._airbyte_raw_conflict_stream_scalar +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql new file mode 100644 index 0000000000000..31d2176c3888c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql @@ -0,0 +1,54 @@ + + + create table "postgres".test_normalization."non_nested_stream_wi__lting_into_long_names__dbt_tmp" + as ( + +with __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'date') as "date", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names as table_alias +-- non_nested_stream_wi__lting_into_long_names +where 1 = 1 +), __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 as ( + +-- 
SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 +select + cast("id" as text) as "id", + cast("date" as text) as "date", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 +-- non_nested_stream_wi__lting_into_long_names +where 1 = 1 +), __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') as text)) as _airbyte_non_nested___nto_long_names_hashid, + tmp.* +from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 tmp +-- non_nested_stream_wi__lting_into_long_names +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab3 +select + "id", + "date", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_non_nested___nto_long_names_hashid +from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab3 +-- non_nested_stream_wi__lting_into_long_names from "postgres".test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias.sql new file mode 100644 index 0000000000000..7af2f04f81f87 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias.sql @@ -0,0 +1,56 @@ + + + create table "postgres".test_normalization."unnest_alias__dbt_tmp" + as ( + +with __dbt__cte__unnest_alias_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_unnest_alias +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path(_airbyte_data, 'children') as children, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_unnest_alias as table_alias +-- unnest_alias +where 1 = 1 +), __dbt__cte__unnest_alias_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__unnest_alias_ab1 +select + cast("id" as + bigint +) as "id", + children, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__unnest_alias_ab1 +-- unnest_alias +where 1 = 1 +), __dbt__cte__unnest_alias_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__unnest_alias_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(children as text), '') as text)) as _airbyte_unnest_alias_hashid, + tmp.* +from __dbt__cte__unnest_alias_ab2 tmp +-- unnest_alias +where 1 = 1 +)-- Final base SQL model +-- 
depends_on: __dbt__cte__unnest_alias_ab3 +select + "id", + children, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_unnest_alias_hashid +from __dbt__cte__unnest_alias_ab3 +-- unnest_alias from "postgres".test_normalization._airbyte_raw_unnest_alias +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql new file mode 100644 index 0000000000000..6688069a62f01 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql @@ -0,0 +1,61 @@ + + + create table "postgres".test_normalization."unnest_alias_childre__column___with__quotes__dbt_tmp" + as ( + +with __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."unnest_alias_children_owner" + +select + _airbyte_owner_hashid, + jsonb_extract_path_text(_airbyte_nested_data, 'currency') as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."unnest_alias_children_owner" as table_alias +-- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes +cross join jsonb_array_elements( + case jsonb_typeof("column`_'with""_quotes") + when 'array' then "column`_'with""_quotes" + else '[]' end + ) as _airbyte_nested_data +where 1 = 1 +and "column`_'with""_quotes" is not null +), __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 +select + _airbyte_owner_hashid, + cast(currency as text) as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 +-- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes +where 1 = 1 +), __dbt__cte__unnest_alias_childre__column___with__quotes_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 +select + md5(cast(coalesce(cast(_airbyte_owner_hashid as text), '') || '-' || coalesce(cast(currency as text), '') as text)) as _airbyte_column___with__quotes_hashid, + tmp.* +from __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 tmp +-- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab3 +select + _airbyte_owner_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_column___with__quotes_hashid +from __dbt__cte__unnest_alias_childre__column___with__quotes_ab3 +-- column___with__quotes at 
unnest_alias/children/owner/column`_'with"_quotes from "postgres".test_normalization."unnest_alias_children_owner" +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children.sql new file mode 100644 index 0000000000000..779394d5765dc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children.sql @@ -0,0 +1,70 @@ + + + create table "postgres".test_normalization."unnest_alias_children__dbt_tmp" + as ( + +with __dbt__cte__unnest_alias_children_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."unnest_alias" + +select + _airbyte_unnest_alias_hashid, + jsonb_extract_path_text(_airbyte_nested_data, 'ab_id') as ab_id, + + jsonb_extract_path(_airbyte_nested_data, 'owner') + as "owner", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."unnest_alias" as table_alias +-- children at unnest_alias/children +cross join jsonb_array_elements( + case jsonb_typeof(children) + when 'array' then children + else '[]' end + ) as _airbyte_nested_data +where 1 = 1 +and children is not null +), __dbt__cte__unnest_alias_children_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__unnest_alias_children_ab1 +select + _airbyte_unnest_alias_hashid, + cast(ab_id as + bigint +) as ab_id, + cast("owner" as + jsonb +) as "owner", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__unnest_alias_children_ab1 +-- children at unnest_alias/children +where 1 = 1 +), __dbt__cte__unnest_alias_children_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__unnest_alias_children_ab2 +select + md5(cast(coalesce(cast(_airbyte_unnest_alias_hashid as text), '') || '-' || coalesce(cast(ab_id as text), '') || '-' || coalesce(cast("owner" as text), '') as text)) as _airbyte_children_hashid, + tmp.* +from __dbt__cte__unnest_alias_children_ab2 tmp +-- children at unnest_alias/children +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__unnest_alias_children_ab3 +select + _airbyte_unnest_alias_hashid, + ab_id, + "owner", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_children_hashid +from __dbt__cte__unnest_alias_children_ab3 +-- children at unnest_alias/children from "postgres".test_normalization."unnest_alias" +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql new file mode 100644 index 
0000000000000..651e1c11914eb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql @@ -0,0 +1,60 @@ + + + create table "postgres".test_normalization."unnest_alias_children_owner__dbt_tmp" + as ( + +with __dbt__cte__unnest_alias_children_owner_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."unnest_alias_children" +select + _airbyte_children_hashid, + jsonb_extract_path_text("owner", 'owner_id') as owner_id, + jsonb_extract_path("owner", 'column`_''with"_quotes') as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."unnest_alias_children" as table_alias +-- owner at unnest_alias/children/owner +where 1 = 1 +and "owner" is not null +), __dbt__cte__unnest_alias_children_owner_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__unnest_alias_children_owner_ab1 +select + _airbyte_children_hashid, + cast(owner_id as + bigint +) as owner_id, + "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__unnest_alias_children_owner_ab1 +-- owner at unnest_alias/children/owner +where 1 = 1 +), __dbt__cte__unnest_alias_children_owner_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__unnest_alias_children_owner_ab2 +select + md5(cast(coalesce(cast(_airbyte_children_hashid as text), '') || '-' || coalesce(cast(owner_id as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_owner_hashid, + tmp.* +from __dbt__cte__unnest_alias_children_owner_ab2 tmp +-- owner at unnest_alias/children/owner +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__unnest_alias_children_owner_ab3 +select + _airbyte_children_hashid, + owner_id, + "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_owner_hashid +from __dbt__cte__unnest_alias_children_owner_ab3 +-- owner at unnest_alias/children/owner from "postgres".test_normalization."unnest_alias_children" +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab1.sql new file mode 100644 index 0000000000000..6fbf79914b825 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab1.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', 
'_airbyte_raw_arrays') }} +select + {{ json_extract_string_array('_airbyte_data', ['array_of_strings'], ['array_of_strings']) }} as array_of_strings, + {{ json_extract('table_alias', '_airbyte_data', ['nested_array_parent'], ['nested_array_parent']) }} as nested_array_parent, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_arrays') }} as table_alias +-- arrays +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab2.sql new file mode 100644 index 0000000000000..97010a6648aa3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab2.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('arrays_ab1') }} +select + array_of_strings, + cast(nested_array_parent as {{ type_json() }}) as nested_array_parent, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('arrays_ab1') }} +-- arrays +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab3.sql new file mode 100644 index 0000000000000..c3c0afc5de7b3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_ab3.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('arrays_ab2') }} +select + {{ dbt_utils.surrogate_key([ + array_to_string('array_of_strings'), + object_to_string('nested_array_parent'), + ]) }} as _airbyte_arrays_hashid, + tmp.* +from {{ ref('arrays_ab2') }} tmp +-- arrays +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab1.sql new file mode 100644 index 0000000000000..2cbe78b134dc7 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab1.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('arrays') }} +select + _airbyte_arrays_hashid, + {{ json_extract_string_array('nested_array_parent', ['nested_array'], ['nested_array']) }} as nested_array, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('arrays') }} as table_alias +-- nested_array_parent at arrays/nested_array_parent +where 1 = 1 +and nested_array_parent is not null + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab2.sql new file mode 100644 index 0000000000000..0a2dde68d0b85 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab2.sql @@ -0,0 +1,17 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('arrays_nested_array_parent_ab1') }} +select + _airbyte_arrays_hashid, + nested_array, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('arrays_nested_array_parent_ab1') }} +-- nested_array_parent at arrays/nested_array_parent +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab3.sql new file mode 100644 index 0000000000000..c59efa0e9ad20 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/arrays_nested_array_parent_ab3.sql @@ -0,0 +1,17 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('arrays_nested_array_parent_ab2') }} +select + {{ dbt_utils.surrogate_key([ + '_airbyte_arrays_hashid', + array_to_string('nested_array'), + ]) }} as _airbyte_nested_array_parent_hashid, + tmp.* +from {{ ref('arrays_nested_array_parent_ab2') }} tmp +-- nested_array_parent at arrays/nested_array_parent +where 1 = 1 + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab1.sql new file mode 100644 index 0000000000000..611e84ed967b1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab1.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_conflict_stream_array') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_array('_airbyte_data', ['conflict_stream_array'], ['conflict_stream_array']) }} as conflict_stream_array, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_conflict_stream_array') }} as table_alias +-- conflict_stream_array +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab2.sql new file mode 100644 index 0000000000000..2193fab3931cd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab2.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('conflict_stream_array_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }}, + conflict_stream_array, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('conflict_stream_array_ab1') }} +-- conflict_stream_array +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab3.sql new file mode 100644 index 0000000000000..e70e5cf665517 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_array_ab3.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('conflict_stream_array_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + array_to_string('conflict_stream_array'), + ]) }} as _airbyte_conflict_stream_array_hashid, + tmp.* +from {{ ref('conflict_stream_array_ab2') }} tmp +-- conflict_stream_array +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab1.sql new file mode 100644 index 0000000000000..87c51e6de1793 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab1.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('conflict_stream_name_conflict_stream_name') }} +select + _airbyte_conflict_stream_name_2_hashid, + {{ json_extract_scalar('conflict_stream_name', ['groups'], ['groups']) }} as groups, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('conflict_stream_name_conflict_stream_name') }} as table_alias +-- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name +where 1 = 1 +and conflict_stream_name is not null + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab2.sql new file mode 100644 index 0000000000000..06ff95b10ff85 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab2.sql @@ -0,0 +1,17 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('conflict_stream_name___conflict_stream_name_ab1') }} +select + _airbyte_conflict_stream_name_2_hashid, + cast(groups as {{ 
dbt_utils.type_string() }}) as groups, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('conflict_stream_name___conflict_stream_name_ab1') }} +-- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab3.sql new file mode 100644 index 0000000000000..09e0262357c90 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name___conflict_stream_name_ab3.sql @@ -0,0 +1,17 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "_airbyte_test_normalization", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('conflict_stream_name___conflict_stream_name_ab2') }} +select + {{ dbt_utils.surrogate_key([ + '_airbyte_conflict_stream_name_2_hashid', + 'groups', + ]) }} as _airbyte_conflict_stream_name_3_hashid, + tmp.* +from {{ ref('conflict_stream_name___conflict_stream_name_ab2') }} tmp +-- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab1.sql new file mode 100644 index 0000000000000..158c5358a3559 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab1.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_conflict_stream_name') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract('table_alias', '_airbyte_data', ['conflict_stream_name'], ['conflict_stream_name']) }} as conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_conflict_stream_name') }} as table_alias +-- conflict_stream_name +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab2.sql 
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab2.sql
new file mode 100644
index 0000000000000..c2d58329204cf
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab2.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('conflict_stream_name_ab1') }}
+select
+    cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
+    cast(conflict_stream_name as {{ type_json() }}) as conflict_stream_name,
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('conflict_stream_name_ab1') }}
+-- conflict_stream_name
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab3.sql
new file mode 100644
index 0000000000000..78f7cfe9bea5e
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_ab3.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to build a hash column based on the values of this record
+-- depends_on: {{ ref('conflict_stream_name_ab2') }}
+select
+    {{ dbt_utils.surrogate_key([
+        adapter.quote('id'),
+        object_to_string('conflict_stream_name'),
+    ]) }} as _airbyte_conflict_stream_name_hashid,
+    tmp.*
+from {{ ref('conflict_stream_name_ab2') }} tmp
+-- conflict_stream_name
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab1.sql
new file mode 100644
index 0000000000000..fcee51f386031
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab1.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ ref('conflict_stream_name') }}
+select
+    _airbyte_conflict_stream_name_hashid,
+    {{ json_extract('table_alias', 'conflict_stream_name', ['conflict_stream_name'], ['conflict_stream_name']) }} as conflict_stream_name,
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('conflict_stream_name') }} as table_alias
+-- conflict_stream_name at conflict_stream_name/conflict_stream_name
+where 1 = 1
+and conflict_stream_name is not null
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab2.sql
new file mode 100644
index 0000000000000..e097773611da6
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab2.sql
@@ -0,0 +1,17 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('conflict_stream_name_conflict_stream_name_ab1') }}
+select
+    _airbyte_conflict_stream_name_hashid,
+    cast(conflict_stream_name as {{ type_json() }}) as conflict_stream_name,
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('conflict_stream_name_conflict_stream_name_ab1') }}
+-- conflict_stream_name at conflict_stream_name/conflict_stream_name
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab3.sql
new file mode 100644
index 0000000000000..0892d61432767
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_name_conflict_stream_name_ab3.sql
@@ -0,0 +1,17 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to build a hash column based on the values of this record
+-- depends_on: {{ ref('conflict_stream_name_conflict_stream_name_ab2') }}
+select
+    {{ dbt_utils.surrogate_key([
+        '_airbyte_conflict_stream_name_hashid',
+        object_to_string('conflict_stream_name'),
+    ]) }} as _airbyte_conflict_stream_name_2_hashid,
+    tmp.*
+from {{ ref('conflict_stream_name_conflict_stream_name_ab2') }} tmp
+-- conflict_stream_name at conflict_stream_name/conflict_stream_name
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab1.sql
new file mode 100644
index 0000000000000..473ada08d890f
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab1.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ source('test_normalization', '_airbyte_raw_conflict_stream_scalar') }}
+select
+    {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }},
+    {{ json_extract_scalar('_airbyte_data', ['conflict_stream_scalar'], ['conflict_stream_scalar']) }} as conflict_stream_scalar,
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ source('test_normalization', '_airbyte_raw_conflict_stream_scalar') }} as table_alias
+-- conflict_stream_scalar
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab2.sql
new file mode 100644
index 0000000000000..2f307fd526ecc
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab2.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('conflict_stream_scalar_ab1') }}
+select
+    cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
+    cast(conflict_stream_scalar as {{ dbt_utils.type_bigint() }}) as conflict_stream_scalar,
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('conflict_stream_scalar_ab1') }}
+-- conflict_stream_scalar
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab3.sql
new file mode 100644
index 0000000000000..c2fa037be1c00
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/conflict_stream_scalar_ab3.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to build a hash column based on the values of this record
+-- depends_on: {{ ref('conflict_stream_scalar_ab2') }}
+select
+    {{ dbt_utils.surrogate_key([
+        adapter.quote('id'),
+        'conflict_stream_scalar',
+    ]) }} as _airbyte_conflict_stream_scalar_hashid,
+    tmp.*
+from {{ ref('conflict_stream_scalar_ab2') }} tmp
+-- conflict_stream_scalar
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab1.sql
new file mode 100644
index 0000000000000..fafabe2d98407
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab1.sql
@@ -0,0 +1,20 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ ref('nested_stream_with_c__lting_into_long_names_scd') }}
+select
+    _airbyte_nested_stre__nto_long_names_hashid,
+    {{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data,
+    {{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as {{ adapter.quote('DATA') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('nested_stream_with_c__lting_into_long_names_scd') }} as table_alias
+-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
+where 1 = 1
+and {{ adapter.quote('partition') }} is not null
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab2.sql
new file mode 100644
index 0000000000000..a622952dbeff9
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab2.sql
@@ -0,0 +1,19 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('nested_stream_with_c___long_names_partition_ab1') }}
+select
+    _airbyte_nested_stre__nto_long_names_hashid,
+    double_array_data,
+    {{ adapter.quote('DATA') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('nested_stream_with_c___long_names_partition_ab1') }}
+-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab3.sql
new file mode 100644
index 0000000000000..3eb1b81838277
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___long_names_partition_ab3.sql
@@ -0,0 +1,19 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to build a hash column based on the values of this record
+-- depends_on: {{ ref('nested_stream_with_c___long_names_partition_ab2') }}
+select
+    {{ dbt_utils.surrogate_key([
+        '_airbyte_nested_stre__nto_long_names_hashid',
+        array_to_string('double_array_data'),
+        array_to_string(adapter.quote('DATA')),
+    ]) }} as _airbyte_partition_hashid,
+    tmp.*
+from {{ ref('nested_stream_with_c___long_names_partition_ab2') }} tmp
+-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab1.sql
new file mode 100644
index 0000000000000..0aab8469aefd2
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab1.sql
@@ -0,0 +1,21 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ ref('nested_stream_with_c___long_names_partition') }}
+{{ unnest_cte(ref('nested_stream_with_c___long_names_partition'), 'partition', adapter.quote('DATA')) }}
+select
+    _airbyte_partition_hashid,
+    {{ json_extract_scalar(unnested_column_value(adapter.quote('DATA')), ['currency'], ['currency']) }} as currency,
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('nested_stream_with_c___long_names_partition') }} as table_alias
+-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
+{{ cross_join_unnest('partition', adapter.quote('DATA')) }}
+where 1 = 1
+and {{ adapter.quote('DATA') }} is not null
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab2.sql
new file mode 100644
index 0000000000000..f6cb35f7d406b
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab2.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('nested_stream_with_c___names_partition_data_ab1') }}
+select
+    _airbyte_partition_hashid,
+    cast(currency as {{ dbt_utils.type_string() }}) as currency,
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('nested_stream_with_c___names_partition_data_ab1') }}
+-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab3.sql
new file mode 100644
index 0000000000000..f06e21a1432e6
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c___names_partition_data_ab3.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to build a hash column based on the values of this record
+-- depends_on: {{ ref('nested_stream_with_c___names_partition_data_ab2') }}
+select
+    {{ dbt_utils.surrogate_key([
+        '_airbyte_partition_hashid',
+        'currency',
+    ]) }} as _airbyte_data_hashid,
+    tmp.*
+from {{ ref('nested_stream_with_c___names_partition_data_ab2') }} tmp
+-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
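Note: the `..._names_partition_data_ab*` models above, and the `..._double_array_data_ab*` models that follow, unnest array columns. On Postgres the `unnest_cte` macro renders empty and the work happens in `cross_join_unnest`, which (assuming base-normalization's Postgres macro implementations, so treat the exact shape as a sketch) expands to a `jsonb_array_elements` cross join, roughly:

    select
        _airbyte_partition_hashid,
        jsonb_extract_path_text(_airbyte_nested_data, 'id') as "id"
    from test_normalization.nested_stream_with_c___long_names_partition as table_alias
    cross join jsonb_array_elements(
        case jsonb_typeof(double_array_data) when 'array' then double_array_data else '[]' end
    ) as _airbyte_nested_data
    where double_array_data is not null;

`unnested_column_value` then resolves to `_airbyte_nested_data`, so the scalar extraction applies once per array element.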
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab1.sql
new file mode 100644
index 0000000000000..5f674cdcd1a69
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab1.sql
@@ -0,0 +1,21 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ ref('nested_stream_with_c___long_names_partition') }}
+{{ unnest_cte(ref('nested_stream_with_c___long_names_partition'), 'partition', 'double_array_data') }}
+select
+    _airbyte_partition_hashid,
+    {{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as {{ adapter.quote('id') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('nested_stream_with_c___long_names_partition') }} as table_alias
+-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
+{{ cross_join_unnest('partition', 'double_array_data') }}
+where 1 = 1
+and double_array_data is not null
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab2.sql
new file mode 100644
index 0000000000000..6d785589955da
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab2.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('nested_stream_with_c__ion_double_array_data_ab1') }}
+select
+    _airbyte_partition_hashid,
+    cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('nested_stream_with_c__ion_double_array_data_ab1') }}
+-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab3.sql
new file mode 100644
index 0000000000000..c83657e465f6f
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__ion_double_array_data_ab3.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to build a hash column based on the values of this record
+-- depends_on: {{ ref('nested_stream_with_c__ion_double_array_data_ab2') }}
+select
+    {{ dbt_utils.surrogate_key([
+        '_airbyte_partition_hashid',
+        adapter.quote('id'),
+    ]) }} as _airbyte_double_array_data_hashid,
+    tmp.*
+from {{ ref('nested_stream_with_c__ion_double_array_data_ab2') }} tmp
+-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab1.sql
new file mode 100644
index 0000000000000..767a1071f1745
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab1.sql
@@ -0,0 +1,20 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }}
+select
+    {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }},
+    {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }},
+    {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias
+-- nested_stream_with_c__lting_into_long_names
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab2.sql
new file mode 100644
index 0000000000000..6739cf914f383
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_c__lting_into_long_names_ab2.sql
@@ -0,0 +1,20 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('nested_stream_with_c__lting_into_long_names_ab1') }}
+select
+    cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
+    cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('date') }},
+    cast({{ adapter.quote('partition') }} as {{ type_json() }}) as {{ adapter.quote('partition') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('nested_stream_with_c__lting_into_long_names_ab1') }}
+-- nested_stream_with_c__lting_into_long_names
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab1.sql
new file mode 100644
index 0000000000000..dfbf901b64ab4
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab1.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ source('test_normalization', '_airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names') }}
+select
+    {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }},
+    {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ source('test_normalization', '_airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names') }} as table_alias
+-- non_nested_stream_wi__lting_into_long_names
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab2.sql
new file mode 100644
index 0000000000000..3488676ec99f7
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab2.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('non_nested_stream_wi__lting_into_long_names_ab1') }}
+select
+    cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
+    cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('date') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('non_nested_stream_wi__lting_into_long_names_ab1') }}
+-- non_nested_stream_wi__lting_into_long_names
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab3.sql
new file mode 100644
index 0000000000000..a673655e03ff7
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/non_nested_stream_wi__lting_into_long_names_ab3.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to build a hash column based on the values of this record
+-- depends_on: {{ ref('non_nested_stream_wi__lting_into_long_names_ab2') }}
+select
+    {{ dbt_utils.surrogate_key([
+        adapter.quote('id'),
+        adapter.quote('date'),
+    ]) }} as _airbyte_non_nested___nto_long_names_hashid,
+    tmp.*
+from {{ ref('non_nested_stream_wi__lting_into_long_names_ab2') }} tmp
+-- non_nested_stream_wi__lting_into_long_names
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab1.sql
new file mode 100644
index 0000000000000..6862a6ac2688c
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab1.sql
@@ -0,0 +1,19 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }}
+select
+    {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }},
+    {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} as table_alias
+-- some_stream_that_was_empty
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab2.sql
new file mode 100644
index 0000000000000..258f8b697b564
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/some_stream_that_was_empty_ab2.sql
@@ -0,0 +1,19 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('some_stream_that_was_empty_ab1') }}
+select
+    cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
+    cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('date') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('some_stream_that_was_empty_ab1') }}
+-- some_stream_that_was_empty
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab1.sql
new file mode 100644
index 0000000000000..60085cd403242
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab1.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ source('test_normalization', '_airbyte_raw_unnest_alias') }}
+select
+    {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }},
+    {{ json_extract_array('_airbyte_data', ['children'], ['children']) }} as children,
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ source('test_normalization', '_airbyte_raw_unnest_alias') }} as table_alias
+-- unnest_alias
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab2.sql
new file mode 100644
index 0000000000000..3bffe697fa097
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab2.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('unnest_alias_ab1') }}
+select
+    cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }},
+    children,
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('unnest_alias_ab1') }}
+-- unnest_alias
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab3.sql
new file mode 100644
index 0000000000000..36d29cbc26e67
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_ab3.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to build a hash column based on the values of this record
+-- depends_on: {{ ref('unnest_alias_ab2') }}
+select
+    {{ dbt_utils.surrogate_key([
+        adapter.quote('id'),
+        array_to_string('children'),
+    ]) }} as _airbyte_unnest_alias_hashid,
+    tmp.*
+from {{ ref('unnest_alias_ab2') }} tmp
+-- unnest_alias
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab1.sql
new file mode 100644
index 0000000000000..505c4699fcc39
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab1.sql
@@ -0,0 +1,20 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ ref('unnest_alias_children_owner') }}
+{{ unnest_cte(ref('unnest_alias_children_owner'), 'owner', adapter.quote('column`_\'with""_quotes')) }}
+select
+    _airbyte_owner_hashid,
+    {{ json_extract_scalar(unnested_column_value(adapter.quote('column`_\'with""_quotes')), ['currency'], ['currency']) }} as currency,
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('unnest_alias_children_owner') }} as table_alias
+-- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes
+{{ cross_join_unnest('owner', adapter.quote('column`_\'with""_quotes')) }}
+where 1 = 1
+and {{ adapter.quote('column`_\'with""_quotes') }} is not null
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab2.sql
new file mode 100644
index 0000000000000..fe150b0ef18cd
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab2.sql
@@ -0,0 +1,17 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('unnest_alias_childre__column___with__quotes_ab1') }}
+select
+    _airbyte_owner_hashid,
+    cast(currency as {{ dbt_utils.type_string() }}) as currency,
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('unnest_alias_childre__column___with__quotes_ab1') }}
+-- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes
+where 1 = 1
+
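Note: each level of nesting carries its parent's surrogate key, which keeps unnested child rows joinable back to their parents: `unnest_alias_ab3` above emits `_airbyte_unnest_alias_hashid`, the `unnest_alias_children_*` and `unnest_alias_children_owner_*` models below fold it level by level into `_airbyte_children_hashid` and `_airbyte_owner_hashid`, and the `_ab3` model that follows folds that into `_airbyte_column___with__quotes_hashid`. A hypothetical read-back join over the final tables (table and schema names inferred from the model names, not taken from this patch) could look like:

    select o.owner_id, q.currency
    from test_normalization.unnest_alias_children_owner o
    join test_normalization.unnest_alias_childre__column___with__quotes q
      on q._airbyte_owner_hashid = o._airbyte_owner_hashid;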
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab3.sql
new file mode 100644
index 0000000000000..86b03f9708a90
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_childre__column___with__quotes_ab3.sql
@@ -0,0 +1,17 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to build a hash column based on the values of this record
+-- depends_on: {{ ref('unnest_alias_childre__column___with__quotes_ab2') }}
+select
+    {{ dbt_utils.surrogate_key([
+        '_airbyte_owner_hashid',
+        'currency',
+    ]) }} as _airbyte_column___with__quotes_hashid,
+    tmp.*
+from {{ ref('unnest_alias_childre__column___with__quotes_ab2') }} tmp
+-- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab1.sql
new file mode 100644
index 0000000000000..6ac97b369163b
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab1.sql
@@ -0,0 +1,21 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ ref('unnest_alias') }}
+{{ unnest_cte(ref('unnest_alias'), 'unnest_alias', 'children') }}
+select
+    _airbyte_unnest_alias_hashid,
+    {{ json_extract_scalar(unnested_column_value('children'), ['ab_id'], ['ab_id']) }} as ab_id,
+    {{ json_extract('', unnested_column_value('children'), ['owner'], ['owner']) }} as {{ adapter.quote('owner') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('unnest_alias') }} as table_alias
+-- children at unnest_alias/children
+{{ cross_join_unnest('unnest_alias', 'children') }}
+where 1 = 1
+and children is not null
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab2.sql
new file mode 100644
index 0000000000000..aa7bd0d46c1de
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab2.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('unnest_alias_children_ab1') }}
+select
+    _airbyte_unnest_alias_hashid,
+    cast(ab_id as {{ dbt_utils.type_bigint() }}) as ab_id,
+    cast({{ adapter.quote('owner') }} as {{ type_json() }}) as {{ adapter.quote('owner') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('unnest_alias_children_ab1') }}
+-- children at unnest_alias/children
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab3.sql
new file mode 100644
index 0000000000000..e5a3aa0268c54
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_ab3.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to build a hash column based on the values of this record
+-- depends_on: {{ ref('unnest_alias_children_ab2') }}
+select
+    {{ dbt_utils.surrogate_key([
+        '_airbyte_unnest_alias_hashid',
+        'ab_id',
+        object_to_string(adapter.quote('owner')),
+    ]) }} as _airbyte_children_hashid,
+    tmp.*
+from {{ ref('unnest_alias_children_ab2') }} tmp
+-- children at unnest_alias/children
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab1.sql
new file mode 100644
index 0000000000000..1fe7e748b55dc
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab1.sql
@@ -0,0 +1,19 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ ref('unnest_alias_children') }}
+select
+    _airbyte_children_hashid,
+    {{ json_extract_scalar(adapter.quote('owner'), ['owner_id'], ['owner_id']) }} as owner_id,
+    {{ json_extract_array(adapter.quote('owner'), ['column`_\'with"_quotes'], ['column___with__quotes']) }} as {{ adapter.quote('column`_\'with""_quotes') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('unnest_alias_children') }} as table_alias
+-- owner at unnest_alias/children/owner
+where 1 = 1
+and {{ adapter.quote('owner') }} is not null
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab2.sql
new file mode 100644
index 0000000000000..d6a8942fa8c59
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab2.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('unnest_alias_children_owner_ab1') }}
+select
+    _airbyte_children_hashid,
+    cast(owner_id as {{ dbt_utils.type_bigint() }}) as owner_id,
+    {{ adapter.quote('column`_\'with""_quotes') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('unnest_alias_children_owner_ab1') }}
+-- owner at unnest_alias/children/owner
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab3.sql
new file mode 100644
index 0000000000000..46eeb0375687b
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization/unnest_alias_children_owner_ab3.sql
@@ -0,0 +1,18 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    schema = "_airbyte_test_normalization",
+    tags = [ "nested-intermediate" ]
+) }}
+-- SQL model to build a hash column based on the values of this record
+-- depends_on: {{ ref('unnest_alias_children_owner_ab2') }}
+select
+    {{ dbt_utils.surrogate_key([
+        '_airbyte_children_hashid',
+        'owner_id',
+        array_to_string(adapter.quote('column`_\'with""_quotes')),
+    ]) }} as _airbyte_owner_hashid,
+    tmp.*
+from {{ ref('unnest_alias_children_owner_ab2') }} tmp
+-- owner at unnest_alias/children/owner
+where 1 = 1
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab1.sql
new file mode 100644
index 0000000000000..b732876827659
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab1.sql
@@ -0,0 +1,19 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization_namespace",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
+-- depends_on: {{ source('test_normalization_namespace', '_airbyte_raw_simple_stream_with_namespace_resulting_into_long_names') }}
+select
+    {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }},
+    {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ source('test_normalization_namespace', '_airbyte_raw_simple_stream_with_namespace_resulting_into_long_names') }} as table_alias
+-- simple_stream_with_n__lting_into_long_names
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab2.sql
new file mode 100644
index 0000000000000..a2f35bfcefb1c
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab2.sql
@@ -0,0 +1,19 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization_namespace",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
+-- depends_on: {{ ref('simple_stream_with_n__lting_into_long_names_ab1') }}
+select
+    cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('id') }},
+    cast({{ adapter.quote('date') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('date') }},
+    _airbyte_ab_id,
+    _airbyte_emitted_at,
+    {{ current_timestamp() }} as _airbyte_normalized_at
+from {{ ref('simple_stream_with_n__lting_into_long_names_ab1') }}
+-- simple_stream_with_n__lting_into_long_names
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab3.sql
new file mode 100644
index 0000000000000..231ba585f7024
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_ctes/test_normalization_namespace/simple_stream_with_n__lting_into_long_names_ab3.sql
@@ -0,0 +1,19 @@
+{{ config(
+    indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}],
+    unique_key = '_airbyte_ab_id',
+    schema = "_airbyte_test_normalization_namespace",
+    tags = [ "top-level-intermediate" ]
+) }}
+-- SQL model to build a hash column based on the values of this record
+-- depends_on: {{ ref('simple_stream_with_n__lting_into_long_names_ab2') }}
+select
+    {{ dbt_utils.surrogate_key([
+        adapter.quote('id'),
+        adapter.quote('date'),
+    ]) }} as _airbyte_simple_stre__nto_long_names_hashid,
+    tmp.*
+from {{ ref('simple_stream_with_n__lting_into_long_names_ab2') }} tmp
+-- simple_stream_with_n__lting_into_long_names
+where 1 = 1
+{{ incremental_clause('_airbyte_emitted_at', this) }}
+
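Note: the file that follows is the centerpiece of this fixture set, an incremental Type 2 SCD model. Stripped of the Jinja plumbing, its core is a pair of window functions that derive a validity range and an active-row flag per primary key, roughly as sketched below (illustrative only; the real model also breaks ties on `_airbyte_emitted_at` and orders null cursor values first):

    select
        "id",
        "date" as _airbyte_start_at,
        lag("date") over (partition by "id" order by "date" desc) as _airbyte_end_at,
        case when row_number() over (partition by "id" order by "date" desc) = 1
            then 1 else 0 end as _airbyte_active_row
    from nested_stream_with_c__lting_into_long_names_stg;

The post_hook then prunes rows from the final table whose unique key no longer has any active SCD row.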
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql new file mode 100644 index 0000000000000..5eaf6186aaab4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -0,0 +1,163 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_c__lting_into_long_names' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_c__lting_into_long_names')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.nested_stream_with_c__lting_into_long_names_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('nested_stream_with_c__lting_into_long_names_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('nested_stream_with_c__lting_into_long_names_stg') }} + -- nested_stream_with_c__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('nested_stream_with_c__lting_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('nested_stream_with_c__lting_into_long_names_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('nested_stream_with_c__lting_into_long_names_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('nested_stream_with_c__lting_into_long_names_stg') }} + -- nested_stream_with_c__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, + {{ adapter.quote('date') }} as _airbyte_start_at, + lag({{ adapter.quote('date') }}) over ( + partition by {{ adapter.quote('id') }} + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as 
_airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_nested_stre__nto_long_names_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_stre__nto_long_names_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql new file mode 100644 index 0000000000000..c35233d432cb3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -0,0 +1,161 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='some_stream_that_was_empty' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
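+    -- [Editorial sketch, not part of the generated model] To make the templated
+    -- post-hook concrete: on Postgres the delete below renders to roughly the
+    -- following shape. Table names are taken from this file; the
+    -- "_airbyte_normalized_at >= (select max(...))" bound is a simplified
+    -- stand-in for what incremental_clause() actually emits (the real macro
+    -- also handles the first run, before any high-water mark exists):
+    --
+    --   delete from "postgres".test_normalization."some_stream_that_was_empty"
+    --   where _airbyte_unique_key in (
+    --     select recent_records.unique_key
+    --     from (
+    --       select distinct _airbyte_unique_key as unique_key
+    --       from "postgres".test_normalization."some_stream_that_was_empty_scd"
+    --       where 1 = 1
+    --         and _airbyte_normalized_at >= (select max(_airbyte_normalized_at)
+    --                                          from test_normalization."some_stream_that_was_empty")
+    --     ) recent_records
+    --     left join (
+    --       select _airbyte_unique_key as unique_key,
+    --              count(_airbyte_unique_key) as active_count
+    --       from "postgres".test_normalization."some_stream_that_was_empty_scd"
+    --       where _airbyte_active_row = 1
+    --         and _airbyte_normalized_at >= (select max(_airbyte_normalized_at)
+    --                                          from test_normalization."some_stream_that_was_empty")
+    --       group by _airbyte_unique_key
+    --     ) active_counts
+    --       on recent_records.unique_key = active_counts.unique_key
+    --     where active_count is null or active_count = 0
+    --   )
+    --
+    -- The left join plus "active_count is null or active_count = 0" is an
+    -- anti-join: it keeps only unique keys that were recently re-normalized
+    -- but no longer have any active SCD row, which is exactly the set the
+    -- slower "in (...) and not in (...)" formulation above would produce.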
+ delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('some_stream_that_was_empty')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.some_stream_that_was_empty_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.some_stream_that_was_empty_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('some_stream_that_was_empty_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('some_stream_that_was_empty_stg') }} + -- some_stream_that_was_empty from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('some_stream_that_was_empty_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('some_stream_that_was_empty_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('some_stream_that_was_empty_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('some_stream_that_was_empty_stg') }} + -- some_stream_that_was_empty from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + {{ adapter.quote('date') }} as _airbyte_start_at, + lag({{ adapter.quote('date') }}) over ( + partition by {{ adapter.quote('id') }} + order by + {{ adapter.quote('date') }} is null asc, + {{ 
adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_some_stream_that_was_empty_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_some_stream_that_was_empty_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql new file mode 100644 index 0000000000000..92e9c5d4fe088 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_c___long_names_partition_ab3') }} +select + _airbyte_nested_stre__nto_long_names_hashid, + double_array_data, + {{ adapter.quote('DATA') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_partition_hashid +from {{ ref('nested_stream_with_c___long_names_partition_ab3') }} +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_c__lting_into_long_names_scd') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql new file mode 100644 index 0000000000000..f453cd838e21f --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_c___names_partition_data_ab3') }} +select + _airbyte_partition_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_data_hashid +from {{ ref('nested_stream_with_c___names_partition_data_ab3') }} +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_c___long_names_partition') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql new file mode 100644 index 0000000000000..ea7bc2e780956 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_c__ion_double_array_data_ab3') }} +select + _airbyte_partition_hashid, + {{ adapter.quote('id') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_double_array_data_hashid +from {{ ref('nested_stream_with_c__ion_double_array_data_ab3') }} +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_c___long_names_partition') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql new file mode 100644 index 0000000000000..26c3aded7063d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql @@ -0,0 +1,23 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_c__lting_into_long_names_scd') }} +select + 
_airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_stre__nto_long_names_hashid +from {{ ref('nested_stream_with_c__lting_into_long_names_scd') }} +-- nested_stream_with_c__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql new file mode 100644 index 0000000000000..8249fe95741a4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('nested_stream_with_c__lting_into_long_names_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + adapter.quote('date'), + object_to_string(adapter.quote('partition')), + ]) }} as _airbyte_nested_stre__nto_long_names_hashid, + tmp.* +from {{ ref('nested_stream_with_c__lting_into_long_names_ab2') }} tmp +-- nested_stream_with_c__lting_into_long_names +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql new file mode 100644 index 0000000000000..23bcd85bcf91c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('some_stream_that_was_empty_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_some_stream_that_was_empty_hashid +from {{ ref('some_stream_that_was_empty_scd') }} +-- some_stream_that_was_empty from {{ source('test_normalization', '_airbyte_raw_some_stream_that_was_empty') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ 
incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql new file mode 100644 index 0000000000000..ca645527eca86 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('some_stream_that_was_empty_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + adapter.quote('date'), + ]) }} as _airbyte_some_stream_that_was_empty_hashid, + tmp.* +from {{ ref('some_stream_that_was_empty_ab2') }} tmp +-- some_stream_that_was_empty +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql new file mode 100644 index 0000000000000..7f70fc83c6163 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "test_normalization_namespace", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('simple_stream_with_n__lting_into_long_names_ab3') }} +select + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_simple_stre__nto_long_names_hashid +from {{ ref('simple_stream_with_n__lting_into_long_names_ab3') }} +-- simple_stream_with_n__lting_into_long_names from {{ source('test_normalization_namespace', '_airbyte_raw_simple_stream_with_namespace_resulting_into_long_names') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays.sql new file mode 100644 index 0000000000000..875d028168620 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('arrays_ab3') }} +select + array_of_strings, + nested_array_parent, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_arrays_hashid +from {{ ref('arrays_ab3') }} +-- arrays from {{ source('test_normalization', '_airbyte_raw_arrays') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays_nested_array_parent.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays_nested_array_parent.sql new file mode 100644 index 0000000000000..73f13e380ac25 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/arrays_nested_array_parent.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('arrays_nested_array_parent_ab3') }} +select + _airbyte_arrays_hashid, + nested_array, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_array_parent_hashid +from {{ ref('arrays_nested_array_parent_ab3') }} +-- nested_array_parent at arrays/nested_array_parent from {{ ref('arrays') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_array.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_array.sql new file mode 100644 index 0000000000000..ede71a891dc05 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_array.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('conflict_stream_array_ab3') }} +select + {{ adapter.quote('id') }}, + conflict_stream_array, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_conflict_stream_array_hashid +from {{ ref('conflict_stream_array_ab3') }} +-- conflict_stream_array from {{ source('test_normalization', '_airbyte_raw_conflict_stream_array') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name.sql new file mode 100644 index 0000000000000..f203166febe17 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('conflict_stream_name_ab3') }} +select + {{ adapter.quote('id') }}, + conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_conflict_stream_name_hashid +from {{ ref('conflict_stream_name_ab3') }} +-- conflict_stream_name from {{ source('test_normalization', '_airbyte_raw_conflict_stream_name') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql new file mode 100644 index 0000000000000..2c221c2940b75 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('conflict_stream_name___conflict_stream_name_ab3') }} +select + _airbyte_conflict_stream_name_2_hashid, + groups, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_conflict_stream_name_3_hashid +from {{ ref('conflict_stream_name___conflict_stream_name_ab3') }} +-- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name from {{ ref('conflict_stream_name_conflict_stream_name') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql new file mode 100644 index 0000000000000..195d067ffe415 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('conflict_stream_name_conflict_stream_name_ab3') }} +select + 
_airbyte_conflict_stream_name_hashid, + conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_conflict_stream_name_2_hashid +from {{ ref('conflict_stream_name_conflict_stream_name_ab3') }} +-- conflict_stream_name at conflict_stream_name/conflict_stream_name from {{ ref('conflict_stream_name') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_scalar.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_scalar.sql new file mode 100644 index 0000000000000..31f263905b533 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/conflict_stream_scalar.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('conflict_stream_scalar_ab3') }} +select + {{ adapter.quote('id') }}, + conflict_stream_scalar, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_conflict_stream_scalar_hashid +from {{ ref('conflict_stream_scalar_ab3') }} +-- conflict_stream_scalar from {{ source('test_normalization', '_airbyte_raw_conflict_stream_scalar') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql new file mode 100644 index 0000000000000..8b4cddcd4b179 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('non_nested_stream_wi__lting_into_long_names_ab3') }} +select + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_non_nested___nto_long_names_hashid +from {{ ref('non_nested_stream_wi__lting_into_long_names_ab3') }} +-- non_nested_stream_wi__lting_into_long_names from {{ source('test_normalization', '_airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias.sql new file mode 100644 index 0000000000000..7c113e7291b5d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('unnest_alias_ab3') }} +select + {{ adapter.quote('id') }}, + children, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_unnest_alias_hashid +from {{ ref('unnest_alias_ab3') }} +-- unnest_alias from {{ source('test_normalization', '_airbyte_raw_unnest_alias') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql new file mode 100644 index 0000000000000..ae4165f58160f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql @@ -0,0 +1,18 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('unnest_alias_childre__column___with__quotes_ab3') }} +select + _airbyte_owner_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_column___with__quotes_hashid +from {{ ref('unnest_alias_childre__column___with__quotes_ab3') }} +-- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes from {{ ref('unnest_alias_children_owner') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children.sql new file mode 100644 index 0000000000000..9f98219880ec5 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('unnest_alias_children_ab3') }} +select + _airbyte_unnest_alias_hashid, + ab_id, + {{ adapter.quote('owner') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_children_hashid +from {{ 
ref('unnest_alias_children_ab3') }} +-- children at unnest_alias/children from {{ ref('unnest_alias') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children_owner.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children_owner.sql new file mode 100644 index 0000000000000..14c766c3dd59f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_tables/test_normalization/unnest_alias_children_owner.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + schema = "test_normalization", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('unnest_alias_children_owner_ab3') }} +select + _airbyte_children_hashid, + owner_id, + {{ adapter.quote('column`_\'with""_quotes') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_owner_hashid +from {{ ref('unnest_alias_children_owner_ab3') }} +-- owner at unnest_alias/children/owner from {{ ref('unnest_alias_children') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..29bae1b4b5105 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/sources.yml @@ -0,0 +1,23 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_arrays + - name: _airbyte_raw_conflict_stream_array + - name: _airbyte_raw_conflict_stream_name + - name: _airbyte_raw_conflict_stream_scalar + - name: _airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + - name: _airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + - name: _airbyte_raw_some_stream_that_was_empty + - name: _airbyte_raw_unnest_alias +- name: test_normalization_namespace + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_simple_stream_with_namespace_resulting_into_long_names diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql new file mode 100644 index 0000000000000..7026a868cc5d3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -0,0 +1,15 @@ + + + delete from 
"postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "nested_stream_with_c__lting_into_long_name__dbt_tmp" + ); + + + insert into "postgres".test_normalization."nested_stream_with_c__lting_into_long_names_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stre__nto_long_names_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stre__nto_long_names_hashid" + from "nested_stream_with_c__lting_into_long_name__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql new file mode 100644 index 0000000000000..cb4ff47eeea78 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."some_stream_that_was_empty_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "some_stream_that_was_empty_scd__dbt_tmp" + ); + + + insert into "postgres".test_normalization."some_stream_that_was_empty_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_some_stream_that_was_empty_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_some_stream_that_was_empty_hashid" + from "some_stream_that_was_empty_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql new file mode 100644 index 0000000000000..3a98824ffdd13 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c___long_names_partition.sql @@ -0,0 +1,9 @@ + + + + insert into "postgres".test_normalization."nested_stream_with_c___long_names_partition" ("_airbyte_nested_stre__nto_long_names_hashid", "double_array_data", "DATA", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", 
"_airbyte_partition_hashid") + ( + select "_airbyte_nested_stre__nto_long_names_hashid", "double_array_data", "DATA", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid" + from "nested_stream_with_c___long_names_partitio__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql new file mode 100644 index 0000000000000..b90c9cb238e0b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c___names_partition_data.sql @@ -0,0 +1,9 @@ + + + + insert into "postgres".test_normalization."nested_stream_with_c___names_partition_data" ("_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid") + ( + select "_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid" + from "nested_stream_with_c___names_partition_dat__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql new file mode 100644 index 0000000000000..98dfb2ba788b4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__ion_double_array_data.sql @@ -0,0 +1,9 @@ + + + + insert into "postgres".test_normalization."nested_stream_with_c__ion_double_array_data" ("_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid") + ( + select "_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid" + from "nested_stream_with_c__ion_double_array_dat__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql new file mode 100644 index 0000000000000..bf109e096b702 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names.sql @@ -0,0 
+1,15 @@ + + + delete from "postgres".test_normalization."nested_stream_with_c__lting_into_long_names" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "nested_stream_with_c__lting_into_long_name__dbt_tmp" + ); + + + insert into "postgres".test_normalization."nested_stream_with_c__lting_into_long_names" ("_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stre__nto_long_names_hashid") + ( + select "_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stre__nto_long_names_hashid" + from "nested_stream_with_c__lting_into_long_name__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql new file mode 100644 index 0000000000000..275461f97657f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_c__lting_into_long_names_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "nested_stream_with_c__lting_into_long_name__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."nested_stream_with_c__lting_into_long_names_stg" ("_airbyte_nested_stre__nto_long_names_hashid", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_nested_stre__nto_long_names_hashid", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "nested_stream_with_c__lting_into_long_name__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql new file mode 100644 index 0000000000000..97759325fe3df --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."some_stream_that_was_empty" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "some_stream_that_was_empty__dbt_tmp" + ); + + + insert into "postgres".test_normalization."some_stream_that_was_empty" ("_airbyte_unique_key", "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_some_stream_that_was_empty_hashid") + ( + select "_airbyte_unique_key", "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", 
"_airbyte_some_stream_that_was_empty_hashid" + from "some_stream_that_was_empty__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql new file mode 100644 index 0000000000000..a0aa7cb30dd64 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization/some_stream_that_was_empty_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."some_stream_that_was_empty_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "some_stream_that_was_empty_stg__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."some_stream_that_was_empty_stg" ("_airbyte_some_stream_that_was_empty_hashid", "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_some_stream_that_was_empty_hashid", "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "some_stream_that_was_empty_stg__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql new file mode 100644 index 0000000000000..b3397712e600e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_incremental/test_normalization_namespace/simple_stream_with_n__lting_into_long_names.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization_namespace."simple_stream_with_n__lting_into_long_names" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "simple_stream_with_n__lting_into_long_name__dbt_tmp" + ); + + + insert into "postgres".test_normalization_namespace."simple_stream_with_n__lting_into_long_names" ("id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_simple_stre__nto_long_names_hashid") + ( + select "id", "date", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_simple_stre__nto_long_names_hashid" + from "simple_stream_with_n__lting_into_long_name__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/arrays.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/arrays.sql new file mode 100644 index 0000000000000..e10c4619e53a4 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/arrays.sql @@ -0,0 +1,58 @@ + + + create table "postgres".test_normalization."arrays__dbt_tmp" + as ( + +with __dbt__cte__arrays_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_arrays +select + jsonb_extract_path(_airbyte_data, 'array_of_strings') as array_of_strings, + + jsonb_extract_path(table_alias._airbyte_data, 'nested_array_parent') + as nested_array_parent, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_arrays as table_alias +-- arrays +where 1 = 1 +), __dbt__cte__arrays_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__arrays_ab1 +select + array_of_strings, + cast(nested_array_parent as + jsonb +) as nested_array_parent, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__arrays_ab1 +-- arrays +where 1 = 1 +), __dbt__cte__arrays_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__arrays_ab2 +select + md5(cast(coalesce(cast(array_of_strings as text), '') || '-' || coalesce(cast(nested_array_parent as text), '') as text)) as _airbyte_arrays_hashid, + tmp.* +from __dbt__cte__arrays_ab2 tmp +-- arrays +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__arrays_ab3 +select + array_of_strings, + nested_array_parent, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_arrays_hashid +from __dbt__cte__arrays_ab3 +-- arrays from "postgres".test_normalization._airbyte_raw_arrays +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/arrays_nested_array_parent.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/arrays_nested_array_parent.sql new file mode 100644 index 0000000000000..09ad8fe3cd3f9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/arrays_nested_array_parent.sql @@ -0,0 +1,55 @@ + + + create table "postgres".test_normalization."arrays_nested_array_parent__dbt_tmp" + as ( + +with __dbt__cte__arrays_nested_array_parent_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."arrays" +select + _airbyte_arrays_hashid, + jsonb_extract_path(nested_array_parent, 'nested_array') as nested_array, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."arrays" as table_alias +-- nested_array_parent at arrays/nested_array_parent +where 1 = 1 +and nested_array_parent is not null +), __dbt__cte__arrays_nested_array_parent_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: 
__dbt__cte__arrays_nested_array_parent_ab1 +select + _airbyte_arrays_hashid, + nested_array, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__arrays_nested_array_parent_ab1 +-- nested_array_parent at arrays/nested_array_parent +where 1 = 1 +), __dbt__cte__arrays_nested_array_parent_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__arrays_nested_array_parent_ab2 +select + md5(cast(coalesce(cast(_airbyte_arrays_hashid as text), '') || '-' || coalesce(cast(nested_array as text), '') as text)) as _airbyte_nested_array_parent_hashid, + tmp.* +from __dbt__cte__arrays_nested_array_parent_ab2 tmp +-- nested_array_parent at arrays/nested_array_parent +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__arrays_nested_array_parent_ab3 +select + _airbyte_arrays_hashid, + nested_array, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_nested_array_parent_hashid +from __dbt__cte__arrays_nested_array_parent_ab3 +-- nested_array_parent at arrays/nested_array_parent from "postgres".test_normalization."arrays" +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_array.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_array.sql new file mode 100644 index 0000000000000..c1c6ab12a7b7c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_array.sql @@ -0,0 +1,54 @@ + + + create table "postgres".test_normalization."conflict_stream_array__dbt_tmp" + as ( + +with __dbt__cte__conflict_stream_array_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_conflict_stream_array +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path(_airbyte_data, 'conflict_stream_array') as conflict_stream_array, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_conflict_stream_array as table_alias +-- conflict_stream_array +where 1 = 1 +), __dbt__cte__conflict_stream_array_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__conflict_stream_array_ab1 +select + cast("id" as text) as "id", + conflict_stream_array, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__conflict_stream_array_ab1 +-- conflict_stream_array +where 1 = 1 +), __dbt__cte__conflict_stream_array_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__conflict_stream_array_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_array as text), '') as text)) as _airbyte_conflict_stream_array_hashid, + tmp.* +from __dbt__cte__conflict_stream_array_ab2 tmp +-- conflict_stream_array +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__conflict_stream_array_ab3 +select + "id", + 
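Every *_ab1/*_ab2/*_ab3 chain in these table models is the same three-stage pipeline: extract fields from the raw JSON blob, cast them to their declared types, then hash them into a synthetic id. A condensed sketch against a throwaway table; all names are illustrative:

-- jsonb_extract_path_text returns text (scalars); jsonb_extract_path keeps jsonb (objects/arrays).
create temp table _airbyte_raw_demo (
    _airbyte_data jsonb,
    _airbyte_ab_id text,
    _airbyte_emitted_at timestamptz
);
insert into _airbyte_raw_demo
values ('{"id": "1", "nested": {"k": "v"}}', 'ab-1', now());

with demo_ab1 as (  -- stage 1: extract
    select
        jsonb_extract_path_text(_airbyte_data, 'id') as "id",
        jsonb_extract_path(_airbyte_data, 'nested') as nested,
        _airbyte_ab_id,
        _airbyte_emitted_at
    from _airbyte_raw_demo
), demo_ab2 as (    -- stage 2: cast to declared types
    select cast("id" as text) as "id", cast(nested as jsonb) as nested,
           _airbyte_ab_id, _airbyte_emitted_at
    from demo_ab1
)                   -- stage 3: md5 over '-'-joined, null-coalesced casts of every field
select
    md5(cast(coalesce(cast("id" as text), '') || '-'
          || coalesce(cast(nested as text), '') as text)) as _airbyte_demo_hashid,
    tmp.*
from demo_ab2 tmp;

The conflict_stream_* models that follow vary only the stage-2 cast (text, bigint, or jsonb).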
conflict_stream_array, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_conflict_stream_array_hashid +from __dbt__cte__conflict_stream_array_ab3 +-- conflict_stream_array from "postgres".test_normalization._airbyte_raw_conflict_stream_array +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name.sql new file mode 100644 index 0000000000000..ac5cffb8d00d9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name.sql @@ -0,0 +1,58 @@ + + + create table "postgres".test_normalization."conflict_stream_name__dbt_tmp" + as ( + +with __dbt__cte__conflict_stream_name_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_conflict_stream_name +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + + jsonb_extract_path(table_alias._airbyte_data, 'conflict_stream_name') + as conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_conflict_stream_name as table_alias +-- conflict_stream_name +where 1 = 1 +), __dbt__cte__conflict_stream_name_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__conflict_stream_name_ab1 +select + cast("id" as text) as "id", + cast(conflict_stream_name as + jsonb +) as conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__conflict_stream_name_ab1 +-- conflict_stream_name +where 1 = 1 +), __dbt__cte__conflict_stream_name_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__conflict_stream_name_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_name as text), '') as text)) as _airbyte_conflict_stream_name_hashid, + tmp.* +from __dbt__cte__conflict_stream_name_ab2 tmp +-- conflict_stream_name +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__conflict_stream_name_ab3 +select + "id", + conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_conflict_stream_name_hashid +from __dbt__cte__conflict_stream_name_ab3 +-- conflict_stream_name from "postgres".test_normalization._airbyte_raw_conflict_stream_name +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql new file mode 100644 index 0000000000000..4aa2c420ed45d --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name___conflict_stream_name.sql @@ -0,0 +1,55 @@ + + + create table "postgres".test_normalization."conflict_stream_name___conflict_stream_name__dbt_tmp" + as ( + +with __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."conflict_stream_name_conflict_stream_name" +select + _airbyte_conflict_stream_name_2_hashid, + jsonb_extract_path_text(conflict_stream_name, 'groups') as groups, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."conflict_stream_name_conflict_stream_name" as table_alias +-- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name +where 1 = 1 +and conflict_stream_name is not null +), __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 +select + _airbyte_conflict_stream_name_2_hashid, + cast(groups as text) as groups, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__conflict_stream_name___conflict_stream_name_ab1 +-- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name +where 1 = 1 +), __dbt__cte__conflict_stream_name___conflict_stream_name_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 +select + md5(cast(coalesce(cast(_airbyte_conflict_stream_name_2_hashid as text), '') || '-' || coalesce(cast(groups as text), '') as text)) as _airbyte_conflict_stream_name_3_hashid, + tmp.* +from __dbt__cte__conflict_stream_name___conflict_stream_name_ab2 tmp +-- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__conflict_stream_name___conflict_stream_name_ab3 +select + _airbyte_conflict_stream_name_2_hashid, + groups, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_conflict_stream_name_3_hashid +from __dbt__cte__conflict_stream_name___conflict_stream_name_ab3 +-- conflict_stream_name at conflict_stream_name/conflict_stream_name/conflict_stream_name from "postgres".test_normalization."conflict_stream_name_conflict_stream_name" +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql new file mode 100644 index 0000000000000..82dfb023674e5 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_name_conflict_stream_name.sql @@ -0,0 +1,59 @@ + + + create 
table "postgres".test_normalization."conflict_stream_name_conflict_stream_name__dbt_tmp" + as ( + +with __dbt__cte__conflict_stream_name_conflict_stream_name_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."conflict_stream_name" +select + _airbyte_conflict_stream_name_hashid, + + jsonb_extract_path(table_alias.conflict_stream_name, 'conflict_stream_name') + as conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."conflict_stream_name" as table_alias +-- conflict_stream_name at conflict_stream_name/conflict_stream_name +where 1 = 1 +and conflict_stream_name is not null +), __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab1 +select + _airbyte_conflict_stream_name_hashid, + cast(conflict_stream_name as + jsonb +) as conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__conflict_stream_name_conflict_stream_name_ab1 +-- conflict_stream_name at conflict_stream_name/conflict_stream_name +where 1 = 1 +), __dbt__cte__conflict_stream_name_conflict_stream_name_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 +select + md5(cast(coalesce(cast(_airbyte_conflict_stream_name_hashid as text), '') || '-' || coalesce(cast(conflict_stream_name as text), '') as text)) as _airbyte_conflict_stream_name_2_hashid, + tmp.* +from __dbt__cte__conflict_stream_name_conflict_stream_name_ab2 tmp +-- conflict_stream_name at conflict_stream_name/conflict_stream_name +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__conflict_stream_name_conflict_stream_name_ab3 +select + _airbyte_conflict_stream_name_hashid, + conflict_stream_name, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_conflict_stream_name_2_hashid +from __dbt__cte__conflict_stream_name_conflict_stream_name_ab3 +-- conflict_stream_name at conflict_stream_name/conflict_stream_name from "postgres".test_normalization."conflict_stream_name" +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql new file mode 100644 index 0000000000000..09a4fa01de977 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/conflict_stream_scalar.sql @@ -0,0 +1,56 @@ + + + create table "postgres".test_normalization."conflict_stream_scalar__dbt_tmp" + as ( + +with __dbt__cte__conflict_stream_scalar_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_conflict_stream_scalar +select + 
jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'conflict_stream_scalar') as conflict_stream_scalar, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_conflict_stream_scalar as table_alias +-- conflict_stream_scalar +where 1 = 1 +), __dbt__cte__conflict_stream_scalar_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__conflict_stream_scalar_ab1 +select + cast("id" as text) as "id", + cast(conflict_stream_scalar as + bigint +) as conflict_stream_scalar, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__conflict_stream_scalar_ab1 +-- conflict_stream_scalar +where 1 = 1 +), __dbt__cte__conflict_stream_scalar_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__conflict_stream_scalar_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(conflict_stream_scalar as text), '') as text)) as _airbyte_conflict_stream_scalar_hashid, + tmp.* +from __dbt__cte__conflict_stream_scalar_ab2 tmp +-- conflict_stream_scalar +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__conflict_stream_scalar_ab3 +select + "id", + conflict_stream_scalar, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_conflict_stream_scalar_hashid +from __dbt__cte__conflict_stream_scalar_ab3 +-- conflict_stream_scalar from "postgres".test_normalization._airbyte_raw_conflict_stream_scalar +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql new file mode 100644 index 0000000000000..31d2176c3888c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/non_nested_stream_wi__lting_into_long_names.sql @@ -0,0 +1,54 @@ + + + create table "postgres".test_normalization."non_nested_stream_wi__lting_into_long_names__dbt_tmp" + as ( + +with __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'date') as "date", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names as table_alias +-- non_nested_stream_wi__lting_into_long_names +where 1 = 1 +), __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 +select + cast("id" as text) as "id", + 
cast("date" as text) as "date", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab1 +-- non_nested_stream_wi__lting_into_long_names +where 1 = 1 +), __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') as text)) as _airbyte_non_nested___nto_long_names_hashid, + tmp.* +from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab2 tmp +-- non_nested_stream_wi__lting_into_long_names +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab3 +select + "id", + "date", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_non_nested___nto_long_names_hashid +from __dbt__cte__non_nested_stream_wi__lting_into_long_names_ab3 +-- non_nested_stream_wi__lting_into_long_names from "postgres".test_normalization._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias.sql new file mode 100644 index 0000000000000..7af2f04f81f87 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias.sql @@ -0,0 +1,56 @@ + + + create table "postgres".test_normalization."unnest_alias__dbt_tmp" + as ( + +with __dbt__cte__unnest_alias_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_unnest_alias +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path(_airbyte_data, 'children') as children, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_unnest_alias as table_alias +-- unnest_alias +where 1 = 1 +), __dbt__cte__unnest_alias_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__unnest_alias_ab1 +select + cast("id" as + bigint +) as "id", + children, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__unnest_alias_ab1 +-- unnest_alias +where 1 = 1 +), __dbt__cte__unnest_alias_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__unnest_alias_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(children as text), '') as text)) as _airbyte_unnest_alias_hashid, + tmp.* +from __dbt__cte__unnest_alias_ab2 tmp +-- unnest_alias +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__unnest_alias_ab3 +select + "id", + children, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_unnest_alias_hashid +from __dbt__cte__unnest_alias_ab3 
+-- unnest_alias from "postgres".test_normalization._airbyte_raw_unnest_alias +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql new file mode 100644 index 0000000000000..6688069a62f01 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_childre__column___with__quotes.sql @@ -0,0 +1,61 @@ + + + create table "postgres".test_normalization."unnest_alias_childre__column___with__quotes__dbt_tmp" + as ( + +with __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."unnest_alias_children_owner" + +select + _airbyte_owner_hashid, + jsonb_extract_path_text(_airbyte_nested_data, 'currency') as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."unnest_alias_children_owner" as table_alias +-- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes +cross join jsonb_array_elements( + case jsonb_typeof("column`_'with""_quotes") + when 'array' then "column`_'with""_quotes" + else '[]' end + ) as _airbyte_nested_data +where 1 = 1 +and "column`_'with""_quotes" is not null +), __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 +select + _airbyte_owner_hashid, + cast(currency as text) as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__unnest_alias_childre__column___with__quotes_ab1 +-- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes +where 1 = 1 +), __dbt__cte__unnest_alias_childre__column___with__quotes_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 +select + md5(cast(coalesce(cast(_airbyte_owner_hashid as text), '') || '-' || coalesce(cast(currency as text), '') as text)) as _airbyte_column___with__quotes_hashid, + tmp.* +from __dbt__cte__unnest_alias_childre__column___with__quotes_ab2 tmp +-- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__unnest_alias_childre__column___with__quotes_ab3 +select + _airbyte_owner_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_column___with__quotes_hashid +from __dbt__cte__unnest_alias_childre__column___with__quotes_ab3 +-- column___with__quotes at unnest_alias/children/owner/column`_'with"_quotes from "postgres".test_normalization."unnest_alias_children_owner" +where 1 = 1 + ); \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children.sql new file mode 100644 index 0000000000000..779394d5765dc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children.sql @@ -0,0 +1,70 @@ + + + create table "postgres".test_normalization."unnest_alias_children__dbt_tmp" + as ( + +with __dbt__cte__unnest_alias_children_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."unnest_alias" + +select + _airbyte_unnest_alias_hashid, + jsonb_extract_path_text(_airbyte_nested_data, 'ab_id') as ab_id, + + jsonb_extract_path(_airbyte_nested_data, 'owner') + as "owner", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."unnest_alias" as table_alias +-- children at unnest_alias/children +cross join jsonb_array_elements( + case jsonb_typeof(children) + when 'array' then children + else '[]' end + ) as _airbyte_nested_data +where 1 = 1 +and children is not null +), __dbt__cte__unnest_alias_children_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__unnest_alias_children_ab1 +select + _airbyte_unnest_alias_hashid, + cast(ab_id as + bigint +) as ab_id, + cast("owner" as + jsonb +) as "owner", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__unnest_alias_children_ab1 +-- children at unnest_alias/children +where 1 = 1 +), __dbt__cte__unnest_alias_children_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__unnest_alias_children_ab2 +select + md5(cast(coalesce(cast(_airbyte_unnest_alias_hashid as text), '') || '-' || coalesce(cast(ab_id as text), '') || '-' || coalesce(cast("owner" as text), '') as text)) as _airbyte_children_hashid, + tmp.* +from __dbt__cte__unnest_alias_children_ab2 tmp +-- children at unnest_alias/children +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__unnest_alias_children_ab3 +select + _airbyte_unnest_alias_hashid, + ab_id, + "owner", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_children_hashid +from __dbt__cte__unnest_alias_children_ab3 +-- children at unnest_alias/children from "postgres".test_normalization."unnest_alias" +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql new file mode 100644 index 0000000000000..651e1c11914eb --- /dev/null +++ 
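The unnest_alias models expand JSON arrays into rows with jsonb_array_elements, guarding with jsonb_typeof so a non-array value degrades to an empty array rather than an error; the children/owner models also show that even a column named column`_'with"_quotes stays addressable once its embedded double quote is doubled inside the identifier. A minimal sketch with illustrative data:

-- Safe unnesting: non-array values fall through to '[]' and yield no rows.
create temp table unnest_demo (children jsonb);
insert into unnest_demo
values ('[{"ab_id": 1}, {"ab_id": 2}]'), ('"not-an-array"');

select jsonb_extract_path_text(_airbyte_nested_data, 'ab_id') as ab_id
from unnest_demo
cross join jsonb_array_elements(
    case jsonb_typeof(children)
        when 'array' then children
        else '[]'
    end
) as _airbyte_nested_data
where children is not null;

-- Identifier quoting, as in the fixtures: double the inner double quote.
-- select "column`_'with""_quotes" from "postgres".test_normalization."unnest_alias_children_owner";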
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/second_output/airbyte_tables/test_normalization/unnest_alias_children_owner.sql @@ -0,0 +1,60 @@ + + + create table "postgres".test_normalization."unnest_alias_children_owner__dbt_tmp" + as ( + +with __dbt__cte__unnest_alias_children_owner_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization."unnest_alias_children" +select + _airbyte_children_hashid, + jsonb_extract_path_text("owner", 'owner_id') as owner_id, + jsonb_extract_path("owner", 'column`_''with"_quotes') as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization."unnest_alias_children" as table_alias +-- owner at unnest_alias/children/owner +where 1 = 1 +and "owner" is not null +), __dbt__cte__unnest_alias_children_owner_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__unnest_alias_children_owner_ab1 +select + _airbyte_children_hashid, + cast(owner_id as + bigint +) as owner_id, + "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__unnest_alias_children_owner_ab1 +-- owner at unnest_alias/children/owner +where 1 = 1 +), __dbt__cte__unnest_alias_children_owner_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__unnest_alias_children_owner_ab2 +select + md5(cast(coalesce(cast(_airbyte_children_hashid as text), '') || '-' || coalesce(cast(owner_id as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_owner_hashid, + tmp.* +from __dbt__cte__unnest_alias_children_owner_ab2 tmp +-- owner at unnest_alias/children/owner +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__unnest_alias_children_owner_ab3 +select + _airbyte_children_hashid, + owner_id, + "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_owner_hashid +from __dbt__cte__unnest_alias_children_owner_ab3 +-- owner at unnest_alias/children/owner from "postgres".test_normalization."unnest_alias_children" +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml new file mode 100755 index 0000000000000..013a446b320a5 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/dbt_project.yml @@ -0,0 +1,70 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- modified_models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: 
incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +dispatch: +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml new file mode 100644 index 0000000000000..12745c37a1508 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_dbt_project.yml @@ -0,0 +1,90 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +dispatch: +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization._airbyte_raw_exchange_rate + exchange_rate: 
test_normalization._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization._airbyte_raw_pos_dedup_cdcx + 1_prefix_startwith_number_ab1: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_ab2: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_stg: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd: test_normalization._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number: test_normalization._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization._airbyte_raw_multiple_column_names_conflicts + types_testing_ab1: test_normalization._airbyte_raw_types_testing + types_testing_ab2: test_normalization._airbyte_raw_types_testing + types_testing_stg: test_normalization._airbyte_raw_types_testing + types_testing_scd: test_normalization._airbyte_raw_types_testing + types_testing: test_normalization._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql new file mode 100644 index 0000000000000..dac6628377db2 --- /dev/null +++ 
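The quoting block shared by both dbt_project.yml files (database: true, schema: false, identifier: true) is what produces the mixed style seen throughout the generated SQL: database and relation names are double-quoted while the schema stays bare. A small sketch of the effect, assuming a connection to a database actually named postgres (Postgres only accepts the database qualifier when it matches the current database):

-- "postgres" and "quoting_demo" quoted; test_normalization left bare.
create schema if not exists test_normalization;
create table if not exists "postgres".test_normalization."quoting_demo" ("id" bigint);
select "id" from "postgres".test_normalization."quoting_demo";

The models_to_source vars map each generated model, including the *_stg and *_scd intermediates, back to its _airbyte_raw_* source table.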
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -0,0 +1,73 @@ + + + + create table "postgres".test_normalization."1_prefix_startwith_number_scd" + as ( + +-- depends_on: ref('1_prefix_startwith_number_stg') +with + +input_data as ( + select * + from "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" + -- 1_prefix_startwith_number from "postgres".test_normalization._airbyte_raw_1_prefix_startwith_number +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, + "id", + "date", + "text", + "date" as _airbyte_start_at, + lag("date") over ( + partition by "id" + order by + "date" is null asc, + "date" desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by "id" + order by + "date" is null asc, + "date" desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_1_prefix_startwith_number_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + "id", + "date", + "text", + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_1_prefix_startwith_number_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..ba66363a77f5a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,79 @@ + + + + create table "postgres".test_normalization."dedup_cdc_excluded_scd" + as ( + +-- depends_on: ref('dedup_cdc_excluded_stg') +with + +input_data as ( + select * + from "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" + -- dedup_cdc_excluded from "postgres".test_normalization._airbyte_raw_dedup_cdc_excluded +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, + "id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + 
_ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, + lag(_ab_cdc_lsn) over ( + partition by "id" + order by + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by "id" + order by + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_cdc_excluded_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as text), cast(_ab_cdc_updated_at as text) + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') as text)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + "id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..c9440958247d2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,83 @@ + + + + create table "postgres".test_normalization."dedup_exchange_rate_scd" + as ( + +-- depends_on: ref('dedup_exchange_rate_stg') +with + +input_data as ( + select * + from "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" + -- dedup_exchange_rate from "postgres".test_normalization._airbyte_raw_dedup_exchange_rate +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(nzd as text), '') as text)) as _airbyte_unique_key, + "id", + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + "date" as _airbyte_start_at, + lag("date") over ( + partition by "id", currency, cast(nzd as text) + order by + "date" is null asc, + "date" desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by "id", currency, cast(nzd as text) + order by + "date" is null asc, + "date" desc, + 
_airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + "id", + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql new file mode 100644 index 0000000000000..9eb7e6e349ab2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -0,0 +1,81 @@ + + + + create table "postgres".test_normalization."multiple_column_names_conflicts_scd" + as ( + +-- depends_on: ref('multiple_column_names_conflicts_stg') +with + +input_data as ( + select * + from "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" + -- multiple_column_names_conflicts from "postgres".test_normalization._airbyte_raw_multiple_column_names_conflicts +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, + "id", + "User Id", + user_id, + "User id", + "user id", + "User@Id", + userid, + _airbyte_emitted_at as _airbyte_start_at, + lag(_airbyte_emitted_at) over ( + partition by "id" + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by "id" + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_multiple_co__ames_conflicts_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + 
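When the declared primary key is composite, as in dedup_exchange_rate_scd above, _airbyte_unique_key is the md5 of the '-'-joined, null-coalesced key parts (id, currency, nzd), and the same columns reappear in the window partitions. A self-contained sketch with illustrative values:

select
    md5(cast(
           coalesce(cast("id" as text), '') || '-'
        || coalesce(cast(currency as text), '') || '-'
        || coalesce(cast(nzd as text), '') as text)) as _airbyte_unique_key
from (values (1, 'EUR', 1.12), (1, 'EUR', null)) as t("id", currency, nzd);
-- A null key part collapses to '' instead of nulling out the whole hash.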
md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + "id", + "User Id", + user_id, + "User id", + "user id", + "User@Id", + userid, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_multiple_co__ames_conflicts_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql new file mode 100644 index 0000000000000..450815d1ccc51 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -0,0 +1,83 @@ + + + + create table "postgres".test_normalization."pos_dedup_cdcx_scd" + as ( + +-- depends_on: ref('pos_dedup_cdcx_stg') +with + +input_data as ( + select * + from "postgres"._airbyte_test_normalization."pos_dedup_cdcx_stg" + -- pos_dedup_cdcx from "postgres".test_normalization._airbyte_raw_pos_dedup_cdcx +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, + "id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( + partition by "id" + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _ab_cdc_log_pos desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by "id" + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _ab_cdc_log_pos desc, + _airbyte_emitted_at desc + ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_pos_dedup_cdcx_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as text), cast(_ab_cdc_updated_at as text), cast(_ab_cdc_log_pos as text) + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') || '-' || coalesce(cast(_ab_cdc_log_pos as text), '') as text)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + "id", + "name", + 
_ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_pos_dedup_cdcx_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..31e25e700b601 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,71 @@ + + + + create table "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" + as ( + +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') +with + +input_data as ( + select * + from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" + -- renamed_dedup_cdc_excluded from "postgres".test_normalization._airbyte_raw_renamed_dedup_cdc_excluded +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, + "id", + _ab_cdc_updated_at, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( + partition by "id" + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by "id" + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_renamed_dedup_cdc_excluded_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + "id", + _ab_cdc_updated_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/types_testing_scd.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/types_testing_scd.sql new file mode 100644 index 0000000000000..238d662a00cfe --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/types_testing_scd.sql @@ -0,0 +1,73 @@ + + + + create table "postgres".test_normalization."types_testing_scd" + as ( + +-- depends_on: ref('types_testing_stg') +with + +input_data as ( + select * + from "postgres"._airbyte_test_normalization."types_testing_stg" + -- types_testing from "postgres".test_normalization._airbyte_raw_types_testing +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast("id" as text), '') as text)) as _airbyte_unique_key, + "id", + airbyte_integer_column, + nullable_airbyte_integer_column, + _airbyte_emitted_at as _airbyte_start_at, + lag(_airbyte_emitted_at) over ( + partition by "id" + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by "id" + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_types_testing_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + "id", + airbyte_integer_column, + nullable_airbyte_integer_column, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_types_testing_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql new file mode 100644 index 0000000000000..aad38834ac949 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql @@ -0,0 +1,24 @@ + + + + create table "postgres".test_normalization."1_prefix_startwith_number" + as ( + +-- Final base SQL model +-- depends_on: "postgres".test_normalization."1_prefix_startwith_number_scd" +select + _airbyte_unique_key, + "id", + "date", + "text", + _airbyte_ab_id, + 
_airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_1_prefix_startwith_number_hashid +from "postgres".test_normalization."1_prefix_startwith_number_scd" +-- 1_prefix_startwith_number from "postgres".test_normalization._airbyte_raw_1_prefix_startwith_number +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql new file mode 100644 index 0000000000000..94b51fa8be0bd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql @@ -0,0 +1,51 @@ + + + + create table "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" + as ( + +with __dbt__cte__1_prefix_startwith_number_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_1_prefix_startwith_number +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'date') as "date", + jsonb_extract_path_text(_airbyte_data, 'text') as "text", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_1_prefix_startwith_number as table_alias +-- 1_prefix_startwith_number +where 1 = 1 + +), __dbt__cte__1_prefix_startwith_number_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__1_prefix_startwith_number_ab1 +select + cast("id" as + bigint +) as "id", + cast(nullif("date", '') as + date +) as "date", + cast("text" as text) as "text", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__1_prefix_startwith_number_ab1 +-- 1_prefix_startwith_number +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__1_prefix_startwith_number_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast("text" as text), '') as text)) as _airbyte_1_prefix_startwith_number_hashid, + tmp.* +from __dbt__cte__1_prefix_startwith_number_ab2 tmp +-- 1_prefix_startwith_number +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..8b6b3e96bc28c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,26 @@ + + + + create table 
"postgres".test_normalization."dedup_cdc_excluded" + as ( + +-- Final base SQL model +-- depends_on: "postgres".test_normalization."dedup_cdc_excluded_scd" +select + _airbyte_unique_key, + "id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from "postgres".test_normalization."dedup_cdc_excluded_scd" +-- dedup_cdc_excluded from "postgres".test_normalization._airbyte_raw_dedup_cdc_excluded +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql new file mode 100644 index 0000000000000..1c688fb2faa56 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql @@ -0,0 +1,59 @@ + + + + create table "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" + as ( + +with __dbt__cte__dedup_cdc_excluded_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_dedup_cdc_excluded +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'name') as "name", + jsonb_extract_path_text(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + jsonb_extract_path_text(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + jsonb_extract_path_text(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_dedup_cdc_excluded as table_alias +-- dedup_cdc_excluded +where 1 = 1 + +), __dbt__cte__dedup_cdc_excluded_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_cdc_excluded_ab1 +select + cast("id" as + bigint +) as "id", + cast("name" as text) as "name", + cast(_ab_cdc_lsn as + float +) as _ab_cdc_lsn, + cast(_ab_cdc_updated_at as + float +) as _ab_cdc_updated_at, + cast(_ab_cdc_deleted_at as + float +) as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__dedup_cdc_excluded_ab1 +-- dedup_cdc_excluded +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_cdc_excluded_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("name" as text), '') || '-' || coalesce(cast(_ab_cdc_lsn as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as text), '') as text)) as _airbyte_dedup_cdc_excluded_hashid, + tmp.* +from __dbt__cte__dedup_cdc_excluded_ab2 tmp +-- dedup_cdc_excluded +where 1 = 1 + + ); + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..93578cc1edcaf --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,29 @@ + + + + create table "postgres".test_normalization."dedup_exchange_rate" + as ( + +-- Final base SQL model +-- depends_on: "postgres".test_normalization."dedup_exchange_rate_scd" +select + _airbyte_unique_key, + "id", + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from "postgres".test_normalization."dedup_exchange_rate_scd" +-- dedup_exchange_rate from "postgres".test_normalization._airbyte_raw_dedup_exchange_rate +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..128ec051327d6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,69 @@ + + + + create table "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" + as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_dedup_exchange_rate +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'currency') as currency, + jsonb_extract_path_text(_airbyte_data, 'date') as "date", + jsonb_extract_path_text(_airbyte_data, 'timestamp_col') as timestamp_col, + jsonb_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + jsonb_extract_path_text(_airbyte_data, 'HKD_special___characters') as hkd_special___characters, + jsonb_extract_path_text(_airbyte_data, 'NZD') as nzd, + jsonb_extract_path_text(_airbyte_data, 'USD') as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast("id" as + bigint +) as "id", + cast(currency as text) as currency, + cast(nullif("date", '') as + date +) as "date", + cast(nullif(timestamp_col, '') as 
+ timestamp with time zone +) as timestamp_col, + cast("HKD@spéçiäl & characters" as + float +) as "HKD@spéçiäl & characters", + cast(hkd_special___characters as text) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql new file mode 100644 index 0000000000000..eba2d8af4fcee --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql @@ -0,0 +1,28 @@ + + + + create table "postgres".test_normalization."multiple_column_names_conflicts" + as ( + +-- Final base SQL model +-- depends_on: "postgres".test_normalization."multiple_column_names_conflicts_scd" +select + _airbyte_unique_key, + "id", + "User Id", + user_id, + "User id", + "user id", + "User@Id", + userid, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_multiple_co__ames_conflicts_hashid +from "postgres".test_normalization."multiple_column_names_conflicts_scd" +-- multiple_column_names_conflicts from "postgres".test_normalization._airbyte_raw_multiple_column_names_conflicts +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..dbb4726faf8f3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,65 @@ + + + + create table "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" + as ( + +with __dbt__cte__multiple_column_names_conflicts_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into 
separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_multiple_column_names_conflicts +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'User Id') as "User Id", + jsonb_extract_path_text(_airbyte_data, 'user_id') as user_id, + jsonb_extract_path_text(_airbyte_data, 'User id') as "User id", + jsonb_extract_path_text(_airbyte_data, 'user id') as "user id", + jsonb_extract_path_text(_airbyte_data, 'User@Id') as "User@Id", + jsonb_extract_path_text(_airbyte_data, 'UserId') as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__cte__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 +select + cast("id" as + bigint +) as "id", + cast("User Id" as text) as "User Id", + cast(user_id as + float +) as user_id, + cast("User id" as + float +) as "User id", + cast("user id" as + float +) as "user id", + cast("User@Id" as text) as "User@Id", + cast(userid as + float +) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("User Id" as text), '') || '-' || coalesce(cast(user_id as text), '') || '-' || coalesce(cast("User id" as text), '') || '-' || coalesce(cast("user id" as text), '') || '-' || coalesce(cast("User@Id" as text), '') || '-' || coalesce(cast(userid as text), '') as text)) as _airbyte_multiple_co__ames_conflicts_hashid, + tmp.* +from __dbt__cte__multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql new file mode 100644 index 0000000000000..59b2696002723 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql @@ -0,0 +1,27 @@ + + + + create table "postgres".test_normalization."pos_dedup_cdcx" + as ( + +-- Final base SQL model +-- depends_on: "postgres".test_normalization."pos_dedup_cdcx_scd" +select + _airbyte_unique_key, + "id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_pos_dedup_cdcx_hashid +from "postgres".test_normalization."pos_dedup_cdcx_scd" +-- pos_dedup_cdcx from "postgres".test_normalization._airbyte_raw_pos_dedup_cdcx +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git 
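Every *_stg model compiles to the same three-step CTE chain: the ab1 CTE extracts each field from the raw _airbyte_data JSON blob as text with jsonb_extract_path_text, ab2 casts those fields to their JSON-schema types, and the final select prepends an md5 hash over all business columns. Note also how multiple_column_names_conflicts_stg keeps colliding JSON keys apart: variants that differ only in case or spacing survive as quoted identifiers ("User Id", "User id", "user id"), and only the unquotable form is folded to snake case (UserId becomes userid). The pipeline reduced to one column, with an illustrative raw table:

with ab1 as (
    -- step 1: pull scalars out of the JSON blob, still as text
    select jsonb_extract_path_text(_airbyte_data, 'id') as id, _airbyte_emitted_at
    from raw_table
), ab2 as (
    -- step 2: cast to the type declared in the JSON schema
    select cast(id as bigint) as id, _airbyte_emitted_at
    from ab1
)
-- step 3: hash the record's values into a stable row id
select md5(cast(coalesce(cast(id as text), '') as text)) as row_hashid, ab2.*
from ab2;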
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql new file mode 100644 index 0000000000000..1b28a6bd09ddc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql @@ -0,0 +1,63 @@ + + + + create table "postgres"._airbyte_test_normalization."pos_dedup_cdcx_stg" + as ( + +with __dbt__cte__pos_dedup_cdcx_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_pos_dedup_cdcx +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'name') as "name", + jsonb_extract_path_text(_airbyte_data, '_ab_cdc_lsn') as _ab_cdc_lsn, + jsonb_extract_path_text(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + jsonb_extract_path_text(_airbyte_data, '_ab_cdc_deleted_at') as _ab_cdc_deleted_at, + jsonb_extract_path_text(_airbyte_data, '_ab_cdc_log_pos') as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_pos_dedup_cdcx as table_alias +-- pos_dedup_cdcx +where 1 = 1 + +), __dbt__cte__pos_dedup_cdcx_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__pos_dedup_cdcx_ab1 +select + cast("id" as + bigint +) as "id", + cast("name" as text) as "name", + cast(_ab_cdc_lsn as + float +) as _ab_cdc_lsn, + cast(_ab_cdc_updated_at as + float +) as _ab_cdc_updated_at, + cast(_ab_cdc_deleted_at as + float +) as _ab_cdc_deleted_at, + cast(_ab_cdc_log_pos as + float +) as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__pos_dedup_cdcx_ab1 +-- pos_dedup_cdcx +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__pos_dedup_cdcx_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast("name" as text), '') || '-' || coalesce(cast(_ab_cdc_lsn as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') || '-' || coalesce(cast(_ab_cdc_deleted_at as text), '') || '-' || coalesce(cast(_ab_cdc_log_pos as text), '') as text)) as _airbyte_pos_dedup_cdcx_hashid, + tmp.* +from __dbt__cte__pos_dedup_cdcx_ab2 tmp +-- pos_dedup_cdcx +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..36303d71ef60e --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,23 @@ + + + + create table "postgres".test_normalization."renamed_dedup_cdc_excluded" + as ( + +-- Final base SQL model +-- depends_on: "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" +select + _airbyte_unique_key, + "id", + _ab_cdc_updated_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" +-- renamed_dedup_cdc_excluded from "postgres".test_normalization._airbyte_raw_renamed_dedup_cdc_excluded +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql new file mode 100644 index 0000000000000..7fba3805f3967 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -0,0 +1,49 @@ + + + + create table "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" + as ( + +with __dbt__cte__renamed_dedup_cdc_excluded_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_renamed_dedup_cdc_excluded +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_renamed_dedup_cdc_excluded as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 + +), __dbt__cte__renamed_dedup_cdc_excluded_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__renamed_dedup_cdc_excluded_ab1 +select + cast("id" as + bigint +) as "id", + cast(_ab_cdc_updated_at as + float +) as _ab_cdc_updated_at, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__renamed_dedup_cdc_excluded_ab1 +-- renamed_dedup_cdc_excluded +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__renamed_dedup_cdc_excluded_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(_ab_cdc_updated_at as text), '') as text)) as _airbyte_renamed_dedup_cdc_excluded_hashid, + tmp.* +from __dbt__cte__renamed_dedup_cdc_excluded_ab2 tmp +-- renamed_dedup_cdc_excluded +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/types_testing.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/types_testing.sql new file mode 100644 index 0000000000000..424c1918935bd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/types_testing.sql @@ -0,0 +1,24 @@ + + + + create table "postgres".test_normalization."types_testing" + as ( + +-- Final base SQL model +-- depends_on: "postgres".test_normalization."types_testing_scd" +select + _airbyte_unique_key, + "id", + airbyte_integer_column, + nullable_airbyte_integer_column, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_types_testing_hashid +from "postgres".test_normalization."types_testing_scd" +-- types_testing from "postgres".test_normalization._airbyte_raw_types_testing +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/types_testing_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/types_testing_stg.sql new file mode 100644 index 0000000000000..7eccd56d06093 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/types_testing_stg.sql @@ -0,0 +1,53 @@ + + + + create table "postgres"._airbyte_test_normalization."types_testing_stg" + as ( + +with __dbt__cte__types_testing_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_types_testing +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'airbyte_integer_column') as airbyte_integer_column, + jsonb_extract_path_text(_airbyte_data, 'nullable_airbyte_integer_column') as nullable_airbyte_integer_column, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_types_testing as table_alias +-- types_testing +where 1 = 1 + +), __dbt__cte__types_testing_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__types_testing_ab1 +select + cast("id" as + bigint +) as "id", + cast(airbyte_integer_column as + bigint +) as airbyte_integer_column, + cast(nullable_airbyte_integer_column as + bigint +) as nullable_airbyte_integer_column, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__types_testing_ab1 +-- types_testing +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__types_testing_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(airbyte_integer_column as text), '') || '-' || coalesce(cast(nullable_airbyte_integer_column as text), '') as text)) as _airbyte_types_testing_hashid, + tmp.* +from __dbt__cte__types_testing_ab2 tmp +-- types_testing +where 1 = 1 + + ); + \ No newline at end of file diff --git 
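The hash expressions coalesce every column to '' and join the pieces with '-' before hashing, so a null value never nulls out the whole concatenation; the flip side is that null and empty string contribute the same component, as the query below demonstrates (values chosen purely for illustration):

-- both calls return the same digest: null is coalesced to ''
select md5(cast(coalesce(cast(null as text), '') || '-' || 'x' as text)),
       md5(cast(coalesce(cast('' as text), '') || '-' || 'x' as text));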
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..2773af0d8fa35 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,107 @@ + + + create table "postgres".test_normalization."exchange_rate__dbt_tmp" + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_exchange_rate +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'currency') as currency, + jsonb_extract_path_text(_airbyte_data, 'date') as "date", + jsonb_extract_path_text(_airbyte_data, 'timestamp_col') as timestamp_col, + jsonb_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + jsonb_extract_path_text(_airbyte_data, 'HKD_special___characters') as hkd_special___characters, + jsonb_extract_path_text(_airbyte_data, 'NZD') as nzd, + jsonb_extract_path_text(_airbyte_data, 'USD') as usd, + jsonb_extract_path_text(_airbyte_data, 'column`_''with"_quotes') as "column`_'with""_quotes", + jsonb_extract_path_text(_airbyte_data, 'datetime_tz') as datetime_tz, + jsonb_extract_path_text(_airbyte_data, 'datetime_no_tz') as datetime_no_tz, + jsonb_extract_path_text(_airbyte_data, 'time_tz') as time_tz, + jsonb_extract_path_text(_airbyte_data, 'time_no_tz') as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast("id" as + bigint +) as "id", + cast(currency as text) as currency, + cast(nullif("date", '') as + date +) as "date", + cast(nullif(timestamp_col, '') as + timestamp with time zone +) as timestamp_col, + cast("HKD@spéçiäl & characters" as + float +) as "HKD@spéçiäl & characters", + cast(hkd_special___characters as text) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", + cast(nullif(datetime_tz, '') as + timestamp with time zone +) as datetime_tz, + cast(nullif(datetime_no_tz, '') as + timestamp +) as datetime_no_tz, + cast(nullif(time_tz, '') as + time with time zone +) as time_tz, + cast(nullif(time_no_tz, '') as + time +) as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || 
coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') || '-' || coalesce(cast(datetime_tz as text), '') || '-' || coalesce(cast(datetime_no_tz as text), '') || '-' || coalesce(cast(time_tz as text), '') || '-' || coalesce(cast(time_no_tz as text), '') as text)) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + "id", + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + "column`_'with""_quotes", + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from "postgres".test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql new file mode 100644 index 0000000000000..f6697dcec7577 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract_scalar('_airbyte_data', ['text'], ['text']) }} as {{ adapter.quote('text') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} as table_alias +-- 1_prefix_startwith_number +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql new file mode 100644 index 0000000000000..a9dd516725858 --- /dev/null +++ 
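From here on the diff switches from compiled first_output SQL to the generated dbt models themselves. Each one opens with the same config(): a btree index on _airbyte_emitted_at, _airbyte_ab_id as unique_key, the dedicated _airbyte_test_normalization schema, and a top-level-intermediate tag for selective runs; the trailing incremental_clause('_airbyte_emitted_at', this) call guards incremental executions. As a rough sketch of what that macro plausibly expands to on an incremental run (an assumption, not copied from the macro source), it appends a high-watermark predicate such as:

-- assumed expansion: keep only rows at or past the target's high watermark
and coalesce(
    cast(_airbyte_emitted_at as timestamp with time zone)
        >= (select max(cast(_airbyte_emitted_at as timestamp with time zone)) from {{ this }}),
    true)

On a first, full run the clause would render empty, which matches the bare where 1 = 1 in the compiled first_output files above.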
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/1_prefix_startwith_number_ab2.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('1_prefix_startwith_number_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, + cast({{ adapter.quote('text') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('text') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('1_prefix_startwith_number_ab1') }} +-- 1_prefix_startwith_number +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql new file mode 100644 index 0000000000000..99a03831a8ba8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as {{ adapter.quote('name') }}, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} as table_alias +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql new file mode 100644 index 0000000000000..3d8803e27a664 --- /dev/null +++ 
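The ab2 models avoid hard-coded dialect types: dbt_utils.type_bigint(), dbt_utils.type_string(), type_date(), and the timestamp/time variants resolve per adapter, and empty_string_to_null(...) is what produces the nullif(..., '') wrappers visible in the compiled first_output SQL earlier, protecting the date and timestamp casts from empty-string JSON values. On Postgres, one such model line compiles to (whitespace aside):

cast(nullif("date", '') as date) as "date"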
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_cdc_excluded_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast({{ adapter.quote('name') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('name') }}, + cast(_ab_cdc_lsn as {{ dbt_utils.type_float() }}) as _ab_cdc_lsn, + cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, + cast(_ab_cdc_deleted_at as {{ dbt_utils.type_float() }}) as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_cdc_excluded_ab1') }} +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..5009554c3391c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..187fc05ccc6fe --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, + cast(hkd_special___characters as {{ dbt_utils.type_string() }}) as hkd_special___characters, + cast(nzd as {{ dbt_utils.type_float() }}) as nzd, + cast(usd as {{ dbt_utils.type_float() }}) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql new file mode 100644 index 0000000000000..ca2b2520a2585 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql @@ -0,0 +1,29 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & 
characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, + {{ json_extract_scalar('_airbyte_data', ['column`_\'with"_quotes'], ['column___with__quotes']) }} as {{ adapter.quote('column`_\'with""_quotes') }}, + {{ json_extract_scalar('_airbyte_data', ['datetime_tz'], ['datetime_tz']) }} as datetime_tz, + {{ json_extract_scalar('_airbyte_data', ['datetime_no_tz'], ['datetime_no_tz']) }} as datetime_no_tz, + {{ json_extract_scalar('_airbyte_data', ['time_tz'], ['time_tz']) }} as time_tz, + {{ json_extract_scalar('_airbyte_data', ['time_no_tz'], ['time_no_tz']) }} as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} as table_alias +-- exchange_rate +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql new file mode 100644 index 0000000000000..0f457acbee982 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql @@ -0,0 +1,29 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('exchange_rate_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, + cast(hkd_special___characters as {{ dbt_utils.type_string() }}) as hkd_special___characters, + cast(nzd as {{ dbt_utils.type_float() }}) as nzd, + cast(usd as {{ dbt_utils.type_float() }}) as usd, + cast({{ adapter.quote('column`_\'with""_quotes') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('column`_\'with""_quotes') }}, + cast({{ empty_string_to_null('datetime_tz') }} as {{ type_timestamp_with_timezone() }}) as datetime_tz, + cast({{ empty_string_to_null('datetime_no_tz') }} as {{ type_timestamp_without_timezone() }}) as datetime_no_tz, + cast({{ empty_string_to_null('time_tz') }} as {{ type_time_with_timezone() }}) as time_tz, + cast({{ empty_string_to_null('time_no_tz') }} as {{ type_time_without_timezone() }}) as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('exchange_rate_ab1') }} +-- exchange_rate +where 1 = 1 + diff 
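The exchange_rate_ab3 model just below shows the Jinja side of the hashing step: dbt_utils.surrogate_key([...]) over the quoted column list is what compiles into the long md5(cast(coalesce(...) || '-' || ... as text)) expression seen in the compiled exchange_rate.sql earlier, one '-'-joined, null-coalesced component per column.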
--git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql new file mode 100644 index 0000000000000..789086fe147aa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql @@ -0,0 +1,29 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + adapter.quote('date'), + 'timestamp_col', + adapter.quote('HKD@spéçiäl & characters'), + 'hkd_special___characters', + 'nzd', + 'usd', + adapter.quote('column`_\'with""_quotes'), + 'datetime_tz', + 'datetime_no_tz', + 'time_tz', + 'time_no_tz', + ]) }} as _airbyte_exchange_rate_hashid, + tmp.* +from {{ ref('exchange_rate_ab2') }} tmp +-- exchange_rate +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql new file mode 100644 index 0000000000000..3444e2fe46f97 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab1.sql @@ -0,0 +1,24 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['User Id'], ['User Id']) }} as {{ adapter.quote('User Id') }}, + {{ json_extract_scalar('_airbyte_data', ['user_id'], ['user_id']) }} as user_id, + {{ json_extract_scalar('_airbyte_data', ['User id'], ['User id']) }} as {{ adapter.quote('User id') }}, + {{ json_extract_scalar('_airbyte_data', ['user id'], ['user id']) }} as {{ adapter.quote('user id') }}, + {{ json_extract_scalar('_airbyte_data', ['User@Id'], ['User@Id']) }} as {{ adapter.quote('User@Id') }}, + {{ json_extract_scalar('_airbyte_data', ['UserId'], ['UserId']) }} as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} as table_alias +-- multiple_column_names_conflicts +where 1 = 1 +{{ 
incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql new file mode 100644 index 0000000000000..263d011d1bdeb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/multiple_column_names_conflicts_ab2.sql @@ -0,0 +1,24 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('multiple_column_names_conflicts_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast({{ adapter.quote('User Id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('User Id') }}, + cast(user_id as {{ dbt_utils.type_float() }}) as user_id, + cast({{ adapter.quote('User id') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('User id') }}, + cast({{ adapter.quote('user id') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('user id') }}, + cast({{ adapter.quote('User@Id') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('User@Id') }}, + cast(userid as {{ dbt_utils.type_float() }}) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('multiple_column_names_conflicts_ab1') }} +-- multiple_column_names_conflicts +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql new file mode 100644 index 0000000000000..ee8f1538acb46 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql @@ -0,0 +1,23 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as {{ adapter.quote('name') }}, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, + {{ 
json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_log_pos'], ['_ab_cdc_log_pos']) }} as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} as table_alias +-- pos_dedup_cdcx +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql new file mode 100644 index 0000000000000..96c252758b6d4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql @@ -0,0 +1,23 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('pos_dedup_cdcx_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast({{ adapter.quote('name') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('name') }}, + cast(_ab_cdc_lsn as {{ dbt_utils.type_float() }}) as _ab_cdc_lsn, + cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, + cast(_ab_cdc_deleted_at as {{ dbt_utils.type_float() }}) as _ab_cdc_deleted_at, + cast(_ab_cdc_log_pos as {{ dbt_utils.type_float() }}) as _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('pos_dedup_cdcx_ab1') }} +-- pos_dedup_cdcx +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql new file mode 100644 index 0000000000000..fbe40aebf3c7c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], 
['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql new file mode 100644 index 0000000000000..f0b99802de8b2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('renamed_dedup_cdc_excluded_ab1') }} +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/types_testing_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/types_testing_ab1.sql new file mode 100644 index 0000000000000..2fca430a9c393 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/types_testing_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_types_testing') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['airbyte_integer_column'], ['airbyte_integer_column']) }} as airbyte_integer_column, + {{ json_extract_scalar('_airbyte_data', ['nullable_airbyte_integer_column'], ['nullable_airbyte_integer_column']) }} as nullable_airbyte_integer_column, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_types_testing') }} as table_alias +-- types_testing +where 1 = 1 +{{ 
incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/types_testing_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/types_testing_ab2.sql new file mode 100644 index 0000000000000..da93832f7f778 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/types_testing_ab2.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('types_testing_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast(airbyte_integer_column as {{ dbt_utils.type_bigint() }}) as airbyte_integer_column, + cast(nullable_airbyte_integer_column as {{ dbt_utils.type_bigint() }}) as nullable_airbyte_integer_column, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('types_testing_ab1') }} +-- types_testing +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql new file mode 100644 index 0000000000000..01e0c49d1c7c4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -0,0 +1,163 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='1_prefix_startwith_number' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. 
(in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('1_prefix_startwith_number')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }} where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.{{ adapter.quote('1_prefix_startwith_number_stg') }})"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('1_prefix_startwith_number_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('1_prefix_startwith_number_stg') }} + -- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('1_prefix_startwith_number_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ 
dbt_utils.star(ref('1_prefix_startwith_number_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('1_prefix_startwith_number_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('1_prefix_startwith_number_stg') }} + -- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + {{ adapter.quote('text') }}, + {{ adapter.quote('date') }} as _airbyte_start_at, + lag({{ adapter.quote('date') }}) over ( + partition by {{ adapter.quote('id') }} + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_1_prefix_startwith_number_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + {{ adapter.quote('text') }}, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_1_prefix_startwith_number_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..5affe9825e3be --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,169 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_cdc_excluded' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. 
+ Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_cdc_excluded_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_cdc_excluded_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_cdc_excluded_stg') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row 
= 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_cdc_excluded_stg') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, + lag(_ab_cdc_lsn) over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_cdc_excluded_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..ef0cf7e1e95f5 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,177 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If 
the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left 
join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + {{ adapter.quote('date') }} as _airbyte_start_at, + lag({{ adapter.quote('date') }}) over ( + partition by {{ adapter.quote('id') }}, currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }}, currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql new file mode 100644 index 0000000000000..77d393c856892 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -0,0 +1,171 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 
'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='multiple_column_names_conflicts' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('multiple_column_names_conflicts')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('multiple_column_names_conflicts')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.multiple_column_names_conflicts_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.multiple_column_names_conflicts_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('multiple_column_names_conflicts_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('multiple_column_names_conflicts_stg') }} + -- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('multiple_column_names_conflicts_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('multiple_column_names_conflicts_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('multiple_column_names_conflicts_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('multiple_column_names_conflicts_stg') }} + -- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('User Id') }}, + user_id, + {{ adapter.quote('User id') }}, + {{ adapter.quote('user id') }}, + {{ adapter.quote('User@Id') }}, + userid, + _airbyte_emitted_at as _airbyte_start_at, + lag(_airbyte_emitted_at) over ( + partition by {{ adapter.quote('id') }} + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_multiple_co__ames_conflicts_hashid + from input_data +), +dedup_data as ( + 
select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + {{ adapter.quote('User Id') }}, + user_id, + {{ adapter.quote('User id') }}, + {{ adapter.quote('user id') }}, + {{ adapter.quote('User@Id') }}, + userid, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_multiple_co__ames_conflicts_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql new file mode 100644 index 0000000000000..ff471c6abaab1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -0,0 +1,173 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='pos_dedup_cdcx' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('pos_dedup_cdcx')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('pos_dedup_cdcx')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.pos_dedup_cdcx_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.pos_dedup_cdcx_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('pos_dedup_cdcx_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('pos_dedup_cdcx_stg') }} + -- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('pos_dedup_cdcx_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('pos_dedup_cdcx_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('pos_dedup_cdcx_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('pos_dedup_cdcx_stg') }} + -- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _ab_cdc_log_pos desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _ab_cdc_log_pos desc, + _airbyte_emitted_at desc + ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, 
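+ -- Illustrative note on the active-row flag above (hypothetical data, not
+ -- part of the generated model): given updates (id=1, _ab_cdc_updated_at=10)
+ -- and (id=1, _ab_cdc_updated_at=20) followed by a delete at
+ -- _ab_cdc_updated_at=30, every version of id=1 gets active_row = 0: the
+ -- delete record wins the row_number() ordering but carries a non-null
+ -- _ab_cdc_deleted_at, and the older records rank below it.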
+ _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_pos_dedup_cdcx_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_log_pos as {{ dbt_utils.type_string() }}) + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at', '_ab_cdc_log_pos' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_pos_dedup_cdcx_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..d8da713c68711 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,161 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='renamed_dedup_cdc_excluded' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
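+ -- Worked example of the join below (hypothetical keys): a _airbyte_unique_key
+ -- whose SCD rows are now all active_row = 0 appears in recent_records but has
+ -- no match in active_counts, so its active_count is null and it is deleted
+ -- from the final table; a key that still has an active_row = 1 entry joins
+ -- with active_count >= 1 and is kept.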
+ delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('renamed_dedup_cdc_excluded_stg') }} + -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('renamed_dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('renamed_dedup_cdc_excluded_stg') }} + -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + _ab_cdc_updated_at, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) as 
_airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_renamed_dedup_cdc_excluded_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + _ab_cdc_updated_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/types_testing_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/types_testing_scd.sql new file mode 100644 index 0000000000000..0a0b409c90b72 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/types_testing_scd.sql @@ -0,0 +1,163 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='types_testing' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
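+ -- Sketch of what incremental_clause renders (approximate; the macro is
+ -- defined in base-normalization's dbt macros and the exact SQL may differ):
+ -- on an incremental run it expands to roughly
+ --   and coalesce(
+ --     cast(_airbyte_normalized_at as timestamp with time zone) >=
+ --       (select max(cast(_airbyte_normalized_at as timestamp with time zone)) from <target_table>),
+ --     true)
+ -- and renders nothing on a full refresh.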
+ delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('types_testing')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('types_testing')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.types_testing_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.types_testing_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('types_testing_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('types_testing_stg') }} + -- types_testing from {{ source('test_normalization', '_airbyte_raw_types_testing') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('types_testing_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('types_testing_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('types_testing_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('types_testing_stg') }} + -- types_testing from {{ source('test_normalization', '_airbyte_raw_types_testing') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + airbyte_integer_column, + nullable_airbyte_integer_column, + _airbyte_emitted_at as _airbyte_start_at, + lag(_airbyte_emitted_at) over ( + partition by {{ adapter.quote('id') }} + order by + _airbyte_emitted_at is null asc, + _airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + _airbyte_emitted_at is null asc, + 
_airbyte_emitted_at desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_types_testing_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + airbyte_integer_column, + nullable_airbyte_integer_column, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_types_testing_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql new file mode 100644 index 0000000000000..f3ea9897b65a4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql @@ -0,0 +1,23 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('1_prefix_startwith_number_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + {{ adapter.quote('text') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_1_prefix_startwith_number_hashid +from {{ ref('1_prefix_startwith_number_scd') }} +-- 1_prefix_startwith_number from {{ source('test_normalization', '_airbyte_raw_1_prefix_startwith_number') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql new file mode 100644 index 0000000000000..c387201c974c8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a 
hash column based on the values of this record +-- depends_on: {{ ref('1_prefix_startwith_number_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + adapter.quote('date'), + adapter.quote('text'), + ]) }} as _airbyte_1_prefix_startwith_number_hashid, + tmp.* +from {{ ref('1_prefix_startwith_number_ab2') }} tmp +-- 1_prefix_startwith_number +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..32d70c680aa9d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_cdc_excluded_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from {{ ref('dedup_cdc_excluded_scd') }} +-- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql new file mode 100644 index 0000000000000..b0cd4bf7cb134 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_cdc_excluded_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + adapter.quote('name'), + '_ab_cdc_lsn', + '_ab_cdc_updated_at', + '_ab_cdc_deleted_at', + ]) }} as _airbyte_dedup_cdc_excluded_hashid, + tmp.* +from {{ ref('dedup_cdc_excluded_ab2') }} tmp +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..42f7540dc6b9f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,28 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..f892feed3fe7d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + adapter.quote('date'), + 'timestamp_col', + adapter.quote('HKD@spéçiäl & characters'), + 'hkd_special___characters', + 'nzd', + 'usd', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql new file mode 100644 index 0000000000000..3451ce406b4d2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql @@ -0,0 +1,27 @@ +{{ config( + indexes = 
[{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('multiple_column_names_conflicts_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('User Id') }}, + user_id, + {{ adapter.quote('User id') }}, + {{ adapter.quote('user id') }}, + {{ adapter.quote('User@Id') }}, + userid, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_multiple_co__ames_conflicts_hashid +from {{ ref('multiple_column_names_conflicts_scd') }} +-- multiple_column_names_conflicts from {{ source('test_normalization', '_airbyte_raw_multiple_column_names_conflicts') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..c549b49128a62 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,24 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('multiple_column_names_conflicts_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + adapter.quote('User Id'), + 'user_id', + adapter.quote('User id'), + adapter.quote('user id'), + adapter.quote('User@Id'), + 'userid', + ]) }} as _airbyte_multiple_co__ames_conflicts_hashid, + tmp.* +from {{ ref('multiple_column_names_conflicts_ab2') }} tmp +-- multiple_column_names_conflicts +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql new file mode 100644 index 0000000000000..57ddb1908b9d6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql @@ -0,0 +1,26 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('pos_dedup_cdcx_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ 
current_timestamp() }} as _airbyte_normalized_at, + _airbyte_pos_dedup_cdcx_hashid +from {{ ref('pos_dedup_cdcx_scd') }} +-- pos_dedup_cdcx from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql new file mode 100644 index 0000000000000..692867ceaf4ed --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql @@ -0,0 +1,23 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('pos_dedup_cdcx_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + adapter.quote('name'), + '_ab_cdc_lsn', + '_ab_cdc_updated_at', + '_ab_cdc_deleted_at', + '_ab_cdc_log_pos', + ]) }} as _airbyte_pos_dedup_cdcx_hashid, + tmp.* +from {{ ref('pos_dedup_cdcx_ab2') }} tmp +-- pos_dedup_cdcx +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..603af9d4f80c3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + _ab_cdc_updated_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from {{ ref('renamed_dedup_cdc_excluded_scd') }} +-- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql new file mode 100644 index 0000000000000..96371bb4931a9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -0,0 +1,19 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + '_ab_cdc_updated_at', + ]) }} as _airbyte_renamed_dedup_cdc_excluded_hashid, + tmp.* +from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/types_testing.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/types_testing.sql new file mode 100644 index 0000000000000..8f979379656dc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/types_testing.sql @@ -0,0 +1,23 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('types_testing_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + airbyte_integer_column, + nullable_airbyte_integer_column, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_types_testing_hashid +from {{ ref('types_testing_scd') }} +-- types_testing from {{ source('test_normalization', '_airbyte_raw_types_testing') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/types_testing_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/types_testing_stg.sql new file mode 100644 index 0000000000000..3eabf9e4ae69a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/types_testing_stg.sql @@ -0,0 +1,20 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ 
ref('types_testing_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'airbyte_integer_column', + 'nullable_airbyte_integer_column', + ]) }} as _airbyte_types_testing_hashid, + tmp.* +from {{ ref('types_testing_ab2') }} tmp +-- types_testing +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..72e4956780448 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,30 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + {{ adapter.quote('id') }}, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + {{ adapter.quote('column`_\'with""_quotes') }}, + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..f51802427655e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/sources.yml @@ -0,0 +1,16 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_1_prefix_startwith_number + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts + - name: _airbyte_raw_pos_dedup_cdcx + - name: _airbyte_raw_renamed_dedup_cdc_excluded + - name: _airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql new file mode 100644 index 0000000000000..99a03831a8ba8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql @@ 
-0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as {{ adapter.quote('name') }}, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} as table_alias +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql new file mode 100644 index 0000000000000..3d8803e27a664 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_cdc_excluded_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast({{ adapter.quote('name') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('name') }}, + cast(_ab_cdc_lsn as {{ dbt_utils.type_float() }}) as _ab_cdc_lsn, + cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, + cast(_ab_cdc_deleted_at as {{ dbt_utils.type_float() }}) as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_cdc_excluded_ab1') }} +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..8dd3aff00d2cd --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..b5e700b36aa6a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('id') }}, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + cast(new_column as {{ dbt_utils.type_float() }}) as new_column, + cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, + cast(nzd as {{ dbt_utils.type_float() }}) as nzd, + cast(usd as {{ dbt_utils.type_bigint() }}) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as 
_airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql new file mode 100644 index 0000000000000..ba88ffa22b0d9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as {{ adapter.quote('date') }}, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, + {{ json_extract_scalar('_airbyte_data', ['column`_\'with"_quotes'], ['column___with__quotes']) }} as {{ adapter.quote('column`_\'with""_quotes') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} as table_alias +-- exchange_rate +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql new file mode 100644 index 0000000000000..e6cf7ee1e5760 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('exchange_rate_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_float() }}) as {{ 
adapter.quote('id') }}, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + cast(new_column as {{ dbt_utils.type_float() }}) as new_column, + cast({{ empty_string_to_null(adapter.quote('date')) }} as {{ type_date() }}) as {{ adapter.quote('date') }}, + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, + cast(nzd as {{ dbt_utils.type_float() }}) as nzd, + cast(usd as {{ dbt_utils.type_float() }}) as usd, + cast({{ adapter.quote('column`_\'with""_quotes') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('column`_\'with""_quotes') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('exchange_rate_ab1') }} +-- exchange_rate +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql new file mode 100644 index 0000000000000..96c96a4d4799c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'new_column', + adapter.quote('date'), + 'timestamp_col', + adapter.quote('HKD@spéçiäl & characters'), + 'nzd', + 'usd', + adapter.quote('column`_\'with""_quotes'), + ]) }} as _airbyte_exchange_rate_hashid, + tmp.* +from {{ ref('exchange_rate_ab2') }} tmp +-- exchange_rate +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql new file mode 100644 index 0000000000000..dfa39c2a71eb7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ 
adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['name'], ['name']) }} as {{ adapter.quote('name') }}, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_lsn'], ['_ab_cdc_lsn']) }} as _ab_cdc_lsn, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_deleted_at'], ['_ab_cdc_deleted_at']) }} as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} as table_alias +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql new file mode 100644 index 0000000000000..72f80140e0076 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} +select + cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast({{ adapter.quote('name') }} as {{ dbt_utils.type_string() }}) as {{ adapter.quote('name') }}, + cast(_ab_cdc_lsn as {{ dbt_utils.type_float() }}) as _ab_cdc_lsn, + cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, + cast(_ab_cdc_deleted_at as {{ dbt_utils.type_float() }}) as _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('renamed_dedup_cdc_excluded_ab1') }} +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..5affe9825e3be --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,169 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + 
database=this.database, + schema=this.schema, + identifier='dedup_cdc_excluded' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_cdc_excluded_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_cdc_excluded_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_cdc_excluded_stg') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column 
types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_cdc_excluded_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_cdc_excluded_stg') }} + -- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, + lag(_ab_cdc_lsn) over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_cdc_excluded_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..7e6225fb7cfc4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,177 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + 
schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.dedup_exchange_rate_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.dedup_exchange_rate_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + currency, + new_column, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + nzd, + usd, + {{ adapter.quote('date') }} as _airbyte_start_at, + lag({{ adapter.quote('date') }}) over ( + partition by cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + {{ adapter.quote('date') }} is null asc, + {{ adapter.quote('date') }} desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + 
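-- explanatory note: _airbyte_active_row above flags the single latest version of each
+ -- (id, currency, nzd) key, i.e. the row_number() = 1 winner under the "date" desc ordering
+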
_airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + currency, + new_column, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..96f720b3d2659 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,169 @@ +{{ config( + indexes = [{'columns':['_airbyte_active_row','_airbyte_unique_key_scd','_airbyte_emitted_at'],'type': 'btree'}], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='renamed_dedup_cdc_excluded' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
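+            -- Illustrative sketch only (an assumption, not the macro's verbatim output): on incremental
+            -- runs, incremental_clause('_airbyte_normalized_at', <target>) is expected to render to roughly
+            --   and coalesce(cast(_airbyte_normalized_at as timestamp with time zone) >=
+            --         (select max(cast(_airbyte_normalized_at as timestamp with time zone)) from <target>), true)
+            -- and to render to nothing on a full refresh.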
+ delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('renamed_dedup_cdc_excluded')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","delete from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg where _airbyte_emitted_at != (select max(_airbyte_emitted_at) from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg)"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('renamed_dedup_cdc_excluded_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('renamed_dedup_cdc_excluded_stg') }} + -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('renamed_dedup_cdc_excluded_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('renamed_dedup_cdc_excluded_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('renamed_dedup_cdc_excluded_stg') }} + -- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + ]) }} as _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_updated_at is 
null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by {{ adapter.quote('id') }} + order by + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_renamed_dedup_cdc_excluded_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at', '_ab_cdc_deleted_at', '_ab_cdc_updated_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..32d70c680aa9d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_cdc_excluded_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_cdc_excluded_hashid +from {{ ref('dedup_cdc_excluded_scd') }} +-- dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql new file 
mode 100644 index 0000000000000..b0cd4bf7cb134 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_cdc_excluded_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + adapter.quote('name'), + '_ab_cdc_lsn', + '_ab_cdc_updated_at', + '_ab_cdc_deleted_at', + ]) }} as _airbyte_dedup_cdc_excluded_hashid, + tmp.* +from {{ ref('dedup_cdc_excluded_ab2') }} tmp +-- dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..3e51ad4d72565 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,28 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + currency, + new_column, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..35c866ac4d364 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: 
{{ ref('dedup_exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + 'currency', + 'new_column', + adapter.quote('date'), + 'timestamp_col', + adapter.quote('HKD@spéçiäl & characters'), + 'nzd', + 'usd', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..672118dcf045c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,25 @@ +{{ config( + indexes = [{'columns':['_airbyte_unique_key'],'unique':True}], + unique_key = "_airbyte_unique_key", + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_scd') }} +select + _airbyte_unique_key, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_renamed_dedup_cdc_excluded_hashid +from {{ ref('renamed_dedup_cdc_excluded_scd') }} +-- renamed_dedup_cdc_excluded from {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql new file mode 100644 index 0000000000000..b2d5002b934a3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -0,0 +1,22 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab2') }} +select + {{ dbt_utils.surrogate_key([ + adapter.quote('id'), + adapter.quote('name'), + '_ab_cdc_lsn', + '_ab_cdc_updated_at', + '_ab_cdc_deleted_at', + ]) }} as _airbyte_renamed_dedup_cdc_excluded_hashid, + tmp.* +from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp +-- renamed_dedup_cdc_excluded +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..40b5ffb3f87d9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,26 @@ +{{ config( + indexes = [{'columns':['_airbyte_emitted_at'],'type':'btree'}], + unique_key = '_airbyte_ab_id', + schema = "test_normalization", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + {{ adapter.quote('id') }}, + currency, + new_column, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + nzd, + usd, + {{ adapter.quote('column`_\'with""_quotes') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml new file mode 100644 index 0000000000000..6a5d7bdc09a16 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/sources.yml @@ -0,0 +1,12 @@ +version: 2 +sources: +- name: test_normalization + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql new file mode 100644 index 0000000000000..6fe661c181e0b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/1_prefix_startwith_number_scd.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."1_prefix_startwith_number_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "1_prefix_startwith_number_scd__dbt_tmp" + ); + + + insert into "postgres".test_normalization."1_prefix_startwith_number_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "text", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", 
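+ -- (editorial note, added comment) this delete-then-insert pair is what dbt's incremental
+ -- materialization with a unique_key compiles to on Postgres: rows whose key already exists
+ -- in the freshly built "__dbt_tmp" relation are deleted, then re-inserted from it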
"_airbyte_1_prefix_startwith_number_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "text", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_1_prefix_startwith_number_hashid" + from "1_prefix_startwith_number_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..a1fba0a6d7ff4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."dedup_cdc_excluded_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "dedup_cdc_excluded_scd__dbt_tmp" + ); + + + insert into "postgres".test_normalization."dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" + from "dedup_cdc_excluded_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..0155cd0360b1e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."dedup_exchange_rate_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "dedup_exchange_rate_scd__dbt_tmp" + ); + + + insert into "postgres".test_normalization."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", 
"_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql new file mode 100644 index 0000000000000..76e8539124374 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/multiple_column_names_conflicts_scd.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."multiple_column_names_conflicts_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "multiple_column_names_conflicts_scd__dbt_tmp" + ); + + + insert into "postgres".test_normalization."multiple_column_names_conflicts_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_multiple_co__ames_conflicts_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_multiple_co__ames_conflicts_hashid" + from "multiple_column_names_conflicts_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql new file mode 100644 index 0000000000000..cafd98c1c127f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."pos_dedup_cdcx_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "pos_dedup_cdcx_scd__dbt_tmp" + ); + + + insert into "postgres".test_normalization."pos_dedup_cdcx_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_pos_dedup_cdcx_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", 
"_airbyte_pos_dedup_cdcx_hashid" + from "pos_dedup_cdcx_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..e6d1d5fd01605 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "renamed_dedup_cdc_excluded_scd__dbt_tmp" + ); + + + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + from "renamed_dedup_cdc_excluded_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/types_testing_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/types_testing_scd.sql new file mode 100644 index 0000000000000..8388a44777b85 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/types_testing_scd.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."types_testing_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "types_testing_scd__dbt_tmp" + ); + + + insert into "postgres".test_normalization."types_testing_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "airbyte_integer_column", "nullable_airbyte_integer_column", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_types_testing_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "airbyte_integer_column", "nullable_airbyte_integer_column", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_types_testing_hashid" + from "types_testing_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql new file mode 100644 index 0000000000000..181af872ea063 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."1_prefix_startwith_number" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "1_prefix_startwith_number__dbt_tmp" + ); + + + insert into "postgres".test_normalization."1_prefix_startwith_number" ("_airbyte_unique_key", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_1_prefix_startwith_number_hashid") + ( + select "_airbyte_unique_key", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_1_prefix_startwith_number_hashid" + from "1_prefix_startwith_number__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql new file mode 100644 index 0000000000000..d9a69c73ea41a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/1_prefix_startwith_number_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "1_prefix_startwith_number_stg__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."1_prefix_startwith_number_stg" ("_airbyte_1_prefix_startwith_number_hashid", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_1_prefix_startwith_number_hashid", "id", "date", "text", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "1_prefix_startwith_number_stg__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..b3012059b462d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."dedup_cdc_excluded" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "dedup_cdc_excluded__dbt_tmp" + ); + + + insert into "postgres".test_normalization."dedup_cdc_excluded" ("_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", 
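+ -- (editorial note, added comment) note the two incremental keys in this output: deduplicated
+ -- final tables such as this one delete/insert on _airbyte_unique_key, while the *_stg staging
+ -- tables delete/insert on _airbyte_ab_id, the per-record id, since staging keeps every record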
"_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") + ( + select "_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" + from "dedup_cdc_excluded__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql new file mode 100644 index 0000000000000..d9f833d441bfa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "dedup_cdc_excluded_stg__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" ("_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "dedup_cdc_excluded_stg__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..871b95c607c94 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."dedup_exchange_rate" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "dedup_exchange_rate__dbt_tmp" + ); + + + insert into "postgres".test_normalization."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate__dbt_tmp" + ) + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..1be7a088845ed --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "dedup_exchange_rate_stg__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" ("_airbyte_dedup_exchange_rate_hashid", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_dedup_exchange_rate_hashid", "id", "currency", "date", "timestamp_col", "HKD@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "dedup_exchange_rate_stg__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql new file mode 100644 index 0000000000000..525dc2add9077 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."multiple_column_names_conflicts" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "multiple_column_names_conflicts__dbt_tmp" + ); + + + insert into "postgres".test_normalization."multiple_column_names_conflicts" ("_airbyte_unique_key", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_multiple_co__ames_conflicts_hashid") + ( + select "_airbyte_unique_key", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_multiple_co__ames_conflicts_hashid" + from "multiple_column_names_conflicts__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..391889ecb40a9 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "multiple_column_names_conflicts_stg__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."multiple_column_names_conflicts_stg" ("_airbyte_multiple_co__ames_conflicts_hashid", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_multiple_co__ames_conflicts_hashid", "id", "User Id", "user_id", "User id", "user id", "User@Id", "userid", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "multiple_column_names_conflicts_stg__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql new file mode 100644 index 0000000000000..1d618406e5c6d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."pos_dedup_cdcx" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "pos_dedup_cdcx__dbt_tmp" + ); + + + insert into "postgres".test_normalization."pos_dedup_cdcx" ("_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_pos_dedup_cdcx_hashid") + ( + select "_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_pos_dedup_cdcx_hashid" + from "pos_dedup_cdcx__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql new file mode 100644 index 0000000000000..c627c7bea1b0e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/pos_dedup_cdcx_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."pos_dedup_cdcx_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "pos_dedup_cdcx_stg__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."pos_dedup_cdcx_stg" ("_airbyte_pos_dedup_cdcx_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_ab_id", "_airbyte_emitted_at", 
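+ -- (editorial note, added comment) every *_hashid value inserted in these staging tables comes
+ -- from dbt_utils.surrogate_key over the stream's columns, which on Postgres compiles to md5()
+ -- of the '-'-joined, coalesced text casts of those columns (visible in exchange_rate below)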
"_airbyte_normalized_at") + ( + select "_airbyte_pos_dedup_cdcx_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_ab_cdc_log_pos", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "pos_dedup_cdcx_stg__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..de66b557fa186 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."renamed_dedup_cdc_excluded" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "renamed_dedup_cdc_excluded__dbt_tmp" + ); + + + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + ( + select "_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + from "renamed_dedup_cdc_excluded__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql new file mode 100644 index 0000000000000..6711170dbc9c4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "renamed_dedup_cdc_excluded_stg__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "renamed_dedup_cdc_excluded_stg__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/types_testing.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/types_testing.sql new file mode 100644 index 
0000000000000..f01bbf8941931 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/types_testing.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."types_testing" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "types_testing__dbt_tmp" + ); + + + insert into "postgres".test_normalization."types_testing" ("_airbyte_unique_key", "id", "airbyte_integer_column", "nullable_airbyte_integer_column", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_types_testing_hashid") + ( + select "_airbyte_unique_key", "id", "airbyte_integer_column", "nullable_airbyte_integer_column", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_types_testing_hashid" + from "types_testing__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/types_testing_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/types_testing_stg.sql new file mode 100644 index 0000000000000..1295c519d0d7d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/types_testing_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."types_testing_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "types_testing_stg__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."types_testing_stg" ("_airbyte_types_testing_hashid", "id", "airbyte_integer_column", "nullable_airbyte_integer_column", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_types_testing_hashid", "id", "airbyte_integer_column", "nullable_airbyte_integer_column", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "types_testing_stg__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..2773af0d8fa35 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,107 @@ + + + create table "postgres".test_normalization."exchange_rate__dbt_tmp" + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_exchange_rate +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'currency') as currency, + jsonb_extract_path_text(_airbyte_data, 'date') as "date", + jsonb_extract_path_text(_airbyte_data, 'timestamp_col') as 
timestamp_col, + jsonb_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + jsonb_extract_path_text(_airbyte_data, 'HKD_special___characters') as hkd_special___characters, + jsonb_extract_path_text(_airbyte_data, 'NZD') as nzd, + jsonb_extract_path_text(_airbyte_data, 'USD') as usd, + jsonb_extract_path_text(_airbyte_data, 'column`_''with"_quotes') as "column`_'with""_quotes", + jsonb_extract_path_text(_airbyte_data, 'datetime_tz') as datetime_tz, + jsonb_extract_path_text(_airbyte_data, 'datetime_no_tz') as datetime_no_tz, + jsonb_extract_path_text(_airbyte_data, 'time_tz') as time_tz, + jsonb_extract_path_text(_airbyte_data, 'time_no_tz') as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast("id" as + bigint +) as "id", + cast(currency as text) as currency, + cast(nullif("date", '') as + date +) as "date", + cast(nullif(timestamp_col, '') as + timestamp with time zone +) as timestamp_col, + cast("HKD@spéçiäl & characters" as + float +) as "HKD@spéçiäl & characters", + cast(hkd_special___characters as text) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", + cast(nullif(datetime_tz, '') as + timestamp with time zone +) as datetime_tz, + cast(nullif(datetime_no_tz, '') as + timestamp +) as datetime_no_tz, + cast(nullif(time_tz, '') as + time with time zone +) as time_tz, + cast(nullif(time_no_tz, '') as + time +) as time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') || '-' || coalesce(cast(datetime_tz as text), '') || '-' || coalesce(cast(datetime_no_tz as text), '') || '-' || coalesce(cast(time_tz as text), '') || '-' || coalesce(cast(time_no_tz as text), '') as text)) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + "id", + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + "column`_'with""_quotes", + datetime_tz, + datetime_no_tz, + time_tz, + time_no_tz, + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from "postgres".test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + ); \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..a1fba0a6d7ff4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."dedup_cdc_excluded_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "dedup_cdc_excluded_scd__dbt_tmp" + ); + + + insert into "postgres".test_normalization."dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" + from "dedup_cdc_excluded_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..521c016411b86 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."dedup_exchange_rate_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "dedup_exchange_rate_scd__dbt_tmp" + ); + + + insert into "postgres".test_normalization."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "new_column", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "new_column", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate_scd__dbt_tmp" + ) + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql new file mode 100644 index 0000000000000..3a30f5175e851 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "renamed_dedup_cdc_excluded_scd__dbt_tmp" + ); + + + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + from "renamed_dedup_cdc_excluded_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..b3012059b462d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."dedup_cdc_excluded" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "dedup_cdc_excluded__dbt_tmp" + ); + + + insert into "postgres".test_normalization."dedup_cdc_excluded" ("_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid") + ( + select "_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_cdc_excluded_hashid" + from "dedup_cdc_excluded__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql new file 
mode 100644 index 0000000000000..d9f833d441bfa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_cdc_excluded_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "dedup_cdc_excluded_stg__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."dedup_cdc_excluded_stg" ("_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "dedup_cdc_excluded_stg__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..9c85a59293773 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."dedup_exchange_rate" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "dedup_exchange_rate__dbt_tmp" + ); + + + insert into "postgres".test_normalization."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..1cca439173314 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "dedup_exchange_rate_stg__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."dedup_exchange_rate_stg" ("_airbyte_dedup_exchange_rate_hashid", "id", "currency", "new_column", "date", "timestamp_col", 
"HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_dedup_exchange_rate_hashid", "id", "currency", "new_column", "date", "timestamp_col", "HKD@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "dedup_exchange_rate_stg__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql new file mode 100644 index 0000000000000..f4ce2e8305828 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -0,0 +1,15 @@ + + + delete from "postgres".test_normalization."renamed_dedup_cdc_excluded" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "renamed_dedup_cdc_excluded__dbt_tmp" + ); + + + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + ( + select "_airbyte_unique_key", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + from "renamed_dedup_cdc_excluded__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql new file mode 100644 index 0000000000000..18d5b4ab827c2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -0,0 +1,15 @@ + + + delete from "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" + where (_airbyte_ab_id) in ( + select (_airbyte_ab_id) + from "renamed_dedup_cdc_excluded_stg__dbt_tmp" + ); + + + insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + ( + select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + from "renamed_dedup_cdc_excluded_stg__dbt_tmp" + ) + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..155df4698f2d1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,89 @@ + + + create table "postgres".test_normalization."exchange_rate__dbt_tmp" + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "postgres".test_normalization._airbyte_raw_exchange_rate +select + jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, 'currency') as currency, + jsonb_extract_path_text(_airbyte_data, 'new_column') as new_column, + jsonb_extract_path_text(_airbyte_data, 'date') as "date", + jsonb_extract_path_text(_airbyte_data, 'timestamp_col') as timestamp_col, + jsonb_extract_path_text(_airbyte_data, 'HKD@spéçiäl & characters') as "HKD@spéçiäl & characters", + jsonb_extract_path_text(_airbyte_data, 'NZD') as nzd, + jsonb_extract_path_text(_airbyte_data, 'USD') as usd, + jsonb_extract_path_text(_airbyte_data, 'column`_''with"_quotes') as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from "postgres".test_normalization._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast("id" as + float +) as "id", + cast(currency as text) as currency, + cast(new_column as + float +) as new_column, + cast(nullif("date", '') as + date +) as "date", + cast(nullif(timestamp_col, '') as + timestamp with time zone +) as timestamp_col, + cast("HKD@spéçiäl & characters" as + float +) as "HKD@spéçiäl & characters", + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + md5(cast(coalesce(cast("id" as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(new_column as text), '') || '-' || coalesce(cast("date" as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + "id", + currency, + new_column, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + nzd, + usd, + 
"column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + now() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from "postgres".test_normalization._airbyte_raw_exchange_rate +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml new file mode 100755 index 0000000000000..767544968e0b7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/dbt_project.yml @@ -0,0 +1,127 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + +transient: false + +pre-hook: SET enable_case_sensitive_identifier to TRUE + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +dispatch: +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + nested_stream_with_complex_columns_resulting_into_long_names_ab1: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_ab2: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_stg: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_scd: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab1: test_normalization_xjvlg._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab2: test_normalization_xjvlg._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names_ab3: test_normalization_xjvlg._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + non_nested_stream_without_namespace_resulting_into_long_names: test_normalization_xjvlg._airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + some_stream_that_was_empty_ab1: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_ab2: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_stg: 
test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty_scd: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty + some_stream_that_was_empty: test_normalization_xjvlg._airbyte_raw_some_stream_that_was_empty + simple_stream_with_namespace_resulting_into_long_names_ab1: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_namespace_resulting_into_long_names_ab2: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_namespace_resulting_into_long_names_ab3: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + simple_stream_with_namespace_resulting_into_long_names: test_normalization_namespace._airbyte_raw_simple_stream_with_namespace_resulting_into_long_names + conflict_stream_name_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_name + conflict_stream_name_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_name + conflict_stream_name_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_name + conflict_stream_name: test_normalization_xjvlg._airbyte_raw_conflict_stream_name + conflict_stream_scalar_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_scalar + conflict_stream_scalar: test_normalization_xjvlg._airbyte_raw_conflict_stream_scalar + conflict_stream_array_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_array + conflict_stream_array_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_array + conflict_stream_array_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_array + conflict_stream_array: test_normalization_xjvlg._airbyte_raw_conflict_stream_array + unnest_alias_ab1: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias_ab2: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias_ab3: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias: test_normalization_xjvlg._airbyte_raw_unnest_alias + arrays_ab1: test_normalization_xjvlg._airbyte_raw_arrays + arrays_ab2: test_normalization_xjvlg._airbyte_raw_arrays + arrays_ab3: test_normalization_xjvlg._airbyte_raw_arrays + arrays: test_normalization_xjvlg._airbyte_raw_arrays + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name: 
test_normalization_xjvlg._airbyte_raw_conflict_stream_name + unnest_alias_children_ab1: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias_children_ab2: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias_children_ab3: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias_children: test_normalization_xjvlg._airbyte_raw_unnest_alias + arrays_nested_array_parent_ab1: test_normalization_xjvlg._airbyte_raw_arrays + arrays_nested_array_parent_ab2: test_normalization_xjvlg._airbyte_raw_arrays + arrays_nested_array_parent_ab3: test_normalization_xjvlg._airbyte_raw_arrays + arrays_nested_array_parent: test_normalization_xjvlg._airbyte_raw_arrays + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + nested_stream_with_complex_columns_resulting_into_long_names_partition_data: test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab1: test_normalization_xjvlg._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab2: test_normalization_xjvlg._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name_ab3: test_normalization_xjvlg._airbyte_raw_conflict_stream_name + conflict_stream_name_conflict_stream_name_conflict_stream_name: test_normalization_xjvlg._airbyte_raw_conflict_stream_name + unnest_alias_children_owner_ab1: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab2: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias_children_owner_ab3: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias_children_owner: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab1: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab2: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes_ab3: test_normalization_xjvlg._airbyte_raw_unnest_alias + unnest_alias_children_owner_column___with__quotes: test_normalization_xjvlg._airbyte_raw_unnest_alias diff --git 
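The Redshift SCD models below derive each record's Type 2 validity window from two window functions sharing one ordering (date is null asc, date desc, _airbyte_emitted_at desc): lag(date) pulls the start date of the next newer version down as the current row's _airbyte_end_at, and row_number() = 1 flags the newest version as the active row. Reduced to a hypothetical users table with an updated_at cursor:

-- Illustrative SCD Type 2 windowing; the users table is hypothetical.
select
  id,
  updated_at as _airbyte_start_at,
  -- rows are ordered newest-first, so lag() is the next newer
  -- version's start date; null for the latest version
  lag(updated_at) over (
    partition by id
    order by updated_at is null asc, updated_at desc
  ) as _airbyte_end_at,
  -- the newest version per id is the active row
  case when row_number() over (
    partition by id
    order by updated_at is null asc, updated_at desc
  ) = 1 then 1 else 0 end as _airbyte_active_row
from users;

A later dedup_data CTE then keeps one row per (_airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at) and hashes that triple into _airbyte_unique_key_scd, the key the delete+insert runs shown earlier operate on.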
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql new file mode 100644 index 0000000000000..9b59d6d77c88c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -0,0 +1,78 @@ + + + + create table + "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" + + + compound sortkey(_airbyte_active_row,_airbyte_unique_key_scd,_airbyte_emitted_at) + + as ( + +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +with + +input_data as ( + select * + from "integrationtests"._airbyte_test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_stg" + -- nested_stream_with_complex_columns_resulting_into_long_names from "integrationtests".test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast(id as text), '') as text)) as _airbyte_unique_key, + id, + date, + "partition", + date as _airbyte_start_at, + lag(date) over ( + partition by id + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + date, + "partition", + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql new file mode 100644 index 0000000000000..184fa2bf11042 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -0,0 +1,29 @@ + + + + create table + "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names" + + + compound sortkey(_airbyte_unique_key,_airbyte_emitted_at) + + as ( + +-- Final base SQL model +-- depends_on: "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" +select + _airbyte_unique_key, + id, + date, + "partition", + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid +from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" +-- nested_stream_with_complex_columns_resulting_into_long_names from "integrationtests".test_normalization_xjvlg._airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql new file mode 100644 index 0000000000000..4e1c7b1f39427 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -0,0 +1,69 @@ + + + + create table + "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" + + + compound sortkey(_airbyte_emitted_at) + + as ( + +with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" +select + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, + "partition"."double_array_data" as double_array_data, + "partition"."DATA" as data, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" as table_alias +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and "partition" is not null + +), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 as ( + +-- SQL 
model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 +select + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, + double_array_data, + data, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1 +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 +select + md5(cast(coalesce(cast(_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid as text), '') || '-' || coalesce(cast(json_serialize(double_array_data) as text), '') || '-' || coalesce(cast(json_serialize(data) as text), '') as text)) as _airbyte_partition_hashid, + tmp.* +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab2 tmp +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 +select + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, + double_array_data, + data, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_partition_hashid +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3 +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql new file mode 100644 index 0000000000000..e19271e39a6fb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql @@ -0,0 +1,74 @@ + + + + create table + "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition_data" + + + compound sortkey(_airbyte_emitted_at) + + as ( + +with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" + + with joined as ( + select + 
table_alias._airbyte_partition_hashid as _airbyte_hashid, + _airbyte_nested_data + from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias, table_alias.data as _airbyte_nested_data + ) +select + _airbyte_partition_hashid, + case when _airbyte_nested_data."currency" != '' then _airbyte_nested_data."currency" end as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias +-- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +left join joined on _airbyte_partition_hashid = joined._airbyte_hashid +where 1 = 1 +and data is not null + +), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1 +select + _airbyte_partition_hashid, + cast(currency as text) as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1 +-- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2 +select + md5(cast(coalesce(cast(_airbyte_partition_hashid as text), '') || '-' || coalesce(cast(currency as text), '') as text)) as _airbyte_data_hashid, + tmp.* +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab2 tmp +-- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3 +select + _airbyte_partition_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_data_hashid +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3 +-- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql new file mode 100644 index 0000000000000..7e38b76f87fe4 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -0,0 +1,74 @@ + + + + create table + "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data" + + + compound sortkey(_airbyte_emitted_at) + + as ( + +with __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" + + with joined as ( + select + table_alias._airbyte_partition_hashid as _airbyte_hashid, + _airbyte_nested_data + from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias, table_alias.double_array_data as _airbyte_nested_data + ) +select + _airbyte_partition_hashid, + case when _airbyte_nested_data."id" != '' then _airbyte_nested_data."id" end as id, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" as table_alias +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +left join joined on _airbyte_partition_hashid = joined._airbyte_hashid +where 1 = 1 +and double_array_data is not null + +), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 +select + _airbyte_partition_hashid, + cast(id as text) as id, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1 +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +), __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 +select + md5(cast(coalesce(cast(_airbyte_partition_hashid as text), '') || '-' || coalesce(cast(id as text), '') as text)) as _airbyte_double_array_data_hashid, + tmp.* +from __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab2 tmp +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 +select + _airbyte_partition_hashid, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_double_array_data_hashid +from 
__dbt__cte__nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3 +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" +where 1 = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql new file mode 100644 index 0000000000000..ed49a5e916064 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization_xjvlg", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, + {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} as table_alias +-- nested_stream_with_complex_columns_resulting_into_long_names +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql new file mode 100644 index 0000000000000..19ab94bca1518 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_ab2.sql @@ -0,0 +1,20 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization_xjvlg", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }} +select + cast(id as {{ dbt_utils.type_string() }}) as id, + cast(date as {{ 
dbt_utils.type_string() }}) as date, + cast({{ adapter.quote('partition') }} as {{ type_json() }}) as {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_ab1') }} +-- nested_stream_with_complex_columns_resulting_into_long_names +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql new file mode 100644 index 0000000000000..18a21b4729811 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_ab1.sql @@ -0,0 +1,20 @@ +{{ config( + sort = "_airbyte_emitted_at", + schema = "_airbyte_test_normalization_xjvlg", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} +select + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, + {{ json_extract_array(adapter.quote('partition'), ['double_array_data'], ['double_array_data']) }} as double_array_data, + {{ json_extract_array(adapter.quote('partition'), ['DATA'], ['DATA']) }} as data, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} as table_alias +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and {{ adapter.quote('partition') }} is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql new file mode 100644 index 0000000000000..4cc3285a5f6e2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab1.sql @@ -0,0 +1,21 @@ +{{ config( + sort = "_airbyte_emitted_at", + schema = "_airbyte_test_normalization_xjvlg", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ 
ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} +{{ unnest_cte(ref('nested_stream_with_complex_columns_resulting_into_long_names_partition'), 'partition', 'data') }} +select + _airbyte_partition_hashid, + {{ json_extract_scalar(unnested_column_value('data'), ['currency'], ['currency']) }} as currency, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} as table_alias +-- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +{{ cross_join_unnest('partition', 'data') }} +where 1 = 1 +and data is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql new file mode 100644 index 0000000000000..4876b27d7cc0f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_ctes/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab1.sql @@ -0,0 +1,21 @@ +{{ config( + sort = "_airbyte_emitted_at", + schema = "_airbyte_test_normalization_xjvlg", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} +{{ unnest_cte(ref('nested_stream_with_complex_columns_resulting_into_long_names_partition'), 'partition', 'double_array_data') }} +select + _airbyte_partition_hashid, + {{ json_extract_scalar(unnested_column_value('double_array_data'), ['id'], ['id']) }} as id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} as table_alias +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +{{ cross_join_unnest('partition', 'double_array_data') }} +where 1 = 1 +and double_array_data is not null +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql new file mode 100644 index 0000000000000..a629e4de4e5d6 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -0,0 +1,163 @@ +{{ config( + sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization_xjvlg", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='nested_stream_with_complex_columns_resulting_into_long_names' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('nested_stream_with_complex_columns_resulting_into_long_names')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization_xjvlg.nested_stream_with_complex_columns_resulting_into_long_names_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} + -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('nested_stream_with_complex_columns_resulting_into_long_names_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_stg') }} + -- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + ]) }} as _airbyte_unique_key, + id, + date, + {{ adapter.quote('partition') }}, + date as _airbyte_start_at, + lag(date) over ( + partition by id + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure 
de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + date, + {{ adapter.quote('partition') }}, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql new file mode 100644 index 0000000000000..f95f159eedc9f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -0,0 +1,23 @@ +{{ config( + sort = ["_airbyte_unique_key", "_airbyte_emitted_at"], + unique_key = "_airbyte_unique_key", + schema = "test_normalization_xjvlg", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} +select + _airbyte_unique_key, + id, + date, + {{ adapter.quote('partition') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} +-- nested_stream_with_complex_columns_resulting_into_long_names from {{ source('test_normalization_xjvlg', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql new file mode 100644 index 0000000000000..18a73cf63b7f7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -0,0 +1,20 @@ +{{ config( + sort = "_airbyte_emitted_at", + schema = "test_normalization_xjvlg", + tags = [ 
"nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }} +select + _airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid, + double_array_data, + data, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_partition_hashid +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_ab3') }} +-- partition at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_scd') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql new file mode 100644 index 0000000000000..ad3d8a9a61b53 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql @@ -0,0 +1,19 @@ +{{ config( + sort = "_airbyte_emitted_at", + schema = "test_normalization_xjvlg", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3') }} +select + _airbyte_partition_hashid, + currency, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_data_hashid +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_data_ab3') }} +-- data at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql new file mode 100644 index 0000000000000..2059cb60a01ae --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -0,0 +1,19 @@ +{{ config( + sort = "_airbyte_emitted_at", + schema = "test_normalization_xjvlg", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }} +select + 
_airbyte_partition_hashid, + id, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_double_array_data_hashid +from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data_ab3') }} +-- double_array_data at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..56faa01c65dc7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/sources.yml @@ -0,0 +1,23 @@ +version: 2 +sources: +- name: test_normalization_namespace + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_simple_stream_with_namespace_resulting_into_long_names +- name: test_normalization_xjvlg + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_arrays + - name: _airbyte_raw_conflict_stream_array + - name: _airbyte_raw_conflict_stream_name + - name: _airbyte_raw_conflict_stream_scalar + - name: _airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names + - name: _airbyte_raw_non_nested_stream_without_namespace_resulting_into_long_names + - name: _airbyte_raw_some_stream_that_was_empty + - name: _airbyte_raw_unnest_alias diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql new file mode 100644 index 0000000000000..45c63e057a5ed --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -0,0 +1,15 @@ + + + delete from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ); + + + insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "date", "partition", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", 
"_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql new file mode 100644 index 0000000000000..e32bb140a0990 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names.sql @@ -0,0 +1,15 @@ + + + delete from "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ); + + + insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names" ("_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid") + ( + select "_airbyte_unique_key", "id", "date", "partition", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql new file mode 100644 index 0000000000000..9944a91ca6425 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition.sql @@ -0,0 +1,9 @@ + + + + insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition" ("_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid", "double_array_data", "data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid") + ( + select "_airbyte_nested_stream_with_complex_columns_resulting_into_long_names_hashid", "double_array_data", "data", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_partition_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql new file mode 100644 index 0000000000000..52b4bd4fc5f41 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_data.sql @@ -0,0 +1,9 @@ + + + + insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition_data" ("_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid") + ( + select "_airbyte_partition_hashid", "currency", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_data_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql new file mode 100644 index 0000000000000..91aaa5e85cc0a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/second_output/airbyte_incremental/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data.sql @@ -0,0 +1,9 @@ + + + + insert into "integrationtests".test_normalization_xjvlg."nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data" ("_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid") + ( + select "_airbyte_partition_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_double_array_data_hashid" + from "nested_stream_with_complex_columns_resulti__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml new file mode 100755 index 0000000000000..c645baf3c5fe8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/dbt_project.yml @@ -0,0 +1,72 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- modified_models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros 
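# The path settings that follow point one level above the generated project:
# compiled SQL is written to ../build and dbt logs to ../logs, so run artifacts
# stay out of the checked-in model directories, and dbt packages install to the
# image-level /dbt directory. Note that this project reads its models from
# modified_models (the rerun after a schema change), unlike the first run's
# project below, which reads from models.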
+target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + +transient: false + +pre-hook: SET enable_case_sensitive_identifier to TRUE + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +dispatch: +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization_bhhpj._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization_bhhpj._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization_bhhpj._airbyte_raw_exchange_rate + exchange_rate: test_normalization_bhhpj._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml new file mode 100644 index 0000000000000..70d0b5b4fa3b6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_dbt_project.yml @@ -0,0 +1,92 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + +transient: false + +pre-hook: SET enable_case_sensitive_identifier to TRUE + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + 
airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +dispatch: +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + exchange_rate_ab1: test_normalization_bhhpj._airbyte_raw_exchange_rate + exchange_rate_ab2: test_normalization_bhhpj._airbyte_raw_exchange_rate + exchange_rate_ab3: test_normalization_bhhpj._airbyte_raw_exchange_rate + exchange_rate: test_normalization_bhhpj._airbyte_raw_exchange_rate + dedup_exchange_rate_ab1: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_ab2: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_stg: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate_scd: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate + dedup_exchange_rate: test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate + renamed_dedup_cdc_excluded_ab1: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_ab2: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_stg: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded_scd: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded + renamed_dedup_cdc_excluded: test_normalization_bhhpj._airbyte_raw_renamed_dedup_cdc_excluded + dedup_cdc_excluded_ab1: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_ab2: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_stg: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded_scd: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded + dedup_cdc_excluded: test_normalization_bhhpj._airbyte_raw_dedup_cdc_excluded + pos_dedup_cdcx_ab1: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_ab2: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_stg: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx_scd: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx + pos_dedup_cdcx: test_normalization_bhhpj._airbyte_raw_pos_dedup_cdcx + 1_prefix_startwith_number_ab1: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_ab2: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_stg: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number_scd: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number + 1_prefix_startwith_number: test_normalization_bhhpj._airbyte_raw_1_prefix_startwith_number + multiple_column_names_conflicts_ab1: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_ab2: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_stg: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts_scd: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts + multiple_column_names_conflicts: test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts + types_testing_ab1: test_normalization_bhhpj._airbyte_raw_types_testing + types_testing_ab2: test_normalization_bhhpj._airbyte_raw_types_testing + 
types_testing_stg: test_normalization_bhhpj._airbyte_raw_types_testing + types_testing_scd: test_normalization_bhhpj._airbyte_raw_types_testing + types_testing: test_normalization_bhhpj._airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..3c1032d3297f2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,88 @@ + + + + create table + "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" + + + compound sortkey(_airbyte_active_row,_airbyte_unique_key_scd,_airbyte_emitted_at) + + as ( + +-- depends_on: ref('dedup_exchange_rate_stg') +with + +input_data as ( + select * + from "integrationtests"._airbyte_test_normalization_bhhpj."dedup_exchange_rate_stg" + -- dedup_exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(nzd as text), '') as text)) as _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + "hkd@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + date as _airbyte_start_at, + lag(date) over ( + partition by id, currency, cast(nzd as text) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id, currency, cast(nzd as text) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + md5(cast(coalesce(cast(_airbyte_unique_key as text), '') || '-' || coalesce(cast(_airbyte_start_at as text), '') || '-' || coalesce(cast(_airbyte_emitted_at as text), '') as text)) as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + date, + timestamp_col, + "hkd@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..b6903fe4ceb0d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,34 @@ + + + + create table + "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" + + + compound sortkey(_airbyte_unique_key,_airbyte_emitted_at) + + as ( + +-- Final base SQL model +-- depends_on: "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" +select + _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + "hkd@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" +-- dedup_exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate +where 1 = 1 +and _airbyte_active_row = 1 + + ); + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..e2bd3830cb423 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,92 @@ + + + create table + "integrationtests".test_normalization_bhhpj."exchange_rate__dbt_tmp" + + + compound sortkey(_airbyte_emitted_at) + + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its 
adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as + date +) as date, + cast(nullif(timestamp_col::varchar, '') as + timestamp with time zone +) as timestamp_col, + cast("hkd@spéçiäl & characters" as + float +) as "hkd@spéçiäl & characters", + cast(hkd_special___characters as text) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + id, + currency, + date, + timestamp_col, + "hkd@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..903a3141f6256 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,66 @@ + + + create view "integrationtests"._airbyte_test_normalization_bhhpj."dedup_exchange_rate_stg__dbt_tmp" as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end 
as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as + date +) as date, + cast(nullif(timestamp_col::varchar, '') as + timestamp with time zone +) as timestamp_col, + cast("hkd@spéçiäl & characters" as + float +) as "hkd@spéçiäl & characters", + cast(hkd_special___characters as text) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql new file mode 100644 index 0000000000000..b496abf0c5ecd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_views/test_normalization/multiple_column_names_conflicts_stg.sql @@ -0,0 +1,62 @@ + + + create view "integrationtests"._airbyte_test_normalization_bhhpj."multiple_column_names_conflicts_stg__dbt_tmp" as ( + +with __dbt__cte__multiple_column_names_conflicts_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."User Id" != '' then _airbyte_data."User Id" end as "user id", + case when _airbyte_data."user_id" != '' then _airbyte_data."user_id" end as user_id, + case when _airbyte_data."User id" != '' then _airbyte_data."User id" end as "user id_1", + case when _airbyte_data."user id" != '' then 
_airbyte_data."user id" end as "user id_2", + case when _airbyte_data."User@Id" != '' then _airbyte_data."User@Id" end as "user@id", + case when _airbyte_data."UserId" != '' then _airbyte_data."UserId" end as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "integrationtests".test_normalization_bhhpj._airbyte_raw_multiple_column_names_conflicts as table_alias +-- multiple_column_names_conflicts +where 1 = 1 + +), __dbt__cte__multiple_column_names_conflicts_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1 +select + cast(id as + bigint +) as id, + cast("user id" as text) as "user id", + cast(user_id as + float +) as user_id, + cast("user id_1" as + float +) as "user id_1", + cast("user id_2" as + float +) as "user id_2", + cast("user@id" as text) as "user@id", + cast(userid as + float +) as userid, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__multiple_column_names_conflicts_ab1 +-- multiple_column_names_conflicts +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast("user id" as text), '') || '-' || coalesce(cast(user_id as text), '') || '-' || coalesce(cast("user id_1" as text), '') || '-' || coalesce(cast("user id_2" as text), '') || '-' || coalesce(cast("user@id" as text), '') || '-' || coalesce(cast(userid as text), '') as text)) as _airbyte_multiple_column_names_conflicts_hashid, + tmp.* +from __dbt__cte__multiple_column_names_conflicts_ab2 tmp +-- multiple_column_names_conflicts +where 1 = 1 + + ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..b8200f8bf6791 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,25 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization_bhhpj", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('hkd@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as hkd_special___characters, + {{ 
json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..420c7c9869752 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,25 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization_bhhpj", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} +select + cast(id as {{ dbt_utils.type_bigint() }}) as id, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + cast({{ empty_string_to_null('date') }} as {{ type_date() }}) as date, + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast({{ adapter.quote('hkd@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('hkd@spéçiäl & characters') }}, + cast(hkd_special___characters as {{ dbt_utils.type_string() }}) as hkd_special___characters, + cast(nzd as {{ dbt_utils.type_float() }}) as nzd, + cast(usd as {{ dbt_utils.type_float() }}) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..b716e29bdf6ef --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,177 @@ +{{ config( + sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization_bhhpj", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. 
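 (adapter.get_relation returns none when the table is absent from the
 warehouse, which is exactly what the guard below checks. This whole block
 runs as a post_hook, i.e. only after the SCD model itself has been rebuilt,
 so the _airbyte_active_row flags it reads are already up to date.)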
+ Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization_bhhpj.dedup_exchange_rate_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from 
new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + date as _airbyte_start_at, + lag(date) over ( + partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..8f8fd8c8e9bc7 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,28 @@ +{{ config( + sort = ["_airbyte_unique_key", "_airbyte_emitted_at"], + unique_key = "_airbyte_unique_key", + schema = "test_normalization_bhhpj", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} +select + _airbyte_unique_key, + id, + currency, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization_bhhpj', 
'_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..a66a0b168c2e4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,26 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "test_normalization_bhhpj", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + id, + currency, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, + {{ adapter.quote('column`_\'with""_quotes') }}, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..db45cc80a67aa --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,25 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization_bhhpj", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'date', + 'timestamp_col', + adapter.quote('hkd@spéçiäl & characters'), + 'hkd_special___characters', + 'nzd', + 'usd', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..6aa768851a80c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/sources.yml @@ -0,0 +1,16 @@ +version: 2 +sources: +- name: test_normalization_bhhpj + quoting: + database: true + schema: false + identifier: false + tables: + 
- name: _airbyte_raw_1_prefix_startwith_number + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_multiple_column_names_conflicts + - name: _airbyte_raw_pos_dedup_cdcx + - name: _airbyte_raw_renamed_dedup_cdc_excluded + - name: _airbyte_raw_types_testing diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql new file mode 100644 index 0000000000000..cfb1d029d88ff --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab1.sql @@ -0,0 +1,25 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization_bhhpj", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as currency, + {{ json_extract_scalar('_airbyte_data', ['new_column'], ['new_column']) }} as new_column, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as date, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as timestamp_col, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('hkd@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as nzd, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} as table_alias +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql new file mode 100644 index 0000000000000..2a9275c69a1ec --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_ctes/test_normalization/dedup_exchange_rate_ab2.sql @@ -0,0 +1,25 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization_bhhpj", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('dedup_exchange_rate_ab1') }} +select + cast(id as {{ 
dbt_utils.type_float() }}) as id, + cast(currency as {{ dbt_utils.type_string() }}) as currency, + cast(new_column as {{ dbt_utils.type_float() }}) as new_column, + cast({{ empty_string_to_null('date') }} as {{ type_date() }}) as date, + cast({{ empty_string_to_null('timestamp_col') }} as {{ type_timestamp_with_timezone() }}) as timestamp_col, + cast({{ adapter.quote('hkd@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('hkd@spéçiäl & characters') }}, + cast(nzd as {{ dbt_utils.type_float() }}) as nzd, + cast(usd as {{ dbt_utils.type_bigint() }}) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at +from {{ ref('dedup_exchange_rate_ab1') }} +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..9f8c382ff834b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,177 @@ +{{ config( + sort = ["_airbyte_active_row", "_airbyte_unique_key_scd", "_airbyte_emitted_at"], + unique_key = "_airbyte_unique_key_scd", + schema = "test_normalization_bhhpj", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='dedup_exchange_rate' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_airbyte_unique_key' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._airbyte_unique_key in ( + select recent_records.unique_key + from ( + select distinct _airbyte_unique_key as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' 
+ adapter.quote('dedup_exchange_rate')) }} + ) recent_records + left join ( + select _airbyte_unique_key as unique_key, count(_airbyte_unique_key) as active_count + from {{ this }} + where _airbyte_active_row = 1 {{ incremental_clause('_airbyte_normalized_at', this.schema + '.' + adapter.quote('dedup_exchange_rate')) }} + group by _airbyte_unique_key + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _airbyte_test_normalization_bhhpj.dedup_exchange_rate_stg"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('dedup_exchange_rate_stg') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} + where 1 = 1 + {{ incremental_clause('_airbyte_emitted_at', this) }} +), +new_data_ids as ( + -- build a subset of _airbyte_unique_key from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('dedup_exchange_rate_stg'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._airbyte_unique_key = new_data_ids._airbyte_unique_key + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._airbyte_ab_id = inc_data._airbyte_ab_id + where _airbyte_active_row = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from new_data + union all + select {{ dbt_utils.star(ref('dedup_exchange_rate_stg')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('dedup_exchange_rate_stg') }} + -- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'nzd', + ]) }} as _airbyte_unique_key, + id, + currency, + new_column, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + nzd, + usd, + date as _airbyte_start_at, + lag(date) over ( + partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) as _airbyte_end_at, + case when row_number() over ( + partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) + order by + date is null asc, + date desc, + _airbyte_emitted_at desc + ) = 1 then 1 else 0 end as _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + _airbyte_dedup_exchange_rate_hashid + from input_data +), 
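+-- Illustrative note, not emitted by normalization itself: a hedged sketch of the window
+-- semantics in scd_data above. For three rows sharing one (id, currency, nzd) key with
+-- dates d1 < d2 < d3, the lag()/row_number() windows produce:
+--   date = d3: _airbyte_end_at = null, _airbyte_active_row = 1  (current version)
+--   date = d2: _airbyte_end_at = d3,   _airbyte_active_row = 0
+--   date = d1: _airbyte_end_at = d2,   _airbyte_active_row = 0
+-- Each version is closed by the start date of the version that superseded it, and exactly
+-- one row per key stays active.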
+dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at + order by _airbyte_active_row desc, _airbyte_ab_id + ) as _airbyte_row_num, + {{ dbt_utils.surrogate_key([ + '_airbyte_unique_key', + '_airbyte_start_at', + '_airbyte_emitted_at' + ]) }} as _airbyte_unique_key_scd, + scd_data.* + from scd_data +) +select + _airbyte_unique_key, + _airbyte_unique_key_scd, + id, + currency, + new_column, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + nzd, + usd, + _airbyte_start_at, + _airbyte_end_at, + _airbyte_active_row, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from dedup_data where _airbyte_row_num = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..c5fed3b30237f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,28 @@ +{{ config( + sort = ["_airbyte_unique_key", "_airbyte_emitted_at"], + unique_key = "_airbyte_unique_key", + schema = "test_normalization_bhhpj", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('dedup_exchange_rate_scd') }} +select + _airbyte_unique_key, + id, + currency, + new_column, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + nzd, + usd, + _airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_dedup_exchange_rate_hashid +from {{ ref('dedup_exchange_rate_scd') }} +-- dedup_exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_dedup_exchange_rate') }} +where 1 = 1 +and _airbyte_active_row = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..9a7a498cc3754 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,26 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "test_normalization_bhhpj", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('exchange_rate_ab3') }} +select + id, + currency, + new_column, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + nzd, + usd, + {{ adapter.quote('column`_\'with""_quotes') }}, + 
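+    -- Illustrative note, hedged against the compiled Redshift output later in this diff:
+    -- adapter.quote wraps an identifier in double quotes, so the two expressions above render
+    -- as "hkd@spéçiäl & characters" and "column`_'with""_quotes"; embedded double quotes are
+    -- escaped by doubling, which lets spaces, '@' and accents survive as column names.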
_airbyte_ab_id, + _airbyte_emitted_at, + {{ current_timestamp() }} as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from {{ ref('exchange_rate_ab3') }} +-- exchange_rate from {{ source('test_normalization_bhhpj', '_airbyte_raw_exchange_rate') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..9d10a9ea94901 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,25 @@ +{{ config( + sort = "_airbyte_emitted_at", + unique_key = '_airbyte_ab_id', + schema = "_airbyte_test_normalization_bhhpj", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('dedup_exchange_rate_ab2') }} +select + {{ dbt_utils.surrogate_key([ + 'id', + 'currency', + 'new_column', + 'date', + 'timestamp_col', + adapter.quote('hkd@spéçiäl & characters'), + 'nzd', + 'usd', + ]) }} as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from {{ ref('dedup_exchange_rate_ab2') }} tmp +-- dedup_exchange_rate +where 1 = 1 +{{ incremental_clause('_airbyte_emitted_at', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml new file mode 100644 index 0000000000000..4daf898b3002b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/sources.yml @@ -0,0 +1,12 @@ +version: 2 +sources: +- name: test_normalization_bhhpj + quoting: + database: true + schema: false + identifier: false + tables: + - name: _airbyte_raw_dedup_cdc_excluded + - name: _airbyte_raw_dedup_exchange_rate + - name: _airbyte_raw_exchange_rate + - name: _airbyte_raw_renamed_dedup_cdc_excluded diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 0000000000000..de775a2e5c164 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,15 @@ + + + delete from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "dedup_exchange_rate_scd__dbt_tmp" + ); + + + insert into 
"integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..372889fb42bda --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,15 @@ + + + delete from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "dedup_exchange_rate__dbt_tmp" + ); + + + insert into "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "id", "currency", "date", "timestamp_col", "hkd@spéçiäl & characters", "hkd_special___characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..e2bd3830cb423 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,92 @@ + + + create table + "integrationtests".test_normalization_bhhpj."exchange_rate__dbt_tmp" + + + compound sortkey(_airbyte_emitted_at) + + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != 
'' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as + date +) as date, + cast(nullif(timestamp_col::varchar, '') as + timestamp with time zone +) as timestamp_col, + cast("hkd@spéçiäl & characters" as + float +) as "hkd@spéçiäl & characters", + cast(hkd_special___characters as text) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + id, + currency, + date, + timestamp_col, + "hkd@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, + "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate +where 1 = 1 + ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..903a3141f6256 
--- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/second_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,66 @@ + + + create view "integrationtests"._airbyte_test_normalization_bhhpj."dedup_exchange_rate_stg__dbt_tmp" as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."HKD_special___characters" != '' then _airbyte_data."HKD_special___characters" end as hkd_special___characters, + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + bigint +) as id, + cast(currency as text) as currency, + cast(nullif(date::varchar, '') as + date +) as date, + cast(nullif(timestamp_col::varchar, '') as + timestamp with time zone +) as timestamp_col, + cast("hkd@spéçiäl & characters" as + float +) as "hkd@spéçiäl & characters", + cast(hkd_special___characters as text) as hkd_special___characters, + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(hkd_special___characters as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ) ; diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql new file mode 100644 index 
0000000000000..a193db25eb236 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -0,0 +1,15 @@ + + + delete from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" + where (_airbyte_unique_key_scd) in ( + select (_airbyte_unique_key_scd) + from "dedup_exchange_rate_scd__dbt_tmp" + ); + + + insert into "integrationtests".test_normalization_bhhpj."dedup_exchange_rate_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate_scd__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql new file mode 100644 index 0000000000000..6afa610cc7215 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql @@ -0,0 +1,15 @@ + + + delete from "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" + where (_airbyte_unique_key) in ( + select (_airbyte_unique_key) + from "dedup_exchange_rate__dbt_tmp" + ); + + + insert into "integrationtests".test_normalization_bhhpj."dedup_exchange_rate" ("_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid") + ( + select "_airbyte_unique_key", "id", "currency", "new_column", "date", "timestamp_col", "hkd@spéçiäl & characters", "nzd", "usd", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_dedup_exchange_rate_hashid" + from "dedup_exchange_rate__dbt_tmp" + ) + \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql new file mode 100644 index 0000000000000..031baa2a7efbe --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_tables/test_normalization/exchange_rate.sql @@ -0,0 +1,94 @@ + + + create table + "integrationtests".test_normalization_bhhpj."exchange_rate__dbt_tmp" + 
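+  -- Illustrative note (hedged): the compound sortkey clause below is what dbt's Redshift
+  -- adapter compiles the model-level sort = "_airbyte_emitted_at" config into, and the
+  -- *__dbt_tmp suffix reflects the usual dbt table materialization, which builds the table
+  -- under a temporary name before swapping it in for the target.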
+ + compound sortkey(_airbyte_emitted_at) + + as ( + +with __dbt__cte__exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."new_column" != '' then _airbyte_data."new_column" end as new_column, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + case when _airbyte_data."column`_'with""_quotes" != '' then _airbyte_data."column`_'with""_quotes" end as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate as table_alias +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__exchange_rate_ab1 +select + cast(id as + float +) as id, + cast(currency as text) as currency, + cast(new_column as + float +) as new_column, + cast(nullif(date::varchar, '') as + date +) as date, + cast(nullif(timestamp_col::varchar, '') as + timestamp with time zone +) as timestamp_col, + cast("hkd@spéçiäl & characters" as + float +) as "hkd@spéçiäl & characters", + cast(nzd as + float +) as nzd, + cast(usd as + float +) as usd, + cast("column`_'with""_quotes" as text) as "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__exchange_rate_ab1 +-- exchange_rate +where 1 = 1 +), __dbt__cte__exchange_rate_ab3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__exchange_rate_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(new_column as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') || '-' || coalesce(cast("column`_'with""_quotes" as text), '') as text)) as _airbyte_exchange_rate_hashid, + tmp.* +from __dbt__cte__exchange_rate_ab2 tmp +-- exchange_rate +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__exchange_rate_ab3 +select + id, + currency, + new_column, + date, + timestamp_col, + "hkd@spéçiäl & characters", + nzd, + usd, + "column`_'with""_quotes", + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at, + _airbyte_exchange_rate_hashid +from __dbt__cte__exchange_rate_ab3 +-- exchange_rate from "integrationtests".test_normalization_bhhpj._airbyte_raw_exchange_rate +where 1 = 1 + ); \ No newline at end of file diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql new file mode 100644 index 0000000000000..8c9d36dd07d19 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/third_output/airbyte_views/test_normalization/dedup_exchange_rate_stg.sql @@ -0,0 +1,68 @@ + + + create view "integrationtests"._airbyte_test_normalization_bhhpj."dedup_exchange_rate_stg__dbt_tmp" as ( + +with __dbt__cte__dedup_exchange_rate_ab1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate +select + case when _airbyte_data."id" != '' then _airbyte_data."id" end as id, + case when _airbyte_data."currency" != '' then _airbyte_data."currency" end as currency, + case when _airbyte_data."new_column" != '' then _airbyte_data."new_column" end as new_column, + case when _airbyte_data."date" != '' then _airbyte_data."date" end as date, + case when _airbyte_data."timestamp_col" != '' then _airbyte_data."timestamp_col" end as timestamp_col, + case when _airbyte_data."HKD@spéçiäl & characters" != '' then _airbyte_data."HKD@spéçiäl & characters" end as "hkd@spéçiäl & characters", + case when _airbyte_data."NZD" != '' then _airbyte_data."NZD" end as nzd, + case when _airbyte_data."USD" != '' then _airbyte_data."USD" end as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from "integrationtests".test_normalization_bhhpj._airbyte_raw_dedup_exchange_rate as table_alias +-- dedup_exchange_rate +where 1 = 1 + +), __dbt__cte__dedup_exchange_rate_ab2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__dedup_exchange_rate_ab1 +select + cast(id as + float +) as id, + cast(currency as text) as currency, + cast(new_column as + float +) as new_column, + cast(nullif(date::varchar, '') as + date +) as date, + cast(nullif(timestamp_col::varchar, '') as + timestamp with time zone +) as timestamp_col, + cast("hkd@spéçiäl & characters" as + float +) as "hkd@spéçiäl & characters", + cast(nzd as + float +) as nzd, + cast(usd as + bigint +) as usd, + _airbyte_ab_id, + _airbyte_emitted_at, + getdate() as _airbyte_normalized_at +from __dbt__cte__dedup_exchange_rate_ab1 +-- dedup_exchange_rate +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__dedup_exchange_rate_ab2 +select + md5(cast(coalesce(cast(id as text), '') || '-' || coalesce(cast(currency as text), '') || '-' || coalesce(cast(new_column as text), '') || '-' || coalesce(cast(date as text), '') || '-' || coalesce(cast(timestamp_col as text), '') || '-' || coalesce(cast("hkd@spéçiäl & characters" as text), '') || '-' || coalesce(cast(nzd as text), '') || '-' || coalesce(cast(usd as text), '') as text)) as _airbyte_dedup_exchange_rate_hashid, + tmp.* +from __dbt__cte__dedup_exchange_rate_ab2 tmp +-- dedup_exchange_rate +where 1 = 1 + + ) ; diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml new file mode 100644 index 0000000000000..8a64d6b8085ff --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/dbt_project.yml @@ -0,0 +1,126 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + +transient: false + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +dispatch: +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES + NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES + NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES + NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES: TEST_NORMALIZATION._AIRBYTE_RAW_NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES + SOME_STREAM_THAT_WAS_EMPTY_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY + SOME_STREAM_THAT_WAS_EMPTY_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY + SOME_STREAM_THAT_WAS_EMPTY_STG: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY + SOME_STREAM_THAT_WAS_EMPTY_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY + SOME_STREAM_THAT_WAS_EMPTY: TEST_NORMALIZATION._AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY + SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB1: TEST_NORMALIZATION_NAMESPACE._AIRBYTE_RAW_SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES + 
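+    # Illustrative note: each key below is a generated model and each value the raw source
+    # table it is derived from; nested sub-stream models (the *_PARTITION_* entries further
+    # down) all resolve back to the single raw table of their parent stream.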
SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB2: TEST_NORMALIZATION_NAMESPACE._AIRBYTE_RAW_SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES + SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES_AB3: TEST_NORMALIZATION_NAMESPACE._AIRBYTE_RAW_SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES + SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES: TEST_NORMALIZATION_NAMESPACE._AIRBYTE_RAW_SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES + CONFLICT_STREAM_NAME_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME + CONFLICT_STREAM_NAME_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME + CONFLICT_STREAM_NAME_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME + CONFLICT_STREAM_NAME: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME + CONFLICT_STREAM_SCALAR_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_SCALAR + CONFLICT_STREAM_SCALAR_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_SCALAR + CONFLICT_STREAM_SCALAR_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_SCALAR + CONFLICT_STREAM_SCALAR: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_SCALAR + CONFLICT_STREAM_ARRAY_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_ARRAY + CONFLICT_STREAM_ARRAY_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_ARRAY + CONFLICT_STREAM_ARRAY_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_ARRAY + CONFLICT_STREAM_ARRAY: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_ARRAY + UNNEST_ALIAS_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + ARRAYS_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS + ARRAYS_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS + ARRAYS_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS + ARRAYS: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME + CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME + CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME + CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME + UNNEST_ALIAS_CHILDREN_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS_CHILDREN_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS_CHILDREN_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS_CHILDREN: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + ARRAYS_NESTED_ARRAY_PARENT_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS + ARRAYS_NESTED_ARRAY_PARENT_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS + ARRAYS_NESTED_ARRAY_PARENT_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS + ARRAYS_NESTED_ARRAY_PARENT: 
TEST_NORMALIZATION._AIRBYTE_RAW_ARRAYS + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA: TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME + CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME + CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME + CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME_CONFLICT_STREAM_NAME: TEST_NORMALIZATION._AIRBYTE_RAW_CONFLICT_STREAM_NAME + UNNEST_ALIAS_CHILDREN_OWNER_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS_CHILDREN_OWNER_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS_CHILDREN_OWNER_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS_CHILDREN_OWNER: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS_CHILDREN_OWNER_COLUMN___WITH__QUOTES_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS_CHILDREN_OWNER_COLUMN___WITH__QUOTES_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS_CHILDREN_OWNER_COLUMN___WITH__QUOTES_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS + UNNEST_ALIAS_CHILDREN_OWNER_COLUMN___WITH__QUOTES: TEST_NORMALIZATION._AIRBYTE_RAW_UNNEST_ALIAS diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql new file mode 100644 index 0000000000000..8a87924032e44 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql @@ -0,0 +1,24 @@ + + + create or replace table 
"INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES" as + (select * from( + +-- Final base SQL model +-- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" +select + _AIRBYTE_UNIQUE_KEY, + ID, + DATE, + PARTITION, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID +from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" +-- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES +where 1 = 1 +and _AIRBYTE_ACTIVE_ROW = 1 + + ) order by (_AIRBYTE_UNIQUE_KEY, _AIRBYTE_EMITTED_AT) + ); + alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES" cluster by (_AIRBYTE_UNIQUE_KEY, _AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql new file mode 100644 index 0000000000000..2695e3388ca1d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql @@ -0,0 +1,72 @@ + + + create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" as + (select * from( + +with __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" +select + _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID, + get_path(parse_json(PARTITION), '"double_array_data"') as DOUBLE_ARRAY_DATA, + get_path(parse_json(PARTITION), '"DATA"') as DATA, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" as table_alias +-- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and PARTITION is not null + +), __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1 +select + 
_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID, + DOUBLE_ARRAY_DATA, + DATA, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1 +-- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +), __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB2 +select + md5(cast(coalesce(cast(_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID as + varchar +), '') || '-' || coalesce(cast(DOUBLE_ARRAY_DATA as + varchar +), '') || '-' || coalesce(cast(DATA as + varchar +), '') as + varchar +)) as _AIRBYTE_PARTITION_HASHID, + tmp.* +from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB2 tmp +-- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3 +select + _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID, + DOUBLE_ARRAY_DATA, + DATA, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_PARTITION_HASHID +from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3 +-- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" +where 1 = 1 + + ) order by (_AIRBYTE_EMITTED_AT) + ); + alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" cluster by (_AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql new file mode 100644 index 0000000000000..436ec4cd9d191 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql @@ -0,0 +1,71 @@ + + + create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA" as + (select * from( + +with __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: 
"INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" + +select + _AIRBYTE_PARTITION_HASHID, + to_varchar(get_path(parse_json(DATA.value), '"currency"')) as CURRENCY, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" as table_alias +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +cross join table(flatten(DATA)) as DATA +where 1 = 1 +and DATA is not null + +), __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1 +select + _AIRBYTE_PARTITION_HASHID, + cast(CURRENCY as + varchar +) as CURRENCY, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1 +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +), __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB2 +select + md5(cast(coalesce(cast(_AIRBYTE_PARTITION_HASHID as + varchar +), '') || '-' || coalesce(cast(CURRENCY as + varchar +), '') as + varchar +)) as _AIRBYTE_DATA_HASHID, + tmp.* +from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB2 tmp +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3 +select + _AIRBYTE_PARTITION_HASHID, + CURRENCY, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_DATA_HASHID +from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3 +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" +where 1 = 1 + + ) order by (_AIRBYTE_EMITTED_AT) + ); + alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA" cluster by (_AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql new file mode 100644 index 0000000000000..c5a250dd0bc3f --- /dev/null 
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql @@ -0,0 +1,71 @@ + + + create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA" as + (select * from( + +with __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" + +select + _AIRBYTE_PARTITION_HASHID, + to_varchar(get_path(parse_json(DOUBLE_ARRAY_DATA.value), '"id"')) as ID, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" as table_alias +-- DOUBLE_ARRAY_DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +cross join table(flatten(DOUBLE_ARRAY_DATA)) as DOUBLE_ARRAY_DATA +where 1 = 1 +and DOUBLE_ARRAY_DATA is not null + +), __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1 +select + _AIRBYTE_PARTITION_HASHID, + cast(ID as + varchar +) as ID, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1 +-- DOUBLE_ARRAY_DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +), __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB2 +select + md5(cast(coalesce(cast(_AIRBYTE_PARTITION_HASHID as + varchar +), '') || '-' || coalesce(cast(ID as + varchar +), '') as + varchar +)) as _AIRBYTE_DOUBLE_ARRAY_DATA_HASHID, + tmp.* +from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB2 tmp +-- DOUBLE_ARRAY_DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +where 1 = 1 + +)-- Final base SQL model +-- depends_on: __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3 +select + _AIRBYTE_PARTITION_HASHID, + ID, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_DOUBLE_ARRAY_DATA_HASHID +from __dbt__cte__NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3 +-- DOUBLE_ARRAY_DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from 
"INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" +where 1 = 1 + + ) order by (_AIRBYTE_EMITTED_AT) + ); + alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA" cluster by (_AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql new file mode 100644 index 0000000000000..b3072ce0004ca --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -0,0 +1,85 @@ + + + create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" as + (select * from( + +-- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') +with + +input_data as ( + select * + from "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG" + -- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast(ID as + varchar +), '') as + varchar +)) as _AIRBYTE_UNIQUE_KEY, + ID, + DATE, + PARTITION, + DATE as _AIRBYTE_START_AT, + lag(DATE) over ( + partition by ID + order by + DATE is null asc, + DATE desc, + _AIRBYTE_EMITTED_AT desc + ) as _AIRBYTE_END_AT, + case when row_number() over ( + partition by ID + order by + DATE is null asc, + DATE desc, + _AIRBYTE_EMITTED_AT desc + ) = 1 then 1 else 0 end as _AIRBYTE_ACTIVE_ROW, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_START_AT, + _AIRBYTE_EMITTED_AT + order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID + ) as _AIRBYTE_ROW_NUM, + md5(cast(coalesce(cast(_AIRBYTE_UNIQUE_KEY as + varchar +), '') || '-' || coalesce(cast(_AIRBYTE_START_AT as + varchar +), '') || '-' || coalesce(cast(_AIRBYTE_EMITTED_AT as + varchar +), '') as + varchar +)) as _AIRBYTE_UNIQUE_KEY_SCD, + scd_data.* + from scd_data +) +select + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_UNIQUE_KEY_SCD, + ID, + DATE, + PARTITION, + _AIRBYTE_START_AT, + _AIRBYTE_END_AT, + _AIRBYTE_ACTIVE_ROW, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, + 
_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID +from dedup_data where _AIRBYTE_ROW_NUM = 1 + ) order by (_AIRBYTE_ACTIVE_ROW, _AIRBYTE_UNIQUE_KEY_SCD, _AIRBYTE_EMITTED_AT) + ); + alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" cluster by (_AIRBYTE_ACTIVE_ROW, _AIRBYTE_UNIQUE_KEY_SCD, _AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1.sql new file mode 100644 index 0000000000000..772f1976f2c6d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1.sql @@ -0,0 +1,20 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + unique_key = '_AIRBYTE_AB_ID', + schema = "_AIRBYTE_TEST_NORMALIZATION", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as ID, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as DATE, + {{ json_extract('table_alias', '_airbyte_data', ['partition'], ['partition']) }} as PARTITION, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT +from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} as table_alias +-- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES +where 1 = 1 +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2.sql new file mode 100644 index 0000000000000..fd49a8524a645 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB2.sql @@ -0,0 +1,20 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + unique_key = '_AIRBYTE_AB_ID', + schema = "_AIRBYTE_TEST_NORMALIZATION", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1') }} +select + cast(ID as {{ dbt_utils.type_string() }}) as ID, + 
cast(DATE as {{ dbt_utils.type_string() }}) as DATE, + cast(PARTITION as {{ type_json() }}) as PARTITION, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT +from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_AB1') }} +-- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES +where 1 = 1 +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1.sql new file mode 100644 index 0000000000000..e6c344e6308d2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB1.sql @@ -0,0 +1,20 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + schema = "_AIRBYTE_TEST_NORMALIZATION", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') }} +select + _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID, + {{ json_extract_array('PARTITION', ['double_array_data'], ['double_array_data']) }} as DOUBLE_ARRAY_DATA, + {{ json_extract_array('PARTITION', ['DATA'], ['DATA']) }} as DATA, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT +from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') }} as table_alias +-- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition +where 1 = 1 +and PARTITION is not null +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1.sql new file mode 100644 index 0000000000000..050da953efddd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB1.sql @@ -0,0 +1,21 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + schema = "_AIRBYTE_TEST_NORMALIZATION", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} +{{ 
unnest_cte(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION'), 'PARTITION', 'DATA') }} +select + _AIRBYTE_PARTITION_HASHID, + {{ json_extract_scalar(unnested_column_value('DATA'), ['currency'], ['currency']) }} as CURRENCY, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT +from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} as table_alias +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA +{{ cross_join_unnest('PARTITION', 'DATA') }} +where 1 = 1 +and DATA is not null +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1.sql new file mode 100644 index 0000000000000..13b208068c10a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB1.sql @@ -0,0 +1,21 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + schema = "_AIRBYTE_TEST_NORMALIZATION", + tags = [ "nested-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} +{{ unnest_cte(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION'), 'PARTITION', 'DOUBLE_ARRAY_DATA') }} +select + _AIRBYTE_PARTITION_HASHID, + {{ json_extract_scalar(unnested_column_value('DOUBLE_ARRAY_DATA'), ['id'], ['id']) }} as ID, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT +from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} as table_alias +-- DOUBLE_ARRAY_DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data +{{ cross_join_unnest('PARTITION', 'DOUBLE_ARRAY_DATA') }} +where 1 = 1 +and DOUBLE_ARRAY_DATA is not null +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql new file mode 100644 index 0000000000000..110c17ef216dc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql @@ -0,0 +1,23 @@ +{{ config( + cluster_by = 
["_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_EMITTED_AT"], + unique_key = "_AIRBYTE_UNIQUE_KEY", + schema = "TEST_NORMALIZATION", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') }} +select + _AIRBYTE_UNIQUE_KEY, + ID, + DATE, + PARTITION, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID +from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') }} +-- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} +where 1 = 1 +and _AIRBYTE_ACTIVE_ROW = 1 +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql new file mode 100644 index 0000000000000..3dda7efc9c613 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql @@ -0,0 +1,20 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + schema = "TEST_NORMALIZATION", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3') }} +select + _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID, + DOUBLE_ARRAY_DATA, + DATA, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_PARTITION_HASHID +from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_AB3') }} +-- PARTITION at nested_stream_with_complex_columns_resulting_into_long_names/partition from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD') }} +where 1 = 1 +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql new file mode 100644 index 0000000000000..526c8b658f19c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql @@ -0,0 +1,19 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + schema = "TEST_NORMALIZATION", + tags = [ "nested" ] +) }} +-- Final base SQL 
model +-- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3') }} +select + _AIRBYTE_PARTITION_HASHID, + CURRENCY, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_DATA_HASHID +from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA_AB3') }} +-- DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/DATA from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} +where 1 = 1 +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql new file mode 100644 index 0000000000000..c46547e9a6242 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql @@ -0,0 +1,19 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + schema = "TEST_NORMALIZATION", + tags = [ "nested" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3') }} +select + _AIRBYTE_PARTITION_HASHID, + ID, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_DOUBLE_ARRAY_DATA_HASHID +from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA_AB3') }} +-- DOUBLE_ARRAY_DATA at nested_stream_with_complex_columns_resulting_into_long_names/partition/double_array_data from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION') }} +where 1 = 1 +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql new file mode 100644 index 0000000000000..7b46e390d0575 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -0,0 +1,163 @@ +{{ config( + cluster_by = ["_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_UNIQUE_KEY_SCD", "_AIRBYTE_EMITTED_AT"], + unique_key = "_AIRBYTE_UNIQUE_KEY_SCD", + schema = "TEST_NORMALIZATION", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + 
identifier='NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_AIRBYTE_UNIQUE_KEY' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( + select recent_records.unique_key + from ( + select distinct _AIRBYTE_UNIQUE_KEY as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + ) recent_records + left join ( + select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as active_count + from {{ this }} + where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' 
+ adapter.quote('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES')) }} + group by _AIRBYTE_UNIQUE_KEY + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _AIRBYTE_TEST_NORMALIZATION.NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') }} + -- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} + where 1 = 1 + {{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} +), +new_data_ids as ( + -- build a subset of _AIRBYTE_UNIQUE_KEY from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'ID', + ]) }} as _AIRBYTE_UNIQUE_KEY + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._AIRBYTE_UNIQUE_KEY = new_data_ids._AIRBYTE_UNIQUE_KEY + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._AIRBYTE_AB_ID = inc_data._AIRBYTE_AB_ID + where _AIRBYTE_ACTIVE_ROW = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG')) }} from new_data + union all + select {{ dbt_utils.star(ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_STG') }} + -- NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'ID', + ]) }} as _AIRBYTE_UNIQUE_KEY, + ID, + DATE, + PARTITION, + DATE as _AIRBYTE_START_AT, + lag(DATE) over ( + partition by ID + order by + DATE is null asc, + DATE desc, + _AIRBYTE_EMITTED_AT desc + ) as _AIRBYTE_END_AT, + case when row_number() over ( + partition by ID + order by + DATE is null asc, + DATE desc, + _AIRBYTE_EMITTED_AT desc + ) = 1 then 1 else 0 end as _AIRBYTE_ACTIVE_ROW, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- 
additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_START_AT, + _AIRBYTE_EMITTED_AT + order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID + ) as _AIRBYTE_ROW_NUM, + {{ dbt_utils.surrogate_key([ + '_AIRBYTE_UNIQUE_KEY', + '_AIRBYTE_START_AT', + '_AIRBYTE_EMITTED_AT' + ]) }} as _AIRBYTE_UNIQUE_KEY_SCD, + scd_data.* + from scd_data +) +select + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_UNIQUE_KEY_SCD, + ID, + DATE, + PARTITION, + _AIRBYTE_START_AT, + _AIRBYTE_END_AT, + _AIRBYTE_ACTIVE_ROW, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID +from dedup_data where _AIRBYTE_ROW_NUM = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/sources.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..b51dbe4cce7a1 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/sources.yml @@ -0,0 +1,23 @@ +version: 2 +sources: +- name: TEST_NORMALIZATION + quoting: + database: true + schema: false + identifier: false + tables: + - name: _AIRBYTE_RAW_ARRAYS + - name: _AIRBYTE_RAW_CONFLICT_STREAM_ARRAY + - name: _AIRBYTE_RAW_CONFLICT_STREAM_NAME + - name: _AIRBYTE_RAW_CONFLICT_STREAM_SCALAR + - name: _AIRBYTE_RAW_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES + - name: _AIRBYTE_RAW_NON_NESTED_STREAM_WITHOUT_NAMESPACE_RESULTING_INTO_LONG_NAMES + - name: _AIRBYTE_RAW_SOME_STREAM_THAT_WAS_EMPTY + - name: _AIRBYTE_RAW_UNNEST_ALIAS +- name: TEST_NORMALIZATION_NAMESPACE + quoting: + database: true + schema: false + identifier: false + tables: + - name: _AIRBYTE_RAW_SIMPLE_STREAM_WITH_NAMESPACE_RESULTING_INTO_LONG_NAMES diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql new file mode 100644 index 0000000000000..ce844e3777eff --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES.sql @@ -0,0 +1,26 @@ +begin; + + + + + + + + merge into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES" as DBT_INTERNAL_DEST + using "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES__dbt_tmp" as DBT_INTERNAL_SOURCE + on + DBT_INTERNAL_SOURCE._AIRBYTE_UNIQUE_KEY = DBT_INTERNAL_DEST._AIRBYTE_UNIQUE_KEY + + + + when matched then update set + "_AIRBYTE_UNIQUE_KEY" = DBT_INTERNAL_SOURCE."_AIRBYTE_UNIQUE_KEY","ID" = DBT_INTERNAL_SOURCE."ID","DATE" = DBT_INTERNAL_SOURCE."DATE","PARTITION" = DBT_INTERNAL_SOURCE."PARTITION","_AIRBYTE_AB_ID" = 
DBT_INTERNAL_SOURCE."_AIRBYTE_AB_ID","_AIRBYTE_EMITTED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_EMITTED_AT","_AIRBYTE_NORMALIZED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_NORMALIZED_AT","_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID" = DBT_INTERNAL_SOURCE."_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID" + + + when not matched then insert + ("_AIRBYTE_UNIQUE_KEY", "ID", "DATE", "PARTITION", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID") + values + ("_AIRBYTE_UNIQUE_KEY", "ID", "DATE", "PARTITION", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID") + +; + commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql new file mode 100644 index 0000000000000..8be85b4920f8e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION.sql @@ -0,0 +1,9 @@ +begin; + + + insert into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION" ("_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID", "DOUBLE_ARRAY_DATA", "DATA", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_PARTITION_HASHID") + ( + select "_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID", "DOUBLE_ARRAY_DATA", "DATA", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_PARTITION_HASHID" + from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION__dbt_tmp" + ); + commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql new file mode 100644 index 0000000000000..abd722a837d67 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA.sql @@ -0,0 +1,9 @@ +begin; + + + insert into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA" ("_AIRBYTE_PARTITION_HASHID", "CURRENCY", "_AIRBYTE_AB_ID", 
"_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DATA_HASHID") + ( + select "_AIRBYTE_PARTITION_HASHID", "CURRENCY", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DATA_HASHID" + from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DATA__dbt_tmp" + ); + commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql new file mode 100644 index 0000000000000..11746e9f32afd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA.sql @@ -0,0 +1,9 @@ +begin; + + + insert into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA" ("_AIRBYTE_PARTITION_HASHID", "ID", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DOUBLE_ARRAY_DATA_HASHID") + ( + select "_AIRBYTE_PARTITION_HASHID", "ID", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DOUBLE_ARRAY_DATA_HASHID" + from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_PARTITION_DOUBLE_ARRAY_DATA__dbt_tmp" + ); + commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql new file mode 100644 index 0000000000000..308aaf13f9081 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/second_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -0,0 +1,26 @@ +begin; + + + + + + + + merge into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD" as DBT_INTERNAL_DEST + using "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD__dbt_tmp" as DBT_INTERNAL_SOURCE + on + DBT_INTERNAL_SOURCE._AIRBYTE_UNIQUE_KEY_SCD = DBT_INTERNAL_DEST._AIRBYTE_UNIQUE_KEY_SCD + + + + when matched then update set + "_AIRBYTE_UNIQUE_KEY" = DBT_INTERNAL_SOURCE."_AIRBYTE_UNIQUE_KEY","_AIRBYTE_UNIQUE_KEY_SCD" = DBT_INTERNAL_SOURCE."_AIRBYTE_UNIQUE_KEY_SCD","ID" = DBT_INTERNAL_SOURCE."ID","DATE" = DBT_INTERNAL_SOURCE."DATE","PARTITION" = 
DBT_INTERNAL_SOURCE."PARTITION","_AIRBYTE_START_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_START_AT","_AIRBYTE_END_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_END_AT","_AIRBYTE_ACTIVE_ROW" = DBT_INTERNAL_SOURCE."_AIRBYTE_ACTIVE_ROW","_AIRBYTE_AB_ID" = DBT_INTERNAL_SOURCE."_AIRBYTE_AB_ID","_AIRBYTE_EMITTED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_EMITTED_AT","_AIRBYTE_NORMALIZED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_NORMALIZED_AT","_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID" = DBT_INTERNAL_SOURCE."_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID" + + + when not matched then insert + ("_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_UNIQUE_KEY_SCD", "ID", "DATE", "PARTITION", "_AIRBYTE_START_AT", "_AIRBYTE_END_AT", "_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID") + values + ("_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_UNIQUE_KEY_SCD", "ID", "DATE", "PARTITION", "_AIRBYTE_START_AT", "_AIRBYTE_END_AT", "_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_HASHID") + +; + commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml new file mode 100644 index 0000000000000..2b466206f0839 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/dbt_project.yml @@ -0,0 +1,91 @@ +name: airbyte_utils +version: '1.0' +config-version: 2 +profile: normalize +model-paths: +- models +docs-paths: +- docs +analysis-paths: +- analysis +test-paths: +- tests +seed-paths: +- data +macro-paths: +- macros +target-path: ../build +log-path: ../logs +packages-install-path: /dbt +clean-targets: +- build +- dbt_modules +quoting: + database: true + schema: false + identifier: true +models: + +transient: false + airbyte_utils: + +materialized: table + generated: + airbyte_ctes: + +tags: airbyte_internal_cte + +materialized: ephemeral + airbyte_incremental: + +tags: incremental_tables + +materialized: incremental + +on_schema_change: sync_all_columns + airbyte_tables: + +tags: normalized_tables + +materialized: table + airbyte_views: + +tags: airbyte_internal_views + +materialized: view +dispatch: +- macro_namespace: dbt_utils + search_order: + - airbyte_utils + - dbt_utils +vars: + json_column: _airbyte_data + models_to_source: + EXCHANGE_RATE_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE + EXCHANGE_RATE_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE + EXCHANGE_RATE_AB3: TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE + EXCHANGE_RATE: TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE + DEDUP_EXCHANGE_RATE_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE + DEDUP_EXCHANGE_RATE_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE + DEDUP_EXCHANGE_RATE_STG: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE + DEDUP_EXCHANGE_RATE_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE + DEDUP_EXCHANGE_RATE: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE + RENAMED_DEDUP_CDC_EXCLUDED_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED + RENAMED_DEDUP_CDC_EXCLUDED_AB2: 
TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED + RENAMED_DEDUP_CDC_EXCLUDED_STG: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED + RENAMED_DEDUP_CDC_EXCLUDED_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED + RENAMED_DEDUP_CDC_EXCLUDED: TEST_NORMALIZATION._AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED + DEDUP_CDC_EXCLUDED_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED + DEDUP_CDC_EXCLUDED_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED + DEDUP_CDC_EXCLUDED_STG: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED + DEDUP_CDC_EXCLUDED_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED + DEDUP_CDC_EXCLUDED: TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_CDC_EXCLUDED + POS_DEDUP_CDCX_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX + POS_DEDUP_CDCX_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX + POS_DEDUP_CDCX_STG: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX + POS_DEDUP_CDCX_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX + POS_DEDUP_CDCX: TEST_NORMALIZATION._AIRBYTE_RAW_POS_DEDUP_CDCX + 1_prefix_startwith_number_ab1: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER + 1_prefix_startwith_number_ab2: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER + 1_prefix_startwith_number_stg: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER + 1_prefix_startwith_number_scd: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER + 1_prefix_startwith_number: TEST_NORMALIZATION._AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER + MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS + MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS + MULTIPLE_COLUMN_NAMES_CONFLICTS_STG: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS + MULTIPLE_COLUMN_NAMES_CONFLICTS_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS + MULTIPLE_COLUMN_NAMES_CONFLICTS: TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS + TYPES_TESTING_AB1: TEST_NORMALIZATION._AIRBYTE_RAW_TYPES_TESTING + TYPES_TESTING_AB2: TEST_NORMALIZATION._AIRBYTE_RAW_TYPES_TESTING + TYPES_TESTING_STG: TEST_NORMALIZATION._AIRBYTE_RAW_TYPES_TESTING + TYPES_TESTING_SCD: TEST_NORMALIZATION._AIRBYTE_RAW_TYPES_TESTING + TYPES_TESTING: TEST_NORMALIZATION._AIRBYTE_RAW_TYPES_TESTING diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql new file mode 100644 index 0000000000000..7efd7f3244dbc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql @@ -0,0 +1,29 @@ + + + create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE" as + (select * from( + +-- Final base SQL model +-- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" +select + _AIRBYTE_UNIQUE_KEY, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + "HKD@spéçiäl & characters", + HKD_SPECIAL___CHARACTERS, + NZD, + USD, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, + 
_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID +from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" +-- DEDUP_EXCHANGE_RATE from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE +where 1 = 1 +and _AIRBYTE_ACTIVE_ROW = 1 + + ) order by (_AIRBYTE_UNIQUE_KEY, _AIRBYTE_EMITTED_AT) + ); + alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE" cluster by (_AIRBYTE_UNIQUE_KEY, _AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql new file mode 100644 index 0000000000000..220cd093da41e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -0,0 +1,103 @@ + + + create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" as + (select * from( + +-- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') +with + +input_data as ( + select * + from "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_STG" + -- DEDUP_EXCHANGE_RATE from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE +), + +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + md5(cast(coalesce(cast(ID as + varchar +), '') || '-' || coalesce(cast(CURRENCY as + varchar +), '') || '-' || coalesce(cast(NZD as + varchar +), '') as + varchar +)) as _AIRBYTE_UNIQUE_KEY, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + "HKD@spéçiäl & characters", + HKD_SPECIAL___CHARACTERS, + NZD, + USD, + DATE as _AIRBYTE_START_AT, + lag(DATE) over ( + partition by ID, CURRENCY, cast(NZD as + varchar +) + order by + DATE is null asc, + DATE desc, + _AIRBYTE_EMITTED_AT desc + ) as _AIRBYTE_END_AT, + case when row_number() over ( + partition by ID, CURRENCY, cast(NZD as + varchar +) + order by + DATE is null asc, + DATE desc, + _AIRBYTE_EMITTED_AT desc + ) = 1 then 1 else 0 end as _AIRBYTE_ACTIVE_ROW, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_START_AT, + _AIRBYTE_EMITTED_AT + order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID + ) as _AIRBYTE_ROW_NUM, + md5(cast(coalesce(cast(_AIRBYTE_UNIQUE_KEY as + varchar +), '') || '-' || coalesce(cast(_AIRBYTE_START_AT as + varchar +), '') || '-' || coalesce(cast(_AIRBYTE_EMITTED_AT as + varchar +), '') as + varchar +)) as _AIRBYTE_UNIQUE_KEY_SCD, + scd_data.* + from scd_data +) +select + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_UNIQUE_KEY_SCD, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + "HKD@spéçiäl & characters", + HKD_SPECIAL___CHARACTERS, + NZD, + USD, + _AIRBYTE_START_AT, + _AIRBYTE_END_AT, + _AIRBYTE_ACTIVE_ROW, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as 
_AIRBYTE_NORMALIZED_AT, + _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID +from dedup_data where _AIRBYTE_ROW_NUM = 1 + ) order by (_AIRBYTE_ACTIVE_ROW, _AIRBYTE_UNIQUE_KEY_SCD, _AIRBYTE_EMITTED_AT) + ); + alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" cluster by (_AIRBYTE_ACTIVE_ROW, _AIRBYTE_UNIQUE_KEY_SCD, _AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql new file mode 100644 index 0000000000000..e35addfdeb762 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql @@ -0,0 +1,159 @@ + + + create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."EXCHANGE_RATE" as + (select * from( + +with __dbt__cte__EXCHANGE_RATE_AB1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE +select + to_varchar(get_path(parse_json(_airbyte_data), '"id"')) as ID, + to_varchar(get_path(parse_json(_airbyte_data), '"currency"')) as CURRENCY, + to_varchar(get_path(parse_json(_airbyte_data), '"date"')) as DATE, + to_varchar(get_path(parse_json(_airbyte_data), '"timestamp_col"')) as TIMESTAMP_COL, + to_varchar(get_path(parse_json(_airbyte_data), '"HKD@spéçiäl & characters"')) as "HKD@spéçiäl & characters", + to_varchar(get_path(parse_json(_airbyte_data), '"HKD_special___characters"')) as HKD_SPECIAL___CHARACTERS, + to_varchar(get_path(parse_json(_airbyte_data), '"NZD"')) as NZD, + to_varchar(get_path(parse_json(_airbyte_data), '"USD"')) as USD, + to_varchar(get_path(parse_json(_airbyte_data), '"column`_''with""_quotes"')) as "column`_'with""_quotes", + to_varchar(get_path(parse_json(_airbyte_data), '"datetime_tz"')) as DATETIME_TZ, + to_varchar(get_path(parse_json(_airbyte_data), '"datetime_no_tz"')) as DATETIME_NO_TZ, + to_varchar(get_path(parse_json(_airbyte_data), '"time_tz"')) as TIME_TZ, + to_varchar(get_path(parse_json(_airbyte_data), '"time_no_tz"')) as TIME_NO_TZ, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE as table_alias +-- EXCHANGE_RATE +where 1 = 1 +), __dbt__cte__EXCHANGE_RATE_AB2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__EXCHANGE_RATE_AB1 +select + cast(ID as + bigint +) as ID, + cast(CURRENCY as + varchar +) as CURRENCY, + cast(nullif(DATE, '') as + date +) as DATE, + case + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZH') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 
'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') + when TIMESTAMP_COL = '' then NULL + else to_timestamp_tz(TIMESTAMP_COL) + end as TIMESTAMP_COL + , + cast("HKD@spéçiäl & characters" as + float +) as "HKD@spéçiäl & characters", + cast(HKD_SPECIAL___CHARACTERS as + varchar +) as HKD_SPECIAL___CHARACTERS, + cast(NZD as + float +) as NZD, + cast(USD as + float +) as USD, + cast("column`_'with""_quotes" as + varchar +) as "column`_'with""_quotes", + case + when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') + when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SSTZH') + when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') + when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') + when DATETIME_TZ = '' then NULL + else to_timestamp_tz(DATETIME_TZ) + end as DATETIME_TZ + , + case + when DATETIME_NO_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}' then to_timestamp(DATETIME_NO_TZ, 'YYYY-MM-DDTHH24:MI:SS') + when DATETIME_NO_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}' then to_timestamp(DATETIME_NO_TZ, 'YYYY-MM-DDTHH24:MI:SS.FF') + when DATETIME_NO_TZ = '' then NULL + else to_timestamp(DATETIME_NO_TZ) + end as DATETIME_NO_TZ + , + cast(nullif(TIME_TZ, '') as + varchar +) as TIME_TZ, + cast(nullif(TIME_NO_TZ, '') as + time +) as TIME_NO_TZ, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from __dbt__cte__EXCHANGE_RATE_AB1 +-- EXCHANGE_RATE +where 1 = 1 +), __dbt__cte__EXCHANGE_RATE_AB3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__EXCHANGE_RATE_AB2 +select + md5(cast(coalesce(cast(ID as + varchar +), '') || '-' || coalesce(cast(CURRENCY as + varchar +), '') || '-' || coalesce(cast(DATE as + varchar +), '') || '-' || coalesce(cast(TIMESTAMP_COL as + varchar +), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as + varchar +), '') || '-' || coalesce(cast(HKD_SPECIAL___CHARACTERS as + varchar +), '') || '-' || coalesce(cast(NZD as + varchar +), '') || '-' || coalesce(cast(USD as + varchar +), '') || '-' || coalesce(cast("column`_'with""_quotes" as + varchar +), '') || '-' || coalesce(cast(DATETIME_TZ as + varchar +), '') || '-' || coalesce(cast(DATETIME_NO_TZ as + varchar +), '') || '-' || coalesce(cast(TIME_TZ as + varchar +), '') || '-' || coalesce(cast(TIME_NO_TZ as + varchar +), '') as + varchar +)) as _AIRBYTE_EXCHANGE_RATE_HASHID, + tmp.* +from __dbt__cte__EXCHANGE_RATE_AB2 tmp +-- EXCHANGE_RATE +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__EXCHANGE_RATE_AB3 +select + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + "HKD@spéçiäl & characters", + HKD_SPECIAL___CHARACTERS, + NZD, + USD, + "column`_'with""_quotes", + DATETIME_TZ, + DATETIME_NO_TZ, + TIME_TZ, + TIME_NO_TZ, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_EXCHANGE_RATE_HASHID +from __dbt__cte__EXCHANGE_RATE_AB3 +-- EXCHANGE_RATE from 
"INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE +where 1 = 1 + ) order by (_AIRBYTE_EMITTED_AT) + ); + alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."EXCHANGE_RATE" cluster by (_AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql new file mode 100644 index 0000000000000..e91864477ee70 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql @@ -0,0 +1,95 @@ + + create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_STG" + + as ( + +with __dbt__cte__DEDUP_EXCHANGE_RATE_AB1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE +select + to_varchar(get_path(parse_json(_airbyte_data), '"id"')) as ID, + to_varchar(get_path(parse_json(_airbyte_data), '"currency"')) as CURRENCY, + to_varchar(get_path(parse_json(_airbyte_data), '"date"')) as DATE, + to_varchar(get_path(parse_json(_airbyte_data), '"timestamp_col"')) as TIMESTAMP_COL, + to_varchar(get_path(parse_json(_airbyte_data), '"HKD@spéçiäl & characters"')) as "HKD@spéçiäl & characters", + to_varchar(get_path(parse_json(_airbyte_data), '"HKD_special___characters"')) as HKD_SPECIAL___CHARACTERS, + to_varchar(get_path(parse_json(_airbyte_data), '"NZD"')) as NZD, + to_varchar(get_path(parse_json(_airbyte_data), '"USD"')) as USD, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE as table_alias +-- DEDUP_EXCHANGE_RATE +where 1 = 1 + +), __dbt__cte__DEDUP_EXCHANGE_RATE_AB2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__DEDUP_EXCHANGE_RATE_AB1 +select + cast(ID as + bigint +) as ID, + cast(CURRENCY as + varchar +) as CURRENCY, + cast(nullif(DATE, '') as + date +) as DATE, + case + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZH') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') + when TIMESTAMP_COL = '' then NULL + else to_timestamp_tz(TIMESTAMP_COL) + end as TIMESTAMP_COL + , + cast("HKD@spéçiäl & characters" as + float +) as "HKD@spéçiäl & characters", + cast(HKD_SPECIAL___CHARACTERS as + varchar +) as HKD_SPECIAL___CHARACTERS, + cast(NZD as + float +) as NZD, + cast(USD as + 
float +) as USD, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from __dbt__cte__DEDUP_EXCHANGE_RATE_AB1 +-- DEDUP_EXCHANGE_RATE +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__DEDUP_EXCHANGE_RATE_AB2 +select + md5(cast(coalesce(cast(ID as + varchar +), '') || '-' || coalesce(cast(CURRENCY as + varchar +), '') || '-' || coalesce(cast(DATE as + varchar +), '') || '-' || coalesce(cast(TIMESTAMP_COL as + varchar +), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as + varchar +), '') || '-' || coalesce(cast(HKD_SPECIAL___CHARACTERS as + varchar +), '') || '-' || coalesce(cast(NZD as + varchar +), '') || '-' || coalesce(cast(USD as + varchar +), '') as + varchar +)) as _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID, + tmp.* +from __dbt__cte__DEDUP_EXCHANGE_RATE_AB2 tmp +-- DEDUP_EXCHANGE_RATE +where 1 = 1 + + ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/MULTIPLE_COLUMN_NAMES_CONFLICTS_STG.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/MULTIPLE_COLUMN_NAMES_CONFLICTS_STG.sql new file mode 100644 index 0000000000000..639671b74a4b4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_views/TEST_NORMALIZATION/MULTIPLE_COLUMN_NAMES_CONFLICTS_STG.sql @@ -0,0 +1,83 @@ + + create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."MULTIPLE_COLUMN_NAMES_CONFLICTS_STG" + + as ( + +with __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS +select + to_varchar(get_path(parse_json(_airbyte_data), '"id"')) as ID, + to_varchar(get_path(parse_json(_airbyte_data), '"User Id"')) as "User Id", + to_varchar(get_path(parse_json(_airbyte_data), '"user_id"')) as USER_ID, + to_varchar(get_path(parse_json(_airbyte_data), '"User id"')) as "User id", + to_varchar(get_path(parse_json(_airbyte_data), '"user id"')) as "user id", + to_varchar(get_path(parse_json(_airbyte_data), '"User@Id"')) as "User@Id", + to_varchar(get_path(parse_json(_airbyte_data), '"UserId"')) as USERID, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS as table_alias +-- MULTIPLE_COLUMN_NAMES_CONFLICTS +where 1 = 1 + +), __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1 +select + cast(ID as + bigint +) as ID, + cast("User Id" as + varchar +) as "User Id", + cast(USER_ID as + float +) as USER_ID, + cast("User id" as + float +) as "User id", + cast("user id" as + float +) as "user id", + cast("User@Id" as + varchar +) as "User@Id", + cast(USERID as + float +) as USERID, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as 
_AIRBYTE_NORMALIZED_AT +from __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB1 +-- MULTIPLE_COLUMN_NAMES_CONFLICTS +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2 +select + md5(cast(coalesce(cast(ID as + varchar +), '') || '-' || coalesce(cast("User Id" as + varchar +), '') || '-' || coalesce(cast(USER_ID as + varchar +), '') || '-' || coalesce(cast("User id" as + varchar +), '') || '-' || coalesce(cast("user id" as + varchar +), '') || '-' || coalesce(cast("User@Id" as + varchar +), '') || '-' || coalesce(cast(USERID as + varchar +), '') as + varchar +)) as _AIRBYTE_MULTIPLE_COLUMN_NAMES_CONFLICTS_HASHID, + tmp.* +from __dbt__cte__MULTIPLE_COLUMN_NAMES_CONFLICTS_AB2 tmp +-- MULTIPLE_COLUMN_NAMES_CONFLICTS +where 1 = 1 + + ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB1.sql new file mode 100644 index 0000000000000..06be4a0eaa2fb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB1.sql @@ -0,0 +1,25 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + unique_key = '_AIRBYTE_AB_ID', + schema = "_AIRBYTE_TEST_NORMALIZATION", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} +select + {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as ID, + {{ json_extract_scalar('_airbyte_data', ['currency'], ['currency']) }} as CURRENCY, + {{ json_extract_scalar('_airbyte_data', ['date'], ['date']) }} as DATE, + {{ json_extract_scalar('_airbyte_data', ['timestamp_col'], ['timestamp_col']) }} as TIMESTAMP_COL, + {{ json_extract_scalar('_airbyte_data', ['HKD@spéçiäl & characters'], ['HKD@spéçiäl & characters']) }} as {{ adapter.quote('HKD@spéçiäl & characters') }}, + {{ json_extract_scalar('_airbyte_data', ['HKD_special___characters'], ['HKD_special___characters']) }} as HKD_SPECIAL___CHARACTERS, + {{ json_extract_scalar('_airbyte_data', ['NZD'], ['NZD']) }} as NZD, + {{ json_extract_scalar('_airbyte_data', ['USD'], ['USD']) }} as USD, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT +from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} as table_alias +-- DEDUP_EXCHANGE_RATE +where 1 = 1 +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB2.sql new file mode 100644 index 0000000000000..f3a40af778cc4 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_ctes/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_AB2.sql @@ -0,0 +1,33 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + unique_key = '_AIRBYTE_AB_ID', + schema = "_AIRBYTE_TEST_NORMALIZATION", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ ref('DEDUP_EXCHANGE_RATE_AB1') }} +select + cast(ID as {{ dbt_utils.type_bigint() }}) as ID, + cast(CURRENCY as {{ dbt_utils.type_string() }}) as CURRENCY, + cast({{ empty_string_to_null('DATE') }} as {{ type_date() }}) as DATE, + case + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZH') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') + when TIMESTAMP_COL = '' then NULL + else to_timestamp_tz(TIMESTAMP_COL) + end as TIMESTAMP_COL + , + cast({{ adapter.quote('HKD@spéçiäl & characters') }} as {{ dbt_utils.type_float() }}) as {{ adapter.quote('HKD@spéçiäl & characters') }}, + cast(HKD_SPECIAL___CHARACTERS as {{ dbt_utils.type_string() }}) as HKD_SPECIAL___CHARACTERS, + cast(NZD as {{ dbt_utils.type_float() }}) as NZD, + cast(USD as {{ dbt_utils.type_float() }}) as USD, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT +from {{ ref('DEDUP_EXCHANGE_RATE_AB1') }} +-- DEDUP_EXCHANGE_RATE +where 1 = 1 +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql new file mode 100644 index 0000000000000..0663a8d251e46 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql @@ -0,0 +1,28 @@ +{{ config( + cluster_by = ["_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_EMITTED_AT"], + unique_key = "_AIRBYTE_UNIQUE_KEY", + schema = "TEST_NORMALIZATION", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('DEDUP_EXCHANGE_RATE_SCD') }} +select + _AIRBYTE_UNIQUE_KEY, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + HKD_SPECIAL___CHARACTERS, + NZD, + USD, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID +from {{ ref('DEDUP_EXCHANGE_RATE_SCD') }} +-- DEDUP_EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} +where 1 = 1 +and _AIRBYTE_ACTIVE_ROW = 1 +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql new file mode 100644 index 0000000000000..13f4936015110 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -0,0 +1,177 @@ +{{ config( + cluster_by = ["_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_UNIQUE_KEY_SCD", "_AIRBYTE_EMITTED_AT"], + unique_key = "_AIRBYTE_UNIQUE_KEY_SCD", + schema = "TEST_NORMALIZATION", + post_hook = [" + {% + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='DEDUP_EXCHANGE_RATE' + ) + %} + {# + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + #} + {% + if final_table_relation is not none and '_AIRBYTE_UNIQUE_KEY' in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + %} + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. + delete from {{ final_table_relation }} where {{ final_table_relation }}._AIRBYTE_UNIQUE_KEY in ( + select recent_records.unique_key + from ( + select distinct _AIRBYTE_UNIQUE_KEY as unique_key + from {{ this }} + where 1=1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' + adapter.quote('DEDUP_EXCHANGE_RATE')) }} + ) recent_records + left join ( + select _AIRBYTE_UNIQUE_KEY as unique_key, count(_AIRBYTE_UNIQUE_KEY) as active_count + from {{ this }} + where _AIRBYTE_ACTIVE_ROW = 1 {{ incremental_clause('_AIRBYTE_NORMALIZED_AT', this.schema + '.' 
+ adapter.quote('DEDUP_EXCHANGE_RATE')) }} + group by _AIRBYTE_UNIQUE_KEY + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {% else %} + -- We have to have a non-empty query, so just do a noop delete + delete from {{ this }} where 1=0 + {% endif %} + ","drop view _AIRBYTE_TEST_NORMALIZATION.DEDUP_EXCHANGE_RATE_STG"], + tags = [ "top-level" ] +) }} +-- depends_on: ref('DEDUP_EXCHANGE_RATE_STG') +with +{% if is_incremental() %} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{ ref('DEDUP_EXCHANGE_RATE_STG') }} + -- DEDUP_EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} + where 1 = 1 + {{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} +), +new_data_ids as ( + -- build a subset of _AIRBYTE_UNIQUE_KEY from rows that are new + select distinct + {{ dbt_utils.surrogate_key([ + 'ID', + 'CURRENCY', + 'NZD', + ]) }} as _AIRBYTE_UNIQUE_KEY + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ star_intersect(ref('DEDUP_EXCHANGE_RATE_STG'), this, from_alias='inc_data', intersect_alias='this_data') }} + from {{ this }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data._AIRBYTE_UNIQUE_KEY = new_data_ids._AIRBYTE_UNIQUE_KEY + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + left join empty_new_data as inc_data on this_data._AIRBYTE_AB_ID = inc_data._AIRBYTE_AB_ID + where _AIRBYTE_ACTIVE_ROW = 1 +), +input_data as ( + select {{ dbt_utils.star(ref('DEDUP_EXCHANGE_RATE_STG')) }} from new_data + union all + select {{ dbt_utils.star(ref('DEDUP_EXCHANGE_RATE_STG')) }} from previous_active_scd_data +), +{% else %} +input_data as ( + select * + from {{ ref('DEDUP_EXCHANGE_RATE_STG') }} + -- DEDUP_EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_DEDUP_EXCHANGE_RATE') }} +), +{% endif %} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select + {{ dbt_utils.surrogate_key([ + 'ID', + 'CURRENCY', + 'NZD', + ]) }} as _AIRBYTE_UNIQUE_KEY, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + HKD_SPECIAL___CHARACTERS, + NZD, + USD, + DATE as _AIRBYTE_START_AT, + lag(DATE) over ( + partition by ID, CURRENCY, cast(NZD as {{ dbt_utils.type_string() }}) + order by + DATE is null asc, + DATE desc, + _AIRBYTE_EMITTED_AT desc + ) as _AIRBYTE_END_AT, + case when row_number() over ( + partition by ID, CURRENCY, cast(NZD as {{ dbt_utils.type_string() }}) + order by + DATE is null asc, + DATE desc, + _AIRBYTE_EMITTED_AT desc + ) = 1 then 1 else 0 end as _AIRBYTE_ACTIVE_ROW, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID + from input_data +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_START_AT, + _AIRBYTE_EMITTED_AT + order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID + ) as _AIRBYTE_ROW_NUM, + {{ dbt_utils.surrogate_key([ + 
'_AIRBYTE_UNIQUE_KEY', + '_AIRBYTE_START_AT', + '_AIRBYTE_EMITTED_AT' + ]) }} as _AIRBYTE_UNIQUE_KEY_SCD, + scd_data.* + from scd_data +) +select + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_UNIQUE_KEY_SCD, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + HKD_SPECIAL___CHARACTERS, + NZD, + USD, + _AIRBYTE_START_AT, + _AIRBYTE_END_AT, + _AIRBYTE_ACTIVE_ROW, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID +from dedup_data where _AIRBYTE_ROW_NUM = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql new file mode 100644 index 0000000000000..6b42adb3962da --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql @@ -0,0 +1,30 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + unique_key = '_AIRBYTE_AB_ID', + schema = "TEST_NORMALIZATION", + tags = [ "top-level" ] +) }} +-- Final base SQL model +-- depends_on: {{ ref('EXCHANGE_RATE_AB3') }} +select + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + HKD_SPECIAL___CHARACTERS, + NZD, + USD, + {{ adapter.quote('column`_\'with""_quotes') }}, + DATETIME_TZ, + DATETIME_NO_TZ, + TIME_TZ, + TIME_NO_TZ, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + {{ current_timestamp() }} as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_EXCHANGE_RATE_HASHID +from {{ ref('EXCHANGE_RATE_AB3') }} +-- EXCHANGE_RATE from {{ source('TEST_NORMALIZATION', '_AIRBYTE_RAW_EXCHANGE_RATE') }} +where 1 = 1 + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql new file mode 100644 index 0000000000000..d810a79652be6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql @@ -0,0 +1,25 @@ +{{ config( + cluster_by = ["_AIRBYTE_EMITTED_AT"], + unique_key = '_AIRBYTE_AB_ID', + schema = "_AIRBYTE_TEST_NORMALIZATION", + tags = [ "top-level-intermediate" ] +) }} +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ ref('DEDUP_EXCHANGE_RATE_AB2') }} +select + {{ dbt_utils.surrogate_key([ + 'ID', + 'CURRENCY', + 'DATE', + 'TIMESTAMP_COL', + adapter.quote('HKD@spéçiäl & characters'), + 'HKD_SPECIAL___CHARACTERS', + 'NZD', + 'USD', + ]) }} as _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID, + tmp.* +from {{ ref('DEDUP_EXCHANGE_RATE_AB2') }} tmp +-- DEDUP_EXCHANGE_RATE +where 1 = 1 +{{ incremental_clause('_AIRBYTE_EMITTED_AT', this) }} + diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml new file mode 100644 index 0000000000000..2932fe914c6c3 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/sources.yml @@ -0,0 +1,16 @@ +version: 2 +sources: +- name: TEST_NORMALIZATION + quoting: + database: true + schema: false + identifier: false + tables: + - name: _AIRBYTE_RAW_1_PREFIX_STARTWITH_NUMBER + - name: _AIRBYTE_RAW_DEDUP_CDC_EXCLUDED + - name: _AIRBYTE_RAW_DEDUP_EXCHANGE_RATE + - name: _AIRBYTE_RAW_EXCHANGE_RATE + - name: _AIRBYTE_RAW_MULTIPLE_COLUMN_NAMES_CONFLICTS + - name: _AIRBYTE_RAW_POS_DEDUP_CDCX + - name: _AIRBYTE_RAW_RENAMED_DEDUP_CDC_EXCLUDED + - name: _AIRBYTE_RAW_TYPES_TESTING diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql new file mode 100644 index 0000000000000..347a356730944 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_incremental/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE.sql @@ -0,0 +1,26 @@ +begin; + + + + + + + + merge into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE" as DBT_INTERNAL_DEST + using "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE__dbt_tmp" as DBT_INTERNAL_SOURCE + on + DBT_INTERNAL_SOURCE._AIRBYTE_UNIQUE_KEY = DBT_INTERNAL_DEST._AIRBYTE_UNIQUE_KEY + + + + when matched then update set + "_AIRBYTE_UNIQUE_KEY" = DBT_INTERNAL_SOURCE."_AIRBYTE_UNIQUE_KEY","ID" = DBT_INTERNAL_SOURCE."ID","CURRENCY" = DBT_INTERNAL_SOURCE."CURRENCY","DATE" = DBT_INTERNAL_SOURCE."DATE","TIMESTAMP_COL" = DBT_INTERNAL_SOURCE."TIMESTAMP_COL","HKD@spéçiäl & characters" = DBT_INTERNAL_SOURCE."HKD@spéçiäl & characters","HKD_SPECIAL___CHARACTERS" = DBT_INTERNAL_SOURCE."HKD_SPECIAL___CHARACTERS","NZD" = DBT_INTERNAL_SOURCE."NZD","USD" = DBT_INTERNAL_SOURCE."USD","_AIRBYTE_AB_ID" = DBT_INTERNAL_SOURCE."_AIRBYTE_AB_ID","_AIRBYTE_EMITTED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_EMITTED_AT","_AIRBYTE_NORMALIZED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_NORMALIZED_AT","_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" = DBT_INTERNAL_SOURCE."_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" + + + when not matched then insert + ("_AIRBYTE_UNIQUE_KEY", "ID", "CURRENCY", "DATE", "TIMESTAMP_COL", "HKD@spéçiäl & characters", "HKD_SPECIAL___CHARACTERS", "NZD", "USD", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID") + values + ("_AIRBYTE_UNIQUE_KEY", "ID", "CURRENCY", "DATE", "TIMESTAMP_COL", "HKD@spéçiäl & characters", "HKD_SPECIAL___CHARACTERS", "NZD", "USD", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID") + +; + commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql new file mode 100644 index 0000000000000..7323186545749 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -0,0 +1,26 @@ +begin; + + + + + + + + merge into "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD" as DBT_INTERNAL_DEST + using "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_SCD__dbt_tmp" as DBT_INTERNAL_SOURCE + on + DBT_INTERNAL_SOURCE._AIRBYTE_UNIQUE_KEY_SCD = DBT_INTERNAL_DEST._AIRBYTE_UNIQUE_KEY_SCD + + + + when matched then update set + "_AIRBYTE_UNIQUE_KEY" = DBT_INTERNAL_SOURCE."_AIRBYTE_UNIQUE_KEY","_AIRBYTE_UNIQUE_KEY_SCD" = DBT_INTERNAL_SOURCE."_AIRBYTE_UNIQUE_KEY_SCD","ID" = DBT_INTERNAL_SOURCE."ID","CURRENCY" = DBT_INTERNAL_SOURCE."CURRENCY","DATE" = DBT_INTERNAL_SOURCE."DATE","TIMESTAMP_COL" = DBT_INTERNAL_SOURCE."TIMESTAMP_COL","HKD@spéçiäl & characters" = DBT_INTERNAL_SOURCE."HKD@spéçiäl & characters","HKD_SPECIAL___CHARACTERS" = DBT_INTERNAL_SOURCE."HKD_SPECIAL___CHARACTERS","NZD" = DBT_INTERNAL_SOURCE."NZD","USD" = DBT_INTERNAL_SOURCE."USD","_AIRBYTE_START_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_START_AT","_AIRBYTE_END_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_END_AT","_AIRBYTE_ACTIVE_ROW" = DBT_INTERNAL_SOURCE."_AIRBYTE_ACTIVE_ROW","_AIRBYTE_AB_ID" = DBT_INTERNAL_SOURCE."_AIRBYTE_AB_ID","_AIRBYTE_EMITTED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_EMITTED_AT","_AIRBYTE_NORMALIZED_AT" = DBT_INTERNAL_SOURCE."_AIRBYTE_NORMALIZED_AT","_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" = DBT_INTERNAL_SOURCE."_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID" + + + when not matched then insert + ("_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_UNIQUE_KEY_SCD", "ID", "CURRENCY", "DATE", "TIMESTAMP_COL", "HKD@spéçiäl & characters", "HKD_SPECIAL___CHARACTERS", "NZD", "USD", "_AIRBYTE_START_AT", "_AIRBYTE_END_AT", "_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID") + values + ("_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_UNIQUE_KEY_SCD", "ID", "CURRENCY", "DATE", "TIMESTAMP_COL", "HKD@spéçiäl & characters", "HKD_SPECIAL___CHARACTERS", "NZD", "USD", "_AIRBYTE_START_AT", "_AIRBYTE_END_AT", "_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_AB_ID", "_AIRBYTE_EMITTED_AT", "_AIRBYTE_NORMALIZED_AT", "_AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID") + +; + commit; \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql new file mode 100644 index 0000000000000..e35addfdeb762 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_tables/TEST_NORMALIZATION/EXCHANGE_RATE.sql @@ -0,0 +1,159 @@ + + + create or replace table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."EXCHANGE_RATE" as + (select * from( + +with __dbt__cte__EXCHANGE_RATE_AB1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract 
into separated field columns as described by the JSON Schema +-- depends_on: "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE +select + to_varchar(get_path(parse_json(_airbyte_data), '"id"')) as ID, + to_varchar(get_path(parse_json(_airbyte_data), '"currency"')) as CURRENCY, + to_varchar(get_path(parse_json(_airbyte_data), '"date"')) as DATE, + to_varchar(get_path(parse_json(_airbyte_data), '"timestamp_col"')) as TIMESTAMP_COL, + to_varchar(get_path(parse_json(_airbyte_data), '"HKD@spéçiäl & characters"')) as "HKD@spéçiäl & characters", + to_varchar(get_path(parse_json(_airbyte_data), '"HKD_special___characters"')) as HKD_SPECIAL___CHARACTERS, + to_varchar(get_path(parse_json(_airbyte_data), '"NZD"')) as NZD, + to_varchar(get_path(parse_json(_airbyte_data), '"USD"')) as USD, + to_varchar(get_path(parse_json(_airbyte_data), '"column`_''with""_quotes"')) as "column`_'with""_quotes", + to_varchar(get_path(parse_json(_airbyte_data), '"datetime_tz"')) as DATETIME_TZ, + to_varchar(get_path(parse_json(_airbyte_data), '"datetime_no_tz"')) as DATETIME_NO_TZ, + to_varchar(get_path(parse_json(_airbyte_data), '"time_tz"')) as TIME_TZ, + to_varchar(get_path(parse_json(_airbyte_data), '"time_no_tz"')) as TIME_NO_TZ, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE as table_alias +-- EXCHANGE_RATE +where 1 = 1 +), __dbt__cte__EXCHANGE_RATE_AB2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__EXCHANGE_RATE_AB1 +select + cast(ID as + bigint +) as ID, + cast(CURRENCY as + varchar +) as CURRENCY, + cast(nullif(DATE, '') as + date +) as DATE, + case + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZH') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') + when TIMESTAMP_COL = '' then NULL + else to_timestamp_tz(TIMESTAMP_COL) + end as TIMESTAMP_COL + , + cast("HKD@spéçiäl & characters" as + float +) as "HKD@spéçiäl & characters", + cast(HKD_SPECIAL___CHARACTERS as + varchar +) as HKD_SPECIAL___CHARACTERS, + cast(NZD as + float +) as NZD, + cast(USD as + float +) as USD, + cast("column`_'with""_quotes" as + varchar +) as "column`_'with""_quotes", + case + when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') + when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SSTZH') + when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') + when DATETIME_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(DATETIME_TZ, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') + when DATETIME_TZ = '' then NULL + else to_timestamp_tz(DATETIME_TZ) + end as DATETIME_TZ + , + case 
+ when DATETIME_NO_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}' then to_timestamp(DATETIME_NO_TZ, 'YYYY-MM-DDTHH24:MI:SS') + when DATETIME_NO_TZ regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}' then to_timestamp(DATETIME_NO_TZ, 'YYYY-MM-DDTHH24:MI:SS.FF') + when DATETIME_NO_TZ = '' then NULL + else to_timestamp(DATETIME_NO_TZ) + end as DATETIME_NO_TZ + , + cast(nullif(TIME_TZ, '') as + varchar +) as TIME_TZ, + cast(nullif(TIME_NO_TZ, '') as + time +) as TIME_NO_TZ, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from __dbt__cte__EXCHANGE_RATE_AB1 +-- EXCHANGE_RATE +where 1 = 1 +), __dbt__cte__EXCHANGE_RATE_AB3 as ( + +-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__EXCHANGE_RATE_AB2 +select + md5(cast(coalesce(cast(ID as + varchar +), '') || '-' || coalesce(cast(CURRENCY as + varchar +), '') || '-' || coalesce(cast(DATE as + varchar +), '') || '-' || coalesce(cast(TIMESTAMP_COL as + varchar +), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as + varchar +), '') || '-' || coalesce(cast(HKD_SPECIAL___CHARACTERS as + varchar +), '') || '-' || coalesce(cast(NZD as + varchar +), '') || '-' || coalesce(cast(USD as + varchar +), '') || '-' || coalesce(cast("column`_'with""_quotes" as + varchar +), '') || '-' || coalesce(cast(DATETIME_TZ as + varchar +), '') || '-' || coalesce(cast(DATETIME_NO_TZ as + varchar +), '') || '-' || coalesce(cast(TIME_TZ as + varchar +), '') || '-' || coalesce(cast(TIME_NO_TZ as + varchar +), '') as + varchar +)) as _AIRBYTE_EXCHANGE_RATE_HASHID, + tmp.* +from __dbt__cte__EXCHANGE_RATE_AB2 tmp +-- EXCHANGE_RATE +where 1 = 1 +)-- Final base SQL model +-- depends_on: __dbt__cte__EXCHANGE_RATE_AB3 +select + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + "HKD@spéçiäl & characters", + HKD_SPECIAL___CHARACTERS, + NZD, + USD, + "column`_'with""_quotes", + DATETIME_TZ, + DATETIME_NO_TZ, + TIME_TZ, + TIME_NO_TZ, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT, + _AIRBYTE_EXCHANGE_RATE_HASHID +from __dbt__cte__EXCHANGE_RATE_AB3 +-- EXCHANGE_RATE from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_EXCHANGE_RATE +where 1 = 1 + ) order by (_AIRBYTE_EMITTED_AT) + ); + alter table "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION."EXCHANGE_RATE" cluster by (_AIRBYTE_EMITTED_AT); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql new file mode 100644 index 0000000000000..e91864477ee70 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/second_output/airbyte_views/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_STG.sql @@ -0,0 +1,95 @@ + + create or replace view "INTEGRATION_TEST_NORMALIZATION"._AIRBYTE_TEST_NORMALIZATION."DEDUP_EXCHANGE_RATE_STG" + + as ( + +with __dbt__cte__DEDUP_EXCHANGE_RATE_AB1 as ( + +-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema +-- depends_on: 
"INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE +select + to_varchar(get_path(parse_json(_airbyte_data), '"id"')) as ID, + to_varchar(get_path(parse_json(_airbyte_data), '"currency"')) as CURRENCY, + to_varchar(get_path(parse_json(_airbyte_data), '"date"')) as DATE, + to_varchar(get_path(parse_json(_airbyte_data), '"timestamp_col"')) as TIMESTAMP_COL, + to_varchar(get_path(parse_json(_airbyte_data), '"HKD@spéçiäl & characters"')) as "HKD@spéçiäl & characters", + to_varchar(get_path(parse_json(_airbyte_data), '"HKD_special___characters"')) as HKD_SPECIAL___CHARACTERS, + to_varchar(get_path(parse_json(_airbyte_data), '"NZD"')) as NZD, + to_varchar(get_path(parse_json(_airbyte_data), '"USD"')) as USD, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from "INTEGRATION_TEST_NORMALIZATION".TEST_NORMALIZATION._AIRBYTE_RAW_DEDUP_EXCHANGE_RATE as table_alias +-- DEDUP_EXCHANGE_RATE +where 1 = 1 + +), __dbt__cte__DEDUP_EXCHANGE_RATE_AB2 as ( + +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: __dbt__cte__DEDUP_EXCHANGE_RATE_AB1 +select + cast(ID as + bigint +) as ID, + cast(CURRENCY as + varchar +) as CURRENCY, + cast(nullif(DATE, '') as + date +) as DATE, + case + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZHTZM') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SSTZH') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZHTZM') + when TIMESTAMP_COL regexp '\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}' then to_timestamp_tz(TIMESTAMP_COL, 'YYYY-MM-DDTHH24:MI:SS.FFTZH') + when TIMESTAMP_COL = '' then NULL + else to_timestamp_tz(TIMESTAMP_COL) + end as TIMESTAMP_COL + , + cast("HKD@spéçiäl & characters" as + float +) as "HKD@spéçiäl & characters", + cast(HKD_SPECIAL___CHARACTERS as + varchar +) as HKD_SPECIAL___CHARACTERS, + cast(NZD as + float +) as NZD, + cast(USD as + float +) as USD, + _AIRBYTE_AB_ID, + _AIRBYTE_EMITTED_AT, + convert_timezone('UTC', current_timestamp()) as _AIRBYTE_NORMALIZED_AT +from __dbt__cte__DEDUP_EXCHANGE_RATE_AB1 +-- DEDUP_EXCHANGE_RATE +where 1 = 1 + +)-- SQL model to build a hash column based on the values of this record +-- depends_on: __dbt__cte__DEDUP_EXCHANGE_RATE_AB2 +select + md5(cast(coalesce(cast(ID as + varchar +), '') || '-' || coalesce(cast(CURRENCY as + varchar +), '') || '-' || coalesce(cast(DATE as + varchar +), '') || '-' || coalesce(cast(TIMESTAMP_COL as + varchar +), '') || '-' || coalesce(cast("HKD@spéçiäl & characters" as + varchar +), '') || '-' || coalesce(cast(HKD_SPECIAL___CHARACTERS as + varchar +), '') || '-' || coalesce(cast(NZD as + varchar +), '') || '-' || coalesce(cast(USD as + varchar +), '') as + varchar +)) as _AIRBYTE_DEDUP_EXCHANGE_RATE_HASHID, + tmp.* +from __dbt__cte__DEDUP_EXCHANGE_RATE_AB2 tmp +-- DEDUP_EXCHANGE_RATE +where 1 = 1 + + ); diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/README.md b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/README.md new file mode 100644 index 0000000000000..470ec8ed70091 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/README.md @@ -0,0 +1,27 @@ +# test_nested_streams + +The stream `nested_stream_with_complex_columns_resulting_into_long_names` tests primary key definitions on a stream +with nested fields of different complex types: + +- nested object +- nested array +- nested array of array + +# Stream name collisions + +The following three streams are deliberately given very long names to exceed the Postgres 64-character identifier limit +(even though they are set in different schemas): + +- `test_normalization_nested_stream_with_complex_columns_resulting_into_long_names` +- `test_normalization_non_nested_stream_without_namespace_resulting_into_long_names` +- `test_normalization_namespace_simple_stream_with_namespace_resulting_into_long_names` + +which could all be truncated to: + +- `test_normalization_n__lting_into_long_names` + +resulting in collisions. + +# Stream name conflicts + +The `conflict_stream_name_*` tables and the `unnest_alias` stream test naming conflicts between stream and column names when combined with nesting. diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/catalog.json new file mode 100644 index 0000000000000..4e5105f136e09 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/catalog.json @@ -0,0 +1,311 @@ +{ + "streams": [ + { + "stream": { + "name": "nested_stream_with_complex_columns_resulting_into_long_names", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "number", "string"] + }, + "date": { + "type": ["null", "string"] + }, + "partition": { + "type": ["null", "object"], + "properties": { + "double_array_data": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "properties": { + "id": { + "type": ["null", "string"] + } + } + } + } + }, + "DATA": { + "type": ["null", "array"], + "items": { + "properties": { + "currency": { + "type": ["null", "string"] + } + } + } + } + } + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": ["date"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + }, + { + "stream": { + "name": "non_nested_stream_without_namespace_resulting_into_long_names", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "number", "string"] + }, + "date": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "primary_key": [] + }, + { + "stream": { + "name": "some_stream_that_was_empty", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "number", "string"] + }, + "date": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": false, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": ["date"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + }, + { + "stream": { + "name": 
"simple_stream_with_namespace_resulting_into_long_names", + "namespace": "test_normalization_namespace", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "number", "string"] + }, + "date": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "append", + "primary_key": [] + }, + { + "stream": { + "name": "conflict_stream_name", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "number", "string"] + }, + "conflict_stream_name": { + "type": ["null", "object"], + "properties": { + "conflict_stream_name": { + "type": "object", + "items": { + "type": "object", + "properties": { + "groups": { + "type": "string" + } + }, + "custom_fields": { + "items": { + "properties": { + "id": { + "type": ["null", "integer"] + }, + "value": {} + }, + "type": ["null", "object"] + }, + "type": ["null", "array"] + }, + "conflict_stream_name": { + "type": "integer" + } + } + } + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "primary_key": [] + }, + { + "stream": { + "name": "conflict_stream_scalar", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "number", "string"] + }, + "conflict_stream_scalar": { + "type": "integer" + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "primary_key": [] + }, + { + "stream": { + "name": "conflict_stream_array", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "number", "string"] + }, + "conflict_stream_array": { + "type": ["null", "array"], + "properties": { + "conflict_stream_name": { + "type": ["null", "array"], + "items": { + "properties": { + "id": { + "type": ["null", "integer"] + } + } + } + } + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "full_refresh", + "destination_sync_mode": "overwrite", + "primary_key": [] + }, + { + "stream": { + "name": "unnest_alias", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "children": { + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "ab_id": { + "type": ["null", "integer"] + }, + "owner": { + "type": ["null", "object"], + "properties": { + "owner_id": { + "type": ["null", "integer"] + }, + "column`_'with\"_quotes": { + "type": ["null", "array"], + "items": { + "properties": { + "currency": { + "type": ["null", "string"] + } + } + } + } + } + } + } + } + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "arrays", + "json_schema": { + "type": ["null", "object"], + "properties": { + "array_of_strings": { + "type": ["null", "array"], + "items": { + "type": ["null", "string"] + } + }, + "nested_array_parent": { + "type": ["null", "object"], + "properties": { + "nested_array": { + "type": ["null", "array"], + "items": { + "type": 
["null", "string"] + } + } + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "full_refresh", + "cursor_field": [], + "destination_sync_mode": "overwrite", + "primary_key": [] + } + ] +} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages.txt new file mode 100644 index 0000000000000..e349c09afc31b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages.txt @@ -0,0 +1,18 @@ +{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599000, "data": { "id": 4.2, "date": "2020-08-29T00:00:00Z", "partition": { "double_array_data": [[ { "id": "EUR" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} +{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599100, "data": { "id": "test record", "date": "2020-08-31T00:00:00Z", "partition": { "double_array_data": [[ { "id": "USD" } ], [ { "id": "GBP" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} + +{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":1,"conflict_stream_name":{"conflict_stream_name": {"groups": "1", "custom_fields": [{"id":1, "value":3}, {"id":2, "value":4}], "conflict_stream_name": 3}}},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":2,"conflict_stream_name":{"conflict_stream_name": {"groups": "2", "custom_fields": [{"id":1, "value":3}, {"id":2, "value":4}], "conflict_stream_name": 3}}},"emitted_at":1623861660}} + +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":1,"conflict_stream_scalar": 2},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":2,"conflict_stream_scalar": 2},"emitted_at":1623861660}} + +{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":1, "conflict_stream_array": {"conflict_stream_array": [{"id": 1}, {"id": 2}, {"id": 3}]}}, "emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":2, "conflict_stream_array": {"conflict_stream_array": [{"id": 4}, {"id": 5}, {"id": 6}]}}, "emitted_at":1623861860}} + +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":1,"conflict_stream_scalar": 2},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":2,"conflict_stream_scalar": 2},"emitted_at":1623861660}} + +{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":1, "children": [{"ab_id": 1, "owner": {"owner_id": 1, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": 2, "owner": {"owner_id": 2, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":2, "children": [{"ab_id": 3, "owner": {"owner_id": 3, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": 4, "owner": {"owner_id": 4, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} 
+{"type":"RECORD","record":{"stream":"arrays","emitted_at":1602638599000,"data":{"array_of_strings":["string1",null,"string2","string3"],"nested_array_parent":{"nested_array":["string1",null,"string2"]}}}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages_incremental.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages_incremental.txt new file mode 100644 index 0000000000000..ae1cf0f5c0b4e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/messages_incremental.txt @@ -0,0 +1,22 @@ +{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599000, "data": { "id": 4.2, "date": "2020-08-29T00:00:00Z", "partition": { "double_array_data": [[ { "id": "EUR" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} +{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638599100, "data": { "id": "test record", "date": "2020-08-31T00:00:00Z", "partition": { "double_array_data": [[ { "id": "USD" } ], [ { "id": "GBP" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} +{"type": "RECORD", "record": {"stream": "nested_stream_with_complex_columns_resulting_into_long_names", "emitted_at": 1602638600000, "data": { "id": "new record", "date": "2020-09-10T00:00:00Z", "partition": { "double_array_data": [[ { "id": "GBP" } ], [ { "id": "HKD" } ]], "DATA": [ {"currency": "EUR" } ], "column`_'with\"_quotes": [ {"currency": "EUR" } ] } }}} + +{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":1,"conflict_stream_name":{"conflict_stream_name": {"groups": "1", "custom_fields": [{"id":1, "value":3}, {"id":2, "value":4}], "conflict_stream_name": 3}}},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_name","data":{"id":2,"conflict_stream_name":{"conflict_stream_name": {"groups": "2", "custom_fields": [{"id":1, "value":3}, {"id":2, "value":4}], "conflict_stream_name": 3}}},"emitted_at":1623861660}} + +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":1,"conflict_stream_scalar": 2},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":2,"conflict_stream_scalar": 2},"emitted_at":1623861660}} + +{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":1, "conflict_stream_array": {"conflict_stream_array": [{"id": 1}, {"id": 2}, {"id": 3}]}}, "emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_array","data":{"id":2, "conflict_stream_array": {"conflict_stream_array": [{"id": 4}, {"id": 5}, {"id": 6}]}}, "emitted_at":1623861860}} + +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":1,"conflict_stream_scalar": 2},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"conflict_stream_scalar","data":{"id":2,"conflict_stream_scalar": 2},"emitted_at":1623861660}} + +{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":1, "children": [{"ab_id": 1, "owner": {"owner_id": 1, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": 2, "owner": {"owner_id": 2, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} 
+{"type":"RECORD","record":{"stream":"unnest_alias","data":{"id":2, "children": [{"ab_id": 3, "owner": {"owner_id": 3, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}},{"ab_id": 4, "owner": {"owner_id": 4, "column`_'with\"_quotes": [ {"currency": "EUR" } ]}}]},"emitted_at":1623861660}} + +{"type":"RECORD","record":{"stream":"some_stream_that_was_empty","data":{"id":1,"date": "2020-11-05"},"emitted_at":1623871660}} +{"type":"RECORD","record":{"stream":"some_stream_that_was_empty","data":{"id":2,"date": "2020-11-06"},"emitted_at":1623872660}} +{"type":"RECORD","record":{"stream":"some_stream_that_was_empty","data":{"id":3,"date": "2020-11-06"},"emitted_at":1623873660}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/replace_identifiers.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/replace_identifiers.json new file mode 100644 index 0000000000000..0c2197f2d759c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/data_input/replace_identifiers.json @@ -0,0 +1,130 @@ +{ + "bigquery": [ + { "double_array_data is not null": "array_length(double_array_data) > 0" }, + { "DATA is not null": "array_length(DATA) > 0" }, + { + "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "array_length(column___with__quotes) > 0" + } + ], + "oracle": [], + "postgres": [ + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data": "nested_stream_with_c__ion_double_array_data" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition_data": "nested_stream_with_c___names_partition_data" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition": "nested_stream_with_c___long_names_partition" + }, + { + "'nested_stream_with_complex_columns_resulting_into_long_names'": "'nested_stream_with_c__lting_into_long_names'" + }, + { + "'non_nested_stream_without_namespace_resulting_into_long_names'": "'non_nested_stream_wi__lting_into_long_names'" + }, + { + "expression: \"DATA is not null\"": "expression: \"\\\"DATA\\\" is not null\"" + } + ], + "snowflake": [ + { + "NESTED_STREAMS_FIRST_RUN_ROW_COUNTS": "nested_streams_first_run_row_counts" + }, + { + "NESTED_STREAMS_SECOND_RUN_ROW_COUNTS": "nested_streams_second_run_row_counts" + } + ], + "redshift": [], + "mysql": [ + { + "_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names": "_airbyte_raw_nested_s__lting_into_long_names" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data": "nested_stream_with_co__ion_double_array_data" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition_data": "nested_stream_with_co___names_partition_data" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition": "nested_stream_with_co___long_names_partition" + }, + { + "'nested_stream_with_complex_columns_resulting_into_long_names'": "'nested_stream_with_co__lting_into_long_names'" + }, + { + "non_nested_stream_without_namespace_resulting_into_long_names": "non_nested_stream_wit__lting_into_long_names" + }, + { + "double_array_data is not null": "coalesce(json_length(double_array_data), 0) > 0" + }, + { "DATA is not null": "coalesce(json_length(DATA), 0) > 0" }, + { + "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "coalesce(json_length(`column__'with\\\"_quotes`), 0) > 0" + } + ], + 
"mssql": [ + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data": "nested_stream_with_co__ion_double_array_data" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition_data": "nested_stream_with_co___names_partition_data" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition": "nested_stream_with_co___long_names_partition" + }, + { + "'nested_stream_with_complex_columns_resulting_into_long_names'": "'nested_stream_with_co__lting_into_long_names'" + }, + { + "non_nested_stream_without_namespace_resulting_into_long_names": "non_nested_stream_wit__lting_into_long_names" + } + ], + "tidb": [ + { + "_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names": "_airbyte_raw_nested_s__lting_into_long_names" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data": "nested_stream_with_co__ion_double_array_data" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition_data": "nested_stream_with_co___names_partition_data" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition": "nested_stream_with_co___long_names_partition" + }, + { + "'nested_stream_with_complex_columns_resulting_into_long_names'": "'nested_stream_with_co__lting_into_long_names'" + }, + { + "non_nested_stream_without_namespace_resulting_into_long_names": "non_nested_stream_wit__lting_into_long_names" + }, + { + "double_array_data is not null": "coalesce(json_length(double_array_data), 0) > 0" + }, + { "DATA is not null": "coalesce(json_length(DATA), 0) > 0" }, + { + "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "coalesce(json_length(`column__'with\\\"_quotes`), 0) > 0" + } + ], + "duckdb": [ + { + "_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names": "_airbyte_raw_nested_s__lting_into_long_names" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data": "nested_stream_with_co__ion_double_array_data" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition_data": "nested_stream_with_co___names_partition_data" + }, + { + "nested_stream_with_complex_columns_resulting_into_long_names_partition": "nested_stream_with_co___long_names_partition" + }, + { + "'nested_stream_with_complex_columns_resulting_into_long_names'": "'nested_stream_with_co__lting_into_long_names'" + }, + { + "non_nested_stream_without_namespace_resulting_into_long_names": "non_nested_stream_wit__lting_into_long_names" + } + ] +} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests/test_check_first_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests/test_check_first_run_row_counts.sql new file mode 100644 index 0000000000000..4764acc1d39a2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests/test_check_first_run_row_counts.sql @@ -0,0 +1,2 @@ +select * from {{ ref('nested_streams_first_run_row_counts') }} +where row_count != expected_count diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_incremental/test_check_second_run_row_counts.sql 
b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_incremental/test_check_second_run_row_counts.sql new file mode 100644 index 0000000000000..169bb80895e6a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_incremental/test_check_second_run_row_counts.sql @@ -0,0 +1,2 @@ +select * from {{ ref('nested_streams_second_run_row_counts') }} +where row_count != expected_count diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_tmp/nested_streams_first_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_tmp/nested_streams_first_run_row_counts.sql new file mode 100644 index 0000000000000..42c4d3c229846 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_tmp/nested_streams_first_run_row_counts.sql @@ -0,0 +1,30 @@ +with table_row_counts as ( + select distinct '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names' as label, count(*) as row_count, 2 as expected_count + from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +union all + select distinct 'nested_stream_with_complex_columns_resulting_into_long_names' as label, count(*) as row_count, 2 as expected_count + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names') }} +union all + select distinct 'nested_stream_with_complex_columns_resulting_into_long_names_partition' as label, count(*) as row_count, 2 as expected_count + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition') }} +union all + select 'nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA' as label, count(distinct currency) as row_count, 1 as expected_count + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA') }} +-- union all +-- select count(distinct id) as row_count, 3 as expected_count +-- from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data') }} +union all + select 'some_stream_that_was_empty_scd' as label, count(*) as row_count, 0 as expected_count + from {{ ref('some_stream_that_was_empty_scd') }} +union all + select 'some_stream_that_was_empty' as label, count(*) as row_count, 0 as expected_count + from {{ ref('some_stream_that_was_empty') }} +union all + select 'arrays' as label, count(*) as row_count, 1 as expected_count + from {{ ref('arrays') }} +union all + select 'arrays_nested_array_parent' as label, count(*) as row_count, 1 as expected_count + from {{ ref('arrays_nested_array_parent') }} +) +select * +from table_row_counts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_tmp_incremental/nested_streams_second_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_tmp_incremental/nested_streams_second_run_row_counts.sql new file mode 100644 index 0000000000000..d2652ef2fd3bf --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_data_tests_tmp_incremental/nested_streams_second_run_row_counts.sql @@ -0,0 +1,21 @@ +with table_row_counts as ( + select distinct '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names' as label, count(*) as row_count, 3 as expected_count + from {{ source('test_normalization', '_airbyte_raw_nested_stream_with_complex_columns_resulting_into_long_names') }} +union all + select distinct 'nested_stream_with_complex_columns_resulting_into_long_names' as label, count(*) as row_count, 3 as expected_count + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names') }} +union all + select 'nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA' as label, count(distinct currency) as row_count, 1 as expected_count + from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA') }} +-- union all +-- select count(distinct id) as row_count, 3 as expected_count +-- from {{ ref('nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data') }} +union all + select 'some_stream_that_was_empty_scd' as label, count(*) as row_count, 3 as expected_count + from {{ ref('some_stream_that_was_empty_scd') }} +union all + select 'some_stream_that_was_empty' as label, count(*) as row_count, 3 as expected_count + from {{ ref('some_stream_that_was_empty') }} +) +select * +from table_row_counts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_schema_tests/schema_test.yml b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_schema_tests/schema_test.yml new file mode 100644 index 0000000000000..2695a9e408bc2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_schema_tests/schema_test.yml @@ -0,0 +1,23 @@ +version: 2 + +models: + - name: nested_stream_with_complex_columns_resulting_into_long_names_partition + tests: + - dbt_utils.expression_is_true: + expression: "double_array_data is not null" + - dbt_utils.expression_is_true: + expression: "DATA is not null" + - name: nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA + columns: + - name: currency + tests: + - not_null +# - name: nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data +# columns: +# - name: id +# tests: + # - not_null # TODO Fix bug here + - name: unnest_alias_children_owner + tests: + - dbt_utils.expression_is_true: + expression: "\"column`_'with\"\"_quotes\" is not null" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_schema_tests_incremental/schema_test.yml b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_schema_tests_incremental/schema_test.yml new file mode 100644 index 0000000000000..2695a9e408bc2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_nested_streams/dbt_test_config/dbt_schema_tests_incremental/schema_test.yml @@ -0,0 +1,23 @@ +version: 2 + +models: + - name: nested_stream_with_complex_columns_resulting_into_long_names_partition + tests: + - dbt_utils.expression_is_true: + expression: "double_array_data is not null" + - 
dbt_utils.expression_is_true: + expression: "DATA is not null" + - name: nested_stream_with_complex_columns_resulting_into_long_names_partition_DATA + columns: + - name: currency + tests: + - not_null +# - name: nested_stream_with_complex_columns_resulting_into_long_names_partition_double_array_data +# columns: +# - name: id +# tests: + # - not_null # TODO Fix bug here + - name: unnest_alias_children_owner + tests: + - dbt_utils.expression_is_true: + expression: "\"column`_'with\"\"_quotes\" is not null" diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog.json new file mode 100644 index 0000000000000..37d6c7d9a939c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog.json @@ -0,0 +1,46 @@ +{ + "streams": [ + { + "stream": { + "name": "stream_test_scd_drop", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "date": { + "type": "string", + "format": "date" + }, + "timestamp_col": { + "type": "string", + "format": "date-time" + }, + "datetime_to_string": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" + }, + "string_to_dt": { + "type": "string" + }, + "number_to_int": { + "type": "number" + }, + "int_to_number": { + "type": "integer" + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": ["date"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + } + ] +} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_incremental.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_incremental.json new file mode 100644 index 0000000000000..04b78b4b435f6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_incremental.json @@ -0,0 +1,46 @@ +{ + "streams": [ + { + "stream": { + "name": "stream_test_scd_drop", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "date": { + "type": "string", + "format": "date" + }, + "timestamp_col": { + "type": "string", + "format": "date-time" + }, + "datetime_to_string": { + "type": "string" + }, + "string_to_dt": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" + }, + "number_to_int": { + "type": "integer" + }, + "int_to_number": { + "type": "number" + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": ["date"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + } + ] +} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_reset.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_reset.json new file mode 100644 index 
0000000000000..9a76b76cda8b6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_catalog_reset.json @@ -0,0 +1,46 @@ +{ + "streams": [ + { + "stream": { + "name": "stream_test_scd_drop", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "date": { + "type": "string", + "format": "date" + }, + "timestamp_col": { + "type": "string", + "format": "date-time" + }, + "datetime_to_string": { + "type": "string" + }, + "string_to_dt": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" + }, + "number_to_int": { + "type": "integer" + }, + "int_to_number": { + "type": "number" + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": ["date"], + "destination_sync_mode": "overwrite", + "primary_key": [["id"]] + } + ] +} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_messages.txt new file mode 100644 index 0000000000000..e35685cb629a4 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_drop_scd_messages.txt @@ -0,0 +1,5 @@ +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637589000, "data": { "id": 1, "date": "2022-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "datetime_to_string":"2022-10-01T01:04:04-04:00", "string_to_dt":"2022-11-01T02:03:04-07:00", "number_to_int": 1, "int_to_number": 10}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637689100, "data": { "id": 2, "date": "2022-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "datetime_to_string":"2022-10-02T01:04:04-04:00", "string_to_dt":"2022-11-02T03:04:05-07:00", "number_to_int": 10, "int_to_number": 11}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637789200, "data": { "id": 3, "date": "2022-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "datetime_to_string":"2022-10-03T01:04:04-04:00", "string_to_dt":"2022-11-03T03:04:06-07:00", "number_to_int": 11, "int_to_number": 12}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637889300, "data": { "id": 4, "date": "2022-09-01", "timestamp_col": "2020-08-31T00:00:00+0000", "datetime_to_string":"2022-10-04T01:04:04-04:00", "string_to_dt":"2022-11-04T03:04:07-07:00", "number_to_int": 111, "int_to_number": 133}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637989400, "data": { "id": 5, "date": "2022-09-02", "timestamp_col": "2020-09-01T00:00:00Z", "datetime_to_string":"2022-10-05T01:04:04-04:00", "string_to_dt":"2022-11-05T03:04:08-12:00", "number_to_int": 1010, "int_to_number": 1300}}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_scd_reset_messages_incremental.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_scd_reset_messages_incremental.txt new file mode 100644 index 0000000000000..492efbaea0aea --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/data_input/test_scd_reset_messages_incremental.txt @@ -0,0 +1,6 @@ +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637589000, "data": { "id": 1, "date": "2022-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "datetime_to_string":"2022-10-01T01:04:04-04:00", "string_to_dt":"2022-11-01T02:03:04-07:00", "number_to_int": 1, "int_to_number": 10}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637689100, "data": { "id": 2, "date": "2022-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "datetime_to_string":"2022-10-02T01:04:04-04:00", "string_to_dt":"2022-11-02T03:04:05-07:00", "number_to_int": 10, "int_to_number": 11}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637789200, "data": { "id": 3, "date": "2022-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "datetime_to_string":"2022-10-03T01:04:04-04:00", "string_to_dt":"2022-11-03T03:04:06-07:00", "number_to_int": 11, "int_to_number": 12}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637889300, "data": { "id": 4, "date": "2022-09-01", "timestamp_col": "2020-08-31T00:00:00+0000", "datetime_to_string":"2022-10-04T01:04:04-04:00", "string_to_dt":"2022-11-04T03:04:07-07:00", "number_to_int": 111, "int_to_number": 133}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637989400, "data": { "id": 5, "date": "2022-09-02", "timestamp_col": "2020-09-01T00:00:00Z", "datetime_to_string":"2022-10-05T01:04:04-04:00", "string_to_dt":"2022-11-05T03:04:08-12:00", "number_to_int": 1010, "int_to_number": 1300}}} +{"type": "RECORD", "record": {"stream": "stream_test_scd_drop", "emitted_at": 1602637989400, "data": { "id": 6, "date": "2022-09-03", "timestamp_col": "2020-09-01T00:00:00Z", "datetime_to_string":"this is a string, not a datetime value", "string_to_dt":"2022-11-05T03:04:08-12:00", "number_to_int": 1010, "int_to_number": 1300.25}}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/dbt_test_config/dbt_data_tests/test_check_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/dbt_test_config/dbt_data_tests/test_check_row_counts.sql new file mode 100644 index 0000000000000..5b8755db9ec63 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_reset_scd_overwrite/dbt_test_config/dbt_data_tests/test_check_row_counts.sql @@ -0,0 +1,2 @@ +select * from {{ ref('test_scd_drop_row_counts') }} +where row_count != expected_count diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/README.md b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/README.md new file mode 100644 index 0000000000000..87e59f2f33e84 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/README.md @@ -0,0 +1,18 @@ +# test_simple_streams + +## Exchange Rate + +This test suite focuses on testing a simple (non-nested) stream of data similar to `source-exchangerates`, using two different +`destination_sync_modes`: + +- `incremental` + `overwrite` with stream `exchange_rate` +- `incremental` + `append_dedup` with stream `dedup_exchange_rate` + +To do so, we've set up two streams in
the catalog.json and use the exact same record messages in both. + +Note that we are also making sure that one of the columns used as a primary key is of type `float`, as this could be +an edge case when using it as a partition key on certain destinations. + +## CDC + +We've also included some streams as if they were produced by a CDC source, especially to test how they behave with dedup sync modes, where deleted rows should be removed from the deduplicated tables. diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json new file mode 100644 index 0000000000000..584f7f98d3599 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json @@ -0,0 +1,292 @@ +{ + "streams": [ + { + "stream": { + "name": "exchange_rate", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "currency": { + "type": "string" + }, + "date": { + "type": "string", + "format": "date" + }, + "timestamp_col": { + "type": "string", + "format": "date-time" + }, + "HKD@spéçiäl & characters": { + "type": "number" + }, + "HKD_special___characters": { + "type": "string" + }, + "NZD": { + "type": "number" + }, + "USD": { + "type": "number" + }, + "column`_'with\"_quotes": { + "type": "string" + }, + "datetime_tz": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_with_timezone" + }, + "datetime_no_tz": { + "type": "string", + "format": "date-time", + "airbyte_type": "timestamp_without_timezone" + }, + "time_tz": { + "type": "string", + "format": "time", + "airbyte_type": "time_with_timezone" + }, + "time_no_tz": { + "type": "string", + "format": "time", + "airbyte_type": "time_without_timezone" + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "dedup_exchange_rate", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "currency": { + "type": "string" + }, + "date": { + "type": "string", + "format": "date" + }, + "timestamp_col": { + "type": "string", + "format": "date-time" + }, + "HKD@spéçiäl & characters": { + "type": "number" + }, + "HKD_special___characters": { + "type": "string" + }, + "NZD": { + "type": "number" + }, + "USD": { + "type": "number" + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": ["date"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"], ["currency"], ["NZD"]] + }, + { + "stream": { + "name": "renamed_dedup_cdc_excluded", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "_ab_cdc_updated_at": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["_ab_cdc_updated_at"] + }, + "sync_mode": "incremental", + "cursor_field": ["_ab_cdc_updated_at"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + }, + { + "stream": { + "name": "dedup_cdc_excluded", + "json_schema": { + "type": ["null", "object"], +
"properties": { + "id": { + "type": "integer" + }, + "name": { + "type": ["string", "null"] + }, + "_ab_cdc_lsn": { + "type": ["null", "number"] + }, + "_ab_cdc_updated_at": { + "type": ["null", "number"] + }, + "_ab_cdc_deleted_at": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["_ab_cdc_lsn"] + }, + "sync_mode": "incremental", + "cursor_field": ["_ab_cdc_lsn"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + }, + { + "stream": { + "name": "pos_dedup_cdcx", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "name": { + "type": ["string", "null"] + }, + "_ab_cdc_lsn": { + "type": ["null", "number"] + }, + "_ab_cdc_updated_at": { + "type": ["null", "number"] + }, + "_ab_cdc_deleted_at": { + "type": ["null", "number"] + }, + "_ab_cdc_log_pos": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["_ab_cdc_lsn"] + }, + "sync_mode": "full_refresh", + "cursor_field": ["_ab_cdc_lsn"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + }, + { + "stream": { + "name": "1_prefix_startwith_number", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "date": { + "type": "string", + "format": "date" + }, + "text": { + "type": "string" + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": ["date"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + }, + { + "stream": { + "name": "multiple_column_names_conflicts", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "User Id": { + "type": ["string", "null"] + }, + "user_id": { + "type": ["null", "number"] + }, + "User id": { + "type": ["null", "number"] + }, + "user id": { + "type": ["null", "number"] + }, + "User@Id": { + "type": ["null", "string"] + }, + "UserId": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "full_refresh", + "cursor_field": [], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + }, + { + "stream": { + "name": "types_testing", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "airbyte_integer_column": { + "type": "number", + "airbyte_type": "integer" + }, + "nullable_airbyte_integer_column": { + "type": ["null", "number"], + "airbyte_type": "integer" + } + } + } + }, + "sync_mode": "full_refresh", + "cursor_field": [], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + } + ] +} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json new file mode 100644 index 0000000000000..1f334071c928a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog_schema_change.json @@ -0,0 +1,156 @@ +{ + "streams": [ + { + "stream": { + "name": "exchange_rate", + "json_schema": { + 
"type": ["null", "object"], + "properties": { + "id": { + "type": "number" + }, + "currency": { + "type": "string" + }, + "new_column": { + "type": "number" + }, + "date": { + "type": "string", + "format": "date" + }, + "timestamp_col": { + "type": "string", + "format": "date-time" + }, + "HKD@spéçiäl & characters": { + "type": "number" + }, + "NZD": { + "type": "number" + }, + "USD": { + "type": "number" + }, + "column`_'with\"_quotes": { + "type": "string" + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": [], + "destination_sync_mode": "overwrite" + }, + { + "stream": { + "name": "dedup_exchange_rate", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "number" + }, + "currency": { + "type": "string" + }, + "new_column": { + "type": "number" + }, + "date": { + "type": "string", + "format": "date" + }, + "timestamp_col": { + "type": "string", + "format": "date-time" + }, + "HKD@spéçiäl & characters": { + "type": "number" + }, + "NZD": { + "type": "number" + }, + "USD": { + "type": "integer" + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": ["date"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"], ["currency"], ["NZD"]] + }, + { + "stream": { + "name": "renamed_dedup_cdc_excluded", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "name": { + "type": ["string", "null"] + }, + "_ab_cdc_lsn": { + "type": ["null", "number"] + }, + "_ab_cdc_updated_at": { + "type": ["null", "number"] + }, + "_ab_cdc_deleted_at": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["_ab_cdc_lsn"] + }, + "sync_mode": "incremental", + "cursor_field": ["_ab_cdc_lsn"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + }, + { + "stream": { + "name": "dedup_cdc_excluded", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": "integer" + }, + "name": { + "type": ["string", "null"] + }, + "_ab_cdc_lsn": { + "type": ["null", "number"] + }, + "_ab_cdc_updated_at": { + "type": ["null", "number"] + }, + "_ab_cdc_deleted_at": { + "type": ["null", "number"] + } + } + }, + "supported_sync_modes": ["full_refresh", "incremental"], + "source_defined_cursor": true, + "default_cursor_field": ["_ab_cdc_lsn"] + }, + "sync_mode": "incremental", + "cursor_field": ["_ab_cdc_lsn"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + } + ] +} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt new file mode 100644 index 0000000000000..a2ec40e1974cc --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages.txt @@ -0,0 +1,72 @@ +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637589000, "data": { "id": 1, "currency": "USD", "date": "2020-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "NZD": 1.14, "HKD@spéçiäl & characters": 2.13, "HKD_special___characters": "column name collision?", 
"column`_'with\"_quotes":"ma\"z`d'a" }}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637689100, "data": { "id": 1, "currency": "USD", "date": "2020-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "NZD": 1.14, "HKD@spéçiäl & characters": 7.15, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637789200, "data": { "id": 2, "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "NZD": 3.89, "HKD@spéçiäl & characters": 7.12, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.16}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637889300, "data": { "id": 2, "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+0000", "NZD": 1.14, "HKD@spéçiäl & characters": 7.99, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.99}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637989400, "data": { "id": 2, "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 8, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.16}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990700, "data": { "id": 1, "currency": "USD", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 1.14, "HKD@spéçiäl & characters": 10.5, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "", "timestamp_col": "", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990900, "data": { "id": 3, "currency": "GBP", "NZD": 3.14, "HKD@spéçiäl & characters": 9.2, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991000, "data": { "id": 2, "currency": "EUR", "NZD": 3.89, "HKD@spéçiäl & characters": 7.02, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991100, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 8.12, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991200, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 9.23, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637991300, "data": { "id": 6, "currency": "USD", 
"NZD": 0.01, "HKD@spéçiäl & characters": 9.23, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "datetime_tz": "2022-01-14T01:04:04-04:00", "datetime_no_tz": "2022-01-14T01:04:04", "time_tz": "01:04:04-04:00", "time_no_tz": "01:04:04"}}} + +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637589000, "data": { "id": 1, "currency": "USD", "date": "2020-08-29", "timestamp_col": "2020-08-29T00:00:00.000000-0000", "NZD": 1.14, "HKD@spéçiäl & characters": 2.13, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a" }}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637689100, "data": { "id": 1, "currency": "USD", "date": "2020-08-30", "timestamp_col": "2020-08-30T00:00:00.000-00", "NZD": 1.14, "HKD@spéçiäl & characters": 7.15, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637789200, "data": { "id": 2, "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+00", "NZD": 3.89, "HKD@spéçiäl & characters": 7.12, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.16}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637889300, "data": { "id": 2, "currency": "EUR", "date": "2020-08-31", "timestamp_col": "2020-08-31T00:00:00+0000", "NZD": 1.14, "HKD@spéçiäl & characters": 7.99, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.99}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637989400, "data": { "id": 2, "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 8, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10.16}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990700, "data": { "id": 1, "currency": "USD", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 1.14, "HKD@spéçiäl & characters": 10.5, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "2020-09-01", "timestamp_col": "2020-09-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "", "timestamp_col": "", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990900, "data": { "id": 3, "currency": "GBP", "NZD": 3.14, "HKD@spéçiäl & characters": 9.2, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637991000, "data": { "id": 2, "currency": "EUR", "NZD": 3.89, "HKD@spéçiäl & characters": 7.02, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": 
"RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637991100, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 8.12, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637991200, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 9.23, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} + +// Note that some of the IDs are inserted and then deleted; this should be reflected as a single row in the SCD model with _airbyte_active_row set to 0. +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":1,"name":"mazda","_ab_cdc_updated_at":1623849130530,"_ab_cdc_lsn":26971624,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":2,"name":"toyata","_ab_cdc_updated_at":1623849130549,"_ab_cdc_lsn":26971624,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":4,"name":"bmw","_ab_cdc_updated_at":1623849314535,"_ab_cdc_lsn":26974776,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":5,"name":"vw","_ab_cdc_updated_at":1623849314663,"_ab_cdc_lsn":26975264,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":4,"name":null,"_ab_cdc_updated_at":1623849314791,"_ab_cdc_lsn":26975440,"_ab_cdc_deleted_at":1623849314791},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":6,"name":"opel","_ab_cdc_updated_at":1623850868109,"_ab_cdc_lsn":27009440,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":7,"name":"lotus","_ab_cdc_updated_at":1623850868237,"_ab_cdc_lsn":27010048,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +// messages_incremental.txt has a dedup_cdc_excluded record with emitted_at=1623860160, i.e. older than this record. If you delete/modify this record, make sure to maintain that relationship. +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":6,"name":null,"_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_deleted_at":1623850868371},"emitted_at":1623861660}} +// these messages have the same _ab_cdc_updated_at, but different _ab_cdc_lsn. They should each get an entry in the SCD model, and the final table should reflect the highest lsn's data. 
+{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"foo1","_ab_cdc_updated_at":1623850900000,"_ab_cdc_lsn":27010232,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +// for now - increment lsn by 100 because mysql/mssql/tidb round are truncating it at the hundreds digit +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"foo3","_ab_cdc_updated_at":1623850900000,"_ab_cdc_lsn":27010432,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"foo2","_ab_cdc_updated_at":1623850900000,"_ab_cdc_lsn":27010332,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} + +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":1,"name":"mazda","_ab_cdc_updated_at":1623849130530,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"toyata","_ab_cdc_updated_at":1623849130549,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33275,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"bmw","_ab_cdc_updated_at":1623849314535,"_ab_cdc_lsn":26974776,"_ab_cdc_log_pos": 33278,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":3,"name":null,"_ab_cdc_updated_at":1623849314791,"_ab_cdc_lsn":26975440,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":1623849314791},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":"lotus","_ab_cdc_updated_at":1623850868237,"_ab_cdc_lsn":27010048,"_ab_cdc_log_pos": 33271,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":null,"_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_log_pos": 33279,"_ab_cdc_deleted_at":1623850868371},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":5,"name":"lotus","_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010048,"_ab_cdc_log_pos": 33280,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":5,"name":"lily","_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_log_pos": 33281,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} + +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637589000, "data": { "id": 1, "date": "2020-08-29", "text": "hi 1"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637689100, "data": { "id": 1, "date": "2020-08-30", "text": "hi 2"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637789200, "data": { "id": 2, "date": "2020-08-31", "text": "hi 1"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637889300, "data": { "id": 2, "date": "2020-08-31", "text": "hi 2"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637989400, "data": { "id": 2, "date": "2020-09-01", "text": "hi 3"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637990700, "data": { "id": 1, "date": "2020-09-01", "text": "hi 3"}}} +{"type": "RECORD", "record": {"stream": "1_prefix_startwith_number", "emitted_at": 1602637990800, "data": { "id": 2, "date": 
"2020-09-01", "text": "hi 4"}}} + +{"type":"RECORD","record":{"stream":"multiple_column_names_conflicts","data":{"id":1,"User Id":"chris","user_id":42,"User id":300,"user id": 102,"UserId":101},"emitted_at":1623959926}} + +// These records are verified in types_testing_incorrect_values.sql. If you add/remove entries, make sure to update that file as well. +// IMPORTANT: big_integer_column and nullable_big_integer_column were removed from catalog.json because of difficulties in implementing NUMERIC support. +// This is fine, because no major sources currently produce big_integer fields. +// After that functionality is completed, we should restore their entries to catalog.json. +// Verify max value for int64, and a 28-digit value for big_integer. (28 is larger than an int64 can handle, but still within bounds for a BigQuery NUMERIC column) +{"type":"RECORD","record":{"stream":"types_testing","data":{"id":1,"airbyte_integer_column":9223372036854775807,"nullable_airbyte_integer_column":9223372036854775807,"big_integer_column":"1234567890123456789012345678","nullable_big_integer_column":"1234567890123456789012345678"},"emitted_at":1623959926}} +// Verify max value for int64, and a negative 28-digit value for big_integer +{"type":"RECORD","record":{"stream":"types_testing","data":{"id":2,"airbyte_integer_column":-9223372036854775808,"nullable_airbyte_integer_column":-9223372036854775808,"big_integer_column":"-1234567890123456789012345678","nullable_big_integer_column":"-1234567890123456789012345678"},"emitted_at":1623959926}} +// Verify nullable values +{"type":"RECORD","record":{"stream":"types_testing","data":{"id":3,"airbyte_integer_column":0,"big_integer_column":0},"emitted_at":1623959926}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt new file mode 100644 index 0000000000000..1a703548c5b95 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_incremental.txt @@ -0,0 +1,38 @@ +// Some records are duplicated from messages.txt - this mimics our "at-least-once" delivery policy. + +// Other records "go back in time", i.e. are new data but have an older emitted_at timestamp than some of the those duplicated records. +// (I think?) This mimics an interruption to normalization, such that some records were normalized but others were not. + +// These first records are old data. +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "", "timestamp_col": "", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602637990900, "data": { "id": 3, "currency": "GBP", "NZD": 3.14, "HKD@spéçiäl & characters": 9.2, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +// These records are new data. 
+{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650000000, "data": { "id": 2, "currency": "EUR", "NZD": 3.89, "HKD@spéçiäl & characters": 14.05, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650010000, "data": { "id": 4, "currency": "HKD", "NZD": 1.19, "HKD@spéçiäl & characters": 0.01, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650011000, "data": { "id": 1, "currency": "USD", "date": "2020-10-14", "timestamp_col": "2020-10-14T00:00:00.000-00", "NZD": 1.14, "HKD@spéçiäl & characters": 9.5, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602650012000, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 6.39, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} + +// These first records are old data. +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990800, "data": { "id": 2, "currency": "EUR", "date": "", "timestamp_col": "", "NZD": 2.43, "HKD@spéçiäl & characters": 5.4, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602637990900, "data": { "id": 3, "currency": "GBP", "NZD": 3.14, "HKD@spéçiäl & characters": 9.2, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +// These records are new data. +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650000000, "data": { "id": 2, "currency": "EUR", "NZD": 3.89, "HKD@spéçiäl & characters": 14.05, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650010000, "data": { "id": 4, "currency": "HKD", "NZD": 1.19, "HKD@spéçiäl & characters": 0.01, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650011000, "data": { "id": 1, "currency": "USD", "date": "2020-10-14", "timestamp_col": "2020-10-14T00:00:00.000-00", "NZD": 1.14, "HKD@spéçiäl & characters": 9.5, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602650012000, "data": { "id": 5, "currency": "USD", "NZD": 0.01, "HKD@spéçiäl & characters": 6.39, "HKD_special___characters": "column name collision?", "column`_'with\"_quotes":"ma\"z`d'a"}}} + +// All of these records are new data. 
+// This record has an _older_ emitted_at than the latest dedup_cdc_excluded record in messages.txt +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":5,"name":"vw","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623849314663,"_ab_cdc_lsn":26975264,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":5,"name":null,"column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623900000000,"_ab_cdc_lsn":28010252,"_ab_cdc_deleted_at":1623900000000},"emitted_at":1623900000000}} +// Previously we had a bug where we only respected deletions from the most recent _airbyte_emitted_at. This message tests that ID 5 is still correctly deleted (i.e. marked with _airbyte_active_row = 0). +// This record is also deleted in messages_schema_change.txt. +{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"ford","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1624000000000,"_ab_cdc_lsn":29010252,"_ab_cdc_deleted_at":null},"emitted_at":1624000000000}} + +// All of these records are old data. +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":1,"name":"mazda","_ab_cdc_updated_at":1623849130530,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"toyata","_ab_cdc_updated_at":1623849130549,"_ab_cdc_lsn":26971624,"_ab_cdc_log_pos": 33275,"_ab_cdc_deleted_at":null},"emitted_at":1623859926}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":2,"name":"bmw","_ab_cdc_updated_at":1623849314535,"_ab_cdc_lsn":26974776,"_ab_cdc_log_pos": 33278,"_ab_cdc_deleted_at":null},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":3,"name":null,"_ab_cdc_updated_at":1623849314791,"_ab_cdc_lsn":26975440,"_ab_cdc_log_pos": 33274,"_ab_cdc_deleted_at":1623849314791},"emitted_at":1623860160}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":"lotus","_ab_cdc_updated_at":1623850868237,"_ab_cdc_lsn":27010048,"_ab_cdc_log_pos": 33271,"_ab_cdc_deleted_at":null},"emitted_at":1623861660}} +{"type":"RECORD","record":{"stream":"pos_dedup_cdcx","data":{"id":4,"name":null,"_ab_cdc_updated_at":1623850868371,"_ab_cdc_lsn":27010232,"_ab_cdc_log_pos": 33279,"_ab_cdc_deleted_at":1623850868371},"emitted_at":1623861660}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt new file mode 100644 index 0000000000000..4aeca6dbc2073 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/messages_schema_change.txt @@ -0,0 +1,16 @@ +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661281900, "data": { "id": 3.14, "currency": "EUR", "new_column": 2.1, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 2.12, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 7}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661291900, "data": { "id": 0.12, "currency": "GBP", "new_column": 3.81, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 3.14, "HKD@spéçiäl & characters": 3.01, 
"column`_'with\"_quotes":"ma\"z`d'a", "USD": 11}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661381900, "data": { "id": 4.22, "currency": "EUR", "new_column": 89.1, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 3.89, "HKD@spéçiäl & characters": 8.88, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10}}} +{"type": "RECORD", "record": {"stream": "exchange_rate", "emitted_at": 1602661481900, "data": { "id": 1, "currency": "HKD", "new_column": 91.11, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 1.19, "HKD@spéçiäl & characters": 99.1, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10}}} + +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661281900, "data": { "id": 3.14, "currency": "EUR", "new_column": 2.1, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 2.43, "HKD@spéçiäl & characters": 2.12, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 7}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661291900, "data": { "id": 0.12, "currency": "GBP", "new_column": 3.81, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 3.14, "HKD@spéçiäl & characters": 3.01, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 11}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661381900, "data": { "id": 4.22, "currency": "EUR", "new_column": 89.1, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 3.89, "HKD@spéçiäl & characters": 8.88, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10}}} +{"type": "RECORD", "record": {"stream": "dedup_exchange_rate", "emitted_at": 1602661481900, "data": { "id": 1, "currency": "HKD", "new_column": 91.11, "date": "2020-11-01", "timestamp_col": "2020-11-01T00:00:00Z", "NZD": 1.19, "HKD@spéçiäl & characters": 99.1, "column`_'with\"_quotes":"ma\"z`d'a", "USD": 10}}} + +{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":8,"name":"vw","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623949314663,"_ab_cdc_lsn":26985264,"_ab_cdc_deleted_at":null},"emitted_at":1623960160}} +{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":9,"name":"opel","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623950868109,"_ab_cdc_lsn":28009440,"_ab_cdc_deleted_at":null},"emitted_at":1623961660}} +{"type":"RECORD","record":{"stream":"renamed_dedup_cdc_excluded","data":{"id":9,"name":null,"column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1623950868371,"_ab_cdc_lsn":28010232,"_ab_cdc_deleted_at":1623950868371},"emitted_at":1623961660}} + +// This message tests the ability to delete a record which was inserted in a previous sync. See messages_incremental.txt for how it was inserted. 
+{"type":"RECORD","record":{"stream":"dedup_cdc_excluded","data":{"id":8,"name":"ford","column`_'with\"_quotes":"ma\"z`d'a","_ab_cdc_updated_at":1625000000000,"_ab_cdc_lsn":29020252,"_ab_cdc_deleted_at":1625000000000},"emitted_at":1625000000000}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json new file mode 100644 index 0000000000000..4d65ce5e88e9d --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/replace_identifiers.json @@ -0,0 +1,60 @@ +{ + "bigquery": [ + { "HKD_special___characters": "HKD_special___characters_1" }, + { "'\"HKD@spéçiäl & characters\"'": "HKD_special___characters" }, + { + "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "column___with__quotes is not null" + } + ], + "oracle": [ + { "HKD_special___characters": "HKD_special___characters_1" }, + { "'\"HKD@spéçiäl & characters\"'": "HKD_special___characters" }, + { "HKD@spéçiäl & characters": "hkd_special___characters" }, + { "\"hkd_special___characters\"": "hkd_special___characters" }, + { "- date": "- '\"DATE\"'" }, + { "_airbyte_raw_": "airbyte_raw_" }, + { + "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "column___with__quotes is not null" + } + ], + "postgres": [], + "snowflake": [ + { "HKD@SPÉÇIÄL & CHARACTERS": "HKD@spéçiäl & characters" }, + { + "SIMPLE_STREAMS_FIRST_RUN_ROW_COUNTS": "simple_streams_first_run_row_counts" + }, + { + "SIMPLE_STREAMS_SECOND_RUN_ROW_COUNTS": "simple_streams_second_run_row_counts" + }, + { + "TYPES_TESTING_INCORRECT_VALUES": "types_testing_incorrect_values" + }, + { + "DEDUP_CDC_EXCLUDED_FIRST_RUN_INCORRECT_NAMES": "dedup_cdc_excluded_first_run_incorrect_names" + }, + { + "DEDUP_CDC_EXCLUDED_SECOND_RUN_INCORRECT_NAMES": "dedup_cdc_excluded_second_run_incorrect_names" + }, + { + "DEDUP_CDC_EXCLUDED_THIRD_RUN_INCORRECT_NAMES": "dedup_cdc_excluded_third_run_incorrect_names" + } + ], + "redshift": [], + "mysql": [ + { "- HKD_special___characters": "- '\"HKD_special___characters\"'" }, + { "!= HKD_special___characters": "!= \"HKD_special___characters\"" }, + { + "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "`column__'with\\\"_quotes` is not null" + } + ], + "mssql": [ + { "- HKD_special___characters": "- '\"HKD_special___characters\"'" }, + { "!= HKD_special___characters": "!= \"HKD_special___characters\"" } + ], + "clickhouse": [ + { "'\"HKD@spéçiäl & characters\"'": "HKD_special___characters" }, + { + "\\\"column`_'with\\\"\\\"_quotes\\\" is not null": "column___with__quotes is not null" + } + ] +} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_check_first_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_check_first_run_row_counts.sql new file mode 100644 index 0000000000000..afbdc6ac5b303 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_check_first_run_row_counts.sql @@ -0,0 +1,2 @@ +select * from {{ ref('simple_streams_first_run_row_counts') }} +where row_count != expected_count diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_dedup_cdc_excluded_first_run_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_dedup_cdc_excluded_first_run_names.sql new file mode 100644 index 0000000000000..2a24121d2c422 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_dedup_cdc_excluded_first_run_names.sql @@ -0,0 +1 @@ +select * from {{ ref('dedup_cdc_excluded_first_run_incorrect_names') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_types_testing_values.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_types_testing_values.sql new file mode 100644 index 0000000000000..41eff66fa3135 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests/test_types_testing_values.sql @@ -0,0 +1 @@ +select * from {{ ref('types_testing_incorrect_values') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_incremental/test_check_second_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_incremental/test_check_second_run_row_counts.sql new file mode 100644 index 0000000000000..99e98a10a781c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_incremental/test_check_second_run_row_counts.sql @@ -0,0 +1,2 @@ +select * from {{ ref('simple_streams_second_run_row_counts') }} +where row_count != expected_count diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_incremental/test_dedup_cdc_excluded_second_run_incorrect_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_incremental/test_dedup_cdc_excluded_second_run_incorrect_names.sql new file mode 100644 index 0000000000000..adf1a31fc5eed --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_incremental/test_dedup_cdc_excluded_second_run_incorrect_names.sql @@ -0,0 +1 @@ +select * from {{ ref('dedup_cdc_excluded_second_run_incorrect_names') }} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_schema_change/test_check_third_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_schema_change/test_check_third_run_row_counts.sql new file mode 100644 index 0000000000000..5979aa28cea48 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_schema_change/test_check_third_run_row_counts.sql @@ -0,0 +1,2 @@ +select * from {{ ref('simple_streams_third_run_row_counts') }} +where row_count != expected_count diff --git 
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_schema_change/test_dedup_cdc_excluded_third_run_incorrect_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_schema_change/test_dedup_cdc_excluded_third_run_incorrect_names.sql
new file mode 100644
index 0000000000000..b5d359fd6ac2d
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_schema_change/test_dedup_cdc_excluded_third_run_incorrect_names.sql
@@ -0,0 +1 @@
+select * from {{ ref('dedup_cdc_excluded_third_run_incorrect_names') }}
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/dedup_cdc_excluded_first_run_incorrect_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/dedup_cdc_excluded_first_run_incorrect_names.sql
new file mode 100644
index 0000000000000..22df0b07645b9
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/dedup_cdc_excluded_first_run_incorrect_names.sql
@@ -0,0 +1,14 @@
+select * from {{ ref('dedup_cdc_excluded') }} where
+(
+  id = 1 and name != 'mazda'
+) or (
+  id = 2 and name != 'toyata'
+) or (
+  id = 5 and name != 'vw'
+) or (
+  id = 7 and name != 'lotus'
+) or (
+  id = 8 and name != 'foo3'
+) or (
+  id not in (1, 2, 5, 7, 8)
+)
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/simple_streams_first_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/simple_streams_first_run_row_counts.sql
new file mode 100644
index 0000000000000..33cc2898bf2b2
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/simple_streams_first_run_row_counts.sql
@@ -0,0 +1,46 @@
+with table_row_counts as (
+    select distinct '_airbyte_raw_exchange_rate' as label, count(*) as row_count, 13 as expected_count
+    from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }}
+union all
+    select distinct 'exchange_rate' as label, count(*) as row_count, 13 as expected_count
+    from {{ ref('exchange_rate') }}
+
+union all
+
+    select distinct '_airbyte_raw_dedup_exchange_rate' as label, count(*) as row_count, 12 as expected_count
+    from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
+union all
+    select distinct 'dedup_exchange_rate_scd' as label, count(*) as row_count, 12 as expected_count
+    from {{ ref('dedup_exchange_rate_scd') }}
+union all
+    select distinct 'dedup_exchange_rate' as label, count(*) as row_count, 6 as expected_count
+    from {{ ref('dedup_exchange_rate') }}
+
+union all
+
+    select distinct '_airbyte_raw_dedup_cdc_excluded' as label, count(*) as row_count, 11 as expected_count
+    from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }}
+union all
+    select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 11 as expected_count
+    from {{ ref('dedup_cdc_excluded_scd') }}
+union all
+    select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 5 as expected_count
+    from {{ ref('dedup_cdc_excluded') }}
+
+union all
+
+    select distinct '_airbyte_raw_pos_dedup_cdcx' as label, count(*) as row_count, 8 as expected_count
+    from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }}
+union all
+    select distinct 'pos_dedup_cdcx_scd' as label, count(*) as row_count, 8 as expected_count
+    from {{ ref('pos_dedup_cdcx_scd') }}
+union all
+    select distinct 'pos_dedup_cdcx' as label, count(*) as row_count, 3 as expected_count
+    from {{ ref('pos_dedup_cdcx') }}
+
+union all
+    select distinct 'types_testing' as label, count(*) as row_count, 3 as expected_count
+    from {{ ref('types_testing') }}
+)
+select *
+from table_row_counts
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/types_testing_incorrect_values.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/types_testing_incorrect_values.sql
new file mode 100644
index 0000000000000..9a382eda267c8
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp/types_testing_incorrect_values.sql
@@ -0,0 +1,34 @@
+-- Note that we cast the columns to string to avoid any weird numeric equality nonsense.
+-- For example, in Postgres, this query returns `true`, even though the two numbers are different (9223372036854775807 is the max value of a signed 64-bit int):
+-- select (9223372036854775807 :: double precision) = (9223372036854775806 :: double precision)
+-- This is because a double has only ~15 decimal digits of precision, so both values are rounded off to 9.223372036854776e+18.
+
+select * from {{ ref('types_testing') }} where
+(
+  id = 1 and (
+    cast(airbyte_integer_column as {{ dbt_utils.type_string() }}) != '9223372036854775807'
+    or cast(nullable_airbyte_integer_column as {{ dbt_utils.type_string() }}) != '9223372036854775807'
+    {#
+    or cast(big_integer_column as {{ dbt_utils.type_string() }}) != '1234567890123456789012345678'
+    or cast(nullable_big_integer_column as {{ dbt_utils.type_string() }}) != '1234567890123456789012345678'
+    #}
+  )
+) or (
+  id = 2 and (
+    cast(airbyte_integer_column as {{ dbt_utils.type_string() }}) != '-9223372036854775808'
+    or cast(nullable_airbyte_integer_column as {{ dbt_utils.type_string() }}) != '-9223372036854775808'
+    {#
+    or cast(big_integer_column as {{ dbt_utils.type_string() }}) != '-1234567890123456789012345678'
+    or cast(nullable_big_integer_column as {{ dbt_utils.type_string() }}) != '-1234567890123456789012345678'
+    #}
+  )
+) or (
+  id = 3 and (
+    cast(airbyte_integer_column as {{ dbt_utils.type_string() }}) != '0'
+    or nullable_airbyte_integer_column is not null
+    {#
+    or cast(big_integer_column as {{ dbt_utils.type_string() }}) != '0'
+    or nullable_big_integer_column is not null
+    #}
+  )
+)
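The precision caveat in the comment above is easy to reproduce outside of SQL. A minimal Python sketch (illustrative only, not part of the test fixtures) showing why the test compares string casts rather than numeric values:

```python
# Two distinct signed 64-bit integers, one apart.
big = 9223372036854775807      # 2**63 - 1, the max signed 64-bit int
smaller = 9223372036854775806

assert big != smaller          # as exact integers they are clearly different

# A double carries only ~15-16 significant decimal digits, so both values
# round to the same float, mirroring the Postgres behavior described above.
assert float(big) == float(smaller) == 9.223372036854776e+18

# Comparing string representations, as the dbt test does, keeps the distinction.
assert str(big) != str(smaller)
```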
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/dedup_cdc_excluded_second_run_incorrect_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/dedup_cdc_excluded_second_run_incorrect_names.sql
new file mode 100644
index 0000000000000..6bff1b073fd1e
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/dedup_cdc_excluded_second_run_incorrect_names.sql
@@ -0,0 +1,13 @@
+select * from {{ ref('dedup_cdc_excluded') }} where
+(
+  id = 1 and name != 'mazda'
+) or (
+  id = 2 and name != 'toyata'
+) or (
+  id = 7 and name != 'lotus'
+) or (
+  id = 8 and name != 'ford'
+) or (
+  id not in (1, 2, 7, 8)
+)
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql
new file mode 100644
index 0000000000000..405337845bea7
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_incremental/simple_streams_second_run_row_counts.sql
@@ -0,0 +1,42 @@
+with table_row_counts as (
+    select distinct '_airbyte_raw_exchange_rate' as label, count(*) as row_count, 6 as expected_count
+    from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }}
+union all
+    select distinct 'exchange_rate' as label, count(*) as row_count, 6 as expected_count
+    from {{ ref('exchange_rate') }}
+
+union all
+
+    select distinct '_airbyte_raw_dedup_exchange_rate' as label, count(*) as row_count, 6 as expected_count
+    from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
+union all
+    select distinct 'dedup_exchange_rate_scd' as label, count(*) as row_count, 16 as expected_count
+    from {{ ref('dedup_exchange_rate_scd') }}
+union all
+    select distinct 'dedup_exchange_rate' as label, count(*) as row_count, 7 as expected_count
+    from {{ ref('dedup_exchange_rate') }}
+
+union all
+
+    select distinct '_airbyte_raw_dedup_cdc_excluded' as label, count(*) as row_count, 3 as expected_count
+    from {{ source('test_normalization', '_airbyte_raw_dedup_cdc_excluded') }}
+union all
+    select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 13 as expected_count
+    from {{ ref('dedup_cdc_excluded_scd') }}
+union all
+    select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 4 as expected_count
+    from {{ ref('dedup_cdc_excluded') }}
+
+union all
+
+    select distinct '_airbyte_raw_pos_dedup_cdcx' as label, count(*) as row_count, 6 as expected_count
+    from {{ source('test_normalization', '_airbyte_raw_pos_dedup_cdcx') }}
+union all
+    select distinct 'pos_dedup_cdcx_scd' as label, count(*) as row_count, 8 as expected_count
+    from {{ ref('pos_dedup_cdcx_scd') }}
+union all
+    select distinct 'pos_dedup_cdcx' as label, count(*) as row_count, 3 as expected_count
+    from {{ ref('pos_dedup_cdcx') }}
+)
+select *
+from table_row_counts
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/dedup_cdc_excluded_third_run_incorrect_names.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/dedup_cdc_excluded_third_run_incorrect_names.sql
new file mode 100644
index 0000000000000..859af36e7299e
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/dedup_cdc_excluded_third_run_incorrect_names.sql
@@ -0,0 +1,11 @@
+select * from {{ ref('dedup_cdc_excluded') }} where
+(
+  id = 1 and name != 'mazda'
+) or (
+  id = 2 and name != 'toyata'
+) or (
+  id = 7 and name != 'lotus'
+) or (
+  id not in (1, 2, 7)
+)
+
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql
new file mode 100644
index 0000000000000..775a055ae6dfa
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_data_tests_tmp_schema_change/simple_streams_third_run_row_counts.sql
@@ -0,0 +1,31 @@
+with table_row_counts as (
+    select distinct '_airbyte_raw_exchange_rate' as label, count(*) as row_count, 4 as expected_count
+    from {{ source('test_normalization', '_airbyte_raw_exchange_rate') }}
+union all
+    select distinct 'exchange_rate' as label, count(*) as row_count, 4 as expected_count
+    from {{ ref('exchange_rate') }}
+
+union all
+
+    select distinct '_airbyte_raw_dedup_exchange_rate' as label, count(*) as row_count, 10 as expected_count
+    from {{ source('test_normalization', '_airbyte_raw_dedup_exchange_rate') }}
+union all
+    select distinct 'dedup_exchange_rate_scd' as label, count(*) as row_count, 20 as expected_count
+    from {{ ref('dedup_exchange_rate_scd') }}
+union all
+    select distinct 'dedup_exchange_rate' as label, count(*) as row_count, 11 as expected_count
+    from {{ ref('dedup_exchange_rate') }}
+
+union all
+
+    select distinct '_airbyte_raw_dedup_cdc_excluded' as label, count(*) as row_count, 4 as expected_count
+    from test_normalization._airbyte_raw_dedup_cdc_excluded
+union all
+    select distinct 'dedup_cdc_excluded_scd' as label, count(*) as row_count, 14 as expected_count
+    from test_normalization.dedup_cdc_excluded_scd
+union all
+    select distinct 'dedup_cdc_excluded' as label, count(*) as row_count, 3 as expected_count
+    from test_normalization.dedup_cdc_excluded
+)
+select *
+from table_row_counts
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests/schema_test.yml b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests/schema_test.yml
new file mode 100644
index 0000000000000..fe6b3ef9a85a9
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests/schema_test.yml
@@ -0,0 +1,44 @@
+version: 2
+
+models:
+  - name: exchange_rate
+    tests:
+      - dbt_utils.expression_is_true:
+          # description: check no column collisions
+          # Two columns having similar names, especially after removing special characters, should remain distinct
+          expression: cast("HKD@spéçiäl & characters" as {{ dbt_utils.type_string() }}) != HKD_special___characters
+      - dbt_utils.expression_is_true:
+          expression: "\"column`_'with\"\"_quotes\" is not null"
+    columns:
+      - name: '"HKD@spéçiäl & characters"'
+        # description: check special characters
+        # Use special characters in column names and make sure they are correctly parsed in the JSON blob and populated
+        tests:
+          - not_null
+
+  - name: dedup_exchange_rate
+    tests:
+      - dbt_utils.unique_combination_of_columns:
+          # description: check_deduplication_by_primary_key
+          # The final table for this stream should have unique composite primary key values.
+          combination_of_columns:
+            - id
+            - currency
+            - NZD
+
+  - name: dedup_cdc_excluded
+# TODO: create/fix github issue in dbt-core/adapters repository to handle schema changes (outside airbyte's control)
+# Disabling because incremental dbt is not handling quotes well at the moment (dbt 0.21.0)
+#    tests:
+#      - dbt_utils.expression_is_true:
+#          expression: "\"column`_'with\"\"_quotes\" is not null"
+    columns:
+      - name: name
+        tests:
+          - not_null
+
+  - name: pos_dedup_cdcx
+    columns:
+      - name: name
+        tests:
+          - not_null
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests_incremental/schema_test.yml b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests_incremental/schema_test.yml
new file mode 100644
index 0000000000000..fe6b3ef9a85a9
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests_incremental/schema_test.yml
@@ -0,0 +1,44 @@
+version: 2
+
+models:
+  - name: exchange_rate
+    tests:
+      - dbt_utils.expression_is_true:
+          # description: check no column collisions
+          # Two columns having similar names, especially after removing special characters, should remain distinct
+          expression: cast("HKD@spéçiäl & characters" as {{ dbt_utils.type_string() }}) != HKD_special___characters
+      - dbt_utils.expression_is_true:
+          expression: "\"column`_'with\"\"_quotes\" is not null"
+    columns:
+      - name: '"HKD@spéçiäl & characters"'
+        # description: check special characters
+        # Use special characters in column names and make sure they are correctly parsed in the JSON blob and populated
+        tests:
+          - not_null
+
+  - name: dedup_exchange_rate
+    tests:
+      - dbt_utils.unique_combination_of_columns:
+          # description: check_deduplication_by_primary_key
+          # The final table for this stream should have unique composite primary key values.
+          combination_of_columns:
+            - id
+            - currency
+            - NZD
+
+  - name: dedup_cdc_excluded
+# TODO: create/fix github issue in dbt-core/adapters repository to handle schema changes (outside airbyte's control)
+# Disabling because incremental dbt is not handling quotes well at the moment (dbt 0.21.0)
+#    tests:
+#      - dbt_utils.expression_is_true:
+#          expression: "\"column`_'with\"\"_quotes\" is not null"
+    columns:
+      - name: name
+        tests:
+          - not_null
+
+  - name: pos_dedup_cdcx
+    columns:
+      - name: name
+        tests:
+          - not_null
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests_schema_change/schema_test.yml b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests_schema_change/schema_test.yml
new file mode 100644
index 0000000000000..485af162c4fb6
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/dbt_test_config/dbt_schema_tests_schema_change/schema_test.yml
@@ -0,0 +1,34 @@
+version: 2
+
+models:
+  - name: exchange_rate
+    tests:
+      - dbt_utils.expression_is_true:
+          expression: "\"column`_'with\"\"_quotes\" is not null"
+    columns:
+      - name: '"HKD@spéçiäl & characters"'
+        # description: check special characters
+        # Use special characters in column names and make sure they are correctly parsed in the JSON blob and populated
+        tests:
+          - not_null
+
+  - name: dedup_exchange_rate
+    tests:
+      - dbt_utils.unique_combination_of_columns:
+          # description: check_deduplication_by_primary_key
+          # The final table for this stream should have unique composite primary key values.
+          combination_of_columns:
+            - id
+            - currency
+            - NZD
+
+  - name: renamed_dedup_cdc_excluded
+# TODO: create/fix github issue in dbt-core/adapters repository to handle schema changes (outside airbyte's control)
+# Disabling because incremental dbt is not handling quotes well at the moment (dbt 0.21.0)
+#    tests:
+#      - dbt_utils.expression_is_true:
+#          expression: "\"column`_'with\"\"_quotes\" is not null"
+    columns:
+      - name: name
+        tests:
+          - not_null
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/catalog.json
new file mode 100644
index 0000000000000..7f9ff3d2901c4
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/catalog.json
@@ -0,0 +1,114 @@
+{
+  "streams": [
+    {
+      "stream": {
+        "name": "sparse_nested_stream",
+        "json_schema": {
+          "type": "object",
+          "properties": {
+            "id": {
+              "type": "integer"
+            },
+            "updated_at": {
+              "type": "integer"
+            },
+            "obj_nest1": {
+              "type": "object",
+              "properties": {
+                "obj_nest2": {
+                  "type": "object",
+                  "properties": {
+                    "foo": {
+                      "type": "string"
+                    }
+                  }
+                }
+              }
+            },
+            "arr_nest1": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "arr_nest2": {
+                    "type": "array",
+                    "items": {
+                      "type": "object",
+                      "properties": {
+                        "foo": {
+                          "type": "string"
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        },
+        "supported_sync_modes": ["incremental"],
+        "source_defined_cursor": true,
+        "default_cursor_field": []
+      },
+      "sync_mode": "incremental",
+      "cursor_field": ["updated_at"],
+      "destination_sync_mode": "append_dedup",
+      "primary_key": [["id"]]
+    },
+    {
+      "stream": {
+        "name": "sparse_nested_stream_empty",
+        "json_schema": {
"type": "object", + "properties": { + "id": { + "type": "integer" + }, + "updated_at": { + "type": "integer" + }, + "obj_nest1": { + "type": "object", + "properties": { + "obj_nest2": { + "type": "object", + "properties": { + "foo": { + "type": "string" + } + } + } + } + }, + "arr_nest1": { + "type": "array", + "items": { + "type": "object", + "properties": { + "arr_nest2": { + "type": "array", + "items": { + "type": "object", + "properties": { + "foo": { + "type": "string" + } + } + } + } + } + } + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": ["updated_at"], + "destination_sync_mode": "append_dedup", + "primary_key": [["id"]] + } + ] +} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages.txt new file mode 100644 index 0000000000000..de70d49e4b089 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages.txt @@ -0,0 +1,6 @@ +// This message is to verify that we don't encounter absurd duplication in deeply-nested fields with sparse data. +// We'll emit a message in the first sync with nested fields, but future syncs won't have the nested fields. +{"type": "RECORD", "record": {"stream": "sparse_nested_stream", "data": {"id": 1, "updated_at": 100, "obj_nest1": {"obj_nest2": {"foo": "bar"}}, "arr_nest1": [{"arr_nest2": [{"foo": "bar1"}, {"foo": "bar2"}]}, {"arr_nest2": [{"foo": "baz1"}, {"foo": "baz2"}]}]}, "emitted_at": 1672567200}} + +// This message is to verify our behavior in the case where the stream doesn't see any new data after the first sync. 
+{"type": "RECORD", "record": {"stream": "sparse_nested_stream_empty", "data": {"id": 1, "updated_at": 100, "obj_nest1": {"obj_nest2": {"foo": "bar"}}, "arr_nest1": [{"arr_nest2": [{"foo": "bar1"}, {"foo": "bar2"}]}, {"arr_nest2": [{"foo": "baz1"}, {"foo": "baz2"}]}]}, "emitted_at": 1672567200}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages2.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages2.txt new file mode 100644 index 0000000000000..6f2ee29261c17 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages2.txt @@ -0,0 +1 @@ +{"type": "RECORD", "record": {"stream": "sparse_nested_stream", "data": {"id": 2, "updated_at": 101}, "emitted_at": 1672568200}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages3.txt b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages3.txt new file mode 100644 index 0000000000000..7153c09b864ba --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/messages3.txt @@ -0,0 +1 @@ +{"type": "RECORD", "record": {"stream": "sparse_nested_stream", "data": {"id": 3, "updated_at": 102}, "emitted_at": 1672569200}} diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/replace_identifiers.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/replace_identifiers.json new file mode 100644 index 0000000000000..18d5eb0f40dfd --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/data_input/replace_identifiers.json @@ -0,0 +1,54 @@ +{ + "bigquery": [ + { + "sparse_nested_stream__y_obj_nest1_obj_nest2": "sparse_nested_stream_empty_obj_nest1_obj_nest2" + }, + { + "sparse_nested_stream__y_arr_nest1_arr_nest2": "sparse_nested_stream_empty_arr_nest1_arr_nest2" + } + ], + "oracle": [], + "postgres": [], + "snowflake": [ + { + "sparse_nested_stream__y_obj_nest1_obj_nest2": "SPARSE_NESTED_STREAM_EMPTY_OBJ_NEST1_OBJ_NEST2" + }, + { + "sparse_nested_stream__y_arr_nest1_arr_nest2": "SPARSE_NESTED_STREAM_EMPTY_ARR_NEST1_ARR_NEST2" + }, + { + "SYNC1_ROW_COUNTS": "sync1_row_counts" + }, + { + "SYNC2_ROW_COUNTS": "sync2_row_counts" + }, + { + "SYNC3_ROW_COUNTS": "sync3_row_counts" + } + ], + "redshift": [ + { + "sparse_nested_stream__y_obj_nest1_obj_nest2": "sparse_nested_stream_empty_obj_nest1_obj_nest2" + }, + { + "sparse_nested_stream__y_arr_nest1_arr_nest2": "sparse_nested_stream_empty_arr_nest1_arr_nest2" + } + ], + "mysql": [], + "mssql": [ + { + "sparse_nested_stream__y_obj_nest1_obj_nest2": "sparse_nested_stream___y_obj_nest1_obj_nest2" + }, + { + "sparse_nested_stream__y_arr_nest1_arr_nest2": "sparse_nested_stream___y_arr_nest1_arr_nest2" + } + ], + "tidb": [ + { + "sparse_nested_stream__y_obj_nest1_obj_nest2": "sparse_nested_stream___y_obj_nest1_obj_nest2" + }, + { + "sparse_nested_stream__y_arr_nest1_arr_nest2": "sparse_nested_stream___y_arr_nest1_arr_nest2" + } + ] +} diff --git 
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync1_assertions/test_sync1_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync1_assertions/test_sync1_row_counts.sql
new file mode 100644
index 0000000000000..6ed78bb5f3973
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync1_assertions/test_sync1_row_counts.sql
@@ -0,0 +1,2 @@
+select * from {{ ref('sync1_row_counts') }}
+where row_count != expected_count
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync1_expectations/sync1_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync1_expectations/sync1_row_counts.sql
new file mode 100644
index 0000000000000..f087c1d2f9e91
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync1_expectations/sync1_row_counts.sql
@@ -0,0 +1,33 @@
+with table_row_counts as (
+    select distinct 'sparse_nested_stream' as label, count(*) as row_count, 1 as expected_count
+    from {{ ref('sparse_nested_stream') }}
+union all
+    select distinct 'sparse_nested_stream_obj_nest1' as label, count(*) as row_count, 1 as expected_count
+    from {{ ref('sparse_nested_stream_obj_nest1') }}
+union all
+    select distinct 'sparse_nested_stream_obj_nest1_obj_nest2' as label, count(*) as row_count, 1 as expected_count
+    from {{ ref('sparse_nested_stream_obj_nest1_obj_nest2') }}
+union all
+    select distinct 'sparse_nested_stream_arr_nest1' as label, count(*) as row_count, 2 as expected_count
+    from {{ ref('sparse_nested_stream_arr_nest1') }}
+union all
+    select distinct 'sparse_nested_stream_arr_nest1_arr_nest2' as label, count(*) as row_count, 4 as expected_count
+    from {{ ref('sparse_nested_stream_arr_nest1_arr_nest2') }}
+union all
+    select distinct 'sparse_nested_stream_empty' as label, count(*) as row_count, 1 as expected_count
+    from {{ ref('sparse_nested_stream_empty') }}
+union all
+    select distinct 'sparse_nested_stream_empty_obj_nest1' as label, count(*) as row_count, 1 as expected_count
+    from {{ ref('sparse_nested_stream_empty_obj_nest1') }}
+union all
+    select distinct 'sparse_nested_stream__y_obj_nest1_obj_nest2' as label, count(*) as row_count, 1 as expected_count
+    from {{ ref('sparse_nested_stream__y_obj_nest1_obj_nest2') }}
+union all
+    select distinct 'sparse_nested_stream_empty_arr_nest1' as label, count(*) as row_count, 2 as expected_count
+    from {{ ref('sparse_nested_stream_empty_arr_nest1') }}
+union all
+    select distinct 'sparse_nested_stream__y_arr_nest1_arr_nest2' as label, count(*) as row_count, 4 as expected_count
+    from {{ ref('sparse_nested_stream__y_arr_nest1_arr_nest2') }}
+)
+select *
+from table_row_counts
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync2_assertions/test_sync2_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync2_assertions/test_sync2_row_counts.sql
new file mode 100644
index 0000000000000..78d233e8a90b1
--- /dev/null
+++ 
b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync2_assertions/test_sync2_row_counts.sql @@ -0,0 +1,2 @@ +select * from {{ ref('sync2_row_counts') }} +where row_count != expected_count diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync2_expectations/sync2_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync2_expectations/sync2_row_counts.sql new file mode 100644 index 0000000000000..557e969c20e24 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync2_expectations/sync2_row_counts.sql @@ -0,0 +1,34 @@ +with table_row_counts as ( + -- This is the only difference with sync1 - there's one new record in sparse_nested_stream. + select distinct 'sparse_nested_stream' as label, count(*) as row_count, 2 as expected_count + from {{ ref('sparse_nested_stream') }} +union all + select distinct 'sparse_nested_stream_obj_nest1' as label, count(*) as row_count, 1 as expected_count + from {{ ref('sparse_nested_stream_obj_nest1') }} +union all + select distinct 'sparse_nested_stream_obj_nest1_obj_nest2' as label, count(*) as row_count, 1 as expected_count + from {{ ref('sparse_nested_stream_obj_nest1_obj_nest2') }} +union all + select distinct 'sparse_nested_stream_arr_nest1' as label, count(*) as row_count, 2 as expected_count + from {{ ref('sparse_nested_stream_arr_nest1') }} +union all + select distinct 'sparse_nested_stream_arr_nest1_arr_nest2' as label, count(*) as row_count, 4 as expected_count + from {{ ref('sparse_nested_stream_arr_nest1_arr_nest2') }} +union all + select distinct 'sparse_nested_stream_empty' as label, count(*) as row_count, 1 as expected_count + from {{ ref('sparse_nested_stream_empty') }} +union all + select distinct 'sparse_nested_stream_empty_obj_nest1' as label, count(*) as row_count, 1 as expected_count + from {{ ref('sparse_nested_stream_empty_obj_nest1') }} +union all + select distinct 'sparse_nested_stream__y_obj_nest1_obj_nest2' as label, count(*) as row_count, 1 as expected_count + from {{ ref('sparse_nested_stream__y_obj_nest1_obj_nest2') }} +union all + select distinct 'sparse_nested_stream_empty_arr_nest1' as label, count(*) as row_count, 2 as expected_count + from {{ ref('sparse_nested_stream_empty_arr_nest1') }} +union all + select distinct 'sparse_nested_stream__y_arr_nest1_arr_nest2' as label, count(*) as row_count, 4 as expected_count + from {{ ref('sparse_nested_stream__y_arr_nest1_arr_nest2') }} +) +select * +from table_row_counts diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync3_assertions/test_sync3_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync3_assertions/test_sync3_row_counts.sql new file mode 100644 index 0000000000000..764aa7b804533 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync3_assertions/test_sync3_row_counts.sql @@ -0,0 +1,2 @@ +select * from {{ ref('sync3_row_counts') }} +where row_count != expected_count diff --git 
a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync3_expectations/sync3_row_counts.sql b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync3_expectations/sync3_row_counts.sql
new file mode 100644
index 0000000000000..eb078d0b1570a
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_sparse_nested_streams/dbt_test_config/sync3_expectations/sync3_row_counts.sql
@@ -0,0 +1,34 @@
+with table_row_counts as (
+    -- Again, this is the only difference with sync2 - one new record in sparse_nested_stream.
+    select distinct 'sparse_nested_stream' as label, count(*) as row_count, 3 as expected_count
+    from {{ ref('sparse_nested_stream') }}
+union all
+    select distinct 'sparse_nested_stream_obj_nest1' as label, count(*) as row_count, 1 as expected_count
+    from {{ ref('sparse_nested_stream_obj_nest1') }}
+union all
+    select distinct 'sparse_nested_stream_obj_nest1_obj_nest2' as label, count(*) as row_count, 1 as expected_count
+    from {{ ref('sparse_nested_stream_obj_nest1_obj_nest2') }}
+union all
+    select distinct 'sparse_nested_stream_arr_nest1' as label, count(*) as row_count, 2 as expected_count
+    from {{ ref('sparse_nested_stream_arr_nest1') }}
+union all
+    select distinct 'sparse_nested_stream_arr_nest1_arr_nest2' as label, count(*) as row_count, 4 as expected_count
+    from {{ ref('sparse_nested_stream_arr_nest1_arr_nest2') }}
+union all
+    select distinct 'sparse_nested_stream_empty' as label, count(*) as row_count, 1 as expected_count
+    from {{ ref('sparse_nested_stream_empty') }}
+union all
+    select distinct 'sparse_nested_stream_empty_obj_nest1' as label, count(*) as row_count, 1 as expected_count
+    from {{ ref('sparse_nested_stream_empty_obj_nest1') }}
+union all
+    select distinct 'sparse_nested_stream__y_obj_nest1_obj_nest2' as label, count(*) as row_count, 1 as expected_count
+    from {{ ref('sparse_nested_stream__y_obj_nest1_obj_nest2') }}
+union all
+    select distinct 'sparse_nested_stream_empty_arr_nest1' as label, count(*) as row_count, 2 as expected_count
+    from {{ ref('sparse_nested_stream_empty_arr_nest1') }}
+union all
+    select distinct 'sparse_nested_stream__y_arr_nest1_arr_nest2' as label, count(*) as row_count, 4 as expected_count
+    from {{ ref('sparse_nested_stream__y_arr_nest1_arr_nest2') }}
+)
+select *
+from table_row_counts
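The sync1/sync2/sync3 expectation models encode the regression this test guards against: only the top-level sparse_nested_stream count grows across syncs, while every nested table keeps its sync1 cardinality. A compact restatement of that invariant in Python (counts taken from the three expectation files above; this is an editorial illustration, not part of the fixtures):

```python
# Expected row counts per sync, keyed by table, from the expectation models above.
expected = {
    "sparse_nested_stream": [1, 2, 3],                      # one new record per sync
    "sparse_nested_stream_arr_nest1": [2, 2, 2],            # nested tables must stay flat...
    "sparse_nested_stream_arr_nest1_arr_nest2": [4, 4, 4],  # ...or rows were duplicated
}

for table, counts in expected.items():
    if table != "sparse_nested_stream":
        # The invariant under test: sparse nested data is not re-exploded on later syncs.
        assert len(set(counts)) == 1, f"{table} grew across syncs: {counts}"
```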
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_drop_scd_overwrite.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_drop_scd_overwrite.py
new file mode 100644
index 0000000000000..f5f177a1499d9
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/test_drop_scd_overwrite.py
@@ -0,0 +1,161 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+import json
+import os
+import pathlib
+import shutil
+
+import pytest
+from integration_tests.dbt_integration_test import DbtIntegrationTest
+from integration_tests.utils import generate_dbt_models, run_destination_process, setup_test_dir
+from normalization import DestinationType
+
+temporary_folders = set()
+dbt_test_utils = DbtIntegrationTest()
+
+
+@pytest.fixture(scope="module", autouse=True)
+def before_all_tests(request):
+    destinations_to_test = dbt_test_utils.get_test_targets()
+    # set clean-up args to clean target destination after the test
+    clean_up_args = {
+        "destination_type": [d for d in DestinationType if d.value in destinations_to_test],
+        "test_type": "test_reset_scd_overwrite",
+        "tmp_folders": temporary_folders,
+    }
+    dbt_test_utils.set_target_schema("test_reset_scd_overwrite")
+    dbt_test_utils.change_current_test_dir(request)
+    dbt_test_utils.setup_db(destinations_to_test)
+    os.environ["PATH"] = os.path.abspath("../.venv/bin/") + ":" + os.environ["PATH"]
+    yield
+    dbt_test_utils.clean_tmp_tables(**clean_up_args)
+    dbt_test_utils.tear_down_db()
+    for folder in temporary_folders:
+        print(f"Deleting temporary test folder {folder}")
+        shutil.rmtree(folder, ignore_errors=True)
+
+
+@pytest.fixture
+def setup_test_path(request):
+    dbt_test_utils.change_current_test_dir(request)
+    print(f"Running from: {pathlib.Path().absolute()}")
+    print(f"Current PATH is: {os.environ['PATH']}")
+    yield
+    os.chdir(request.config.invocation_dir)
+
+
+@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations())
+def test_reset_scd_on_overwrite(destination_type: DestinationType, setup_test_path):
+    if destination_type.value not in dbt_test_utils.get_test_targets():
+        pytest.skip(f"Destination {destination_type} is not in the NORMALIZATION_TEST_TARGET env variable")
+
+    if destination_type.value in [DestinationType.ORACLE.value, DestinationType.TIDB.value]:
+        # Oracle and TiDB do not support incremental syncs with schema changes yet
+        pytest.skip(f"{destination_type} does not support incremental sync with schema change yet")
+    elif destination_type.value == DestinationType.REDSHIFT.value:
+        # set unique schema for Redshift test
+        dbt_test_utils.set_target_schema(dbt_test_utils.generate_random_string("test_reset_scd_"))
+
+    test_resource_name = "test_reset_scd_overwrite"
+    # Select target schema
+    target_schema = dbt_test_utils.target_schema
+
+    try:
+        print(f"Testing resetting SCD tables on overwrite with {destination_type} in schema {target_schema}")
+        run_reset_scd_on_overwrite_test(destination_type, test_resource_name)
+    finally:
+        dbt_test_utils.set_target_schema(target_schema)
+
+
+def run_reset_scd_on_overwrite_test(destination_type: DestinationType, test_resource_name: str):
+    # Generate DBT profile yaml
+    integration_type = destination_type.value
+    test_root_dir = setup_test_dir(integration_type, temporary_folders)
+    destination_config = dbt_test_utils.generate_profile_yaml_file(destination_type, test_root_dir)
+    test_directory = os.path.join(test_root_dir, "models/generated")
+    shutil.rmtree(test_directory, ignore_errors=True)
+
+    # Generate config file for the destination
+    config_file = os.path.join(test_root_dir, "destination_config.json")
+    with open(config_file, "w") as f:
+        f.write(json.dumps(destination_config))
+
+    # make sure DBT dependencies are installed
+    dbt_test_utils.dbt_check(destination_type, test_root_dir)
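The step below rewrites the catalog so every stream syncs in overwrite mode, which forces the destination to clear its raw tables. A plausible sketch of what a helper like `dbt_test_utils.copy_replace` does, assuming it is a regex-based file copy (the actual implementation in DbtIntegrationTest may differ):

```python
import re


def copy_replace(src: str, dst: str, pattern: str, replace_value: str) -> None:
    """Copy src to dst, substituting every regex match of `pattern` with `replace_value`."""
    with open(src) as source_file:
        contents = source_file.read()
    with open(dst, "w") as dest_file:
        dest_file.write(re.sub(pattern, replace_value, contents))


# e.g. forcing a full reset by rewriting the sync mode of every stream:
# copy_replace("catalog.json", "initial_reset_catalog.json",
#              pattern='"destination_sync_mode": ".*"',
#              replace_value='"destination_sync_mode": "overwrite"')
```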
"data_input", "test_drop_scd_catalog.json") + dbt_test_utils.copy_replace( + original_catalog_file, + os.path.join(test_root_dir, "initial_reset_catalog.json"), + pattern='"destination_sync_mode": ".*"', + replace_value='"destination_sync_mode": "overwrite"', + ) + + # Force a reset in destination raw tables to remove any data left over from previous test runs + assert run_destination_process(destination_type, test_root_dir, "", "initial_reset_catalog.json", dbt_test_utils) + # generate models from catalog + generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "test_drop_scd_catalog_reset.json", dbt_test_utils) + + # Run dbt process to normalize data from the first sync + dbt_test_utils.dbt_run(destination_type, test_root_dir, force_full_refresh=True) + + # Remove models generated in previous step to avoid DBT compilation errors + test_directory = os.path.join(test_root_dir, "models/generated/airbyte_incremental") + shutil.rmtree(test_directory, ignore_errors=True) + test_directory = os.path.join(test_root_dir, "models/generated/airbyte_views") + shutil.rmtree(test_directory, ignore_errors=True) + test_directory = os.path.join(test_root_dir, "models/generated/airbyte_ctes") + shutil.rmtree(test_directory, ignore_errors=True) + test_directory = os.path.join(test_root_dir, "models/generated/airbyte_tables") + shutil.rmtree(test_directory, ignore_errors=True) + + # Run the first sync to create raw tables in destinations + dbt_test_utils.copy_replace(original_catalog_file, os.path.join(test_root_dir, "destination_catalog.json")) + message_file = os.path.join("resources", test_resource_name, "data_input", "test_drop_scd_messages.txt") + assert run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) + + # generate models from catalog + generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "test_drop_scd_catalog.json", dbt_test_utils) + + # Run dbt process to normalize data from the first sync + dbt_test_utils.dbt_run(destination_type, test_root_dir, force_full_refresh=True) + + # Remove models generated in previous step to avoid DBT compilation errors + test_directory = os.path.join(test_root_dir, "models/generated/airbyte_incremental") + shutil.rmtree(test_directory, ignore_errors=True) + test_directory = os.path.join(test_root_dir, "models/generated/airbyte_views") + shutil.rmtree(test_directory, ignore_errors=True) + test_directory = os.path.join(test_root_dir, "models/generated/airbyte_ctes") + shutil.rmtree(test_directory, ignore_errors=True) + + # Generate a catalog with modified schema for a reset + reset_catalog_file = os.path.join("resources", test_resource_name, "data_input", "test_drop_scd_catalog_reset.json") + dbt_test_utils.copy_replace(reset_catalog_file, os.path.join(test_root_dir, "reset_catalog.json")) + + # Run a reset + assert run_destination_process(destination_type, test_root_dir, "", "reset_catalog.json", dbt_test_utils) + + # Run dbt process after reset to drop SCD table + generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "test_drop_scd_catalog_reset.json", dbt_test_utils) + dbt_test_utils.dbt_run(destination_type, test_root_dir, force_full_refresh=True) + + # Remove models generated in previous step to avoid DBT compilation errors + test_directory = os.path.join(test_root_dir, "models/generated/airbyte_incremental") + shutil.rmtree(test_directory, ignore_errors=True) + test_directory = os.path.join(test_root_dir, 
"models/generated/airbyte_views") + shutil.rmtree(test_directory, ignore_errors=True) + test_directory = os.path.join(test_root_dir, "models/generated/airbyte_ctes") + shutil.rmtree(test_directory, ignore_errors=True) + + # Run another sync with modified catalog + modified_catalog_file = os.path.join("resources", test_resource_name, "data_input", "test_drop_scd_catalog_incremental.json") + dbt_test_utils.copy_replace(modified_catalog_file, os.path.join(test_root_dir, "destination_catalog.json")) + message_file = os.path.join("resources", test_resource_name, "data_input", "test_scd_reset_messages_incremental.txt") + assert run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) + + # Run dbt process + generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "test_drop_scd_catalog_reset.json", dbt_test_utils) + dbt_test_utils.dbt_run(destination_type, test_root_dir) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py new file mode 100644 index 0000000000000..8a530db76d910 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/integration_tests/test_ephemeral.py @@ -0,0 +1,208 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +import json +import os +import pathlib +import re +import shutil +from typing import Any, Dict + +import pytest +from integration_tests.dbt_integration_test import DbtIntegrationTest +from integration_tests.utils import setup_test_dir +from normalization.destination_type import DestinationType +from normalization.transform_catalog import TransformCatalog + +temporary_folders = set() +dbt_test_utils = DbtIntegrationTest() + + +@pytest.fixture(scope="module", autouse=True) +def before_all_tests(request): + destinations_to_test = dbt_test_utils.get_test_targets() + # set clean-up args to clean target destination after the test + clean_up_args = { + "destination_type": [d for d in DestinationType if d.value in destinations_to_test], + "test_type": "ephemeral", + "tmp_folders": temporary_folders, + } + + dbt_test_utils.set_target_schema("test_ephemeral") + dbt_test_utils.change_current_test_dir(request) + dbt_test_utils.setup_db(destinations_to_test) + os.environ["PATH"] = os.path.abspath("../.venv/bin/") + ":" + os.environ["PATH"] + yield + dbt_test_utils.clean_tmp_tables(**clean_up_args) + dbt_test_utils.tear_down_db() + for folder in temporary_folders: + print(f"Deleting temporary test folder {folder}") + shutil.rmtree(folder, ignore_errors=True) + + +@pytest.fixture +def setup_test_path(request): + dbt_test_utils.change_current_test_dir(request) + print(f"Running from: {pathlib.Path().absolute()}") + print(f"Current PATH is: {os.environ['PATH']}") + yield + os.chdir(request.config.invocation_dir) + + +@pytest.mark.parametrize("column_count", [1000]) +@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations()) +def test_destination_supported_limits(destination_type: DestinationType, column_count: int): + if destination_type.value == DestinationType.MYSQL.value: + # In MySQL, the max number of columns is limited by row size (8KB), + # not by absolute column count. It is way fewer than 1000. 
+ pytest.skip("Skipping test for column limit, because in MySQL, the max number of columns is limited by row size (8KB)") + if destination_type.value == DestinationType.ORACLE.value: + # Airbyte uses a few columns for metadata and Oracle limits are right at 1000 + column_count = 993 + if destination_type.value == DestinationType.MSSQL.value: + column_count = 999 + run_test(destination_type, column_count) + + +@pytest.mark.parametrize( + "integration_type, column_count, expected_exception_message", + [ + ("Postgres", 1665, "target lists can have at most 1664 entries"), + ("BigQuery", 3000, "The view is too large."), + ("Snowflake", 2000, "Operation failed because soft limit on objects of type 'Column' per table was exceeded."), + ("Redshift", 1665, "target lists can have at most 1664 entries"), + ("MySQL", 250, "Row size too large"), + ("Oracle", 1001, "ORA-01792: maximum number of columns in a table or view is 1000"), + ("MSSQL", 1025, "exceeds the maximum of 1024 columns."), + ], +) +def test_destination_failure_over_limits(integration_type: str, column_count: int, expected_exception_message: str, setup_test_path): + destination_type = DestinationType.from_string(integration_type) + if destination_type.value not in dbt_test_utils.get_test_targets(): + pytest.skip(f"Destinations {destination_type} is not in NORMALIZATION_TEST_TARGET env variable") + run_test(destination_type, column_count, expected_exception_message) + + +@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations()) +def test_empty_streams(destination_type: DestinationType, setup_test_path): + run_test(destination_type, 0) + + +@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations()) +def test_stream_with_1_airbyte_column(destination_type: DestinationType, setup_test_path): + run_test(destination_type, 1) + + +def run_test(destination_type: DestinationType, column_count: int, expected_exception_message: str = ""): + if destination_type.value not in dbt_test_utils.get_test_targets(): + pytest.skip(f"Destinations {destination_type} is not in NORMALIZATION_TEST_TARGET env variable") + + if destination_type.value == DestinationType.CLICKHOUSE.value: + pytest.skip("ephemeral materialization isn't supported in ClickHouse yet") + if destination_type.value == DestinationType.ORACLE.value: + # Oracle does not allow changing to random schema + dbt_test_utils.set_target_schema("test_normalization") + elif destination_type.value == DestinationType.REDSHIFT.value: + # set unique schema for Redshift test + dbt_test_utils.set_target_schema(dbt_test_utils.generate_random_string("test_ephemeral_")) + else: + dbt_test_utils.set_target_schema("test_ephemeral") + print(f"Testing ephemeral for destination {destination_type.value} with column count {column_count}") + integration_type = destination_type.value + # Create the test folder with dbt project and appropriate destination settings to run integration tests from + test_root_dir = setup_test_dir(integration_type, temporary_folders) + destination_config = dbt_test_utils.generate_profile_yaml_file(destination_type, test_root_dir) + # generate a catalog and associated dbt models files + generate_dbt_models(destination_type, test_root_dir, column_count) + # Use destination connector to create empty _airbyte_raw_* tables to use as input for the test + assert setup_input_raw_data(integration_type, test_root_dir, destination_config) + dbt_test_utils.dbt_check(destination_type, test_root_dir) + if expected_exception_message: + with 
+        with pytest.raises(AssertionError):
+            dbt_test_utils.dbt_run(destination_type, test_root_dir)
+        assert search_logs_for_pattern(test_root_dir + "/dbt_output.log", expected_exception_message)
+    else:
+        dbt_test_utils.dbt_run(destination_type, test_root_dir)
+
+
+def search_logs_for_pattern(log_file: str, pattern: str):
+    with open(log_file, "r") as file:
+        for line in file:
+            if re.search(pattern, line):
+                return True
+    return False
+
+
+def setup_input_raw_data(integration_type: str, test_root_dir: str, destination_config: Dict[str, Any]) -> bool:
+    """
+    This should populate the associated "raw" tables from which normalization reads when running the dbt CLI.
+    """
+    config_file = os.path.join(test_root_dir, "destination_config.json")
+    with open(config_file, "w") as f:
+        f.write(json.dumps(destination_config))
+    commands = [
+        "docker",
+        "run",
+        "--rm",
+        "--init",
+        "-v",
+        f"{test_root_dir}:/data",
+        "--network",
+        "host",
+        "-i",
+        f"airbyte/destination-{integration_type.lower()}:dev",
+        "write",
+        "--config",
+        "/data/destination_config.json",
+        "--catalog",
+        "/data/catalog.json",
+    ]
+    # Force a reset in destination raw tables
+    return dbt_test_utils.run_destination_process("", test_root_dir, commands)
+
+
+def generate_dbt_models(destination_type: DestinationType, test_root_dir: str, column_count: int):
+    """
+    This is the normalization step generating dbt model files from the destination_catalog.json taken as input.
+    """
+    output_directory = os.path.join(test_root_dir, "models", "generated")
+    shutil.rmtree(output_directory, ignore_errors=True)
+    catalog_config = {
+        "streams": [
+            {
+                "stream": {
+                    "name": dbt_test_utils.generate_random_string(f"stream_with_{column_count}_columns"),
+                    "json_schema": {
+                        "type": ["null", "object"],
+                        "properties": {},
+                    },
+                    "supported_sync_modes": ["incremental"],
+                    "source_defined_cursor": True,
+                    "default_cursor_field": [],
+                },
+                "sync_mode": "incremental",
+                "cursor_field": [],
+                "destination_sync_mode": "overwrite",
+            }
+        ]
+    }
+    if column_count == 1:
+        catalog_config["streams"][0]["stream"]["json_schema"]["properties"]["_airbyte_id"] = {"type": "integer"}
+    else:
+        for column in [dbt_test_utils.random_string(5) for _ in range(column_count)]:
+            catalog_config["streams"][0]["stream"]["json_schema"]["properties"][column] = {"type": "string"}
+    catalog = os.path.join(test_root_dir, "catalog.json")
+    with open(catalog, "w") as fh:
+        fh.write(json.dumps(catalog_config))
+
+    transform_catalog = TransformCatalog()
+    transform_catalog.config = {
+        "integration_type": destination_type.value,
+        "schema": dbt_test_utils.target_schema,
+        "catalog": [catalog],
+        "output_path": output_directory,
+        "json_column": "_airbyte_data",
+        "profile_config_dir": test_root_dir,
+    }
+    transform_catalog.process_catalog()
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py
new file mode 100644
index 0000000000000..f1cea41e1a4f5
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/test_normalization.py
@@ -0,0 +1,482 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+import json
+import os
+import pathlib
+import re
+import shutil
+import tempfile
+from distutils.dir_util import copy_tree
+from typing import Any, Dict
+
+import pytest
+from integration_tests.dbt_integration_test import DbtIntegrationTest
+from integration_tests.utils import generate_dbt_models, run_destination_process
+from normalization.destination_type import DestinationType
+
+temporary_folders = set()
+
+# dbt models and final sql outputs from the following git versioned tests will be written in a folder included in
+# the airbyte git repository.
+git_versioned_tests = ["test_simple_streams", "test_nested_streams"]
+
+dbt_test_utils = DbtIntegrationTest()
+
+
+@pytest.fixture(scope="module", autouse=True)
+def before_all_tests(request):
+    destinations_to_test = dbt_test_utils.get_test_targets()
+    # set clean-up args to clean target destination after the test
+    clean_up_args = {
+        "destination_type": [d for d in DestinationType if d.value in destinations_to_test],
+        "test_type": "normalization",
+        "git_versioned_tests": git_versioned_tests,
+    }
+    for integration_type in [d.value for d in DestinationType]:
+        if integration_type in destinations_to_test:
+            test_root_dir = f"{pathlib.Path().absolute()}/normalization_test_output/{integration_type.lower()}"
+            shutil.rmtree(test_root_dir, ignore_errors=True)
+    if os.getenv("RANDOM_TEST_SCHEMA"):
+        target_schema = dbt_test_utils.generate_random_string("test_normalization_ci_")
+        dbt_test_utils.set_target_schema(target_schema)
+    dbt_test_utils.change_current_test_dir(request)
+    dbt_test_utils.setup_db(destinations_to_test)
+    os.environ["PATH"] = os.path.abspath("../.venv/bin/") + ":" + os.environ["PATH"]
+    yield
+    dbt_test_utils.clean_tmp_tables(**clean_up_args)
+    dbt_test_utils.tear_down_db()
+    for folder in temporary_folders:
+        print(f"Deleting temporary test folder {folder}")
+        shutil.rmtree(folder, ignore_errors=True)
+
+
+@pytest.fixture
+def setup_test_path(request):
+    dbt_test_utils.change_current_test_dir(request)
+    print(f"Running from: {pathlib.Path().absolute()}")
+    print(f"Current PATH is: {os.environ['PATH']}")
+    yield
+    os.chdir(request.config.invocation_dir)
+
+
+@pytest.mark.parametrize(
+    "test_resource_name",
+    set(
+        git_versioned_tests
+        + [
+            # Non-versioned test outputs below will be written to /tmp folders instead
+        ]
+    ),
+)
+@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations())
+def test_normalization(destination_type: DestinationType, test_resource_name: str, setup_test_path):
+    if destination_type.value not in dbt_test_utils.get_test_targets():
+        pytest.skip(f"Destination {destination_type} is not in the NORMALIZATION_TEST_TARGET env variable")
+    if (
+        destination_type.value in (DestinationType.ORACLE.value, DestinationType.CLICKHOUSE.value)
+        and test_resource_name == "test_nested_streams"
+    ):
+        pytest.skip(f"Destination {destination_type} does not support nested streams")
+
+    target_schema = dbt_test_utils.target_schema
+    if destination_type.value == DestinationType.ORACLE.value:
+        # Oracle does not allow changing to random schema
+        dbt_test_utils.set_target_schema("test_normalization")
+    elif destination_type.value == DestinationType.REDSHIFT.value:
+        # set unique schema for Redshift test
+        dbt_test_utils.set_target_schema(dbt_test_utils.generate_random_string("test_normalization_"))
+    try:
+        run_test_normalization(destination_type, test_resource_name)
+    finally:
+        dbt_test_utils.set_target_schema(target_schema)
+
+
+def run_test_normalization(destination_type: DestinationType, test_resource_name: str):
+    print(f"Testing normalization {destination_type} for {test_resource_name} in ", dbt_test_utils.target_schema)
+    # Create the test folder with dbt project and appropriate destination settings to run integration tests from
+    test_root_dir = setup_test_dir(destination_type, test_resource_name)
+    run_first_normalization(destination_type, test_resource_name, test_root_dir)
+    if os.path.exists(os.path.join("resources", test_resource_name, "data_input", "messages_incremental.txt")):
+        run_incremental_normalization(destination_type, test_resource_name, test_root_dir)
+    if os.path.exists(os.path.join("resources", test_resource_name, "data_input", "messages_schema_change.txt")):
+        run_schema_change_normalization(destination_type, test_resource_name, test_root_dir)
+
+
+def run_first_normalization(destination_type: DestinationType, test_resource_name: str, test_root_dir: str):
+    destination_config = dbt_test_utils.generate_profile_yaml_file(destination_type, test_root_dir)
+    # Use destination connector to create _airbyte_raw_* tables to use as input for the test
+    assert setup_input_raw_data(destination_type, test_resource_name, test_root_dir, destination_config)
+    # generate models from catalog
+    generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "catalog.json", dbt_test_utils)
+    # Setup test resources and models
+    setup_dbt_test(destination_type, test_resource_name, test_root_dir)
+    dbt_test_utils.dbt_check(destination_type, test_root_dir)
+    # Run dbt process
+    dbt_test_utils.dbt_run(destination_type, test_root_dir, force_full_refresh=True)
+    copy_tree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), os.path.join(test_root_dir, "first_output"))
+    shutil.rmtree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), ignore_errors=True)
+    # Verify dbt process
+    dbt_test(destination_type, test_root_dir)
+
+
+def run_incremental_normalization(destination_type: DestinationType, test_resource_name: str, test_root_dir: str):
+    # Use destination connector to reset _airbyte_raw_* tables with new incremental data
+    setup_incremental_data(destination_type, test_resource_name, test_root_dir)
+    # setup new test files
+    setup_dbt_incremental_test(destination_type, test_resource_name, test_root_dir)
+    # Run dbt process
+    dbt_test_utils.dbt_run(destination_type, test_root_dir)
+    normalize_dbt_output(test_root_dir, "build/run/airbyte_utils/models/generated/", "second_output")
+
+    if destination_type.value in [DestinationType.MYSQL.value, DestinationType.ORACLE.value]:
+        pytest.skip(f"{destination_type} does not support incremental yet")
+    dbt_test(destination_type, test_root_dir)
+
+
+def run_schema_change_normalization(destination_type: DestinationType, test_resource_name: str, test_root_dir: str):
+    if destination_type.value in [DestinationType.MYSQL.value, DestinationType.ORACLE.value]:
+        # TODO: upgrade dbt-adapter repositories to work with dbt 0.21.0+ (outside airbyte's control)
+        pytest.skip(f"{destination_type} does not support schema change in incremental yet (requires dbt 0.21.0+)")
+    if destination_type.value in [
+        DestinationType.SNOWFLAKE.value,
+        DestinationType.CLICKHOUSE.value,
+        DestinationType.TIDB.value,
+        DestinationType.DUCKDB.value,
+    ]:
+        pytest.skip(f"{destination_type} is disabled as it doesn't support schema change in incremental yet (column type changes)")
+    if destination_type.value in [DestinationType.MSSQL.value, DestinationType.SNOWFLAKE.value]:
corresponding dbt-adapter repository to handle schema changes (outside airbyte's control)
+        pytest.skip(f"{destination_type} is disabled as it doesn't fully support schema change in incremental yet")
+
+    setup_schema_change_data(destination_type, test_resource_name, test_root_dir)
+    generate_dbt_models(
+        destination_type, test_resource_name, test_root_dir, "modified_models", "catalog_schema_change.json", dbt_test_utils
+    )
+    setup_dbt_schema_change_test(destination_type, test_resource_name, test_root_dir)
+    dbt_test_utils.dbt_run(destination_type, test_root_dir)
+    normalize_dbt_output(test_root_dir, "build/run/airbyte_utils/modified_models/generated/", "third_output")
+    dbt_test(destination_type, test_root_dir)
+
+
+def normalize_dbt_output(test_root_dir: str, input_dir: str, output_dir: str):
+    tmp_dir = os.path.join(test_root_dir, input_dir)
+    output_dir = os.path.join(test_root_dir, output_dir)
+    shutil.rmtree(output_dir, ignore_errors=True)
+
+    def copy_replace_dbt_tmp(src, dst):
+        dbt_test_utils.copy_replace(src, dst, "__dbt_tmp[0-9]+", "__dbt_tmp")
+
+    shutil.copytree(tmp_dir, output_dir, copy_function=copy_replace_dbt_tmp)
+    shutil.rmtree(tmp_dir, ignore_errors=True)
+
+
+def setup_test_dir(destination_type: DestinationType, test_resource_name: str) -> str:
+    """
+    We prepare a clean folder to run the tests from.
+
+    If the test_resource_name is part of git_versioned_tests, then dbt models and final sql outputs
+    will be written to a folder included in the airbyte git repository.
+
+    Non-versioned tests will be written in /tmp folders instead.
+
+    The purpose is to keep track of a small set of downstream changes on selected integration test cases.
+    - generated dbt models created by the normalization script from an input destination_catalog.json
+    - final output sql files created by the dbt CLI from the generated dbt models (dbt models are sql files with jinja templating,
+    these are interpreted and compiled into the native SQL dialect of the final destination engine)
+    """
+    if test_resource_name in git_versioned_tests:
+        test_root_dir = f"{pathlib.Path().absolute()}/normalization_test_output/{destination_type.value.lower()}"
+    else:
+        test_root_dir = f"{pathlib.Path().joinpath('..', 'build', 'normalization_test_output', destination_type.value.lower()).resolve()}"
+    os.makedirs(test_root_dir, exist_ok=True)
+    test_root_dir = f"{test_root_dir}/{test_resource_name}"
+    shutil.rmtree(test_root_dir, ignore_errors=True)
+    print(f"Setting up test folder {test_root_dir}")
+    dbt_project_yaml = "../dbt-project-template/dbt_project.yml"
+    copy_tree("../dbt-project-template", test_root_dir)
+    if destination_type.value == DestinationType.MSSQL.value:
+        copy_tree("../dbt-project-template-mssql", test_root_dir)
+        dbt_project_yaml = "../dbt-project-template-mssql/dbt_project.yml"
+    elif destination_type.value == DestinationType.MYSQL.value:
+        copy_tree("../dbt-project-template-mysql", test_root_dir)
+        dbt_project_yaml = "../dbt-project-template-mysql/dbt_project.yml"
+    elif destination_type.value == DestinationType.ORACLE.value:
+        copy_tree("../dbt-project-template-oracle", test_root_dir)
+        dbt_project_yaml = "../dbt-project-template-oracle/dbt_project.yml"
+    elif destination_type.value == DestinationType.CLICKHOUSE.value:
+        copy_tree("../dbt-project-template-clickhouse", test_root_dir)
+        dbt_project_yaml = "../dbt-project-template-clickhouse/dbt_project.yml"
+    elif destination_type.value == DestinationType.SNOWFLAKE.value:
+        copy_tree("../dbt-project-template-snowflake", test_root_dir)
+        dbt_project_yaml = 
"../dbt-project-template-snowflake/dbt_project.yml" + elif destination_type.value == DestinationType.REDSHIFT.value: + copy_tree("../dbt-project-template-redshift", test_root_dir) + dbt_project_yaml = "../dbt-project-template-redshift/dbt_project.yml" + elif destination_type.value == DestinationType.TIDB.value: + copy_tree("../dbt-project-template-tidb", test_root_dir) + dbt_project_yaml = "../dbt-project-template-tidb/dbt_project.yml" + elif destination_type.value == DestinationType.DUCKDB.value: + copy_tree("../dbt-project-template-duckdb", test_root_dir) + dbt_project_yaml = "../dbt-project-template-duckdb/dbt_project.yml" + dbt_test_utils.copy_replace(dbt_project_yaml, os.path.join(test_root_dir, "dbt_project.yml")) + return test_root_dir + + +def setup_input_raw_data( + destination_type: DestinationType, test_resource_name: str, test_root_dir: str, destination_config: Dict[str, Any] +) -> bool: + """ + We run docker images of destinations to upload test data stored in the messages.txt file for each test case. + This should populate the associated "raw" tables from which normalization is reading from when running dbt CLI. + """ + catalog_file = os.path.join("resources", test_resource_name, "data_input", "catalog.json") + message_file = os.path.join("resources", test_resource_name, "data_input", "messages.txt") + dbt_test_utils.copy_replace( + catalog_file, + os.path.join(test_root_dir, "reset_catalog.json"), + pattern='"destination_sync_mode": ".*"', + replace_value='"destination_sync_mode": "overwrite"', + ) + dbt_test_utils.copy_replace(catalog_file, os.path.join(test_root_dir, "destination_catalog.json")) + config_file = os.path.join(test_root_dir, "destination_config.json") + with open(config_file, "w") as f: + f.write(json.dumps(destination_config)) + # Force a reset in destination raw tables + assert run_destination_process(destination_type, test_root_dir, "", "reset_catalog.json", dbt_test_utils) + # Run a sync to create raw tables in destinations + return run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) + + +def setup_incremental_data(destination_type: DestinationType, test_resource_name: str, test_root_dir: str) -> bool: + message_file = os.path.join("resources", test_resource_name, "data_input", "messages_incremental.txt") + # Force a reset in destination raw tables + assert run_destination_process(destination_type, test_root_dir, "", "reset_catalog.json", dbt_test_utils) + # Run a sync to create raw tables in destinations + return run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) + + +def setup_schema_change_data(destination_type: DestinationType, test_resource_name: str, test_root_dir: str) -> bool: + catalog_file = os.path.join("resources", test_resource_name, "data_input", "catalog_schema_change.json") + message_file = os.path.join("resources", test_resource_name, "data_input", "messages_schema_change.txt") + dbt_test_utils.copy_replace( + catalog_file, + os.path.join(test_root_dir, "reset_catalog.json"), + pattern='"destination_sync_mode": ".*"', + replace_value='"destination_sync_mode": "overwrite"', + ) + dbt_test_utils.copy_replace(catalog_file, os.path.join(test_root_dir, "destination_catalog.json")) + dbt_test_utils.copy_replace( + os.path.join(test_root_dir, "dbt_project.yml"), + os.path.join(test_root_dir, "first_dbt_project.yml"), + ) + + def update(config_yaml): + if config_yaml["model-paths"] == ["models"]: + 
config_yaml["model-paths"] = ["modified_models"] + return True, config_yaml + return False, None + + dbt_test_utils.update_yaml_file(os.path.join(test_root_dir, "dbt_project.yml"), update) + # Run a sync to update raw tables in destinations + return run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils) + + +def setup_dbt_test(destination_type: DestinationType, test_resource_name: str, test_root_dir: str): + """ + Prepare the data (copy) for the models for dbt test. + """ + replace_identifiers = os.path.join("resources", test_resource_name, "data_input", "replace_identifiers.json") + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_schema_tests"), + os.path.join(test_root_dir, "models/dbt_schema_tests"), + destination_type, + replace_identifiers, + ) + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_tests_tmp"), + os.path.join(test_root_dir, "models/dbt_data_tests"), + destination_type, + replace_identifiers, + ) + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_tests"), + os.path.join(test_root_dir, "tests"), + destination_type, + replace_identifiers, + ) + + +def setup_dbt_incremental_test(destination_type: DestinationType, test_resource_name: str, test_root_dir: str): + """ + Prepare the data (copy) for the models for dbt test. + """ + replace_identifiers = os.path.join("resources", test_resource_name, "data_input", "replace_identifiers.json") + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_schema_tests_incremental"), + os.path.join(test_root_dir, "models/dbt_schema_tests"), + destination_type, + replace_identifiers, + ) + test_directory = os.path.join(test_root_dir, "models/dbt_data_tests") + shutil.rmtree(test_directory, ignore_errors=True) + os.makedirs(test_directory, exist_ok=True) + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_tests_tmp_incremental"), + test_directory, + destination_type, + replace_identifiers, + ) + test_directory = os.path.join(test_root_dir, "tests") + shutil.rmtree(test_directory, ignore_errors=True) + os.makedirs(test_directory, exist_ok=True) + copy_test_files( + os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_tests_incremental"), + test_directory, + destination_type, + replace_identifiers, + ) + + +def setup_dbt_schema_change_test(destination_type: DestinationType, test_resource_name: str, test_root_dir: str): + """ + Prepare the data (copy) for the models for dbt test. 
+    """
+    replace_identifiers = os.path.join("resources", test_resource_name, "data_input", "replace_identifiers.json")
+    copy_test_files(
+        os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_schema_tests_schema_change"),
+        os.path.join(test_root_dir, "modified_models/dbt_schema_tests"),
+        destination_type,
+        replace_identifiers,
+    )
+    test_directory = os.path.join(test_root_dir, "modified_models/dbt_data_tests")
+    shutil.rmtree(test_directory, ignore_errors=True)
+    os.makedirs(test_directory, exist_ok=True)
+    copy_test_files(
+        os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_tests_tmp_schema_change"),
+        test_directory,
+        destination_type,
+        replace_identifiers,
+    )
+    test_directory = os.path.join(test_root_dir, "tests")
+    shutil.rmtree(test_directory, ignore_errors=True)
+    os.makedirs(test_directory, exist_ok=True)
+    copy_test_files(
+        os.path.join("resources", test_resource_name, "dbt_test_config", "dbt_data_tests_schema_change"),
+        test_directory,
+        destination_type,
+        replace_identifiers,
+    )
+
+
+def dbt_test(destination_type: DestinationType, test_root_dir: str):
+    """
+    dbt provides a way to run dbt tests as described here: https://docs.getdbt.com/docs/building-a-dbt-project/tests
+    - Schema tests are added in .yml files from the schema_tests directory
+    - see additional macros for testing here: https://github.com/fishtown-analytics/dbt-utils#schema-tests
+    - Data tests are added in .sql files from the data_tests directory and should return 0 records to be successful
+
+    We use this mechanism to verify the output of our integration tests.
+    """
+    normalization_image: str = dbt_test_utils.get_normalization_image(destination_type)
+    assert dbt_test_utils.run_check_dbt_command(normalization_image, "test", test_root_dir)
+
+
+def copy_test_files(src: str, dst: str, destination_type: DestinationType, replace_identifiers: str):
+    """
+    Copy files while hacking snowflake identifiers that need to be uppercased... 
+    (so we can share these dbt test files across destinations)
+    """
+    if os.path.exists(src):
+        temp_dir = tempfile.mkdtemp(dir="/tmp/", prefix="normalization_test_")
+        temporary_folders.add(temp_dir)
+        # Copy and adapt capitalization
+        if destination_type.value == DestinationType.SNOWFLAKE.value:
+            shutil.copytree(src, temp_dir + "/upper", copy_function=copy_upper)
+            src = temp_dir + "/upper"
+        elif destination_type.value == DestinationType.REDSHIFT.value:
+            shutil.copytree(src, temp_dir + "/lower", copy_function=copy_lower)
+            src = temp_dir + "/lower"
+        if os.path.exists(replace_identifiers):
+            with open(replace_identifiers, "r") as file:
+                contents = file.read()
+                identifiers_map = json.loads(contents)
+            pattern = []
+            replace_value = []
+            if dbt_test_utils.target_schema != "test_normalization":
+                pattern.append("test_normalization")
+                if destination_type.value == DestinationType.SNOWFLAKE.value:
+                    replace_value.append(dbt_test_utils.target_schema.upper())
+                else:
+                    replace_value.append(dbt_test_utils.target_schema)
+            if destination_type.value in identifiers_map:
+                for entry in identifiers_map[destination_type.value]:
+                    for k in entry:
+                        # re.escape() must not be used for the replacement string in sub(), only backslashes should be escaped:
+                        # see https://docs.python.org/3/library/re.html#re.escape
+                        pattern.append(k.replace("\\", r"\\"))
+                        replace_value.append(entry[k])
+            if pattern and replace_value:
+
+                def copy_replace_identifiers(src, dst):
+                    dbt_test_utils.copy_replace(src, dst, pattern, replace_value)
+
+                shutil.copytree(src, temp_dir + "/replace", copy_function=copy_replace_identifiers)
+                src = temp_dir + "/replace"
+        # final copy
+        copy_tree(src, dst)
+
+
+def copy_upper(src, dst):
+    print(src, "->", dst)
+    dbt_test_utils.copy_replace(
+        src,
+        dst,
+        pattern=[
+            r"(- name:) *(.*)",
+            r"(ref\(')(.*)('\))",
+            r"(source\(')(.*)('\))",
+        ],
+        replace_value=[
+            to_upper_identifier,
+            to_upper_identifier,
+            to_upper_identifier,
+        ],
+    )
+
+
+def copy_lower(src, dst):
+    print(src, "->", dst)
+    dbt_test_utils.copy_replace(
+        src,
+        dst,
+        pattern=[
+            r"(- name:) *(.*)",
+            r"(ref\(')(.*)('\))",
+            r"(source\(')(.*)('\))",
+        ],
+        replace_value=[
+            to_lower_identifier,
+            to_lower_identifier,
+            to_lower_identifier,
+        ],
+    )
+
+
+def to_upper_identifier(input: re.Match) -> str:
+    if len(input.groups()) == 2:
+        return f"{input.group(1)} {input.group(2).upper()}"
+    elif len(input.groups()) == 3:
+        return f"{input.group(1)}{input.group(2).upper()}{input.group(3)}"
+    else:
+        raise Exception(f"Unexpected number of groups in {input}")
+
+
+def to_lower_identifier(input: re.Match) -> str:
+    if len(input.groups()) == 2:
+        return f"{input.group(1)} {input.group(2).lower()}"
+    elif len(input.groups()) == 3:
+        return f"{input.group(1)}{input.group(2).lower()}{input.group(3)}"
+    else:
+        raise Exception(f"Unexpected number of groups in {input}")
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/test_sparse_nested_fields.py b/airbyte-integrations/bases/base-normalization/integration_tests/test_sparse_nested_fields.py
new file mode 100644
index 0000000000000..d67547c1a3527
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/test_sparse_nested_fields.py
@@ -0,0 +1,340 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +import json +import os +import pathlib +import re +import shutil +import tempfile +from distutils.dir_util import copy_tree +from typing import Any, Dict + +import pytest +from integration_tests.dbt_integration_test import DbtIntegrationTest +from integration_tests.utils import generate_dbt_models, run_destination_process +from normalization.destination_type import DestinationType + +# from normalization.transform_catalog import TransformCatalog + +temporary_folders = set() + +# dbt models and final sql outputs from the following git versioned tests will be written in a folder included in +# airbyte git repository. +git_versioned_tests = ["test_simple_streams", "test_nested_streams"] + +dbt_test_utils = DbtIntegrationTest() + + +@pytest.fixture(scope="module", autouse=True) +def before_all_tests(request): + destinations_to_test = dbt_test_utils.get_test_targets() + # set clean-up args to clean target destination after the test + clean_up_args = { + "destination_type": [d for d in DestinationType if d.value in destinations_to_test], + "test_type": "normalization", + "git_versioned_tests": git_versioned_tests, + } + for integration_type in [d.value for d in DestinationType]: + if integration_type in destinations_to_test: + test_root_dir = f"{pathlib.Path().absolute()}/normalization_test_output/{integration_type.lower()}" + shutil.rmtree(test_root_dir, ignore_errors=True) + if os.getenv("RANDOM_TEST_SCHEMA"): + target_schema = dbt_test_utils.generate_random_string("test_normalization_ci_") + dbt_test_utils.set_target_schema(target_schema) + dbt_test_utils.change_current_test_dir(request) + dbt_test_utils.setup_db(destinations_to_test) + os.environ["PATH"] = os.path.abspath("../.venv/bin/") + ":" + os.environ["PATH"] + yield + dbt_test_utils.clean_tmp_tables(**clean_up_args) + dbt_test_utils.tear_down_db() + for folder in temporary_folders: + print(f"Deleting temporary test folder {folder}") + shutil.rmtree(folder, ignore_errors=True) + + +@pytest.fixture +def setup_test_path(request): + dbt_test_utils.change_current_test_dir(request) + print(f"Running from: {pathlib.Path().absolute()}") + print(f"Current PATH is: {os.environ['PATH']}") + yield + os.chdir(request.config.invocation_dir) + + +@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations()) +def test_sparse_nested_fields(destination_type: DestinationType): + # TODO extract these conditions? 
+    if destination_type.value not in dbt_test_utils.get_test_targets():
+        pytest.skip(f"Destination {destination_type} is not in the NORMALIZATION_TEST_TARGET env variable")
+    if destination_type.value in (DestinationType.ORACLE.value, DestinationType.CLICKHOUSE.value):
+        pytest.skip(f"Destination {destination_type} does not support nested streams")
+    if destination_type.value in [DestinationType.MYSQL.value, DestinationType.ORACLE.value]:
+        pytest.skip(f"{destination_type} does not support incremental yet")
+
+    target_schema = dbt_test_utils.target_schema
+    if destination_type.value == DestinationType.ORACLE.value:
+        # Oracle does not allow changing to a random schema
+        dbt_test_utils.set_target_schema("test_normalization")
+    elif destination_type.value == DestinationType.REDSHIFT.value:
+        # set a unique schema for the Redshift test
+        dbt_test_utils.set_target_schema(dbt_test_utils.generate_random_string("test_normalization_"))
+
+    try:
+        print(f"Testing sparse nested field normalization {destination_type} in ", dbt_test_utils.target_schema)
+        test_resource_name = "test_sparse_nested_streams"
+
+        # Create the test folder with dbt project and appropriate destination settings to run integration tests from
+        test_root_dir = setup_test_dir(destination_type, test_resource_name)
+
+        # First sync
+        destination_config = dbt_test_utils.generate_profile_yaml_file(destination_type, test_root_dir)
+        assert setup_input_raw_data(destination_type, test_resource_name, test_root_dir, destination_config)
+        generate_dbt_models(destination_type, test_resource_name, test_root_dir, "models", "catalog.json", dbt_test_utils)
+        dbt_test_utils.dbt_check(destination_type, test_root_dir)
+        setup_dbt_sparse_nested_streams_test(destination_type, test_resource_name, test_root_dir, 1)
+        dbt_test_utils.dbt_run(destination_type, test_root_dir)
+        copy_tree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), os.path.join(test_root_dir, "sync1_output"))
+        shutil.rmtree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), ignore_errors=True)
+        dbt_test(destination_type, test_root_dir)
+
+        # Second sync
+        message_file = os.path.join("resources", test_resource_name, "data_input", "messages2.txt")
+        assert run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils)
+        setup_dbt_sparse_nested_streams_test(destination_type, test_resource_name, test_root_dir, 2)
+        dbt_test_utils.dbt_run(destination_type, test_root_dir)
+        copy_tree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), os.path.join(test_root_dir, "sync2_output"))
+        shutil.rmtree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), ignore_errors=True)
+        dbt_test(destination_type, test_root_dir)
+
+        # Third sync
+        message_file = os.path.join("resources", test_resource_name, "data_input", "messages3.txt")
+        assert run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils)
+        setup_dbt_sparse_nested_streams_test(destination_type, test_resource_name, test_root_dir, 3)
+        dbt_test_utils.dbt_run(destination_type, test_root_dir)
+        copy_tree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), os.path.join(test_root_dir, "sync3_output"))
+        shutil.rmtree(os.path.join(test_root_dir, "build/run/airbyte_utils/models/generated/"), ignore_errors=True)
+        dbt_test(destination_type, test_root_dir)
+    finally:
+        dbt_test_utils.set_target_schema(target_schema)
+        clean_up_args = {
+            
"destination_type": [destination_type], + "test_type": "ephemeral", + "tmp_folders": [str(test_root_dir)], + } + dbt_test_utils.clean_tmp_tables(**clean_up_args) + + +def setup_test_dir(destination_type: DestinationType, test_resource_name: str) -> str: + """ + We prepare a clean folder to run the tests from. + + if the test_resource_name is part of git_versioned_tests, then dbt models and final sql outputs + will be written to a folder included in airbyte git repository. + + Non-versioned tests will be written in /tmp folders instead. + + The purpose is to keep track of a small set of downstream changes on selected integration tests cases. + - generated dbt models created by normalization script from an input destination_catalog.json + - final output sql files created by dbt CLI from the generated dbt models (dbt models are sql files with jinja templating, + these are interpreted and compiled into the native SQL dialect of the final destination engine) + """ + if test_resource_name in git_versioned_tests: + test_root_dir = f"{pathlib.Path().absolute()}/normalization_test_output/{destination_type.value.lower()}" + else: + test_root_dir = f"{pathlib.Path().joinpath('..', 'build', 'normalization_test_output', destination_type.value.lower()).resolve()}" + os.makedirs(test_root_dir, exist_ok=True) + test_root_dir = f"{test_root_dir}/{test_resource_name}" + shutil.rmtree(test_root_dir, ignore_errors=True) + print(f"Setting up test folder {test_root_dir}") + dbt_project_yaml = "../dbt-project-template/dbt_project.yml" + copy_tree("../dbt-project-template", test_root_dir) + if destination_type.value == DestinationType.MSSQL.value: + copy_tree("../dbt-project-template-mssql", test_root_dir) + dbt_project_yaml = "../dbt-project-template-mssql/dbt_project.yml" + elif destination_type.value == DestinationType.MYSQL.value: + copy_tree("../dbt-project-template-mysql", test_root_dir) + dbt_project_yaml = "../dbt-project-template-mysql/dbt_project.yml" + elif destination_type.value == DestinationType.ORACLE.value: + copy_tree("../dbt-project-template-oracle", test_root_dir) + dbt_project_yaml = "../dbt-project-template-oracle/dbt_project.yml" + elif destination_type.value == DestinationType.CLICKHOUSE.value: + copy_tree("../dbt-project-template-clickhouse", test_root_dir) + dbt_project_yaml = "../dbt-project-template-clickhouse/dbt_project.yml" + elif destination_type.value == DestinationType.SNOWFLAKE.value: + copy_tree("../dbt-project-template-snowflake", test_root_dir) + dbt_project_yaml = "../dbt-project-template-snowflake/dbt_project.yml" + elif destination_type.value == DestinationType.REDSHIFT.value: + copy_tree("../dbt-project-template-redshift", test_root_dir) + dbt_project_yaml = "../dbt-project-template-redshift/dbt_project.yml" + elif destination_type.value == DestinationType.TIDB.value: + copy_tree("../dbt-project-template-tidb", test_root_dir) + dbt_project_yaml = "../dbt-project-template-tidb/dbt_project.yml" + elif destination_type.value == DestinationType.DUCKDB.value: + copy_tree("../dbt-project-template-duckdb", test_root_dir) + dbt_project_yaml = "../dbt-project-template-duckdb/dbt_project.yml" + dbt_test_utils.copy_replace(dbt_project_yaml, os.path.join(test_root_dir, "dbt_project.yml")) + return test_root_dir + + +def setup_input_raw_data( + destination_type: DestinationType, test_resource_name: str, test_root_dir: str, destination_config: Dict[str, Any] +) -> bool: + """ + We run docker images of destinations to upload test data stored in the messages.txt file for each test case. 
+    This should populate the associated "raw" tables from which normalization reads when running the dbt CLI.
+    """
+    catalog_file = os.path.join("resources", test_resource_name, "data_input", "catalog.json")
+    message_file = os.path.join("resources", test_resource_name, "data_input", "messages.txt")
+    dbt_test_utils.copy_replace(
+        catalog_file,
+        os.path.join(test_root_dir, "reset_catalog.json"),
+        pattern='"destination_sync_mode": ".*"',
+        replace_value='"destination_sync_mode": "overwrite"',
+    )
+    dbt_test_utils.copy_replace(catalog_file, os.path.join(test_root_dir, "destination_catalog.json"))
+    config_file = os.path.join(test_root_dir, "destination_config.json")
+    with open(config_file, "w") as f:
+        f.write(json.dumps(destination_config))
+    # Force a reset in destination raw tables
+    assert run_destination_process(destination_type, test_root_dir, "", "reset_catalog.json", dbt_test_utils)
+    # Run a sync to create raw tables in destinations
+    return run_destination_process(destination_type, test_root_dir, message_file, "destination_catalog.json", dbt_test_utils)
+
+
+def setup_dbt_sparse_nested_streams_test(destination_type: DestinationType, test_resource_name: str, test_root_dir: str, sync_number: int):
+    """
+    Prepare the data (copy) for the models for dbt test.
+    """
+    replace_identifiers = os.path.join("resources", test_resource_name, "data_input", "replace_identifiers.json")
+    test_directory = os.path.join(test_root_dir, "models/dbt_data_tests")
+    shutil.rmtree(test_directory, ignore_errors=True)
+    os.makedirs(test_directory, exist_ok=True)
+    copy_test_files(
+        os.path.join("resources", test_resource_name, "dbt_test_config", f"sync{sync_number}_expectations"),
+        test_directory,
+        destination_type,
+        replace_identifiers,
+    )
+    test_directory = os.path.join(test_root_dir, "tests")
+    shutil.rmtree(test_directory, ignore_errors=True)
+    os.makedirs(test_directory, exist_ok=True)
+    copy_test_files(
+        os.path.join("resources", test_resource_name, "dbt_test_config", f"sync{sync_number}_assertions"),
+        test_directory,
+        destination_type,
+        replace_identifiers,
+    )
+
+
+def dbt_test(destination_type: DestinationType, test_root_dir: str):
+    """
+    dbt provides a way to run dbt tests as described here: https://docs.getdbt.com/docs/building-a-dbt-project/tests
+    - Schema tests are added in .yml files from the schema_tests directory
+    - see additional macros for testing here: https://github.com/fishtown-analytics/dbt-utils#schema-tests
+    - Data tests are added in .sql files from the data_tests directory and should return 0 records to be successful
+
+    We use this mechanism to verify the output of our integration tests.
+    """
+    normalization_image: str = dbt_test_utils.get_normalization_image(destination_type)
+    assert dbt_test_utils.run_check_dbt_command(normalization_image, "test", test_root_dir)
+
+
+def copy_test_files(src: str, dst: str, destination_type: DestinationType, replace_identifiers: str):
+    """
+    Copy files while hacking snowflake identifiers that need to be uppercased... 
+    (so we can share these dbt test files across destinations)
+    """
+    if os.path.exists(src):
+        temp_dir = tempfile.mkdtemp(dir="/tmp/", prefix="normalization_test_")
+        temporary_folders.add(temp_dir)
+        # Copy and adapt capitalization
+        if destination_type.value == DestinationType.SNOWFLAKE.value:
+            shutil.copytree(src, temp_dir + "/upper", copy_function=copy_upper)
+            src = temp_dir + "/upper"
+        elif destination_type.value == DestinationType.REDSHIFT.value:
+            shutil.copytree(src, temp_dir + "/lower", copy_function=copy_lower)
+            src = temp_dir + "/lower"
+        if os.path.exists(replace_identifiers):
+            with open(replace_identifiers, "r") as file:
+                contents = file.read()
+                identifiers_map = json.loads(contents)
+            pattern = []
+            replace_value = []
+            if dbt_test_utils.target_schema != "test_normalization":
+                pattern.append("test_normalization")
+                if destination_type.value == DestinationType.SNOWFLAKE.value:
+                    replace_value.append(dbt_test_utils.target_schema.upper())
+                else:
+                    replace_value.append(dbt_test_utils.target_schema)
+            if destination_type.value in identifiers_map:
+                for entry in identifiers_map[destination_type.value]:
+                    for k in entry:
+                        # re.escape() must not be used for the replacement string in sub(), only backslashes should be escaped:
+                        # see https://docs.python.org/3/library/re.html#re.escape
+                        pattern.append(k.replace("\\", r"\\"))
+                        replace_value.append(entry[k])
+            if pattern and replace_value:
+
+                def copy_replace_identifiers(src, dst):
+                    dbt_test_utils.copy_replace(src, dst, pattern, replace_value)
+
+                shutil.copytree(src, temp_dir + "/replace", copy_function=copy_replace_identifiers)
+                src = temp_dir + "/replace"
+        # final copy
+        copy_tree(src, dst)
+
+
+def copy_upper(src, dst):
+    print(src, "->", dst)
+    dbt_test_utils.copy_replace(
+        src,
+        dst,
+        pattern=[
+            r"(- name:) *(.*)",
+            r"(ref\(')(.*)('\))",
+            r"(source\(')(.*)('\))",
+        ],
+        replace_value=[
+            to_upper_identifier,
+            to_upper_identifier,
+            to_upper_identifier,
+        ],
+    )
+
+
+def copy_lower(src, dst):
+    print(src, "->", dst)
+    dbt_test_utils.copy_replace(
+        src,
+        dst,
+        pattern=[
+            r"(- name:) *(.*)",
+            r"(ref\(')(.*)('\))",
+            r"(source\(')(.*)('\))",
+        ],
+        replace_value=[
+            to_lower_identifier,
+            to_lower_identifier,
+            to_lower_identifier,
+        ],
+    )
+
+
+def to_upper_identifier(input: re.Match) -> str:
+    if len(input.groups()) == 2:
+        return f"{input.group(1)} {input.group(2).upper()}"
+    elif len(input.groups()) == 3:
+        return f"{input.group(1)}{input.group(2).upper()}{input.group(3)}"
+    else:
+        raise Exception(f"Unexpected number of groups in {input}")
+
+
+def to_lower_identifier(input: re.Match) -> str:
+    if len(input.groups()) == 2:
+        return f"{input.group(1)} {input.group(2).lower()}"
+    elif len(input.groups()) == 3:
+        return f"{input.group(1)}{input.group(2).lower()}{input.group(3)}"
+    else:
+        raise Exception(f"Unexpected number of groups in {input}")
diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/utils.py b/airbyte-integrations/bases/base-normalization/integration_tests/utils.py
new file mode 100644
index 0000000000000..30c7cb3e84129
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/integration_tests/utils.py
@@ -0,0 +1,87 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+#
+
+import os
+import pathlib
+import shutil
+import tempfile
+from distutils.dir_util import copy_tree
+
+from integration_tests.dbt_integration_test import DbtIntegrationTest
+from normalization import DestinationType, TransformCatalog
+
+
+def setup_test_dir(integration_type: str, temporary_folders: set) -> str:
+    """
+    We prepare a clean folder to run the tests from.
+    """
+    test_root_dir = f"{pathlib.Path().joinpath('..', 'build', 'normalization_test_output', integration_type.lower()).resolve()}"
+    os.makedirs(test_root_dir, exist_ok=True)
+    test_root_dir = tempfile.mkdtemp(dir=test_root_dir)
+    temporary_folders.add(test_root_dir)
+    shutil.rmtree(test_root_dir, ignore_errors=True)
+    current_path = os.getcwd()
+    print(f"Setting up test folder {test_root_dir}. Current path {current_path}")
+    copy_tree("../dbt-project-template", test_root_dir)
+    if integration_type == DestinationType.MSSQL.value:
+        copy_tree("../dbt-project-template-mssql", test_root_dir)
+    elif integration_type == DestinationType.MYSQL.value:
+        copy_tree("../dbt-project-template-mysql", test_root_dir)
+    elif integration_type == DestinationType.ORACLE.value:
+        copy_tree("../dbt-project-template-oracle", test_root_dir)
+    elif integration_type == DestinationType.SNOWFLAKE.value:
+        copy_tree("../dbt-project-template-snowflake", test_root_dir)
+    elif integration_type == DestinationType.TIDB.value:
+        copy_tree("../dbt-project-template-tidb", test_root_dir)
+    return test_root_dir
+
+
+def run_destination_process(
+    destination_type: DestinationType,
+    test_root_dir: str,
+    message_file: str,
+    catalog_file: str,
+    dbt_test_utils: DbtIntegrationTest,
+    docker_tag="dev",
+):
+    commands = [
+        "docker",
+        "run",
+        "--rm",
+        "--init",
+        "-v",
+        f"{test_root_dir}:/data",
+        "--network",
+        "host",
+        "-i",
+        f"airbyte/destination-{destination_type.value.lower()}:{docker_tag}",
+        "write",
+        "--config",
+        "/data/destination_config.json",
+        "--catalog",
+    ]
+    return dbt_test_utils.run_destination_process(message_file, test_root_dir, commands + [f"/data/{catalog_file}"])
+
+
+def generate_dbt_models(
+    destination_type: DestinationType,
+    test_resource_name: str,
+    test_root_dir: str,
+    output_dir: str,
+    catalog_file: str,
+    dbt_test_utils: DbtIntegrationTest,
+):
+    """
+    This is the normalization step generating dbt model files from the destination_catalog.json taken as input.
+    """
+    transform_catalog = TransformCatalog()
+    transform_catalog.config = {
+        "integration_type": destination_type.value,
+        "schema": dbt_test_utils.target_schema,
+        "catalog": [os.path.join("resources", test_resource_name, "data_input", catalog_file)],
+        "output_path": os.path.join(test_root_dir, output_dir, "generated"),
+        "json_column": "_airbyte_data",
+        "profile_config_dir": test_root_dir,
+    }
+    transform_catalog.process_catalog()
diff --git a/airbyte-integrations/bases/base-normalization/main_dev_transform_catalog.py b/airbyte-integrations/bases/base-normalization/main_dev_transform_catalog.py
new file mode 100644
index 0000000000000..22e5e57cf2771
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/main_dev_transform_catalog.py
@@ -0,0 +1,21 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import logging + +from airbyte_cdk.exception_handler import init_uncaught_exception_handler +from airbyte_cdk.utils.traced_exception import AirbyteTracedException +from normalization.transform_catalog.transform import main + +if __name__ == "__main__": + init_uncaught_exception_handler(logging.getLogger("airbyte")) + try: + main() + except Exception as e: + msg = ( + "Something went wrong while normalizing the data moved in this sync " + + "(failed to transform catalog into dbt project). See the logs for more details." + ) + raise AirbyteTracedException.from_exception(e, message=msg) diff --git a/airbyte-integrations/bases/base-normalization/main_dev_transform_config.py b/airbyte-integrations/bases/base-normalization/main_dev_transform_config.py new file mode 100644 index 0000000000000..579ccb80d99d0 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/main_dev_transform_config.py @@ -0,0 +1,21 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import logging + +from airbyte_cdk.exception_handler import init_uncaught_exception_handler +from airbyte_cdk.utils.traced_exception import AirbyteTracedException +from normalization.transform_config.transform import main + +if __name__ == "__main__": + init_uncaught_exception_handler(logging.getLogger("airbyte")) + try: + main() + except Exception as e: + msg = ( + "Something went wrong while normalizing the data moved in this sync " + + "(failed to transform config for dbt project). See the logs for more details." + ) + raise AirbyteTracedException.from_exception(e, message=msg) diff --git a/airbyte-integrations/bases/base-normalization/mssql.Dockerfile b/airbyte-integrations/bases/base-normalization/mssql.Dockerfile new file mode 100644 index 0000000000000..1ec0997242035 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/mssql.Dockerfile @@ -0,0 +1,72 @@ +FROM fishtownanalytics/dbt:1.0.0 +COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte + +# Install curl & gnupg dependencies +USER root +WORKDIR /tmp +RUN apt-get update --allow-insecure-repositories && apt-get install -y \ + wget \ + curl \ + unzip \ + libaio-dev \ + libaio1 \ + gnupg \ + gnupg1 \ + gnupg2 \ + equivs + +# Remove multiarch-support package to use Debian 10 packages +# see https://causlayer.orgs.hk/mlocati/docker-php-extension-installer/issues/432#issuecomment-921341138 +RUN echo 'Package: multiarch-support-dummy\nProvides: multiarch-support\nDescription: Fake multiarch-support' > multiarch-support-dummy.ctl \ + && equivs-build multiarch-support-dummy.ctl && dpkg -i multiarch-support-dummy*.deb && rm multiarch-support-dummy*.* \ + && apt-get -y purge equivs +RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - +RUN curl https://packages.microsoft.com/config/debian/10/prod.list > /etc/apt/sources.list.d/mssql-release.list + +# Install MS SQL Server dependencies +RUN apt-get update && ACCEPT_EULA=Y apt-get install -y \ + libgssapi-krb5-2 \ + unixodbc-dev \ + msodbcsql17 \ + mssql-tools +ENV PATH=$PATH:/opt/mssql-tools/bin + +# Install SSH Tunneling dependencies +RUN apt-get install -y jq sshpass + +# clean up +RUN apt-get -y autoremove && apt-get clean + +WORKDIR /airbyte +COPY entrypoint.sh . +COPY build/sshtunneling.sh . + +WORKDIR /airbyte/normalization_code +COPY normalization ./normalization +COPY setup.py . 
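+# Layer the shared dbt project template first, then overlay the MSSQL-specific files
+# on top, so destination-specific settings override the shared defaults.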
+COPY dbt-project-template/ ./dbt-template/
+COPY dbt-project-template-mssql/* ./dbt-template/
+
+# Install python dependencies
+WORKDIR /airbyte/base_python_structs
+
+# workaround for https://github.com/yaml/pyyaml/issues/601
+# this should be fixed in the airbyte/base-airbyte-protocol-python image
+RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation
+
+RUN pip install .
+
+WORKDIR /airbyte/normalization_code
+RUN pip install .
+# Based off https://github.com/dbt-msft/dbt-sqlserver/tree/v1.0.0
+RUN pip install dbt-sqlserver==1.0.0
+
+WORKDIR /airbyte/normalization_code/dbt-template/
+# Download external dbt dependencies
+RUN dbt deps
+
+WORKDIR /airbyte
+ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
+ENTRYPOINT ["/airbyte/entrypoint.sh"]
+
+LABEL io.airbyte.name=airbyte/normalization-mssql
diff --git a/airbyte-integrations/bases/base-normalization/mysql.Dockerfile b/airbyte-integrations/bases/base-normalization/mysql.Dockerfile
new file mode 100644
index 0000000000000..efc25fcb38d9a
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/mysql.Dockerfile
@@ -0,0 +1,38 @@
+FROM fishtownanalytics/dbt:1.0.0
+COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte
+
+# Install SSH Tunneling dependencies
+RUN apt-get update && apt-get install -y jq sshpass
+
+WORKDIR /airbyte
+COPY entrypoint.sh .
+COPY build/sshtunneling.sh .
+
+WORKDIR /airbyte/normalization_code
+COPY normalization ./normalization
+COPY setup.py .
+COPY dbt-project-template/ ./dbt-template/
+COPY dbt-project-template-mysql/* ./dbt-template/
+
+# Install python dependencies
+WORKDIR /airbyte/base_python_structs
+
+# workaround for https://github.com/yaml/pyyaml/issues/601
+# this should be fixed in the airbyte/base-airbyte-protocol-python image
+RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation
+
+RUN pip install .
+
+WORKDIR /airbyte/normalization_code
+RUN pip install .
+RUN pip install dbt-mysql==1.0.0
+
+WORKDIR /airbyte/normalization_code/dbt-template/
+# Download external dbt dependencies
+RUN dbt deps
+
+WORKDIR /airbyte
+ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
+ENTRYPOINT ["/airbyte/entrypoint.sh"]
+
+LABEL io.airbyte.name=airbyte/normalization-mysql
diff --git a/airbyte-integrations/bases/base-normalization/normalization/__init__.py b/airbyte-integrations/bases/base-normalization/normalization/__init__.py
new file mode 100644
index 0000000000000..142fa6695aca7
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/normalization/__init__.py
@@ -0,0 +1,9 @@
+from normalization.destination_type import DestinationType
+from normalization.transform_catalog.transform import TransformCatalog
+from normalization.transform_config.transform import TransformConfig
+
+__all__ = [
+    "DestinationType",
+    "TransformCatalog",
+    "TransformConfig",
+]
diff --git a/airbyte-integrations/bases/base-normalization/normalization/destination_type.py b/airbyte-integrations/bases/base-normalization/normalization/destination_type.py
new file mode 100644
index 0000000000000..2a3681f2d1c45
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/normalization/destination_type.py
@@ -0,0 +1,27 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+#
+
+
+from enum import Enum
+
+
+class DestinationType(Enum):
+    BIGQUERY = "bigquery"
+    CLICKHOUSE = "clickhouse"
+    MSSQL = "mssql"
+    MYSQL = "mysql"
+    ORACLE = "oracle"
+    POSTGRES = "postgres"
+    REDSHIFT = "redshift"
+    SNOWFLAKE = "snowflake"
+    TIDB = "tidb"
+    DUCKDB = "duckdb"
+
+    @classmethod
+    def from_string(cls, string_value: str) -> "DestinationType":
+        return DestinationType[string_value.upper()]
+
+    @staticmethod
+    def testable_destinations():
+        return [dest for dest in list(DestinationType) if dest != DestinationType.DUCKDB]
diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/__init__.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/__init__.py
new file mode 100644
index 0000000000000..fc34c615f84a4
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/__init__.py
@@ -0,0 +1,3 @@
+from normalization.transform_catalog.transform import TransformCatalog
+
+__all__ = ["TransformCatalog"]
diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/catalog_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/catalog_processor.py
new file mode 100644
index 0000000000000..5c55b776c67bb
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/catalog_processor.py
@@ -0,0 +1,299 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import json
+import os
+import re
+from typing import Any, Dict, List, Set
+
+import yaml
+from airbyte_cdk.models.airbyte_protocol import DestinationSyncMode, SyncMode  # type: ignore
+from normalization.destination_type import DestinationType
+from normalization.transform_catalog import dbt_macro
+from normalization.transform_catalog.destination_name_transformer import DestinationNameTransformer
+from normalization.transform_catalog.stream_processor import StreamProcessor
+from normalization.transform_catalog.table_name_registry import TableNameRegistry
+
+
+class CatalogProcessor:
+    """
+    Takes as input an AirbyteCatalog file (stored as JSON Schema).
+    Associated input raw data is expected to be stored in a staging area called "raw_schema".
+
+    This processor reads the catalog file, extracts stream descriptions and transforms them to final tables in their
+    targeted destination schema.
+
+    This relies on a StreamProcessor to handle the conversion of each stream to a table, one at a time.
+    """
+
+    def __init__(self, output_directory: str, destination_type: DestinationType):
+        """
+        @param output_directory is the path to the directory where this processor should write the resulting SQL files (DBT models)
+        @param destination_type is the destination type of warehouse
+        """
+        self.output_directory: str = output_directory
+        self.destination_type: DestinationType = destination_type
+        self.name_transformer: DestinationNameTransformer = DestinationNameTransformer(destination_type)
+        self.models_to_source: Dict[str, str] = {}
+
+    def process(self, catalog_file: str, json_column_name: str, default_schema: str):
+        """
+        This method first parses the catalog and builds models to handle top-level streams.
+        A second loop then goes over the substreams that were nested, in a breadth-first traversal manner. 
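+        For example (with illustrative stream names), a "users" stream containing a nested "addresses" array would
+        produce a "users" model in the first pass and a child "addresses" model in the following pass.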
+ + @param catalog_file input AirbyteCatalog file in JSON Schema describing the structure of the raw data + @param json_column_name is the column name containing the JSON Blob with the raw data + @param default_schema is the final schema where to output the final transformed data to + """ + tables_registry: TableNameRegistry = TableNameRegistry(self.destination_type) + schema_to_source_tables: Dict[str, Set[str]] = {} + catalog = read_json(catalog_file) + # print(json.dumps(catalog, separators=(",", ":"))) + substreams = [] + stream_processors = self.build_stream_processor( + catalog=catalog, + json_column_name=json_column_name, + default_schema=default_schema, + name_transformer=self.name_transformer, + destination_type=self.destination_type, + tables_registry=tables_registry, + ) + for stream_processor in stream_processors: + stream_processor.collect_table_names() + for conflict in tables_registry.resolve_names(): + print( + f"WARN: Resolving conflict: {conflict.schema}.{conflict.table_name_conflict} " + f"from '{'.'.join(conflict.json_path)}' into {conflict.table_name_resolved}" + ) + for stream_processor in stream_processors: + # MySQL table names need to be manually truncated, because it does not do it automatically + truncate = ( + self.destination_type == DestinationType.MYSQL + or self.destination_type == DestinationType.TIDB + or self.destination_type == DestinationType.DUCKDB + ) + raw_table_name = self.name_transformer.normalize_table_name(f"_airbyte_raw_{stream_processor.stream_name}", truncate=truncate) + add_table_to_sources(schema_to_source_tables, stream_processor.schema, raw_table_name) + + nested_processors = stream_processor.process() + self.models_to_source.update(stream_processor.models_to_source) + + if nested_processors and len(nested_processors) > 0: + substreams += nested_processors + for file in stream_processor.sql_outputs: + output_sql_file(os.path.join(self.output_directory, file), stream_processor.sql_outputs[file]) + self.write_yaml_sources_file(schema_to_source_tables) + self.process_substreams(substreams, tables_registry) + + @staticmethod + def build_stream_processor( + catalog: Dict, + json_column_name: str, + default_schema: str, + name_transformer: DestinationNameTransformer, + destination_type: DestinationType, + tables_registry: TableNameRegistry, + ) -> List[StreamProcessor]: + result = [] + for configured_stream in get_field(catalog, "streams", "Invalid Catalog: 'streams' is not defined in Catalog"): + stream_config = get_field(configured_stream, "stream", "Invalid Stream: 'stream' is not defined in Catalog streams") + + # The logic here matches the logic in JdbcBufferedConsumerFactory.java. + # Any modifications need to be reflected there and vice versa. 
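+            # A stream's namespace, when present, overrides the default destination schema.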
+ schema = default_schema + if "namespace" in stream_config: + schema = stream_config["namespace"] + + schema_name = name_transformer.normalize_schema_name(schema, truncate=False) + if destination_type == DestinationType.ORACLE: + quote_in_parenthesis = re.compile(r"quote\((.*)\)") + raw_schema_name = name_transformer.normalize_schema_name(schema, truncate=False) + if not quote_in_parenthesis.findall(json_column_name): + json_column_name = name_transformer.normalize_column_name(json_column_name, in_jinja=True) + else: + column_inside_single_quote = re.compile(r"\'(.*)\'") + raw_schema_name = name_transformer.normalize_schema_name(f"_airbyte_{schema}", truncate=False) + if not column_inside_single_quote.findall(json_column_name): + json_column_name = f"'{json_column_name}'" + + stream_name = get_field(stream_config, "name", f"Invalid Stream: 'name' is not defined in stream: {str(stream_config)}") + # MySQL table names need to be manually truncated, because it does not do it automatically + truncate = ( + destination_type == DestinationType.MYSQL + or destination_type == DestinationType.TIDB + or destination_type == DestinationType.DUCKDB + ) + raw_table_name = name_transformer.normalize_table_name(f"_airbyte_raw_{stream_name}", truncate=truncate) + + source_sync_mode = get_source_sync_mode(configured_stream, stream_name) + destination_sync_mode = get_destination_sync_mode(configured_stream, stream_name) + cursor_field = [] + primary_key = [] + if source_sync_mode.value == SyncMode.incremental.value or destination_sync_mode.value in [ + # DestinationSyncMode.upsert_dedup.value, + DestinationSyncMode.append_dedup.value, + ]: + cursor_field = get_field(configured_stream, "cursor_field", f"Undefined cursor field for stream {stream_name}") + if destination_sync_mode.value in [ + # DestinationSyncMode.upsert_dedup.value, + DestinationSyncMode.append_dedup.value + ]: + primary_key = get_field(configured_stream, "primary_key", f"Undefined primary key for stream {stream_name}") + + message = f"'json_schema'.'properties' are not defined for stream {stream_name}" + properties = get_field(get_field(stream_config, "json_schema", message), "properties", message) + + from_table = dbt_macro.Source(schema_name, raw_table_name) + + stream_processor = StreamProcessor.create( + stream_name=stream_name, + destination_type=destination_type, + raw_schema=raw_schema_name, + default_schema=default_schema, + schema=schema_name, + source_sync_mode=source_sync_mode, + destination_sync_mode=destination_sync_mode, + cursor_field=cursor_field, + primary_key=primary_key, + json_column_name=json_column_name, + properties=properties, + tables_registry=tables_registry, + from_table=from_table, + ) + result.append(stream_processor) + return result + + def process_substreams(self, substreams: List[StreamProcessor], tables_registry: TableNameRegistry): + """ + Handle nested stream/substream/children + """ + while substreams: + children = substreams + substreams = [] + for substream in children: + substream.tables_registry = tables_registry + nested_processors = substream.process() + self.models_to_source.update(substream.models_to_source) + if nested_processors: + substreams += nested_processors + for file in substream.sql_outputs: + output_sql_file(os.path.join(self.output_directory, file), substream.sql_outputs[file]) + + def write_yaml_sources_file(self, schema_to_source_tables: Dict[str, Set[str]]): + """ + Generate the sources.yaml file as described in https://docs.getdbt.com/docs/building-a-dbt-project/using-sources/ + 
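+        An illustrative example of the generated file (schema and table names are made up):
+            version: 2
+            sources:
+            - name: my_schema
+              quoting:
+                database: true
+                schema: false
+                identifier: false
+              tables:
+              - name: _airbyte_raw_my_stream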
""" + schemas = [] + for entry in sorted(schema_to_source_tables.items(), key=lambda kv: kv[0]): + schema = entry[0] + quoted_schema = self.name_transformer.needs_quotes(schema) + tables = [] + for source in sorted(schema_to_source_tables[schema]): + if quoted_schema: + tables.append({"name": source, "quoting": {"identifier": True}}) + else: + tables.append({"name": source}) + schemas.append( + { + "name": schema, + "quoting": { + "database": True, + "schema": quoted_schema, + "identifier": False, + }, + "tables": tables, + } + ) + source_config = {"version": 2, "sources": schemas} + source_path = os.path.join(self.output_directory, "sources.yml") + output_dir = os.path.dirname(source_path) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + with open(source_path, "w") as fh: + fh.write(yaml.dump(source_config, sort_keys=False)) + + +# Static Functions + + +def read_json(input_path: str) -> Any: + """ + Reads and load a json file + @param input_path is the path to the file to read + """ + with open(input_path, "r") as file: + contents = file.read() + return json.loads(contents) + + +def get_field(config: Dict, key: str, message: str): + """ + Retrieve value of field in a Dict object. Throw an error if key is not found with message as reason. + """ + if key in config: + return config[key] + else: + raise KeyError(message) + + +def get_source_sync_mode(stream_config: Dict, stream_name: str) -> SyncMode: + """ + Read the source sync_mode field from config or return a default value if not found + """ + if "sync_mode" in stream_config: + sync_mode = get_field(stream_config, "sync_mode", "") + else: + sync_mode = "" + try: + result = SyncMode(sync_mode) + except ValueError as e: + # Fallback to default source sync mode value + result = SyncMode.full_refresh + print(f"WARN: Source sync mode falling back to {result} for {stream_name}: {e}") + return result + + +def get_destination_sync_mode(stream_config: Dict, stream_name: str) -> DestinationSyncMode: + """ + Read the destination_sync_mode field from config or return a default value if not found + """ + if "destination_sync_mode" in stream_config: + dest_sync_mode = get_field(stream_config, "destination_sync_mode", "") + else: + dest_sync_mode = "" + try: + result = DestinationSyncMode(dest_sync_mode) + except ValueError as e: + # Fallback to default destination sync mode value + result = DestinationSyncMode.append + print(f"WARN: Destination sync mode falling back to {result} for {stream_name}: {e}") + return result + + +def add_table_to_sources(schema_to_source_tables: Dict[str, Set[str]], schema_name: str, table_name: str): + """ + Keeps track of source tables used in this catalog to build a source.yaml file for DBT + """ + if schema_name not in schema_to_source_tables: + schema_to_source_tables[schema_name] = set() + if table_name not in schema_to_source_tables[schema_name]: + schema_to_source_tables[schema_name].add(table_name) + else: + raise KeyError(f"Duplicate table {table_name} in {schema_name}") + + +def output_sql_file(file: str, sql: str): + """ + @param file is the path to filename to be written + @param sql is the dbt sql content to be written in the generated model file + """ + output_dir = os.path.dirname(file) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + with open(file, "w") as f: + for line in sql.splitlines(): + if line.strip(): + f.write(line + "\n") + f.write("\n") diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/dbt_macro.py 
b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/dbt_macro.py
new file mode 100644
index 0000000000000..71ee02f0f3a73
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/dbt_macro.py
@@ -0,0 +1,44 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from abc import ABC, abstractmethod
+
+
+class Macro(ABC):
+    "https://docs.getdbt.com/docs/building-a-dbt-project/jinja-macros"
+
+    @abstractmethod
+    def __str__(self):
+        pass
+
+    def __repr__(self):
+        return str(self)
+
+    def __add__(self, other):
+        return str(self) + str(other)
+
+    def __radd__(self, other):
+        return str(other) + str(self)
+
+
+class Source(Macro):
+    "https://docs.getdbt.com/reference/dbt-jinja-functions/source"
+
+    def __init__(self, source_name: str, table_name: str):
+        self.source_name = source_name
+        self.table_name = table_name
+
+    def __str__(self):
+        return "source('{}', '{}')".format(self.source_name, self.table_name)
+
+
+class Ref(Macro):
+    "https://docs.getdbt.com/reference/dbt-jinja-functions/ref"
+
+    def __init__(self, model_name: str):
+        self.model_name = model_name
+
+    def __str__(self) -> str:
+        return "ref('{}')".format(self.model_name)
diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py
new file mode 100644
index 0000000000000..3db6b8858120d
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py
@@ -0,0 +1,316 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import unicodedata as ud
+from re import match, sub
+
+from normalization.destination_type import DestinationType
+from normalization.transform_catalog.reserved_keywords import is_reserved_keyword
+from normalization.transform_catalog.utils import jinja_call
+
+DESTINATION_SIZE_LIMITS = {
+    # https://cloud.google.com/bigquery/quotas#all_tables
+    DestinationType.BIGQUERY.value: 1024,
+    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html
+    DestinationType.SNOWFLAKE.value: 255,
+    # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
+    DestinationType.REDSHIFT.value: 127,
+    # https://www.postgresql.org/docs/12/limits.html
+    DestinationType.POSTGRES.value: 63,
+    # https://dev.mysql.com/doc/refman/8.0/en/identifier-length.html
+    DestinationType.MYSQL.value: 64,
+    # https://oracle-base.com/articles/12c/long-identifiers-12cr2
+    DestinationType.ORACLE.value: 128,
+    # https://docs.microsoft.com/en-us/sql/odbc/microsoft/column-name-limitations?view=sql-server-ver15
+    DestinationType.MSSQL.value: 64,
+    # https://stackoverflow.com/questions/68358686/what-is-the-maximum-length-of-a-column-in-clickhouse-can-it-be-modified
+    DestinationType.CLICKHOUSE.value: 63,
+    # https://docs.pingcap.com/tidb/stable/tidb-limitations
+    DestinationType.TIDB.value: 64,
+    # According to the DuckDB team there is no restriction: "We don't enforce a maximum right now but I would not recommend having column
+    # names longer than a few kilobytes." https://discord.com/channels/909674491309850675/1067042662827438122/1067043835768737893.
+    DestinationType.DUCKDB.value: 64,
+}
+
+# DBT also needs to generate suffixes for table names, so we need to make sure it has enough characters to do so...
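+# For example, on Postgres this leaves 63 - 12 - 8 = 43 characters for the base identifier itself
+# (see get_name_max_length below); the two reserved sizes are defined next.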
diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py
new file mode 100644
index 0000000000000..3db6b8858120d
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/destination_name_transformer.py
@@ -0,0 +1,316 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import unicodedata as ud
+from re import match, sub
+
+from normalization.destination_type import DestinationType
+from normalization.transform_catalog.reserved_keywords import is_reserved_keyword
+from normalization.transform_catalog.utils import jinja_call
+
+DESTINATION_SIZE_LIMITS = {
+    # https://cloud.google.com/bigquery/quotas#all_tables
+    DestinationType.BIGQUERY.value: 1024,
+    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html
+    DestinationType.SNOWFLAKE.value: 255,
+    # https://docs.aws.amazon.com/redshift/latest/dg/r_names.html
+    DestinationType.REDSHIFT.value: 127,
+    # https://www.postgresql.org/docs/12/limits.html
+    DestinationType.POSTGRES.value: 63,
+    # https://dev.mysql.com/doc/refman/8.0/en/identifier-length.html
+    DestinationType.MYSQL.value: 64,
+    # https://oracle-base.com/articles/12c/long-identifiers-12cr2
+    DestinationType.ORACLE.value: 128,
+    # https://docs.microsoft.com/en-us/sql/odbc/microsoft/column-name-limitations?view=sql-server-ver15
+    DestinationType.MSSQL.value: 64,
+    # https://stackoverflow.com/questions/68358686/what-is-the-maximum-length-of-a-column-in-clickhouse-can-it-be-modified
+    DestinationType.CLICKHOUSE.value: 63,
+    # https://docs.pingcap.com/tidb/stable/tidb-limitations
+    DestinationType.TIDB.value: 64,
+    # According to the DuckDB team, there is no hard restriction: "We don't enforce a maximum right now but I would not recommend having
+    # column names longer than a few kilobytes." https://discord.com/channels/909674491309850675/1067042662827438122/1067043835768737893
+    DestinationType.DUCKDB.value: 64,
+}
+
+# DBT also needs to generate suffixes to table names, so we need to make sure it has enough characters to do so...
+TRUNCATE_DBT_RESERVED_SIZE = 12
+# we keep 4 characters for 1 underscore and 3 characters for the suffix (_ab1, _ab2, etc)
+# we keep 4 characters for 1 underscore and 3 characters for the hash (of the schema)
+TRUNCATE_RESERVED_SIZE = 8
+
+
+class DestinationNameTransformer:
+    """
+    Handles naming conventions in destinations for all kinds of sql identifiers:
+    - schema
+    - table
+    - column
+    """
+
+    def __init__(self, destination_type: DestinationType):
+        """
+        @param destination_type is the destination type of warehouse
+        """
+        self.destination_type: DestinationType = destination_type
+
+    # Public methods
+
+    def needs_quotes(self, input_name: str) -> bool:
+        """
+        @param input_name to test whether it needs to be manipulated with quotes or not
+        """
+        if is_reserved_keyword(input_name, self.destination_type):
+            return True
+        if self.destination_type.value == DestinationType.BIGQUERY.value:
+            return False
+        if self.destination_type.value == DestinationType.ORACLE.value and input_name.startswith("_"):
+            return True
+        doesnt_start_with_alphaunderscore = match("[^A-Za-z_]", input_name[0]) is not None
+        contains_non_alphanumeric = match(".*[^A-Za-z0-9_].*", input_name) is not None
+        return doesnt_start_with_alphaunderscore or contains_non_alphanumeric
+
+    def normalize_schema_name(self, schema_name: str, in_jinja: bool = False, truncate: bool = True) -> str:
+        """
+        @param schema_name is the schema to normalize
+        @param in_jinja specifies whether the returned normalized name will be used inside a jinja macro or not
+        @param truncate if False, skip truncating the resulting normalized name (for example, when we don't
+        control how the name will be normalized)
+        """
+        if self.destination_type == DestinationType.ORACLE and schema_name.startswith("_"):
+            schema_name = schema_name[1:]
+        return self.__normalize_non_column_identifier_name(input_name=schema_name, in_jinja=in_jinja, truncate=truncate)
+
+    def normalize_table_name(
+        self, table_name: str, in_jinja: bool = False, truncate: bool = True, conflict: bool = False, conflict_level: int = 0
+    ) -> str:
+        """
+        @param table_name is the table to normalize
+        @param in_jinja specifies whether the returned normalized name will be used inside a jinja macro or not
+        @param truncate if False, skip truncating the resulting normalized name (for example, when we don't
+        control how the name will be normalized)
+        @param conflict whether there is a conflict between the stream name and its fields
+        @param conflict_level is the json_path level at which the conflict happened
+        """
+        if self.destination_type == DestinationType.ORACLE and table_name.startswith("_"):
+            table_name = table_name[1:]
+        return self.__normalize_non_column_identifier_name(
+            input_name=table_name, in_jinja=in_jinja, truncate=truncate, conflict=conflict, conflict_level=conflict_level
+        )
+
+    def normalize_column_name(
+        self, column_name: str, in_jinja: bool = False, truncate: bool = True, conflict: bool = False, conflict_level: int = 0
+    ) -> str:
+        """
+        @param column_name is the column to normalize
+        @param in_jinja specifies whether the returned normalized name will be used inside a jinja macro or not
+        @param truncate if False, skip truncating the resulting normalized name (for example, when we don't
+        control how the name will be normalized)
+        @param conflict whether there is a conflict between the stream name and its fields
+        @param conflict_level is the json_path level at which the conflict happened
+        """
+        return self.__normalize_identifier_name(
+            column_name=column_name, in_jinja=in_jinja, truncate=truncate, conflict=conflict, conflict_level=conflict_level
+        )
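+    # Illustrative examples (not part of the class) of how the quoting decision
+    # above plays out on Postgres: reserved words, non-alphanumeric characters and
+    # leading digits force quotes, while plain snake_case names pass through.
+    #
+    #   t = DestinationNameTransformer(DestinationType.POSTGRES)
+    #   t.needs_quotes("select")      # True  - reserved keyword
+    #   t.needs_quotes("my column")   # True  - contains a space
+    #   t.needs_quotes("1st_column")  # True  - starts with a digit
+    #   t.needs_quotes("my_column")   # False
+    #
+    # On BigQuery, only the reserved keyword check applies, since special
+    # characters are normalized away instead of quoted.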
+    def truncate_identifier_name(self, input_name: str, custom_limit: int = -1, conflict: bool = False, conflict_level: int = 0) -> str:
+        """
+        @param input_name is the identifier name to middle truncate
+        @param custom_limit uses a custom length as the max instead of the destination max length
+        @param conflict whether there is a conflict between the stream name and its fields
+        @param conflict_level is the json_path level at which the conflict happened
+        """
+        limit = custom_limit - 1 if custom_limit > 0 else self.get_name_max_length()
+
+        if limit < len(input_name):
+            middle = round(limit / 2)
+            # truncate in the middle to preserve prefix/suffix instead
+            prefix = input_name[: limit - middle - 1]
+            suffix = input_name[1 - middle :]
+            # Add extra characters '__' (or '_<conflict_level>'), signaling a truncate in the identifier
+            mid = "__"
+            if conflict:
+                mid = f"_{conflict_level}"
+            print(f"Truncating {input_name} (#{len(input_name)}) to {prefix}{mid}{suffix} (#{len(prefix) + len(mid) + len(suffix)})")
+            input_name = f"{prefix}{mid}{suffix}"
+
+        return input_name
+
+    def get_name_max_length(self):
+        if self.destination_type.value in DESTINATION_SIZE_LIMITS:
+            destination_limit = DESTINATION_SIZE_LIMITS[self.destination_type.value]
+            return destination_limit - TRUNCATE_DBT_RESERVED_SIZE - TRUNCATE_RESERVED_SIZE
+        else:
+            raise KeyError(f"Unknown destination type {self.destination_type}")
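+    # Worked example (illustrative, not part of the class): on Postgres the
+    # effective identifier budget is 63 (server limit) - 12 (dbt suffix reserve)
+    # - 8 (underscore + hash reserve) = 43 characters. A 57-character name is
+    # therefore middle-truncated:
+    #
+    #   t = DestinationNameTransformer(DestinationType.POSTGRES)
+    #   t.truncate_identifier_name("a_very_long_stream_name_with_many_nested_properties_in_it")
+    #   # -> first 20 characters + '__' + last 21 characters = 43 characters total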
+    # Private methods
+
+    def __normalize_non_column_identifier_name(
+        self, input_name: str, in_jinja: bool = False, truncate: bool = True, conflict: bool = False, conflict_level: int = 0
+    ) -> str:
+        # We force standard naming for non column names (see issue #1785)
+        result = transform_standard_naming(input_name)
+        result = self.__normalize_naming_conventions(result, is_column=False)
+        if truncate:
+            result = self.truncate_identifier_name(input_name=result, conflict=conflict, conflict_level=conflict_level)
+        result = self.__normalize_identifier_case(result, is_quoted=False)
+        if result[0].isdigit():
+            if self.destination_type == DestinationType.MSSQL:
+                result = "_" + result
+            elif self.destination_type == DestinationType.ORACLE:
+                result = "ab_" + result
+        return result
+
+    def __normalize_identifier_name(
+        self, column_name: str, in_jinja: bool = False, truncate: bool = True, conflict: bool = False, conflict_level: int = 0
+    ) -> str:
+        result = self.__normalize_naming_conventions(column_name, is_column=True)
+        if truncate:
+            result = self.truncate_identifier_name(input_name=result, conflict=conflict, conflict_level=conflict_level)
+        if self.needs_quotes(result):
+            if self.destination_type.value == DestinationType.CLICKHOUSE.value:
+                result = result.replace('"', "_")
+                result = result.replace("`", "_")
+                result = result.replace("'", "_")
+            elif (
+                self.destination_type.value != DestinationType.MYSQL.value
+                and self.destination_type.value != DestinationType.TIDB.value
+                and self.destination_type.value != DestinationType.DUCKDB.value
+            ):
+                result = result.replace('"', '""')
+            else:
+                result = result.replace("`", "_")
+                result = result.replace("'", "\\'")
+            result = self.__normalize_identifier_case(result, is_quoted=True)
+            result = self.apply_quote(result)
+            if not in_jinja:
+                result = jinja_call(result)
+            return result
+        else:
+            result = self.__normalize_identifier_case(result, is_quoted=False)
+            if in_jinja:
+                # to refer to columns while already in a jinja context, always quote
+                return f"'{result}'"
+        return result
+
+    def apply_quote(self, input: str, literal=True) -> str:
+        if literal:
+            input = f"'{input}'"
+        if self.destination_type == DestinationType.ORACLE:
+            # The Oracle dbt lib hasn't implemented adapter.quote yet
+            return f"quote({input})"
+        elif self.destination_type == DestinationType.CLICKHOUSE:
+            return f"quote({input})"
+        return f"adapter.quote({input})"
+
+    def __normalize_naming_conventions(self, input_name: str, is_column: bool = False) -> str:
+        result = input_name
+        if self.destination_type.value == DestinationType.ORACLE.value:
+            return transform_standard_naming(result)
+        elif self.destination_type.value == DestinationType.BIGQUERY.value:
+            # Can start with a number: datasetId, table
+            # Cannot start with a number: column
+            result = transform_standard_naming(result)
+            doesnt_start_with_alphaunderscore = match("[^A-Za-z_]", result[0]) is not None
+            if is_column and doesnt_start_with_alphaunderscore:
+                result = f"_{result}"
+        return result
+
+    def __normalize_identifier_case(self, input_name: str, is_quoted: bool = False) -> str:
+        result = input_name
+        if self.destination_type.value == DestinationType.BIGQUERY.value:
+            pass
+        elif self.destination_type.value == DestinationType.REDSHIFT.value:
+            # all tables (even quoted ones) are coerced to lowercase.
+            result = input_name.lower()
+        elif self.destination_type.value == DestinationType.POSTGRES.value:
+            if not is_quoted and not self.needs_quotes(input_name):
+                result = input_name.lower()
+        elif self.destination_type.value == DestinationType.SNOWFLAKE.value:
+            if not is_quoted and not self.needs_quotes(input_name):
+                result = input_name.upper()
+        elif self.destination_type.value == DestinationType.MYSQL.value:
+            if not is_quoted and not self.needs_quotes(input_name):
+                result = input_name.lower()
+        elif self.destination_type.value == DestinationType.MSSQL.value:
+            if not is_quoted and not self.needs_quotes(input_name):
+                result = input_name.lower()
+        elif self.destination_type.value == DestinationType.ORACLE.value:
+            if not is_quoted and not self.needs_quotes(input_name):
+                result = input_name.lower()
+            else:
+                result = input_name.upper()
+        elif self.destination_type.value == DestinationType.CLICKHOUSE.value:
+            pass
+        elif self.destination_type.value == DestinationType.TIDB.value:
+            if not is_quoted and not self.needs_quotes(input_name):
+                result = input_name.lower()
+        elif self.destination_type.value == DestinationType.DUCKDB.value:
+            if not is_quoted and not self.needs_quotes(input_name):
+                result = input_name.lower()
+        else:
+            raise KeyError(f"Unknown destination type {self.destination_type}")
+        return result
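+    # Illustrative summary (not part of the class) of the case rules above for an
+    # unquoted identifier such as "Users":
+    #
+    #   BigQuery / ClickHouse                     -> "Users"  (casing preserved)
+    #   Redshift                                  -> "users"  (always lowercased)
+    #   Postgres / MySQL / MSSQL / TiDB / DuckDB  -> "users"
+    #   Snowflake                                 -> "USERS"
+    #   Oracle                                    -> "users"  (but uppercased when quoted)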
+ """ + result = input_name + if self.destination_type.value == DestinationType.BIGQUERY.value: + # Columns are considered identical regardless of casing + result = input_name.lower() + elif self.destination_type.value == DestinationType.REDSHIFT.value: + # Columns are considered identical regardless of casing (even quoted ones) + result = input_name.lower() + elif self.destination_type.value == DestinationType.POSTGRES.value: + if not is_quoted and not self.needs_quotes(input_name): + result = input_name.lower() + elif self.destination_type.value == DestinationType.SNOWFLAKE.value: + if not is_quoted and not self.needs_quotes(input_name): + result = input_name.upper() + elif self.destination_type.value == DestinationType.MYSQL.value: + # Columns are considered identical regardless of casing (even quoted ones) + result = input_name.lower() + elif self.destination_type.value == DestinationType.MSSQL.value: + # Columns are considered identical regardless of casing (even quoted ones) + result = input_name.lower() + elif self.destination_type.value == DestinationType.ORACLE.value: + if not is_quoted and not self.needs_quotes(input_name): + result = input_name.lower() + else: + result = input_name.upper() + elif self.destination_type.value == DestinationType.CLICKHOUSE.value: + pass + elif self.destination_type.value == DestinationType.TIDB.value: + result = input_name.lower() + elif self.destination_type.value == DestinationType.DUCKDB.value: + result = input_name.lower() + else: + raise KeyError(f"Unknown destination type {self.destination_type}") + return result + + +# Static Functions + + +def transform_standard_naming(input_name: str) -> str: + result = input_name.strip() + result = strip_accents(result) + result = sub(r"\s+", "_", result) + result = sub(r"[^a-zA-Z0-9_]", "_", result) + return result + + +def transform_json_naming(input_name: str) -> str: + result = sub(r"['\"`]", "_", input_name) + return result + + +def strip_accents(input_name: str) -> str: + return "".join(c for c in ud.normalize("NFD", input_name) if ud.category(c) != "Mn") diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py new file mode 100644 index 0000000000000..ccfd5eaf07c12 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/reserved_keywords.py @@ -0,0 +1,3276 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +from typing import Set + +from normalization import DestinationType + +# https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords +BIGQUERY = { + "ALL", + "AND", + "ANY", + "ARRAY", + "AS", + "ASC", + "ASSERT_ROWS_MODIFIED", + "AT", + "BETWEEN", + "BY", + "CASE", + "CAST", + "COLLATE", + "CONTAINS", + "CREATE", + "CROSS", + "CUBE", + "CURRENT", + "CURRENT_DATE", + "CURRENT_TIME", + "CURRENT_TIMESTAMP", + "DEFAULT", + "DEFINE", + "DESC", + "DISTINCT", + "ELSE", + "END", + "ENUM", + "ESCAPE", + "EXCEPT", + "EXCLUDE", + "EXISTS", + "EXTRACT", + "FALSE", + "FETCH", + "FOLLOWING", + "FOR", + "FROM", + "FULL", + "GROUP", + "GROUPING", + "GROUPS", + "HASH", + "HAVING", + "IF", + "IGNORE", + "IN", + "INNER", + "INTERSECT", + "INTERVAL", + "INTO", + "IS", + "JOIN", + "LATERAL", + "LEFT", + "LIKE", + "LIMIT", + "LOOKUP", + "MERGE", + "NATURAL", + "NEW", + "NO", + "NOT", + "NULL", + "NULLS", + "OF", + "ON", + "OR", + "ORDER", + "OUTER", + "OVER", + "PARTITION", + "PRECEDING", + "PROTO", + "RANGE", + "RECURSIVE", + "RESPECT", + "RIGHT", + "ROLLUP", + "ROWS", + "SELECT", + "SET", + "SOME", + "STRUCT", + "TABLESAMPLE", + "THEN", + "TO", + "TREAT", + "TRUE", + "UNBOUNDED", + "UNION", + "UNNEST", + "USING", + "WHEN", + "WHERE", + "WINDOW", + "WITH", + "WITHIN", +} + +# https://docs.aws.amazon.com/redshift/latest/dg/r_pg_keywords.html +# Some additional keywords not supported by redshift are missing from their docs... +REDSHIFT = { + "AES128", + "AES256", + "ALL", + "ALLOWOVERWRITE", + "ANALYSE", + "ANALYZE", + "AND", + "ANY", + "ARRAY", + "AS", + "ASC", + "AUTHORIZATION", + "AZ64", + "BACKUP", + "BETWEEN", + "BINARY", + "BLANKSASNULL", + "BOTH", + "BOOLEAN", + "BYTEDICT", + "BZIP2", + "CASE", + "CAST", + "CHECK", + "COLLATE", + "COLUMN", + "CONSTRAINT", + "CREATE", + "CREDENTIALS", + "CROSS", + "CURRENT_DATE", + "CURRENT_TIME", + "CURRENT_TIMESTAMP", + "CURRENT_USER", + "CURRENT_USER_ID", + "DATETIME", + "DEFAULT", + "DEFERRABLE", + "DEFLATE", + "DEFRAG", + "DELTA", + "DELTA32K", + "DESC", + "DISABLE", + "DISTINCT", + "DO", + "ELSE", + "EMPTYASNULL", + "ENABLE", + "ENCODE", + "ENCRYPT", + "ENCRYPTION", + "END", + "EXCEPT", + "EXPLICIT", + "FALSE", + "FOR", + "FOREIGN", + "FREEZE", + "FROM", + "FULL", + "GLOBALDICT256", + "GLOBALDICT64K", + "GRANT", + "GROUP", + "GZIP", + "HAVING", + "IDENTITY", + "IGNORE", + "ILIKE", + "IN", + "INITIALLY", + "INNER", + "INTERSECT", + "INTERVAL", + "INTO", + "IS", + "ISNULL", + "JOIN", + "LANGUAGE", + "LEADING", + "LEFT", + "LIKE", + "LIMIT", + "LOCALTIME", + "LOCALTIMESTAMP", + "LUN", + "LUNS", + "LZO", + "LZOP", + "MINUS", + "MOSTLY16", + "MOSTLY32", + "MOSTLY8", + "NATURAL", + "NEW", + "NOT", + "NOTNULL", + "NULL", + "NULLS", + "OFF", + "OFFLINE", + "OFFSET", + "OID", + "OLD", + "ON", + "ONLY", + "OPEN", + "OR", + "ORDER", + "OUTER", + "OVERLAPS", + "PARALLEL", + "PARTITION", + "PERCENT", + "PERMISSIONS", + "PLACING", + "PRIMARY", + "RAW", + "READRATIO", + "RECOVER", + "REFERENCES", + "RESPECT", + "REJECTLOG", + "RESORT", + "RESTORE", + "RIGHT", + "SELECT", + "SESSION_USER", + "SIMILAR", + "SNAPSHOT", + "SOME", + "SYSDATE", + "SYSTEM", + "TABLE", + "TAG", + "TDES", + "TEXT255", + "TEXT32K", + "THEN", + "TIME", + "TIMESTAMP", + "TO", + "TOP", + "TRAILING", + "TRUE", + "TRUNCATECOLUMNS", + "UNION", + "UNIQUE", + "USER", + "USING", + "VERBOSE", + "WALLET", + "WHEN", + "WHERE", + "WITH", + "WITHOUT", +} + +# https://www.postgresql.org/docs/current/sql-keywords-appendix.html +POSTGRES = { + "A", + "ABORT", + "ABS", + "ABSENT", + 
"ABSOLUTE", + "ACCESS", + "ACCORDING", + "ACOS", + "ACTION", + "ADA", + "ADD", + "ADMIN", + "AFTER", + "AGGREGATE", + "ALL", + "ALLOCATE", + "ALSO", + "ALTER", + "ALWAYS", + "ANALYSE", + "ANALYZE", + "AND", + "ANY", + "ARE", + "ARRAY", + "ARRAY_AGG", + "ARRAY_MAX_CARDINALITY", + "AS", + "ASC", + "ASENSITIVE", + "ASIN", + "ASSERTION", + "ASSIGNMENT", + "ASYMMETRIC", + "AT", + "ATAN", + "ATOMIC", + "ATTACH", + "ATTRIBUTE", + "ATTRIBUTES", + "AUTHORIZATION", + "AVG", + "BACKWARD", + "BASE64", + "BEFORE", + "BEGIN", + "BEGIN_FRAME", + "BEGIN_PARTITION", + "BERNOULLI", + "BETWEEN", + "BIGINT", + "BINARY", + "BIT", + "BIT_LENGTH", + "BLOB", + "BLOCKED", + "BOM", + "BOOLEAN", + "BOTH", + "BREADTH", + "BY", + "C", + "CACHE", + "CALL", + "CALLED", + "CARDINALITY", + "CASCADE", + "CASCADED", + "CASE", + "CAST", + "CATALOG", + "CATALOG_NAME", + "CEIL", + "CEILING", + "CHAIN", + "CHAINING", + "CHAR", + "CHARACTER", + "CHARACTERISTICS", + "CHARACTERS", + "CHARACTER_LENGTH", + "CHARACTER_SET_CATALOG", + "CHARACTER_SET_NAME", + "CHARACTER_SET_SCHEMA", + "CHAR_LENGTH", + "CHECK", + "CHECKPOINT", + "CLASS", + "CLASSIFIER", + "CLASS_ORIGIN", + "CLOB", + "CLOSE", + "CLUSTER", + "COALESCE", + "COBOL", + "COLLATE", + "COLLATION", + "COLLATION_CATALOG", + "COLLATION_NAME", + "COLLATION_SCHEMA", + "COLLECT", + "COLUMN", + "COLUMNS", + "COLUMN_NAME", + "COMMAND_FUNCTION", + "COMMAND_FUNCTION_CODE", + "COMMENT", + "COMMENTS", + "COMMIT", + "COMMITTED", + "CONCURRENTLY", + "CONDITION", + "CONDITIONAL", + "CONDITION_NUMBER", + "CONFIGURATION", + "CONFLICT", + "CONNECT", + "CONNECTION", + "CONNECTION_NAME", + "CONSTRAINT", + "CONSTRAINTS", + "CONSTRAINT_CATALOG", + "CONSTRAINT_NAME", + "CONSTRAINT_SCHEMA", + "CONSTRUCTOR", + "CONTAINS", + "CONTENT", + "CONTINUE", + "CONTROL", + "CONVERSION", + "CONVERT", + "COPY", + "CORR", + "CORRESPONDING", + "COS", + "COSH", + "COST", + "COUNT", + "COVAR_POP", + "COVAR_SAMP", + "CREATE", + "CROSS", + "CSV", + "CUBE", + "CUME_DIST", + "CURRENT", + "CURRENT_CATALOG", + "CURRENT_DATE", + "CURRENT_DEFAULT_TRANSFORM_GROUP", + "CURRENT_PATH", + "CURRENT_ROLE", + "CURRENT_ROW", + "CURRENT_SCHEMA", + "CURRENT_TIME", + "CURRENT_TIMESTAMP", + "CURRENT_TRANSFORM_GROUP_FOR_TYPE", + "CURRENT_USER", + "CURSOR", + "CURSOR_NAME", + "CYCLE", + "DATA", + "DATABASE", + "DATALINK", + "DATE", + "DATETIME_INTERVAL_CODE", + "DATETIME_INTERVAL_PRECISION", + "DAY", + "DB", + "DEALLOCATE", + "DEC", + "DECFLOAT", + "DECIMAL", + "DECLARE", + "DEFAULT", + "DEFAULTS", + "DEFERRABLE", + "DEFERRED", + "DEFINE", + "DEFINED", + "DEFINER", + "DEGREE", + "DELETE", + "DELIMITER", + "DELIMITERS", + "DENSE_RANK", + "DEPENDS", + "DEPTH", + "DEREF", + "DERIVED", + "DESC", + "DESCRIBE", + "DESCRIPTOR", + "DETACH", + "DETERMINISTIC", + "DIAGNOSTICS", + "DICTIONARY", + "DISABLE", + "DISCARD", + "DISCONNECT", + "DISPATCH", + "DISTINCT", + "DLNEWCOPY", + "DLPREVIOUSCOPY", + "DLURLCOMPLETE", + "DLURLCOMPLETEONLY", + "DLURLCOMPLETEWRITE", + "DLURLPATH", + "DLURLPATHONLY", + "DLURLPATHWRITE", + "DLURLSCHEME", + "DLURLSERVER", + "DLVALUE", + "DO", + "DOCUMENT", + "DOMAIN", + "DOUBLE", + "DROP", + "DYNAMIC", + "DYNAMIC_FUNCTION", + "DYNAMIC_FUNCTION_CODE", + "EACH", + "ELEMENT", + "ELSE", + "EMPTY", + "ENABLE", + "ENCODING", + "ENCRYPTED", + "END", + "END-EXEC", + "END_FRAME", + "END_PARTITION", + "ENFORCED", + "ENUM", + "EQUALS", + "ERROR", + "ESCAPE", + "EVENT", + "EVERY", + "EXCEPT", + "EXCEPTION", + "EXCLUDE", + "EXCLUDING", + "EXCLUSIVE", + "EXEC", + "EXECUTE", + "EXISTS", + "EXP", + "EXPLAIN", + "EXPRESSION", + "EXTENSION", 
+ "EXTERNAL", + "EXTRACT", + "FALSE", + "FAMILY", + "FETCH", + "FILE", + "FILTER", + "FINAL", + "FINISH", + "FIRST", + "FIRST_VALUE", + "FLAG", + "FLOAT", + "FLOOR", + "FOLLOWING", + "FOR", + "FORCE", + "FOREIGN", + "FORMAT", + "FORTRAN", + "FORWARD", + "FOUND", + "FRAME_ROW", + "FREE", + "FREEZE", + "FROM", + "FS", + "FULFILL", + "FULL", + "FUNCTION", + "FUNCTIONS", + "FUSION", + "G", + "GENERAL", + "GENERATED", + "GET", + "GLOBAL", + "GO", + "GOTO", + "GRANT", + "GRANTED", + "GREATEST", + "GROUP", + "GROUPING", + "HANDLER", + "HAVING", + "HEADER", + "HEX", + "HIERARCHY", + "HOLD", + "HOUR", + "ID", + "IDENTITY", + "IF", + "IGNORE", + "ILIKE", + "IMMEDIATE", + "IMMEDIATELY", + "IMMUTABLE", + "IMPLEMENTATION", + "IMPLICIT", + "IMPORT", + "IN", + "INCLUDE", + "INCLUDING", + "INCREMENT", + "INDENT", + "INDEX", + "INDEXES", + "INDICATOR", + "INHERIT", + "INHERITS", + "INITIAL", + "INITIALLY", + "INLINE", + "INNER", + "INOUT", + "INPUT", + "INSENSITIVE", + "INSERT", + "INSTANCE", + "INSTANTIABLE", + "INSTEAD", + "INT", + "INTEGER", + "INTEGRITY", + "INTERSECT", + "INTERSECTION", + "INTERVAL", + "INTO", + "INVOKER", + "IS", + "ISNULL", + "ISOLATION", + "JOIN", + "JSON", + "JSON_ARRAY", + "JSON_ARRAYAGG", + "JSON_EXISTS", + "JSON_OBJECT", + "JSON_OBJECTAGG", + "JSON_QUERY", + "JSON_TABLE", + "JSON_TABLE_PRIMITIVE", + "JSON_VALUE", + "K", + "KEEP", + "KEY", + "KEYS", + "KEY_MEMBER", + "KEY_TYPE", + "LABEL", + "LAG", + "LANGUAGE", + "LARGE", + "LAST", + "LAST_VALUE", + "LATERAL", + "LEAD", + "LEADING", + "LEAKPROOF", + "LEAST", + "LEFT", + "LENGTH", + "LEVEL", + "LIBRARY", + "LIKE", + "LIKE_REGEX", + "LIMIT", + "LINK", + "LISTAGG", + "LISTEN", + "LN", + "LOAD", + "LOCAL", + "LOCALTIME", + "LOCALTIMESTAMP", + "LOCATION", + "LOCATOR", + "LOCK", + "LOCKED", + "LOG", + "LOG10", + "LOGGED", + "LOWER", + "M", + "MAP", + "MAPPING", + "MATCH", + "MATCHED", + "MATCHES", + "MATCH_NUMBER", + "MATCH_RECOGNIZE", + "MATERIALIZED", + "MAX", + "MAXVALUE", + "MEASURES", + "MEMBER", + "MERGE", + "MESSAGE_LENGTH", + "MESSAGE_OCTET_LENGTH", + "MESSAGE_TEXT", + "METHOD", + "MIN", + "MINUTE", + "MINVALUE", + "MOD", + "MODE", + "MODIFIES", + "MODULE", + "MONTH", + "MORE", + "MOVE", + "MULTISET", + "MUMPS", + "NAME", + "NAMES", + "NAMESPACE", + "NATIONAL", + "NATURAL", + "NCHAR", + "NCLOB", + "NESTED", + "NESTING", + "NEW", + "NEXT", + "NFC", + "NFD", + "NFKC", + "NFKD", + "NIL", + "NO", + "NONE", + "NORMALIZE", + "NORMALIZED", + "NOT", + "NOTHING", + "NOTIFY", + "NOTNULL", + "NOWAIT", + "NTH_VALUE", + "NTILE", + "NULL", + "NULLABLE", + "NULLIF", + "NULLS", + "NUMBER", + "NUMERIC", + "OBJECT", + "OCCURRENCES_REGEX", + "OCTETS", + "OCTET_LENGTH", + "OF", + "OFF", + "OFFSET", + "OIDS", + "OLD", + "OMIT", + "ON", + "ONE", + "ONLY", + "OPEN", + "OPERATOR", + "OPTION", + "OPTIONS", + "OR", + "ORDER", + "ORDERING", + "ORDINALITY", + "OTHERS", + "OUT", + "OUTER", + "OUTPUT", + "OVER", + "OVERFLOW", + "OVERLAPS", + "OVERLAY", + "OVERRIDING", + "OWNED", + "OWNER", + "P", + "PAD", + "PARALLEL", + "PARAMETER", + "PARAMETER_MODE", + "PARAMETER_NAME", + "PARAMETER_ORDINAL_POSITION", + "PARAMETER_SPECIFIC_CATALOG", + "PARAMETER_SPECIFIC_NAME", + "PARAMETER_SPECIFIC_SCHEMA", + "PARSER", + "PARTIAL", + "PARTITION", + "PASCAL", + "PASS", + "PASSING", + "PASSTHROUGH", + "PASSWORD", + "PAST", + "PATH", + "PATTERN", + "PER", + "PERCENT", + "PERCENTILE_CONT", + "PERCENTILE_DISC", + "PERCENT_RANK", + "PERIOD", + "PERMISSION", + "PERMUTE", + "PLACING", + "PLAN", + "PLANS", + "PLI", + "POLICY", + "PORTION", + "POSITION", + "POSITION_REGEX", + 
"POWER", + "PRECEDES", + "PRECEDING", + "PRECISION", + "PREPARE", + "PREPARED", + "PRESERVE", + "PRIMARY", + "PRIOR", + "PRIVATE", + "PRIVILEGES", + "PROCEDURAL", + "PROCEDURE", + "PROCEDURES", + "PROGRAM", + "PRUNE", + "PTF", + "PUBLICATION", + "QUOTE", + "QUOTES", + "RANGE", + "RANK", + "READ", + "READS", + "REAL", + "REASSIGN", + "RECHECK", + "RECOVERY", + "RECURSIVE", + "REF", + "REFERENCES", + "REFERENCING", + "REFRESH", + "REGR_AVGX", + "REGR_AVGY", + "REGR_COUNT", + "REGR_INTERCEPT", + "REGR_R2", + "REGR_SLOPE", + "REGR_SXX", + "REGR_SXY", + "REGR_SYY", + "REINDEX", + "RELATIVE", + "RELEASE", + "RENAME", + "REPEATABLE", + "REPLACE", + "REPLICA", + "REQUIRING", + "RESET", + "RESPECT", + "RESTART", + "RESTORE", + "RESTRICT", + "RESULT", + "RETURN", + "RETURNED_CARDINALITY", + "RETURNED_LENGTH", + "RETURNED_OCTET_LENGTH", + "RETURNED_SQLSTATE", + "RETURNING", + "RETURNS", + "REVOKE", + "RIGHT", + "ROLE", + "ROLLBACK", + "ROLLUP", + "ROUTINE", + "ROUTINES", + "ROUTINE_CATALOG", + "ROUTINE_NAME", + "ROUTINE_SCHEMA", + "ROW", + "ROWS", + "ROW_COUNT", + "ROW_NUMBER", + "RULE", + "RUNNING", + "SAVEPOINT", + "SCALAR", + "SCALE", + "SCHEMA", + "SCHEMAS", + "SCHEMA_NAME", + "SCOPE", + "SCOPE_CATALOG", + "SCOPE_NAME", + "SCOPE_SCHEMA", + "SCROLL", + "SEARCH", + "SECOND", + "SECTION", + "SECURITY", + "SEEK", + "SELECT", + "SELECTIVE", + "SELF", + "SENSITIVE", + "SEQUENCE", + "SEQUENCES", + "SERIALIZABLE", + "SERVER", + "SERVER_NAME", + "SESSION", + "SESSION_USER", + "SET", + "SETOF", + "SETS", + "SHARE", + "SHOW", + "SIMILAR", + "SIMPLE", + "SIN", + "SINH", + "SIZE", + "SKIP", + "SMALLINT", + "SNAPSHOT", + "SOME", + "SOURCE", + "SPACE", + "SPECIFIC", + "SPECIFICTYPE", + "SPECIFIC_NAME", + "SQL", + "SQLCODE", + "SQLERROR", + "SQLEXCEPTION", + "SQLSTATE", + "SQLWARNING", + "SQRT", + "STABLE", + "STANDALONE", + "START", + "STATE", + "STATEMENT", + "STATIC", + "STATISTICS", + "STDDEV_POP", + "STDDEV_SAMP", + "STDIN", + "STDOUT", + "STORAGE", + "STORED", + "STRICT", + "STRING", + "STRIP", + "STRUCTURE", + "STYLE", + "SUBCLASS_ORIGIN", + "SUBMULTISET", + "SUBSCRIPTION", + "SUBSET", + "SUBSTRING", + "SUBSTRING_REGEX", + "SUCCEEDS", + "SUM", + "SUPPORT", + "SYMMETRIC", + "SYSID", + "SYSTEM", + "SYSTEM_TIME", + "SYSTEM_USER", + "T", + "TABLE", + "TABLES", + "TABLESAMPLE", + "TABLESPACE", + "TABLE_NAME", + "TAN", + "TANH", + "TEMP", + "TEMPLATE", + "TEMPORARY", + "TEXT", + "THEN", + "THROUGH", + "TIES", + "TIME", + "TIMESTAMP", + "TIMEZONE_HOUR", + "TIMEZONE_MINUTE", + "TO", + "TOKEN", + "TOP_LEVEL_COUNT", + "TRAILING", + "TRANSACTION", + "TRANSACTIONS_COMMITTED", + "TRANSACTIONS_ROLLED_BACK", + "TRANSACTION_ACTIVE", + "TRANSFORM", + "TRANSFORMS", + "TRANSLATE", + "TRANSLATE_REGEX", + "TRANSLATION", + "TREAT", + "TRIGGER", + "TRIGGER_CATALOG", + "TRIGGER_NAME", + "TRIGGER_SCHEMA", + "TRIM", + "TRIM_ARRAY", + "TRUE", + "TRUNCATE", + "TRUSTED", + "TYPE", + "TYPES", + "UESCAPE", + "UNBOUNDED", + "UNCOMMITTED", + "UNCONDITIONAL", + "UNDER", + "UNENCRYPTED", + "UNION", + "UNIQUE", + "UNKNOWN", + "UNLINK", + "UNLISTEN", + "UNLOGGED", + "UNMATCHED", + "UNNAMED", + "UNNEST", + "UNTIL", + "UNTYPED", + "UPDATE", + "UPPER", + "URI", + "USAGE", + "USER", + "USER_DEFINED_TYPE_CATALOG", + "USER_DEFINED_TYPE_CODE", + "USER_DEFINED_TYPE_NAME", + "USER_DEFINED_TYPE_SCHEMA", + "USING", + "UTF16", + "UTF32", + "UTF8", + "VACUUM", + "VALID", + "VALIDATE", + "VALIDATOR", + "VALUE", + "VALUES", + "VALUE_OF", + "VARBINARY", + "VARCHAR", + "VARIADIC", + "VARYING", + "VAR_POP", + "VAR_SAMP", + "VERBOSE", + "VERSION", + 
"VERSIONING", + "VIEW", + "VIEWS", + "VOLATILE", + "WHEN", + "WHENEVER", + "WHERE", + "WHITESPACE", + "WIDTH_BUCKET", + "WINDOW", + "WITH", + "WITHIN", + "WITHOUT", + "WORK", + "WRAPPER", + "WRITE", + "XML", + "XMLAGG", + "XMLATTRIBUTES", + "XMLBINARY", + "XMLCAST", + "XMLCOMMENT", + "XMLCONCAT", + "XMLDECLARATION", + "XMLDOCUMENT", + "XMLELEMENT", + "XMLEXISTS", + "XMLFOREST", + "XMLITERATE", + "XMLNAMESPACES", + "XMLPARSE", + "XMLPI", + "XMLQUERY", + "XMLROOT", + "XMLSCHEMA", + "XMLSERIALIZE", + "XMLTABLE", + "XMLTEXT", + "XMLVALIDATE", + "YEAR", + "YES", + "ZONE", +} + +# https://docs.snowflake.com/en/sql-reference/reserved-keywords.html +SNOWFLAKE = { + "ALL", + "ALTER", + "AND", + "ANY", + "AS", + "BETWEEN", + "BY", + "CASE", + "CAST", + "CHECK", + "COLUMN", + "CONNECT", + "CONNECTION", + "CONSTRAINT", + "CREATE", + "CROSS", + "CURRENT", + "CURRENT_DATE", + "CURRENT_TIME", + "CURRENT_TIMESTAMP", + "CURRENT_USER", + "DATABASE", + "DEFAULT", + "DELETE", + "DISTINCT", + "DROP", + "ELSE", + "EXISTS", + "FALSE", + "FOLLOWING", + "FOR", + "FROM", + "FULL", + "GRANT", + "GROUP", + "GSCLUSTER", + "HAVING", + "ILIKE", + "IN", + "INCREMENT", + "INNER", + "INSERT", + "INTERSECT", + "INTO", + "IS", + "ISSUE", + "JOIN", + "LATERAL", + "LEFT", + "LIKE", + "LOCALTIME", + "LOCALTIMESTAMP", + "MINUS", + "NATURAL", + "NOT", + "NULL", + "OF", + "ON", + "OR", + "ORDER", + "ORGANIZATION", + "QUALIFY", + "REGEXP", + "REVOKE", + "RIGHT", + "RLIKE", + "ROW", + "ROWS", + "SAMPLE", + "SCHEMA", + "SELECT", + "SET", + "SOME", + "START", + "TABLE", + "TABLESAMPLE", + "THEN", + "TO", + "TRIGGER", + "TRUE", + "TRY_CAST", + "UNION", + "UNIQUE", + "UPDATE", + "USING", + "VALUES", + "VIEW", + "WHEN", + "WHENEVER", + "WHERE", + "WITH", +} + +# https://dev.mysql.com/doc/refman/8.0/en/keywords.html +MYSQL = { + "ACCESSIBLE", + "ACCOUNT", + "ACTION", + "ACTIVE", + "ADD", + "ADMIN", + "AFTER", + "AGAINST", + "AGGREGATE", + "ALGORITHM", + "ALL", + "ALTER", + "ALWAYS", + "ANALYSE", + "ANALYZE", + "AND", + "ANY", + "ARRAY", + "AS", + "ASC", + "ASCII", + "ASENSITIVE", + "AT", + "ATTRIBUTE", + "AUTOEXTEND_SIZE", + "AUTO_INCREMENT", + "AVG", + "AVG_ROW_LENGTH", + "BACKUP", + "BEFORE", + "BEGIN", + "BETWEEN", + "BIGINT", + "BINARY", + "BINLOG", + "BIT", + "BLOB", + "BLOCK", + "BOOL", + "BOOLEAN", + "BOTH", + "BTREE", + "BUCKETS", + "BY", + "BYTE", + "CACHE", + "CALL", + "CASCADE", + "CASCADED", + "CASE", + "CATALOG_NAME", + "CHAIN", + "CHANGE", + "CHANGED", + "CHANNEL", + "CHAR", + "CHARACTER", + "CHARSET", + "CHECK", + "CHECKSUM", + "CIPHER", + "CLASS_ORIGIN", + "CLIENT", + "CLONE", + "CLOSE", + "COALESCE", + "CODE", + "COLLATE", + "COLLATION", + "COLUMN", + "COLUMNS", + "COLUMN_FORMAT", + "COLUMN_NAME", + "COMMENT", + "COMMIT", + "COMMITTED", + "COMPACT", + "COMPLETION", + "COMPONENT", + "COMPRESSED", + "COMPRESSION", + "CONCURRENT", + "CONDITION", + "CONNECTION", + "CONSISTENT", + "CONSTRAINT", + "CONSTRAINT_CATALOG", + "CONSTRAINT_NAME", + "CONSTRAINT_SCHEMA", + "CONTAINS", + "CONTEXT", + "CONTINUE", + "CONVERT", + "CPU", + "CREATE", + "CROSS", + "CUBE", + "CUME_DIST", + "CURRENT", + "CURRENT_DATE", + "CURRENT_TIME", + "CURRENT_TIMESTAMP", + "CURRENT_USER", + "CURSOR", + "CURSOR_NAME", + "DATA", + "DATABASE", + "DATABASES", + "DATAFILE", + "DATE", + "DATETIME", + "DAY", + "DAY_HOUR", + "DAY_MICROSECOND", + "DAY_MINUTE", + "DAY_SECOND", + "DEALLOCATE", + "DEC", + "DECIMAL", + "DECLARE", + "DEFAULT", + "DEFAULT_AUTH", + "DEFINER", + "DEFINITION", + "DELAYED", + "DELAY_KEY_WRITE", + "DELETE", + "DENSE_RANK", + "DESC", + 
"DESCRIBE", + "DESCRIPTION", + "DES_KEY_FILE", + "DETERMINISTIC", + "DIAGNOSTICS", + "DIRECTORY", + "DISABLE", + "DISCARD", + "DISK", + "DISTINCT", + "DISTINCTROW", + "DIV", + "DO", + "DOUBLE", + "DROP", + "DUAL", + "DUMPFILE", + "DUPLICATE", + "DYNAMIC", + "EACH", + "ELSE", + "ELSEIF", + "EMPTY", + "ENABLE", + "ENCLOSED", + "ENCRYPTION", + "END", + "ENDS", + "ENFORCED", + "ENGINE", + "ENGINES", + "ENGINE_ATTRIBUTE", + "ENUM", + "ERROR", + "ERRORS", + "ESCAPE", + "ESCAPED", + "EVENT", + "EVENTS", + "EVERY", + "EXCEPT", + "EXCHANGE", + "EXCLUDE", + "EXECUTE", + "EXISTS", + "EXIT", + "EXPANSION", + "EXPIRE", + "EXPLAIN", + "EXPORT", + "EXTENDED", + "EXTENT_SIZE", + "FAILED_LOGIN_ATTEMPTS", + "FALSE", + "FAST", + "FAULTS", + "FETCH", + "FIELDS", + "FILE", + "FILE_BLOCK_SIZE", + "FILTER", + "FIRST", + "FIRST_VALUE", + "FIXED", + "FLOAT", + "FLOAT4", + "FLOAT8", + "FLUSH", + "FOLLOWING", + "FOLLOWS", + "FOR", + "FORCE", + "FOREIGN", + "FORMAT", + "FOUND", + "FROM", + "FULL", + "FULLTEXT", + "FUNCTION", + "GENERAL", + "GENERATED", + "GEOMCOLLECTION", + "GEOMETRY", + "GEOMETRYCOLLECTION", + "GET", + "GET_FORMAT", + "GET_MASTER_PUBLIC_KEY", + "GET_SOURCE_PUBLIC_KEY", + "GLOBAL", + "GRANT", + "GRANTS", + "GROUP", + "GROUPING", + "GROUPS", + "GROUP_REPLICATION", + "HANDLER", + "HASH", + "HAVING", + "HELP", + "HIGH_PRIORITY", + "HISTOGRAM", + "HISTORY", + "HOST", + "HOSTS", + "HOUR", + "HOUR_MICROSECOND", + "HOUR_MINUTE", + "HOUR_SECOND", + "IDENTIFIED", + "IF", + "IGNORE", + "IGNORE_SERVER_IDS", + "IMPORT", + "IN", + "INACTIVE", + "INDEX", + "INDEXES", + "INFILE", + "INITIAL_SIZE", + "INNER", + "INOUT", + "INSENSITIVE", + "INSERT", + "INSERT_METHOD", + "INSTALL", + "INSTANCE", + "INT", + "INT1", + "INT2", + "INT3", + "INT4", + "INT8", + "INTEGER", + "INTERVAL", + "INTO", + "INVISIBLE", + "INVOKER", + "IO", + "IO_AFTER_GTIDS", + "IO_BEFORE_GTIDS", + "IO_THREAD", + "IPC", + "IS", + "ISOLATION", + "ISSUER", + "ITERATE", + "JOIN", + "JSON", + "JSON_TABLE", + "JSON_VALUE", + "KEY", + "KEYRING", + "KEYS", + "KEY_BLOCK_SIZE", + "KILL", + "LAG", + "LANGUAGE", + "LAST", + "LAST_VALUE", + "LATERAL", + "LEAD", + "LEADING", + "LEAVE", + "LEAVES", + "LEFT", + "LESS", + "LEVEL", + "LIKE", + "LIMIT", + "LINEAR", + "LINES", + "LINESTRING", + "LIST", + "LOAD", + "LOCAL", + "LOCALTIME", + "LOCALTIMESTAMP", + "LOCK", + "LOCKED", + "LOCKS", + "LOGFILE", + "LOGS", + "LONG", + "LONGBLOB", + "LONGTEXT", + "LOOP", + "LOW_PRIORITY", + "MASTER", + "MASTER_AUTO_POSITION", + "MASTER_BIND", + "MASTER_COMPRESSION_ALGORITHMS", + "MASTER_CONNECT_RETRY", + "MASTER_DELAY", + "MASTER_HEARTBEAT_PERIOD", + "MASTER_HOST", + "MASTER_LOG_FILE", + "MASTER_LOG_POS", + "MASTER_PASSWORD", + "MASTER_PORT", + "MASTER_PUBLIC_KEY_PATH", + "MASTER_RETRY_COUNT", + "MASTER_SERVER_ID", + "MASTER_SSL", + "MASTER_SSL_CA", + "MASTER_SSL_CAPATH", + "MASTER_SSL_CERT", + "MASTER_SSL_CIPHER", + "MASTER_SSL_CRL", + "MASTER_SSL_CRLPATH", + "MASTER_SSL_KEY", + "MASTER_SSL_VERIFY_SERVER_CERT", + "MASTER_TLS_CIPHERSUITES", + "MASTER_TLS_VERSION", + "MASTER_USER", + "MASTER_ZSTD_COMPRESSION_LEVEL", + "MATCH", + "MAXVALUE", + "MAX_CONNECTIONS_PER_HOUR", + "MAX_QUERIES_PER_HOUR", + "MAX_ROWS", + "MAX_SIZE", + "MAX_UPDATES_PER_HOUR", + "MAX_USER_CONNECTIONS", + "MEDIUM", + "MEDIUMBLOB", + "MEDIUMINT", + "MEDIUMTEXT", + "MEMBER", + "MEMORY", + "MERGE", + "MESSAGE_TEXT", + "MICROSECOND", + "MIDDLEINT", + "MIGRATE", + "MINUTE", + "MINUTE_MICROSECOND", + "MINUTE_SECOND", + "MIN_ROWS", + "MOD", + "MODE", + "MODIFIES", + "MODIFY", + "MONTH", + "MULTILINESTRING", + 
"MULTIPOINT", + "MULTIPOLYGON", + "MUTEX", + "MYSQL_ERRNO", + "NAME", + "NAMES", + "NATIONAL", + "NATURAL", + "NCHAR", + "NDB", + "NDBCLUSTER", + "NESTED", + "NETWORK_NAMESPACE", + "NEVER", + "NEW", + "NEXT", + "NO", + "NODEGROUP", + "NONE", + "NOT", + "NOWAIT", + "NO_WAIT", + "NO_WRITE_TO_BINLOG", + "NTH_VALUE", + "NTILE", + "NULL", + "NULLS", + "NUMBER", + "NUMERIC", + "NVARCHAR", + "OF", + "OFF", + "OFFSET", + "OJ", + "OLD", + "ON", + "ONE", + "ONLY", + "OPEN", + "OPTIMIZE", + "OPTIMIZER_COSTS", + "OPTION", + "OPTIONAL", + "OPTIONALLY", + "OPTIONS", + "OR", + "ORDER", + "ORDINALITY", + "ORGANIZATION", + "OTHERS", + "OUT", + "OUTER", + "OUTFILE", + "OVER", + "OWNER", + "PACK_KEYS", + "PAGE", + "PARSER", + "PARSE_GCOL_EXPR", + "PARTIAL", + "PARTITION", + "PARTITIONING", + "PARTITIONS", + "PASSWORD", + "PASSWORD_LOCK_TIME", + "PATH", + "PERCENT_RANK", + "PERSIST", + "PERSIST_ONLY", + "PHASE", + "PLUGIN", + "PLUGINS", + "PLUGIN_DIR", + "POINT", + "POLYGON", + "PORT", + "PRECEDES", + "PRECEDING", + "PRECISION", + "PREPARE", + "PRESERVE", + "PREV", + "PRIMARY", + "PRIVILEGES", + "PRIVILEGE_CHECKS_USER", + "PROCEDURE", + "PROCESS", + "PROCESSLIST", + "PROFILE", + "PROFILES", + "PROXY", + "PURGE", + "QUARTER", + "QUERY", + "QUICK", + "RANDOM", + "RANGE", + "RANK", + "READ", + "READS", + "READ_ONLY", + "READ_WRITE", + "REAL", + "REBUILD", + "RECOVER", + "RECURSIVE", + "REDOFILE", + "REDO_BUFFER_SIZE", + "REDUNDANT", + "REFERENCE", + "REFERENCES", + "REGEXP", + "RELAY", + "RELAYLOG", + "RELAY_LOG_FILE", + "RELAY_LOG_POS", + "RELAY_THREAD", + "RELEASE", + "RELOAD", + "REMOTE", + "REMOVE", + "RENAME", + "REORGANIZE", + "REPAIR", + "REPEAT", + "REPEATABLE", + "REPLACE", + "REPLICA", + "REPLICAS", + "REPLICATE_DO_DB", + "REPLICATE_DO_TABLE", + "REPLICATE_IGNORE_DB", + "REPLICATE_IGNORE_TABLE", + "REPLICATE_REWRITE_DB", + "REPLICATE_WILD_DO_TABLE", + "REPLICATE_WILD_IGNORE_TABLE", + "REPLICATION", + "REQUIRE", + "REQUIRE_ROW_FORMAT", + "RESET", + "RESIGNAL", + "RESOURCE", + "RESPECT", + "RESTART", + "RESTORE", + "RESTRICT", + "RESUME", + "RETAIN", + "RETURN", + "RETURNED_SQLSTATE", + "RETURNING", + "RETURNS", + "REUSE", + "REVERSE", + "REVOKE", + "RIGHT", + "RLIKE", + "ROLE", + "ROLLBACK", + "ROLLUP", + "ROTATE", + "ROUTINE", + "ROW", + "ROWS", + "ROW_COUNT", + "ROW_FORMAT", + "ROW_NUMBER", + "RTREE", + "SAVEPOINT", + "SCHEDULE", + "SCHEMA", + "SCHEMAS", + "SCHEMA_NAME", + "SECOND", + "SECONDARY", + "SECONDARY_ENGINE", + "SECONDARY_ENGINE_ATTRIBUTE", + "SECONDARY_LOAD", + "SECONDARY_UNLOAD", + "SECOND_MICROSECOND", + "SECURITY", + "SELECT", + "SENSITIVE", + "SEPARATOR", + "SERIAL", + "SERIALIZABLE", + "SERVER", + "SESSION", + "SET", + "SHARE", + "SHOW", + "SHUTDOWN", + "SIGNAL", + "SIGNED", + "SIMPLE", + "SKIP", + "SLAVE", + "SLOW", + "SMALLINT", + "SNAPSHOT", + "SOCKET", + "SOME", + "SONAME", + "SOUNDS", + "SOURCE", + "SOURCE_AUTO_POSITION", + "SOURCE_BIND", + "SOURCE_COMPRESSION_ALGORITHMS", + "SOURCE_CONNECT_RETRY", + "SOURCE_DELAY", + "SOURCE_HEARTBEAT_PERIOD", + "SOURCE_HOST", + "SOURCE_LOG_FILE", + "SOURCE_LOG_POS", + "SOURCE_PASSWORD", + "SOURCE_PORT", + "SOURCE_PUBLIC_KEY_PATH", + "SOURCE_RETRY_COUNT", + "SOURCE_SSL", + "SOURCE_SSL_CA", + "SOURCE_SSL_CAPATH", + "SOURCE_SSL_CERT", + "SOURCE_SSL_CIPHER", + "SOURCE_SSL_CRL", + "SOURCE_SSL_CRLPATH", + "SOURCE_SSL_KEY", + "SOURCE_SSL_VERIFY_SERVER_CERT", + "SOURCE_TLS_CIPHERSUITES", + "SOURCE_TLS_VERSION", + "SOURCE_USER", + "SOURCE_ZSTD_COMPRESSION_LEVEL", + "SPATIAL", + "SPECIFIC", + "SQL", + "SQLEXCEPTION", + "SQLSTATE", + "SQLWARNING", + 
"SQL_AFTER_GTIDS", + "SQL_AFTER_MTS_GAPS", + "SQL_BEFORE_GTIDS", + "SQL_BIG_RESULT", + "SQL_BUFFER_RESULT", + "SQL_CACHE", + "SQL_CALC_FOUND_ROWS", + "SQL_NO_CACHE", + "SQL_SMALL_RESULT", + "SQL_THREAD", + "SQL_TSI_DAY", + "SQL_TSI_HOUR", + "SQL_TSI_MINUTE", + "SQL_TSI_MONTH", + "SQL_TSI_QUARTER", + "SQL_TSI_SECOND", + "SQL_TSI_WEEK", + "SQL_TSI_YEAR", + "SRID", + "SSL", + "STACKED", + "START", + "STARTING", + "STARTS", + "STATS_AUTO_RECALC", + "STATS_PERSISTENT", + "STATS_SAMPLE_PAGES", + "STATUS", + "STOP", + "STORAGE", + "STORED", + "STRAIGHT_JOIN", + "STREAM", + "STRING", + "SUBCLASS_ORIGIN", + "SUBJECT", + "SUBPARTITION", + "SUBPARTITIONS", + "SUPER", + "SUSPEND", + "SWAPS", + "SWITCHES", + "SYSTEM", + "TABLE", + "TABLES", + "TABLESPACE", + "TABLE_CHECKSUM", + "TABLE_NAME", + "TEMPORARY", + "TEMPTABLE", + "TERMINATED", + "TEXT", + "THAN", + "THEN", + "THREAD_PRIORITY", + "TIES", + "TIME", + "TIMESTAMP", + "TIMESTAMPADD", + "TIMESTAMPDIFF", + "TINYBLOB", + "TINYINT", + "TINYTEXT", + "TLS", + "TO", + "TRAILING", + "TRANSACTION", + "TRIGGER", + "TRIGGERS", + "TRUE", + "TRUNCATE", + "TYPE", + "TYPES", + "UNBOUNDED", + "UNCOMMITTED", + "UNDEFINED", + "UNDO", + "UNDOFILE", + "UNDO_BUFFER_SIZE", + "UNICODE", + "UNINSTALL", + "UNION", + "UNIQUE", + "UNKNOWN", + "UNLOCK", + "UNSIGNED", + "UNTIL", + "UPDATE", + "UPGRADE", + "USAGE", + "USE", + "USER", + "USER_RESOURCES", + "USE_FRM", + "USING", + "UTC_DATE", + "UTC_TIME", + "UTC_TIMESTAMP", + "VALIDATION", + "VALUE", + "VALUES", + "VARBINARY", + "VARCHAR", + "VARCHARACTER", + "VARIABLES", + "VARYING", + "VCPU", + "VIEW", + "VIRTUAL", + "VISIBLE", + "WAIT", + "WARNINGS", + "WEEK", + "WEIGHT_STRING", + "WHEN", + "WHERE", + "WHILE", + "WINDOW", + "WITH", + "WITHOUT", + "WORK", + "WRAPPER", + "WRITE", + "X509", + "XA", + "XID", + "XML", + "XOR", + "YEAR", + "YEAR_MONTH", + "ZEROFILL", + "ZONE", +} + +# https://docs.oracle.com/cd/B19306_01/server.102/b14200/ap_keywd.htm +ORACLE = { + "ACCESS", + "ADD", + "ALL", + "ALTER", + "AND", + "ANY", + "AS", + "ASC", + "AUDIT", + "BETWEEN", + "BY", + "CHAR", + "CHECK", + "CLUSTER", + "COLUMN", + "COMMENT", + "COMPRESS", + "CONNECT", + "CREATE", + "CURRENT", + "DATE", + "DECIMAL", + "DEFAULT", + "DELETE", + "DESC", + "DISTINCT", + "DROP", + "ELSE", + "EXCLUSIVE", + "EXISTS", + "FILE", + "FLOAT", + "FOR", + "FROM", + "GRANT", + "GROUP", + "HAVING", + "IDENTIFIED", + "IMMEDIATE", + "IN", + "INCREMENT", + "INDEX", + "INITIAL", + "INSERT", + "INTEGER", + "INTERSECT", + "INTO", + "IS", + "LEVEL", + "LIKE", + "LOCK", + "LONG", + "MAXEXTENTS", + "MINUS", + "MLSLABEL", + "MODE", + "MODIFY", + "NOAUDIT", + "NOCOMPRESS", + "NOT", + "NOWAIT", + "NULL", + "NUMBER", + "OF", + "OFFLINE", + "ON", + "ONLINE", + "OPTION", + "OR", + "ORDER", + "PCTFREE", + "PRIOR", + "PRIVILEGES", + "PUBLIC", + "RAW", + "RENAME", + "RESOURCE", + "REVOKE", + "ROW", + "ROWID", + "ROWNUM", + "ROWS", + "SELECT", + "SESSION", + "SET", + "SHARE", + "SIZE", + "SMALLINT", + "START", + "SUCCESSFUL", + "SYNONYM", + "SYSDATE", + "TABLE", + "THEN", + "TO", + "TRIGGER", + "UID", + "UNION", + "UNIQUE", + "UPDATE", + "USER", + "VALIDATE", + "VALUES", + "VARCHAR", + "VARCHAR2", + "VIEW", + "WHENEVER", + "WHERE", + "WITH", +} + + +# https://docs.microsoft.com/en-us/sql/t-sql/language-elements/reserved-keywords-transact-sql?view=sql-server-ver15 +MSSQL = { + "ADD", + "EXTERNAL", + "PROCEDURE", + "ALL", + "FETCH", + "PUBLIC", + "ALTER", + "FILE", + "RAISERROR", + "AND", + "FILLFACTOR", + "READ", + "ANY", + "FOR", + "READTEXT", + "AS", + "FOREIGN", + "RECONFIGURE", 
+ "ASC", + "FREETEXT", + "REFERENCES", + "AUTHORIZATION", + "FREETEXTTABLE", + "REPLICATION", + "BACKUP", + "FROM", + "RESTORE", + "BEGIN", + "FULL", + "RESTRICT", + "BETWEEN", + "FUNCTION", + "RETURN", + "BREAK", + "GOTO", + "REVERT", + "BROWSE", + "GRANT", + "REVOKE", + "BULK", + "GROUP", + "RIGHT", + "BY", + "HAVING", + "ROLLBACK", + "CASCADE", + "HOLDLOCK", + "ROWCOUNT", + "CASE", + "IDENTITY", + "ROWGUIDCOL", + "CHECK", + "IDENTITY_INSERT", + "RULE", + "CHECKPOINT", + "IDENTITYCOL", + "SAVE", + "CLOSE", + "IF", + "SCHEMA", + "CLUSTERED", + "IN", + "SECURITYAUDIT", + "COALESCE", + "INDEX", + "SELECT", + "COLLATE", + "INNER", + "SEMANTICKEYPHRASETABLE", + "COLUMN", + "INSERT", + "SEMANTICSIMILARITYDETAILSTABLE", + "COMMIT", + "INTERSECT", + "SEMANTICSIMILARITYTABLE", + "COMPUTE", + "INTO", + "SESSION_USER", + "CONSTRAINT", + "IS", + "SET", + "CONTAINS", + "JOIN", + "SETUSER", + "CONTAINSTABLE", + "KEY", + "SHUTDOWN", + "CONTINUE", + "KILL", + "SOME", + "CONVERT", + "LEFT", + "STATISTICS", + "CREATE", + "LIKE", + "SYSTEM_USER", + "CROSS", + "LINENO", + "TABLE", + "CURRENT", + "LOAD", + "TABLESAMPLE", + "CURRENT_DATE", + "MERGE", + "TEXTSIZE", + "CURRENT_TIME", + "NATIONAL", + "THEN", + "CURRENT_TIMESTAMP", + "NOCHECK", + "TO", + "CURRENT_USER", + "NONCLUSTERED", + "TOP", + "CURSOR", + "NOT", + "TRAN", + "DATABASE", + "NULL", + "TRANSACTION", + "DBCC", + "NULLIF", + "TRIGGER", + "DEALLOCATE", + "OF", + "TRUNCATE", + "DECLARE", + "OFF", + "TRY_CONVERT", + "DEFAULT", + "OFFSETS", + "TSEQUAL", + "DELETE", + "ON", + "UNION", + "DENY", + "OPEN", + "UNIQUE", + "DESC", + "OPENDATASOURCE", + "UNPIVOT", + "DISK", + "OPENQUERY", + "UPDATE", + "DISTINCT", + "OPENROWSET", + "UPDATETEXT", + "DISTRIBUTED", + "OPENXML", + "USE", + "DOUBLE", + "OPTION", + "USER", + "DROP", + "OR", + "VALUES", + "DUMP", + "ORDER", + "VARYING", + "ELSE", + "OUTER", + "VIEW", + "END", + "OVER", + "WAITFOR", + "ERRLVL", + "PERCENT", + "WHEN", + "ESCAPE", + "PIVOT", + "WHERE", + "EXCEPT", + "PLAN", + "WHILE", + "EXEC", + "PRECISION", + "WITH", + "EXECUTE", + "PRIMARY", + "WITHIN GROUP", + "EXISTS", + "PRINT", + "WRITETEXT", + "EXIT", + "PROC", + "ABSOLUTE", + "OVERLAPS", + "ACTION", + "PAD", + "ADA", + "PARTIAL", + "PASCAL", + "EXTRACT", + "POSITION", + "ALLOCATE", + "FALSE", + "PREPARE", + "FIRST", + "PRESERVE", + "FLOAT", + "ARE", + "PRIOR", + "PRIVILEGES", + "FORTRAN", + "ASSERTION", + "FOUND", + "AT", + "REAL", + "AVG", + "GET", + "GLOBAL", + "RELATIVE", + "GO", + "BIT", + "BIT_LENGTH", + "BOTH", + "ROWS", + "HOUR", + "CASCADED", + "SCROLL", + "IMMEDIATE", + "SECOND", + "CAST", + "SECTION", + "CATALOG", + "INCLUDE", + "CHAR", + "SESSION", + "CHAR_LENGTH", + "INDICATOR", + "CHARACTER", + "INITIALLY", + "CHARACTER_LENGTH", + "SIZE", + "INPUT", + "SMALLINT", + "INSENSITIVE", + "SPACE", + "INT", + "SQL", + "COLLATION", + "INTEGER", + "SQLCA", + "SQLCODE", + "INTERVAL", + "SQLERROR", + "CONNECT", + "SQLSTATE", + "CONNECTION", + "SQLWARNING", + "ISOLATION", + "SUBSTRING", + "CONSTRAINTS", + "SUM", + "LANGUAGE", + "CORRESPONDING", + "LAST", + "TEMPORARY", + "COUNT", + "LEADING", + "TIME", + "LEVEL", + "TIMESTAMP", + "TIMEZONE_HOUR", + "LOCAL", + "TIMEZONE_MINUTE", + "LOWER", + "MATCH", + "TRAILING", + "MAX", + "MIN", + "TRANSLATE", + "DATE", + "MINUTE", + "TRANSLATION", + "DAY", + "MODULE", + "TRIM", + "MONTH", + "TRUE", + "DEC", + "NAMES", + "DECIMAL", + "NATURAL", + "UNKNOWN", + "NCHAR", + "DEFERRABLE", + "NEXT", + "UPPER", + "DEFERRED", + "NO", + "USAGE", + "NONE", + "USING", + "DESCRIBE", + "VALUE", + "DESCRIPTOR", + 
"DIAGNOSTICS", + "NUMERIC", + "VARCHAR", + "DISCONNECT", + "OCTET_LENGTH", + "DOMAIN", + "ONLY", + "WHENEVER", + "WORK", + "END-EXEC", + "WRITE", + "YEAR", + "OUTPUT", + "ZONE", + "EXCEPTION", + "HOST", + "RELEASE", + "ADMIN", + "IGNORE", + "RESULT", + "AFTER", + "RETURNS", + "AGGREGATE", + "ROLE", + "ALIAS", + "INITIALIZE", + "ROLLUP", + "ROUTINE", + "INOUT", + "ROW", + "ARRAY", + "ASENSITIVE", + "SAVEPOINT", + "ASYMMETRIC", + "INTERSECTION", + "SCOPE", + "SEARCH", + "ATOMIC", + "BEFORE", + "ITERATE", + "BINARY", + "SENSITIVE", + "LARGE", + "SEQUENCE", + "BLOB", + "BOOLEAN", + "LATERAL", + "SETS", + "SIMILAR", + "BREADTH", + "LESS", + "CALL", + "CALLED", + "LIKE_REGEX", + "CARDINALITY", + "LIMIT", + "SPECIFIC", + "LN", + "SPECIFICTYPE", + "LOCALTIME", + "SQLEXCEPTION", + "LOCALTIMESTAMP", + "LOCATOR", + "CLASS", + "MAP", + "START", + "CLOB", + "STATE", + "MEMBER", + "STATEMENT", + "COLLECT", + "METHOD", + "STATIC", + "COMPLETION", + "STDDEV_POP", + "CONDITION", + "MOD", + "STDDEV_SAMP", + "MODIFIES", + "STRUCTURE", + "MODIFY", + "SUBMULTISET", + "SUBSTRING_REGEX", + "CONSTRUCTOR", + "SYMMETRIC", + "CORR", + "MULTISET", + "SYSTEM", + "COVAR_POP", + "TERMINATE", + "COVAR_SAMP", + "THAN", + "CUBE", + "NCLOB", + "CUME_DIST", + "NEW", + "CURRENT_CATALOG", + "CURRENT_DEFAULT_TRANSFORM_GROUP", + "CURRENT_PATH", + "CURRENT_ROLE", + "NORMALIZE", + "TRANSLATE_REGEX", + "CURRENT_SCHEMA", + "CURRENT_TRANSFORM_GROUP_FOR_TYPE", + "OBJECT", + "TREAT", + "CYCLE", + "OCCURRENCES_REGEX", + "DATA", + "OLD", + "UESCAPE", + "UNDER", + "OPERATION", + "ORDINALITY", + "UNNEST", + "OUT", + "OVERLAY", + "DEPTH", + "VAR_POP", + "DEREF", + "PARAMETER", + "VAR_SAMP", + "PARAMETERS", + "VARIABLE", + "DESTROY", + "PARTITION", + "DESTRUCTOR", + "PATH", + "WIDTH_BUCKET", + "DETERMINISTIC", + "POSTFIX", + "WITHOUT", + "DICTIONARY", + "PREFIX", + "WINDOW", + "PREORDER", + "WITHIN", + "PERCENT_RANK", + "DYNAMIC", + "PERCENTILE_CONT", + "XMLAGG", + "EACH", + "PERCENTILE_DISC", + "XMLATTRIBUTES", + "ELEMENT", + "POSITION_REGEX", + "XMLBINARY", + "XMLCAST", + "EQUALS", + "XMLCOMMENT", + "EVERY", + "XMLCONCAT", + "RANGE", + "XMLDOCUMENT", + "READS", + "XMLELEMENT", + "FILTER", + "XMLEXISTS", + "RECURSIVE", + "XMLFOREST", + "REF", + "XMLITERATE", + "REFERENCING", + "XMLNAMESPACES", + "FREE", + "REGR_AVGX", + "XMLPARSE", + "FULLTEXTTABLE", + "REGR_AVGY", + "XMLPI", + "FUSION", + "REGR_COUNT", + "XMLQUERY", + "GENERAL", + "REGR_INTERCEPT", + "XMLSERIALIZE", + "REGR_R2", + "XMLTABLE", + "REGR_SLOPE", + "XMLTEXT", + "REGR_SXX", + "XMLVALIDATE", + "GROUPING", + "REGR_SXY", + "HOLD", + "REGR_SYY", +} + +# In ClickHouse, keywords are not reserved. 
+# Ref: https://clickhouse.com/docs/en/sql-reference/syntax/#syntax-keywords +CLICKHOUSE: Set[str] = set() + +# https://docs.pingcap.com/tidb/stable/keywords#keywords +TIDB = { + "ACCOUNT", + "ACTION", + "ADD", + "ADMIN", + "ADVISE", + "AFTER", + "AGAINST", + "AGO", + "ALGORITHM", + "ALL", + "ALTER", + "ALWAYS", + "ANALYZE", + "AND", + "ANY", + "AS", + "ASC", + "ASCII", + "AUTO_ID_CACHE", + "AUTO_INCREMENT", + "AUTO_RANDOM", + "AUTO_RANDOM_BASE", + "AVG", + "AVG_ROW_LENGTH", + "BACKEND", + "BACKUP", + "BACKUPS", + "BEGIN", + "BETWEEN", + "BIGINT", + "BINARY", + "BINDING", + "BINDINGS", + "BINLOG", + "BIT", + "BLOB", + "BLOCK", + "BOOL", + "BOOLEAN", + "BOTH", + "BTREE", + "BUCKETS", + "BUILTINS", + "BY", + "BYTE", + "CACHE", + "CANCEL", + "CAPTURE", + "CASCADE", + "CASCADED", + "CASE", + "CHAIN", + "CHANGE", + "CHAR", + "CHARACTER", + "CHARSET", + "CHECK", + "CHECKPOINT", + "CHECKSUM", + "CIPHER", + "CLEANUP", + "CLIENT", + "CMSKETCH", + "COALESCE", + "COLLATE", + "COLLATION", + "COLUMN", + "COLUMNS", + "COLUMN_FORMAT", + "COMMENT", + "COMMIT", + "COMMITTED", + "COMPACT", + "COMPRESSED", + "COMPRESSION", + "CONCURRENCY", + "CONFIG", + "CONNECTION", + "CONSISTENT", + "CONSTRAINT", + "CONTEXT", + "CONVERT", + "CPU", + "CREATE", + "CROSS", + "CSV_BACKSLASH_ESCAPE", + "CSV_DELIMITER", + "CSV_HEADER", + "CSV_NOT_NULL", + "CSV_NULL", + "CSV_SEPARATOR", + "CSV_TRIM_LAST_SEPARATORS", + "CUME_DIST", + "CURRENT", + "CURRENT_DATE", + "CURRENT_ROLE", + "CURRENT_TIME", + "CURRENT_TIMESTAMP", + "CURRENT_USER", + "CYCLE", + "DATA", + "DATABASE", + "DATABASES", + "DATE", + "DATETIME", + "DAY", + "DAY_HOUR", + "DAY_MICROSECOND", + "DAY_MINUTE", + "DAY_SECOND", + "DDL", + "DEALLOCATE", + "DECIMAL", + "DEFAULT", + "DEFINER", + "DELAYED", + "DELAY_KEY_WRITE", + "DELETE", + "DENSE_RANK", + "DEPTH", + "DESC", + "DESCRIBE", + "DIRECTORY", + "DISABLE", + "DISCARD", + "DISK", + "DISTINCT", + "DISTINCTROW", + "DIV", + "DO", + "DOUBLE", + "DRAINER", + "DROP", + "DUAL", + "DUPLICATE", + "DYNAMIC", + "ELSE", + "ENABLE", + "ENCLOSED", + "ENCRYPTION", + "END", + "ENFORCED", + "ENGINE", + "ENGINES", + "ENUM", + "ERROR", + "ERRORS", + "ESCAPE", + "ESCAPED", + "EVENT", + "EVENTS", + "EVOLVE", + "EXCEPT", + "EXCHANGE", + "EXCLUSIVE", + "EXECUTE", + "EXISTS", + "EXPANSION", + "EXPIRE", + "EXPLAIN", + "EXTENDED", + "FALSE", + "FAULTS", + "FIELDS", + "FILE", + "FIRST", + "FIRST_VALUE", + "FIXED", + "FLOAT", + "FLUSH", + "FOLLOWING", + "FOR", + "FORCE", + "FOREIGN", + "FORMAT", + "FROM", + "FULL", + "FULLTEXT", + "FUNCTION", + "GENERAL", + "GENERATED", + "GLOBAL", + "GRANT", + "GRANTS", + "GROUP", + "GROUPS", + "HASH", + "HAVING", + "HIGH_PRIORITY", + "HISTORY", + "HOSTS", + "HOUR", + "HOUR_MICROSECOND", + "HOUR_MINUTE", + "HOUR_SECOND", + "IDENTIFIED", + "IF", + "IGNORE", + "IMPORT", + "IMPORTS", + "IN", + "INCREMENT", + "INCREMENTAL", + "INDEX", + "INDEXES", + "INFILE", + "INNER", + "INSERT", + "INSERT_METHOD", + "INSTANCE", + "INT", + "INT1", + "INT2", + "INT3", + "INT4", + "INT8", + "INTEGER", + "INTERVAL", + "INTO", + "INVISIBLE", + "INVOKER", + "IO", + "IPC", + "IS", + "ISOLATION", + "ISSUER", + "KEY", + "KEYS", + "KEY_BLOCK_SIZE", + "KILL", + "LABELS", + "LAG",
+ "LANGUAGE", + "LAST", + "LASTVAL", + "LAST_BACKUP", + "LAST_VALUE", + "LEAD", + "LEADING", + "LEFT", + "LESS", + "LEVEL", + "LIKE", + "LIMIT", + "LINEAR", + "LINES", + "LIST", + "LOAD", + "LOCAL", + "LOCALTIME", + "LOCALTIMESTAMP", + "LOCATION", + "LOCK", + "LOGS", + "LONG", + "LONGBLOB", + "LONGTEXT", + "LOW_PRIORITY", + "MASTER", + "MATCH", + "MAXVALUE", + "MAX_CONNECTIONS_PER_HOUR", + "MAX_IDXNUM", + "MAX_MINUTES", + "MAX_QUERIES_PER_HOUR", + "MAX_ROWS", + "MAX_UPDATES_PER_HOUR", + "MAX_USER_CONNECTIONS", + "MB", + "MEDIUMBLOB", + "MEDIUMINT", + "MEDIUMTEXT", + "MEMORY", + "MERGE", + "MICROSECOND", + "MINUTE", + "MINUTE_MICROSECOND", + "MINUTE_SECOND", + "MINVALUE", + "MIN_ROWS", + "MOD", + "MODE", + "MODIFY", + "MONTH", + "NAMES", + "NATIONAL", + "NATURAL", + "NCHAR", + "NEVER", + "NEXT", + "NEXTVAL", + "NO", + "NOCACHE", + "NOCYCLE", + "NODEGROUP", + "NODE_ID", + "NODE_STATE", + "NOMAXVALUE", + "NOMINVALUE", + "NONE", + "NOT", + "NOWAIT", + "NO_WRITE_TO_BINLOG", + "NTH_VALUE", + "NTILE", + "NULL", + "NULLS", + "NUMERIC", + "NVARCHAR", + "OFFSET", + "ON", + "ONLINE", + "ONLY", + "ON_DUPLICATE", + "OPEN", + "OPTIMISTIC", + "OPTIMIZE", + "OPTION", + "OPTIONALLY", + "OR", + "ORDER", + "OUTER", + "OUTFILE", + "OVER", + "PACK_KEYS", + "PAGE", + "PARSER", + "PARTIAL", + "PARTITION", + "PARTITIONING", + "PARTITIONS", + "PASSWORD", + "PERCENT_RANK", + "PER_DB", + "PER_TABLE", + "PESSIMISTIC", + "PLUGINS", + "PRECEDING", + "PRECISION", + "PREPARE", + "PRE_SPLIT_REGIONS", + "PRIMARY", + "PRIVILEGES", + "PROCEDURE", + "PROCESS", + "PROCESSLIST", + "PROFILE", + "PROFILES", + "PUMP", + "QUARTER", + "QUERIES", + "QUERY", + "QUICK", + "RANGE", + "RANK", + "RATE_LIMIT", + "READ", + "REAL", + "REBUILD", + "RECOVER", + "REDUNDANT", + "REFERENCES", + "REGEXP", + "REGION", + "REGIONS", + "RELEASE", + "RELOAD", + "REMOVE", + "RENAME", + "REORGANIZE", + "REPAIR", + "REPEAT", + "REPEATABLE", + "REPLACE", + "REPLICA", + "REPLICATION", + "REQUIRE", + "RESPECT", + "RESTORE", + "RESTORES", + "RESTRICT", + "REVERSE", + "REVOKE", + "RIGHT", + "RLIKE", + "ROLE", + "ROLLBACK", + "ROUTINE", + "ROW", + "ROWS", + "ROW_COUNT", + "ROW_FORMAT", + "ROW_NUMBER", + "RTREE", + "SAMPLES", + "SECOND", + "SECONDARY_ENGINE", + "SECONDARY_LOAD", + "SECONDARY_UNLOAD", + "SECOND_MICROSECOND", + "SECURITY", + "SELECT", + "SEND_CREDENTIALS_TO_TIKV", + "SEPARATOR", + "SEQUENCE", + "SERIAL", + "SERIALIZABLE", + "SESSION", + "SET", + "SETVAL", + "SHARD_ROW_ID_BITS", + "SHARE", + "SHARED", + "SHOW", + "SHUTDOWN", + "SIGNED", + "SIMPLE", + "SKIP_SCHEMA_FILES", + "SLAVE", + "SLOW", + "SMALLINT", + "SNAPSHOT", + "SOME", + "SOURCE", + "SPATIAL", + "SPLIT", + "SQL", + "SQL_BIG_RESULT", + "SQL_BUFFER_RESULT", + "SQL_CACHE", + "SQL_CALC_FOUND_ROWS", + "SQL_NO_CACHE", + "SQL_SMALL_RESULT", + "SQL_TSI_DAY", + "SQL_TSI_HOUR", + "SQL_TSI_MINUTE", + "SQL_TSI_MONTH", + "SQL_TSI_QUARTER", + "SQL_TSI_SECOND", + "SQL_TSI_WEEK", + "SQL_TSI_YEAR", + "SSL", + "START", + "STARTING", + "STATS", + "STATS_AUTO_RECALC", + "STATS_BUCKETS", + "STATS_HEALTHY", + "STATS_HISTOGRAMS", + "STATS_META", + "STATS_PERSISTENT", + "STATS_SAMPLE_PAGES", + "STATUS", + "STORAGE", + "STORED", + "STRAIGHT_JOIN", + "STRICT_FORMAT", + "SUBJECT", + "SUBPARTITION", + "SUBPARTITIONS", + "SUPER", + "SWAPS", + "SWITCHES", + "SYSTEM_TIME", + "TABLE", + "TABLES", + "TABLESPACE", + "TABLE_CHECKSUM", + "TEMPORARY", + "TEMPTABLE", + "TERMINATED", + "TEXT", + "THAN", + "THEN", + "TIDB", + "TIFLASH", + "TIKV_IMPORTER", + "TIME", + "TIMESTAMP", + "TINYBLOB", + "TINYINT", + "TINYTEXT", + "TO", 
+ "TOPN", + "TRACE", + "TRADITIONAL", + "TRAILING", + "TRANSACTION", + "TRIGGER", + "TRIGGERS", + "TRUE", + "TRUNCATE", + "TYPE", + "UNBOUNDED", + "UNCOMMITTED", + "UNDEFINED", + "UNICODE", + "UNION", + "UNIQUE", + "UNKNOWN", + "UNLOCK", + "UNSIGNED", + "UPDATE", + "USAGE", + "USE", + "USER", + "USING", + "UTC_DATE", + "UTC_TIME", + "UTC_TIMESTAMP", + "VALIDATION", + "VALUE", + "VALUES", + "VARBINARY", + "VARCHAR", + "VARCHARACTER", + "VARIABLES", + "VARYING", + "VIEW", + "VIRTUAL", + "VISIBLE", + "WARNINGS", + "WEEK", + "WEIGHT_STRING", + "WHEN", + "WHERE", + "WIDTH", + "WINDOW", + "WITH", + "WITHOUT", + "WRITE", + "X", + "X509", + "XOR", + "YEAR", + "YEAR_MONTH", + "ZEROFILL", +} + +# DuckDB uses Sqlite interface: https://www.sqlite.org/lang_keywords.html +DUCKDB = { + "ANALYZE", + "AND", + "AS", + "ASC", + "ATTACH", + "AUTOINCREMENT", + "BEFORE", + "BEGIN", + "BETWEEN", + "BY", + "CASCADE", + "CASE", + "CAST", + "CHECK", + "COLLATE", + "COLUMN", + "COMMIT", + "CONFLICT", + "CONSTRAINT", + "CREATE", + "CROSS", + "CURRENT", + "CURRENT_DATE", + "CURRENT_TIME", + "CURRENT_TIMESTAMP", + "DATABASE", + "DEFAULT", + "DEFERRABLE", + "DEFERRED", + "DELETE", + "DESC", + "DETACH", + "DISTINCT", + "DO", + "DROP", + "EACH", + "ELSE", + "END", + "ESCAPE", + "EXCEPT", + "EXCLUDE", + "EXCLUSIVE", + "EXISTS", + "EXPLAIN", + "FAIL", + "FILTER", + "FIRST", + "FOLLOWING", + "FOR", + "FOREIGN", + "FROM", + "FULL", + "GENERATED", + "GLOB", + "GROUP", + "GROUPS", + "HAVING", + "IF", + "IGNORE", + "IMMEDIATE", + "IN", + "INDEX", + "INDEXED", + "INITIALLY", + "INNER", + "INSERT", + "INSTEAD", + "INTERSECT", + "INTO", + "IS", + "ISNULL", + "JOIN", + "KEY", + "LAST", + "LEFT", + "LIKE", + "LIMIT", + "MATCH", + "MATERIALIZED", + "NATURAL", + "NO", + "NOT", + "NOTHING", + "NOTNULL", + "NULL", + "NULLS", + "OF", + "OFFSET", + "ON", + "OR", + "ORDER", + "OTHERS", + "OUTER", + "OVER", + "PARTITION", + "PLAN", + "PRAGMA", + "PRECEDING", + "PRIMARY", + "QUERY", + "RAISE", + "RANGE", + "RECURSIVE", + "REFERENCES", + "REGEXP", + "REINDEX", + "RELEASE", + "RENAME", + "REPLACE", + "RESTRICT", + "RETURNING", + "RIGHT", + "ROLLBACK", + "ROW", + "ROWS", + "SAVEPOINT", + "SELECT", + "SET", + "TABLE", + "TEMP", + "TEMPORARY", + "THEN", + "TIES", + "TO", + "TRANSACTION", + "TRIGGER", + "UNBOUNDED", + "UNION", + "UNIQUE", + "UPDATE", + "USING", + "VACUUM", + "VALUES", + "VIEW", + "VIRTUAL", + "WHEN", + "WHERE", + "WINDOW", + "WITH", + "WITHOUT", +} + +RESERVED_KEYWORDS = { + DestinationType.BIGQUERY.value: BIGQUERY, + DestinationType.POSTGRES.value: POSTGRES, + DestinationType.REDSHIFT.value: REDSHIFT, + DestinationType.SNOWFLAKE.value: SNOWFLAKE, + DestinationType.MYSQL.value: MYSQL, + DestinationType.ORACLE.value: ORACLE, + DestinationType.MSSQL.value: MSSQL, + DestinationType.CLICKHOUSE.value: CLICKHOUSE, + DestinationType.TIDB.value: TIDB, + DestinationType.DUCKDB.value: DUCKDB, +} + + +def is_reserved_keyword(token: str, integration_type: DestinationType) -> bool: + return token.upper() in RESERVED_KEYWORDS[integration_type.value] diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py new file mode 100644 index 0000000000000..6c1f70d6756c2 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -0,0 +1,1530 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+#
+
+
+import os
+import re
+from enum import Enum
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+from airbyte_cdk.models.airbyte_protocol import DestinationSyncMode, SyncMode  # type: ignore
+from jinja2 import Template
+from normalization.destination_type import DestinationType
+from normalization.transform_catalog import dbt_macro
+from normalization.transform_catalog.destination_name_transformer import DestinationNameTransformer, transform_json_naming
+from normalization.transform_catalog.table_name_registry import TableNameRegistry
+from normalization.transform_catalog.utils import (
+    is_airbyte_column,
+    is_array,
+    is_big_integer,
+    is_boolean,
+    is_combining_node,
+    is_date,
+    is_datetime,
+    is_datetime_with_timezone,
+    is_datetime_without_timezone,
+    is_long,
+    is_number,
+    is_object,
+    is_simple_property,
+    is_string,
+    is_time,
+    is_time_with_timezone,
+    jinja_call,
+    remove_jinja,
+)
+
+# Using too many columns breaks ephemeral materialization (somewhere between 480 and 490 columns),
+# so use a lower value to stay safely away from the limit...
+MAXIMUM_COLUMNS_TO_USE_EPHEMERAL = 450
+
+
+class PartitionScheme(Enum):
+    """
+    When possible, normalization will try to output partitioned/indexed/sorted tables (depending on the destination's support).
+    This enum specifies which column to use when doing so (which affects how fast the table can be read using that column as a predicate).
+    """
+
+    ACTIVE_ROW = "active_row"  # partition by _airbyte_active_row
+    UNIQUE_KEY = "unique_key"  # partition by _airbyte_emitted_at, sorted by _airbyte_unique_key
+    NOTHING = "nothing"  # no partitions
+    DEFAULT = ""  # partition by _airbyte_emitted_at
+
+
+class TableMaterializationType(Enum):
+    """
+    Defines the folders and dbt materialization modes of models (as configured in the dbt_project.yml file).
+    """
+
+    CTE = "airbyte_ctes"
+    VIEW = "airbyte_views"
+    TABLE = "airbyte_tables"
+    INCREMENTAL = "airbyte_incremental"
+
+
+class StreamProcessor(object):
+    """
+    Takes as input an Airbyte Stream as described in the (configured) Airbyte Catalog's JSON Schema.
+    The associated input raw data is expected to be stored in a staging area table.
+
+    This processor generates SQL models to transform such a stream into a final table in the destination schema.
+    This is done by generating a dbt pipeline of transformations (multiple SQL model queries) that may be materialized
+    in the intermediate schema "raw_schema" (changing the dbt_project.yml settings).
+    The final output data should be written in "schema".
+
+    The pipeline includes transformations such as:
+    - Parsing a JSON blob column and extracting each field property into its own SQL column
+    - Casting each SQL column to the proper JSON data type
+    - Generating an artificial (primary key) ID column based on the hashing of the row
+
+    If any nested columns are discovered in the stream, a JSON blob SQL column is created in the top-level parent stream
+    and a new StreamProcessor instance is spawned for each child substream. These sub-stream processors can then
+    generate models that recursively parse and extract the content of that JSON blob SQL column from their parent
+    StreamProcessor's model into separate SQL tables.
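+
+    As a rough illustration (the stream name here is hypothetical), a stream "users" is expanded into a chain of
+    models: "users_ab1" (JSON parsing) -> "users_ab2" (type casting), followed either by "users_ab3" (hash id) and
+    the final "users" table, or, in append_dedup mode, by a "users_stg" hashing model, a "users_scd" history table
+    and the final de-duplicated "users" table; see process() below.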
+ """ + + def __init__( + self, + stream_name: str, + destination_type: DestinationType, + raw_schema: str, + default_schema: str, + schema: str, + source_sync_mode: SyncMode, + destination_sync_mode: DestinationSyncMode, + cursor_field: List[str], + primary_key: List[List[str]], + json_column_name: str, + properties: Dict, + tables_registry: TableNameRegistry, + from_table: Union[str, dbt_macro.Macro], + ): + """ + See StreamProcessor.create() + """ + self.stream_name: str = stream_name + self.destination_type: DestinationType = destination_type + self.raw_schema: str = raw_schema + self.schema: str = schema + self.source_sync_mode: SyncMode = source_sync_mode + self.destination_sync_mode: DestinationSyncMode = destination_sync_mode + self.cursor_field: List[str] = cursor_field + self.primary_key: List[List[str]] = primary_key + self.json_column_name: str = json_column_name + self.properties: Dict = properties + self.tables_registry: TableNameRegistry = tables_registry + self.from_table: Union[str, dbt_macro.Macro] = from_table + + self.name_transformer: DestinationNameTransformer = DestinationNameTransformer(destination_type) + self.json_path: List[str] = [stream_name] + self.final_table_name: str = "" + self.sql_outputs: Dict[str, str] = {} + self.parent: Optional["StreamProcessor"] = None + self.is_nested_array: bool = False + self.default_schema: str = default_schema + self.airbyte_ab_id = "_airbyte_ab_id" + self.airbyte_emitted_at = "_airbyte_emitted_at" + self.airbyte_normalized_at = "_airbyte_normalized_at" + self.airbyte_unique_key = "_airbyte_unique_key" + self.models_to_source: Dict[str, str] = {} + + @staticmethod + def create_from_parent( + parent, child_name: str, json_column_name: str, properties: Dict, is_nested_array: bool, from_table: str + ) -> "StreamProcessor": + """ + @param parent is the Stream Processor that originally created this instance to handle a nested column from that parent table. + + @param json_column_name is the name of the column in the parent data table containing the json column to transform + @param properties is the json schema description of this nested stream + @param is_nested_array is a boolean flag specifying if the child is a nested array that needs to be extracted + + @param tables_registry is the global context recording all tables created so far + @param from_table is the parent table to extract the nested stream from + + The child stream processor will create a separate table to contain the unnested data. 
+ """ + if parent.destination_sync_mode.value == DestinationSyncMode.append_dedup.value: + # nested streams can't be deduped like their parents (as they may not share the same cursor/primary keys) + parent_sync_mode = DestinationSyncMode.append + else: + parent_sync_mode = parent.destination_sync_mode + result = StreamProcessor.create( + stream_name=child_name, + destination_type=parent.destination_type, + raw_schema=parent.raw_schema, + default_schema=parent.default_schema, + schema=parent.schema, + source_sync_mode=parent.source_sync_mode, + destination_sync_mode=parent_sync_mode, + cursor_field=[], + primary_key=[], + json_column_name=json_column_name, + properties=properties, + tables_registry=parent.tables_registry, + from_table=from_table, + ) + result.parent = parent + result.is_nested_array = is_nested_array + result.json_path = parent.json_path + [child_name] + return result + + @staticmethod + def create( + stream_name: str, + destination_type: DestinationType, + raw_schema: str, + default_schema: str, + schema: str, + source_sync_mode: SyncMode, + destination_sync_mode: DestinationSyncMode, + cursor_field: List[str], + primary_key: List[List[str]], + json_column_name: str, + properties: Dict, + tables_registry: TableNameRegistry, + from_table: Union[str, dbt_macro.Macro], + ) -> "StreamProcessor": + """ + @param stream_name of the stream being processed + + @param destination_type is the destination type of warehouse + @param raw_schema is the name of the staging intermediate schema where to create internal tables/views + @param schema is the name of the schema where to store the final tables where to store the transformed data + + @param source_sync_mode is describing how source are producing data + @param destination_sync_mode is describing how destination should handle the new data batch + @param cursor_field is the field to use to determine order of records + @param primary_key is a list of fields to use as a (composite) primary key + + @param json_column_name is the name of the column in the raw data table containing the json column to transform + @param properties is the json schema description of this stream + + @param tables_registry is the global context recording all tables created so far + @param from_table is the table this stream is being extracted from originally + """ + return StreamProcessor( + stream_name, + destination_type, + raw_schema, + default_schema, + schema, + source_sync_mode, + destination_sync_mode, + cursor_field, + primary_key, + json_column_name, + properties, + tables_registry, + from_table, + ) + + def collect_table_names(self): + column_names = self.extract_column_names() + self.tables_registry.register_table(self.get_schema(True), self.get_schema(False), self.stream_name, self.json_path) + for child in self.find_children_streams(self.from_table, column_names): + child.collect_table_names() + + def get_stream_source(self): + if not self.parent: + return self.from_table.source_name + "." + self.from_table.table_name + cur = self.parent + while cur.parent: + cur = cur.parent + return cur.from_table.source_name + "." + cur.from_table.table_name + + def process(self) -> List["StreamProcessor"]: + """ + See description of StreamProcessor class. 
+        @return a list of StreamProcessors to recursively handle nested columns from this stream
+        """
+        # Check properties
+        if not self.properties:
+            print(f"  Ignoring stream '{self.stream_name}' from {self.current_json_path()} because properties list is empty")
+            return []
+
+        column_names = self.extract_column_names()
+        column_count = len(column_names)
+
+        if column_count == 0:
+            print(f"  Ignoring stream '{self.stream_name}' from {self.current_json_path()} because no columns were identified")
+            return []
+
+        from_table = str(self.from_table)
+        # Transformation Pipeline for this stream
+        from_table = self.add_to_outputs(
+            self.generate_json_parsing_model(from_table, column_names),
+            self.get_model_materialization_mode(is_intermediate=True),
+            is_intermediate=True,
+            suffix="ab1",
+        )
+        from_table = self.add_to_outputs(
+            self.generate_column_typing_model(from_table, column_names),
+            self.get_model_materialization_mode(is_intermediate=True, column_count=column_count),
+            is_intermediate=True,
+            suffix="ab2",
+        )
+        if self.destination_sync_mode != DestinationSyncMode.append_dedup:
+            from_table = self.add_to_outputs(
+                self.generate_id_hashing_model(from_table, column_names),
+                self.get_model_materialization_mode(is_intermediate=True, column_count=column_count),
+                is_intermediate=True,
+                suffix="ab3",
+            )
+            from_table = self.add_to_outputs(
+                self.generate_final_model(from_table, column_names),
+                self.get_model_materialization_mode(is_intermediate=False, column_count=column_count),
+                is_intermediate=False,
+            )
+        else:
+            if self.is_incremental_mode(self.destination_sync_mode):
+                # Force a different materialization here because incremental scd models rely on star* macros that require it
+                if self.destination_type.value == DestinationType.POSTGRES.value:
+                    # because of https://github.com/dbt-labs/docs.getdbt.com/issues/335, we avoid VIEW for postgres
+                    forced_materialization_type = TableMaterializationType.INCREMENTAL
+                else:
+                    forced_materialization_type = TableMaterializationType.VIEW
+            else:
+                forced_materialization_type = TableMaterializationType.CTE
+            from_table = self.add_to_outputs(
+                self.generate_id_hashing_model(from_table, column_names),
+                forced_materialization_type,
+                is_intermediate=True,
+                suffix="stg",
+            )
+
+            from_table = self.add_to_outputs(
+                self.generate_scd_type_2_model(from_table, column_names),
+                self.get_model_materialization_mode(is_intermediate=False, column_count=column_count),
+                is_intermediate=False,
+                suffix="scd",
+                subdir="scd",
+                unique_key=self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"),
+                partition_by=PartitionScheme.ACTIVE_ROW,
+            )
+            where_clause = f"\nand {self.name_transformer.normalize_column_name('_airbyte_active_row')} = 1"
+            # from_table should not use the de-duplicated final table or tables downstream (nested streams) will miss non-active rows
+            self.add_to_outputs(
+                self.generate_final_model(from_table, column_names, unique_key=self.get_unique_key()) + where_clause,
+                self.get_model_materialization_mode(is_intermediate=False, column_count=column_count),
+                is_intermediate=False,
+                unique_key=self.get_unique_key(),
+                partition_by=PartitionScheme.UNIQUE_KEY,
+            )
+        return self.find_children_streams(from_table, column_names)
+
+    def extract_column_names(self) -> Dict[str, Tuple[str, str]]:
+        """
+        Generate a mapping of JSON properties to normalized SQL column names, handling collisions and avoiding duplicate names
+
+        The value mapped to a field property is a tuple where:
+          - the first value is the normalized "raw" column name
+          - the second value is the normalized quoted column name to be used in jinja context
+        """
+        fields = []
+        for field in self.properties.keys():
+            if not is_airbyte_column(field):
+                fields.append(field)
+        result = {}
+        field_names = set()
+        for field in fields:
+            field_name = self.name_transformer.normalize_column_name(field, in_jinja=False)
+            field_name_lookup = self.name_transformer.normalize_column_identifier_case_for_lookup(field_name)
+            jinja_name = self.name_transformer.normalize_column_name(field, in_jinja=True)
+            if field_name_lookup in field_names:
+                # TODO handle column name duplicates or collisions deterministically in this stream
+                for i in range(1, 1000):
+                    field_name = self.name_transformer.normalize_column_name(f"{field}_{i}", in_jinja=False)
+                    field_name_lookup = self.name_transformer.normalize_column_identifier_case_for_lookup(field_name)
+                    jinja_name = self.name_transformer.normalize_column_name(f"{field}_{i}", in_jinja=True)
+                    if field_name_lookup not in field_names:
+                        break
+            field_names.add(field_name_lookup)
+            result[field] = (field_name, jinja_name)
+        return result
+
+    def find_children_streams(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> List["StreamProcessor"]:
+        """
+        For each complex-type property, generate a new child StreamProcessor that produces a separate child pipeline.
+        The current stream/table is used as the parent from which to extract data.
+        """
+        properties = self.properties
+        children: List[StreamProcessor] = []
+        for field in properties.keys():
+            children_properties = None
+            is_nested_array = False
+            json_column_name = ""
+            if is_airbyte_column(field):
+                pass
+            elif is_combining_node(properties[field]):
+                # TODO: merge properties of all combinations
+                pass
+            elif "type" not in properties[field] or is_object(properties[field]["type"]):
+                # properties without 'type' field are treated like properties with 'type' = 'object'
+                children_properties = find_properties_object([], field, properties[field])
+                is_nested_array = False
+                json_column_name = column_names[field][1]
+            elif is_array(properties[field]["type"]) and "items" in properties[field]:
+                quoted_field = column_names[field][1]
+                children_properties = find_properties_object([], field, properties[field]["items"])
+                is_nested_array = True
+                json_column_name = f"unnested_column_value({quoted_field})"
+            if children_properties:
+                for child_key in children_properties:
+                    stream_processor = StreamProcessor.create_from_parent(
+                        parent=self,
+                        child_name=field,
+                        json_column_name=json_column_name,
+                        properties=children_properties[child_key],
+                        is_nested_array=is_nested_array,
+                        from_table=from_table,
+                    )
+                    children.append(stream_processor)
+        return children
+
+    def generate_json_parsing_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any:
+        if self.destination_type == DestinationType.ORACLE:
+            table_alias = ""
+        else:
+            table_alias = "as table_alias"
+        template = Template(
+            """
+-- SQL model to parse JSON blob stored in a single column and extract into separate field columns as described by the JSON Schema
+-- depends_on: {{ from_table }}
+{{ unnesting_before_query }}
+select
+{%- if parent_hash_id %}
+    {{ parent_hash_id }},
+{%- endif %}
+{%- for field in fields %}
+    {{ field }},
+{%- endfor %}
+    {{ col_ab_id }},
+    {{ col_emitted_at }},
+    {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }}
+from {{ from_table }} {{ table_alias }}
+{{ sql_table_comment }}
+{{ unnesting_from }}
+where 1 = 1
+{{ unnesting_where }}
+"""
+        )
+        sql = template.render(
+            col_ab_id=self.get_ab_id(),
+            col_emitted_at=self.get_emitted_at(),
+            col_normalized_at=self.get_normalized_at(),
+            table_alias=table_alias,
+            unnesting_before_query=self.unnesting_before_query(from_table),
+            parent_hash_id=self.parent_hash_id(),
+            fields=self.extract_json_columns(column_names),
+            from_table=jinja_call(from_table),
+            unnesting_from=self.unnesting_from(),
+            unnesting_where=self.unnesting_where(),
+            sql_table_comment=self.sql_table_comment(),
+        )
+        return sql
+
+    def get_ab_id(self, in_jinja: bool = False):
+        # this is also tied to dbt-project-template/macros/should_full_refresh.sql
+        # as it is needed by the macro should_full_refresh
+        return self.name_transformer.normalize_column_name(self.airbyte_ab_id, in_jinja, False)
+
+    def get_emitted_at(self, in_jinja: bool = False):
+        return self.name_transformer.normalize_column_name(self.airbyte_emitted_at, in_jinja, False)
+
+    def get_normalized_at(self, in_jinja: bool = False):
+        return self.name_transformer.normalize_column_name(self.airbyte_normalized_at, in_jinja, False)
+
+    def get_unique_key(self, in_jinja: bool = False):
+        return self.name_transformer.normalize_column_name(self.airbyte_unique_key, in_jinja, False)
+
+    def extract_json_columns(self, column_names: Dict[str, Tuple[str, str]]) -> List[str]:
+        return [
+            self.extract_json_column(field, self.json_column_name, self.properties[field], column_names[field][0], "table_alias")
+            for field in column_names
+        ]
+
+    @staticmethod
+    def extract_json_column(property_name: str, json_column_name: str, definition: Dict, column_name: str, table_alias: str) -> str:
+        json_path = [property_name]
+        # In some cases, some destinations aren't able to parse the JSON blob using the original property name,
+        # so we make their lives easier by using a pre-populated and sanitized column name instead...
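+        # For illustration (hypothetical property name): a property "user name" keeps json_path == ['user name'],
+        # while normalized_json_path below holds the sanitized variant produced by transform_json_naming,
+        # which some destinations use to extract the value instead.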
+ normalized_json_path = [transform_json_naming(property_name)] + table_alias = f"{table_alias}" + if "unnested_column_value" in json_column_name: + table_alias = "" + + json_extract = jinja_call(f"json_extract('{table_alias}', {json_column_name}, {json_path})") + if "type" in definition: + if is_array(definition["type"]): + json_extract = jinja_call(f"json_extract_array({json_column_name}, {json_path}, {normalized_json_path})") + if is_simple_property(definition.get("items", {"type": "object"})): + json_extract = jinja_call(f"json_extract_string_array({json_column_name}, {json_path}, {normalized_json_path})") + elif is_object(definition["type"]): + json_extract = jinja_call(f"json_extract('{table_alias}', {json_column_name}, {json_path}, {normalized_json_path})") + elif is_simple_property(definition): + json_extract = jinja_call(f"json_extract_scalar({json_column_name}, {json_path}, {normalized_json_path})") + + return f"{json_extract} as {column_name}" + + def generate_column_typing_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any: + template = Template( + """ +-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type +-- depends_on: {{ from_table }} +select +{%- if parent_hash_id %} + {{ parent_hash_id }}, +{%- endif %} +{%- for field in fields %} + {{ field }}, +{%- endfor %} + {{ col_ab_id }}, + {{ col_emitted_at }}, + {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }} +from {{ from_table }} +{{ sql_table_comment }} +where 1 = 1 + """ + ) + sql = template.render( + col_ab_id=self.get_ab_id(), + col_emitted_at=self.get_emitted_at(), + col_normalized_at=self.get_normalized_at(), + parent_hash_id=self.parent_hash_id(), + fields=self.cast_property_types(column_names), + from_table=jinja_call(from_table), + sql_table_comment=self.sql_table_comment(), + ) + return sql + + def cast_property_types(self, column_names: Dict[str, Tuple[str, str]]) -> List[str]: + return [self.cast_property_type(field, column_names[field][0], column_names[field][1]) for field in column_names] + + def cast_property_type(self, property_name: str, column_name: str, jinja_column: str) -> Any: # noqa: C901 + definition = self.properties[property_name] + if "type" not in definition: + print(f"WARN: Unknown type for column {property_name} at {self.current_json_path()}") + return column_name + elif is_array(definition["type"]): + return column_name + elif is_object(definition["type"]): + sql_type = jinja_call("type_json()") + # Treat simple types from narrower to wider scope type: boolean < integer < number < string + elif is_boolean(definition["type"], definition): + cast_operation = jinja_call(f"cast_to_boolean({jinja_column})") + return f"{cast_operation} as {column_name}" + elif is_big_integer(definition): + sql_type = jinja_call("type_very_large_integer()") + elif is_long(definition["type"], definition): + sql_type = jinja_call("dbt_utils.type_bigint()") + elif is_number(definition["type"]): + sql_type = jinja_call("dbt_utils.type_float()") + elif is_datetime(definition): + if self.destination_type == DestinationType.SNOWFLAKE: + # snowflake uses case when statement to parse timestamp field + # in this case [cast] operator is not needed as data already converted to timestamp type + if is_datetime_without_timezone(definition): + return self.generate_snowflake_timestamp_statement(column_name) + return self.generate_snowflake_timestamp_tz_statement(column_name) + if self.destination_type == DestinationType.MYSQL and 
is_datetime_without_timezone(definition): + # MySQL does not support [cast] and [nullif] functions together + return self.generate_mysql_datetime_format_statement(column_name) + replace_operation = jinja_call(f"empty_string_to_null({jinja_column})") + if self.destination_type.value == DestinationType.MSSQL.value: + # in case of datetime, we don't need to use [cast] function, use try_parse instead. + if is_datetime_with_timezone(definition): + sql_type = jinja_call("type_timestamp_with_timezone()") + else: + sql_type = jinja_call("type_timestamp_without_timezone()") + return f"try_parse({replace_operation} as {sql_type}) as {column_name}" + if self.destination_type == DestinationType.CLICKHOUSE: + return f"parseDateTime64BestEffortOrNull(trim(BOTH '\"' from {replace_operation})) as {column_name}" + # in all other cases + if is_datetime_without_timezone(definition): + sql_type = jinja_call("type_timestamp_without_timezone()") + else: + sql_type = jinja_call("type_timestamp_with_timezone()") + return f"cast({replace_operation} as {sql_type}) as {column_name}" + elif is_date(definition): + if ( + self.destination_type.value == DestinationType.MYSQL.value + or self.destination_type.value == DestinationType.TIDB.value + or self.destination_type.value == DestinationType.DUCKDB.value + ): + # MySQL does not support [cast] and [nullif] functions together + return self.generate_mysql_date_format_statement(column_name) + replace_operation = jinja_call(f"empty_string_to_null({jinja_column})") + if self.destination_type.value == DestinationType.MSSQL.value: + # in case of date, we don't need to use [cast] function, use try_parse instead. + sql_type = jinja_call("type_date()") + return f"try_parse({replace_operation} as {sql_type}) as {column_name}" + if self.destination_type == DestinationType.CLICKHOUSE: + return f"toDate(parseDateTimeBestEffortOrNull(trim(BOTH '\"' from {replace_operation}))) as {column_name}" + # in all other cases + sql_type = jinja_call("type_date()") + return f"cast({replace_operation} as {sql_type}) as {column_name}" + elif is_time(definition): + if is_time_with_timezone(definition): + sql_type = jinja_call("type_time_with_timezone()") + else: + sql_type = jinja_call("type_time_without_timezone()") + if self.destination_type == DestinationType.CLICKHOUSE: + trimmed_column_name = f"trim(BOTH '\"' from {column_name})" + sql_type = f"'{sql_type}'" + return f"nullif(accurateCastOrNull({trimmed_column_name}, {sql_type}), 'null') as {column_name}" + if ( + self.destination_type == DestinationType.MYSQL + or self.destination_type == DestinationType.TIDB + or self.destination_type == DestinationType.DUCKDB + ): + return f'nullif(cast({column_name} as {sql_type}), "") as {column_name}' + replace_operation = jinja_call(f"empty_string_to_null({jinja_column})") + return f"cast({replace_operation} as {sql_type}) as {column_name}" + elif is_string(definition["type"]): + sql_type = jinja_call("dbt_utils.type_string()") + if self.destination_type == DestinationType.CLICKHOUSE: + trimmed_column_name = f"trim(BOTH '\"' from {column_name})" + sql_type = f"'{sql_type}'" + return f"nullif(accurateCastOrNull({trimmed_column_name}, {sql_type}), 'null') as {column_name}" + elif self.destination_type == DestinationType.MYSQL: + # Cast to `text` datatype. 
See https://github.com/airbytehq/airbyte/issues/7994 + sql_type = f"{sql_type}(1024)" + else: + print(f"WARN: Unknown type {definition['type']} for column {property_name} at {self.current_json_path()}") + return column_name + + if self.destination_type == DestinationType.CLICKHOUSE: + return f"accurateCastOrNull({column_name}, '{sql_type}') as {column_name}" + else: + return f"cast({column_name} as {sql_type}) as {column_name}" + + @staticmethod + def generate_mysql_date_format_statement(column_name: str) -> Any: + template = Template( + """ + case when {{column_name}} = '' then NULL + else cast({{column_name}} as date) + end as {{column_name}} + """ + ) + return template.render(column_name=column_name) + + @staticmethod + def generate_mysql_datetime_format_statement(column_name: str) -> Any: + regexp = r"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}.*" + template = Template( + """ + case when {{column_name}} regexp '{{regexp}}' THEN STR_TO_DATE(SUBSTR({{column_name}}, 1, 19), '%Y-%m-%dT%H:%i:%S') + else cast(if({{column_name}} = '', NULL, {{column_name}}) as datetime) + end as {{column_name}} + """ + ) + return template.render(column_name=column_name, regexp=regexp) + + @staticmethod + def generate_snowflake_timestamp_tz_statement(column_name: str) -> Any: + """ + Generates snowflake DB specific timestamp case when statement + """ + formats = [ + {"regex": r"\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{4}", "format": "YYYY-MM-DDTHH24:MI:SSTZHTZM"}, + {"regex": r"\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}(\\+|-)\\d{2}", "format": "YYYY-MM-DDTHH24:MI:SSTZH"}, + { + "regex": r"\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{4}", + "format": "YYYY-MM-DDTHH24:MI:SS.FFTZHTZM", + }, + {"regex": r"\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}(\\+|-)\\d{2}", "format": "YYYY-MM-DDTHH24:MI:SS.FFTZH"}, + ] + template = Template( + """ + case +{% for format_item in formats %} + when {{column_name}} regexp '{{format_item['regex']}}' then to_timestamp_tz({{column_name}}, '{{format_item['format']}}') +{% endfor %} + when {{column_name}} = '' then NULL + else to_timestamp_tz({{column_name}}) + end as {{column_name}} + """ + ) + return template.render(formats=formats, column_name=column_name) + + @staticmethod + def generate_snowflake_timestamp_statement(column_name: str) -> Any: + """ + Generates snowflake DB specific timestamp case when statement + """ + formats = [ + {"regex": r"\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}", "format": "YYYY-MM-DDTHH24:MI:SS"}, + {"regex": r"\\d{4}-\\d{2}-\\d{2}T(\\d{2}:){2}\\d{2}\\.\\d{1,7}", "format": "YYYY-MM-DDTHH24:MI:SS.FF"}, + ] + template = Template( + """ + case +{% for format_item in formats %} + when {{column_name}} regexp '{{format_item['regex']}}' then to_timestamp({{column_name}}, '{{format_item['format']}}') +{% endfor %} + when {{column_name}} = '' then NULL + else to_timestamp({{column_name}}) + end as {{column_name}} + """ + ) + return template.render(formats=formats, column_name=column_name) + + def generate_id_hashing_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any: + + template = Template( + """ +-- SQL model to build a hash column based on the values of this record +-- depends_on: {{ from_table }} +select + {{ '{{' }} dbt_utils.surrogate_key([ +{%- if parent_hash_id %} + {{ parent_hash_id }}, +{%- endif %} +{%- for field in fields %} + {{ field }}, +{%- endfor %} + ]) {{ '}}' }} as {{ hash_id }}, + tmp.* +from {{ from_table }} tmp +{{ sql_table_comment }} +where 1 = 1 + """ + ) + + sql = template.render( + 
+            parent_hash_id=self.parent_hash_id(in_jinja=True),
+            fields=self.safe_cast_to_strings(column_names),
+            hash_id=self.hash_id(),
+            from_table=jinja_call(from_table),
+            sql_table_comment=self.sql_table_comment(),
+        )
+        return sql
+
+    def safe_cast_to_strings(self, column_names: Dict[str, Tuple[str, str]]) -> List[str]:
+        return [
+            StreamProcessor.safe_cast_to_string(self.properties[field], column_names[field][1], self.destination_type)
+            for field in column_names
+        ]
+
+    @staticmethod
+    def safe_cast_to_string(definition: Dict, column_name: str, destination_type: DestinationType) -> str:
+        """
+        Note that the result from this static method should always be used within a
+        jinja context (for example, from the jinja macro surrogate_key call)
+
+        The remove_jinja call is necessary for the Oracle database: some columns
+        are created with {{ quote('column_name') }} and the same fields are reused for this
+        operation. Because the quote is injected inside a jinja macro, we need to remove
+        the curly brackets.
+        """
+
+        if "type" not in definition:
+            col = column_name
+        elif is_boolean(definition["type"], definition):
+            col = f"boolean_to_string({column_name})"
+        elif is_array(definition["type"]):
+            col = f"array_to_string({column_name})"
+        elif is_object(definition["type"]):
+            col = f"object_to_string({column_name})"
+        else:
+            col = column_name
+
+        if destination_type == DestinationType.ORACLE:
+            quote_in_parenthesis = re.compile(r"quote\((.*)\)")
+            return remove_jinja(col) if quote_in_parenthesis.findall(col) else col
+
+        return col
+
+    def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> Any:
+        """
+        This model pulls data from the ID-hashing model and appends it to a log of record updates. When inserting an update to a record, it also
+        checks whether that record had a previously-existing row in the SCD model; if it does, then that previous row's end_at column is set to
+        the new update's start_at.
+
+        See the docs for more details: https://docs.airbyte.com/understanding-airbyte/basic-normalization#normalization-metadata-columns
+        """
+        cursor_field = self.get_cursor_field(column_names)
+        order_null = f"is null asc,\n    {cursor_field} desc"
+        if self.destination_type.value == DestinationType.ORACLE.value:
+            order_null = "desc nulls last"
+        if self.destination_type.value == DestinationType.MSSQL.value:
+            # SQL Server treats NULL values as the lowest values, thus NULLs come last when ordering desc.
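+            # e.g. "order by <cursor_field> desc" already places NULL cursor values last on MSSQL,
+            # so no explicit "is null asc" clause is needed (illustrative of the intent above).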
+ order_null = "desc" + + lag_begin = "lag" + lag_end = "" + input_data_table = "input_data" + if self.destination_type == DestinationType.CLICKHOUSE: + # ClickHouse doesn't support lag() yet, this is a workaround solution + # Ref: https://clickhouse.com/docs/en/sql-reference/window-functions/ + lag_begin = "anyOrNull" + lag_end = " ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING" + input_data_table = "input_data_with_active_row_num" + + enable_left_join_null = "" + cast_begin = "cast(" + cast_as = " as " + cast_end = ")" + if self.destination_type == DestinationType.CLICKHOUSE: + enable_left_join_null = "--" + cast_begin = "accurateCastOrNull(" + cast_as = ", '" + cast_end = "')" + + # TODO move all cdc columns out of scd models + cdc_active_row_pattern = "" + cdc_updated_order_pattern = "" + cdc_cols = "" + quoted_cdc_cols = "" + if "_ab_cdc_deleted_at" in column_names.keys(): + col_cdc_deleted_at = self.name_transformer.normalize_column_name("_ab_cdc_deleted_at") + col_cdc_updated_at = self.name_transformer.normalize_column_name("_ab_cdc_updated_at") + quoted_col_cdc_deleted_at = self.name_transformer.normalize_column_name("_ab_cdc_deleted_at", in_jinja=True) + quoted_col_cdc_updated_at = self.name_transformer.normalize_column_name("_ab_cdc_updated_at", in_jinja=True) + cdc_active_row_pattern = f" and {col_cdc_deleted_at} is null" + cdc_updated_order_pattern = f"\n {col_cdc_updated_at} desc," + cdc_cols = ( + f", {cast_begin}{col_cdc_deleted_at}{cast_as}" + + "{{ dbt_utils.type_string() }}" + + f"{cast_end}" + + f", {cast_begin}{col_cdc_updated_at}{cast_as}" + + "{{ dbt_utils.type_string() }}" + + f"{cast_end}" + ) + quoted_cdc_cols = f", {quoted_col_cdc_deleted_at}, {quoted_col_cdc_updated_at}" + + if "_ab_cdc_log_pos" in column_names.keys(): + col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos") + quoted_col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos", in_jinja=True) + cdc_updated_order_pattern += f"\n {col_cdc_log_pos} desc," + cdc_cols += "".join([", ", cast_begin, col_cdc_log_pos, cast_as, "{{ dbt_utils.type_string() }}", cast_end]) + quoted_cdc_cols += f", {quoted_col_cdc_log_pos}" + + if "_ab_cdc_lsn" in column_names.keys(): + col_cdc_lsn = self.name_transformer.normalize_column_name("_ab_cdc_lsn") + quoted_col_cdc_lsn = self.name_transformer.normalize_column_name("_ab_cdc_lsn", in_jinja=True) + cdc_updated_order_pattern += f"\n {col_cdc_lsn} desc," + cdc_cols += "".join([", ", cast_begin, col_cdc_lsn, cast_as, "{{ dbt_utils.type_string() }}", cast_end]) + quoted_cdc_cols += f", {quoted_col_cdc_lsn}" + + if ( + self.destination_type == DestinationType.BIGQUERY + and self.get_cursor_field_property_name(column_names) != self.airbyte_emitted_at + and is_number(self.properties[self.get_cursor_field_property_name(column_names)]["type"]) + ): + # partition by float columns is not allowed in BigQuery, cast it to string + airbyte_start_at_string = ( + cast_begin + + self.name_transformer.normalize_column_name("_airbyte_start_at") + + cast_as + + "{{ dbt_utils.type_string() }}" + + cast_end + ) + else: + airbyte_start_at_string = self.name_transformer.normalize_column_name("_airbyte_start_at") + + jinja_variables = { + "active_row": self.name_transformer.normalize_column_name("_airbyte_active_row"), + "airbyte_end_at": self.name_transformer.normalize_column_name("_airbyte_end_at"), + "airbyte_row_num": self.name_transformer.normalize_column_name("_airbyte_row_num"), + "airbyte_start_at": 
self.name_transformer.normalize_column_name("_airbyte_start_at"), + "airbyte_start_at_string": airbyte_start_at_string, + "airbyte_unique_key_scd": self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), + "cdc_active_row": cdc_active_row_pattern, + "cdc_cols": cdc_cols, + "cdc_updated_at_order": cdc_updated_order_pattern, + "col_ab_id": self.get_ab_id(), + "col_emitted_at": self.get_emitted_at(), + "col_normalized_at": self.get_normalized_at(), + "cursor_field": cursor_field, + "enable_left_join_null": enable_left_join_null, + "fields": self.list_fields(column_names), + "from_table": from_table, + "hash_id": self.hash_id(), + "incremental_clause": self.get_incremental_clause("this"), + "input_data_table": input_data_table, + "lag_begin": lag_begin, + "lag_end": lag_end, + "order_null": order_null, + "parent_hash_id": self.parent_hash_id(), + "primary_key_partition": self.get_primary_key_partition(column_names), + "primary_keys": self.list_primary_keys(column_names), + "quoted_airbyte_row_num": self.name_transformer.normalize_column_name("_airbyte_row_num", in_jinja=True), + "quoted_airbyte_start_at": self.name_transformer.normalize_column_name("_airbyte_start_at", in_jinja=True), + "quoted_cdc_cols": quoted_cdc_cols, + "quoted_col_emitted_at": self.get_emitted_at(in_jinja=True), + "quoted_unique_key": self.get_unique_key(in_jinja=True), + "sql_table_comment": self.sql_table_comment(include_from_table=True), + "unique_key": self.get_unique_key(), + } + if self.destination_type == DestinationType.CLICKHOUSE: + clickhouse_active_row_sql = Template( + """ +input_data_with_active_row_num as ( + select *, + row_number() over ( + partition by {{ primary_key_partition | join(", ") }} + order by + {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} + {{ col_emitted_at }} desc + ) as _airbyte_active_row_num + from input_data +),""" + ).render(jinja_variables) + jinja_variables["clickhouse_active_row_sql"] = clickhouse_active_row_sql + scd_columns_sql = Template( + """ + case when _airbyte_active_row_num = 1{{ cdc_active_row }} then 1 else 0 end as {{ active_row }}, + {{ lag_begin }}({{ cursor_field }}) over ( + partition by {{ primary_key_partition | join(", ") }} + order by + {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} + {{ col_emitted_at }} desc + {{ lag_end }}) as {{ airbyte_end_at }}""" + ).render(jinja_variables) + jinja_variables["scd_columns_sql"] = scd_columns_sql + else: + scd_columns_sql = Template( + """ + lag({{ cursor_field }}) over ( + partition by {{ primary_key_partition | join(", ") }} + order by + {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} + {{ col_emitted_at }} desc + ) as {{ airbyte_end_at }}, + case when row_number() over ( + partition by {{ primary_key_partition | join(", ") }} + order by + {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} + {{ col_emitted_at }} desc + ) = 1{{ cdc_active_row }} then 1 else 0 end as {{ active_row }}""" + ).render(jinja_variables) + jinja_variables["scd_columns_sql"] = scd_columns_sql + sql = Template( + """ +-- depends_on: {{ from_table }} +with +{{ '{% if is_incremental() %}' }} +new_data as ( + -- retrieve incremental "new" data + select + * + from {{'{{'}} {{ from_table }} {{'}}'}} + {{ sql_table_comment }} + where 1 = 1 + {{ incremental_clause }} +), +new_data_ids as ( + -- build a subset of {{ unique_key }} from rows that are new + select distinct + {{ '{{' }} dbt_utils.surrogate_key([ +{%- for primary_key in primary_keys %} + {{ primary_key }}, +{%- 
endfor %} + ]) {{ '}}' }} as {{ unique_key }} + from new_data +), +empty_new_data as ( + -- build an empty table to only keep the table's column types + select * from new_data where 1 = 0 +), +previous_active_scd_data as ( + -- retrieve "incomplete old" data that needs to be updated with an end date because of new changes + select + {{ '{{' }} star_intersect({{ from_table }}, this, from_alias='inc_data', intersect_alias='this_data') {{ '}}' }} + from {{ '{{ this }}' }} as this_data + -- make a join with new_data using primary key to filter active data that need to be updated only + join new_data_ids on this_data.{{ unique_key }} = new_data_ids.{{ unique_key }} + -- force left join to NULL values (we just need to transfer column types only for the star_intersect macro on schema changes) + {{ enable_left_join_null }}left join empty_new_data as inc_data on this_data.{{ col_ab_id }} = inc_data.{{ col_ab_id }} + where {{ active_row }} = 1 +), +input_data as ( + select {{ '{{' }} dbt_utils.star({{ from_table }}) {{ '}}' }} from new_data + union all + select {{ '{{' }} dbt_utils.star({{ from_table }}) {{ '}}' }} from previous_active_scd_data +), +{{ '{% else %}' }} +input_data as ( + select * + from {{'{{'}} {{ from_table }} {{'}}'}} + {{ sql_table_comment }} +), +{{ '{% endif %}' }} +{{ clickhouse_active_row_sql }} +scd_data as ( + -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key + select +{%- if parent_hash_id %} + {{ parent_hash_id }}, +{%- endif %} + {{ '{{' }} dbt_utils.surrogate_key([ +{%- for primary_key in primary_keys %} + {{ primary_key }}, +{%- endfor %} + ]) {{ '}}' }} as {{ unique_key }}, +{%- for field in fields %} + {{ field }}, +{%- endfor %} + {{ cursor_field }} as {{ airbyte_start_at }}, + {{ scd_columns_sql }}, + {{ col_ab_id }}, + {{ col_emitted_at }}, + {{ hash_id }} + from {{ input_data_table }} +), +dedup_data as ( + select + -- we need to ensure de-duplicated rows for merge/update queries + -- additionally, we generate a unique key for the scd table + row_number() over ( + partition by + {{ unique_key }}, + {{ airbyte_start_at_string }}, + {{ col_emitted_at }}{{ cdc_cols }} + order by {{ active_row }} desc, {{ col_ab_id }} + ) as {{ airbyte_row_num }}, + {{ '{{' }} dbt_utils.surrogate_key([ + {{ quoted_unique_key }}, + {{ quoted_airbyte_start_at }}, + {{ quoted_col_emitted_at }}{{ quoted_cdc_cols }} + ]) {{ '}}' }} as {{ airbyte_unique_key_scd }}, + scd_data.* + from scd_data +) +select +{%- if parent_hash_id %} + {{ parent_hash_id }}, +{%- endif %} + {{ unique_key }}, + {{ airbyte_unique_key_scd }}, +{%- for field in fields %} + {{ field }}, +{%- endfor %} + {{ airbyte_start_at }}, + {{ airbyte_end_at }}, + {{ active_row }}, + {{ col_ab_id }}, + {{ col_emitted_at }}, + {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }}, + {{ hash_id }} +from dedup_data where {{ airbyte_row_num }} = 1 +""" + ).render(jinja_variables) + return sql + + def get_cursor_field_property_name(self, column_names: Dict[str, Tuple[str, str]]) -> str: + if not self.cursor_field: + if "_ab_cdc_updated_at" in column_names.keys(): + return "_ab_cdc_updated_at" + elif "_ab_cdc_log_pos" in column_names.keys(): + return "_ab_cdc_log_pos" + elif "_ab_cdc_lsn" in column_names.keys(): + return "_ab_cdc_lsn" + else: + return self.airbyte_emitted_at + elif len(self.cursor_field) == 1: + return self.cursor_field[0] + else: + raise ValueError(f"Unsupported nested cursor field {'.'.join(self.cursor_field)} for stream {self.stream_name}") 
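+
+    # Illustration (hypothetical stream) of the fallback above: with no configured cursor_field and
+    # CDC columns present, the effective cursor is chosen in this order:
+    #   _ab_cdc_updated_at -> _ab_cdc_log_pos -> _ab_cdc_lsn -> _airbyte_emitted_at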
+ + def get_cursor_field(self, column_names: Dict[str, Tuple[str, str]], in_jinja: bool = False) -> str: + if not self.cursor_field: + cursor = self.name_transformer.normalize_column_name(self.get_cursor_field_property_name(column_names), in_jinja) + elif len(self.cursor_field) == 1: + if not is_airbyte_column(self.cursor_field[0]): + cursor = column_names[self.cursor_field[0]][0] + else: + # using an airbyte generated column + cursor = self.cursor_field[0] + else: + raise ValueError(f"Unsupported nested cursor field {'.'.join(self.cursor_field)} for stream {self.stream_name}") + return cursor + + def list_primary_keys(self, column_names: Dict[str, Tuple[str, str]]) -> List[str]: + primary_keys = [] + for key_path in self.primary_key: + if len(key_path) == 1: + primary_keys.append(column_names[key_path[0]][1]) + else: + raise ValueError(f"Unsupported nested path {'.'.join(key_path)} for stream {self.stream_name}") + return primary_keys + + def get_primary_key_partition(self, column_names: Dict[str, Tuple[str, str]]) -> List[str]: + if self.primary_key and len(self.primary_key) > 0: + return [self.get_primary_key_from_path(column_names, path) for path in self.primary_key] + else: + raise ValueError(f"No primary key specified for stream {self.stream_name}") + + def get_primary_key_from_path(self, column_names: Dict[str, Tuple[str, str]], path: List[str]) -> str: + if path and len(path) == 1: + field = path[0] + if not is_airbyte_column(field): + if "type" in self.properties[field]: + property_type = self.properties[field]["type"] + else: + property_type = "object" + if is_number(property_type) or is_object(property_type): + # some destinations don't handle float columns (or complex types) as primary keys, turn them to string + return f"cast({column_names[field][0]} as {jinja_call('dbt_utils.type_string()')})" + else: + return column_names[field][0] + else: + # using an airbyte generated column + return f"cast({field} as {jinja_call('dbt_utils.type_string()')})" + else: + if path: + raise ValueError(f"Unsupported nested path {'.'.join(path)} for stream {self.stream_name}") + else: + raise ValueError(f"No path specified for stream {self.stream_name}") + + def generate_final_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]], unique_key: str = "") -> Any: + """ + This is the table that the user actually wants. 
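+        It is materialized under the destination schema rather than the intermediate raw schema.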
In addition to the columns that the source outputs, it has some additional metadata columns; + see the basic normalization docs for an explanation: https://docs.airbyte.com/understanding-airbyte/basic-normalization#normalization-metadata-columns + """ + template = Template( + """ +-- Final base SQL model +-- depends_on: {{ from_table }} +select +{%- if parent_hash_id %} + {{ parent_hash_id }}, +{%- endif %} +{%- if unique_key %} + {{ unique_key }}, +{%- endif %} +{%- for field in fields %} + {{ field }}, +{%- endfor %} + {{ col_ab_id }}, + {{ col_emitted_at }}, + {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }}, + {{ hash_id }} +from {{ from_table }} +{{ sql_table_comment }} +where 1 = 1 + """ + ) + sql = template.render( + col_ab_id=self.get_ab_id(), + col_emitted_at=self.get_emitted_at(), + col_normalized_at=self.get_normalized_at(), + parent_hash_id=self.parent_hash_id(), + fields=self.list_fields(column_names), + hash_id=self.hash_id(), + from_table=jinja_call(from_table), + sql_table_comment=self.sql_table_comment(include_from_table=True), + unique_key=unique_key, + ) + return sql + + @staticmethod + def is_incremental_mode(destination_sync_mode: DestinationSyncMode) -> bool: + return destination_sync_mode.value in [DestinationSyncMode.append.value, DestinationSyncMode.append_dedup.value] + + def add_incremental_clause(self, sql_query: str) -> Any: + template = Template( + """ +{{ sql_query }} +{{ incremental_clause }} + """ + ) + sql = template.render(sql_query=sql_query, incremental_clause=self.get_incremental_clause("this")) + return sql + + def get_incremental_clause(self, tablename: str) -> Any: + return self.get_incremental_clause_for_column(tablename, self.get_emitted_at(in_jinja=True)) + + def get_incremental_clause_for_column(self, tablename: str, column: str) -> Any: + return "{{ incremental_clause(" + column + ", " + tablename + ") }}" + + @staticmethod + def list_fields(column_names: Dict[str, Tuple[str, str]]) -> List[str]: + return [column_names[field][0] for field in column_names] + + def add_to_outputs( + self, + sql: str, + materialization_mode: TableMaterializationType, + is_intermediate: bool = True, + suffix: str = "", + unique_key: str = "", + subdir: str = "", + partition_by: PartitionScheme = PartitionScheme.DEFAULT, + ) -> str: + # Explicit function so that we can have type hints to satisfy the linter + def wrap_in_quotes(s: str) -> str: + return '"' + s + '"' + + schema = self.get_schema(is_intermediate) + # MySQL table names need to be manually truncated, because it does not do it automatically + truncate_name = ( + self.destination_type == DestinationType.MYSQL + or self.destination_type == DestinationType.TIDB + or self.destination_type == DestinationType.DUCKDB + ) + table_name = self.tables_registry.get_table_name(schema, self.json_path, self.stream_name, suffix, truncate_name) + file_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, suffix, truncate_name) + file = f"{file_name}.sql" + output = os.path.join(materialization_mode.value, subdir, self.schema, file) + config = self.get_model_partition_config(partition_by, unique_key) + if file_name != table_name: + # The alias() macro configs a model's final table name. 
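+            # e.g. (hypothetical) a registry file name deduplicated to "users_xyz_scd" can still be
+            # aliased to the intended "users_scd" table name in the destination.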
+ config["alias"] = f'"{table_name}"' + if self.destination_type == DestinationType.ORACLE: + # oracle does not allow changing schemas + config["schema"] = f'"{self.default_schema}"' + else: + config["schema"] = f'"{schema}"' + if self.is_incremental_mode(self.destination_sync_mode): + stg_schema = self.get_schema(True) + stg_table = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "stg", truncate_name) + if self.name_transformer.needs_quotes(stg_table): + stg_table = jinja_call(self.name_transformer.apply_quote(stg_table)) + if suffix == "scd": + hooks = [] + + final_table_name = self.tables_registry.get_file_name(schema, self.json_path, self.stream_name, "", truncate_name) + active_row_column_name = self.name_transformer.normalize_column_name("_airbyte_active_row") + clickhouse_nullable_join_setting = "" + if self.destination_type == DestinationType.CLICKHOUSE: + # Clickhouse has special delete syntax + delete_statement = "alter table {{ final_table_relation }} delete" + unique_key_reference = self.get_unique_key(in_jinja=False) + noop_delete_statement = "alter table {{ this }} delete where 1=0" + # Without this, our LEFT JOIN would return empty string for non-matching rows, so our COUNT would include those rows. + # We want to exclude them (this is the default behavior in other DBs) so we have to set join_use_nulls=1 + clickhouse_nullable_join_setting = "SETTINGS join_use_nulls=1" + elif self.destination_type == DestinationType.BIGQUERY: + # Bigquery doesn't like the "delete from project.schema.table where project.schema.table.column in" syntax; + # it requires "delete from project.schema.table table_alias where table_alias.column in" + delete_statement = "delete from {{ final_table_relation }} final_table" + unique_key_reference = "final_table." + self.get_unique_key(in_jinja=False) + noop_delete_statement = "delete from {{ this }} where 1=0" + else: + delete_statement = "delete from {{ final_table_relation }}" + unique_key_reference = "{{ final_table_relation }}." + self.get_unique_key(in_jinja=False) + noop_delete_statement = "delete from {{ this }} where 1=0" + deletion_hook = Template( + """ + {{ '{%' }} + set final_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='{{ final_table_name }}' + ) + {{ '%}' }} + {{ '{#' }} + If the final table doesn't exist, then obviously we can't delete anything from it. + Also, after a reset, the final table is created without the _airbyte_unique_key column (this column is created during the first sync) + So skip this deletion if the column doesn't exist. (in this case, the table is guaranteed to be empty anyway) + {{ '#}' }} + {{ '{%' }} + if final_table_relation is not none and {{ quoted_unique_key }} in adapter.get_columns_in_relation(final_table_relation)|map(attribute='name') + {{ '%}' }} + + -- Delete records which are no longer active: + -- This query is equivalent, but the left join version is more performant: + -- delete from final_table where unique_key in ( + -- select unique_key from scd_table where 1 = 1 + -- ) and unique_key not in ( + -- select unique_key from scd_table where active_row = 1 + -- ) + -- We're incremental against normalized_at rather than emitted_at because we need to fetch the SCD + -- entries that were _updated_ recently. This is because a deleted record will have an SCD record + -- which was emitted a long time ago, but recently re-normalized to have active_row = 0. 
+ {{ delete_statement }} where {{ unique_key_reference }} in ( + select recent_records.unique_key + from ( + select distinct {{ unique_key }} as unique_key + from {{ '{{ this }}' }} + where 1=1 {{ normalized_at_incremental_clause }} + ) recent_records + left join ( + select {{ unique_key }} as unique_key, count({{ unique_key }}) as active_count + from {{ '{{ this }}' }} + where {{ active_row_column_name }} = 1 {{ normalized_at_incremental_clause }} + group by {{ unique_key }} + ) active_counts + on recent_records.unique_key = active_counts.unique_key + where active_count is null or active_count = 0 + ) + {{ '{% else %}' }} + -- We have to have a non-empty query, so just do a noop delete + {{ noop_delete_statement }} + {{ '{% endif %}' }} + """ + ).render( + delete_statement=delete_statement, + noop_delete_statement=noop_delete_statement, + final_table_name=final_table_name, + unique_key=self.get_unique_key(in_jinja=False), + quoted_unique_key=self.get_unique_key(in_jinja=True), + active_row_column_name=active_row_column_name, + normalized_at_incremental_clause=self.get_incremental_clause_for_column( + "{} + '.' + {}".format( + self.name_transformer.apply_quote("this.schema", literal=False), + self.name_transformer.apply_quote(final_table_name), + ), + self.get_normalized_at(in_jinja=True), + ), + unique_key_reference=unique_key_reference, + clickhouse_nullable_join_setting=clickhouse_nullable_join_setting, + ) + hooks.append(deletion_hook) + + if self.destination_type.value == DestinationType.POSTGRES.value: + # Keep only rows with the max emitted_at to keep incremental behavior + hooks.append( + f"delete from {stg_schema}.{stg_table} where {self.airbyte_emitted_at} != (select max({self.airbyte_emitted_at}) from {stg_schema}.{stg_table})", + ) + else: + hooks.append(f"drop view {stg_schema}.{stg_table}") + + config["post_hook"] = "[" + ",".join(map(wrap_in_quotes, hooks)) + "]" + else: + # incremental is handled in the SCD SQL already + sql = self.add_incremental_clause(sql) + elif self.destination_sync_mode == DestinationSyncMode.overwrite: + if suffix == "" and not is_intermediate: + # drop SCD table after creating the destination table + scd_table_name = self.tables_registry.get_table_name(schema, self.json_path, self.stream_name, "scd", truncate_name) + print(f" Adding drop table hook for {scd_table_name} to {file_name}") + hooks = [ + Template( + """ + {{ '{%' }} + set scd_table_relation = adapter.get_relation( + database=this.database, + schema=this.schema, + identifier='{{ scd_table_name }}' + ) + {{ '%}' }} + {{ '{%' }} + if scd_table_relation is not none + {{ '%}' }} + {{ '{%' }} + do adapter.drop_relation(scd_table_relation) + {{ '%}' }} + {{ '{% endif %}' }} + """ + ).render(scd_table_name=scd_table_name) + ] + config["post_hook"] = "[" + ",".join(map(wrap_in_quotes, hooks)) + "]" + template = Template( + """ +{{ '{{' }} config( +{%- for key in config %} + {{ key }} = {{ config[key] }}, +{%- endfor %} + tags = [ {{ tags }} ] +) {{ '}}' }} +{{ sql }} + """ + ) + + self.sql_outputs[output] = template.render(config=config, sql=sql, tags=self.get_model_tags(is_intermediate)) + json_path = self.current_json_path() + print(f" Generating {output} from {json_path}") + self.models_to_source[file_name] = self.get_stream_source() + return str(dbt_macro.Ref(file_name)) + + def get_model_materialization_mode(self, is_intermediate: bool, column_count: int = 0) -> TableMaterializationType: + if is_intermediate: + if column_count <= MAXIMUM_COLUMNS_TO_USE_EPHEMERAL: + return 
TableMaterializationType.CTE
+            else:
+                # dbt throws "maximum recursion depth exceeded" exception at runtime
+                # if ephemeral is used with a large number of columns, use views instead
+                return TableMaterializationType.VIEW
+        else:
+            if self.is_incremental_mode(self.destination_sync_mode):
+                return TableMaterializationType.INCREMENTAL
+            else:
+                return TableMaterializationType.TABLE
+
+    def get_model_partition_config(self, partition_by: PartitionScheme, unique_key: str) -> Dict:
+        """
+        Defines partition, clustering and unique key parameters for each destination.
+        The goal of these is to make reads more performant.
+
+        In general, we need to do lookups on the last emitted_at column to know whether a record is freshly produced and needs to be
+        incrementally processed or not.
+        But in certain models, such as SCD tables, we also need to retrieve older data to update their type 2 SCD end_dates,
+        thus a different partitioning scheme is used to optimize that use case.
+        """
+        config = {}
+        if self.destination_type == DestinationType.BIGQUERY:
+            # see https://docs.getdbt.com/reference/resource-configs/bigquery-configs
+            if partition_by == PartitionScheme.UNIQUE_KEY:
+                config["cluster_by"] = f'["{self.airbyte_unique_key}","{self.airbyte_emitted_at}"]'
+            elif partition_by == PartitionScheme.ACTIVE_ROW:
+                config["cluster_by"] = f'["{self.airbyte_unique_key}_scd","{self.airbyte_emitted_at}"]'
+            else:
+                config["cluster_by"] = f'"{self.airbyte_emitted_at}"'
+            if partition_by == PartitionScheme.ACTIVE_ROW:
+                config["partition_by"] = (
+                    '{"field": "_airbyte_active_row", "data_type": "int64", ' '"range": {"start": 0, "end": 1, "interval": 1}}'
+                )
+            elif partition_by == PartitionScheme.NOTHING:
+                pass
+            else:
+                config["partition_by"] = '{"field": "' + self.airbyte_emitted_at + '", "data_type": "timestamp", "granularity": "day"}'
+        elif self.destination_type == DestinationType.POSTGRES:
+            # see https://docs.getdbt.com/reference/resource-configs/postgres-configs
+            if partition_by == PartitionScheme.ACTIVE_ROW:
+                config["indexes"] = (
+                    "[{'columns':['_airbyte_active_row','"
+                    + self.airbyte_unique_key
+                    + "_scd','"
+                    + self.airbyte_emitted_at
+                    + "'],'type': 'btree'}]"
+                )
+            elif partition_by == PartitionScheme.UNIQUE_KEY:
+                config["indexes"] = "[{'columns':['" + self.airbyte_unique_key + "'],'unique':True}]"
+            else:
+                config["indexes"] = "[{'columns':['" + self.airbyte_emitted_at + "'],'type':'btree'}]"
+        elif self.destination_type == DestinationType.REDSHIFT:
+            # see https://docs.getdbt.com/reference/resource-configs/redshift-configs
+            if partition_by == PartitionScheme.ACTIVE_ROW:
+                config["sort"] = f'["_airbyte_active_row", "{self.airbyte_unique_key}_scd", "{self.airbyte_emitted_at}"]'
+            elif partition_by == PartitionScheme.UNIQUE_KEY:
+                config["sort"] = f'["{self.airbyte_unique_key}", "{self.airbyte_emitted_at}"]'
+            elif partition_by == PartitionScheme.NOTHING:
+                pass
+            else:
+                config["sort"] = f'"{self.airbyte_emitted_at}"'
+        elif self.destination_type == DestinationType.SNOWFLAKE:
+            # see https://docs.getdbt.com/reference/resource-configs/snowflake-configs
+            if partition_by == PartitionScheme.ACTIVE_ROW:
+                config[
+                    "cluster_by"
+                ] = f'["_AIRBYTE_ACTIVE_ROW", "{self.airbyte_unique_key.upper()}_SCD", "{self.airbyte_emitted_at.upper()}"]'
+            elif partition_by == PartitionScheme.UNIQUE_KEY:
+                config["cluster_by"] = f'["{self.airbyte_unique_key.upper()}", "{self.airbyte_emitted_at.upper()}"]'
+            elif partition_by == PartitionScheme.NOTHING:
+                pass
+            else:
+                config["cluster_by"] = f'["{self.airbyte_emitted_at.upper()}"]'
f'["{self.airbyte_emitted_at.upper()}"]' + if unique_key: + config["unique_key"] = f'"{unique_key}"' + elif not self.parent: + # in nested arrays, each element is sharing the same _airbyte_ab_id, so it's not unique + config["unique_key"] = self.get_ab_id(in_jinja=True) + return config + + def get_model_tags(self, is_intermediate: bool) -> str: + tags = "" + if self.parent: + tags += "nested" + else: + tags += "top-level" + if is_intermediate: + tags += "-intermediate" + return f'"{tags}"' + + def get_schema(self, is_intermediate: bool) -> str: + if is_intermediate: + return self.raw_schema + else: + return self.schema + + def current_json_path(self) -> str: + return "/".join(self.json_path) + + def normalized_stream_name(self) -> str: + """ + This is the normalized name of this stream to be used as a table (different as referring it as a column). + Note that it might not be the actual table name in case of collisions with other streams (see actual_table_name)... + """ + return self.name_transformer.normalize_table_name(self.stream_name) + + def sql_table_comment(self, include_from_table: bool = False) -> str: + result = f"-- {self.normalized_stream_name()}" + if len(self.json_path) > 1: + result += f" at {self.current_json_path()}" + if include_from_table: + from_table = jinja_call(self.from_table) + result += f" from {from_table}" + return result + + def hash_id(self, in_jinja: bool = False) -> str: + hash_id_col = f"_airbyte_{self.normalized_stream_name()}_hashid" + if self.parent: + if self.normalized_stream_name().lower() == self.parent.stream_name.lower(): + level = len(self.json_path) + hash_id_col = f"_airbyte_{self.normalized_stream_name()}_{level}_hashid" + + return self.name_transformer.normalize_column_name(hash_id_col, in_jinja) + + # Nested Streams + + def parent_hash_id(self, in_jinja: bool = False) -> str: + if self.parent: + return self.parent.hash_id(in_jinja) + return "" + + def unnesting_before_query(self, from_table: str) -> str: + if self.parent and self.is_nested_array: + parent_stream_name = f"'{self.parent.normalized_stream_name()}'" + quoted_field = self.name_transformer.normalize_column_name(self.stream_name, in_jinja=True) + return jinja_call(f"unnest_cte({from_table}, {parent_stream_name}, {quoted_field})") + return "" + + def unnesting_from(self) -> str: + if self.parent: + if self.is_nested_array: + parent_stream_name = f"'{self.parent.normalized_stream_name()}'" + quoted_field = self.name_transformer.normalize_column_name(self.stream_name, in_jinja=True) + return jinja_call(f"cross_join_unnest({parent_stream_name}, {quoted_field})") + return "" + + def unnesting_where(self) -> str: + if self.parent: + column_name = self.name_transformer.normalize_column_name(self.stream_name) + return f"and {column_name} is not null" + return "" + + +# Static Functions + + +def find_properties_object(path: List[str], field: str, properties) -> Dict[str, Dict]: + """ + This function is trying to look for a nested "properties" node under the current JSON node to + identify all nested objects. 
diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/table_name_registry.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/table_name_registry.py
new file mode 100644
index 0000000000000..543554a340a37
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/table_name_registry.py
@@ -0,0 +1,376 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+import hashlib
+from typing import Dict, List
+
+from normalization import DestinationType
+from normalization.transform_catalog.destination_name_transformer import DestinationNameTransformer
+
+# minimum length of parent name used for nested streams
+MINIMUM_PARENT_LENGTH = 10
+
+
+class NormalizedNameMetadata:
+    """
+    A record of names collected by the TableNameRegistry
+    """
+
+    def __init__(self, intermediate_schema: str, schema: str, json_path: List[str], stream_name: str, table_name: str):
+        self.intermediate_schema: str = intermediate_schema
+        self.schema: str = schema
+        self.json_path: List[str] = json_path
+        self.stream_name: str = stream_name
+        self.table_name: str = table_name
+
+
+class ConflictedNameMetadata:
+    """
+    A record summary of a name conflict detected and resolved in TableNameRegistry
+    """
+
+    def __init__(self, schema: str, json_path: List[str], table_name_conflict: str, table_name_resolved: str):
+        self.schema: str = schema
+        self.json_path: List[str] = json_path
+        self.table_name_conflict: str = table_name_conflict
+        self.table_name_resolved: str = table_name_resolved
+
+
+class ResolvedNameMetadata:
+    """
+    A record of a name collected and resolved by the TableNameRegistry
+    """
+
+    def __init__(self, schema: str, table_name: str, file_name: str):
+        self.schema: str = schema
+        self.table_name: str = table_name
+        self.file_name: str = file_name
+
+
+class NormalizedTablesRegistry(Dict[str, List[NormalizedNameMetadata]]):
+    """
+    An intermediate registry used by TableNameRegistry to detect conflicts in table names per schema
+    """
+
+    def __init__(self, name_transformer: DestinationNameTransformer):
+        super(NormalizedTablesRegistry, self).__init__()
+        self.name_transformer = name_transformer
+
+    def add(
+        self, intermediate_schema: str, schema: str, json_path: List[str], stream_name: str, table_name: str
+    ) -> "NormalizedTablesRegistry":
+        key = self.get_table_key(schema, table_name)
+        if key not in self:
+            self[key] = []
+        self[key].append(NormalizedNameMetadata(intermediate_schema, schema, json_path, stream_name, table_name))
+        return self
+
+    def get_table_key(self, schema: str, table_name: str) -> str:
+        return (
+            f"{self.name_transformer.normalize_schema_name(schema, False, False)}."
+            f"{self.name_transformer.normalize_table_name(table_name, False, False)}"
+        )
+
+    def get_value(self, schema: str, table_name: str) -> List[NormalizedNameMetadata]:
+        return self[self.get_table_key(schema, table_name)]
+
+    def has_collisions(self, key: str) -> bool:
+        return len(self[key]) > 1
+
+
+class NormalizedFilesRegistry(Dict[str, List[NormalizedNameMetadata]]):
+    """
+    An intermediate registry used by TableNameRegistry to detect conflicts in file names
+    """
+
+    def __init__(self):
+        super(NormalizedFilesRegistry, self).__init__()
+
+    def add(
+        self, intermediate_schema: str, schema: str, json_path: List[str], stream_name: str, table_name: str
+    ) -> "NormalizedFilesRegistry":
+        if table_name not in self:
+            self[table_name] = []
+        self[table_name].append(NormalizedNameMetadata(intermediate_schema, schema, json_path, stream_name, table_name))
+        return self
+
+    def get_value(self, table_name: str) -> List[NormalizedNameMetadata]:
+        return self[table_name]
+
+    def has_collisions(self, table_name: str) -> bool:
+        return len(self[table_name]) > 1
+
+
+class TableNameRegistry:
+    """
+    A registry object that records table names being used during the run
+
+    This registry helps detect naming conflicts/collisions and decide how to resolve them.
+
+    First, we collect all schema/stream_name/json_path listed in the catalog to detect any collisions, whether from:
+    - table naming: truncated stream names could conflict with each other within the same destination schema
+    - file naming: dbt uses a global registry of file names without considering schemas, so two tables with the same name in different
+      schemas are valid, but dbt would fail to distinguish them. Thus, file names should be unique within a dbt project (for example,
+      by adding the schema name to the file name when such a collision occurs).
+
+    To do so, we first build a list of "simple" names without dealing with any collisions.
+    Next, we check if/when we encounter such naming conflicts. They usually happen when destinations require a certain naming convention
+    with a limited number of characters, so we end up truncating names and creating collisions.
+
+    In those cases, we resolve collisions with a more complex naming scheme, using a suffix generated from a hash of the full names to
+    keep them short and unique (but hard to remember/use).
+    """
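+
+    # For illustration (hypothetical names): if two streams both truncate to the same table name
+    # within one destination schema, each is re-registered under a name carrying a short hash
+    # suffix generated by get_hashed_table_name below, e.g. "my_long_stream_nam_1a2", where "1a2"
+    # stands for a 3-character hash of the full schema + json path.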
+ """ + + def __init__(self, destination_type: DestinationType): + """ + @param destination_type is the destination type of warehouse + """ + self.destination_type: DestinationType = destination_type + self.name_transformer: DestinationNameTransformer = DestinationNameTransformer(destination_type) + # Simple XXX registry are collecting "simple" XXX names (with potential collisions) + self.simple_file_registry: NormalizedFilesRegistry = NormalizedFilesRegistry() + self.simple_table_registry: NormalizedTablesRegistry = NormalizedTablesRegistry(self.name_transformer) + # Registry is the collision free (resolved) mapping of schema json_path of the stream to the names that should be used + self.registry: Dict[str, ResolvedNameMetadata] = {} + + def register_table(self, intermediate_schema: str, schema: str, stream_name: str, json_path: List[str]): + """ + Record usages of simple table and file names used by each stream (top level and nested) in both + intermediate_schema and schema. + + After going through all streams and sub-streams, we'll be able to find if any collisions are present within + this catalog. + """ + intermediate_schema = self.name_transformer.normalize_schema_name(intermediate_schema, False, False) + schema = self.name_transformer.normalize_schema_name(schema, False, False) + table_name = self.get_simple_table_name(json_path) + self.simple_table_registry.add(intermediate_schema, schema, json_path, stream_name, table_name) + + def get_simple_table_name(self, json_path: List[str]) -> str: + """ + Generates a simple table name, possibly in collisions within this catalog because of truncation + """ + return self.name_transformer.normalize_table_name("_".join(json_path)) + + def resolve_names(self) -> List[ConflictedNameMetadata]: + conflicts = self.resolve_table_names() + self.resolve_file_names() + return conflicts + + def resolve_table_names(self) -> List[ConflictedNameMetadata]: + """ + Build a collision free registry from all schema/stream_name/json_path collected so far. + """ + resolved_keys = [] + # deal with table name collisions within the same schema first. + # file name should be equal to table name here + table_count = 0 + + for key in self.simple_table_registry: + for value in self.simple_table_registry[key]: + table_count += 1 + if self.simple_table_registry.has_collisions(key): + # handle collisions with unique hashed names + table_name = self.get_hashed_table_name(value.schema, value.json_path, value.stream_name, value.table_name) + resolved_keys.append(ConflictedNameMetadata(value.schema, value.json_path, value.table_name, table_name)) + else: + table_name = value.table_name + self.registry[self.get_registry_key(value.intermediate_schema, value.json_path, value.stream_name)] = ResolvedNameMetadata( + value.intermediate_schema, + table_name, + # use table_name as file_name for now + table_name, + ) + self.registry[self.get_registry_key(value.schema, value.json_path, value.stream_name)] = ResolvedNameMetadata( + value.schema, + table_name, + # use table_name as file_name for now + table_name, + ) + self.simple_file_registry.add(value.intermediate_schema, value.schema, value.json_path, value.stream_name, table_name) + registry_size = len(self.registry) + + # Oracle doesnt support namespace and this break this logic. 
+        # Oracle doesn't support namespaces, which breaks this invariant.
+        if self.destination_type != DestinationType.ORACLE:
+            assert (table_count * 2) == registry_size, f"Mismatched number of tables {table_count * 2} vs {registry_size} being resolved"
+        return resolved_keys
+
+    def resolve_file_names(self):
+        # deal with file name collisions across schemas and update the file name to use in the registry when necessary
+        file_count = 0
+        for key in self.simple_file_registry:
+            for value in self.simple_file_registry[key]:
+                file_count += 1
+                if self.simple_file_registry.has_collisions(key):
+                    # handle collisions with unique hashed names including schema
+                    self.registry[
+                        self.get_registry_key(value.intermediate_schema, value.json_path, value.stream_name)
+                    ] = ResolvedNameMetadata(
+                        value.intermediate_schema, value.table_name, self.resolve_file_name(value.intermediate_schema, value.table_name)
+                    )
+                    self.registry[self.get_registry_key(value.schema, value.json_path, value.stream_name)] = ResolvedNameMetadata(
+                        value.schema, value.table_name, self.resolve_file_name(value.schema, value.table_name)
+                    )
+        registry_size = len(self.registry)
+
+        # Oracle doesn't support namespaces, which breaks this invariant.
+        if self.destination_type != DestinationType.ORACLE:
+            assert (file_count * 2) == registry_size, f"Mismatched number of tables {file_count * 2} vs {registry_size} being resolved"
+
+    def get_hashed_table_name(self, schema: str, json_path: List[str], stream_name: str, table_name: str) -> str:
+        """
+        Generates a unique table name to avoid collisions within this catalog.
+        This uses a hash of the full names, but the result is hard to read and remember, so it should be done rarely...
+        We'd prefer to use "simple" names instead as much as possible.
+        """
+        if len(json_path) == 1:
+            # collisions on a top level stream name: add a hash of schema + stream name to the (truncated?) table name to make it unique
+            result = self.name_transformer.normalize_table_name(f"{stream_name}_{hash_json_path([schema] + json_path)}")
+        else:
+            # collisions on a nested sub-stream
+            result = self.name_transformer.normalize_table_name(
+                get_nested_hashed_table_name(self.name_transformer, schema, json_path, stream_name), False, False
+            )
+        return result
+
+    @staticmethod
+    def get_registry_key(schema: str, json_path: List[str], stream_name: str) -> str:
+        """
+        Build the key string used to index the registry
+        """
+        return ".".join([schema, "_".join(json_path), stream_name]).lower()
+
+    def resolve_file_name(self, schema: str, table_name: str) -> str:
+        """
+        We prefer to use file_name = table_name when possible...
+
+        When a catalog has ambiguity, we have to fall back and use the schema in the file name too
+        (which might increase the risk of truncation and thus collisions, which we solve by adding a hash of the full names)
+        """
+        if len(self.simple_file_registry[table_name]) == 1:
+            # no collisions on file naming
+            return table_name
+        else:
+            max_length = self.name_transformer.get_name_max_length()
+            # if "schema.table" fits into the destination, we use this naming convention
+            if len(schema) + len(table_name) + 1 < max_length:
+                return f"{schema}_{table_name}"
+            else:
+                # we have to make sure our file name is unique; use a hash of the full name
+                return self.name_transformer.normalize_table_name(f"{schema}_{table_name}_{hash_name(schema + table_name)}")
+
+    def get_schema_name(self, schema: str, json_path: List[str], stream_name: str):
+        """
+        Return the schema name from the registry that should be used for this combination of schema/json_path_to_substream
+        """
+        key = self.get_registry_key(schema, json_path, stream_name)
+        if key in self.registry:
+            return self.name_transformer.normalize_schema_name(self.registry[key].schema, False, False)
+        else:
+            raise KeyError(f"Registry does not contain an entry for {schema} {json_path} {stream_name}")
+
+    def get_table_name(self, schema: str, json_path: List[str], stream_name: str, suffix: str, truncate: bool = False):
+        """
+        Return the table name from the registry that should be used for this combination of schema/json_path_to_substream
+        """
+        key = self.get_registry_key(schema, json_path, stream_name)
+        if key in self.registry:
+            table_name = self.registry[key].table_name
+        else:
+            raise KeyError(f"Registry does not contain an entry for {schema} {json_path} {stream_name}")
+
+        if suffix:
+            norm_suffix = suffix if not suffix or suffix.startswith("_") else f"_{suffix}"
+        else:
+            norm_suffix = ""
+
+        conflict = False
+        conflict_solver = 0
+        if stream_name in json_path:
+            conflict = True
+            conflict_solver = len(json_path)
+
+        return self.name_transformer.normalize_table_name(f"{table_name}{norm_suffix}", False, truncate, conflict, conflict_solver)
+
+    def get_file_name(self, schema: str, json_path: List[str], stream_name: str, suffix: str, truncate: bool = False):
+        """
+        Return the file name from the registry that should be used for this combination of schema/json_path_to_substream
+        """
+        key = self.get_registry_key(schema, json_path, stream_name)
+        if key in self.registry:
+            file_name = self.registry[key].file_name
+        else:
+            raise KeyError(f"Registry does not contain an entry for {schema} {json_path} {stream_name}")
+        if suffix:
+            norm_suffix = suffix if not suffix or suffix.startswith("_") else f"_{suffix}"
+        else:
+            norm_suffix = ""
+
+        conflict = False
+        conflict_solver = 0
+        if stream_name in json_path:
+            conflict = True
+            conflict_solver = len(json_path)
+
+        return self.name_transformer.normalize_table_name(f"{file_name}{norm_suffix}", False, truncate, conflict, conflict_solver)
+
+    def to_dict(self, apply_function=(lambda x: x)) -> Dict:
+        """
+        Converts to a pure dict to serialize as json
+        """
+        result = {}
+        for key in self.registry:
+            value = self.registry[key]
+            result[apply_function(key)] = {
+                apply_function("schema"): apply_function(value.schema),
+                apply_function("table"): apply_function(value.table_name),
+                apply_function("file"): apply_function(value.file_name),
+            }
+        return result
+
+
+def hash_json_path(json_path: List[str]) -> str:
+    return hash_name("&airbyte&".join(json_path))
+
+
+def hash_name(input: str) -> str:
+    h = hashlib.sha1()
+    h.update(input.encode("utf-8").lower())
+    return h.hexdigest()[:3]
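+
+
+# For illustration: hash_name lower-cases its input and keeps the first 3 hex characters of its
+# SHA-1 digest, so hash_json_path(["public", "users", "addresses"]) yields a short, stable
+# suffix (something like "f3a"; the exact value here is only illustrative) that is used to
+# disambiguate truncated names.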
+def get_nested_hashed_table_name(name_transformer: DestinationNameTransformer, schema: str, json_path: List[str], child: str) -> str:
+    """
+    In the normalization code base, we often have to deal with naming tables, combining information from:
+    - parent table: to denote where a table is extracted from (in case of nesting)
+    - child table: in case of nesting, the field name or the original stream name
+    - extra suffix: normalization is done in multiple transformation steps, each may need to generate separate tables,
+      so we can add a suffix to distinguish the different transformation steps of a pipeline.
+    - json path: in terms of parent and nested field names in order to reach the table currently being built
+
+    All this information should be included (if possible) in the table naming for the user to (somehow) identify and
+    recognize what data is available there.
+    """
+    parent = "_".join(json_path[:-1])
+    max_length = name_transformer.get_name_max_length()
+    json_path_hash = hash_json_path([schema] + json_path)
+    norm_parent = parent if not parent else name_transformer.normalize_table_name(parent, False, False)
+    norm_child = name_transformer.normalize_table_name(child, False, False)
+    min_parent_length = min(MINIMUM_PARENT_LENGTH, len(norm_parent))
+
+    # no parent
+    if not parent:
+        raise RuntimeError("There are no nested table names without parents")
+    # if everything fits without truncation, don't truncate anything
+    elif (len(norm_parent) + len(json_path_hash) + len(norm_child) + 2) < max_length:
+        return f"{norm_parent}_{json_path_hash}_{norm_child}"
+    # if everything fits except for the parent, just truncate the parent (still guaranteeing the parent keeps at least min_parent_length characters)
+    elif (min_parent_length + len(json_path_hash) + len(norm_child) + 2) < max_length:
+        max_parent_length = max_length - len(json_path_hash) - len(norm_child) - 2
+        return f"{norm_parent[:max_parent_length]}_{json_path_hash}_{norm_child}"
+    # otherwise, first truncate the parent to the minimum length and middle-truncate the child too
+    else:
+        norm_child_max_length = max_length - len(json_path_hash) - 2 - min_parent_length
+        trunc_norm_child = name_transformer.truncate_identifier_name(norm_child, norm_child_max_length)
+        return f"{norm_parent[:min_parent_length]}_{json_path_hash}_{trunc_norm_child}"
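+
+
+# For illustration (hypothetical names): for json_path ["users", "addresses"] and child
+# "addresses", the parent portion is "users" and the result is roughly
+# "users_<3-char-hash>_addresses" when everything fits within the destination's name length;
+# otherwise the parent is truncated down to MINIMUM_PARENT_LENGTH characters first, and as a
+# last resort the child name is middle-truncated as well.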
diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/transform.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/transform.py
new file mode 100644
index 0000000000000..b21acb69b2e39
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/transform.py
@@ -0,0 +1,111 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+import argparse
+import os
+from typing import Any, Dict
+
+import yaml
+from normalization.destination_type import DestinationType
+from normalization.transform_catalog.catalog_processor import CatalogProcessor
+
+
+class TransformCatalog:
+    """
+To run this transformation:
+```
+python3 main_dev_transform_catalog.py \
+  --integration-type <destination-type> \
+  --profile-config-dir . \
+  --catalog integration_tests/catalog.json \
+  --out dir \
+  --json-column json_blob
+```
+    """
+
+    config: dict = {}
+    DBT_PROJECT = "dbt_project.yml"
+
+    def __init__(self):
+        self.config = {}
+
+    def run(self, args) -> None:
+        self.parse(args)
+        self.process_catalog()
+
+    def parse(self, args) -> None:
+        parser = argparse.ArgumentParser(add_help=False)
+        parser.add_argument("--integration-type", type=str, required=True, help="type of integration dialect to use")
+        parser.add_argument("--profile-config-dir", type=str, required=True, help="path to directory containing DBT profiles.yml")
+        parser.add_argument("--catalog", nargs="+", type=str, required=True, help="path to Catalog (JSON Schema) file")
+        parser.add_argument("--out", type=str, required=True, help="path to output generated DBT Models to")
+        parser.add_argument("--json-column", type=str, required=False, help="name of the column containing the json blob")
+        parsed_args = parser.parse_args(args)
+        profiles_yml = read_profiles_yml(parsed_args.profile_config_dir)
+        self.config = {
+            "integration_type": parsed_args.integration_type,
+            "schema": extract_schema(profiles_yml),
+            "catalog": parsed_args.catalog,
+            "output_path": parsed_args.out,
+            "json_column": parsed_args.json_column,
+            "profile_config_dir": parsed_args.profile_config_dir,
+        }
+
+    def process_catalog(self) -> None:
+        destination_type = DestinationType.from_string(self.config["integration_type"])
+        schema = self.config["schema"]
+        output = self.config["output_path"]
+        json_col = self.config["json_column"]
+        processor = CatalogProcessor(output_directory=output, destination_type=destination_type)
+        for catalog_file in self.config["catalog"]:
+            print(f"Processing {catalog_file}...")
+            processor.process(catalog_file=catalog_file, json_column_name=json_col, default_schema=schema)
+        self.update_dbt_project_vars(json_column=self.config["json_column"], models_to_source=processor.models_to_source)
+
+    def update_dbt_project_vars(self, **vars_config: Dict[str, Any]):
+        filename = os.path.join(self.config["profile_config_dir"], self.DBT_PROJECT)
+        config = read_yaml_config(filename)
+        config["vars"] = {**config.get("vars", {}), **vars_config}
+        write_yaml_config(config, filename)
+
+
+def read_profiles_yml(profile_dir: str) -> Any:
+    with open(os.path.join(profile_dir, "profiles.yml"), "r") as file:
+        config = yaml.load(file, Loader=yaml.FullLoader)
+        obj = config["normalize"]["outputs"]["prod"]
+        return obj
+
+
+def read_yaml_config(filename: str) -> Dict[str, Any]:
+    with open(filename, "r") as fp:
+        config = yaml.safe_load(fp)
+    if not isinstance(config, dict):
+        raise RuntimeError("{} does not parse to a dictionary".format(os.path.basename(filename)))
+    return config
+
+
+def write_yaml_config(config: Dict[str, Any], filename: str):
+    with open(filename, "w") as fp:
+        fp.write(yaml.dump(config, sort_keys=False))
+
+
+def extract_schema(profiles_yml: Dict) -> str:
+    if "dataset" in profiles_yml:
+        return str(profiles_yml["dataset"])
+    elif "schema" in profiles_yml:
+        return str(profiles_yml["schema"])
+    else:
+        raise KeyError("No Dataset/Schema defined in profiles.yml")
+
+
+def extract_path(profiles_yml: Dict) -> str:
+    if "path" in profiles_yml:
+        return str(profiles_yml["path"])
+    else:
+        raise KeyError("No destination_path defined in profiles.yml")
+
+
+def main(args=None):
+    TransformCatalog().run(args)
diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/utils.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/utils.py
new file mode 100644
index 0000000000000..5a9b22788f028
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/utils.py
@@ -0,0 +1,118 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+
+from typing import Set, Union
+
+from normalization.transform_catalog import dbt_macro
+
+
+def jinja_call(command: Union[str, dbt_macro.Macro]) -> str:
+    return "{{ " + command + " }}"
+
+
+def remove_jinja(command: str) -> str:
+    return str(command).replace("{{ ", "").replace(" }}", "")
+
+
+def is_string(property_type) -> bool:
+    return property_type == "string" or "string" in property_type
+
+
+def is_datetime(definition: dict) -> bool:
+    return (
+        is_string(definition["type"])
+        and ("format" in definition.keys())
+        and (definition["format"] == "date-time" or "date-time" in definition["format"])
+    )
+
+
+def is_datetime_without_timezone(definition: dict) -> bool:
+    return is_datetime(definition) and definition.get("airbyte_type") == "timestamp_without_timezone"
+
+
+def is_datetime_with_timezone(definition: dict) -> bool:
+    return is_datetime(definition) and (not definition.get("airbyte_type") or definition.get("airbyte_type") == "timestamp_with_timezone")
+
+
+def is_date(definition: dict) -> bool:
+    return (
+        is_string(definition["type"])
+        and ("format" in definition.keys())
+        and (definition["format"] == "date" or "date" in definition["format"])
+    )
+
+
+def is_time(definition: dict) -> bool:
+    return is_string(definition["type"]) and definition.get("format") == "time"
+
+
+def is_time_with_timezone(definition: dict) -> bool:
+    return is_time(definition) and definition.get("airbyte_type") == "time_with_timezone"
+
+
+def is_time_without_timezone(definition: dict) -> bool:
+    return is_time(definition) and definition.get("airbyte_type") == "time_without_timezone"
+
+
+def is_number(property_type) -> bool:
+    if is_string(property_type):
+        # Handle union type, give priority to wider scope types
+        return False
+    return property_type == "number" or "number" in property_type
+
+
+def is_big_integer(definition: dict) -> bool:
+    return "airbyte_type" in definition and definition["airbyte_type"] == "big_integer"
+
+
+def is_long(property_type, definition: dict) -> bool:
+    # Check specifically for {type: number, airbyte_type: integer}
+    if (
+        (property_type == "number" or "number" in property_type)
+        and "airbyte_type" in definition
+        and definition["airbyte_type"] == "integer"
+    ):
+        return True
+    if is_string(property_type) or is_number(property_type):
+        # Handle union type, give priority to wider scope types
+        return False
+    return property_type == "integer" or "integer" in property_type
+
+
+def is_boolean(property_type, definition: dict) -> bool:
+    if is_string(property_type) or is_number(property_type) or is_big_integer(definition) or is_long(property_type, definition):
+        # Handle union type, give priority to wider scope types
+        return False
+    return property_type == "boolean" or "boolean" in property_type
+
+
+def is_array(property_type) -> bool:
+    return property_type == "array" or "array" in property_type
+
+
+def is_object(property_type) -> bool:
+    return property_type == "object" or "object" in property_type
+
+
+def is_airbyte_column(name: str) -> bool:
+    return name.startswith("_airbyte_")
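+
+
+# For illustration: the predicates above give priority to wider types when a union is declared.
+# For property_type = ["null", "string", "integer"], is_string() returns True while is_long()
+# and is_boolean() return False, so such a column ends up being treated as a string.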
definition["type"] + return ( + is_string(property_type) + or is_big_integer(definition) + or is_long(property_type, definition) + or is_number(property_type) + or is_boolean(property_type, definition) + ) + + +def is_combining_node(properties: dict) -> Set[str]: + return set(properties).intersection({"anyOf", "oneOf", "allOf"}) diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_config/__init__.py b/airbyte-integrations/bases/base-normalization/normalization/transform_config/__init__.py new file mode 100644 index 0000000000000..94c00f0d6dd56 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_config/__init__.py @@ -0,0 +1,3 @@ +from normalization.transform_config.transform import TransformConfig + +__all__ = ["TransformConfig"] diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_config/profile_base.yml b/airbyte-integrations/bases/base-normalization/normalization/transform_config/profile_base.yml new file mode 100644 index 0000000000000..bcb6af2fd8e2e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_config/profile_base.yml @@ -0,0 +1,14 @@ +# Top-level configs that apply to all profiles are set here +config: + partial_parse: true + printer_width: 120 + send_anonymous_usage_stats: false + use_colors: true + +normalize: + target: prod + outputs: + prod: +# type: "" +# database-specific configuration here... + diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py b/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py new file mode 100644 index 0000000000000..7c14e02f64908 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_config/transform.py @@ -0,0 +1,395 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import argparse +import json +import os +import pkgutil +import socket +import subprocess +from typing import Any, Dict + +import yaml +from normalization.destination_type import DestinationType + + +class TransformConfig: + def run(self, args): + inputs = self.parse(args) + original_config = self.read_json_config(inputs["config"]) + integration_type = inputs["integration_type"] + transformed_config = self.transform(integration_type, original_config) + self.write_yaml_config(inputs["output_path"], transformed_config, "profiles.yml") + if self.is_ssh_tunnelling(original_config): + self.write_ssh_config(inputs["output_path"], original_config, transformed_config) + + @staticmethod + def parse(args): + parser = argparse.ArgumentParser(add_help=False) + parser.add_argument("--config", type=str, required=True, help="path to original config") + parser.add_argument( + "--integration-type", type=DestinationType, choices=list(DestinationType), required=True, help="type of integration" + ) + parser.add_argument("--out", type=str, required=True, help="path to output transformed config to") + + parsed_args = parser.parse_args(args) + print(str(parsed_args)) + + return { + "config": parsed_args.config, + "integration_type": parsed_args.integration_type, + "output_path": parsed_args.out, + } + + def transform(self, integration_type: DestinationType, config: Dict[str, Any]): + data = pkgutil.get_data(self.__class__.__module__.split(".")[0], "transform_config/profile_base.yml") + if not data: + raise FileExistsError("Failed to load profile_base.yml") + base_profile = yaml.load(data, Loader=yaml.FullLoader) + + transformed_integration_config = { + DestinationType.BIGQUERY.value: self.transform_bigquery, + DestinationType.POSTGRES.value: self.transform_postgres, + DestinationType.REDSHIFT.value: self.transform_redshift, + DestinationType.SNOWFLAKE.value: self.transform_snowflake, + DestinationType.MYSQL.value: self.transform_mysql, + DestinationType.ORACLE.value: self.transform_oracle, + DestinationType.MSSQL.value: self.transform_mssql, + DestinationType.CLICKHOUSE.value: self.transform_clickhouse, + DestinationType.TIDB.value: self.transform_tidb, + DestinationType.DUCKDB.value: self.transform_duckdb, + }[integration_type.value](config) + + # merge pre-populated base_profile with destination-specific configuration. + base_profile["normalize"]["outputs"]["prod"] = transformed_integration_config + + return base_profile + + @staticmethod + def create_file(name, content): + f = open(name, "x") + f.write(content) + f.close() + return os.path.abspath(f.name) + + @staticmethod + def is_ssh_tunnelling(config: Dict[str, Any]) -> bool: + tunnel_methods = ["SSH_KEY_AUTH", "SSH_PASSWORD_AUTH"] + if ( + "tunnel_method" in config.keys() + and "tunnel_method" in config["tunnel_method"] + and config["tunnel_method"]["tunnel_method"].upper() in tunnel_methods + ): + return True + else: + return False + + @staticmethod + def is_port_free(port: int) -> bool: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + try: + s.bind(("localhost", port)) + except Exception as e: + print(f"port {port} unsuitable: {e}") + return False + else: + print(f"port {port} is free") + return True + + @staticmethod + def pick_a_port() -> int: + """ + This function finds a free port, starting with 50001 and adding 1 until we find an open port. 
+ """ + port_to_check = 50001 # just past start of dynamic port range (49152:65535) + while not TransformConfig.is_port_free(port_to_check): + port_to_check += 1 + # error if we somehow hit end of port range + if port_to_check > 65535: + raise RuntimeError("Couldn't find a free port to use.") + return port_to_check + + @staticmethod + def get_ssh_altered_config(config: Dict[str, Any], port_key: str = "port", host_key: str = "host") -> Dict[str, Any]: + """ + This should be called only if ssh tunneling is on. + It will return config with appropriately altered port and host values + """ + # make a copy of config rather than mutate in place + ssh_ready_config = {k: v for k, v in config.items()} + ssh_ready_config[port_key] = TransformConfig.pick_a_port() + ssh_ready_config[host_key] = "localhost" + return ssh_ready_config + + @staticmethod + def transform_bigquery(config: Dict[str, Any]): + print("transform_bigquery") + # https://docs.getdbt.com/reference/warehouse-profiles/bigquery-profile + + project_id = config["project_id"] + dataset_id = config["dataset_id"] + + if ":" in config["dataset_id"]: + splits = config["dataset_id"].split(":") + if len(splits) > 2: + raise ValueError("Invalid format for dataset ID (expected at most one colon)") + project_id, dataset_id = splits + if project_id != config["project_id"]: + raise ValueError( + f"Project ID in dataset ID did not match explicitly-provided project ID: {project_id} and {config['project_id']}" + ) + + dbt_config = { + "type": "bigquery", + "project": project_id, + "dataset": dataset_id, + "priority": config.get("transformation_priority", "interactive"), + "threads": 8, + "retries": 3, + } + if "credentials_json" in config: + dbt_config["method"] = "service-account-json" + dbt_config["keyfile_json"] = json.loads(config["credentials_json"]) + else: + dbt_config["method"] = "oauth" + if "dataset_location" in config: + dbt_config["location"] = config["dataset_location"] + return dbt_config + + @staticmethod + def transform_postgres(config: Dict[str, Any]): + print("transform_postgres") + + if TransformConfig.is_ssh_tunnelling(config): + config = TransformConfig.get_ssh_altered_config(config, port_key="port", host_key="host") + + # https://docs.getdbt.com/reference/warehouse-profiles/postgres-profile + dbt_config = { + "type": "postgres", + "host": config["host"], + "user": config["username"], + "pass": config.get("password", ""), + "port": config["port"], + "dbname": config["database"], + "schema": config["schema"], + "threads": 8, + } + + ssl = config.get("ssl") + if ssl: + ssl_mode = config.get("ssl_mode", {"mode": "allow"}) + dbt_config["sslmode"] = ssl_mode.get("mode") + if ssl_mode["mode"] == "verify-ca": + TransformConfig.create_file("ca.crt", ssl_mode["ca_certificate"]) + dbt_config["sslrootcert"] = "ca.crt" + elif ssl_mode["mode"] == "verify-full": + dbt_config["sslrootcert"] = TransformConfig.create_file("ca.crt", ssl_mode["ca_certificate"]) + dbt_config["sslcert"] = TransformConfig.create_file("client.crt", ssl_mode["client_certificate"]) + client_key = TransformConfig.create_file("client.key", ssl_mode["client_key"]) + subprocess.call("openssl pkcs8 -topk8 -inform PEM -in client.key -outform DER -out client.pk8 -nocrypt", shell=True) + dbt_config["sslkey"] = client_key.replace("client.key", "client.pk8") + + return dbt_config + + @staticmethod + def transform_redshift(config: Dict[str, Any]): + print("transform_redshift") + # https://docs.getdbt.com/reference/warehouse-profiles/redshift-profile + dbt_config = { + "type": 
"redshift", + "host": config["host"], + "user": config["username"], + "pass": config["password"], + "port": config["port"], + "dbname": config["database"], + "schema": config["schema"], + "threads": 4, + } + return dbt_config + + @staticmethod + def transform_snowflake(config: Dict[str, Any]): + print("transform_snowflake") + # here account is everything before ".snowflakecomputing.com" as it can include account, region & cloud environment information) + account = config["host"].replace(".snowflakecomputing.com", "").replace("http://", "").replace("https://", "") + # https://docs.getdbt.com/reference/warehouse-profiles/snowflake-profile + # snowflake coerces most of these values to uppercase, but if dbt has them as a different casing it has trouble finding the resources it needs. thus we coerce them to upper. + dbt_config = { + "type": "snowflake", + "account": account, + "user": config["username"].upper(), + "role": config["role"].upper(), + "database": config["database"].upper(), + "warehouse": config["warehouse"].upper(), + "schema": config["schema"].upper(), + "threads": 5, + "client_session_keep_alive": False, + "query_tag": "normalization", + "retry_all": True, + "retry_on_database_errors": True, + "connect_retries": 3, + "connect_timeout": 15, + } + + credentials = config.get("credentials", {}) + if credentials.get("auth_type") == "OAuth2.0": + dbt_config["authenticator"] = "oauth" + dbt_config["oauth_client_id"] = credentials["client_id"] + dbt_config["oauth_client_secret"] = credentials["client_secret"] + dbt_config["token"] = credentials["refresh_token"] + elif credentials.get("private_key"): + with open("private_key_path.txt", "w") as f: + f.write(credentials["private_key"]) + dbt_config["private_key_path"] = "private_key_path.txt" + if credentials.get("private_key_password"): + dbt_config["private_key_passphrase"] = credentials["private_key_password"] + elif credentials.get("password"): + dbt_config["password"] = credentials["password"] + else: + dbt_config["password"] = config["password"] + return dbt_config + + @staticmethod + def transform_mysql(config: Dict[str, Any]): + print("transform_mysql") + + if TransformConfig.is_ssh_tunnelling(config): + config = TransformConfig.get_ssh_altered_config(config, port_key="port", host_key="host") + + # https://github.com/dbeatty10/dbt-mysql#configuring-your-profile + dbt_config = { + # MySQL 8.x - type: mysql + # MySQL 5.x - type: mysql5 + "type": config.get("type", "mysql"), + "server": config["host"], + "port": config["port"], + # DBT schema is equivalent to MySQL database + "schema": config["database"], + "database": config["database"], + "username": config["username"], + "password": config.get("password", ""), + } + return dbt_config + + @staticmethod + def transform_oracle(config: Dict[str, Any]): + print("transform_oracle") + # https://github.com/techindicium/dbt-oracle#configure-your-profile + dbt_config = { + "type": "oracle", + "host": config["host"], + "user": config["username"], + "pass": config["password"], + "port": config["port"], + "dbname": config["sid"], + "schema": config["schema"], + "threads": 4, + } + return dbt_config + + @staticmethod + def transform_mssql(config: Dict[str, Any]): + print("transform_mssql") + # https://docs.getdbt.com/reference/warehouse-profiles/mssql-profile + + if TransformConfig.is_ssh_tunnelling(config): + config = TransformConfig.get_ssh_altered_config(config, port_key="port", host_key="host") + config["host"] = "127.0.0.1" # localhost is not supported by dbt-sqlserver. 
+
+        dbt_config = {
+            "type": "sqlserver",
+            "driver": "ODBC Driver 17 for SQL Server",
+            "server": config["host"],
+            "port": config["port"],
+            "schema": config["schema"],
+            "database": config["database"],
+            "user": config["username"],
+            "password": config["password"],
+            "threads": 8,
+            # "authentication": "sql",
+            # "trusted_connection": True,
+        }
+        return dbt_config
+
+    @staticmethod
+    def transform_clickhouse(config: Dict[str, Any]):
+        print("transform_clickhouse")
+        # https://docs.getdbt.com/reference/warehouse-profiles/clickhouse-profile
+        dbt_config = {
+            "type": "clickhouse",
+            "driver": "http",
+            "verify": False,
+            "host": config["host"],
+            "port": config["port"],
+            "schema": config["database"],
+            "user": config["username"],
+        }
+        if "password" in config:
+            dbt_config["password"] = config["password"]
+
+        # ssl is an optional configuration and is not present in the strict-encrypt config;
+        # if the ssl option is not present in the config, default to True
+        dbt_config["secure"] = config.get("ssl", True)
+
+        return dbt_config
+
+    @staticmethod
+    def transform_tidb(config: Dict[str, Any]):
+        print("transform_tidb")
+        # https://github.com/pingcap/dbt-tidb#profile-configuration
+        dbt_config = {
+            "type": "tidb",
+            "server": config["host"],
+            "port": config["port"],
+            "schema": config["database"],
+            "database": config["database"],
+            "username": config["username"],
+            "password": config.get("password", ""),
+        }
+        return dbt_config
+
+    @staticmethod
+    def transform_duckdb(config: Dict[str, Any]):
+        print("transform_duckdb")
+        dbt_config = {
+            "type": "duckdb",
+            "path": config["destination_path"],
+            "schema": config["schema"] if "schema" in config else "main",
+        }
+        return dbt_config
+
+    @staticmethod
+    def read_json_config(input_path: str):
+        with open(input_path, "r") as file:
+            contents = file.read()
+        return json.loads(contents)
+
+    @staticmethod
+    def write_yaml_config(output_path: str, config: Dict[str, Any], filename: str):
+        if not os.path.exists(output_path):
+            os.makedirs(output_path)
+        with open(os.path.join(output_path, filename), "w") as fh:
+            fh.write(yaml.dump(config))
+
+    @staticmethod
+    def write_ssh_config(output_path: str, original_config: Dict[str, Any], transformed_config: Dict[str, Any]):
+        """
+        This function writes a json file with config specific to ssh.
+        We do this because we need these details to open the ssh tunnel for dbt.
+        """
+        ssh_dict = {
+            "db_host": original_config["host"],
+            "db_port": original_config["port"],
+            "tunnel_map": original_config["tunnel_method"],
+            "local_port": transformed_config["normalize"]["outputs"]["prod"]["port"],
+        }
+        if not os.path.exists(output_path):
+            os.makedirs(output_path)
+        with open(os.path.join(output_path, "ssh.json"), "w") as fh:
+            json.dump(ssh_dict, fh)
+
+
+def main(args=None):
+    TransformConfig().run(args)
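+
+
+# For illustration (hypothetical paths): the "transform-config" console script declared in
+# setup.py invokes main(), so a run such as
+#   transform-config --config destination_config.json --integration-type postgres --out /tmp/out
+# writes /tmp/out/profiles.yml (plus /tmp/out/ssh.json when an SSH tunnel is configured).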
diff --git a/airbyte-integrations/bases/base-normalization/oracle.Dockerfile b/airbyte-integrations/bases/base-normalization/oracle.Dockerfile
new file mode 100644
index 0000000000000..6041ea3cf1c50
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/oracle.Dockerfile
@@ -0,0 +1,62 @@
+# As of today, dbt-oracle doesn't support 1.0.0
+# IF YOU UPGRADE DBT, make sure to also edit these files:
+# 1. Remove the "normalization-oracle" entry here https://github.com/airbytehq/airbyte/pull/11267/files#diff-9a3bcae8cb5c56aa30c00548e06eade6ad771f3d4f098f6867ae9a183049dfd8R404
+# 2. Check if mysql.Dockerfile is on DBT 1.0.0 yet; if it is, then revert this entire edit https://github.com/airbytehq/airbyte/pull/11267/files#diff-8880e85b2b5690accc6f15f9292a8589a6eb83564803d57c4ee74e2ee8ede09eR117-R130
+FROM fishtownanalytics/dbt:0.19.1
+
+USER root
+WORKDIR /tmp
+RUN apt-get update && apt-get install -y \
+    wget \
+    unzip \
+    libaio-dev \
+    libaio1
+RUN mkdir -p /opt/oracle
+RUN wget https://download.oracle.com/otn_software/linux/instantclient/19600/instantclient-basic-linux.x64-19.6.0.0.0dbru.zip
+RUN unzip instantclient-basic-linux.x64-19.6.0.0.0dbru.zip -d /opt/oracle
+ENV ORACLE_HOME /opt/oracle/instantclient_19_6
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ORACLE_HOME
+ENV TNS_ADMIN /opt/oracle/instantclient_19_6/network/admin
+RUN pip install cx_Oracle
+
+COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte
+
+RUN apt-get update && apt-get install -y jq sshpass
+
+WORKDIR /airbyte
+COPY entrypoint.sh .
+COPY build/sshtunneling.sh .
+
+WORKDIR /airbyte/normalization_code
+COPY normalization ./normalization
+COPY setup.py .
+COPY dbt-project-template/ ./dbt-template/
+COPY dbt-project-template-oracle/* ./dbt-template/
+
+WORKDIR /airbyte/base_python_structs
+
+# workaround for https://github.com/yaml/pyyaml/issues/601
+# this should be fixed in the airbyte/base-airbyte-protocol-python image
+RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation
+
+RUN pip install .
+
+WORKDIR /airbyte/normalization_code
+RUN pip install .
+# based off https://github.com/techindicium/dbt-oracle/tree/fa9718809840ee73e6072f483233f5150cc9986c
+RUN pip install dbt-oracle==0.4.3
+
+WORKDIR /airbyte/normalization_code/dbt-template/
+
+# Pin MarkupSafe to 2.0.1 per this issue for dbt
+# https://github.com/dbt-labs/dbt-core/issues/4745#issuecomment-1044165591
+RUN pip install --force-reinstall MarkupSafe==2.0.1
+
+# Download external dbt dependencies
+RUN dbt deps
+
+WORKDIR /airbyte
+ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
+ENTRYPOINT ["/airbyte/entrypoint.sh"]
+
+LABEL io.airbyte.name=airbyte/normalization-oracle
diff --git a/airbyte-integrations/bases/base-normalization/redshift.Dockerfile b/airbyte-integrations/bases/base-normalization/redshift.Dockerfile
new file mode 100644
index 0000000000000..9b8124ebe9ed2
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/redshift.Dockerfile
@@ -0,0 +1,37 @@
+FROM fishtownanalytics/dbt:1.0.0
+COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte
+
+# Install SSH Tunneling dependencies
+RUN apt-get update && apt-get install -y jq sshpass
+
+WORKDIR /airbyte
+COPY entrypoint.sh .
+COPY build/sshtunneling.sh .
+
+WORKDIR /airbyte/normalization_code
+COPY normalization ./normalization
+COPY setup.py .
+COPY dbt-project-template/ ./dbt-template/
+COPY dbt-project-template-redshift/* ./dbt-template/
+
+# Install python dependencies
+WORKDIR /airbyte/base_python_structs
+
+# workaround for https://github.com/yaml/pyyaml/issues/601
+# this should be fixed in the airbyte/base-airbyte-protocol-python image
+RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation
+
+RUN pip install .
+
+WORKDIR /airbyte/normalization_code
+RUN pip install .
+ +WORKDIR /airbyte/normalization_code/dbt-template/ +# Download external dbt dependencies +RUN dbt deps + +WORKDIR /airbyte +ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" +ENTRYPOINT ["/airbyte/entrypoint.sh"] + +LABEL io.airbyte.name=airbyte/normalization-redshift diff --git a/airbyte-integrations/bases/base-normalization/requirements.txt b/airbyte-integrations/bases/base-normalization/requirements.txt new file mode 100644 index 0000000000000..d6e1198b1ab1f --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/requirements.txt @@ -0,0 +1 @@ +-e . diff --git a/airbyte-integrations/bases/base-normalization/setup.cfg b/airbyte-integrations/bases/base-normalization/setup.cfg new file mode 100644 index 0000000000000..a7f638916e98b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/setup.cfg @@ -0,0 +1,2 @@ +[aliases] +test='pytest' diff --git a/airbyte-integrations/bases/base-normalization/setup.py b/airbyte-integrations/bases/base-normalization/setup.py new file mode 100644 index 0000000000000..cf58f57434d83 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/setup.py @@ -0,0 +1,28 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +# Note: cattrs is pinned to the last known working version which does not have conflicts with typing_extensions. Learn more https://airbytehq-team.slack.com/archives/C03C4AVJWG4/p1685546430990049 + +import setuptools + +setuptools.setup( + name="normalization", + description="Normalizes data in the destination.", + author="Airbyte", + author_email="contact@airbyte.io", + url="https://github.com/airbytehq/airbyte", + packages=setuptools.find_packages(), + install_requires=["airbyte-cdk", "pyyaml", "jinja2", "types-PyYAML", "cattrs==22.2.0"], + package_data={"": ["*.yml"]}, + setup_requires=["pytest-runner"], + entry_points={ + "console_scripts": [ + "transform-config=normalization.transform_config.transform:main", + "transform-catalog=normalization.transform_catalog.transform:main", + ], + }, + extras_require={ + "tests": ["airbyte-cdk", "pyyaml", "pytest", "mypy", "types-PyYAML"], + }, +) diff --git a/airbyte-integrations/bases/base-normalization/setup/snowflake.md b/airbyte-integrations/bases/base-normalization/setup/snowflake.md new file mode 100644 index 0000000000000..b536c67950beb --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/setup/snowflake.md @@ -0,0 +1,34 @@ +# Snowflake Setup + +## Setting up an integration user + +Here is the SQL to make an integration environment in Snowflake for this source via an ACCOUNTADMIN. Be sure to give a real password. 
+
+```sql
+CREATE WAREHOUSE INTEGRATION_TEST_WAREHOUSE_NORMALIZATION WITH WAREHOUSE_SIZE = 'XSMALL' WAREHOUSE_TYPE = 'STANDARD' AUTO_SUSPEND = 600 AUTO_RESUME = TRUE;
+
+CREATE DATABASE INTEGRATION_TEST_NORMALIZATION;
+
+CREATE ROLE INTEGRATION_TESTER_NORMALIZATION;
+
+GRANT ALL PRIVILEGES ON WAREHOUSE INTEGRATION_TEST_WAREHOUSE_NORMALIZATION TO ROLE INTEGRATION_TESTER_NORMALIZATION;
+GRANT ALL PRIVILEGES ON DATABASE INTEGRATION_TEST_NORMALIZATION TO ROLE INTEGRATION_TESTER_NORMALIZATION;
+GRANT ALL PRIVILEGES ON FUTURE SCHEMAS IN DATABASE INTEGRATION_TEST_NORMALIZATION TO ROLE INTEGRATION_TESTER_NORMALIZATION;
+GRANT ALL PRIVILEGES ON FUTURE TABLES IN DATABASE INTEGRATION_TEST_NORMALIZATION TO ROLE INTEGRATION_TESTER_NORMALIZATION;
+
+-- Add a real password here and remove this comment
+CREATE USER INTEGRATION_TEST_USER_NORMALIZATION PASSWORD='test' DEFAULT_ROLE=INTEGRATION_TESTER_NORMALIZATION DEFAULT_WAREHOUSE=INTEGRATION_TEST_WAREHOUSE_NORMALIZATION MUST_CHANGE_PASSWORD=false;
+
+GRANT ROLE INTEGRATION_TESTER_NORMALIZATION TO USER INTEGRATION_TEST_USER_NORMALIZATION;
+
+CREATE SCHEMA INTEGRATION_TEST_NORMALIZATION.TEST_SCHEMA;
+```
+
+If you ever need to start over, use this:
+
+```sql
+DROP DATABASE IF EXISTS INTEGRATION_TEST_NORMALIZATION;
+DROP USER IF EXISTS INTEGRATION_TEST_USER_NORMALIZATION;
+DROP ROLE IF EXISTS INTEGRATION_TESTER_NORMALIZATION;
+DROP WAREHOUSE IF EXISTS INTEGRATION_TEST_WAREHOUSE_NORMALIZATION;
+```
diff --git a/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile b/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile
new file mode 100644
index 0000000000000..41d74e50621a4
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/snowflake.Dockerfile
@@ -0,0 +1,38 @@
+FROM fishtownanalytics/dbt:1.0.0
+COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte
+
+# Install SSH Tunneling dependencies
+RUN apt-get update && apt-get install -y jq sshpass
+
+WORKDIR /airbyte
+COPY entrypoint.sh .
+COPY build/sshtunneling.sh .
+
+WORKDIR /airbyte/normalization_code
+COPY normalization ./normalization
+COPY setup.py .
+COPY dbt-project-template/ ./dbt-template/
+COPY dbt-project-template-snowflake/* ./dbt-template/
+
+# Install python dependencies
+WORKDIR /airbyte/base_python_structs
+
+# workaround for https://github.com/yaml/pyyaml/issues/601
+# this should be fixed in the airbyte/base-airbyte-protocol-python image
+RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation
+
+RUN pip install .
+
+WORKDIR /airbyte/normalization_code
+RUN pip install .
+
+WORKDIR /airbyte/normalization_code/dbt-template/
+# Download external dbt dependencies
+RUN dbt deps
+
+WORKDIR /airbyte
+ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
+ENTRYPOINT ["/airbyte/entrypoint.sh"]
+
+LABEL io.airbyte.version=0.2.5
+LABEL io.airbyte.name=airbyte/normalization-snowflake
diff --git a/airbyte-integrations/bases/base-normalization/tidb.Dockerfile b/airbyte-integrations/bases/base-normalization/tidb.Dockerfile
new file mode 100644
index 0000000000000..a749f88a66d8c
--- /dev/null
+++ b/airbyte-integrations/bases/base-normalization/tidb.Dockerfile
@@ -0,0 +1,37 @@
+FROM fishtownanalytics/dbt:1.0.0
+COPY --from=airbyte/base-airbyte-protocol-python:0.1.1 /airbyte /airbyte
+
+# Install SSH Tunneling dependencies
+RUN apt-get update && apt-get install -y jq sshpass
+
+WORKDIR /airbyte
+COPY entrypoint.sh .
+COPY build/sshtunneling.sh .
+
+WORKDIR /airbyte/normalization_code
+COPY normalization ./normalization
+COPY setup.py .
+COPY dbt-project-template/ ./dbt-template/ + +# Install python dependencies +WORKDIR /airbyte/base_python_structs + +# workaround for https://github.com/yaml/pyyaml/issues/601 +# this should be fixed in the airbyte/base-airbyte-protocol-python image +RUN pip install "Cython<3.0" "pyyaml==5.4" --no-build-isolation + +RUN pip install . + +WORKDIR /airbyte/normalization_code +RUN pip install . +RUN pip install dbt-tidb==1.0.1 + +WORKDIR /airbyte/normalization_code/dbt-template/ +# Download external dbt dependencies +RUN dbt deps + +WORKDIR /airbyte +ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh" +ENTRYPOINT ["/airbyte/entrypoint.sh"] + +LABEL io.airbyte.name=airbyte/normalization-tidb diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/private_key_path.txt b/airbyte-integrations/bases/base-normalization/unit_tests/private_key_path.txt new file mode 100644 index 0000000000000..8b98a34afc485 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/private_key_path.txt @@ -0,0 +1 @@ +AIRBYTE_PRIVATE_KEY \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog.json new file mode 100644 index 0000000000000..7ffa2f36d4421 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog.json @@ -0,0 +1,62 @@ +{ + "streams": [ + { + "stream": { + "name": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": [], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", + "namespace": "another", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": [], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": [], + "destination_sync_mode": "append" + } + ] +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json new file mode 100644 index 0000000000000..84f4fa7a50eb6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_clickhouse_names.json @@ -0,0 +1,32 @@ +{ + 
"_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__destinations_are_fine", + "schema": "_airbyte_another", + "table": "postgres_has_a_64_ch__destinations_are_fine" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_d2b", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_d2b" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_e5a", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_e5a" + }, + "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__destinations_are_fine", + "schema": "another", + "table": "postgres_has_a_64_ch__destinations_are_fine" + }, + "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_d2b", + "schema": "schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_d2b" + }, + "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_e5a", + "schema": "schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_e5a" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_duckdb_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_duckdb_names.json new file mode 100644 index 0000000000000..160fc5b70b759 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_duckdb_names.json @@ -0,0 +1,32 @@ +{ + "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__destinations_are_fine", + "schema": "_airbyte_another", + "table": "postgres_has_a_64_cha__destinations_are_fine" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_d2b", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_d2b" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_e5a", + "schema": 
"_airbyte_schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_e5a" + }, + "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__destinations_are_fine", + "schema": "another", + "table": "postgres_has_a_64_cha__destinations_are_fine" + }, + "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_d2b", + "schema": "schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_d2b" + }, + "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_e5a", + "schema": "schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_e5a" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_mssql_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_mssql_names.json new file mode 100644 index 0000000000000..160fc5b70b759 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_mssql_names.json @@ -0,0 +1,32 @@ +{ + "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__destinations_are_fine", + "schema": "_airbyte_another", + "table": "postgres_has_a_64_cha__destinations_are_fine" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_d2b", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_d2b" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_e5a", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_e5a" + }, + "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__destinations_are_fine", + "schema": "another", + "table": "postgres_has_a_64_cha__destinations_are_fine" + }, + "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_d2b", + "schema": "schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_d2b" + }, + "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": 
"postgres_has_a_64_cha__inations_are_fine_e5a", + "schema": "schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_e5a" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_mysql_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_mysql_names.json new file mode 100644 index 0000000000000..160fc5b70b759 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_mysql_names.json @@ -0,0 +1,32 @@ +{ + "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__destinations_are_fine", + "schema": "_airbyte_another", + "table": "postgres_has_a_64_cha__destinations_are_fine" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_d2b", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_d2b" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_e5a", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_e5a" + }, + "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__destinations_are_fine", + "schema": "another", + "table": "postgres_has_a_64_cha__destinations_are_fine" + }, + "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_d2b", + "schema": "schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_d2b" + }, + "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_e5a", + "schema": "schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_e5a" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_names.json new file mode 100644 index 0000000000000..760c94d2803d6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_names.json @@ -0,0 +1,32 @@ +{ + "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "_airbyte_another_postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", + "schema": "_airbyte_another", + "table": 
"postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "_airbyte_schema_test_postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine" + }, + "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "another_postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", + "schema": "another", + "table": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine" + }, + "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine", + "schema": "schema_test", + "table": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine" + }, + "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "schema_test_postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", + "schema": "schema_test", + "table": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_oracle_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_oracle_names.json new file mode 100644 index 0000000000000..08417424a6c5b --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_oracle_names.json @@ -0,0 +1,17 @@ +{ + "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "another_postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", + "schema": "another", + "table": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine" + }, + "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine", + "schema": "schema_test", + 
"table": "postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine" + }, + "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "schema_test_postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine", + "schema": "schema_test", + "table": "postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_postgres_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_postgres_names.json new file mode 100644 index 0000000000000..84f4fa7a50eb6 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_postgres_names.json @@ -0,0 +1,32 @@ +{ + "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__destinations_are_fine", + "schema": "_airbyte_another", + "table": "postgres_has_a_64_ch__destinations_are_fine" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_d2b", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_d2b" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_e5a", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_e5a" + }, + "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__destinations_are_fine", + "schema": "another", + "table": "postgres_has_a_64_ch__destinations_are_fine" + }, + "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_d2b", + "schema": "schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_d2b" + }, + "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_ch__inations_are_fine_e5a", + "schema": "schema_test", + "table": "postgres_has_a_64_ch__inations_are_fine_e5a" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_tidb_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_tidb_names.json new file mode 100644 index 0000000000000..160fc5b70b759 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/unit_tests/resources/long_name_truncate_collisions_catalog_expected_tidb_names.json @@ -0,0 +1,32 @@ +{ + "_airbyte_another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__destinations_are_fine", + "schema": "_airbyte_another", + "table": "postgres_has_a_64_cha__destinations_are_fine" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_d2b", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_d2b" + }, + "_airbyte_schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_e5a", + "schema": "_airbyte_schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_e5a" + }, + "another.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__destinations_are_fine", + "schema": "another", + "table": "postgres_has_a_64_cha__destinations_are_fine" + }, + "schema_test.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_and_not_more_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_d2b", + "schema": "schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_d2b" + }, + "schema_test.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine.postgres_has_a_64_characters_limit_to_table_names_but_other_destinations_are_fine": { + "file": "postgres_has_a_64_cha__inations_are_fine_e5a", + "schema": "schema_test", + "table": "postgres_has_a_64_cha__inations_are_fine_e5a" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog.json new file mode 100644 index 0000000000000..94e6b4a798d9a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog.json @@ -0,0 +1,425 @@ +{ + "streams": [ + { + "stream": { + "name": "adcreatives", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "body": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "title": { + "type": ["null", "string"] + }, + "status": { + "type": ["null", "string"] + }, + "adlabels": { + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "created_time": { + "type": "string", + "format": "date-time" + }, + "updated_time": { + "type": "string", + "format": "date-time" + } + } + } + }, + "link_url": { + "type": ["null", "string"] + }, + "image_crops": { + "type": ["null", "object"], + "properties": { + "100x72": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + }, + "90x160": { + 
"type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + }, + "100x100": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + }, + "191x100": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + }, + "400x150": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + }, + "400x500": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + }, + "600x360": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + } + } + }, + "object_story_spec": { + "type": ["null", "object"], + "properties": { + "page_id": { + "type": ["null", "string"] + }, + "link_data": { + "type": ["null", "object"], + "properties": { + "link": { + "type": ["null", "string"] + }, + "name": { + "type": ["null", "string"] + }, + "caption": { + "type": ["null", "string"] + }, + "message": { + "type": ["null", "string"] + }, + "image_crops": { + "type": ["null", "object"], + "properties": { + "100x72": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + }, + "90x160": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + }, + "100x100": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + }, + "191x100": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + }, + "400x150": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + }, + "400x500": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + }, + "600x360": { + "type": ["null", "array"], + "items": { + "type": ["null", "array"], + "items": { + "type": ["null", "integer"] + } + } + } + } + }, + "app_link_spec": { + "type": ["null", "object"], + "properties": { + "ios": { + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "app_name": { + "type": "string" + }, + "app_store_id": { + "type": "string" + } + } + } + }, + "ipad": { + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "app_name": { + "type": "string" + }, + "app_store_id": { + "type": "string" + } + } + } + }, + "iphone": { + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "app_name": { + "type": "string" + }, + "app_store_id": { + "type": "string" + } + } + } + }, + "android": { + "type": ["null", "array"], + "items": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "class": { + "type": "string" + }, + "package": { + "type": "string" + }, + "app_name": { + "type": "string" + } + } + } + } + } + } + }, + "text_data": { + "type": ["null", "object"], + "properties": { + "message": { + "type": "string" + } + } + }, + "photo_data": { + "type": ["null", "object"], + "properties": { + "url": { + "type": ["null", "string"] + }, + "caption": { + "type": "string" + 
}, + "image_hash": { + "type": ["null", "string"] + }, + "page_welcome_message": { + "type": ["null", "string"] + }, + "branded_content_sponsor_page_id": { + "type": ["null", "string"] + }, + "branded_content_sponsor_relationship": { + "type": ["null", "string"] + } + } + }, + "instagram_actor_id": { + "type": ["null", "string"] + } + } + }, + "template_url_spec": { + "type": ["null", "object"], + "properties": { + "ios": { + "type": ["null", "object"], + "properties": { + "url": { + "type": "string" + }, + "app_name": { + "type": "string" + }, + "app_store_id": { + "type": "string" + } + } + }, + "web": { + "type": ["null", "object"], + "properties": { + "url": { + "type": "string" + }, + "should_fallback": { + "type": "string" + } + } + }, + "ipad": { + "type": ["null", "object"], + "properties": { + "url": { + "type": "string" + }, + "app_name": { + "type": "string" + }, + "app_store_id": { + "type": "string" + } + } + }, + "config": { + "type": ["null", "object"], + "properties": { + "app_id": { + "type": "string" + } + } + }, + "iphone": { + "type": ["null", "object"], + "properties": { + "url": { + "type": "string" + }, + "app_name": { + "type": "string" + }, + "app_store_id": { + "type": "string" + } + } + }, + "android": { + "type": ["null", "object"], + "properties": { + "url": { + "type": "string" + }, + "package": { + "type": "string" + }, + "app_name": { + "type": "string" + } + } + }, + "windows_phone": { + "type": ["null", "object"], + "properties": { + "url": { + "type": "string" + }, + "app_id": { + "type": "string" + }, + "app_name": { + "type": "string" + } + } + } + } + } + } + } + }, + "supported_sync_modes": ["full_refresh"], + "source_defined_cursor": false, + "default_cursor_field": [] + }, + "sync_mode": "full_refresh", + "cursor_field": [], + "destination_sync_mode": "overwrite" + } + ] +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_bigquery_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_bigquery_names.json new file mode 100644 index 0000000000000..9b9c347db4a5a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_bigquery_names.json @@ -0,0 +1,252 @@ +{ + "_airbyte_schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "_airbyte_schema_test", + "table": "adcreatives" + }, + "_airbyte_schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "_airbyte_schema_test", + "table": "adcreatives_adlabels" + }, + "_airbyte_schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": 
"adcreatives_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_android", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_android" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_ios", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_ios" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_ipad", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_ipad" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_iphone", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_iphone" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_story_spec_link_data_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_story_spec_link_data_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_story_spec_link_data_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_story_spec_link_data_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": 
"adcreatives_object_story_spec_link_data_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_story_spec_link_data_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_story_spec_link_data_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_story_spec_link_data_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_story_spec_link_data_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_90x160" + }, + "schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "schema_test", + "table": "adcreatives" + }, + "schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "schema_test", + "table": "adcreatives_adlabels" + }, + "schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "schema_test", + "table": "adcreatives_image_crops" + }, + "schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec" + }, + 
"schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_android", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_android" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_ios", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_ios" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_ipad", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_ipad" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_iphone", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_iphone" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_story_spec_link_data_image_crops", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_story_spec_link_data_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_100x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_story_spec_link_data_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_100x72" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_story_spec_link_data_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_191x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_story_spec_link_data_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_400x150" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_story_spec_link_data_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_400x500" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_story_spec_link_data_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_600x360" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_story_spec_link_data_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_90x160" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json new file mode 100644 index 0000000000000..450b8a7f4bfc8 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_clickhouse_names.json @@ -0,0 +1,252 @@ +{ + "_airbyte_schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "_airbyte_schema_test", + "table": "adcreatives" + }, + "_airbyte_schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "_airbyte_schema_test", + "table": "adcreatives_adlabels" + }, + "_airbyte_schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_s__nk_data_app_link_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__nk_data_app_link_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_s__app_link_spec_android", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__app_link_spec_android" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_s__ata_app_link_spec_ios", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ata_app_link_spec_ios" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_s__ta_app_link_spec_ipad", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ta_app_link_spec_ipad" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": 
"adcreatives_object_s___app_link_spec_iphone", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s___app_link_spec_iphone" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_s__link_data_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__link_data_image_crops" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_s__a_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_s__ta_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ta_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_s__a_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_s__a_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_s__a_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_s__a_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_s__ta_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ta_image_crops_90x160" + }, + "schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "schema_test", + "table": "adcreatives" + }, + "schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "schema_test", + "table": "adcreatives_adlabels" + }, + "schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "schema_test", + "table": "adcreatives_image_crops" + }, + "schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "schema_test.adcreatives_image_crops_600x360.600x360": { + "file": 
"adcreatives_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_s__nk_data_app_link_spec", + "schema": "schema_test", + "table": "adcreatives_object_s__nk_data_app_link_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_s__app_link_spec_android", + "schema": "schema_test", + "table": "adcreatives_object_s__app_link_spec_android" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_s__ata_app_link_spec_ios", + "schema": "schema_test", + "table": "adcreatives_object_s__ata_app_link_spec_ios" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_s__ta_app_link_spec_ipad", + "schema": "schema_test", + "table": "adcreatives_object_s__ta_app_link_spec_ipad" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_s___app_link_spec_iphone", + "schema": "schema_test", + "table": "adcreatives_object_s___app_link_spec_iphone" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_s__link_data_image_crops", + "schema": "schema_test", + "table": "adcreatives_object_s__link_data_image_crops" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_s__a_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_100x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_s__ta_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_object_s__ta_image_crops_100x72" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_s__a_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_191x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_s__a_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_400x150" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_s__a_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_400x500" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_s__a_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_600x360" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + 
"file": "adcreatives_object_s__ta_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_object_s__ta_image_crops_90x160" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_duckdb_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_duckdb_names.json new file mode 100644 index 0000000000000..2bbb864cc4d87 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_duckdb_names.json @@ -0,0 +1,252 @@ +{ + "_airbyte_schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "_airbyte_schema_test", + "table": "adcreatives" + }, + "_airbyte_schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "_airbyte_schema_test", + "table": "adcreatives_adlabels" + }, + "_airbyte_schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_st__nk_data_app_link_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__nk_data_app_link_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_st__app_link_spec_android", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__app_link_spec_android" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_st__ata_app_link_spec_ios", + "schema": 
"_airbyte_schema_test", + "table": "adcreatives_object_st__ata_app_link_spec_ios" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_st__ta_app_link_spec_ipad", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ta_app_link_spec_ipad" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_st___app_link_spec_iphone", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st___app_link_spec_iphone" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_st__link_data_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__link_data_image_crops" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_st__a_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_st__ta_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ta_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_st__a_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_st__a_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_st__a_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_st__a_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_st__ta_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ta_image_crops_90x160" + }, + "schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "schema_test", + "table": "adcreatives" + }, + "schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "schema_test", + "table": "adcreatives_adlabels" + }, + "schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "schema_test", + "table": "adcreatives_image_crops" + }, + "schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "schema_test", + "table": 
"adcreatives_image_crops_191x100" + }, + "schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_st__nk_data_app_link_spec", + "schema": "schema_test", + "table": "adcreatives_object_st__nk_data_app_link_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_st__app_link_spec_android", + "schema": "schema_test", + "table": "adcreatives_object_st__app_link_spec_android" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_st__ata_app_link_spec_ios", + "schema": "schema_test", + "table": "adcreatives_object_st__ata_app_link_spec_ios" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_st__ta_app_link_spec_ipad", + "schema": "schema_test", + "table": "adcreatives_object_st__ta_app_link_spec_ipad" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_st___app_link_spec_iphone", + "schema": "schema_test", + "table": "adcreatives_object_st___app_link_spec_iphone" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_st__link_data_image_crops", + "schema": "schema_test", + "table": "adcreatives_object_st__link_data_image_crops" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_st__a_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_100x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_st__ta_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_object_st__ta_image_crops_100x72" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_st__a_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_191x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_st__a_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_400x150" + }, + 
"schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_st__a_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_400x500" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_st__a_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_600x360" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_st__ta_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_object_st__ta_image_crops_90x160" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_mssql_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_mssql_names.json new file mode 100644 index 0000000000000..2bbb864cc4d87 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_mssql_names.json @@ -0,0 +1,252 @@ +{ + "_airbyte_schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "_airbyte_schema_test", + "table": "adcreatives" + }, + "_airbyte_schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "_airbyte_schema_test", + "table": "adcreatives_adlabels" + }, + "_airbyte_schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": 
"adcreatives_object_st__nk_data_app_link_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__nk_data_app_link_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_st__app_link_spec_android", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__app_link_spec_android" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_st__ata_app_link_spec_ios", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ata_app_link_spec_ios" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_st__ta_app_link_spec_ipad", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ta_app_link_spec_ipad" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_st___app_link_spec_iphone", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st___app_link_spec_iphone" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_st__link_data_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__link_data_image_crops" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_st__a_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_st__ta_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ta_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_st__a_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_st__a_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_st__a_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_st__a_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_st__ta_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ta_image_crops_90x160" + }, + "schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "schema_test", + "table": "adcreatives" + }, + "schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "schema_test", + "table": "adcreatives_adlabels" + }, + "schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": 
"schema_test", + "table": "adcreatives_image_crops" + }, + "schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_st__nk_data_app_link_spec", + "schema": "schema_test", + "table": "adcreatives_object_st__nk_data_app_link_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_st__app_link_spec_android", + "schema": "schema_test", + "table": "adcreatives_object_st__app_link_spec_android" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_st__ata_app_link_spec_ios", + "schema": "schema_test", + "table": "adcreatives_object_st__ata_app_link_spec_ios" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_st__ta_app_link_spec_ipad", + "schema": "schema_test", + "table": "adcreatives_object_st__ta_app_link_spec_ipad" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_st___app_link_spec_iphone", + "schema": "schema_test", + "table": "adcreatives_object_st___app_link_spec_iphone" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_st__link_data_image_crops", + "schema": "schema_test", + "table": "adcreatives_object_st__link_data_image_crops" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_st__a_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_100x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_st__ta_image_crops_100x72", + "schema": "schema_test", + "table": 
"adcreatives_object_st__ta_image_crops_100x72" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_st__a_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_191x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_st__a_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_400x150" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_st__a_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_400x500" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_st__a_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_600x360" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_st__ta_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_object_st__ta_image_crops_90x160" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_mysql_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_mysql_names.json new file mode 100644 index 0000000000000..2bbb864cc4d87 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_mysql_names.json @@ -0,0 +1,252 @@ +{ + "_airbyte_schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "_airbyte_schema_test", + "table": "adcreatives" + }, + "_airbyte_schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "_airbyte_schema_test", + "table": "adcreatives_adlabels" + }, + "_airbyte_schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_90x160" + }, + 
"_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_st__nk_data_app_link_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__nk_data_app_link_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_st__app_link_spec_android", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__app_link_spec_android" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_st__ata_app_link_spec_ios", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ata_app_link_spec_ios" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_st__ta_app_link_spec_ipad", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ta_app_link_spec_ipad" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_st___app_link_spec_iphone", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st___app_link_spec_iphone" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_st__link_data_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__link_data_image_crops" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_st__a_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_st__ta_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ta_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_st__a_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_st__a_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_st__a_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_st__a_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": 
"adcreatives_object_st__ta_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ta_image_crops_90x160" + }, + "schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "schema_test", + "table": "adcreatives" + }, + "schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "schema_test", + "table": "adcreatives_adlabels" + }, + "schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "schema_test", + "table": "adcreatives_image_crops" + }, + "schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_st__nk_data_app_link_spec", + "schema": "schema_test", + "table": "adcreatives_object_st__nk_data_app_link_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_st__app_link_spec_android", + "schema": "schema_test", + "table": "adcreatives_object_st__app_link_spec_android" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_st__ata_app_link_spec_ios", + "schema": "schema_test", + "table": "adcreatives_object_st__ata_app_link_spec_ios" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_st__ta_app_link_spec_ipad", + "schema": "schema_test", + "table": "adcreatives_object_st__ta_app_link_spec_ipad" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_st___app_link_spec_iphone", + "schema": "schema_test", + "table": "adcreatives_object_st___app_link_spec_iphone" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_st__link_data_image_crops", + "schema": 
"schema_test", + "table": "adcreatives_object_st__link_data_image_crops" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_st__a_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_100x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_st__ta_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_object_st__ta_image_crops_100x72" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_st__a_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_191x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_st__a_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_400x150" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_st__a_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_400x500" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_st__a_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_600x360" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_st__ta_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_object_st__ta_image_crops_90x160" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_names.json new file mode 100644 index 0000000000000..9b9c347db4a5a --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_names.json @@ -0,0 +1,252 @@ +{ + "_airbyte_schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "_airbyte_schema_test", + "table": "adcreatives" + }, + "_airbyte_schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "_airbyte_schema_test", + "table": "adcreatives_adlabels" + }, + "_airbyte_schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": 
"_airbyte_schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_android", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_android" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_ios", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_ios" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_ipad", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_ipad" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_iphone", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_iphone" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_story_spec_link_data_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_story_spec_link_data_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_story_spec_link_data_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_story_spec_link_data_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": 
"adcreatives_object_story_spec_link_data_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_story_spec_link_data_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_story_spec_link_data_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_story_spec_link_data_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_90x160" + }, + "schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "schema_test", + "table": "adcreatives" + }, + "schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "schema_test", + "table": "adcreatives_adlabels" + }, + "schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "schema_test", + "table": "adcreatives_image_crops" + }, + "schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_android", + "schema": "schema_test", + 
"table": "adcreatives_object_story_spec_link_data_app_link_spec_android" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_ios", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_ios" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_ipad", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_ipad" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_iphone", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_iphone" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_story_spec_link_data_image_crops", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_story_spec_link_data_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_100x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_story_spec_link_data_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_100x72" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_story_spec_link_data_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_191x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_story_spec_link_data_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_400x150" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_story_spec_link_data_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_400x500" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_story_spec_link_data_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_600x360" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_story_spec_link_data_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_90x160" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_oracle_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_oracle_names.json new file mode 100644 index 0000000000000..995ced64a833c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_oracle_names.json @@ -0,0 +1,252 @@ +{ + "schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "schema_test", + "table": "adcreatives" + }, + 
"schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "schema_test", + "table": "adcreatives_adlabels" + }, + "schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "schema_test", + "table": "adcreatives_image_crops" + }, + "schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_android", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_android" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_ios", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_ios" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_ipad", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_ipad" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_iphone", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_iphone" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_story_spec_link_data_image_crops", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops" + }, + 
"schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_story_spec_link_data_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_100x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_story_spec_link_data_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_100x72" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_story_spec_link_data_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_191x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_story_spec_link_data_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_400x150" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_story_spec_link_data_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_400x500" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_story_spec_link_data_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_600x360" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_story_spec_link_data_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_90x160" + }, + "schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "schema_test", + "table": "adcreatives" + }, + "schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "schema_test", + "table": "adcreatives_adlabels" + }, + "schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "schema_test", + "table": "adcreatives_image_crops" + }, + "schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_image_crops_90x160" + }, + 
"schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_android", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_android" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_ios", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_ios" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_ipad", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_ipad" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_story_spec_link_data_app_link_spec_iphone", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_app_link_spec_iphone" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_story_spec_link_data_image_crops", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_story_spec_link_data_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_100x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_story_spec_link_data_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_100x72" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_story_spec_link_data_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_191x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_story_spec_link_data_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_400x150" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_story_spec_link_data_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_400x500" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_story_spec_link_data_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_600x360" + }, + 
"schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_story_spec_link_data_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data_image_crops_90x160" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_postgres_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_postgres_names.json new file mode 100644 index 0000000000000..450b8a7f4bfc8 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_postgres_names.json @@ -0,0 +1,252 @@ +{ + "_airbyte_schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "_airbyte_schema_test", + "table": "adcreatives" + }, + "_airbyte_schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "_airbyte_schema_test", + "table": "adcreatives_adlabels" + }, + "_airbyte_schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_s__nk_data_app_link_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__nk_data_app_link_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_s__app_link_spec_android", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__app_link_spec_android" + }, + 
"_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_s__ata_app_link_spec_ios", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ata_app_link_spec_ios" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_s__ta_app_link_spec_ipad", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ta_app_link_spec_ipad" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_s___app_link_spec_iphone", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s___app_link_spec_iphone" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_s__link_data_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__link_data_image_crops" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_s__a_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_s__ta_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ta_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_s__a_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_s__a_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_s__a_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_s__a_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__a_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_s__ta_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_s__ta_image_crops_90x160" + }, + "schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "schema_test", + "table": "adcreatives" + }, + "schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "schema_test", + "table": "adcreatives_adlabels" + }, + "schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "schema_test", + "table": "adcreatives_image_crops" + }, + "schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x72" + }, + 
"schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_s__nk_data_app_link_spec", + "schema": "schema_test", + "table": "adcreatives_object_s__nk_data_app_link_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_s__app_link_spec_android", + "schema": "schema_test", + "table": "adcreatives_object_s__app_link_spec_android" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_s__ata_app_link_spec_ios", + "schema": "schema_test", + "table": "adcreatives_object_s__ata_app_link_spec_ios" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_s__ta_app_link_spec_ipad", + "schema": "schema_test", + "table": "adcreatives_object_s__ta_app_link_spec_ipad" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_s___app_link_spec_iphone", + "schema": "schema_test", + "table": "adcreatives_object_s___app_link_spec_iphone" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_s__link_data_image_crops", + "schema": "schema_test", + "table": "adcreatives_object_s__link_data_image_crops" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_s__a_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_100x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_s__ta_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_object_s__ta_image_crops_100x72" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_s__a_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_191x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_s__a_image_crops_400x150", + "schema": "schema_test", + 
"table": "adcreatives_object_s__a_image_crops_400x150" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_s__a_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_400x500" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_s__a_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_object_s__a_image_crops_600x360" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_s__ta_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_object_s__ta_image_crops_90x160" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_tidb_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_tidb_names.json new file mode 100644 index 0000000000000..2bbb864cc4d87 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/nested_catalog_expected_tidb_names.json @@ -0,0 +1,252 @@ +{ + "_airbyte_schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "_airbyte_schema_test", + "table": "adcreatives" + }, + "_airbyte_schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "_airbyte_schema_test", + "table": "adcreatives_adlabels" + }, + "_airbyte_schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "_airbyte_schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + 
"_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_st__nk_data_app_link_spec", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__nk_data_app_link_spec" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_st__app_link_spec_android", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__app_link_spec_android" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_st__ata_app_link_spec_ios", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ata_app_link_spec_ios" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_st__ta_app_link_spec_ipad", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ta_app_link_spec_ipad" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_st___app_link_spec_iphone", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st___app_link_spec_iphone" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_st__link_data_image_crops", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__link_data_image_crops" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_st__a_image_crops_100x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_100x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": "adcreatives_object_st__ta_image_crops_100x72", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ta_image_crops_100x72" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_st__a_image_crops_191x100", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_191x100" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_st__a_image_crops_400x150", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_400x150" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_st__a_image_crops_400x500", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_400x500" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_st__a_image_crops_600x360", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__a_image_crops_600x360" + }, + "_airbyte_schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_st__ta_image_crops_90x160", + "schema": "_airbyte_schema_test", + "table": "adcreatives_object_st__ta_image_crops_90x160" + }, + "schema_test.adcreatives.adcreatives": { + "file": "adcreatives", + "schema": "schema_test", + "table": "adcreatives" + }, + "schema_test.adcreatives_adlabels.adlabels": { + "file": "adcreatives_adlabels", + "schema": "schema_test", + "table": "adcreatives_adlabels" + }, + 
"schema_test.adcreatives_image_crops.image_crops": { + "file": "adcreatives_image_crops", + "schema": "schema_test", + "table": "adcreatives_image_crops" + }, + "schema_test.adcreatives_image_crops_100x100.100x100": { + "file": "adcreatives_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x100" + }, + "schema_test.adcreatives_image_crops_100x72.100x72": { + "file": "adcreatives_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_image_crops_100x72" + }, + "schema_test.adcreatives_image_crops_191x100.191x100": { + "file": "adcreatives_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_image_crops_191x100" + }, + "schema_test.adcreatives_image_crops_400x150.400x150": { + "file": "adcreatives_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x150" + }, + "schema_test.adcreatives_image_crops_400x500.400x500": { + "file": "adcreatives_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_image_crops_400x500" + }, + "schema_test.adcreatives_image_crops_600x360.600x360": { + "file": "adcreatives_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_image_crops_600x360" + }, + "schema_test.adcreatives_image_crops_90x160.90x160": { + "file": "adcreatives_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_image_crops_90x160" + }, + "schema_test.adcreatives_object_story_spec.object_story_spec": { + "file": "adcreatives_object_story_spec", + "schema": "schema_test", + "table": "adcreatives_object_story_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data.link_data": { + "file": "adcreatives_object_story_spec_link_data", + "schema": "schema_test", + "table": "adcreatives_object_story_spec_link_data" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec.app_link_spec": { + "file": "adcreatives_object_st__nk_data_app_link_spec", + "schema": "schema_test", + "table": "adcreatives_object_st__nk_data_app_link_spec" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_android.android": { + "file": "adcreatives_object_st__app_link_spec_android", + "schema": "schema_test", + "table": "adcreatives_object_st__app_link_spec_android" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ios.ios": { + "file": "adcreatives_object_st__ata_app_link_spec_ios", + "schema": "schema_test", + "table": "adcreatives_object_st__ata_app_link_spec_ios" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_ipad.ipad": { + "file": "adcreatives_object_st__ta_app_link_spec_ipad", + "schema": "schema_test", + "table": "adcreatives_object_st__ta_app_link_spec_ipad" + }, + "schema_test.adcreatives_object_story_spec_link_data_app_link_spec_iphone.iphone": { + "file": "adcreatives_object_st___app_link_spec_iphone", + "schema": "schema_test", + "table": "adcreatives_object_st___app_link_spec_iphone" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops.image_crops": { + "file": "adcreatives_object_st__link_data_image_crops", + "schema": "schema_test", + "table": "adcreatives_object_st__link_data_image_crops" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x100.100x100": { + "file": "adcreatives_object_st__a_image_crops_100x100", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_100x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_100x72.100x72": { + "file": 
"adcreatives_object_st__ta_image_crops_100x72", + "schema": "schema_test", + "table": "adcreatives_object_st__ta_image_crops_100x72" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_191x100.191x100": { + "file": "adcreatives_object_st__a_image_crops_191x100", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_191x100" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x150.400x150": { + "file": "adcreatives_object_st__a_image_crops_400x150", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_400x150" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_400x500.400x500": { + "file": "adcreatives_object_st__a_image_crops_400x500", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_400x500" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_600x360.600x360": { + "file": "adcreatives_object_st__a_image_crops_600x360", + "schema": "schema_test", + "table": "adcreatives_object_st__a_image_crops_600x360" + }, + "schema_test.adcreatives_object_story_spec_link_data_image_crops_90x160.90x160": { + "file": "adcreatives_object_st__ta_image_crops_90x160", + "schema": "schema_test", + "table": "adcreatives_object_st__ta_image_crops_90x160" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog.json new file mode 100644 index 0000000000000..336cf17d71941 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog.json @@ -0,0 +1,92 @@ +{ + "streams": [ + { + "stream": { + "name": "simple stream name", + "namespace": "namespace", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": [], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "simple", + "namespace": "namespace", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + }, + "stream_name": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + } + } + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": [], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "simple_b94_stream_name", + "namespace": "other_namespace", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": [], + "destination_sync_mode": "append" + }, + { + "stream": { + "name": "simple_b94_stream_name", + "namespace": "yet_another_namespace_with_a_very_long_name", + "json_schema": { + "type": ["null", "object"], + "properties": { + "id": { + "type": ["null", "string"] + } + } + }, + "supported_sync_modes": ["incremental"], + "source_defined_cursor": true, + "default_cursor_field": [] + }, + "sync_mode": "incremental", + "cursor_field": [], + "destination_sync_mode": "append" + } + ] +} diff --git 
a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json new file mode 100644 index 0000000000000..047c8cb29a298 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_clickhouse_names.json @@ -0,0 +1,52 @@ +{ + "_airbyte_namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "_airbyte_namespace", + "table": "simple_stream_name_f35" + }, + "_airbyte_namespace.simple.simple": { + "file": "simple", + "schema": "_airbyte_namespace", + "table": "simple" + }, + "_airbyte_namespace.simple_stream_name.stream_name": { + "file": "_airbyte_namespace_simple_b94_stream_name", + "schema": "_airbyte_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_other_names__e_b94_stream_name_f9d", + "schema": "_airbyte_other_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_yet_another__e_b94_stream_name_bae", + "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + }, + "namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "namespace", + "table": "simple_stream_name_f35" + }, + "namespace.simple.simple": { + "file": "simple", + "schema": "namespace", + "table": "simple" + }, + "namespace.simple_stream_name.stream_name": { + "file": "namespace_simple_b94_stream_name", + "schema": "namespace", + "table": "simple_b94_stream_name" + }, + "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "other_namespace_simple_b94_stream_name", + "schema": "other_namespace", + "table": "simple_b94_stream_name" + }, + "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "yet_another_namespac__e_b94_stream_name_5d1", + "schema": "yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_duckdb_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_duckdb_names.json new file mode 100644 index 0000000000000..0ae55a762fd8c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_duckdb_names.json @@ -0,0 +1,52 @@ +{ + "_airbyte_namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "_airbyte_namespace", + "table": "simple_stream_name_f35" + }, + "namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "namespace", + "table": "simple_stream_name_f35" + }, + "_airbyte_namespace.simple_stream_name.stream_name": { + "file": "_airbyte_namespace_simple_b94_stream_name", + "schema": "_airbyte_namespace", + "table": "simple_b94_stream_name" + }, + "namespace.simple_stream_name.stream_name": { + "file": "namespace_simple_b94_stream_name", + "schema": "namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_namespace.simple.simple": { + "file": "simple", + "schema": 
"_airbyte_namespace", + "table": "simple" + }, + "namespace.simple.simple": { + "file": "simple", + "schema": "namespace", + "table": "simple" + }, + "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_other_namesp__e_b94_stream_name_f9d", + "schema": "_airbyte_other_namespace", + "table": "simple_b94_stream_name" + }, + "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "other_namespace_simple_b94_stream_name", + "schema": "other_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_yet_another___e_b94_stream_name_bae", + "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + }, + "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "yet_another_namespace__e_b94_stream_name_5d1", + "schema": "yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_mssql_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_mssql_names.json new file mode 100644 index 0000000000000..0ae55a762fd8c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_mssql_names.json @@ -0,0 +1,52 @@ +{ + "_airbyte_namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "_airbyte_namespace", + "table": "simple_stream_name_f35" + }, + "namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "namespace", + "table": "simple_stream_name_f35" + }, + "_airbyte_namespace.simple_stream_name.stream_name": { + "file": "_airbyte_namespace_simple_b94_stream_name", + "schema": "_airbyte_namespace", + "table": "simple_b94_stream_name" + }, + "namespace.simple_stream_name.stream_name": { + "file": "namespace_simple_b94_stream_name", + "schema": "namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_namespace.simple.simple": { + "file": "simple", + "schema": "_airbyte_namespace", + "table": "simple" + }, + "namespace.simple.simple": { + "file": "simple", + "schema": "namespace", + "table": "simple" + }, + "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_other_namesp__e_b94_stream_name_f9d", + "schema": "_airbyte_other_namespace", + "table": "simple_b94_stream_name" + }, + "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "other_namespace_simple_b94_stream_name", + "schema": "other_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_yet_another___e_b94_stream_name_bae", + "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + }, + "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "yet_another_namespace__e_b94_stream_name_5d1", + "schema": "yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_mysql_names.json 
b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_mysql_names.json new file mode 100644 index 0000000000000..0ae55a762fd8c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_mysql_names.json @@ -0,0 +1,52 @@ +{ + "_airbyte_namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "_airbyte_namespace", + "table": "simple_stream_name_f35" + }, + "namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "namespace", + "table": "simple_stream_name_f35" + }, + "_airbyte_namespace.simple_stream_name.stream_name": { + "file": "_airbyte_namespace_simple_b94_stream_name", + "schema": "_airbyte_namespace", + "table": "simple_b94_stream_name" + }, + "namespace.simple_stream_name.stream_name": { + "file": "namespace_simple_b94_stream_name", + "schema": "namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_namespace.simple.simple": { + "file": "simple", + "schema": "_airbyte_namespace", + "table": "simple" + }, + "namespace.simple.simple": { + "file": "simple", + "schema": "namespace", + "table": "simple" + }, + "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_other_namesp__e_b94_stream_name_f9d", + "schema": "_airbyte_other_namespace", + "table": "simple_b94_stream_name" + }, + "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "other_namespace_simple_b94_stream_name", + "schema": "other_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_yet_another___e_b94_stream_name_bae", + "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + }, + "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "yet_another_namespace__e_b94_stream_name_5d1", + "schema": "yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_names.json new file mode 100644 index 0000000000000..ec95f346d6d74 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_names.json @@ -0,0 +1,52 @@ +{ + "_airbyte_namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "_airbyte_namespace", + "table": "simple_stream_name_f35" + }, + "_airbyte_namespace.simple.simple": { + "file": "simple", + "schema": "_airbyte_namespace", + "table": "simple" + }, + "_airbyte_namespace.simple_stream_name.stream_name": { + "file": "_airbyte_namespace_simple_b94_stream_name", + "schema": "_airbyte_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_other_namespace_simple_b94_stream_name", + "schema": "_airbyte_other_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_yet_another_namespace_with_a_very_long_name_simple_b94_stream_name", + "schema": 
"_airbyte_yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + }, + "namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "namespace", + "table": "simple_stream_name_f35" + }, + "namespace.simple.simple": { + "file": "simple", + "schema": "namespace", + "table": "simple" + }, + "namespace.simple_stream_name.stream_name": { + "file": "namespace_simple_b94_stream_name", + "schema": "namespace", + "table": "simple_b94_stream_name" + }, + "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "other_namespace_simple_b94_stream_name", + "schema": "other_namespace", + "table": "simple_b94_stream_name" + }, + "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "yet_another_namespace_with_a_very_long_name_simple_b94_stream_name", + "schema": "yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_oracle_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_oracle_names.json new file mode 100644 index 0000000000000..397069ffdb961 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_oracle_names.json @@ -0,0 +1,27 @@ +{ + "namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "namespace", + "table": "simple_stream_name_f35" + }, + "namespace.simple.simple": { + "file": "simple", + "schema": "namespace", + "table": "simple" + }, + "namespace.simple_stream_name.stream_name": { + "file": "namespace_simple_b94_stream_name", + "schema": "namespace", + "table": "simple_b94_stream_name" + }, + "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "other_namespace_simple_b94_stream_name", + "schema": "other_namespace", + "table": "simple_b94_stream_name" + }, + "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "yet_another_namespace_with_a_very_long_name_simple_b94_stream_name", + "schema": "yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_postgres_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_postgres_names.json new file mode 100644 index 0000000000000..047c8cb29a298 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_postgres_names.json @@ -0,0 +1,52 @@ +{ + "_airbyte_namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "_airbyte_namespace", + "table": "simple_stream_name_f35" + }, + "_airbyte_namespace.simple.simple": { + "file": "simple", + "schema": "_airbyte_namespace", + "table": "simple" + }, + "_airbyte_namespace.simple_stream_name.stream_name": { + "file": "_airbyte_namespace_simple_b94_stream_name", + "schema": "_airbyte_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_other_names__e_b94_stream_name_f9d", + "schema": "_airbyte_other_namespace", + "table": "simple_b94_stream_name" + }, + 
"_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_yet_another__e_b94_stream_name_bae", + "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + }, + "namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "namespace", + "table": "simple_stream_name_f35" + }, + "namespace.simple.simple": { + "file": "simple", + "schema": "namespace", + "table": "simple" + }, + "namespace.simple_stream_name.stream_name": { + "file": "namespace_simple_b94_stream_name", + "schema": "namespace", + "table": "simple_b94_stream_name" + }, + "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "other_namespace_simple_b94_stream_name", + "schema": "other_namespace", + "table": "simple_b94_stream_name" + }, + "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "yet_another_namespac__e_b94_stream_name_5d1", + "schema": "yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + } +} diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_tidb_names.json b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_tidb_names.json new file mode 100644 index 0000000000000..0ae55a762fd8c --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/resources/un-nesting_collisions_catalog_expected_tidb_names.json @@ -0,0 +1,52 @@ +{ + "_airbyte_namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "_airbyte_namespace", + "table": "simple_stream_name_f35" + }, + "namespace.simple stream name.simple stream name": { + "file": "simple_stream_name_f35", + "schema": "namespace", + "table": "simple_stream_name_f35" + }, + "_airbyte_namespace.simple_stream_name.stream_name": { + "file": "_airbyte_namespace_simple_b94_stream_name", + "schema": "_airbyte_namespace", + "table": "simple_b94_stream_name" + }, + "namespace.simple_stream_name.stream_name": { + "file": "namespace_simple_b94_stream_name", + "schema": "namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_namespace.simple.simple": { + "file": "simple", + "schema": "_airbyte_namespace", + "table": "simple" + }, + "namespace.simple.simple": { + "file": "simple", + "schema": "namespace", + "table": "simple" + }, + "_airbyte_other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_other_namesp__e_b94_stream_name_f9d", + "schema": "_airbyte_other_namespace", + "table": "simple_b94_stream_name" + }, + "other_namespace.simple_b94_stream_name.simple_b94_stream_name": { + "file": "other_namespace_simple_b94_stream_name", + "schema": "other_namespace", + "table": "simple_b94_stream_name" + }, + "_airbyte_yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "_airbyte_yet_another___e_b94_stream_name_bae", + "schema": "_airbyte_yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + }, + "yet_another_namespace_with_a_very_long_name.simple_b94_stream_name.simple_b94_stream_name": { + "file": "yet_another_namespace__e_b94_stream_name_5d1", + "schema": "yet_another_namespace_with_a_very_long_name", + "table": "simple_b94_stream_name" + } +} diff --git 
a/airbyte-integrations/bases/base-normalization/unit_tests/test_destination_name_transformer.py b/airbyte-integrations/bases/base-normalization/unit_tests/test_destination_name_transformer.py new file mode 100644 index 0000000000000..22e590b29fab9 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/test_destination_name_transformer.py @@ -0,0 +1,251 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import os + +import pytest +from normalization.destination_type import DestinationType +from normalization.transform_catalog.destination_name_transformer import ( + DestinationNameTransformer, + strip_accents, + transform_standard_naming, +) + + +@pytest.fixture(scope="function", autouse=True) +def before_tests(request): + # This makes the test run whether it is executed from the tests folder (with pytest/gradle) + # or from the base-normalization folder (through pycharm) + unit_tests_dir = os.path.join(request.fspath.dirname, "unit_tests") + if os.path.exists(unit_tests_dir): + os.chdir(unit_tests_dir) + else: + os.chdir(request.fspath.dirname) + yield + os.chdir(request.config.invocation_dir) + + +@pytest.mark.parametrize( + "input_str, destination_type, expected", + [ + # Contains a space character + ("Hello World", "Postgres", True), + ("Hello World", "BigQuery", False), + ("Hello World", "Snowflake", True), + ("Hello World", "Redshift", True), + ("Hello World", "MySQL", True), + ("Hello World", "MSSQL", True), + ("Hello World", "TiDB", True), + ("Hello World", "DuckDB", True), + # Reserved Word for BigQuery and MySQL only + ("Groups", "Postgres", False), + ("Groups", "BigQuery", True), + ("Groups", "Snowflake", False), + ("Groups", "Redshift", False), + ("Groups", "MySQL", True), + ("Groups", "MSSQL", False), + ("Groups", "TiDB", True), + ("Groups", "DuckDB", True), + # Doesn't start with alpha or underscore + ("100x200", "Postgres", True), + ("100x200", "BigQuery", False), + ("100x200", "Snowflake", True), + ("100x200", "Redshift", True), + ("100x200", "MySQL", True), + ("100x200", "MSSQL", True), + ("100x200", "TiDB", True), + ("100x200", "DuckDB", True), + # Contains non-alphanumeric characters + ("post.wall", "Postgres", True), + ("post.wall", "BigQuery", False), + ("post.wall", "Snowflake", True), + ("post.wall", "Redshift", True), + ("post.wall", "MySQL", True), + ("post.wall", "MSSQL", True), + ("post.wall", "TiDB", True), + ("post.wall", "DuckDB", True), + ], +) +def test_needs_quote(input_str: str, destination_type: str, expected: bool): + name_transformer = DestinationNameTransformer(DestinationType.from_string(destination_type)) + assert name_transformer.needs_quotes(input_str) == expected + + +@pytest.mark.parametrize( + "input_str, expected", + [ + ("Hello World!", "Hello World!"), + ("àêî öÙ", "aei oU"), + ], +) +def test_strip_accents(input_str: str, expected: str): + assert strip_accents(input_str) == expected + + +@pytest.mark.parametrize( + "expected, input_str", + [ + ("__identifier_name", "__identifier_name"), + ("IDENTIFIER_NAME", "IDENTIFIER_NAME"), + ("123identifier_name", "123identifier_name"), + ("i0d0e0n0t0i0f0i0e0r0n0a0m0e", "i0d0e0n0t0i0f0i0e0r0n0a0m0e"), + ("_identifier_name", ",identifier+name"), + ("identifier_name", "identifiêr name"), + ("a_unicode_name__", "a_unicode_name_文"), + ("identifier__name__", "identifier__name__"), + ("identifier_name_weee", "identifier-name.weee"), + ("_identifier_name_", '"identifier name"'), + ("identifier_name", "identifier name"), + ("identifier_", "identifier%"), + ("_identifier_", "`identifier`"), + ], +) +def test_transform_standard_naming(input_str: str, expected: str): + assert transform_standard_naming(input_str) == expected + + +@pytest.mark.parametrize( + "input_str, destination_type, expected, expected_in_jinja", + [ + # Case-sensitive names + ("Identifier Name", "Postgres", "{{ adapter.quote('Identifier Name') }}", "adapter.quote('Identifier Name')"), + ("Identifier Name", "BigQuery", "Identifier_Name", "'Identifier_Name'"), + ("Identifier Name", "Snowflake", "{{ adapter.quote('Identifier Name') }}", "adapter.quote('Identifier Name')"), + ("Identifier Name", "Redshift", "{{ adapter.quote('identifier name') }}", "adapter.quote('identifier name')"), + ("Identifier Name", "MySQL", "{{ adapter.quote('Identifier Name') }}", "adapter.quote('Identifier Name')"), + ("Identifier Name", "MSSQL", "{{ adapter.quote('Identifier Name') }}", "adapter.quote('Identifier Name')"), + ("Identifier Name", "TiDB", "{{ adapter.quote('Identifier Name') }}", "adapter.quote('Identifier Name')"), + ("Identifier Name", "DuckDB", "{{ adapter.quote('Identifier Name') }}", "adapter.quote('Identifier Name')"), + # Reserved Word for BigQuery and MySQL only + ("Groups", "Postgres", "groups", "'groups'"), + ("Groups", "BigQuery", "{{ adapter.quote('Groups') }}", "adapter.quote('Groups')"), + ("Groups", "Snowflake", "GROUPS", "'GROUPS'"), + ("Groups", "Redshift", "groups", "'groups'"), + ("Groups", "MySQL", "{{ adapter.quote('Groups') }}", "adapter.quote('Groups')"), + ("Groups", "MSSQL", "groups", "'groups'"), + ("Groups", "TiDB", "{{ adapter.quote('Groups') }}", "adapter.quote('Groups')"), + ("Groups", "DuckDB", "{{ adapter.quote('Groups') }}", "adapter.quote('Groups')"), + ], +) +def test_normalize_column_name(input_str: str, destination_type: str, expected: str, expected_in_jinja: str): + t = DestinationType.from_string(destination_type) + assert DestinationNameTransformer(t).normalize_column_name(input_str, in_jinja=False) == expected + assert DestinationNameTransformer(t).normalize_column_name(input_str, in_jinja=True) == expected_in_jinja + + +@pytest.mark.parametrize( + "input_str, expected", + [ + # below the limit + ("Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh", "Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh"), + # at the limit + ("Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh_Iii", "Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh_Iii"), + # over the limit + ("Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh_Iiii", "Aaaa_Bbbb_Cccc_Dddd___e_Ffff_Gggg_Hhhh_Iiii"), + ("Aaaa_Bbbb_Cccc_Dddd_Eeee_a_very_long_name_Ffff_Gggg_Hhhh_Iiii", "Aaaa_Bbbb_Cccc_Dddd___e_Ffff_Gggg_Hhhh_Iiii"), + ("Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh_Iiii_Jjjj_Kkkk", "Aaaa_Bbbb_Cccc_Dddd___g_Hhhh_Iiii_Jjjj_Kkkk"), + ("ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz_0123456789", "ABCDEFGHIJKLMNOPQRST__qrstuvwxyz_0123456789"), + ], +) +def test_truncate_identifier(input_str: str, expected: str): + """ + Rules about truncation, for example for both of these strings, which are too long for the postgres 64-character limit: + - `Aaaa_Bbbb_Cccc_Dddd_Eeee_Ffff_Gggg_Hhhh_Iiii` + - `Aaaa_Bbbb_Cccc_Dddd_Eeee_a_very_long_name_Ffff_Gggg_Hhhh_Iiii` + + How to truncate (in the middle) is what is being verified by these tests. + In this instance, both strings end up as: `Aaaa_Bbbb_Cccc_Dddd___e_Ffff_Gggg_Hhhh_Iiii` + and can therefore potentially cause a collision in table names. + + Note that dealing with such collisions is not part of `destination_name_transformer` but of the `stream_processor`. + """ + name_transformer = DestinationNameTransformer(DestinationType.POSTGRES) + print(f"Truncating from #{len(input_str)} to #{len(expected)}") + assert name_transformer.truncate_identifier_name(input_str) == expected + + +@pytest.mark.parametrize( + "input_str, destination_type, expected, expected_column", + [ + # Case-sensitive names + ("Identifier Name1", "Postgres", "identifier_name1", "{{ adapter.quote('Identifier Name1') }}"), + ("Identifier Name2", "BigQuery", "Identifier_Name2", "Identifier_Name2"), + ("Identifier Name3", "Snowflake", "IDENTIFIER_NAME3", "{{ adapter.quote('Identifier Name3') }}"), + ("Identifier Name4", "Redshift", "identifier_name4", "{{ adapter.quote('identifier name4') }}"), + ("Identifier Name5", "MySQL", "identifier_name5", "{{ adapter.quote('Identifier Name5') }}"), + ("Identifier Name6", "MSSQL", "identifier_name6", "{{ adapter.quote('Identifier Name6') }}"), + ("Identifier Name7", "TiDB", "identifier_name7", "{{ adapter.quote('Identifier Name7') }}"), + ("Identifier Name8", "DuckDB", "identifier_name8", "{{ adapter.quote('Identifier Name8') }}"), + # Unicode + ("a-Unicode_name_文1", "Postgres", "a_unicode_name__1", "{{ adapter.quote('a-Unicode_name_文1') }}"), + ("a-Unicode_name_文2", "BigQuery", "a_Unicode_name__2", "a_Unicode_name__2"), + ("a-Unicode_name_文3", "Snowflake", "A_UNICODE_NAME__3", "{{ adapter.quote('a-Unicode_name_文3') }}"), + ("a-Unicode_name_文4", "Redshift", "a_unicode_name__4", "{{ adapter.quote('a-unicode_name_文4') }}"), + ("a-Unicode_name_文5", "MySQL", "a_unicode_name__5", "{{ adapter.quote('a-Unicode_name_文5') }}"), + ("a-Unicode_name_文6", "MSSQL", "a_unicode_name__6", "{{ adapter.quote('a-Unicode_name_文6') }}"), + ("a-Unicode_name_文7", "TiDB", "a_unicode_name__7", "{{ adapter.quote('a-Unicode_name_文7') }}"), + ("a-Unicode_name_文8", "DuckDB", "a_unicode_name__8", "{{ adapter.quote('a-Unicode_name_文8') }}"), + # Doesn't start with alpha or underscore + ("100x2001", "Postgres", "100x2001", "{{ adapter.quote('100x2001') }}"), + ("100x2002", "BigQuery", "100x2002", "_100x2002"), + ("文2_a-Unicode_name", "BigQuery", "_2_a_Unicode_name", "_2_a_Unicode_name"), + ("100x2003", "Snowflake", "100x2003", "{{ adapter.quote('100x2003') }}"), + ("100x2004", "Redshift", "100x2004", "{{ adapter.quote('100x2004') }}"), + ("100x2005", "MySQL", "100x2005", "{{ adapter.quote('100x2005') }}"), + ("100x2006", "MSSQL", "_100x2006", "{{ adapter.quote('100x2006') }}"), + ("100x2007", "TiDB", "100x2007", "{{ adapter.quote('100x2007') }}"), + ("100x2008", "DuckDB", "100x2008", "{{ adapter.quote('100x2008') }}"), + # Reserved Keywords in BQ and MySQL + ("Groups", "Postgres", "groups", "groups"), + ("Groups", "BigQuery", "Groups", "{{ adapter.quote('Groups') }}"), + ("Groups", "Snowflake", "GROUPS", "GROUPS"), + ("Groups", "Redshift", "groups", "groups"), + ("Groups", "MySQL", "Groups", "{{ adapter.quote('Groups') }}"), + ("Groups", "MSSQL", "groups", "groups"), + ("Groups", "TiDB", "Groups", "{{ adapter.quote('Groups') }}"), + ("Groups", "DuckDB", "Groups", "{{ adapter.quote('Groups') }}"), + # Reserved Keywords + ("DisTincT", "Postgres", "DisTincT", "{{ adapter.quote('DisTincT') }}"), + ("DisTincT", "BigQuery", "DisTincT", "{{ adapter.quote('DisTincT') }}"), + ("DisTincT", "Snowflake", "DisTincT", "{{ adapter.quote('DisTincT') }}"), + ("DisTincT", "Redshift", "distinct", "{{ adapter.quote('distinct') }}"), + ("DisTincT", "MySQL", "DisTincT", "{{ adapter.quote('DisTincT') }}"), + ("DisTincT", "MSSQL", "DisTincT", "{{ adapter.quote('DisTincT') }}"), + ("DisTincT", 
"TiDB", "DisTincT", "{{ adapter.quote('DisTincT') }}"), + ("DisTincT", "DuckDB", "DisTincT", "{{ adapter.quote('DisTincT') }}"), + # Quoted identifiers + ("'QuoTed1 IdenTifiER'", "Postgres", "_quoted1_identifier_", "{{ adapter.quote('\\'QuoTed1 IdenTifiER\\'') }}"), + ("'QuoTed2 IdenTifiER'", "BigQuery", "_QuoTed2_IdenTifiER_", "_QuoTed2_IdenTifiER_"), + ("'QuoTed3 IdenTifiER'", "Snowflake", "_QUOTED3_IDENTIFIER_", "{{ adapter.quote('\\'QuoTed3 IdenTifiER\\'') }}"), + ("'QuoTed4 IdenTifiER'", "Redshift", "_quoted4_identifier_", "{{ adapter.quote('\\'quoted4 identifier\\'') }}"), + ("'QuoTed5 IdenTifiER'", "MySQL", "_quoted5_identifier_", "{{ adapter.quote('\\'QuoTed5 IdenTifiER\\'') }}"), + ("'QuoTed6 IdenTifiER'", "MSSQL", "_quoted6_identifier_", "{{ adapter.quote('\\'QuoTed6 IdenTifiER\\'') }}"), + ("'QuoTed7 IdenTifiER'", "TiDB", "_quoted7_identifier_", "{{ adapter.quote('\\'QuoTed7 IdenTifiER\\'') }}"), + ("'QuoTed8 IdenTifiER'", "DuckDB", "_quoted8_identifier_", "{{ adapter.quote('\\'QuoTed8 IdenTifiER\\'') }}"), + # Double Quoted identifiers + ('"QuoTed7 IdenTifiER"', "Postgres", "_quoted7_identifier_", '{{ adapter.quote(\'""QuoTed7 IdenTifiER""\') }}'), + ('"QuoTed8 IdenTifiER"', "BigQuery", "_QuoTed8_IdenTifiER_", "_QuoTed8_IdenTifiER_"), + ('"QuoTed9 IdenTifiER"', "Snowflake", "_QUOTED9_IDENTIFIER_", '{{ adapter.quote(\'""QuoTed9 IdenTifiER""\') }}'), + ('"QuoTed10 IdenTifiER"', "Redshift", "_quoted10_identifier_", '{{ adapter.quote(\'""quoted10 identifier""\') }}'), + ('"QuoTed11 IdenTifiER"', "MySQL", "_quoted11_identifier_", "{{ adapter.quote('\"QuoTed11 IdenTifiER\"') }}"), + ('"QuoTed12 IdenTifiER"', "MSSQL", "_quoted12_identifier_", '{{ adapter.quote(\'""QuoTed12 IdenTifiER""\') }}'), + ('"QuoTed13 IdenTifiER"', "TiDB", "_quoted13_identifier_", "{{ adapter.quote('\"QuoTed13 IdenTifiER\"') }}"), + ('"QuoTed14 IdenTifiER"', "DuckDB", "_quoted14_identifier_", "{{ adapter.quote('\"QuoTed14 IdenTifiER\"') }}"), + # Back Quoted identifiers + ("`QuoTed13 IdenTifiER`", "Postgres", "_quoted13_identifier_", "{{ adapter.quote('`QuoTed13 IdenTifiER`') }}"), + ("`QuoTed14 IdenTifiER`", "BigQuery", "_QuoTed14_IdenTifiER_", "_QuoTed14_IdenTifiER_"), + ("`QuoTed15 IdenTifiER`", "Snowflake", "_QUOTED15_IDENTIFIER_", "{{ adapter.quote('`QuoTed15 IdenTifiER`') }}"), + ("`QuoTed16 IdenTifiER`", "Redshift", "_quoted16_identifier_", "{{ adapter.quote('`quoted16 identifier`') }}"), + ("`QuoTed17 IdenTifiER`", "MySQL", "_quoted17_identifier_", "{{ adapter.quote('_QuoTed17 IdenTifiER_') }}"), + ("`QuoTed18 IdenTifiER`", "MSSQL", "_quoted18_identifier_", "{{ adapter.quote('`QuoTed18 IdenTifiER`') }}"), + ("`QuoTed17 IdenTifiER`", "TiDB", "_quoted17_identifier_", "{{ adapter.quote('_QuoTed17 IdenTifiER_') }}"), + ("`QuoTed19 IdenTifiER`", "DuckDB", "_quoted19_identifier_", "{{ adapter.quote('_QuoTed19 IdenTifiER_') }}"), + ], +) +def test_normalize_name(input_str: str, destination_type: str, expected: str, expected_column: str): + t = DestinationType.from_string(destination_type) + assert DestinationNameTransformer(t).normalize_schema_name(input_str) == expected + assert DestinationNameTransformer(t).normalize_table_name(input_str) == expected + assert DestinationNameTransformer(t).normalize_column_name(input_str) == expected_column diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/test_stream_processor.py b/airbyte-integrations/bases/base-normalization/unit_tests/test_stream_processor.py new file mode 100644 index 0000000000000..7251d1bb54c24 --- /dev/null +++ 
b/airbyte-integrations/bases/base-normalization/unit_tests/test_stream_processor.py @@ -0,0 +1,105 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import os +from typing import List + +import pytest +from airbyte_cdk.models import DestinationSyncMode, SyncMode +from normalization.destination_type import DestinationType +from normalization.transform_catalog.stream_processor import StreamProcessor +from normalization.transform_catalog.table_name_registry import TableNameRegistry + + +@pytest.fixture(scope="function", autouse=True) +def before_tests(request): + # This makes the test run whether it is executed from the tests folder (with pytest/gradle) + # or from the base-normalization folder (through pycharm) + unit_tests_dir = os.path.join(request.fspath.dirname, "unit_tests") + if os.path.exists(unit_tests_dir): + os.chdir(unit_tests_dir) + else: + os.chdir(request.fspath.dirname) + yield + os.chdir(request.config.invocation_dir) + + +@pytest.mark.parametrize( + "cursor_field, expecting_exception, expected_cursor_field", + [ + (None, False, "_airbyte_emitted_at"), + (["updated_at"], False, "updated_at"), + (["_airbyte_emitted_at"], False, "_airbyte_emitted_at"), + (["parent", "nested_field"], True, "nested_field"), + ], +) +def test_cursor_field(cursor_field: List[str], expecting_exception: bool, expected_cursor_field: str): + stream_processor = StreamProcessor.create( + stream_name="test_cursor_field", + destination_type=DestinationType.POSTGRES, + default_schema="default_schema", + raw_schema="raw_schema", + schema="schema_name", + source_sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append_dedup, + cursor_field=cursor_field, + primary_key=[], + json_column_name="json_column_name", + properties=dict(), + tables_registry=TableNameRegistry(DestinationType.POSTGRES), + from_table="", + ) + try: + assert ( + stream_processor.get_cursor_field(column_names={expected_cursor_field: (expected_cursor_field, "random")}) + == expected_cursor_field + ) + except ValueError as e: + if not expecting_exception: + raise e + + +@pytest.mark.parametrize( + "primary_key, column_type, expecting_exception, expected_primary_keys, expected_final_primary_key_string", + [ + ([["id"]], "string", False, ["id"], "{{ adapter.quote('id') }}"), + ([["id"]], "number", False, ["id"], "cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }})"), + ([["first_name"], ["last_name"]], "string", False, ["first_name", "last_name"], "first_name, last_name"), + ([["float_id"]], "number", False, ["float_id"], "cast(float_id as {{ dbt_utils.type_string() }})"), + ([["_airbyte_emitted_at"]], "string", False, [], "cast(_airbyte_emitted_at as {{ dbt_utils.type_string() }})"), + (None, "string", True, [], ""), + ([["parent", "nested_field"]], "string", True, [], ""), + ], +) +def test_primary_key( + primary_key: List[List[str]], + column_type: str, + expecting_exception: bool, + expected_primary_keys: List[str], + expected_final_primary_key_string: str, +): + stream_processor = StreamProcessor.create( + stream_name="test_primary_key", + destination_type=DestinationType.POSTGRES, + raw_schema="raw_schema", + default_schema="default_schema", + schema="schema_name", + source_sync_mode=SyncMode.incremental, + destination_sync_mode=DestinationSyncMode.append_dedup, + cursor_field=[], + primary_key=primary_key, + json_column_name="json_column_name", + properties={key: {"type": column_type} for key in expected_primary_keys}, + 
tables_registry=TableNameRegistry(DestinationType.POSTGRES), + from_table="", + ) + try: + assert ( + ", ".join(stream_processor.get_primary_key_partition(column_names=stream_processor.extract_column_names())) + == expected_final_primary_key_string + ) + except ValueError as e: + if not expecting_exception: + raise e diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/test_table_name_registry.py b/airbyte-integrations/bases/base-normalization/unit_tests/test_table_name_registry.py new file mode 100644 index 0000000000000..cd645850f6991 --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/test_table_name_registry.py @@ -0,0 +1,186 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + + +import json +import os +from typing import List + +import pytest +from normalization.destination_type import DestinationType +from normalization.transform_catalog.catalog_processor import CatalogProcessor +from normalization.transform_catalog.destination_name_transformer import DestinationNameTransformer +from normalization.transform_catalog.table_name_registry import TableNameRegistry, get_nested_hashed_table_name + + +@pytest.fixture(scope="function", autouse=True) +def before_tests(request): + # This makes the test run whether it is executed from the tests folder (with pytest/gradle) + # or from the base-normalization folder (through pycharm) + unit_tests_dir = os.path.join(request.fspath.dirname, "unit_tests") + if os.path.exists(unit_tests_dir): + os.chdir(unit_tests_dir) + else: + os.chdir(request.fspath.dirname) + yield + os.chdir(request.config.invocation_dir) + + +@pytest.mark.parametrize( + "catalog_file", + [ + "long_name_truncate_collisions_catalog", # collisions are generated on postgres because of character limits + "un-nesting_collisions_catalog", # collisions between top-level streams and nested ones + "nested_catalog", # sample catalog from facebook + ], +) +@pytest.mark.parametrize("destination_type", DestinationType.testable_destinations()) +def test_resolve_names(destination_type: DestinationType, catalog_file: str): + """ + For a given catalog.json and destination, multiple cases can occur where naming becomes tricky + (especially since some destinations, like postgres, have a very low identifier length limit of 64 characters). + + With nested objects/arrays in a stream, generated names can grow very long. + Tests are built here using resource files as follows: + - `*_catalog.json`: + the input catalog.json, typically what a source would provide. + For example, the Hubspot, Stripe and Facebook catalog.json files contain some level of nesting. + (here, nested_catalog.json is a smaller sample of streams/properties extracted from the facebook catalog) + - `*_expected_names.json`: + the list of expected table names + + For the expected json files, it is possible to specialize the file to a certain destination. 
+ So if, for example, the resources folder contains these two expected files: + - edge_cases_catalog_expected_names.json + - edge_cases_catalog_expected_postgres_names.json + then the test will use edge_cases_catalog_expected_names.json, except for the Postgres destination, + where the expected table names will come from edge_cases_catalog_expected_postgres_names.json. + + The content of the expected_*.json files is the serialization of stream_processor.tables_registry.registry. + """ + integration_type = destination_type.value + tables_registry = TableNameRegistry(destination_type) + + catalog = read_json(f"resources/{catalog_file}.json") + + # process top level + stream_processors = CatalogProcessor.build_stream_processor( + catalog=catalog, + json_column_name="'json_column_name_test'", + default_schema="schema_test", + name_transformer=DestinationNameTransformer(destination_type), + destination_type=destination_type, + tables_registry=tables_registry, + ) + for stream_processor in stream_processors: + # Check properties + if not stream_processor.properties: + raise EOFError("Invalid Catalog: Unexpected empty properties in catalog") + stream_processor.collect_table_names() + for conflict in tables_registry.resolve_names(): + print( + f"WARN: Resolving conflict: {conflict.schema}.{conflict.table_name_conflict} " + f"from '{'.'.join(conflict.json_path)}' into {conflict.table_name_resolved}" + ) + apply_function = identity + if DestinationType.SNOWFLAKE.value == destination_type.value: + apply_function = str.upper + elif DestinationType.REDSHIFT.value == destination_type.value: + apply_function = str.lower + if os.path.exists(f"resources/{catalog_file}_expected_{integration_type.lower()}_names.json"): + expected_names = read_json(f"resources/{catalog_file}_expected_{integration_type.lower()}_names.json", apply_function) + else: + expected_names = read_json(f"resources/{catalog_file}_expected_names.json", apply_function) + + assert tables_registry.to_dict(apply_function) == expected_names + + +def identity(x): + return x + + +def read_json(input_path: str, apply_function=(lambda x: x)): + with open(input_path, "r") as file: + contents = file.read() + if apply_function: + contents = apply_function(contents) + return json.loads(contents) + + +# This test is not intended to be exhaustive over the destinations, +# so it's not mandatory to add a new destination's expected fields here. +# The intent is to unit test the simple_name vs nested_hashed_name +# functions in the table_name_registry (see the illustrative sketch just below). +# There are other tests that automatically test naming against all destinations +# whenever a destination is added to the enum. 
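+
+# Editor's illustrative sketch (a hypothetical helper, not part of the
+# TableNameRegistry API in this PR): the expected names asserted below follow
+# a "head__tail" middle-truncation shape. This sketch reproduces only that
+# shape; the real implementation also mixes a short hash of the json path
+# (e.g. "30c", "cd9", "0a5") into nested names, which is not attempted here.
+def _illustrative_middle_truncate(name: str, limit: int = 43) -> str:
+    # Short names pass through untouched.
+    if len(name) <= limit:
+        return name
+    # Keep the identifier's head and tail; mark the elided middle with "__".
+    half = (limit - 2) // 2
+    return name[:half] + "__" + name[len(name) - half :]
+
+
+# For example, _illustrative_middle_truncate applied to an over-long stream
+# name yields an identifier of at most `limit` characters, in the spirit of
+# the 43-character postgres bound asserted by the tests below.
+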
+@pytest.mark.parametrize( + "json_path, expected_postgres, expected_bigquery", + [ + ( + ["parent", "child"], + "parent_child", + "parent_child", + ), + ( + ["The parent stream has a nested column with a", "short_substream_name"], + "the_parent_stream_ha___short_substream_name", + "The_parent_stream_has_a_nested_column_with_a_short_substream_name", + ), + ( + ["The parent stream has a nested column with a", "substream with a rather long name"], + "the_parent_stream_ha__th_a_rather_long_name", + "The_parent_stream_has_a_nested_column_with_a_substream_with_a_rather_long_name", + ), + ], +) +def test_get_simple_table_name(json_path: List[str], expected_postgres: str, expected_bigquery: str): + """ + Checks how to generate a simple and easy-to-understand name from a json path. + """ + postgres_registry = TableNameRegistry(DestinationType.POSTGRES) + actual_postgres_name = postgres_registry.get_simple_table_name(json_path) + assert actual_postgres_name == expected_postgres + assert len(actual_postgres_name) <= 43 # explicitly check for our max postgres length in case tests are changed in the future + + bigquery_registry = TableNameRegistry(DestinationType.BIGQUERY) + actual_bigquery_name = bigquery_registry.get_simple_table_name(json_path) + assert actual_bigquery_name == expected_bigquery + + +@pytest.mark.parametrize( + "json_path, expected_postgres, expected_bigquery", + [ + ( + ["parent", "child"], + "parent_30c_child", + "parent_30c_child", + ), + ( + ["The parent stream has a nested column with a", "short_substream_name"], + "the_parent_stream__cd9_short_substream_name", + "The_parent_stream_has_a_nested_column_with_a_cd9_short_substream_name", + ), + ( + ["The parent stream has a nested column with a", "substream with a rather long name"], + "the_parent_0a5_substream_wi__her_long_name", + "The_parent_stream_has_a_nested_column_with_a_0a5_substream_with_a_rather_long_name", + ), + ], +) +def test_get_nested_hashed_table_name(json_path: List[str], expected_postgres: str, expected_bigquery: str): + """ + Checks how to generate a unique name by combining all fields of a json path into a single table name that the user can + (somehow) identify and recognize as the data available in there. + A set of complicated rules is applied to choose which parts to truncate, which to keep, and how to handle + name collisions. + """ + child = json_path[-1] + postgres_name_transformer = DestinationNameTransformer(DestinationType.POSTGRES) + actual_postgres_name = get_nested_hashed_table_name(postgres_name_transformer, "schema", json_path, child) + assert actual_postgres_name == expected_postgres + assert len(actual_postgres_name) <= 43 # explicitly check for our max postgres length in case tests are changed in the future + + bigquery_name_transformer = DestinationNameTransformer(DestinationType.BIGQUERY) + actual_bigquery_name = get_nested_hashed_table_name(bigquery_name_transformer, "schema", json_path, child) + assert actual_bigquery_name == expected_bigquery diff --git a/airbyte-integrations/bases/base-normalization/unit_tests/test_transform_config.py b/airbyte-integrations/bases/base-normalization/unit_tests/test_transform_config.py new file mode 100644 index 0000000000000..2c3fc60f7ea3e --- /dev/null +++ b/airbyte-integrations/bases/base-normalization/unit_tests/test_transform_config.py @@ -0,0 +1,595 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + + +import json +import os +import socket +import tempfile +import time + +import pytest +from normalization.destination_type import DestinationType +from normalization.transform_catalog.transform import extract_path, extract_schema +from normalization.transform_config.transform import TransformConfig + + +class TestTransformConfig: + """ + This class tests the transform config functionality that converts a destination_config.json into the appropriate profiles.yml file for dbt to use + """ + + @pytest.fixture(scope="class", autouse=True) + def before_all_tests(self, request): + # This makes the test run whether it is executed from the tests folder (with pytest/gradle) + # or from the base-normalization folder (through pycharm) + unit_tests_dir = os.path.join(request.fspath.dirname, "unit_tests") + if os.path.exists(unit_tests_dir): + os.chdir(unit_tests_dir) + else: + os.chdir(request.fspath.dirname) + yield + os.chdir(request.config.invocation_dir) + + def test_is_ssh_tunnelling(self): + def single_test(config, expected_output): + assert TransformConfig.is_ssh_tunnelling(config) == expected_output + + inputs = [ + ({}, False), + ( + { + "type": "postgres", + "dbname": "my_db", + "host": "airbyte.io", + "pass": "password123", + "port": 5432, + "schema": "public", + "threads": 32, + "user": "a user", + }, + False, + ), + ( + { + "type": "postgres", + "dbname": "my_db", + "host": "airbyte.io", + "pass": "password123", + "port": 5432, + "schema": "public", + "threads": 32, + "user": "a user", + "tunnel_method": { + "tunnel_host": "1.2.3.4", + "tunnel_method": "SSH_PASSWORD_AUTH", + "tunnel_port": 22, + "tunnel_user": "user", + "tunnel_user_password": "pass", + }, + }, + True, + ), + ( + { + "type": "postgres", + "dbname": "my_db", + "host": "airbyte.io", + "pass": "password123", + "port": 5432, + "schema": "public", + "threads": 32, + "user": "a user", + "tunnel_method": { + "tunnel_method": "SSH_KEY_AUTH", + }, + }, + True, + ), + ( + { + "type": "postgres", + "dbname": "my_db", + "host": "airbyte.io", + "pass": "password123", + "port": 5432, + "schema": "public", + "threads": 32, + "user": "a user", + "tunnel_method": { + "nothing": "nothing", + }, + }, + False, + ), + ] + for input_tuple in inputs: + single_test(input_tuple[0], input_tuple[1]) + + def test_is_port_free(self): + # to test that this accurately identifies 'free' ports, we'll find a 'free' port and then try to use it + test_port = 13055 + while not TransformConfig.is_port_free(test_port): + test_port += 1 + if test_port > 65535: + raise RuntimeError("couldn't find a free port...") + + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(("localhost", test_port)) + # if we haven't failed then we accurately identified a 'free' port. 
+    def test_transform_bigquery(self):
+        input = {
+            "project_id": "my_project_id",
+            "dataset_id": "my_dataset_id",
+            "credentials_json": '{ "type": "service_account-json" }',
+            "transformation_priority": "interactive",
+            "dataset_location": "EU",
+        }
+
+        actual_output = TransformConfig().transform_bigquery(input)
+        expected_output = {
+            "type": "bigquery",
+            "method": "service-account-json",
+            "project": "my_project_id",
+            "dataset": "my_dataset_id",
+            "priority": "interactive",
+            "keyfile_json": {"type": "service_account-json"},
+            "location": "EU",
+            "retries": 3,
+            "threads": 8,
+        }
+
+        actual_keyfile = actual_output["keyfile_json"]
+        expected_keyfile = {"type": "service_account-json"}
+        assert actual_output == expected_output
+        assert actual_keyfile == expected_keyfile
+        assert extract_schema(actual_output) == "my_dataset_id"
+
+    def test_transform_bigquery_no_credentials(self):
+        input = {"project_id": "my_project_id", "dataset_id": "my_dataset_id"}
+
+        actual_output = TransformConfig().transform_bigquery(input)
+        expected_output = {
+            "type": "bigquery",
+            "method": "oauth",
+            "project": "my_project_id",
+            "dataset": "my_dataset_id",
+            "priority": "interactive",
+            "retries": 3,
+            "threads": 8,
+        }
+
+        assert actual_output == expected_output
+        assert extract_schema(actual_output) == "my_dataset_id"
+
+    def test_transform_bigquery_with_embedded_project_id(self):
+        input = {"project_id": "my_project_id", "dataset_id": "my_project_id:my_dataset_id"}
+
+        actual_output = TransformConfig().transform_bigquery(input)
+        expected_output = {
+            "type": "bigquery",
+            "method": "oauth",
+            "project": "my_project_id",
+            "dataset": "my_dataset_id",
+            "priority": "interactive",
+            "retries": 3,
+            "threads": 8,
+        }
+
+        assert actual_output == expected_output
+        assert extract_schema(actual_output) == "my_dataset_id"
+
+    def test_transform_bigquery_with_embedded_mismatched_project_id(self):
+        input = {"project_id": "my_project_id", "dataset_id": "bad_project_id:my_dataset_id"}
+
+        try:
+            TransformConfig().transform_bigquery(input)
+            assert False, "transform_bigquery should have raised an exception"
+        except ValueError:
+            pass
+
+    def test_transform_bigquery_with_invalid_format(self):
+        input = {"project_id": "my_project_id", "dataset_id": "foo:bar:baz"}
+
+        try:
+            TransformConfig().transform_bigquery(input)
+            assert False, "transform_bigquery should have raised an exception"
+        except ValueError:
+            pass
+
+    def test_transform_postgres(self):
+        input = {
+            "host": "airbyte.io",
+            "port": 5432,
+            "username": "a user",
+            "password": "password123",
+            "database": "my_db",
+            "schema": "public",
+        }
+
+        actual = TransformConfig().transform_postgres(input)
+        expected = {
+            "type": "postgres",
+            "dbname": "my_db",
+            "host": "airbyte.io",
+            "pass": "password123",
+            "port": 5432,
+            "schema": "public",
+            "threads": 8,
+            "user": "a user",
+        }
+
+        assert actual == expected
+        assert extract_schema(actual) == "public"
+
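+    # The transforms above are essentially key-for-key translations from the Airbyte destination
+    # config into dbt profile fields. A rough sketch of the postgres case (illustrative only;
+    # the real transform_postgres also handles SSH tunnelling, as tested below):
+    #
+    #     dbt_config = {
+    #         "type": "postgres",
+    #         "host": config["host"],
+    #         "user": config["username"],
+    #         "pass": config["password"],
+    #         "port": config["port"],
+    #         "dbname": config["database"],
+    #         "schema": config["schema"],
+    #         "threads": 8,
+    #     }
+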
"a user", + "password": "password123", + "database": "my_db", + "schema": "public", + "tunnel_method": { + "tunnel_host": "1.2.3.4", + "tunnel_method": "SSH_PASSWORD_AUTH", + "tunnel_port": 22, + "tunnel_user": "user", + "tunnel_user_password": "pass", + }, + } + port = TransformConfig.pick_a_port() + + actual = TransformConfig().transform_postgres(input) + expected = { + "type": "postgres", + "dbname": "my_db", + "host": "localhost", + "pass": "password123", + "port": port, + "schema": "public", + "threads": 8, + "user": "a user", + } + + assert actual == expected + assert extract_schema(actual) == "public" + + def test_transform_snowflake(self): + input = { + "host": "http://123abc.us-east-7.aws.snowflakecomputing.com", + "role": "AIRBYTE_ROLE", + "warehouse": "AIRBYTE_WAREHOUSE", + "database": "AIRBYTE_DATABASE", + "schema": "AIRBYTE_SCHEMA", + "username": "AIRBYTE_USER", + "password": "password123", + } + + actual = TransformConfig().transform_snowflake(input) + expected = { + "account": "123abc.us-east-7.aws", + "client_session_keep_alive": False, + "database": "AIRBYTE_DATABASE", + "password": "password123", + "query_tag": "normalization", + "role": "AIRBYTE_ROLE", + "schema": "AIRBYTE_SCHEMA", + "threads": 5, + "retry_all": True, + "retry_on_database_errors": True, + "connect_retries": 3, + "connect_timeout": 15, + "type": "snowflake", + "user": "AIRBYTE_USER", + "warehouse": "AIRBYTE_WAREHOUSE", + } + + assert actual == expected + assert extract_schema(actual) == "AIRBYTE_SCHEMA" + + def test_transform_snowflake_oauth(self): + + input = { + "host": "http://123abc.us-east-7.aws.snowflakecomputing.com", + "role": "AIRBYTE_ROLE", + "warehouse": "AIRBYTE_WAREHOUSE", + "database": "AIRBYTE_DATABASE", + "schema": "AIRBYTE_SCHEMA", + "username": "AIRBYTE_USER", + "credentials": { + "auth_type": "OAuth2.0", + "client_id": "AIRBYTE_CLIENT_ID", + "access_token": "AIRBYTE_ACCESS_TOKEN", + "client_secret": "AIRBYTE_CLIENT_SECRET", + "refresh_token": "AIRBYTE_REFRESH_TOKEN", + }, + } + + actual = TransformConfig().transform_snowflake(input) + expected = { + "account": "123abc.us-east-7.aws", + "client_session_keep_alive": False, + "database": "AIRBYTE_DATABASE", + "query_tag": "normalization", + "role": "AIRBYTE_ROLE", + "schema": "AIRBYTE_SCHEMA", + "threads": 5, + "retry_all": True, + "retry_on_database_errors": True, + "connect_retries": 3, + "connect_timeout": 15, + "type": "snowflake", + "user": "AIRBYTE_USER", + "warehouse": "AIRBYTE_WAREHOUSE", + "authenticator": "oauth", + "oauth_client_id": "AIRBYTE_CLIENT_ID", + "oauth_client_secret": "AIRBYTE_CLIENT_SECRET", + "token": "AIRBYTE_REFRESH_TOKEN", + } + + assert actual == expected + assert extract_schema(actual) == "AIRBYTE_SCHEMA" + + def test_transform_snowflake_key_pair(self): + + input = { + "host": "http://123abc.us-east-7.aws.snowflakecomputing.com", + "role": "AIRBYTE_ROLE", + "warehouse": "AIRBYTE_WAREHOUSE", + "database": "AIRBYTE_DATABASE", + "schema": "AIRBYTE_SCHEMA", + "username": "AIRBYTE_USER", + "credentials": { + "private_key": "AIRBYTE_PRIVATE_KEY", + "private_key_password": "AIRBYTE_PRIVATE_KEY_PASSWORD", + }, + } + + actual = TransformConfig().transform_snowflake(input) + expected = { + "account": "123abc.us-east-7.aws", + "client_session_keep_alive": False, + "database": "AIRBYTE_DATABASE", + "query_tag": "normalization", + "role": "AIRBYTE_ROLE", + "schema": "AIRBYTE_SCHEMA", + "threads": 5, + "retry_all": True, + "retry_on_database_errors": True, + "connect_retries": 3, + "connect_timeout": 15, + "type": 
"snowflake", + "user": "AIRBYTE_USER", + "warehouse": "AIRBYTE_WAREHOUSE", + "private_key_path": "private_key_path.txt", + "private_key_passphrase": "AIRBYTE_PRIVATE_KEY_PASSWORD", + } + + assert actual == expected + assert extract_schema(actual) == "AIRBYTE_SCHEMA" + + def test_transform_mysql(self): + input = { + "type": "mysql5", + "host": "airbyte.io", + "port": 5432, + "database": "my_db", + "schema": "public", + "username": "a user", + "password": "password1234", + } + + actual = TransformConfig().transform_mysql(input) + expected = { + "type": "mysql5", + "server": "airbyte.io", + "port": 5432, + "schema": "my_db", + "database": "my_db", + "username": "a user", + "password": "password1234", + } + + assert actual == expected + # DBT schema is equivalent to MySQL database + assert extract_schema(actual) == "my_db" + + def test_transform_mssql(self): + input = { + "type": "sqlserver", + "host": "airbyte.io", + "port": 1433, + "database": "my_db", + "schema": "my_db", + "username": "SA", + "password": "password1234", + } + + actual = TransformConfig().transform_mysql(input) + expected = { + "type": "sqlserver", + "server": "airbyte.io", + "port": 1433, + "schema": "my_db", + "database": "my_db", + "username": "SA", + "password": "password1234", + } + + assert actual == expected + # DBT schema is equivalent to MySQL database + assert extract_schema(actual) == "my_db" + + def test_transform_clickhouse(self): + input = {"host": "airbyte.io", "port": 9440, "database": "default", "username": "ch", "password": "password1234", "ssl": True} + + actual = TransformConfig().transform_clickhouse(input) + expected = { + "type": "clickhouse", + "driver": "http", + "verify": False, + "host": "airbyte.io", + "port": 9440, + "schema": "default", + "user": "ch", + "password": "password1234", + "secure": True, + } + + assert actual == expected + assert extract_schema(actual) == "default" + + # test that the full config is produced. this overlaps slightly with the transform_postgres test. 
+    # test that the full config is produced. This overlaps slightly with the transform_postgres test.
+    def test_transform(self):
+        input = {
+            "host": "airbyte.io",
+            "port": 5432,
+            "username": "a user",
+            "password": "password123",
+            "database": "my_db",
+            "schema": "public",
+        }
+
+        expected = self.get_base_config()
+        expected["normalize"]["outputs"]["prod"] = {
+            "type": "postgres",
+            "dbname": "my_db",
+            "host": "airbyte.io",
+            "pass": "password123",
+            "port": 5432,
+            "schema": "public",
+            "threads": 8,
+            "user": "a user",
+        }
+        actual = TransformConfig().transform(DestinationType.POSTGRES, input)
+
+        assert actual == expected
+        assert extract_schema(actual["normalize"]["outputs"]["prod"]) == "public"
+
+    def test_transform_tidb(self):
+        input = {
+            "type": "tidb",
+            "host": "airbyte.io",
+            "port": 5432,
+            "database": "ti_db",
+            "schema": "public",
+            "username": "a user",
+            "password": "password1234",
+        }
+
+        actual = TransformConfig().transform_tidb(input)
+        expected = {
+            "type": "tidb",
+            "server": "airbyte.io",
+            "port": 5432,
+            "schema": "ti_db",
+            "database": "ti_db",
+            "username": "a user",
+            "password": "password1234",
+        }
+
+        assert actual == expected
+        assert extract_schema(actual) == "ti_db"
+
+    def test_transform_duckdb_schema(self):
+        input = {
+            "type": "duckdb",
+            "destination_path": "/local/testing.duckdb",
+            "schema": "quackquack",
+        }
+
+        actual = TransformConfig().transform_duckdb(input)
+        expected = {
+            "type": "duckdb",
+            "path": "/local/testing.duckdb",
+            "schema": "quackquack",
+        }
+
+        assert actual == expected
+        assert extract_path(actual) == "/local/testing.duckdb"
+
+    def test_transform_duckdb_no_schema(self):
+        input = {
+            "type": "duckdb",
+            "destination_path": "/local/testing.duckdb",
+        }
+
+        actual = TransformConfig().transform_duckdb(input)
+        expected = {
+            "type": "duckdb",
+            "path": "/local/testing.duckdb",
+            "schema": "main",
+        }
+
+        assert actual == expected
+        assert extract_path(actual) == "/local/testing.duckdb"
+
+    def get_base_config(self):
+        return {
+            "config": {
+                "partial_parse": True,
+                "printer_width": 120,
+                "send_anonymous_usage_stats": False,
+                "use_colors": True,
+            },
+            "normalize": {"target": "prod", "outputs": {"prod": {}}},
+        }
+
+    def test_parse(self):
+        t = TransformConfig()
+        assert {"integration_type": DestinationType.POSTGRES, "config": "config.json", "output_path": "out.yml"} == t.parse(
+            ["--integration-type", "postgres", "--config", "config.json", "--out", "out.yml"]
+        )
+
+    def test_write_ssh_config(self):
+        original_config_input = {
+            "type": "postgres",
+            "dbname": "my_db",
+            "host": "airbyte.io",
+            "pass": "password123",
+            "port": 5432,
+            "schema": "public",
+            "threads": 32,
+            "user": "a user",
+            "tunnel_method": {
+                "tunnel_host": "1.2.3.4",
+                "tunnel_method": "SSH_PASSWORD_AUTH",
+                "tunnel_port": 22,
+                "tunnel_user": "user",
+                "tunnel_user_password": "pass",
+            },
+        }
+        transformed_config_input = self.get_base_config()
+        transformed_config_input["normalize"]["outputs"]["prod"] = {
+            "port": 7890,
+        }
+        expected = {
+            "db_host": "airbyte.io",
+            "db_port": 5432,
+            "tunnel_map": {
+                "tunnel_host": "1.2.3.4",
+                "tunnel_method": "SSH_PASSWORD_AUTH",
+                "tunnel_port": 22,
+                "tunnel_user": "user",
+                "tunnel_user_password": "pass",
+            },
+            "local_port": 7890,
+        }
+        # build a path that does not exist yet; write_ssh_config is expected to create it.
+        # (tempfile.TemporaryDirectory().name is fragile here: the directory is removed as
+        # soon as the unreferenced object is garbage collected)
+        tmp_path = os.path.join(tempfile.mkdtemp(), "ssh_config")
+        TransformConfig.write_ssh_config(tmp_path, original_config_input, transformed_config_input)
+        with open(os.path.join(tmp_path, "ssh.json"), "r") as f:
+            assert json.load(f) == expected
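+
+# For reference, test_parse above mirrors the command line this module is driven with, along
+# the lines of (file names illustrative; the entrypoint name is an assumption):
+#
+#     transform-config --integration-type postgres --config destination_config.json --out profiles.yml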
diff --git a/airbyte-integrations/bases/base/.dockerignore b/airbyte-integrations/bases/base/.dockerignore
new file mode 100644
index 0000000000000..378eac25d3117
--- /dev/null
+++ b/airbyte-integrations/bases/base/.dockerignore
@@ -0,0 +1 @@
+build
diff --git a/airbyte-integrations/bases/base/Dockerfile b/airbyte-integrations/bases/base/Dockerfile
new file mode 100644
index 0000000000000..e03cdca90fc9a
--- /dev/null
+++ b/airbyte-integrations/bases/base/Dockerfile
@@ -0,0 +1,19 @@
+### WARNING ###
+# The Java connector Dockerfiles will soon be deprecated.
+# This Dockerfile is not used to build the connector image we publish to DockerHub.
+# The new logic to build the connector image is declared with Dagger here:
+# https://github.com/airbytehq/airbyte/blob/master/tools/ci_connector_ops/ci_connector_ops/pipelines/actions/environments.py#L649

+# If you need to add custom logic to build your connector image, you can do it by adding a finalize_build.sh or finalize_build.py script in the connector folder.
+# Please reach out to the Connectors Operations team if you have any questions.
+FROM amazonlinux:2022.0.20220831.1
+
+WORKDIR /airbyte
+
+COPY base.sh .
+
+ENV AIRBYTE_ENTRYPOINT "/airbyte/base.sh"
+ENTRYPOINT ["/airbyte/base.sh"]
+
+LABEL io.airbyte.version=0.1.0
+LABEL io.airbyte.name=airbyte/integration-base
diff --git a/airbyte-integrations/bases/base/base.sh b/airbyte-integrations/bases/base/base.sh
new file mode 100755
index 0000000000000..b72ad35e18eb0
--- /dev/null
+++ b/airbyte-integrations/bases/base/base.sh
@@ -0,0 +1,64 @@
+#!/usr/bin/env bash
+
+set -e
+
+function echo2() {
+  echo >&2 "$@"
+}
+
+function error() {
+  echo2 "$@"
+  exit 1
+}
+
+# todo: make it easy to select source or destination, and validate based on that selection, by adding an integration-type env variable.
+function main() {
+  CMD="$1"
+  shift 1 || error "command not specified."
+
+  ARGS=
+  while [ $# -ne 0 ]; do
+    case "$1" in
+    --config)
+      CONFIG_FILE="$2"
+      shift 2
+      ;;
+    --catalog)
+      CATALOG_FILE="$2"
+      shift 2
+      ;;
+    --state)
+      STATE_FILE="$2"
+      shift 2
+      ;;
+    *)
+      error "Unknown option: $1"
+      ;;
+    esac
+  done
+
+  case "$CMD" in
+  spec)
+    eval "$AIRBYTE_SPEC_CMD"
+    ;;
+  check)
+    eval "$AIRBYTE_CHECK_CMD" --config "$CONFIG_FILE"
+    ;;
+  discover)
+    eval "$AIRBYTE_DISCOVER_CMD" --config "$CONFIG_FILE"
+    ;;
+  read)
+    READ_STATEMENT="$AIRBYTE_READ_CMD --config $CONFIG_FILE --catalog $CATALOG_FILE"
+    if [[ ! -z "$STATE_FILE" ]]; then READ_STATEMENT="$READ_STATEMENT --state $STATE_FILE"; fi
+    eval "$READ_STATEMENT"
+    ;;
+  write)
+    eval "$AIRBYTE_WRITE_CMD" --config "$CONFIG_FILE" --catalog "$CATALOG_FILE"
+    ;;
+  *)
+    error "Unknown command: $CMD"
+    ;;
+  esac
+}
+
+main "$@"
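+
+# Example invocation (hypothetical values; the AIRBYTE_*_CMD variables are baked in by the
+# Dockerfiles that build on this base image, e.g. base-java):
+#
+#   AIRBYTE_READ_CMD="/airbyte/bin/my-connector --read" \
+#     /airbyte/base.sh read --config /config.json --catalog /catalog.json --state /state.json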
+ echo "" + exit 1 +fi + +BRANCH_NAME="$(git symbolic-ref HEAD 2>/dev/null)" || +BRANCH_NAME="(unnamed branch)" # detached HEAD +BRANCH_NAME=${BRANCH_NAME##refs/heads/} + +OUTPUT_FILE="num_lowcode_connectors.csv" +echo "date,num_lowcode_connectors,num_python_connectors" > $OUTPUT_FILE + +# get every date between sep 1 and today (so we can keep consistent results when generating this sheet) +dates=$(python << EOM +from datetime import date, timedelta + +start_date = date(2022, 10, 1) +end_date = date.today() +delta = timedelta(days=1) +results = [] +while start_date <= end_date: + results.append(start_date.strftime("%Y-%m-%d")) + start_date += delta + +print(" ".join(results)) +EOM +) + +for d in $dates +do +git checkout $(git rev-list -n 1 --first-parent --before="$d" master) + +# count how many lowcode connectors there are + +num_lowcode=$(python << EOM +import os + +connectors = [f.path for f in os.scandir("airbyte-integrations/connectors/") if f.is_dir()] +declarative_connectors = [] +num_python_connectors = 0 +connectors_file = "lowcode_connector_names.txt" +open(connectors_file, "w").write("") +for full_path in connectors: + files = os.listdir(full_path) + connector_name = full_path.split("/")[-1] + # clear the file so the last day is the only one that writes to it + python_files = [x for x in files if ".py" in x] + if len(python_files) > 0: + sourcepy_dir = f"{full_path}/{connector_name.replace('-','_')}/source.py" + try: + sourcepy = open(sourcepy_dir, "r").read() + if "declarative YAML" in sourcepy: + declarative_connectors.append(full_path) + open(connectors_file, "a").write(connector_name + "\n") + else: + num_python_connectors += 1 + except FileNotFoundError: + pass + #print(f"Couldn't find a source.py in {sourcepy_dir}. Skipping.") +print(f"{len(declarative_connectors)},{num_python_connectors}") +EOM +) + +# print with date +echo $d,$num_lowcode >> $OUTPUT_FILE +done + + + +git checkout $BRANCH_NAME +git checkout -- . + +#uncomment to upload to GCS +#gcloud storage cp num_lowcode_connectors.csv gs://sherif-airbyte-metabase-backing-bucket/ \ No newline at end of file diff --git a/airbyte-integrations/scripts/utils.sh b/airbyte-integrations/scripts/utils.sh new file mode 100644 index 0000000000000..3c8d1bb0fdf7b --- /dev/null +++ b/airbyte-integrations/scripts/utils.sh @@ -0,0 +1,26 @@ +die () { + echo "$1" 1>&2 + exit 1 +} + +readlink_f () { + # https://stackoverflow.com/a/1116890 + TARGET_FILE=$1 + + cd "$(dirname $TARGET_FILE)" + TARGET_FILE="$(basename $TARGET_FILE)" + + # Iterate down a (possible) chain of symlinks + while [ -L "$TARGET_FILE" ] + do + TARGET_FILE="$(readlink $TARGET_FILE)" + cd "$(dirname $TARGET_FILE)" + TARGET_FILE="$(basename $TARGET_FILE)" + done + + # Compute the canonicalized name by finding the physical path + # for the directory we're in and appending the target file. + PHYS_DIR="$(pwd -P)" + RESULT="$PHYS_DIR/$TARGET_FILE" + echo "$RESULT" +}