From e0bac4aaebf3c4d287a778ffddbaf4623843c114 Mon Sep 17 00:00:00 2001 From: Christophe Duong Date: Thu, 6 Jan 2022 18:49:31 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fix=20normalization=20SCD=20part?= =?UTF-8?q?ition=20by=20float=20columns=20errors=20with=20BigQuery=20(#928?= =?UTF-8?q?1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ..._columns_resulting_into_long_names_scd.sql | 17 +- ..._columns_resulting_into_long_names_scd.sql | 19 ++- .../dedup_exchange_rate_scd.sql | 37 +++-- .../dedup_exchange_rate_scd.sql | 43 ++--- .../dedup_exchange_rate_scd.sql | 43 ++--- .../dedup_cdc_excluded_scd.sql | 46 +++--- .../dedup_exchange_rate_scd.sql | 40 ++--- .../renamed_dedup_cdc_excluded_scd.sql | 26 +-- .../renamed_dedup_cdc_excluded.sql | 1 + .../renamed_dedup_cdc_excluded_ab1.sql | 1 + .../renamed_dedup_cdc_excluded_ab2.sql | 1 + .../dedup_cdc_excluded_scd.sql | 48 +++--- .../dedup_exchange_rate_scd.sql | 46 +++--- .../renamed_dedup_cdc_excluded_scd.sql | 28 ++-- .../renamed_dedup_cdc_excluded.sql | 1 + .../renamed_dedup_cdc_excluded_scd.sql | 4 +- .../renamed_dedup_cdc_excluded.sql | 4 +- ...eam_with_co__lting_into_long_names_scd.sql | 19 +-- ...eam_with_co__lting_into_long_names_scd.sql | 21 +-- .../dedup_exchange_rate_scd.sql | 39 ++--- .../dedup_exchange_rate_scd.sql | 45 +++--- ..._stream_with_co_1g_into_long_names_scd.sql | 17 +- ..._stream_with_co_1g_into_long_names_scd.sql | 19 ++- ..._stream_with_co_1g_into_long_names_scd.sql | 17 +- .../dedup_exchange_rate_scd.sql | 37 +++-- .../dedup_exchange_rate_scd.sql | 43 ++--- .../dedup_exchange_rate_scd.sql | 37 +++-- .../dedup_exchange_rate_scd.sql | 43 ++--- .../dedup_exchange_rate_scd.sql | 49 +++--- .../dedup_exchange_rate_scd.sql | 43 ++--- ...ream_with_c__lting_into_long_names_scd.sql | 17 +- .../some_stream_that_was_empty_scd.sql | 13 +- ...ream_with_c__lting_into_long_names_scd.sql | 19 ++- .../some_stream_that_was_empty_scd.sql | 15 +- .../dedup_cdc_excluded_scd.sql | 43 ++--- .../dedup_exchange_rate_scd.sql | 37 +++-- .../test_normalization/pos_dedup_cdcx_scd.sql | 49 +++--- .../renamed_dedup_cdc_excluded_scd.sql | 23 +-- .../renamed_dedup_cdc_excluded.sql | 1 + .../renamed_dedup_cdc_excluded_stg.sql | 6 + .../renamed_dedup_cdc_excluded_ab1.sql | 1 + .../renamed_dedup_cdc_excluded_ab2.sql | 1 + .../dedup_cdc_excluded_scd.sql | 45 +++--- .../dedup_exchange_rate_scd.sql | 43 ++--- .../test_normalization/pos_dedup_cdcx_scd.sql | 51 +++--- .../renamed_dedup_cdc_excluded_scd.sql | 25 +-- .../renamed_dedup_cdc_excluded.sql | 1 + .../renamed_dedup_cdc_excluded_stg.sql | 1 + .../dedup_exchange_rate_scd.sql | 43 ++--- .../renamed_dedup_cdc_excluded_scd.sql | 45 +++--- .../renamed_dedup_cdc_excluded_scd.sql | 4 +- .../renamed_dedup_cdc_excluded.sql | 4 +- .../renamed_dedup_cdc_excluded_stg.sql | 4 +- .../renamed_dedup_cdc_excluded_scd.sql | 4 +- .../renamed_dedup_cdc_excluded.sql | 4 +- .../renamed_dedup_cdc_excluded_stg.sql | 4 +- ..._columns_resulting_into_long_names_scd.sql | 17 +- ..._columns_resulting_into_long_names_scd.sql | 19 ++- .../dedup_exchange_rate_scd.sql | 37 +++-- .../dedup_exchange_rate_scd.sql | 43 ++--- .../dedup_exchange_rate_scd.sql | 43 ++--- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 17 +- ..._COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql | 19 ++- .../DEDUP_EXCHANGE_RATE_SCD.sql | 37 +++-- .../DEDUP_EXCHANGE_RATE_SCD.sql | 43 ++--- .../data_input/catalog.json | 5 +- .../transform_catalog/stream_processor.py | 150 +++++++++++------- 67 files changed, 980 insertions(+), 787 deletions(-) diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index c98dcfdb9a7ed..59cf6d3a78044 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -26,9 +26,9 @@ scd_data as ( ), '')) as string ))) as _airbyte_unique_key, - id, - date, - `partition`, + id, + date, + `partition`, date as _airbyte_start_at, lag(date) over ( partition by id @@ -54,7 +54,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as @@ -72,9 +75,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - date, - `partition`, + id, + date, + `partition`, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index 188d65104df21..a1d766dd11035 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -57,11 +57,11 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + 'id', ]) }} as _airbyte_unique_key, - id, - date, - {{ adapter.quote('partition') }}, + id, + date, + {{ adapter.quote('partition') }}, date as _airbyte_start_at, lag(date) over ( partition by id @@ -87,7 +87,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -101,9 +104,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - date, - {{ adapter.quote('partition') }}, + id, + date, + {{ adapter.quote('partition') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 28f810d9152da..d7fd59df15b5d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -30,14 +30,14 @@ scd_data as ( ), '')) as string ))) as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, date as _airbyte_start_at, lag(date) over ( partition by id, currency, cast(NZD as @@ -67,7 +67,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, to_hex(md5(cast(concat(coalesce(cast(_airbyte_unique_key as @@ -85,14 +88,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index df5b57acadcc1..c2edf0afe7961 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -59,18 +59,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', + 'id', + 'currency', + 'NZD', ]) }} as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, date as _airbyte_start_at, lag(date) over ( partition by id, currency, cast(NZD as {{ dbt_utils.type_string() }}) @@ -96,7 +96,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -110,14 +113,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - HKD_special___characters, - HKD_special___characters_1, - NZD, - USD, + id, + currency, + date, + timestamp_col, + HKD_special___characters, + HKD_special___characters_1, + NZD, + USD, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index bd4c0b6271b45..cc6694836bb6a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -59,18 +59,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', + 'id', + 'currency', + 'NZD', ]) }} as _airbyte_unique_key, - id, - currency, - new_column, - date, - timestamp_col, - HKD_special___characters, - NZD, - USD, + id, + currency, + new_column, + date, + timestamp_col, + HKD_special___characters, + NZD, + USD, date as _airbyte_start_at, lag(date) over ( partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(NZD as {{ dbt_utils.type_string() }}) @@ -96,7 +96,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -110,14 +113,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - new_column, - date, - timestamp_col, - HKD_special___characters, - NZD, - USD, + id, + currency, + new_column, + date, + timestamp_col, + HKD_special___characters, + NZD, + USD, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 99e574c63fda6..0c7c151236fd2 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -26,9 +26,10 @@ input_data_with_active_row_num as ( row_number() over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) as _airbyte_active_row_num from input_data ), @@ -40,21 +41,21 @@ scd_data as ( toString(id) ))) as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_emitted_at as _airbyte_start_at, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, - anyOrNull(_airbyte_emitted_at) over ( + anyOrNull(_ab_cdc_lsn) over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_cdc_excluded_hashid @@ -65,7 +66,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String') + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String') order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, assumeNotNull(hex(MD5( @@ -91,11 +95,11 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 2486691308c65..c1e8e6cb63fec 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -46,14 +46,14 @@ scd_data as ( toString(NZD) ))) as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - "HKD@spéçiäl & characters", - HKD_special___characters, - NZD, - USD, + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, date as _airbyte_start_at, case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, anyOrNull(date) over ( @@ -62,8 +62,7 @@ scd_data as ( date is null asc, date desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_exchange_rate_hashid @@ -74,7 +73,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, assumeNotNull(hex(MD5( @@ -94,14 +96,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - "HKD@spéçiäl & characters", - HKD_special___characters, - NZD, - USD, + id, + currency, + date, + timestamp_col, + "HKD@spéçiäl & characters", + HKD_special___characters, + NZD, + USD, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 4fa7b03259e21..eedc913fd45a5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -26,8 +26,8 @@ input_data_with_active_row_num as ( row_number() over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc ) as _airbyte_active_row_num from input_data @@ -40,17 +40,17 @@ scd_data as ( toString(id) ))) as _airbyte_unique_key, - id, - _airbyte_emitted_at as _airbyte_start_at, + id, + _ab_cdc_updated_at, + _ab_cdc_updated_at as _airbyte_start_at, case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, - anyOrNull(_airbyte_emitted_at) over ( + anyOrNull(_ab_cdc_updated_at) over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_renamed_dedup_cdc_excluded_hashid @@ -61,7 +61,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, assumeNotNull(hex(MD5( @@ -81,7 +84,8 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, + id, + _ab_cdc_updated_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index b16b5361120f0..9966d52012aec 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -17,6 +17,7 @@ select _airbyte_unique_key, id, + _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql index a09668e69387e..88a3674f694b3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -7,6 +7,7 @@ -- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} select {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as id, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql index 2fd528509bc5a..b192f4915e98f 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -7,6 +7,7 @@ -- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} select accurateCastOrNull(id, '{{ dbt_utils.type_bigint() }}') as id, + accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_float() }}') as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index eedb5184f0a89..a20276296c922 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -56,9 +56,10 @@ input_data_with_active_row_num as ( row_number() over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) as _airbyte_active_row_num from input_data ), @@ -66,23 +67,23 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + 'id', ]) }} as _airbyte_unique_key, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_emitted_at as _airbyte_start_at, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, - anyOrNull(_airbyte_emitted_at) over ( + anyOrNull(_ab_cdc_lsn) over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_cdc_excluded_hashid @@ -93,7 +94,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_string() }}') + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, '{{ dbt_utils.type_string() }}'), accurateCastOrNull(_ab_cdc_updated_at, '{{ dbt_utils.type_string() }}') order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -107,11 +111,11 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - name, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, + id, + name, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 13744503505c2..1d94573fc99e1 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -68,18 +68,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'NZD', + 'id', + 'currency', + 'NZD', ]) }} as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - {{ quote('HKD@spéçiäl & characters') }}, - HKD_special___characters, - NZD, - USD, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, date as _airbyte_start_at, case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, anyOrNull(date) over ( @@ -88,8 +88,7 @@ scd_data as ( date is null asc, date desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_dedup_exchange_rate_hashid @@ -100,7 +99,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -114,14 +116,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - {{ quote('HKD@spéçiäl & characters') }}, - HKD_special___characters, - NZD, - USD, + id, + currency, + date, + timestamp_col, + {{ quote('HKD@spéçiäl & characters') }}, + HKD_special___characters, + NZD, + USD, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 525bee19a04f1..9e3c81ac18178 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -56,8 +56,8 @@ input_data_with_active_row_num as ( row_number() over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc ) as _airbyte_active_row_num from input_data @@ -66,19 +66,19 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + 'id', ]) }} as _airbyte_unique_key, - id, - _airbyte_emitted_at as _airbyte_start_at, + id, + _ab_cdc_updated_at, + _ab_cdc_updated_at as _airbyte_start_at, case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row, - anyOrNull(_airbyte_emitted_at) over ( + anyOrNull(_ab_cdc_updated_at) over ( partition by id order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc - ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING - ) as _airbyte_end_at, + ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) as _airbyte_end_at, _airbyte_ab_id, _airbyte_emitted_at, _airbyte_renamed_dedup_cdc_excluded_hashid @@ -89,7 +89,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -103,7 +106,8 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, + id, + _ab_cdc_updated_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index d9f20813f833e..1b9cead2c4958 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -8,6 +8,7 @@ select _airbyte_unique_key, id, + _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 0fc967c7e00a1..f96e982626f6a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -1,5 +1,5 @@ - insert into test_normalization.renamed_dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + insert into test_normalization.renamed_dedup_cdc_excluded_scd ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" from renamed_dedup_cdc_excluded_scd__dbt_tmp \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index 3ee365f07d589..4ff849492d8ab 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -1,5 +1,5 @@ - insert into test_normalization.renamed_dedup_cdc_excluded ("_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") - select "_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + insert into test_normalization.renamed_dedup_cdc_excluded ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + select "_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" from renamed_dedup_cdc_excluded__dbt_tmp \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql index ca7cb37338611..113bf3c06d961 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql @@ -39,21 +39,19 @@ scd_data as ( concat(concat(coalesce(cast(id as VARCHAR(max)), ''''),''''), '''') as VARCHAR(max)), '''')), 2) as _airbyte_unique_key, - id, - "date", - "partition", + id, + "date", + "partition", "date" as _airbyte_start_at, lag("date") over ( partition by id order by - "date" desc, "date" desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by id order by - "date" desc, "date" desc, _airbyte_emitted_at desc ) = 1 then 1 else 0 end as _airbyte_active_row, @@ -67,7 +65,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, convert(varchar(32), HashBytes(''md5'', coalesce(cast( @@ -85,9 +86,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - "date", - "partition", + id, + "date", + "partition", _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql index f312cf3afbfd4..b04acfdabc525 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co__lting_into_long_names_scd.sql @@ -55,23 +55,21 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + 'id', ]) }} as _airbyte_unique_key, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by id order by - {{ adapter.quote('date') }} desc, {{ adapter.quote('date') }} desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by id order by - {{ adapter.quote('date') }} desc, {{ adapter.quote('date') }} desc, _airbyte_emitted_at desc ) = 1 then 1 else 0 end as _airbyte_active_row, @@ -85,7 +83,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -99,9 +100,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index a572299fff2d4..ea79968965206 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -41,20 +41,19 @@ scd_data as ( VARCHAR(max)), ''''), ''-'', coalesce(cast(nzd as VARCHAR(max)), ''''),''''), '''') as VARCHAR(max)), '''')), 2) as _airbyte_unique_key, - id, - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, + id, + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, "date" as _airbyte_start_at, lag("date") over ( partition by id, currency, cast(nzd as VARCHAR(max)) order by - "date" desc, "date" desc, _airbyte_emitted_at desc ) as _airbyte_end_at, @@ -62,7 +61,6 @@ scd_data as ( partition by id, currency, cast(nzd as VARCHAR(max)) order by - "date" desc, "date" desc, _airbyte_emitted_at desc ) = 1 then 1 else 0 end as _airbyte_active_row, @@ -76,7 +74,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, convert(varchar(32), HashBytes(''md5'', coalesce(cast( @@ -94,14 +95,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, + id, + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 3bd5d5d499ed7..fcc681aa95ba8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mssql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -57,30 +57,28 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', + 'id', + 'currency', + 'nzd', ]) }} as _airbyte_unique_key, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) order by - {{ adapter.quote('date') }} desc, {{ adapter.quote('date') }} desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) order by - {{ adapter.quote('date') }} desc, {{ adapter.quote('date') }} desc, _airbyte_emitted_at desc ) = 1 then 1 else 0 end as _airbyte_active_row, @@ -94,7 +92,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -108,14 +109,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index 1e688d24d74f1..e5f3e4859deba 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -17,9 +17,9 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select md5(cast(concat(coalesce(cast(id as char), '')) as char)) as _airbyte_unique_key, - id, - `date`, - `partition`, + id, + `date`, + `partition`, `date` as _airbyte_start_at, lag(`date`) over ( partition by id @@ -45,7 +45,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, @@ -55,9 +58,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - `date`, - `partition`, + id, + `date`, + `partition`, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index 78b7de1f601f7..d098146930d0c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -55,11 +55,11 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + 'id', ]) }} as _airbyte_unique_key, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by id @@ -85,7 +85,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -99,9 +102,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, + id, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql index 1e688d24d74f1..e5f3e4859deba 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_nested_streams/second_output/airbyte_incremental/scd/test_normalization/nested_stream_with_co_1g_into_long_names_scd.sql @@ -17,9 +17,9 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select md5(cast(concat(coalesce(cast(id as char), '')) as char)) as _airbyte_unique_key, - id, - `date`, - `partition`, + id, + `date`, + `partition`, `date` as _airbyte_start_at, lag(`date`) over ( partition by id @@ -45,7 +45,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, @@ -55,9 +58,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - `date`, - `partition`, + id, + `date`, + `partition`, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 64ded010aa63b..59d722cb4f381 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -17,14 +17,14 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(nzd as char), '')) as char)) as _airbyte_unique_key, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, `date` as _airbyte_start_at, lag(`date`) over ( partition by id, currency, cast(nzd as char) @@ -50,7 +50,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, @@ -60,14 +63,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index abfaa2002b55f..309ac4c903fe5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -57,18 +57,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', + 'id', + 'currency', + 'nzd', ]) }} as _airbyte_unique_key, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) @@ -94,7 +94,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -108,14 +111,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + id, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 64ded010aa63b..59d722cb4f381 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/mysql/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -17,14 +17,14 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select md5(cast(concat(coalesce(cast(id as char), ''), '-', coalesce(cast(currency as char), ''), '-', coalesce(cast(nzd as char), '')) as char)) as _airbyte_unique_key, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, `date` as _airbyte_start_at, lag(`date`) over ( partition by id, currency, cast(nzd as char) @@ -50,7 +50,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(concat(coalesce(cast(_airbyte_unique_key as char), ''), '-', coalesce(cast(_airbyte_start_at as char), ''), '-', coalesce(cast(_airbyte_emitted_at as char), '')) as char)) as _airbyte_unique_key_scd, @@ -60,14 +63,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - `date`, - timestamp_col, - `HKD@spéçiäl & characters`, - hkd_special___characters, - nzd, - usd, + id, + currency, + `date`, + timestamp_col, + `HKD@spéçiäl & characters`, + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index cbca1a34898ff..cfd186b006ae3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -27,27 +27,25 @@ scd_data as ( nzd ) as "_AIRBYTE_UNIQUE_KEY", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, "DATE" as "_AIRBYTE_START_AT", lag("DATE") over ( partition by id, currency, cast(nzd as varchar2(4000)) order by - "DATE" asc nulls last, - "DATE" desc, + "DATE" desc nulls last, "_AIRBYTE_EMITTED_AT" desc ) as "_AIRBYTE_END_AT", case when row_number() over ( partition by id, currency, cast(nzd as varchar2(4000)) order by - "DATE" asc nulls last, - "DATE" desc, + "DATE" desc nulls last, "_AIRBYTE_EMITTED_AT" desc ) = 1 then 1 else 0 end as "_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_AB_ID", @@ -60,7 +58,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by "_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_START_AT", "_AIRBYTE_EMITTED_AT" + partition by + "_AIRBYTE_UNIQUE_KEY", + "_AIRBYTE_START_AT", + "_AIRBYTE_EMITTED_AT" order by "_AIRBYTE_ACTIVE_ROW" desc, "_AIRBYTE_AB_ID" ) as "_AIRBYTE_ROW_NUM", ora_hash( @@ -80,14 +81,14 @@ dedup_data as ( select "_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_UNIQUE_KEY_SCD", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, "_AIRBYTE_START_AT", "_AIRBYTE_END_AT", "_AIRBYTE_ACTIVE_ROW", diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 09ada8d511715..8e8364a7b5072 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -57,31 +57,29 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', + 'id', + 'currency', + 'nzd', ]) }} as {{ quote('_AIRBYTE_UNIQUE_KEY') }}, - id, - currency, - {{ quote('DATE') }}, - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, + id, + currency, + {{ quote('DATE') }}, + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, {{ quote('DATE') }} as {{ quote('_AIRBYTE_START_AT') }}, lag({{ quote('DATE') }}) over ( partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) order by - {{ quote('DATE') }} asc nulls last, - {{ quote('DATE') }} desc, + {{ quote('DATE') }} desc nulls last, {{ quote('_AIRBYTE_EMITTED_AT') }} desc ) as {{ quote('_AIRBYTE_END_AT') }}, case when row_number() over ( partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) order by - {{ quote('DATE') }} asc nulls last, - {{ quote('DATE') }} desc, + {{ quote('DATE') }} desc nulls last, {{ quote('_AIRBYTE_EMITTED_AT') }} desc ) = 1 then 1 else 0 end as {{ quote('_AIRBYTE_ACTIVE_ROW') }}, {{ quote('_AIRBYTE_AB_ID') }}, @@ -94,7 +92,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by {{ quote('_AIRBYTE_UNIQUE_KEY') }}, {{ quote('_AIRBYTE_START_AT') }}, {{ quote('_AIRBYTE_EMITTED_AT') }} + partition by + {{ quote('_AIRBYTE_UNIQUE_KEY') }}, + {{ quote('_AIRBYTE_START_AT') }}, + {{ quote('_AIRBYTE_EMITTED_AT') }} order by {{ quote('_AIRBYTE_ACTIVE_ROW') }} desc, {{ quote('_AIRBYTE_AB_ID') }} ) as {{ quote('_AIRBYTE_ROW_NUM') }}, {{ dbt_utils.surrogate_key([ @@ -108,14 +109,14 @@ dedup_data as ( select {{ quote('_AIRBYTE_UNIQUE_KEY') }}, {{ quote('_AIRBYTE_UNIQUE_KEY_SCD') }}, - id, - currency, - {{ quote('DATE') }}, - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, + id, + currency, + {{ quote('DATE') }}, + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, {{ quote('_AIRBYTE_START_AT') }}, {{ quote('_AIRBYTE_END_AT') }}, {{ quote('_AIRBYTE_ACTIVE_ROW') }}, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index cbca1a34898ff..cfd186b006ae3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/oracle/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -27,27 +27,25 @@ scd_data as ( nzd ) as "_AIRBYTE_UNIQUE_KEY", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, "DATE" as "_AIRBYTE_START_AT", lag("DATE") over ( partition by id, currency, cast(nzd as varchar2(4000)) order by - "DATE" asc nulls last, - "DATE" desc, + "DATE" desc nulls last, "_AIRBYTE_EMITTED_AT" desc ) as "_AIRBYTE_END_AT", case when row_number() over ( partition by id, currency, cast(nzd as varchar2(4000)) order by - "DATE" asc nulls last, - "DATE" desc, + "DATE" desc nulls last, "_AIRBYTE_EMITTED_AT" desc ) = 1 then 1 else 0 end as "_AIRBYTE_ACTIVE_ROW", "_AIRBYTE_AB_ID", @@ -60,7 +58,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by "_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_START_AT", "_AIRBYTE_EMITTED_AT" + partition by + "_AIRBYTE_UNIQUE_KEY", + "_AIRBYTE_START_AT", + "_AIRBYTE_EMITTED_AT" order by "_AIRBYTE_ACTIVE_ROW" desc, "_AIRBYTE_AB_ID" ) as "_AIRBYTE_ROW_NUM", ora_hash( @@ -80,14 +81,14 @@ dedup_data as ( select "_AIRBYTE_UNIQUE_KEY", "_AIRBYTE_UNIQUE_KEY_SCD", - id, - currency, - "DATE", - timestamp_col, - hkd_special___characters, - hkd_special___characters_1, - nzd, - usd, + id, + currency, + "DATE", + timestamp_col, + hkd_special___characters, + hkd_special___characters_1, + nzd, + usd, "_AIRBYTE_START_AT", "_AIRBYTE_END_AT", "_AIRBYTE_ACTIVE_ROW", diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index 5ceb844df0ae3..b5d7f740ba6eb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -21,9 +21,9 @@ scd_data as ( ), '') as varchar )) as _airbyte_unique_key, - "id", - "date", - "partition", + "id", + "date", + "partition", "date" as _airbyte_start_at, lag("date") over ( partition by "id" @@ -49,7 +49,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(coalesce(cast(_airbyte_unique_key as @@ -67,9 +70,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - "id", - "date", - "partition", + "id", + "date", + "partition", _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index c1b4813412748..53ef64cb928a8 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -21,8 +21,8 @@ scd_data as ( ), '') as varchar )) as _airbyte_unique_key, - "id", - "date", + "id", + "date", "date" as _airbyte_start_at, lag("date") over ( partition by "id" @@ -48,7 +48,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(coalesce(cast(_airbyte_unique_key as @@ -66,8 +69,8 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - "id", - "date", + "id", + "date", _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql index 3f1ab268c7d40..8772de10b5e74 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_c__lting_into_long_names_scd.sql @@ -56,11 +56,11 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), + adapter.quote('id'), ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by {{ adapter.quote('id') }} @@ -86,7 +86,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -100,9 +103,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, - {{ adapter.quote('partition') }}, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, + {{ adapter.quote('partition') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql index d6592cc28f8fa..1eba7ba7bd0ba 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/some_stream_that_was_empty_scd.sql @@ -56,10 +56,10 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), + adapter.quote('id'), ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by {{ adapter.quote('id') }} @@ -85,7 +85,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -99,8 +102,8 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('date') }}, + {{ adapter.quote('id') }}, + {{ adapter.quote('date') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index 450c207deea6c..c9c2e087d956b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -21,25 +21,27 @@ scd_data as ( ), '') as varchar )) as _airbyte_unique_key, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + "id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, + lag(_ab_cdc_lsn) over ( partition by "id" order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by "id" order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, @@ -51,7 +53,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, cast(_ab_cdc_deleted_at as + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as varchar ), cast(_ab_cdc_updated_at as varchar @@ -77,11 +82,11 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, + "id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index a44a3ee1ffd4f..3db3150ff2766 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -25,14 +25,14 @@ scd_data as ( ), '') as varchar )) as _airbyte_unique_key, - "id", - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, + "id", + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, "date" as _airbyte_start_at, lag("date") over ( partition by "id", currency, cast(nzd as @@ -62,7 +62,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(coalesce(cast(_airbyte_unique_key as @@ -80,14 +83,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - "id", - currency, - "date", - timestamp_col, - "HKD@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, + "id", + currency, + "date", + timestamp_col, + "HKD@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index a1466c6f433d8..438b303238b5e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -21,26 +21,30 @@ scd_data as ( ), '') as varchar )) as _airbyte_unique_key, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + "id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( partition by "id" order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _ab_cdc_log_pos desc, + _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by "id" order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _ab_cdc_log_pos desc, + _airbyte_emitted_at desc ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, @@ -52,7 +56,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, cast(_ab_cdc_deleted_at as + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as varchar ), cast(_ab_cdc_updated_at as varchar @@ -82,12 +89,12 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - "id", - "name", - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, + "id", + "name", + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 993d6b15efad0..414ed447cc0b5 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -21,20 +21,21 @@ scd_data as ( ), '') as varchar )) as _airbyte_unique_key, - "id", - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + "id", + _ab_cdc_updated_at, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( partition by "id" order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by "id" order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc ) = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, @@ -47,7 +48,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(coalesce(cast(_airbyte_unique_key as @@ -65,7 +69,8 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - "id", + "id", + _ab_cdc_updated_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index d2a4347421f07..36303d71ef60e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -9,6 +9,7 @@ select _airbyte_unique_key, "id", + _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql index a31227240b16b..8fb3cb3a5c344 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -10,6 +10,7 @@ with __dbt__cte__renamed_dedup_cdc_excluded_ab1 as ( -- depends_on: "postgres".test_normalization._airbyte_raw_renamed_dedup_cdc_excluded select jsonb_extract_path_text(_airbyte_data, 'id') as "id", + jsonb_extract_path_text(_airbyte_data, '_ab_cdc_updated_at') as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -25,6 +26,9 @@ select cast("id" as bigint ) as "id", + cast(_ab_cdc_updated_at as + float +) as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, now() as _airbyte_normalized_at @@ -37,6 +41,8 @@ where 1 = 1 select md5(cast(coalesce(cast("id" as varchar +), '') || '-' || coalesce(cast(_ab_cdc_updated_at as + varchar ), '') as varchar )) as _airbyte_renamed_dedup_cdc_excluded_hashid, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql index 332bcdac0d8ab..e75261bd70a4e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab1.sql @@ -8,6 +8,7 @@ -- depends_on: {{ source('test_normalization', '_airbyte_raw_renamed_dedup_cdc_excluded') }} select {{ json_extract_scalar('_airbyte_data', ['id'], ['id']) }} as {{ adapter.quote('id') }}, + {{ json_extract_scalar('_airbyte_data', ['_ab_cdc_updated_at'], ['_ab_cdc_updated_at']) }} as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql index 54dbe8bb35287..f7a91a73a73cb 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_ctes/test_normalization/renamed_dedup_cdc_excluded_ab2.sql @@ -8,6 +8,7 @@ -- depends_on: {{ ref('renamed_dedup_cdc_excluded_ab1') }} select cast({{ adapter.quote('id') }} as {{ dbt_utils.type_bigint() }}) as {{ adapter.quote('id') }}, + cast(_ab_cdc_updated_at as {{ dbt_utils.type_float() }}) as _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql index d63cd07a8a4dd..98325193a5f00 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql @@ -56,27 +56,29 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), + adapter.quote('id'), ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_lsn as _airbyte_start_at, + lag(_ab_cdc_lsn) over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_lsn is null asc, + _ab_cdc_lsn desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, @@ -88,7 +90,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -102,11 +107,11 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 3a2d131cb654c..87453dc261145 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -58,18 +58,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'nzd', + adapter.quote('id'), + 'currency', + 'nzd', ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + {{ adapter.quote('id') }}, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by {{ adapter.quote('id') }}, currency, cast(nzd as {{ dbt_utils.type_string() }}) @@ -95,7 +95,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -109,14 +112,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - currency, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + {{ adapter.quote('id') }}, + currency, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql index 61210186eb00c..36ce51399a3f3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/pos_dedup_cdcx_scd.sql @@ -56,28 +56,32 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), + adapter.quote('id'), ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _ab_cdc_log_pos desc, + _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc, _ab_cdc_log_pos desc + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _ab_cdc_log_pos desc, + _airbyte_emitted_at desc ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, @@ -89,7 +93,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_log_pos as {{ dbt_utils.type_string() }}) + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_log_pos as {{ dbt_utils.type_string() }}) order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -103,12 +110,12 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _ab_cdc_log_pos, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_log_pos, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 89a414852dbc8..0c50939426f79 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -56,22 +56,23 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), + adapter.quote('id'), ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + {{ adapter.quote('id') }}, + _ab_cdc_updated_at, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, _airbyte_emitted_at desc ) = 1 then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, @@ -84,7 +85,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -98,7 +102,8 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, + {{ adapter.quote('id') }}, + _ab_cdc_updated_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index 9b58ab7c73f2f..ca5093eb3e17e 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -9,6 +9,7 @@ select _airbyte_unique_key, {{ adapter.quote('id') }}, + _ab_cdc_updated_at, _airbyte_ab_id, _airbyte_emitted_at, {{ current_timestamp() }} as _airbyte_normalized_at, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql index 8b713b1e15b3d..be9bbfcd86758 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/models/generated/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -9,6 +9,7 @@ select {{ dbt_utils.surrogate_key([ adapter.quote('id'), + '_ab_cdc_updated_at', ]) }} as _airbyte_renamed_dedup_cdc_excluded_hashid, tmp.* from {{ ref('renamed_dedup_cdc_excluded_ab2') }} tmp diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 4fa8039ff1a85..5db2e106d7ddf 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -58,18 +58,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), - 'currency', - 'nzd', + adapter.quote('id'), + 'currency', + 'nzd', ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - currency, - new_column, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - nzd, - usd, + {{ adapter.quote('id') }}, + currency, + new_column, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + nzd, + usd, {{ adapter.quote('date') }} as _airbyte_start_at, lag({{ adapter.quote('date') }}) over ( partition by cast({{ adapter.quote('id') }} as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) @@ -95,7 +95,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -109,14 +112,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - currency, - new_column, - {{ adapter.quote('date') }}, - timestamp_col, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - nzd, - usd, + {{ adapter.quote('id') }}, + currency, + new_column, + {{ adapter.quote('date') }}, + timestamp_col, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index f88aa6e23f3b5..3803571720588 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -56,27 +56,29 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - adapter.quote('id'), + adapter.quote('id'), ]) }} as _airbyte_unique_key, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, - _airbyte_emitted_at as _airbyte_start_at, - lag(_airbyte_emitted_at) over ( + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, + _ab_cdc_updated_at as _airbyte_start_at, + lag(_ab_cdc_updated_at) over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) as _airbyte_end_at, case when row_number() over ( partition by {{ adapter.quote('id') }} order by - _airbyte_emitted_at is null asc, - _airbyte_emitted_at desc, - _airbyte_emitted_at desc, _ab_cdc_updated_at desc + _ab_cdc_updated_at is null asc, + _ab_cdc_updated_at desc, + _ab_cdc_updated_at desc, + _airbyte_emitted_at desc ) = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row, _airbyte_ab_id, _airbyte_emitted_at, @@ -88,7 +90,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at, cast(_ab_cdc_deleted_at as {{ dbt_utils.type_string() }}), cast(_ab_cdc_updated_at as {{ dbt_utils.type_string() }}) order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -102,11 +107,11 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - {{ adapter.quote('id') }}, - {{ adapter.quote('name') }}, - _ab_cdc_lsn, - _ab_cdc_updated_at, - _ab_cdc_deleted_at, + {{ adapter.quote('id') }}, + {{ adapter.quote('name') }}, + _ab_cdc_lsn, + _ab_cdc_updated_at, + _ab_cdc_deleted_at, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 5536e95b30750..3ea9e1c724fb6 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -6,9 +6,9 @@ from "renamed_dedup_cdc_excluded_scd__dbt_tmp" ); - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" from "renamed_dedup_cdc_excluded_scd__dbt_tmp" ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index a5cc40567b2cb..3fec1976ed9d0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -6,9 +6,9 @@ from "renamed_dedup_cdc_excluded__dbt_tmp" ); - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid") ( - select "_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" + select "_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid" from "renamed_dedup_cdc_excluded__dbt_tmp" ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql index 502e7141b3e86..5f39006c6490b 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/second_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -6,9 +6,9 @@ from "renamed_dedup_cdc_excluded_stg__dbt_tmp" ); - insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") + insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at") ( - select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" + select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at" from "renamed_dedup_cdc_excluded_stg__dbt_tmp" ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql index 7693af7ef2e63..dfe10c6da794d 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql @@ -6,9 +6,9 @@ from "renamed_dedup_cdc_excluded_scd__dbt_tmp" ); - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at") + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded_scd" ("_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") ( - select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at" + select "_airbyte_unique_key", "_airbyte_unique_key_scd", "id", "_ab_cdc_updated_at", "_airbyte_start_at", "_airbyte_end_at", "_airbyte_active_row", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" from "renamed_dedup_cdc_excluded_scd__dbt_tmp" ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql index c8edd1056dd5d..c1d1c310179d3 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql @@ -6,9 +6,9 @@ from "renamed_dedup_cdc_excluded__dbt_tmp" ); - insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at") + insert into "postgres".test_normalization."renamed_dedup_cdc_excluded" ("_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") ( - select "_airbyte_unique_key", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at" + select "_airbyte_unique_key", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "_airbyte_renamed_dedup_cdc_excluded_hashid", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" from "renamed_dedup_cdc_excluded__dbt_tmp" ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql index 62a33963d7a20..55db812277ae0 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/postgres/test_simple_streams/third_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded_stg.sql @@ -6,9 +6,9 @@ from "renamed_dedup_cdc_excluded_stg__dbt_tmp" ); - insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at") + insert into "postgres"._airbyte_test_normalization."renamed_dedup_cdc_excluded_stg" ("_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at") ( - select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_updated_at", "_ab_cdc_deleted_at" + select "_airbyte_renamed_dedup_cdc_excluded_hashid", "id", "_ab_cdc_updated_at", "_airbyte_ab_id", "_airbyte_emitted_at", "_airbyte_normalized_at", "name", "_ab_cdc_lsn", "_ab_cdc_deleted_at" from "renamed_dedup_cdc_excluded_stg__dbt_tmp" ); \ No newline at end of file diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index f56ebb0d393ad..0f50d29444876 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/first_output/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -21,9 +21,9 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select md5(cast(coalesce(cast(id as varchar), '') as varchar)) as _airbyte_unique_key, - id, - date, - "partition", + id, + date, + "partition", date as _airbyte_start_at, lag(date) over ( partition by id @@ -49,7 +49,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(coalesce(cast(_airbyte_unique_key as varchar), '') || '-' || coalesce(cast(_airbyte_start_at as varchar), '') || '-' || coalesce(cast(_airbyte_emitted_at as varchar), '') as varchar)) as _airbyte_unique_key_scd, @@ -59,9 +62,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - date, - "partition", + id, + date, + "partition", _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql index f63d93b3787bc..1e30bf57e4595 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_nested_streams/models/generated/airbyte_incremental/scd/test_normalization/nested_stream_with_complex_columns_resulting_into_long_names_scd.sql @@ -56,11 +56,11 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', + 'id', ]) }} as _airbyte_unique_key, - id, - date, - {{ adapter.quote('partition') }}, + id, + date, + {{ adapter.quote('partition') }}, date as _airbyte_start_at, lag(date) over ( partition by id @@ -86,7 +86,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -100,9 +103,9 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - date, - {{ adapter.quote('partition') }}, + id, + date, + {{ adapter.quote('partition') }}, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 6550ed24e0699..e2fb4b8024b06 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -21,14 +21,14 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select md5(cast(coalesce(cast(id as varchar), '') || '-' || coalesce(cast(currency as varchar), '') || '-' || coalesce(cast(nzd as varchar), '') as varchar)) as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - "hkd@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, + id, + currency, + date, + timestamp_col, + "hkd@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, date as _airbyte_start_at, lag(date) over ( partition by id, currency, cast(nzd as varchar) @@ -54,7 +54,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, md5(cast(coalesce(cast(_airbyte_unique_key as varchar), '') || '-' || coalesce(cast(_airbyte_start_at as varchar), '') || '-' || coalesce(cast(_airbyte_emitted_at as varchar), '') as varchar)) as _airbyte_unique_key_scd, @@ -64,14 +67,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - "hkd@spéçiäl & characters", - hkd_special___characters, - nzd, - usd, + id, + currency, + date, + timestamp_col, + "hkd@spéçiäl & characters", + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 28eaf5da09dd3..81b85e492cd51 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -58,18 +58,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', + 'id', + 'currency', + 'nzd', ]) }} as _airbyte_unique_key, - id, - currency, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + id, + currency, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, date as _airbyte_start_at, lag(date) over ( partition by id, currency, cast(nzd as {{ dbt_utils.type_string() }}) @@ -95,7 +95,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -109,14 +112,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - hkd_special___characters, - nzd, - usd, + id, + currency, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + hkd_special___characters, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql index 24c88aab4c0d3..363a39ec25fe7 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/redshift/test_simple_streams/modified_models/generated/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql @@ -58,18 +58,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'id', - 'currency', - 'nzd', + 'id', + 'currency', + 'nzd', ]) }} as _airbyte_unique_key, - id, - currency, - new_column, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - nzd, - usd, + id, + currency, + new_column, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + nzd, + usd, date as _airbyte_start_at, lag(date) over ( partition by cast(id as {{ dbt_utils.type_string() }}), currency, cast(nzd as {{ dbt_utils.type_string() }}) @@ -95,7 +95,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at + partition by + _airbyte_unique_key, + _airbyte_start_at, + _airbyte_emitted_at order by _airbyte_active_row desc, _airbyte_ab_id ) as _airbyte_row_num, {{ dbt_utils.surrogate_key([ @@ -109,14 +112,14 @@ dedup_data as ( select _airbyte_unique_key, _airbyte_unique_key_scd, - id, - currency, - new_column, - date, - timestamp_col, - {{ adapter.quote('hkd@spéçiäl & characters') }}, - nzd, - usd, + id, + currency, + new_column, + date, + timestamp_col, + {{ adapter.quote('hkd@spéçiäl & characters') }}, + nzd, + usd, _airbyte_start_at, _airbyte_end_at, _airbyte_active_row, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index 7bc1f2d4776b6..73631957ce269 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -20,9 +20,9 @@ scd_data as ( ), '') as varchar )) as _AIRBYTE_UNIQUE_KEY, - ID, - DATE, - PARTITION, + ID, + DATE, + PARTITION, DATE as _AIRBYTE_START_AT, lag(DATE) over ( partition by ID @@ -48,7 +48,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _AIRBYTE_UNIQUE_KEY, _AIRBYTE_START_AT, _AIRBYTE_EMITTED_AT + partition by + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_START_AT, + _AIRBYTE_EMITTED_AT order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID ) as _AIRBYTE_ROW_NUM, md5(cast(coalesce(cast(_AIRBYTE_UNIQUE_KEY as @@ -66,9 +69,9 @@ dedup_data as ( select _AIRBYTE_UNIQUE_KEY, _AIRBYTE_UNIQUE_KEY_SCD, - ID, - DATE, - PARTITION, + ID, + DATE, + PARTITION, _AIRBYTE_START_AT, _AIRBYTE_END_AT, _AIRBYTE_ACTIVE_ROW, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql index 0333595a99b97..167cdb066cb4c 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_nested_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/NESTED_STREAM_WITH_COMPLEX_COLUMNS_RESULTING_INTO_LONG_NAMES_SCD.sql @@ -56,11 +56,11 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'ID', + 'ID', ]) }} as _AIRBYTE_UNIQUE_KEY, - ID, - DATE, - PARTITION, + ID, + DATE, + PARTITION, DATE as _AIRBYTE_START_AT, lag(DATE) over ( partition by ID @@ -86,7 +86,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _AIRBYTE_UNIQUE_KEY, _AIRBYTE_START_AT, _AIRBYTE_EMITTED_AT + partition by + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_START_AT, + _AIRBYTE_EMITTED_AT order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID ) as _AIRBYTE_ROW_NUM, {{ dbt_utils.surrogate_key([ @@ -100,9 +103,9 @@ dedup_data as ( select _AIRBYTE_UNIQUE_KEY, _AIRBYTE_UNIQUE_KEY_SCD, - ID, - DATE, - PARTITION, + ID, + DATE, + PARTITION, _AIRBYTE_START_AT, _AIRBYTE_END_AT, _AIRBYTE_ACTIVE_ROW, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index 804ca297a46fd..20ae8e46add52 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/first_output/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -24,14 +24,14 @@ scd_data as ( ), '') as varchar )) as _AIRBYTE_UNIQUE_KEY, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - "HKD@spéçiäl & characters", - HKD_SPECIAL___CHARACTERS, - NZD, - USD, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + "HKD@spéçiäl & characters", + HKD_SPECIAL___CHARACTERS, + NZD, + USD, DATE as _AIRBYTE_START_AT, lag(DATE) over ( partition by ID, CURRENCY, cast(NZD as @@ -61,7 +61,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _AIRBYTE_UNIQUE_KEY, _AIRBYTE_START_AT, _AIRBYTE_EMITTED_AT + partition by + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_START_AT, + _AIRBYTE_EMITTED_AT order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID ) as _AIRBYTE_ROW_NUM, md5(cast(coalesce(cast(_AIRBYTE_UNIQUE_KEY as @@ -79,14 +82,14 @@ dedup_data as ( select _AIRBYTE_UNIQUE_KEY, _AIRBYTE_UNIQUE_KEY_SCD, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - "HKD@spéçiäl & characters", - HKD_SPECIAL___CHARACTERS, - NZD, - USD, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + "HKD@spéçiäl & characters", + HKD_SPECIAL___CHARACTERS, + NZD, + USD, _AIRBYTE_START_AT, _AIRBYTE_END_AT, _AIRBYTE_ACTIVE_ROW, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql index ca5c91ab9c9be..2b62f6776a223 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql +++ b/airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/snowflake/test_simple_streams/models/generated/airbyte_incremental/scd/TEST_NORMALIZATION/DEDUP_EXCHANGE_RATE_SCD.sql @@ -58,18 +58,18 @@ scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select {{ dbt_utils.surrogate_key([ - 'ID', - 'CURRENCY', - 'NZD', + 'ID', + 'CURRENCY', + 'NZD', ]) }} as _AIRBYTE_UNIQUE_KEY, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - HKD_SPECIAL___CHARACTERS, - NZD, - USD, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + HKD_SPECIAL___CHARACTERS, + NZD, + USD, DATE as _AIRBYTE_START_AT, lag(DATE) over ( partition by ID, CURRENCY, cast(NZD as {{ dbt_utils.type_string() }}) @@ -95,7 +95,10 @@ dedup_data as ( -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by _AIRBYTE_UNIQUE_KEY, _AIRBYTE_START_AT, _AIRBYTE_EMITTED_AT + partition by + _AIRBYTE_UNIQUE_KEY, + _AIRBYTE_START_AT, + _AIRBYTE_EMITTED_AT order by _AIRBYTE_ACTIVE_ROW desc, _AIRBYTE_AB_ID ) as _AIRBYTE_ROW_NUM, {{ dbt_utils.surrogate_key([ @@ -109,14 +112,14 @@ dedup_data as ( select _AIRBYTE_UNIQUE_KEY, _AIRBYTE_UNIQUE_KEY_SCD, - ID, - CURRENCY, - DATE, - TIMESTAMP_COL, - {{ adapter.quote('HKD@spéçiäl & characters') }}, - HKD_SPECIAL___CHARACTERS, - NZD, - USD, + ID, + CURRENCY, + DATE, + TIMESTAMP_COL, + {{ adapter.quote('HKD@spéçiäl & characters') }}, + HKD_SPECIAL___CHARACTERS, + NZD, + USD, _AIRBYTE_START_AT, _AIRBYTE_END_AT, _AIRBYTE_ACTIVE_ROW, diff --git a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json index c8efc48f5b3e3..9b44f5e68d18a 100644 --- a/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json +++ b/airbyte-integrations/bases/base-normalization/integration_tests/resources/test_simple_streams/data_input/catalog.json @@ -96,6 +96,9 @@ "properties": { "id": { "type": "integer" + }, + "_ab_cdc_updated_at": { + "type": ["null", "number"] } } }, @@ -136,7 +139,7 @@ "default_cursor_field": [] }, "sync_mode": "incremental", - "cursor_field": [], + "cursor_field": ["_ab_cdc_lsn"], "destination_sync_mode": "append_dedup", "primary_key": [["id"]] }, diff --git a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py index 1a8993ddf8cc3..8cae3703bc9fc 100644 --- a/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py +++ b/airbyte-integrations/bases/base-normalization/normalization/transform_catalog/stream_processor.py @@ -376,12 +376,12 @@ def generate_json_parsing_model(self, from_table: str, column_names: Dict[str, T -- depends_on: {{ from_table }} {{ unnesting_before_query }} select - {%- if parent_hash_id %} +{%- if parent_hash_id %} {{ parent_hash_id }}, - {%- endif %} - {%- for field in fields %} +{%- endif %} +{%- for field in fields %} {{ field }}, - {%- endfor %} +{%- endfor %} {{ col_ab_id }}, {{ col_emitted_at }}, {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }} @@ -454,12 +454,12 @@ def generate_column_typing_model(self, from_table: str, column_names: Dict[str, -- SQL model to cast each column to its adequate SQL type converted from the JSON schema type -- depends_on: {{ from_table }} select - {%- if parent_hash_id %} +{%- if parent_hash_id %} {{ parent_hash_id }}, - {%- endif %} - {%- for field in fields %} +{%- endif %} +{%- for field in fields %} {{ field }}, - {%- endfor %} +{%- endfor %} {{ col_ab_id }}, {{ col_emitted_at }}, {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }} @@ -573,9 +573,9 @@ def generate_snowflake_timestamp_statement(column_name: str) -> str: template = Template( """ case - {% for format_item in formats %} +{% for format_item in formats %} when {{column_name}} regexp '{{format_item['regex']}}' then to_timestamp_tz({{column_name}}, '{{format_item['format']}}') - {% endfor %} +{% endfor %} when {{column_name}} = '' then NULL else to_timestamp_tz({{column_name}}) end as {{column_name}} @@ -591,12 +591,12 @@ def generate_id_hashing_model(self, from_table: str, column_names: Dict[str, Tup -- depends_on: {{ from_table }} select {{ '{{' }} dbt_utils.surrogate_key([ - {%- if parent_hash_id %} +{%- if parent_hash_id %} {{ parent_hash_id }}, - {%- endif %} - {%- for field in fields %} +{%- endif %} +{%- for field in fields %} {{ field }}, - {%- endfor %} +{%- endfor %} ]) {{ '}}' }} as {{ hash_id }}, tmp.* from {{ from_table }} tmp @@ -649,11 +649,12 @@ def safe_cast_to_string(definition: Dict, column_name: str, destination_type: De return col def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tuple[str, str]]) -> str: - order_null = "is null asc" + cursor_field = self.get_cursor_field(column_names) + order_null = f"is null asc,\n {cursor_field} desc" if self.destination_type.value == DestinationType.ORACLE.value: - order_null = "asc nulls last" + order_null = "desc nulls last" if self.destination_type.value == DestinationType.MSSQL.value: - # SQL Server treats NULL values as the lowest values, then sorted in ascending order, NULLs come first. + # SQL Server treats NULL values as the lowest values, thus NULLs come last when desc. order_null = "desc" lag_begin = "lag" @@ -663,7 +664,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup # ClickHouse doesn't support lag() yet, this is a workaround solution # Ref: https://clickhouse.com/docs/en/sql-reference/window-functions/ lag_begin = "anyOrNull" - lag_end = "ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING" + lag_end = " ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING" input_data_table = "input_data_with_active_row_num" enable_left_join_null = "" @@ -687,7 +688,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup quoted_col_cdc_deleted_at = self.name_transformer.normalize_column_name("_ab_cdc_deleted_at", in_jinja=True) quoted_col_cdc_updated_at = self.name_transformer.normalize_column_name("_ab_cdc_updated_at", in_jinja=True) cdc_active_row_pattern = f" and {col_cdc_deleted_at} is null" - cdc_updated_order_pattern = f", {col_cdc_updated_at} desc" + cdc_updated_order_pattern = f"\n {col_cdc_updated_at} desc," cdc_cols = ( f", {cast_begin}{col_cdc_deleted_at}{cast_as}" + "{{ dbt_utils.type_string() }}" @@ -701,15 +702,32 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup if "_ab_cdc_log_pos" in column_names.keys(): col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos") quoted_col_cdc_log_pos = self.name_transformer.normalize_column_name("_ab_cdc_log_pos", in_jinja=True) - cdc_updated_order_pattern += f", {col_cdc_log_pos} desc" + cdc_updated_order_pattern += f"\n {col_cdc_log_pos} desc," cdc_cols += f", {cast_begin}{col_cdc_log_pos}{cast_as}" + "{{ dbt_utils.type_string() }}" + f"{cast_end}" quoted_cdc_cols += f", {quoted_col_cdc_log_pos}" + if ( + self.destination_type == DestinationType.BIGQUERY + and self.get_cursor_field_property_name(column_names) != self.airbyte_emitted_at + and is_number(self.properties[self.get_cursor_field_property_name(column_names)]["type"]) + ): + # partition by float columns is not allowed in BigQuery, cast it to string + airbyte_start_at_string = ( + cast_begin + + self.name_transformer.normalize_column_name("_airbyte_start_at") + + cast_as + + "{{ dbt_utils.type_string() }}" + + cast_end + ) + else: + airbyte_start_at_string = self.name_transformer.normalize_column_name("_airbyte_start_at") + jinja_variables = { "active_row": self.name_transformer.normalize_column_name("_airbyte_active_row"), "airbyte_end_at": self.name_transformer.normalize_column_name("_airbyte_end_at"), "airbyte_row_num": self.name_transformer.normalize_column_name("_airbyte_row_num"), "airbyte_start_at": self.name_transformer.normalize_column_name("_airbyte_start_at"), + "airbyte_start_at_string": airbyte_start_at_string, "airbyte_unique_key_scd": self.name_transformer.normalize_column_name(f"{self.airbyte_unique_key}_scd"), "cdc_active_row": cdc_active_row_pattern, "cdc_cols": cdc_cols, @@ -717,7 +735,7 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup "col_ab_id": self.get_ab_id(), "col_emitted_at": self.get_emitted_at(), "col_normalized_at": self.get_normalized_at(), - "cursor_field": self.get_cursor_field(column_names), + "cursor_field": cursor_field, "enable_left_join_null": enable_left_join_null, "fields": self.list_fields(column_names), "from_table": from_table, @@ -745,9 +763,8 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup row_number() over ( partition by {{ primary_key_partition | join(", ") }} order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} + {{ col_emitted_at }} desc ) as _airbyte_active_row_num from input_data ),""" @@ -759,11 +776,9 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup {{ lag_begin }}({{ cursor_field }}) over ( partition by {{ primary_key_partition | join(", ") }} order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} - {{ lag_end }} - ) as {{ airbyte_end_at }}""" + {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} + {{ col_emitted_at }} desc + {{ lag_end }}) as {{ airbyte_end_at }}""" ).render(jinja_variables) jinja_variables["scd_columns_sql"] = scd_columns_sql else: @@ -772,16 +787,14 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup lag({{ cursor_field }}) over ( partition by {{ primary_key_partition | join(", ") }} order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} + {{ col_emitted_at }} desc ) as {{ airbyte_end_at }}, case when row_number() over ( partition by {{ primary_key_partition | join(", ") }} order by - {{ cursor_field }} {{ order_null }}, - {{ cursor_field }} desc, - {{ col_emitted_at }} desc{{ cdc_updated_at_order }} + {{ cursor_field }} {{ order_null }},{{ cdc_updated_at_order }} + {{ col_emitted_at }} desc ) = 1{{ cdc_active_row }} then 1 else 0 end as {{ active_row }}""" ).render(jinja_variables) jinja_variables["scd_columns_sql"] = scd_columns_sql @@ -803,9 +816,9 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup -- build a subset of {{ unique_key }} from rows that are new select distinct {{ '{{' }} dbt_utils.surrogate_key([ - {%- for primary_key in primary_keys %} +{%- for primary_key in primary_keys %} {{ primary_key }}, - {%- endfor %} +{%- endfor %} ]) {{ '}}' }} as {{ unique_key }} from new_data ), @@ -840,17 +853,17 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup scd_data as ( -- SQL model to build a Type 2 Slowly Changing Dimension (SCD) table for each record identified by their primary key select - {%- if parent_hash_id %} - {{ parent_hash_id }}, - {%- endif %} +{%- if parent_hash_id %} + {{ parent_hash_id }}, +{%- endif %} {{ '{{' }} dbt_utils.surrogate_key([ - {%- for primary_key in primary_keys %} - {{ primary_key }}, - {%- endfor %} +{%- for primary_key in primary_keys %} + {{ primary_key }}, +{%- endfor %} ]) {{ '}}' }} as {{ unique_key }}, - {%- for field in fields %} - {{ field }}, - {%- endfor %} +{%- for field in fields %} + {{ field }}, +{%- endfor %} {{ cursor_field }} as {{ airbyte_start_at }}, {{ scd_columns_sql }}, {{ col_ab_id }}, @@ -863,7 +876,10 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup -- we need to ensure de-duplicated rows for merge/update queries -- additionally, we generate a unique key for the scd table row_number() over ( - partition by {{ unique_key }}, {{ airbyte_start_at }}, {{ col_emitted_at }}{{ cdc_cols }} + partition by + {{ unique_key }}, + {{ airbyte_start_at_string }}, + {{ col_emitted_at }}{{ cdc_cols }} order by {{ active_row }} desc, {{ col_ab_id }} ) as {{ airbyte_row_num }}, {{ '{{' }} dbt_utils.surrogate_key([ @@ -875,14 +891,14 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup from scd_data ) select - {%- if parent_hash_id %} - {{ parent_hash_id }}, - {%- endif %} +{%- if parent_hash_id %} + {{ parent_hash_id }}, +{%- endif %} {{ unique_key }}, {{ airbyte_unique_key_scd }}, - {%- for field in fields %} - {{ field }}, - {%- endfor %} +{%- for field in fields %} + {{ field }}, +{%- endfor %} {{ airbyte_start_at }}, {{ airbyte_end_at }}, {{ active_row }}, @@ -895,9 +911,22 @@ def generate_scd_type_2_model(self, from_table: str, column_names: Dict[str, Tup ).render(jinja_variables) return sql + def get_cursor_field_property_name(self, column_names: Dict[str, Tuple[str, str]]) -> str: + if not self.cursor_field: + if "_ab_cdc_updated_at" in column_names.keys(): + return "_ab_cdc_updated_at" + elif "_ab_cdc_log_pos" in column_names.keys(): + return "_ab_cdc_log_pos" + else: + return self.airbyte_emitted_at + elif len(self.cursor_field) == 1: + return self.cursor_field[0] + else: + raise ValueError(f"Unsupported nested cursor field {'.'.join(self.cursor_field)} for stream {self.stream_name}") + def get_cursor_field(self, column_names: Dict[str, Tuple[str, str]], in_jinja: bool = False) -> str: if not self.cursor_field: - cursor = self.name_transformer.normalize_column_name(self.airbyte_emitted_at, in_jinja) + cursor = self.name_transformer.normalize_column_name(self.get_cursor_field_property_name(column_names), in_jinja) elif len(self.cursor_field) == 1: if not is_airbyte_column(self.cursor_field[0]): cursor = column_names[self.cursor_field[0]][0] @@ -906,7 +935,6 @@ def get_cursor_field(self, column_names: Dict[str, Tuple[str, str]], in_jinja: b cursor = self.cursor_field[0] else: raise ValueError(f"Unsupported nested cursor field {'.'.join(self.cursor_field)} for stream {self.stream_name}") - return cursor def list_primary_keys(self, column_names: Dict[str, Tuple[str, str]]) -> List[str]: @@ -952,15 +980,15 @@ def generate_final_model(self, from_table: str, column_names: Dict[str, Tuple[st -- Final base SQL model -- depends_on: {{ from_table }} select - {%- if parent_hash_id %} +{%- if parent_hash_id %} {{ parent_hash_id }}, - {%- endif %} - {%- if unique_key %} +{%- endif %} +{%- if unique_key %} {{ unique_key }}, - {%- endif %} - {%- for field in fields %} +{%- endif %} +{%- for field in fields %} {{ field }}, - {%- endfor %} +{%- endfor %} {{ col_ab_id }}, {{ col_emitted_at }}, {{ '{{ current_timestamp() }}' }} as {{ col_normalized_at }},