From 15c0ce29420679f94d6e377ce54c3eb39dc9a329 Mon Sep 17 00:00:00 2001 From: sbuldeev Date: Fri, 18 Aug 2023 15:12:46 +0300 Subject: [PATCH 1/2] Introduce COMPUTE STATS statement The statement is executed against a staging table right before the data is moved from staging to target --- .../templates/load/dimension/scd1/02-handle-quality-checks.py | 2 ++ .../templates/load/fact/insert/02-handle-quality-checks.py | 2 ++ .../templates/load/fact/snapshot/02-handle-quality-checks.py | 2 ++ 3 files changed, 6 insertions(+) diff --git a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/02-handle-quality-checks.py b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/02-handle-quality-checks.py index 3c988a91b5..2389e64861 100644 --- a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/02-handle-quality-checks.py +++ b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/02-handle-quality-checks.py @@ -49,6 +49,8 @@ def run(job_input: IJobInput): job_input.execute_query(insert_into_staging) if check(staging_table): + job_input.execute_query(f"COMPUTE STATS {staging_table}") + insert_into_target = insert_query.format( source_schema=staging_schema, source_view=staging_table_name, diff --git a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/fact/insert/02-handle-quality-checks.py b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/fact/insert/02-handle-quality-checks.py index b54ab2bc1d..f34a8a702d 100644 --- a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/fact/insert/02-handle-quality-checks.py +++ b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/fact/insert/02-handle-quality-checks.py @@ -67,6 +67,8 @@ def run(job_input: IJobInput): view_full_name = f"{view_schema}.{view_name}" if check(view_full_name): + job_input.execute_query(f"COMPUTE STATS {staging_table}") 
+ insert_into_target = insert_query.format( source_schema=staging_schema, source_view=staging_table_name, diff --git a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/fact/snapshot/02-handle-quality-checks.py b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/fact/snapshot/02-handle-quality-checks.py index 7cb70de877..8d48ea69bc 100644 --- a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/fact/snapshot/02-handle-quality-checks.py +++ b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/fact/snapshot/02-handle-quality-checks.py @@ -56,6 +56,8 @@ def run(job_input: IJobInput): job_input.execute_query(insert_into_staging) if check(staging_table): + job_input.execute_query(f"COMPUTE STATS {staging_table}") + insert_into_target = overwrite_target_query.format( staging_schema=staging_schema, staging_table_name=staging_table_name, From 071898d03e88d5699576b820d32a6df2411597ad Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 18 Aug 2023 12:29:22 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../templates/load/dimension/scd1/02-handle-quality-checks.py | 2 +- .../templates/load/fact/snapshot/02-handle-quality-checks.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/02-handle-quality-checks.py b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/02-handle-quality-checks.py index 2389e64861..e2713791c1 100644 --- a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/02-handle-quality-checks.py +++ b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/02-handle-quality-checks.py @@ -50,7 +50,7 @@ def run(job_input: IJobInput): if 
check(staging_table): job_input.execute_query(f"COMPUTE STATS {staging_table}") - + insert_into_target = insert_query.format( source_schema=staging_schema, source_view=staging_table_name, diff --git a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/fact/snapshot/02-handle-quality-checks.py b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/fact/snapshot/02-handle-quality-checks.py index 8d48ea69bc..7c38369e10 100644 --- a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/fact/snapshot/02-handle-quality-checks.py +++ b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/fact/snapshot/02-handle-quality-checks.py @@ -57,7 +57,7 @@ def run(job_input: IJobInput): if check(staging_table): job_input.execute_query(f"COMPUTE STATS {staging_table}") - + insert_into_target = overwrite_target_query.format( staging_schema=staging_schema, staging_table_name=staging_table_name,