From c84fd29ed842c7f61c94e2541e7523644c6dc546 Mon Sep 17 00:00:00 2001 From: sbuldeev <110468720+sbuldeev@users.noreply.github.com> Date: Wed, 1 Mar 2023 11:56:03 +0200 Subject: [PATCH] vdk-impala: Add optional parameter for staging table prefix (#1666) What: Adding the option to pass a staging table prefix as well as code refactoring and readme file enhancements Why: It is linked to the issue Signed-off-by: Stefan Buldeev sbuldeev@vmware.com --------- Signed-off-by: Stefan Buldeev sbuldeev@vmware.com Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../load/dimension/scd1/02-handle-quality-checks.py | 10 ++++++---- .../impala/templates/load/dimension/scd1/README.md | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/02-handle-quality-checks.py b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/02-handle-quality-checks.py index 3f77aaec60..22a5c8b5f0 100644 --- a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/02-handle-quality-checks.py +++ b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/02-handle-quality-checks.py @@ -25,17 +25,19 @@ def run(job_input: IJobInput): source_view = job_arguments.get("source_view") target_schema = job_arguments.get("target_schema") target_table = job_arguments.get("target_table") - staging_schema = job_arguments.get("staging_schema", target_schema) insert_query = get_query("02-insert-into-target.sql") - staging_table_name = f"vdk_check_{target_table}" if check: - if not staging_schema: + staging_schema = job_arguments.get("staging_schema", target_schema) + staging_table_name = f"vdk_check_{target_schema}_{target_table}" + + if len(staging_table_name) > 128: raise ValueError( - "No staging_schema specified to execute the defined data checks against." + f"Staging table - {staging_table_name} exceeds the 128 character limit." ) staging_table = f"{staging_schema}.{staging_table_name}" + align_stg_table_with_target( f"{target_schema}.{target_table}", staging_table, job_input ) diff --git a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/README.md b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/README.md index ce4a028fd3..fe1ae389e8 100644 --- a/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/README.md +++ b/projects/vdk-plugins/vdk-impala/src/vdk/plugin/impala/templates/load/dimension/scd1/README.md @@ -12,10 +12,12 @@ In summary, it overwrites the target table with the source data. ### Template Parameters (template_args): -- target_schema - SC Data Warehouse schema, where target data is loaded -- target_table - SC Data Warehouse table of DW type 'Slowly Changing Dimension Type 1', where target data is loaded -- source_schema - SC Data Lake schema, where source raw data is loaded from -- source_view - SC Data Lake view, where source raw data is loaded from +- target_schema - SC Data Warehouse schema, where target data is loaded +- target_table - SC Data Warehouse table of DW type 'Slowly Changing Dimension Type 1', where target data is loaded +- source_schema - SC Data Lake schema, where source raw data is loaded from +- source_view - SC Data Lake view, where source raw data is loaded from +- check - (Optional) Callback function responsible for checking the quality of the data +- staging_schema - (Optional) Schema where the checks will be executed. If not provided target_schema will be used as default ### Prerequisites: