vdk-trino: Add unit tests for templates using reserved words (#133)

Trino templates can work with identifiers that are reserved Trino keywords. This change adds a unit test for each template (SCD1, SCD2 and periodic snapshot) that uses reserved keywords as table and column names. Testing done: the new unit tests. Signed-off-by: Yana Zhivkova <[email protected]>
vmware · Aug 25, 2021 · 9804719 · 9804719
1 parent e4c999f
commit 9804719
Show file tree

Hide file tree

Showing 3 changed files with 140 additions and 27 deletions.
diff --git a/...re/plugins/vdk-trino/tests/jobs/load_dimension_scd2_template_job/01_prepare_input_data.py b/...re/plugins/vdk-trino/tests/jobs/load_dimension_scd2_template_job/01_prepare_input_data.py
@@ -22,7 +22,7 @@ def run(job_input: IJobInput) -> None:
              {active_from_column} TIMESTAMP,
              {active_to_column} TIMESTAMP,
              "{id_column}" INT,
-             updated_by_user_id INT,
+             "{value_column_1}" INT,
              state VARCHAR,
              is_next BOOLEAN,
              cloud_vendor VARCHAR,
@@ -55,7 +55,7 @@ def run(job_input: IJobInput) -> None:
            CREATE TABLE IF NOT EXISTS "{source_schema}"."{source_view}" (
              {updated_at_column} TIMESTAMP,
              "{id_column}" INT,
-             updated_by_user_id INT,
+             "{value_column_1}" INT,
              state VARCHAR,
              is_next BOOLEAN,
              cloud_vendor VARCHAR,
@@ -90,7 +90,7 @@ def run(job_input: IJobInput) -> None:
              {active_from_column} TIMESTAMP,
              {active_to_column} TIMESTAMP,
              "{id_column}" INT,
-             updated_by_user_id INT,
+             "{value_column_1}" INT,
              state VARCHAR,
              is_next BOOLEAN,
              cloud_vendor VARCHAR,

diff --git a/...ob/02_run_load_dimension_scd1_template.py → ...ob/02_run_load_dimension_scd2_template.py b/...ob/02_run_load_dimension_scd1_template.py → ...ob/02_run_load_dimension_scd2_template.py
diff --git a/projects/vdk-core/plugins/vdk-trino/tests/test_vdk_templates.py b/projects/vdk-core/plugins/vdk-trino/tests/test_vdk_templates.py
@@ -112,6 +112,59 @@ def test_scd1_template(self) -> None:
             actual_rs.output == expected_rs.output
         ), f"Elements in {source_view} and {target_table} differ."
 
+    def test_scd1_template_reserved_args(self) -> None:
+        source_schema = "default"
+        source_view = "alter"
+        target_schema = "default"
+        target_table = "table"
+
+        result: Result = self.__runner.invoke(
+            [
+                "run",
+                get_test_job_path(
+                    pathlib.Path(os.path.dirname(os.path.abspath(__file__))),
+                    "load_dimension_scd1_template_job",
+                ),
+                "--arguments",
+                json.dumps(
+                    {
+                        "source_schema": source_schema,
+                        "source_view": source_view,
+                        "target_schema": target_schema,
+                        "target_table": target_table,
+                    }
+                ),
+            ]
+        )
+
+        cli_assert_equal(0, result)
+
+        actual_rs: Result = self.__runner.invoke(
+            [
+                "trino-query",
+                "--query",
+                f"""
+                SELECT * FROM "{target_schema}"."{target_table}"
+                """,
+            ]
+        )
+
+        expected_rs: Result = self.__runner.invoke(
+            [
+                "trino-query",
+                "--query",
+                f"""
+                SELECT * FROM "{source_schema}"."{source_view}"
+                """,
+            ]
+        )
+
+        cli_assert_equal(0, actual_rs)
+        cli_assert_equal(0, expected_rs)
+        assert (
+            actual_rs.output == expected_rs.output
+        ), f"Elements in {source_view} and {target_table} differ."
+
     def test_scd2_template(self) -> None:
         test_schema = "default"
         source_view = "vw_scmdb_people"
@@ -129,6 +182,25 @@ def test_scd2_template(self) -> None:
             1, self.__template_table_exists(test_schema, "backup_" + target_table)
         )
 
+    def test_scd2_template_reserved_args(self) -> None:
+        test_schema = "default"
+        source_view = "alter"
+        target_table = "table"
+        expect_table = "between"
+
+        result: Result = self.__scd2_template_execute(
+            test_schema, source_view, target_table, expect_table, False, "reserved"
+        )
+        cli_assert_equal(0, result)
+
+        # Check if we got the expected result and successfully dropped backup
+        self.__scd2_template_check_expected_res(
+            test_schema, target_table, expect_table, "reserved"
+        )
+        cli_assert_equal(
+            1, self.__template_table_exists(test_schema, "backup_" + target_table)
+        )
+
     def test_scd2_template_restore_target_from_backup_on_start(self) -> None:
         test_schema = "default"
         source_view = "vw_scmdb_people"
@@ -203,6 +275,21 @@ def test_fact_periodic_snapshot_template(self) -> None:
             test_schema, target_table, expect_table
         )
 
+    def test_fact_periodic_snapshot_template_reserved_args(self) -> None:
+        test_schema = "default"
+        source_view = "alter"
+        target_table = "table"
+        expect_table = "between"
+
+        result: Result = self.__fact_periodic_snapshot_template_execute(
+            test_schema, source_view, target_table, expect_table
+        )
+        cli_assert_equal(0, result)
+
+        self.__fact_periodic_snapshot_template_check_expected_res(
+            test_schema, target_table, expect_table
+        )
+
     def test_fact_periodic_snapshot_empty_source(self) -> None:
         test_schema = "default"
         source_view = "vw_fact_sddc_daily"
@@ -373,7 +460,9 @@ def __scd2_template_execute(
         target_table,
         expect_table,
         restore_from_backup=False,
+        reserved=False,
     ):
+        value_column_1 = reserved and "with" or "updated_by_user_id"
         return self.__runner.invoke(
             [
                 "run",
@@ -391,17 +480,17 @@ def __scd2_template_execute(
                         "staging_schema": test_schema,
                         "expect_schema": test_schema,
                         "expect_table": expect_table,
-                        "id_column": "sddc_id",
-                        "sk_column": "sddc_sk",
+                        "id_column": reserved and "when" or "sddc_id",
+                        "sk_column": reserved and "where" or "sddc_sk",
                         "value_columns": [
-                            "updated_by_user_id",
+                            value_column_1,
                             "state",
                             "is_next",
                             "cloud_vendor",
                             "version",
                         ],
                         "tracked_columns": [
-                            "updated_by_user_id",
+                            value_column_1,
                             "state",
                             "is_next",
                             "version",
@@ -414,36 +503,60 @@ def __scd2_template_execute(
                         "end_time_column": "end_time",
                         "end_time_default_value": "9999-12-31",
                         "test_restore_from_backup": f"{restore_from_backup}",
+                        "value_column_1": f"{value_column_1}",
                     }
                 ),
             ]
         )
 
     def __scd2_template_check_expected_res(
-        self, test_schema, target_table, expect_table
+        self, test_schema, target_table, expect_table, reserved=False
     ) -> None:
         # don't check first (surrogate key) column from the two results,
         # as those are uniquely generated and might differ
 
-        actual_rs: Result = self.__runner.invoke(
-            [
-                "trino-query",
-                "--query",
-                f"""SELECT active_from, active_to, sddc_id, updated_by_user_id, state, is_next, cloud_vendor, version
-                FROM "{test_schema}"."{target_table}"
-                ORDER BY sddc_id, active_to""",
-            ]
-        )
-
-        expected_rs: Result = self.__runner.invoke(
-            [
-                "trino-query",
-                "--query",
-                f"""SELECT active_from, active_to, sddc_id, updated_by_user_id, state, is_next, cloud_vendor, version
-                FROM "{test_schema}"."{expect_table}"
-                ORDER BY sddc_id, active_to""",
-            ]
-        )
+        if reserved:
+            actual_rs: Result = self.__runner.invoke(
+                [
+                    "trino-query",
+                    "--query",
+                    f"""
+                    SELECT active_from, active_to, "when", "with", state, is_next, cloud_vendor, version
+                    FROM "{test_schema}"."{target_table}"
+                    ORDER BY "when", active_to
+                    """,
+                ]
+            )
+
+            expected_rs: Result = self.__runner.invoke(
+                [
+                    "trino-query",
+                    "--query",
+                    f"""SELECT active_from, active_to, "when", "with", state, is_next, cloud_vendor, version
+                    FROM "{test_schema}"."{expect_table}"
+                    ORDER BY "when", active_to""",
+                ]
+            )
+        else:
+            actual_rs: Result = self.__runner.invoke(
+                [
+                    "trino-query",
+                    "--query",
+                    f"""SELECT active_from, active_to, sddc_id, updated_by_user_id, state, is_next, cloud_vendor, version
+                                FROM "{test_schema}"."{target_table}"
+                                ORDER BY sddc_id, active_to""",
+                ]
+            )
+
+            expected_rs: Result = self.__runner.invoke(
+                [
+                    "trino-query",
+                    "--query",
+                    f"""SELECT active_from, active_to, sddc_id, updated_by_user_id, state, is_next, cloud_vendor, version
+                                FROM "{test_schema}"."{expect_table}"
+                                ORDER BY sddc_id, active_to""",
+                ]
+            )
 
         cli_assert_equal(0, actual_rs)
         cli_assert_equal(0, expected_rs)