From dfa0ed23664fd79a7fda13d64067474b0585d1a9 Mon Sep 17 00:00:00 2001
From: Dilyan Marinov <mdilyan@vmware.com>
Date: Thu, 7 Dec 2023 14:55:28 +0200
Subject: [PATCH 1/2] vdk-structlog: filter vdk_step_name and vdk_step_type
 correctly

Why?

vdk_step_name and vdk_step_type were not filtered correctly when
using json format

What?

Make sure the metadata filter alwats executes last,
even when vdk_step_name and vdk_step_type are involved

Add separate functional test for vdk_step_name and vdk_step_type
to avoid conflict with file_name in other tests

How was this tested?

Functional tests

What kind of change is this?

Feature/non-breaking

Signed-off-by: Dilyan Marinov <mdilyan@vmware.com>
---
 .../src/vdk/plugin/structlog/formatters.py    |  2 +-
 .../vdk/plugin/structlog/structlog_plugin.py  | 21 ++++--
 .../vdk-structlog/tests/test_structlog.py     | 71 +++++++++++++++++--
 3 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/projects/vdk-plugins/vdk-structlog/src/vdk/plugin/structlog/formatters.py b/projects/vdk-plugins/vdk-structlog/src/vdk/plugin/structlog/formatters.py
index 7c17b835dd..58f63e6457 100644
--- a/projects/vdk-plugins/vdk-structlog/src/vdk/plugin/structlog/formatters.py
+++ b/projects/vdk-plugins/vdk-structlog/src/vdk/plugin/structlog/formatters.py
@@ -162,5 +162,5 @@ def create_formatter(
         )
         formatter = ConsoleFormatter(fmt=format_string)
         custom_key_filter = ConsoleMetadataFilter(key_set)
-
+    custom_key_filter.name = "metadata_filter"
     return formatter, custom_key_filter
diff --git a/projects/vdk-plugins/vdk-structlog/src/vdk/plugin/structlog/structlog_plugin.py b/projects/vdk-plugins/vdk-structlog/src/vdk/plugin/structlog/structlog_plugin.py
index 8047aa2c79..ff524ce9ba 100644
--- a/projects/vdk-plugins/vdk-structlog/src/vdk/plugin/structlog/structlog_plugin.py
+++ b/projects/vdk-plugins/vdk-structlog/src/vdk/plugin/structlog/structlog_plugin.py
@@ -184,16 +184,25 @@ def run_job(self, context: JobContext) -> Optional[ExecutionResult]:
     @hookimpl(hookwrapper=True)
     def run_step(self, context: JobContext, step: Step) -> Optional[StepResult]:
         root_logger = logging.getLogger()
+        handler = root_logger.handlers[0]
+
+        # make sure the metadata filter executes last
+        # so that step_name and step_type are filtered if necessary
+        metadata_filter = [f for f in handler.filters if f.name == "metadata_filter"][0]
+        handler.removeFilter(metadata_filter)
+
         step_name_adder = AttributeAdder("vdk_step_name", step.name)
         step_type_adder = AttributeAdder("vdk_step_type", step.type)
-        for handler in root_logger.handlers:
-            handler.addFilter(step_name_adder)
-            handler.addFilter(step_type_adder)
+        handler.addFilter(step_name_adder)
+        handler.addFilter(step_type_adder)
+
+        # make sure the metadata filter executes last
+        # so that step_name and step_type are filtered if necessary
+        handler.addFilter(metadata_filter)
         out: HookCallResult
         out = yield
-        for handler in root_logger.handlers:
-            handler.removeFilter(step_name_adder)
-            handler.removeFilter(step_type_adder)
+        handler.removeFilter(step_name_adder)
+        handler.removeFilter(step_type_adder)
 
 
 @hookimpl
diff --git a/projects/vdk-plugins/vdk-structlog/tests/test_structlog.py b/projects/vdk-plugins/vdk-structlog/tests/test_structlog.py
index 3c7f3d40e5..12620360ec 100644
--- a/projects/vdk-plugins/vdk-structlog/tests/test_structlog.py
+++ b/projects/vdk-plugins/vdk-structlog/tests/test_structlog.py
@@ -36,6 +36,8 @@
         "line_number": r"\s:[0-9]+",
         "function_name": "run",
         "vdk_job_name": JOB_NAME,
+        "vdk_step_name": r"10_dummy\.py",
+        "vdk_step_type": r"python",
     },
     "ltsv": {
         "timestamp": r"timestamp:\d+\.\d+",
@@ -44,6 +46,8 @@
         "line_number": "line_number:[0-9]+",
         "function_name": "function_name:run",
         "vdk_job_name": f"vdk_job_name:{JOB_NAME}",
+        "vdk_step_name": r"vdk_step_name:10_dummy\.py",
+        "vdk_step_type": "vdk_step_type:python",
     },
     "json": {
         "timestamp": r'"timestamp": \d+\.\d+',
@@ -52,6 +56,8 @@
         "line_number": '"lineno": [0-9]+',
         "function_name": '"funcName": "run"',
         "vdk_job_name": f'"vdk_job_name": "{JOB_NAME}"',
+        "vdk_step_name": '"vdk_step_name": "10_dummy.py"',
+        "vdk_step_type": '"vdk_step_type": "python"',
     },
 }
 
@@ -81,7 +87,8 @@ def test_structlog(log_format):
         # due to the log_level_module config specified in the config.ini of the test job
         # the 'This log statement should not appear' log should not appear in the output logs
         assert (
-            _get_log_containing_s(logs, "This log statement should not appear") is None
+            _get_log_containing_s(
+                logs, "This log statement should not appear") is None
         )
 
         _assert_cases(
@@ -96,8 +103,10 @@ def test_stock_fields_removal(log_format):
     stock_field_reps = STOCK_FIELD_REPRESENTATIONS[log_format]
 
     for removed_field in STOCK_FIELDS:
-        shown_fields = [field for field in STOCK_FIELDS if field != removed_field]
-        vdk_logging_metadata = ",".join(shown_fields) + ",bound_test_key,extra_test_key"
+        shown_fields = [
+            field for field in STOCK_FIELDS if field != removed_field]
+        vdk_logging_metadata = ",".join(
+            shown_fields) + ",bound_test_key,extra_test_key"
 
         with mock.patch.dict(
             os.environ,
@@ -117,7 +126,8 @@ def test_stock_fields_removal(log_format):
 
             # check the rest are shown
             for shown_field in shown_fields:
-                assert re.search(stock_field_reps[shown_field], test_log) is not None
+                assert re.search(
+                    stock_field_reps[shown_field], test_log) is not None
 
 
 @pytest.mark.parametrize("log_format", ["console"])
@@ -138,7 +148,8 @@ def test_custom_format_applied(log_format):
                 assert _matches_custom_format(log)
                 break
         else:
-            pytest.fail("Log statement with no bound context not found in logs")
+            pytest.fail(
+                "Log statement with no bound context not found in logs")
 
 
 @pytest.mark.parametrize("log_format", ["json", "ltsv"])
@@ -162,7 +173,55 @@ def test_custom_format_not_applied_for_non_console_formats(log_format):
                 ), f"Custom format was incorrectly applied for {log_format} format. Log: {log}"
                 break
         else:
-            pytest.fail("Log statement with no bound context not found in logs")
+            pytest.fail(
+                "Log statement with no bound context not found in logs")
+
+
+@pytest.mark.parametrize("log_format", ["console", "json", "ltsv"])
+def test_step_name_step_type(log_format):
+    stock_field_reps = STOCK_FIELD_REPRESENTATIONS[log_format]
+    with mock.patch.dict(
+        os.environ,
+        {
+            "VDK_LOGGING_METADATA": "vdk_step_type,vdk_step_name",
+            "VDK_LOGGING_FORMAT": log_format,
+        },
+    ):
+        logs = _run_job_and_get_logs()
+        test_log = _get_log_containing_s(
+            logs, "Log statement with no bound context")
+        assert re.search(
+            stock_field_reps["vdk_step_name"], test_log) is not None
+        assert re.search(
+            stock_field_reps["vdk_step_type"], test_log) is not None
+
+    with mock.patch.dict(
+        os.environ,
+        {
+            "VDK_LOGGING_METADATA": "vdk_step_name",
+            "VDK_LOGGING_FORMAT": log_format,
+        },
+    ):
+        logs = _run_job_and_get_logs()
+        test_log = _get_log_containing_s(
+            logs, "Log statement with no bound context")
+        assert re.search(
+            stock_field_reps["vdk_step_name"], test_log) is not None
+        assert re.search(stock_field_reps["vdk_step_type"], test_log) is None
+
+    with mock.patch.dict(
+        os.environ,
+        {
+            "VDK_LOGGING_METADATA": "vdk_step_type",
+            "VDK_LOGGING_FORMAT": log_format,
+        },
+    ):
+        logs = _run_job_and_get_logs()
+        test_log = _get_log_containing_s(
+            logs, "Log statement with no bound context")
+        assert re.search(stock_field_reps["vdk_step_name"], test_log) is None
+        assert re.search(
+            stock_field_reps["vdk_step_type"], test_log) is not None
 
 
 def _matches_custom_format(log):

From 0999e391dd0d16f3610558f90f03db936034835e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 12 Dec 2023 14:42:44 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../vdk-structlog/tests/test_structlog.py     | 39 +++++++------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/projects/vdk-plugins/vdk-structlog/tests/test_structlog.py b/projects/vdk-plugins/vdk-structlog/tests/test_structlog.py
index 12620360ec..1ce9585142 100644
--- a/projects/vdk-plugins/vdk-structlog/tests/test_structlog.py
+++ b/projects/vdk-plugins/vdk-structlog/tests/test_structlog.py
@@ -87,8 +87,7 @@ def test_structlog(log_format):
         # due to the log_level_module config specified in the config.ini of the test job
         # the 'This log statement should not appear' log should not appear in the output logs
         assert (
-            _get_log_containing_s(
-                logs, "This log statement should not appear") is None
+            _get_log_containing_s(logs, "This log statement should not appear") is None
         )
 
         _assert_cases(
@@ -103,10 +102,8 @@ def test_stock_fields_removal(log_format):
     stock_field_reps = STOCK_FIELD_REPRESENTATIONS[log_format]
 
     for removed_field in STOCK_FIELDS:
-        shown_fields = [
-            field for field in STOCK_FIELDS if field != removed_field]
-        vdk_logging_metadata = ",".join(
-            shown_fields) + ",bound_test_key,extra_test_key"
+        shown_fields = [field for field in STOCK_FIELDS if field != removed_field]
+        vdk_logging_metadata = ",".join(shown_fields) + ",bound_test_key,extra_test_key"
 
         with mock.patch.dict(
             os.environ,
@@ -126,8 +123,7 @@ def test_stock_fields_removal(log_format):
 
             # check the rest are shown
             for shown_field in shown_fields:
-                assert re.search(
-                    stock_field_reps[shown_field], test_log) is not None
+                assert re.search(stock_field_reps[shown_field], test_log) is not None
 
 
 @pytest.mark.parametrize("log_format", ["console"])
@@ -148,8 +144,7 @@ def test_custom_format_applied(log_format):
                 assert _matches_custom_format(log)
                 break
         else:
-            pytest.fail(
-                "Log statement with no bound context not found in logs")
+            pytest.fail("Log statement with no bound context not found in logs")
 
 
 @pytest.mark.parametrize("log_format", ["json", "ltsv"])
@@ -173,8 +168,7 @@ def test_custom_format_not_applied_for_non_console_formats(log_format):
                 ), f"Custom format was incorrectly applied for {log_format} format. Log: {log}"
                 break
         else:
-            pytest.fail(
-                "Log statement with no bound context not found in logs")
+            pytest.fail("Log statement with no bound context not found in logs")
 
 
 @pytest.mark.parametrize("log_format", ["console", "json", "ltsv"])
@@ -188,12 +182,9 @@ def test_step_name_step_type(log_format):
         },
     ):
         logs = _run_job_and_get_logs()
-        test_log = _get_log_containing_s(
-            logs, "Log statement with no bound context")
-        assert re.search(
-            stock_field_reps["vdk_step_name"], test_log) is not None
-        assert re.search(
-            stock_field_reps["vdk_step_type"], test_log) is not None
+        test_log = _get_log_containing_s(logs, "Log statement with no bound context")
+        assert re.search(stock_field_reps["vdk_step_name"], test_log) is not None
+        assert re.search(stock_field_reps["vdk_step_type"], test_log) is not None
 
     with mock.patch.dict(
         os.environ,
@@ -203,10 +194,8 @@ def test_step_name_step_type(log_format):
         },
     ):
         logs = _run_job_and_get_logs()
-        test_log = _get_log_containing_s(
-            logs, "Log statement with no bound context")
-        assert re.search(
-            stock_field_reps["vdk_step_name"], test_log) is not None
+        test_log = _get_log_containing_s(logs, "Log statement with no bound context")
+        assert re.search(stock_field_reps["vdk_step_name"], test_log) is not None
         assert re.search(stock_field_reps["vdk_step_type"], test_log) is None
 
     with mock.patch.dict(
@@ -217,11 +206,9 @@ def test_step_name_step_type(log_format):
         },
     ):
         logs = _run_job_and_get_logs()
-        test_log = _get_log_containing_s(
-            logs, "Log statement with no bound context")
+        test_log = _get_log_containing_s(logs, "Log statement with no bound context")
         assert re.search(stock_field_reps["vdk_step_name"], test_log) is None
-        assert re.search(
-            stock_field_reps["vdk_step_type"], test_log) is not None
+        assert re.search(stock_field_reps["vdk_step_type"], test_log) is not None
 
 
 def _matches_custom_format(log):