Skip to content

Commit

Permalink
vdk-structlog: fix filtering of metadata fields for json
Browse files Browse the repository at this point in the history
Why?

Filtering by timestamp, level and vdk_job_name does not work for json formatting

What?

- Introduce logic that inserts the correct fields in the json formatter log record
- Move the metadata filters after the filters that add the job name
- Fix bug in ConsoleMetadataFilters
- Add timestamp and function_name to tests
- Enable json tests
- Add possible values to README.md

How was this tested?

Ran functional tests locally

What kind of change is this?

Bugfix

Signed-off-by: Dilyan Marinov <[email protected]>
  • Loading branch information
Dilyan Marinov committed Nov 3, 2023
1 parent cabf1ba commit 274f352
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 36 deletions.
8 changes: 4 additions & 4 deletions projects/vdk-plugins/vdk-structlog/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ pip install vdk-structlog

(`vdk config-help` is useful command to browse all config options of your installation of vdk)

| Name | Description | (example) Value |
| ---------------- | --------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ |
| logging_metadata | Configure the metadata that will be output along with the log message | "timestamp, level, logger_name, file_name, line_number, function_name, vdk_job_name, vdk_step_name, vdk_step_type" |
| logging_format | Configure the logging output format. Available formats: json, console | "console" |
| Name | Description | Example Value | Possible Values |
| ---------------- | --------------------------------------------------------------------- |---------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------|
| logging_metadata | Configure the metadata that will be output along with the log message | "timestamp, level, logger_name, file_name, vdk_job_name | "timestamp, level, logger_name, file_name, line_number, function_name, vdk_job_name, vdk_step_name, vdk_step_type" |
| logging_format | Configure the logging output format. Available formats: json, console | "console" | "console", "json" |

### Example: Configure metadata

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
}

JSON_STRUCTLOG_LOGGING_METADATA_DEFAULT = {
"timestamp": "%(timestamp)s",
"level": "%(level)s",
"timestamp": "%(created)s",
"level": "%(levelname)s",
"logger_name": "%(name)s",
"file_name": "%(filename)s",
"line_number": "%(lineno)s",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Set

from vdk.plugin.structlog.constants import RECORD_DEFAULT_FIELDS
from vdk.plugin.structlog.constants import STRUCTLOG_METADATA_FIELDS


class JsonMetadataFilter(Filter):
Expand All @@ -18,7 +19,10 @@ class JsonMetadataFilter(Filter):

def __init__(self, key_set: Set):
super().__init__()
self._key_set = key_set
self._key_set = {
STRUCTLOG_METADATA_FIELDS[k] if k in STRUCTLOG_METADATA_FIELDS else k
for k in key_set
}

def filter(self, record: LogRecord) -> LogRecord:
fields = vars(record)
Expand All @@ -39,11 +43,7 @@ class ConsoleMetadataFilter(Filter):

def __init__(self, key_set: Set):
super().__init__()
temp = [
RECORD_DEFAULT_FIELDS[x] if x in RECORD_DEFAULT_FIELDS else x
for x in key_set
]
self._key_set = set(temp)
self._key_set = key_set

def filter(self, record: LogRecord) -> LogRecord:
fields = vars(record)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,17 @@ def add_fields(
record: LogRecord,
message_dict: Dict[str, Any],
) -> None:
super().add_fields(log_record, record, message_dict)

# remove log record fields that are set to empty string
keys = [x for x in log_record.keys() if not log_record[x]]
keys = [x for x in record.__dict__.keys() if record.__dict__[x] == ""]
for key in keys:
del log_record[key]

if not log_record.get("timestamp"):
now = record.created
log_record["timestamp"] = now
if log_record.get("level"):
log_record["level"] = log_record["level"].upper()
else:
log_record["level"] = record.levelname
del record.__dict__[key]
super().add_fields(log_record, record, message_dict)
if log_record.get("levelname"):
log_record["level"] = record.levelname.upper()
del log_record["levelname"]
if log_record.get("created"):
log_record["timestamp"] = record.created
del log_record["created"]


class ConsoleFormatter(Formatter):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def vdk_configure(self, config_builder: ConfigurationBuilder):
description=(
f"Possible values: {STRUCTLOG_LOGGING_METADATA_ALL_KEYS}"
"User-defined key-value pairs added to the logger's context will be displayed after the metadata, but before the message"
"Keys for user-defined key-value pairs have to be added in this config option for the values to be displayed in the metadata"
),
)

Expand Down Expand Up @@ -121,8 +122,8 @@ def initialize_job(self, context: JobContext) -> None:

handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)
handler.addFilter(metadata_filter)
handler.addFilter(job_name_adder)
handler.addFilter(metadata_filter)

root_logger.addHandler(handler)

Expand All @@ -148,8 +149,8 @@ def run_job(self, context: JobContext) -> Optional[ExecutionResult]:

handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)
handler.addFilter(metadata_filter)
handler.addFilter(job_name_adder)
handler.addFilter(metadata_filter)

root_logger.addHandler(handler)

Expand Down
24 changes: 14 additions & 10 deletions projects/vdk-plugins/vdk-structlog/tests/test_structlog.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,31 +18,37 @@
EXCLUDED_BOUND_TEST_KEY = "excluded_bound_test_key"
EXCLUDED_BOUND_TEST_VALUE = "excluded_bound_test_value"

# TODO: add vdk_step_name
# TODO: add vdk_step_type
STOCK_FIELDS = [
"timestamp",
"level",
"file_name",
"line_number",
"function_name",
"vdk_job_name",
] # TODO: add timestamp once bug is resolved
]
STOCK_FIELD_REPRESENTATIONS = {
"console": {
"timestamp": r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}",
"level": r"\[INFO ]",
"file_name": r"10_dummy\.py",
"line_number": ":[0-9]+",
"line_number": r"\s:[0-9]+",
"function_name": "run",
"vdk_job_name": JOB_NAME,
},
"json": {
"timestamp": r'"timestamp": \d+\.\d+',
"level": '"level": "INFO"',
"file_name": '"filename:":"10_dummy.py"',
"file_name": '"filename": "10_dummy.py"',
"line_number": '"lineno": [0-9]+',
"function_name": '"funcName": "run"',
"vdk_job_name": f'"vdk_job_name": "{JOB_NAME}"',
},
}


@pytest.mark.parametrize(
"log_format", ["console"]
) # TODO: replace with ["console","json"] once the issue where fields can't be excluded in JSON is fixed
@pytest.mark.parametrize("log_format", ["console", "json"])
def test_structlog(log_format):
with mock.patch.dict(
os.environ,
Expand Down Expand Up @@ -70,9 +76,7 @@ def test_structlog(log_format):
)


@pytest.mark.parametrize(
"log_format", ["console"]
) # TODO: replace with ["console", "json"] once the issue where fields can't be excluded in JSON is fixed
@pytest.mark.parametrize("log_format", ["console", "json"])
def test_stock_fields_removal(log_format):
stock_field_reps = STOCK_FIELD_REPRESENTATIONS[log_format]

Expand All @@ -93,7 +97,7 @@ def test_stock_fields_removal(log_format):
logs, "Log statement with bound context and extra context"
)

# check that the removed_field in not shown in the log
# check that the removed_field is not shown in the log
assert re.search(stock_field_reps[removed_field], test_log) is None

# check the rest are shown
Expand Down

0 comments on commit 274f352

Please sign in to comment.