Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vdk-structlog: fix filtering of metadata fields for json #2874

Merged
merged 1 commit into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions projects/vdk-plugins/vdk-structlog/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ pip install vdk-structlog

(`vdk config-help` is useful command to browse all config options of your installation of vdk)

| Name | Description | (example) Value |
| ---------------- | --------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ |
| logging_metadata | Configure the metadata that will be output along with the log message | "timestamp, level, logger_name, file_name, line_number, function_name, vdk_job_name, vdk_step_name, vdk_step_type" |
| logging_format | Configure the logging output format. Available formats: json, console | "console" |
| Name | Description | Example Value | Possible Values |
| ---------------- | --------------------------------------------------------------------- |---------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------|
| logging_metadata | Configure the metadata that will be output along with the log message | "timestamp, level, logger_name, file_name, vdk_job_name | "timestamp, level, logger_name, file_name, line_number, function_name, vdk_job_name, vdk_step_name, vdk_step_type" |
| logging_format | Configure the logging output format. Available formats: json, console | "console" | "console", "json" |

### Example: Configure metadata

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
}

JSON_STRUCTLOG_LOGGING_METADATA_DEFAULT = {
"timestamp": "%(timestamp)s",
"level": "%(level)s",
"timestamp": "%(created)s",
"level": "%(levelname)s",
"logger_name": "%(name)s",
"file_name": "%(filename)s",
"line_number": "%(lineno)s",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Set

from vdk.plugin.structlog.constants import RECORD_DEFAULT_FIELDS
from vdk.plugin.structlog.constants import STRUCTLOG_METADATA_FIELDS


class JsonMetadataFilter(Filter):
Expand All @@ -18,7 +19,10 @@ class JsonMetadataFilter(Filter):

def __init__(self, key_set: Set):
super().__init__()
self._key_set = key_set
self._key_set = {
STRUCTLOG_METADATA_FIELDS[k] if k in STRUCTLOG_METADATA_FIELDS else k
for k in key_set
}

def filter(self, record: LogRecord) -> LogRecord:
fields = vars(record)
Expand All @@ -39,11 +43,7 @@ class ConsoleMetadataFilter(Filter):

def __init__(self, key_set: Set):
super().__init__()
temp = [
RECORD_DEFAULT_FIELDS[x] if x in RECORD_DEFAULT_FIELDS else x
for x in key_set
]
self._key_set = set(temp)
self._key_set = key_set

def filter(self, record: LogRecord) -> LogRecord:
fields = vars(record)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,17 @@ def add_fields(
record: LogRecord,
message_dict: Dict[str, Any],
) -> None:
super().add_fields(log_record, record, message_dict)

# remove log record fields that are set to empty string
keys = [x for x in log_record.keys() if not log_record[x]]
keys = [x for x in record.__dict__.keys() if record.__dict__[x] == ""]
for key in keys:
del log_record[key]

if not log_record.get("timestamp"):
now = record.created
log_record["timestamp"] = now
if log_record.get("level"):
log_record["level"] = log_record["level"].upper()
else:
log_record["level"] = record.levelname
del record.__dict__[key]
super().add_fields(log_record, record, message_dict)
if log_record.get("levelname"):
log_record["level"] = record.levelname.upper()
del log_record["levelname"]
if log_record.get("created"):
log_record["timestamp"] = record.created
del log_record["created"]


class ConsoleFormatter(Formatter):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def vdk_configure(self, config_builder: ConfigurationBuilder):
description=(
f"Possible values: {STRUCTLOG_LOGGING_METADATA_ALL_KEYS}"
"User-defined key-value pairs added to the logger's context will be displayed after the metadata, but before the message"
"Keys for user-defined key-value pairs have to be added in this config option for the values to be displayed in the metadata"
),
)

Expand Down Expand Up @@ -121,8 +122,8 @@ def initialize_job(self, context: JobContext) -> None:

handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)
handler.addFilter(metadata_filter)
handler.addFilter(job_name_adder)
handler.addFilter(metadata_filter)

root_logger.addHandler(handler)

Expand All @@ -148,8 +149,8 @@ def run_job(self, context: JobContext) -> Optional[ExecutionResult]:

handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)
handler.addFilter(metadata_filter)
handler.addFilter(job_name_adder)
handler.addFilter(metadata_filter)

root_logger.addHandler(handler)

Expand Down
24 changes: 14 additions & 10 deletions projects/vdk-plugins/vdk-structlog/tests/test_structlog.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,31 +18,37 @@
EXCLUDED_BOUND_TEST_KEY = "excluded_bound_test_key"
EXCLUDED_BOUND_TEST_VALUE = "excluded_bound_test_value"

# TODO: add vdk_step_name
# TODO: add vdk_step_type
STOCK_FIELDS = [
"timestamp",
"level",
"file_name",
"line_number",
"function_name",
"vdk_job_name",
] # TODO: add timestamp once bug is resolved
]
STOCK_FIELD_REPRESENTATIONS = {
"console": {
"timestamp": r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}",
"level": r"\[INFO ]",
"file_name": r"10_dummy\.py",
"line_number": ":[0-9]+",
"line_number": r"\s:[0-9]+",
"function_name": "run",
"vdk_job_name": JOB_NAME,
},
"json": {
"timestamp": r'"timestamp": \d+\.\d+',
"level": '"level": "INFO"',
"file_name": '"filename:":"10_dummy.py"',
"file_name": '"filename": "10_dummy.py"',
"line_number": '"lineno": [0-9]+',
"function_name": '"funcName": "run"',
"vdk_job_name": f'"vdk_job_name": "{JOB_NAME}"',
},
}


@pytest.mark.parametrize(
"log_format", ["console"]
) # TODO: replace with ["console","json"] once the issue where fields can't be excluded in JSON is fixed
@pytest.mark.parametrize("log_format", ["console", "json"])
def test_structlog(log_format):
with mock.patch.dict(
os.environ,
Expand Down Expand Up @@ -70,9 +76,7 @@ def test_structlog(log_format):
)


@pytest.mark.parametrize(
"log_format", ["console"]
) # TODO: replace with ["console", "json"] once the issue where fields can't be excluded in JSON is fixed
@pytest.mark.parametrize("log_format", ["console", "json"])
def test_stock_fields_removal(log_format):
stock_field_reps = STOCK_FIELD_REPRESENTATIONS[log_format]

Expand All @@ -93,7 +97,7 @@ def test_stock_fields_removal(log_format):
logs, "Log statement with bound context and extra context"
)

# check that the removed_field in not shown in the log
# check that the removed_field is not shown in the log
assert re.search(stock_field_reps[removed_field], test_log) is None

# check the rest are shown
Expand Down