Skip to content

Commit

Permalink
vdk-core: add datetime to decimal json encoder
Browse files Browse the repository at this point in the history
Why?

As part of the work for vdk-oracle, we need to be able to pass datetime objects
for ingestion. This is not possible with the current verification.

What?

Add datetime to the decimal json encoder
Add bytes to the decimal json encoder
Make the encoder return timestamp for the datetime object
Make the encoder serialize bytes as a list of ints
Rename DecimalJsonEncoder to IngesterJsonEncoder

How was this tested

Ran locally with vdk-oracle
Unit and functional tests
CI tests

What kind of change is this?

Feature/non-breaking

Signed-off-by: Dilyan Marinov <[email protected]>
  • Loading branch information
Dilyan Marinov committed Nov 22, 2023
1 parent 74d567c commit 095a5ef
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
IngesterConfiguration,
)
from vdk.internal.builtin_plugins.ingestion.ingester_utils import AtomicCounter
from vdk.internal.builtin_plugins.ingestion.ingester_utils import DecimalJsonEncoder
from vdk.internal.builtin_plugins.ingestion.ingester_utils import IngesterJsonEncoder
from vdk.internal.core import errors
from vdk.internal.core.errors import ResolvableBy

Expand Down Expand Up @@ -679,7 +679,7 @@ def __verify_payload_format(self, payload_dict: dict):
# Check if payload dict is valid json
# TODO: optimize the check - we should not need to serialize the payload every time
try:
json.dumps(payload_dict, cls=DecimalJsonEncoder)
json.dumps(payload_dict, cls=IngesterJsonEncoder)
except (TypeError, OverflowError, Exception) as e:
errors.report_and_throw(
JsonSerializationIngestionException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,20 @@ def __repr__(self) -> str:
return str(self)


class DecimalJsonEncoder(JSONEncoder):
class IngesterJsonEncoder(JSONEncoder):
"""
This class is used to avoid an issue with the __verify_payload_format serialization check.
Normally, including data of type Decimal would cause that check to fail so we've amended
the default JsonEncoder object used to convert Decimal values to floats to avoid this issue.
Normally, including data of type Decimal and datetime would cause that check to fail so we've amended
the default JsonEncoder object used to convert Decimal and datetime values to floats to avoid this issue.
"""

def default(self, obj):
if isinstance(obj, datetime.datetime):
return obj.timestamp()
if isinstance(obj, Decimal):
return float(obj)
if isinstance(obj, bytes):
return list(obj)
return super().default(obj)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ def run(job_input: IJobInput):
payload = payload_str
if payload_str == "None":
payload = None
elif payload_str == "date":
payload = {"key1": datetime.utcnow()}
elif payload_str == "logger":
payload = {"key1": log}

job_input.send_object_for_ingestion(
payload=payload, destination_table="object_table", method="memory"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_payload_verification_unserializable():
"run",
jobs_path_from_caller_directory("test-ingest-bad-payload-job"),
"--arguments",
'{"payload": "date"}',
'{"payload": "logger"}',
]
)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,54 @@
# Copyright 2021-2023 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0
import datetime
import json
from decimal import Decimal

from pytest import raises
from vdk.internal.builtin_plugins.ingestion.ingester_utils import DecimalJsonEncoder
from vdk.internal.builtin_plugins.ingestion.ingester_utils import IngesterJsonEncoder


def test_decimal_json_encoder():
payload_no_decimal = {"a": 1, "b": 2}
def test_ingester_json_encoder():
payload_no_specials = {"a": 1, "b": 2}
payload_with_decimal = {"a": Decimal(1), "b": Decimal(2)}
payload_with_datetime = {
"a": datetime.datetime.fromtimestamp(1700641925),
"b": datetime.datetime.fromtimestamp(1700641925),
}
payload_with_bytes = {
"a": b"enoded string bla bla",
"b": b"another encoded string, look at me, I'm so special",
}

assert json.dumps(payload_no_decimal) == '{"a": 1, "b": 2}'
assert json.dumps(payload_no_specials) == '{"a": 1, "b": 2}'

with raises(TypeError):
json.dumps(payload_with_decimal)

assert json.dumps(payload_no_decimal, cls=DecimalJsonEncoder) == '{"a": 1, "b": 2}'
with raises(TypeError):
json.dumps(payload_with_datetime)

with raises(TypeError):
json.dumps(payload_with_bytes)

assert (
json.dumps(payload_no_specials, cls=IngesterJsonEncoder) == '{"a": 1, "b": 2}'
)

assert (
json.dumps(payload_with_decimal, cls=DecimalJsonEncoder)
json.dumps(payload_with_decimal, cls=IngesterJsonEncoder)
== '{"a": 1.0, "b": 2.0}'
)

assert (
json.dumps(payload_with_datetime, cls=IngesterJsonEncoder)
== '{"a": 1700641925.0, "b": 1700641925.0}'
)

assert json.dumps(payload_with_bytes, cls=IngesterJsonEncoder) == (
'{"a": [101, 110, 111, 100, 101, 100, 32, 115, 116, 114, 105, 110, 103, 32, '
'98, 108, 97, 32, 98, 108, 97], "b": [97, 110, 111, 116, 104, 101, 114, 32, '
"101, 110, 99, 111, 100, 101, 100, 32, 115, 116, 114, 105, 110, 103, 44, 32, "
"108, 111, 111, 107, 32, 97, 116, 32, 109, 101, 44, 32, 73, 39, 109, 32, 115, "
"111, 32, 115, 112, 101, 99, 105, 97, 108]}"
)

0 comments on commit 095a5ef

Please sign in to comment.