Skip to content

Commit

Permalink
vdk-core: add datetime and bytes to decimal json encoder (#2924)
Browse files Browse the repository at this point in the history
## Why?

As part of the work for vdk-oracle, we need to be able to pass datetime
objects for ingestion. This is not possible with the current
verification.

## What?

Add datetime to the decimal json encoder
Make the encoder return timestamp for the datetime object Rename
DecimalJsonEncoder to IngesterJsonEncoder

## How was this tested

Ran locally with vdk-oracle
CI tests

## What kind of change is this?

Feature/non-breaking

Signed-off-by: Dilyan Marinov <[email protected]>
Co-authored-by: Dilyan Marinov <[email protected]>
  • Loading branch information
DeltaMichael and Dilyan Marinov authored Nov 24, 2023
1 parent 8be388d commit 4ca6338
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
IngesterConfiguration,
)
from vdk.internal.builtin_plugins.ingestion.ingester_utils import AtomicCounter
from vdk.internal.builtin_plugins.ingestion.ingester_utils import DecimalJsonEncoder
from vdk.internal.builtin_plugins.ingestion.ingester_utils import IngesterJsonEncoder
from vdk.internal.core import errors
from vdk.internal.core.errors import ResolvableBy

Expand Down Expand Up @@ -679,7 +679,7 @@ def __verify_payload_format(self, payload_dict: dict):
# Check if payload dict is valid json
# TODO: optimize the check - we should not need to serialize the payload every time
try:
json.dumps(payload_dict, cls=DecimalJsonEncoder)
json.dumps(payload_dict, cls=IngesterJsonEncoder)
except (TypeError, OverflowError, Exception) as e:
errors.report_and_throw(
JsonSerializationIngestionException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,20 @@ def __repr__(self) -> str:
return str(self)


class DecimalJsonEncoder(JSONEncoder):
class IngesterJsonEncoder(JSONEncoder):
"""
This class is used to avoid an issue with the __verify_payload_format serialization check.
Normally, including data of type Decimal would cause that check to fail so we've amended
the default JsonEncoder object used to convert Decimal values to floats to avoid this issue.
Normally, including data of type Decimal and datetime would cause that check to fail so we've amended
the default JsonEncoder object used to convert Decimal and datetime values to floats to avoid this issue.
"""

def default(self, obj):
if isinstance(obj, datetime.datetime):
return obj.timestamp()
if isinstance(obj, Decimal):
return float(obj)
if isinstance(obj, bytes):
return list(obj)
return super().default(obj)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ def run(job_input: IJobInput):
payload = payload_str
if payload_str == "None":
payload = None
elif payload_str == "date":
payload = {"key1": datetime.utcnow()}
elif payload_str == "unserializable":
payload = {"key1": log}

job_input.send_object_for_ingestion(
payload=payload, destination_table="object_table", method="memory"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_payload_verification_unserializable():
"run",
jobs_path_from_caller_directory("test-ingest-bad-payload-job"),
"--arguments",
'{"payload": "date"}',
'{"payload": "unserializable"}',
]
)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,24 +1,54 @@
# Copyright 2021-2023 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0
import datetime
import json
from decimal import Decimal

from pytest import raises
from vdk.internal.builtin_plugins.ingestion.ingester_utils import DecimalJsonEncoder
from vdk.internal.builtin_plugins.ingestion.ingester_utils import IngesterJsonEncoder


def test_decimal_json_encoder():
payload_no_decimal = {"a": 1, "b": 2}
def test_ingester_json_encoder():
payload_no_specials = {"a": 1, "b": 2}
payload_with_decimal = {"a": Decimal(1), "b": Decimal(2)}
payload_with_datetime = {
"a": datetime.datetime.fromtimestamp(1700641925),
"b": datetime.datetime.fromtimestamp(1700641925),
}
payload_with_bytes = {
"a": b"enoded string bla bla",
"b": b"another encoded string, look at me, I'm so special",
}

assert json.dumps(payload_no_decimal) == '{"a": 1, "b": 2}'
assert json.dumps(payload_no_specials) == '{"a": 1, "b": 2}'

with raises(TypeError):
json.dumps(payload_with_decimal)

assert json.dumps(payload_no_decimal, cls=DecimalJsonEncoder) == '{"a": 1, "b": 2}'
with raises(TypeError):
json.dumps(payload_with_datetime)

with raises(TypeError):
json.dumps(payload_with_bytes)

assert (
json.dumps(payload_no_specials, cls=IngesterJsonEncoder) == '{"a": 1, "b": 2}'
)

assert (
json.dumps(payload_with_decimal, cls=DecimalJsonEncoder)
json.dumps(payload_with_decimal, cls=IngesterJsonEncoder)
== '{"a": 1.0, "b": 2.0}'
)

assert (
json.dumps(payload_with_datetime, cls=IngesterJsonEncoder)
== '{"a": 1700641925.0, "b": 1700641925.0}'
)

assert json.dumps(payload_with_bytes, cls=IngesterJsonEncoder) == (
'{"a": [101, 110, 111, 100, 101, 100, 32, 115, 116, 114, 105, 110, 103, 32, '
'98, 108, 97, 32, 98, 108, 97], "b": [97, 110, 111, 116, 104, 101, 114, 32, '
"101, 110, 99, 111, 100, 101, 100, 32, 115, 116, 114, 105, 110, 103, 44, 32, "
"108, 111, 111, 107, 32, 97, 116, 32, 109, 101, 44, 32, 73, 39, 109, 32, 115, "
"111, 32, 115, 112, 101, 99, 105, 97, 108]}"
)

0 comments on commit 4ca6338

Please sign in to comment.