From 0eb478c0416235db81e2aa15d7238cf252b84ec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Thu, 4 Apr 2024 14:41:22 -0600 Subject: [PATCH] fix(taps): Unmapped sub-fields in object-type fields are now no longer dropped when the field declares `additionalProperties` (#2301) * Add failing test * Implement fix * Some refactoring --- singer_sdk/helpers/_typing.py | 57 ++++++++++++++++++++++++----------- tests/core/test_typing.py | 21 +++++++++++++ 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/singer_sdk/helpers/_typing.py b/singer_sdk/helpers/_typing.py index fdaaecd6e..74ed59cf5 100644 --- a/singer_sdk/helpers/_typing.py +++ b/singer_sdk/helpers/_typing.py @@ -393,7 +393,7 @@ def conform_record_data_types( # TODO: This is in dire need of refactoring. It's a mess. -def _conform_record_data_types( # noqa: PLR0912 +def _conform_record_data_types( input_object: dict[str, t.Any], schema: dict, level: TypeConformanceLevel, @@ -421,30 +421,22 @@ def _conform_record_data_types( # noqa: PLR0912 for property_name, elem in input_object.items(): property_path = property_name if parent is None else f"{parent}.{property_name}" if property_name not in schema["properties"]: + if schema.get("additionalProperties"): + output_object[property_name] = elem unmapped_properties.append(property_path) continue property_schema = schema["properties"][property_name] if isinstance(elem, list) and is_uniform_list(property_schema): if level == TypeConformanceLevel.RECURSIVE: - item_schema = property_schema["items"] - output = [] - for item in elem: - if is_object_type(item_schema) and isinstance(item, dict): - ( - output_item, - sub_unmapped_properties, - ) = _conform_record_data_types( - item, - item_schema, - level, - property_path, - ) - unmapped_properties.extend(sub_unmapped_properties) - output.append(output_item) - else: - output.append(_conform_primitive_property(item, item_schema)) + output, sub_unmapped_properties = _conform_uniform_list( + elem, + path=property_path, + schema=property_schema, + level=level, + ) output_object[property_name] = output + unmapped_properties.extend(sub_unmapped_properties) else: output_object[property_name] = elem elif ( @@ -473,6 +465,35 @@ def _conform_record_data_types( # noqa: PLR0912 return output_object, unmapped_properties +def _conform_uniform_list( + element: list, + *, + path: str, + schema: dict, + level: TypeConformanceLevel, +) -> tuple[list, list[str]]: + item_schema = schema["items"] + unmapped_properties = [] + output = [] + for item in element: + if is_object_type(item_schema) and isinstance(item, dict): + ( + output_item, + sub_unmapped_properties, + ) = _conform_record_data_types( + item, + item_schema, + level, + path, + ) + unmapped_properties.extend(sub_unmapped_properties) + output.append(output_item) + else: + output.append(_conform_primitive_property(item, item_schema)) + + return output, unmapped_properties + + def _conform_primitive_property( # noqa: PLR0911 elem: t.Any, # noqa: ANN401 property_schema: dict, diff --git a/tests/core/test_typing.py b/tests/core/test_typing.py index 15d56ccce..66182c52d 100644 --- a/tests/core/test_typing.py +++ b/tests/core/test_typing.py @@ -263,6 +263,27 @@ def test_object_arrays_remove_types(caplog: pytest.LogCaptureFixture): ) +def test_conform_object_additional_properties(): + schema = PropertiesList( + Property( + "object", + PropertiesList(additional_properties=True), + ), + ).to_dict() + + record = {"object": {"extra": "value"}} + expected_output = {"object": {"extra": "value"}} + + actual_output = conform_record_data_types( + "test_stream", + record, + schema, + TypeConformanceLevel.RECURSIVE, + logger, + ) + assert actual_output == expected_output + + def test_conform_primitives(): assert ( _conform_primitive_property(