From a43c70cbe8a61de5c882fb4e02dd1a802f1222e5 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 23 Jan 2020 16:50:44 +0000 Subject: [PATCH 1/4] fix(bigquery): do not insert missing fields as explicit None --- bigquery/google/cloud/bigquery/_helpers.py | 13 ++++++++----- bigquery/tests/unit/test__helpers.py | 4 +++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/bigquery/google/cloud/bigquery/_helpers.py b/bigquery/google/cloud/bigquery/_helpers.py index 98eadb0a2f8e..fe025973f817 100644 --- a/bigquery/google/cloud/bigquery/_helpers.py +++ b/bigquery/google/cloud/bigquery/_helpers.py @@ -422,13 +422,16 @@ def _record_field_to_json(fields, row_value): record = {} isdict = isinstance(row_value, dict) + MISSING = object() # a sentinel to distinguish from None values + for subindex, subfield in enumerate(fields): subname = subfield.name - if isdict: - subvalue = row_value.get(subname) - else: - subvalue = row_value[subindex] - record[subname] = _field_to_json(subfield, subvalue) + subvalue = row_value.get(subname, MISSING) if isdict else row_value[subindex] + + # we should not convert missing values to an explicit None + if subvalue is not MISSING: + record[subname] = _field_to_json(subfield, subvalue) + return record diff --git a/bigquery/tests/unit/test__helpers.py b/bigquery/tests/unit/test__helpers.py index 6d92b4de73ba..ecba56209595 100644 --- a/bigquery/tests/unit/test__helpers.py +++ b/bigquery/tests/unit/test__helpers.py @@ -863,7 +863,9 @@ def test_w_missing_nullable(self): ] original = {"one": 42} converted = self._call_fut(fields, original) - self.assertEqual(converted, {"one": "42", "two": None}) + + # missing fields should not be converted to an explicit None + self.assertEqual(converted, {"one": "42"}) class Test_field_to_json(unittest.TestCase): From 2bd26737ebeb9c1351b71fc9d80080a634fa9736 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 24 Jan 2020 11:22:07 +0000 Subject: [PATCH 2/4] Omit all None values from JSON request body --- bigquery/google/cloud/bigquery/_helpers.py | 8 +++----- bigquery/tests/unit/test__helpers.py | 12 ++++++++++++ bigquery/tests/unit/test_client.py | 22 ++++++++++++---------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/bigquery/google/cloud/bigquery/_helpers.py b/bigquery/google/cloud/bigquery/_helpers.py index fe025973f817..21a8e3636d24 100644 --- a/bigquery/google/cloud/bigquery/_helpers.py +++ b/bigquery/google/cloud/bigquery/_helpers.py @@ -422,14 +422,12 @@ def _record_field_to_json(fields, row_value): record = {} isdict = isinstance(row_value, dict) - MISSING = object() # a sentinel to distinguish from None values - for subindex, subfield in enumerate(fields): subname = subfield.name - subvalue = row_value.get(subname, MISSING) if isdict else row_value[subindex] + subvalue = row_value.get(subname) if isdict else row_value[subindex] - # we should not convert missing values to an explicit None - if subvalue is not MISSING: + # None values are unconditionally omitted + if subvalue is not None: record[subname] = _field_to_json(subfield, subvalue) return record diff --git a/bigquery/tests/unit/test__helpers.py b/bigquery/tests/unit/test__helpers.py index ecba56209595..46fdad6e31ba 100644 --- a/bigquery/tests/unit/test__helpers.py +++ b/bigquery/tests/unit/test__helpers.py @@ -867,6 +867,18 @@ def test_w_missing_nullable(self): # missing fields should not be converted to an explicit None self.assertEqual(converted, {"one": "42"}) + def test_w_explicit_none_value(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + _make_field("BOOL", name="three", mode="REPEATED"), + ] + original = {"three": None, "one": 42, "two": None} + converted = self._call_fut(fields, original) + + # None values should be dropped regardless of the field type + self.assertEqual(converted, {"one": "42"}) + class Test_field_to_json(unittest.TestCase): def _call_fut(self, field, value): diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index cce4bc532074..86c6f9883412 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -4636,10 +4636,13 @@ def test_insert_rows_w_schema(self): ] def _row_data(row): + result = {"full_name": row[0], "age": str(row[1])} joined = row[2] - if isinstance(row[2], datetime.datetime): - joined = _microseconds_from_datetime(joined) * 1e-6 - return {"full_name": row[0], "age": str(row[1]), "joined": joined} + if joined is not None: + if isinstance(joined, datetime.datetime): + joined = _microseconds_from_datetime(joined) * 1e-6 + result["joined"] = joined + return result SENT = { "rows": [ @@ -4708,7 +4711,10 @@ def test_insert_rows_w_list_of_dictionaries(self): def _row_data(row): joined = row["joined"] - if isinstance(joined, datetime.datetime): + if joined is None: + row = copy.deepcopy(row) + del row["joined"] + elif isinstance(joined, datetime.datetime): row["joined"] = _microseconds_from_datetime(joined) * 1e-6 row["age"] = str(row["age"]) return row @@ -4927,9 +4933,8 @@ def test_insert_rows_w_repeated_fields(self): }, { "json": { - "color": None, "items": [], - "structs": [{"score": None, "times": [], "distances": [3.5]}], + "structs": [{"times": [], "distances": [3.5]}], }, "insertId": "1", }, @@ -4996,10 +5001,7 @@ def test_insert_rows_w_record_schema(self): }, "insertId": "1", }, - { - "json": {"full_name": "Wylma Phlyntstone", "phone": None}, - "insertId": "2", - }, + {"json": {"full_name": "Wylma Phlyntstone"}, "insertId": "2"}, ] } From de1ca4c0db56c345a46244271b15dd2539a1daa5 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 24 Jan 2020 11:35:12 +0000 Subject: [PATCH 3/4] Add an extra test for all missing row values --- bigquery/tests/unit/test__helpers.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/bigquery/tests/unit/test__helpers.py b/bigquery/tests/unit/test__helpers.py index 46fdad6e31ba..fa6d27c981d8 100644 --- a/bigquery/tests/unit/test__helpers.py +++ b/bigquery/tests/unit/test__helpers.py @@ -856,7 +856,7 @@ def test_w_non_empty_dict(self): converted = self._call_fut(fields, original) self.assertEqual(converted, {"one": "42", "two": "two"}) - def test_w_missing_nullable(self): + def test_w_some_missing_nullables(self): fields = [ _make_field("INT64", name="one", mode="NULLABLE"), _make_field("STRING", name="two", mode="NULLABLE"), @@ -867,6 +867,17 @@ def test_w_missing_nullable(self): # missing fields should not be converted to an explicit None self.assertEqual(converted, {"one": "42"}) + def test_w_all_missing_nullables(self): + fields = [ + _make_field("INT64", name="one", mode="NULLABLE"), + _make_field("STRING", name="two", mode="NULLABLE"), + ] + original = {} + converted = self._call_fut(fields, original) + + # we should get an empty dict, not None + self.assertEqual(converted, {}) + def test_w_explicit_none_value(self): fields = [ _make_field("INT64", name="one", mode="NULLABLE"), From d15077d4eb5bbbc798e52c27e38c00c70c079a34 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 31 Jan 2020 07:32:18 +0000 Subject: [PATCH 4/4] Flatten a block of code a bit --- bigquery/tests/unit/test_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index 86c6f9883412..f20e774d0867 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -4638,9 +4638,9 @@ def test_insert_rows_w_schema(self): def _row_data(row): result = {"full_name": row[0], "age": str(row[1])} joined = row[2] + if isinstance(joined, datetime.datetime): + joined = _microseconds_from_datetime(joined) * 1e-6 if joined is not None: - if isinstance(joined, datetime.datetime): - joined = _microseconds_from_datetime(joined) * 1e-6 result["joined"] = joined return result