Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: convert <NA> values to None instead of stringifying #22321

Merged
merged 1 commit into from
Dec 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion superset/result_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,11 @@ def stringify_values(array: np.ndarray) -> np.ndarray:

with np.nditer(result, flags=["refs_ok"], op_flags=["readwrite"]) as it:
for obj in it:
obj[...] = stringify(obj)
if pd.isna(obj):
# pandas <NA> type cannot be converted to string
obj[pd.isna(obj)] = None
else:
obj[...] = stringify(obj)

return result

Expand Down
41 changes: 41 additions & 0 deletions tests/unit_tests/result_set_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,13 @@
# pylint: disable=import-outside-toplevel, unused-argument


import numpy as np
import pandas as pd
from numpy.core.multiarray import array

from superset.result_set import stringify_values


def test_column_names_as_bytes() -> None:
"""
Test that we can handle column names as bytes.
Expand Down Expand Up @@ -65,3 +72,37 @@ def test_column_names_as_bytes() -> None:
| 1 | 2016-01-27 | 392.444 | 396.843 | 391.782 | 394.972 | 394.972 | 47424400 |
""".strip()
)


def test_stringify_with_null_integers() -> None:
    """
    Test that we can safely handle type errors when an integer column has a null value.

    Every column of a structured object array is passed through
    ``stringify_values``. Columns consisting solely of ``pd.NA`` or containing
    ``None`` are expected to yield ``None`` for those entries, while the
    remaining values are expected in their stringified form (``'"foo"'``,
    ``"true"``).
    """

    data = [
        ("foo", "bar", pd.NA, None),
        ("foo", "bar", pd.NA, True),
        ("foo", "bar", pd.NA, None),
    ]
    numpy_dtype = [
        ("id", "object"),
        ("value", "object"),
        ("num", "object"),
        ("bool", "object"),
    ]

    array2 = np.array(data, dtype=numpy_dtype)
    column_names = ["id", "value", "num", "bool"]

    result_set = np.array([stringify_values(array2[column]) for column in column_names])

    # Build the expectation with the public ``np.array`` constructor rather
    # than the private ``numpy.core.multiarray.array`` alias.
    expected = np.array(
        [
            np.array(['"foo"', '"foo"', '"foo"'], dtype=object),
            np.array(['"bar"', '"bar"', '"bar"'], dtype=object),
            np.array([None, None, None], dtype=object),
            np.array([None, "true", None], dtype=object),
        ]
    )

    assert np.array_equal(result_set, expected)