From dffcccb5a9878f6337ab581297a936317c6a1ec5 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 9 May 2023 16:01:44 -0700 Subject: [PATCH 1/2] BUG: SparseDtype requires numpy dtype --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/arrays/sparse/dtype.py | 2 ++ pandas/tests/arrays/sparse/test_astype.py | 16 +--------------- pandas/tests/arrays/sparse/test_dtype.py | 6 ++++++ 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 62d56f684a11d..b98d3f9414ccc 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -424,6 +424,7 @@ Reshaping Sparse ^^^^^^ - Bug in :meth:`arrays.SparseArray.map` allowed the fill value to be included in the sparse values (:issue:`52095`) +- Bug in :class:`SparseDtype` constructor failing to raise ``TypeError`` when given an incompatible ``dtype`` for its subtype, which must be a ``numpy`` dtype (:issue:`??`) - ExtensionArray diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index dadd161ceeb38..e29e21bc38108 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -91,6 +91,8 @@ def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None) -> None: dtype = pandas_dtype(dtype) if is_string_dtype(dtype): dtype = np.dtype("object") + if not isinstance(dtype, np.dtype): + raise TypeError("SparseDtype subtype must be a numpy dtype") if fill_value is None: fill_value = na_value_for_dtype(dtype) diff --git a/pandas/tests/arrays/sparse/test_astype.py b/pandas/tests/arrays/sparse/test_astype.py index 86d69610059b3..d729a31668ade 100644 --- a/pandas/tests/arrays/sparse/test_astype.py +++ b/pandas/tests/arrays/sparse/test_astype.py @@ -3,11 +3,7 @@ from pandas._libs.sparse import IntIndex -from pandas import ( - DataFrame, - Series, - Timestamp, -) +from pandas import Timestamp import pandas._testing as tm from pandas.core.arrays.sparse import ( SparseArray, @@ -135,13 +131,3 @@ def test_astype_dt64_to_int64(self): arr3 = SparseArray(values, dtype=dtype) result3 = arr3.astype("int64") tm.assert_numpy_array_equal(result3, expected) - - -def test_dtype_sparse_with_fill_value_not_present_in_data(): - # GH 49987 - df = DataFrame([["a", 0], ["b", 1], ["b", 2]], columns=["A", "B"]) - result = df["A"].astype(SparseDtype("category", fill_value="c")) - expected = Series( - ["a", "b", "b"], name="A", dtype=SparseDtype("object", fill_value="c") - ) - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index 58fedbd3e4231..f001372eb6a5f 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -207,3 +207,9 @@ def test_repr(): result = str(SparseDtype(object, fill_value="0")) expected = "Sparse[object, '0']" assert result == expected + + +def test_sparse_dtype_subtype_must_be_numpy_dtype(): + msg = "SparseDtype subtype must be a numpy dtype" + with pytest.raises(TypeError, match=msg): + SparseDtype("category", fill_value="c") From 893e9d44f2e2b206d9e8325b33f66ad86eebaf17 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 9 May 2023 16:02:47 -0700 Subject: [PATCH 2/2] GH ref --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/core/arrays/sparse/dtype.py | 1 + pandas/tests/arrays/sparse/test_dtype.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index b98d3f9414ccc..64c8f06349449 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -423,8 +423,8 @@ Reshaping Sparse ^^^^^^ +- Bug in :class:`SparseDtype` constructor failing to raise ``TypeError`` when given an incompatible ``dtype`` for its subtype, which must be a ``numpy`` dtype (:issue:`53160`) - Bug in :meth:`arrays.SparseArray.map` allowed the fill value to be included in the sparse values (:issue:`52095`) -- Bug in :class:`SparseDtype` constructor failing to raise ``TypeError`` when given an incompatible ``dtype`` for its subtype, which must be a ``numpy`` dtype (:issue:`??`) - ExtensionArray diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index e29e21bc38108..5747ff807600d 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -92,6 +92,7 @@ def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None) -> None: if is_string_dtype(dtype): dtype = np.dtype("object") if not isinstance(dtype, np.dtype): + # GH#53160 raise TypeError("SparseDtype subtype must be a numpy dtype") if fill_value is None: diff --git a/pandas/tests/arrays/sparse/test_dtype.py b/pandas/tests/arrays/sparse/test_dtype.py index f001372eb6a5f..88f8577ded5b0 100644 --- a/pandas/tests/arrays/sparse/test_dtype.py +++ b/pandas/tests/arrays/sparse/test_dtype.py @@ -210,6 +210,7 @@ def test_repr(): def test_sparse_dtype_subtype_must_be_numpy_dtype(): + # GH#53160 msg = "SparseDtype subtype must be a numpy dtype" with pytest.raises(TypeError, match=msg): SparseDtype("category", fill_value="c")