Skip to content

Commit

Permalink
BUG: SparseDtype requires numpy dtype (pandas-dev#53160)
Browse files Browse the repository at this point in the history
* BUG: SparseDtype requires numpy dtype

* GH ref
  • Loading branch information
jbrockmendel authored and Yi Wei committed May 19, 2023
1 parent bbf5614 commit 49a2b67
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 15 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ Reshaping

Sparse
^^^^^^
- Bug in :class:`SparseDtype` constructor failing to raise ``TypeError`` when given an incompatible ``dtype`` for its subtype, which must be a ``numpy`` dtype (:issue:`53160`)
- Bug in :meth:`arrays.SparseArray.map` allowed the fill value to be included in the sparse values (:issue:`52095`)
-

Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/sparse/dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None) -> None:
dtype = pandas_dtype(dtype)
if is_string_dtype(dtype):
dtype = np.dtype("object")
if not isinstance(dtype, np.dtype):
# GH#53160
raise TypeError("SparseDtype subtype must be a numpy dtype")

if fill_value is None:
fill_value = na_value_for_dtype(dtype)
Expand Down
16 changes: 1 addition & 15 deletions pandas/tests/arrays/sparse/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,7 @@

from pandas._libs.sparse import IntIndex

from pandas import (
DataFrame,
Series,
Timestamp,
)
from pandas import Timestamp
import pandas._testing as tm
from pandas.core.arrays.sparse import (
SparseArray,
Expand Down Expand Up @@ -135,13 +131,3 @@ def test_astype_dt64_to_int64(self):
arr3 = SparseArray(values, dtype=dtype)
result3 = arr3.astype("int64")
tm.assert_numpy_array_equal(result3, expected)


def test_dtype_sparse_with_fill_value_not_present_in_data():
    """Cast a string column to a sparse dtype whose fill value is absent.

    Regression test for GH 49987: the column holds only "a" and "b", while
    the requested fill value is "c". The result is expected to compare equal
    to a Series built with an object-subtype SparseDtype carrying the same
    fill value.
    """
    frame = DataFrame([["a", 0], ["b", 1], ["b", 2]], columns=["A", "B"])
    column = frame["A"]
    requested_dtype = SparseDtype("category", fill_value="c")
    result = column.astype(requested_dtype)

    expected_dtype = SparseDtype("object", fill_value="c")
    expected = Series(["a", "b", "b"], name="A", dtype=expected_dtype)
    tm.assert_series_equal(result, expected)
7 changes: 7 additions & 0 deletions pandas/tests/arrays/sparse/test_dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,3 +207,10 @@ def test_repr():
result = str(SparseDtype(object, fill_value="0"))
expected = "Sparse[object, '0']"
assert result == expected


def test_sparse_dtype_subtype_must_be_numpy_dtype():
    """Check that SparseDtype raises TypeError for a "category" subtype.

    Regression test for GH#53160: constructing the dtype with "category"
    must fail with the message asserted below.
    """
    expected_message = "SparseDtype subtype must be a numpy dtype"
    with pytest.raises(TypeError, match=expected_message):
        SparseDtype("category", fill_value="c")

0 comments on commit 49a2b67

Please sign in to comment.