-
-
Notifications
You must be signed in to change notification settings - Fork 18.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: EADtype._find_compatible_dtype #53106
Changes from all commits
a0f8c31
0770a07
9be179b
1271e26
48a2f6d
c62dcaa
c85a3eb
fb0b03a
b809fd7
deab083
b565439
c3fbbcb
995f4b4
6f481f9
0790932
2b9a092
a85a6d4
92227ef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -686,6 +686,15 @@ def index_class(self) -> type_t[CategoricalIndex]: | |
|
||
return CategoricalIndex | ||
|
||
def _find_compatible_dtype(self, item) -> tuple[DtypeObj, Any]: | ||
from pandas.core.dtypes.missing import is_valid_na_for_dtype | ||
|
||
if item in self.categories or is_valid_na_for_dtype( | ||
item, self.categories.dtype | ||
): | ||
return self, item | ||
return np.dtype(object), item | ||
|
||
|
||
@register_extension_dtype | ||
class DatetimeTZDtype(PandasExtensionDtype): | ||
|
@@ -1606,6 +1615,18 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None: | |
except (KeyError, NotImplementedError): | ||
return None | ||
|
||
def _find_compatible_dtype(self, item) -> tuple[DtypeObj, Any]: | ||
from pandas.core.dtypes.cast import maybe_promote | ||
from pandas.core.dtypes.missing import is_valid_na_for_dtype | ||
|
||
if is_valid_na_for_dtype(item, self): | ||
return self, item | ||
|
||
dtype, item = maybe_promote(self.numpy_dtype, item) | ||
if dtype.kind in "iufb": | ||
return type(self).from_numpy_dtype(dtype), item | ||
return dtype, item | ||
|
||
|
||
@register_extension_dtype | ||
class SparseDtype(ExtensionDtype): | ||
|
@@ -2344,3 +2365,29 @@ def __from_arrow__(self, array: pa.Array | pa.ChunkedArray): | |
array_class = self.construct_array_type() | ||
arr = array.cast(self.pyarrow_dtype, safe=True) | ||
return array_class(arr) | ||
|
||
def _find_compatible_dtype(self, item: Any) -> tuple[DtypeObj, Any]: | ||
if isinstance(item, pa.Scalar): | ||
if not item.is_valid: | ||
# TODO: ask joris for help making these checks more robust | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jorisvandenbossche any thoughts here? (not time-sensitive) |
||
if item.type == self.pyarrow_dtype: | ||
return self, item.as_py() | ||
if item.type.to_pandas_dtype() == np.int64 and self.kind == "i": | ||
# FIXME: kludge | ||
return self, item.as_py() | ||
|
||
item = item.as_py() | ||
|
||
elif item is None or item is libmissing.NA: | ||
# TODO: np.nan? use is_valid_na_for_dtype | ||
return self, item | ||
|
||
from pandas.core.dtypes.cast import maybe_promote | ||
|
||
dtype, item = maybe_promote(self.numpy_dtype, item) | ||
|
||
if dtype == self.numpy_dtype: | ||
return self, item | ||
|
||
# TODO: implement from_numpy_dtype analogous to MaskedDtype.from_numpy_dtype | ||
return np.dtype(object), item |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,12 +56,7 @@ | |
ABCDataFrame, | ||
ABCSeries, | ||
) | ||
from pandas.core.dtypes.missing import ( | ||
infer_fill_value, | ||
is_valid_na_for_dtype, | ||
isna, | ||
na_value_for_dtype, | ||
) | ||
from pandas.core.dtypes.missing import infer_fill_value | ||
|
||
from pandas.core import algorithms as algos | ||
import pandas.core.common as com | ||
|
@@ -2203,26 +2198,14 @@ def _setitem_with_indexer_missing(self, indexer, value): | |
return self._setitem_with_indexer(new_indexer, value, "loc") | ||
|
||
# this preserves dtype of the value and of the object | ||
if not is_scalar(value): | ||
new_dtype = None | ||
|
||
elif is_valid_na_for_dtype(value, self.obj.dtype): | ||
if not is_object_dtype(self.obj.dtype): | ||
# Every NA value is suitable for object, no conversion needed | ||
value = na_value_for_dtype(self.obj.dtype, compat=False) | ||
|
||
new_dtype = maybe_promote(self.obj.dtype, value)[0] | ||
|
||
elif isna(value): | ||
new_dtype = None | ||
new_dtype = None | ||
if is_list_like(value): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note for ArrowDtype with
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, getting rid of this is_list_like check causes us to incorrectly raise on numpy non-object cases when using a list value (for which we don't have any tests). Can fix that in this PR or separately, as it is a bit more invasive. |
||
pass | ||
elif not self.obj.empty and not is_object_dtype(self.obj.dtype): | ||
# We should not cast, if we have object dtype because we can | ||
# set timedeltas into object series | ||
curr_dtype = self.obj.dtype | ||
curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype) | ||
new_dtype = maybe_promote(curr_dtype, value)[0] | ||
else: | ||
new_dtype = None | ||
new_dtype, value = maybe_promote(curr_dtype, value) | ||
|
||
new_values = Series([value], dtype=new_dtype)._values | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2751,6 +2751,29 @@ def test_describe_timedelta_data(pa_type): | |
tm.assert_series_equal(result, expected) | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"value, target_value, dtype", | ||
[ | ||
(pa.scalar(4, type="int32"), 4, "int32[pyarrow]"), | ||
(pa.scalar(4, type="int64"), 4, "int32[pyarrow]"), | ||
# (pa.scalar(4.5, type="float64"), 4, "int32[pyarrow]"), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What happens here? Also, what happens with an int64 scalar and int32 dtype? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd want to follow the same logic we do for numpy dtypes, but was punting here in expectation of doing it in a follow-up (likely involving Joris expressing an opinion) |
||
(4, 4, "int32[pyarrow]"), | ||
(pd.NA, None, "int32[pyarrow]"), | ||
(None, None, "int32[pyarrow]"), | ||
(pa.scalar(None, type="int32"), None, "int32[pyarrow]"), | ||
(pa.scalar(None, type="int64"), None, "int32[pyarrow]"), | ||
], | ||
) | ||
def test_series_setitem_with_enlargement(value, target_value, dtype): | ||
# GH#52235 | ||
# similar to series/indexing/test_setitem.py::test_setitem_keep_precision | ||
# and test_setitem_enlarge_with_na, but for arrow dtypes | ||
ser = pd.Series([1, 2, 3], dtype=dtype) | ||
ser[3] = value | ||
expected = pd.Series([1, 2, 3, target_value], dtype=dtype) | ||
tm.assert_series_equal(ser, expected) | ||
|
||
|
||
@pytest.mark.parametrize("pa_type", tm.DATETIME_PYARROW_DTYPES) | ||
def test_describe_datetime_data(pa_type): | ||
# GH53001 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What happens if item is null, i.e. the pyarrow null?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIUC pyarrow nulls are now typed. I'd prefer to be strict about making these match, but don't care that much. I am hoping @jorisvandenbossche will weigh in.