Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport PR #52821 on branch 2.0.x (BUG: Non unitless np NaT arithmetic with non-nano) #52847

Merged
merged 1 commit into from
Apr 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Bug fixes
- Bug in :meth:`DataFrame.max` and related casting different :class:`Timestamp` resolutions always to nanoseconds (:issue:`52524`)
- Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`)
- Bug in :meth:`Series.dt.tz_localize` incorrectly localizing timestamps with :class:`ArrowDtype` (:issue:`52677`)
- Bug in arithmetic between ``np.datetime64`` and ``np.timedelta64`` ``NaT`` scalars with units always returning nanosecond resolution (:issue:`52295`)
- Bug in logical and comparison operations between :class:`ArrowDtype` and numpy masked types (e.g. ``"boolean"``) (:issue:`52625`)
- Fixed bug in :func:`merge` when merging with ``ArrowDtype`` on one side and a NumPy dtype on the other side (:issue:`52406`)
- Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`)
Expand Down
25 changes: 22 additions & 3 deletions pandas/core/ops/array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,14 @@
lib,
ops as libops,
)
from pandas._libs.tslibs import BaseOffset
from pandas._libs.tslibs import (
BaseOffset,
get_supported_reso,
get_unit_from_dtype,
is_supported_unit,
is_unitless,
npy_unit_to_abbrev,
)
from pandas._typing import (
ArrayLike,
Shape,
Expand Down Expand Up @@ -475,7 +482,13 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
from pandas.core.arrays import DatetimeArray

# Avoid possible ambiguities with pd.NaT
obj = obj.astype("datetime64[ns]")
# GH 52295
if is_unitless(obj.dtype):
obj = obj.astype("datetime64[ns]")
elif not is_supported_unit(get_unit_from_dtype(obj.dtype)):
unit = get_unit_from_dtype(obj.dtype)
closest_unit = npy_unit_to_abbrev(get_supported_reso(unit))
obj = obj.astype(f"datetime64[{closest_unit}]")
right = np.broadcast_to(obj, shape)
return DatetimeArray(right)

Expand All @@ -488,7 +501,13 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape):
# wrapping timedelta64("NaT") in Timedelta returns NaT,
# which would incorrectly be treated as a datetime-NaT, so
# we broadcast and wrap in a TimedeltaArray
obj = obj.astype("timedelta64[ns]")
# GH 52295
if is_unitless(obj.dtype):
obj = obj.astype("timedelta64[ns]")
elif not is_supported_unit(get_unit_from_dtype(obj.dtype)):
unit = get_unit_from_dtype(obj.dtype)
closest_unit = npy_unit_to_abbrev(get_supported_reso(unit))
obj = obj.astype(f"timedelta64[{closest_unit}]")
right = np.broadcast_to(obj, shape)
return TimedeltaArray(right)

Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/arithmetic/test_datetime64.py
Original file line number Diff line number Diff line change
Expand Up @@ -2436,3 +2436,40 @@ def test_dt64arr_addsub_object_dtype_2d():

assert result2.shape == (4, 1)
assert all(td._value == 0 for td in result2.ravel())


def test_non_nano_dt64_addsub_np_nat_scalars():
    # GH 52295: arithmetic with a unit-ful NaT scalar should keep the
    # Series' millisecond resolution rather than upcasting to nanoseconds.
    values = [1233242342344, 232432434324, 332434242344]
    ser = Series(values, dtype="datetime64[ms]")

    diff = ser - np.datetime64("nat", "ms")
    tm.assert_series_equal(diff, Series([NaT, NaT, NaT], dtype="timedelta64[ms]"))

    shifted = ser + np.timedelta64("nat", "ms")
    tm.assert_series_equal(shifted, Series([NaT, NaT, NaT], dtype="datetime64[ms]"))


def test_non_nano_dt64_addsub_np_nat_scalars_unitless():
    # GH 52295: a unitless NaT scalar falls back to nanosecond resolution.
    # TODO: Can we default to the ser unit?
    ser = Series([1233242342344, 232432434324, 332434242344], dtype="datetime64[ms]")

    diff = ser - np.datetime64("nat")
    tm.assert_series_equal(diff, Series([NaT, NaT, NaT], dtype="timedelta64[ns]"))

    shifted = ser + np.timedelta64("nat")
    tm.assert_series_equal(shifted, Series([NaT, NaT, NaT], dtype="datetime64[ns]"))


def test_non_nano_dt64_addsub_np_nat_scalars_unsupported_unit():
    # GH 52295: day ("D") resolution is not supported by pandas, so the NaT
    # scalar is cast to the closest supported unit (seconds) and the result
    # keeps the Series' "s" resolution.
    ser = Series([12332, 23243, 33243], dtype="datetime64[s]")

    diff = ser - np.datetime64("nat", "D")
    tm.assert_series_equal(diff, Series([NaT, NaT, NaT], dtype="timedelta64[s]"))

    shifted = ser + np.timedelta64("nat", "D")
    tm.assert_series_equal(shifted, Series([NaT, NaT, NaT], dtype="datetime64[s]"))
1 change: 1 addition & 0 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array
np.datetime64("NaT", "ns"),
pd.NaT,
],
ids=repr,
)
def test_add_sub_datetimedeltalike_invalid(
self, numeric_idx, other, box_with_array
Expand Down