From 3297047918dd51cfcffb7e3f8c7c4acfb5683867 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Mar 2023 15:32:53 -0800 Subject: [PATCH 01/13] API: Make Day not a Tick --- pandas/_libs/tslibs/__init__.py | 2 + pandas/_libs/tslibs/offsets.pyx | 61 ++++++++++++++----- pandas/_libs/tslibs/period.pyx | 10 ++- pandas/_libs/tslibs/timedeltas.pyx | 12 +++- pandas/_libs/tslibs/timestamps.pyx | 4 ++ pandas/_testing/asserters.py | 22 ++++++- pandas/core/arrays/datetimelike.py | 37 +++++++++-- pandas/core/arrays/datetimes.py | 7 ++- pandas/core/arrays/period.py | 14 ++++- pandas/core/arrays/timedeltas.py | 13 +++- pandas/core/generic.py | 5 +- pandas/core/indexes/datetimelike.py | 34 ++++++++++- pandas/core/indexes/interval.py | 3 + pandas/core/indexes/timedeltas.py | 18 +++++- pandas/core/resample.py | 33 ++++++++-- pandas/core/window/rolling.py | 8 ++- pandas/tests/arithmetic/test_datetime64.py | 4 ++ pandas/tests/arithmetic/test_numeric.py | 13 +++- pandas/tests/arithmetic/test_timedelta64.py | 27 ++++++-- pandas/tests/arrays/test_datetimelike.py | 26 ++++---- pandas/tests/arrays/test_timedeltas.py | 2 +- .../tests/indexes/datetimes/test_datetime.py | 12 ++-- .../indexes/datetimes/test_scalar_compat.py | 5 +- .../indexes/timedeltas/methods/test_insert.py | 2 +- .../indexes/timedeltas/test_constructors.py | 2 +- .../tests/indexes/timedeltas/test_formats.py | 16 ++--- .../indexes/timedeltas/test_freq_attr.py | 11 ++-- pandas/tests/indexes/timedeltas/test_ops.py | 10 ++- .../tests/indexes/timedeltas/test_setops.py | 2 +- .../indexes/timedeltas/test_timedelta.py | 4 +- .../timedeltas/test_timedelta_range.py | 2 +- pandas/tests/resample/test_datetime_index.py | 5 ++ pandas/tests/resample/test_period_index.py | 14 +++-- pandas/tests/scalar/period/test_period.py | 2 +- .../tests/scalar/timestamp/test_arithmetic.py | 4 +- .../tests/scalar/timestamp/test_unary_ops.py | 5 +- .../tseries/frequencies/test_freq_code.py | 1 - pandas/tests/tseries/offsets/test_offsets.py | 7 ++- pandas/tests/tseries/offsets/test_ticks.py | 2 +- pandas/tests/tslibs/test_api.py | 1 + pandas/tests/tslibs/test_to_offset.py | 2 +- 41 files changed, 353 insertions(+), 111 deletions(-) diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 42f84619ddbe5..5f2e9235afa9e 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -33,6 +33,7 @@ "is_supported_unit", "npy_unit_to_abbrev", "get_supported_reso", + "Day" ] from pandas._libs.tslibs import dtypes @@ -62,6 +63,7 @@ BaseOffset, Tick, to_offset, + Day, ) from pandas._libs.tslibs.period import ( IncompatibleFrequency, diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 9b626b9ee4f6a..8fd990bce843e 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -926,8 +926,6 @@ cdef class Tick(SingleConstructorOffset): # Note: Without making this cpdef, we get AttributeError when calling # from __mul__ cpdef Tick _next_higher_resolution(Tick self): - if type(self) is Day: - return Hour(self.n * 24) if type(self) is Hour: return Minute(self.n * 60) if type(self) is Minute: @@ -1086,13 +1084,43 @@ cdef class Tick(SingleConstructorOffset): self.normalize = False -cdef class Day(Tick): - _nanos_inc = 24 * 3600 * 1_000_000_000 +cdef class Day(SingleConstructorOffset): + _adjust_dst = True + _attributes = tuple(["n", "normalize"]) + rule_code = "D" # used by parse_time_string _prefix = "D" _td64_unit = "D" _period_dtype_code = PeriodDtypeCode.D _creso = NPY_DATETIMEUNIT.NPY_FR_D + def __init__(self, n=1, normalize=False): + BaseOffset.__init__(self, n) + if normalize: + # GH#21427 + raise ValueError( + "Day offset with `normalize=True` are not allowed." + ) + + def is_on_offset(self, dt) -> bool: + return True + + @apply_wraps + def _apply(self, other): + if isinstance(other, Day): + # TODO: why isn't this handled in __add__? + return Day(self.n + other.n) + return other + np.timedelta64(self.n, "D")# Timedelta(days=self.n).as_unit("s") + + @apply_array_wraps + def _apply_array(self, dtarr): + return dtarr + np.timedelta64(self.n, "D")#Timedelta(days=self.n).as_unit("s") + + @cache_readonly + def freqstr(self) -> str: + if self.n != 1: + return str(self.n) + "D" + return "D" + cdef class Hour(Tick): _nanos_inc = 3600 * 1_000_000_000 @@ -1145,16 +1173,13 @@ cdef class Nano(Tick): def delta_to_tick(delta: timedelta) -> Tick: if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0: # nanoseconds only for pd.Timedelta - if delta.seconds == 0: - return Day(delta.days) + seconds = delta.days * 86400 + delta.seconds + if seconds % 3600 == 0: + return Hour(seconds / 3600) + elif seconds % 60 == 0: + return Minute(seconds / 60) else: - seconds = delta.days * 86400 + delta.seconds - if seconds % 3600 == 0: - return Hour(seconds / 3600) - elif seconds % 60 == 0: - return Minute(seconds / 60) - else: - return Second(seconds) + return Second(seconds) else: nanos = delta_to_nanoseconds(delta) if nanos % 1_000_000 == 0: @@ -4123,7 +4148,7 @@ cpdef to_offset(freq): <2 * BusinessDays> >>> to_offset(pd.Timedelta(days=1)) - + <24Hour> >>> to_offset(pd.offsets.Hour()) @@ -4162,7 +4187,7 @@ cpdef to_offset(freq): if not stride: stride = 1 - if prefix in {"D", "H", "T", "S", "L", "U", "N"}: + if prefix in {"H", "T", "S", "L", "U", "N"}: # For these prefixes, we have something like "3H" or # "2.5T", so we can construct a Timedelta with the # matching unit and get our offset from delta_to_tick @@ -4180,6 +4205,12 @@ cpdef to_offset(freq): if delta is None: delta = offset + elif isinstance(delta, Day) and isinstance(offset, Tick): + # e.g. "1D1H" is treated like "25H" + delta = Hour(delta.n * 24) + offset + elif isinstance(offset, Day) and isinstance(delta, Tick): + # e.g. "1H1D" is treated like "25H" + delta = delta + Hour(offset.n * 24) else: delta = delta + offset except (ValueError, TypeError) as err: diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 7da1cab9af4f9..f66d73cef7b23 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -23,6 +23,7 @@ from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, datetime, + timedelta, import_datetime, ) from libc.stdlib cimport ( @@ -105,7 +106,7 @@ from pandas._libs.tslibs.offsets cimport ( to_offset, ) -from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG +from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG, Day cdef: enum: @@ -1733,7 +1734,7 @@ cdef class _Period(PeriodMixin): cdef: int64_t inc - if not is_tick_object(self.freq): + if not is_tick_object(self.freq) and not isinstance(self.freq, Day): raise IncompatibleFrequency("Input cannot be converted to " f"Period(freq={self.freqstr})") @@ -1744,6 +1745,9 @@ cdef class _Period(PeriodMixin): # i.e. np.timedelta64("nat") return NaT + if isinstance(other, Day): + other = timedelta(days=other.n) + try: inc = delta_to_nanoseconds(other, reso=self.freq._creso, round_ok=False) except ValueError as err: @@ -1771,7 +1775,7 @@ cdef class _Period(PeriodMixin): return NaT return other.__add__(self) - if is_any_td_scalar(other): + if is_any_td_scalar(other) or isinstance(other, Day): return self._add_timedeltalike_scalar(other) elif is_offset_object(other): return self._add_offset(other) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 01755fdd65654..4a36b2684dbce 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1707,6 +1707,7 @@ class Timedelta(_Timedelta): ) * 1_000_000_000 ) + # TODO: catch OverflowError and re-raise as OutOfBoundsTimedelta value = np.timedelta64( int(kwargs.get("nanoseconds", 0)) + int(kwargs.get("microseconds", 0) * 1_000) @@ -1819,9 +1820,14 @@ class Timedelta(_Timedelta): int64_t result, unit ndarray[int64_t] arr - from pandas._libs.tslibs.offsets import to_offset + from pandas._libs.tslibs.offsets import to_offset, Day - to_offset(freq).nanos # raises on non-fixed freq + orig = freq + freq = to_offset(freq) + if isinstance(freq, Day): + # In this context it is clear D represents 24 hours + freq = 24 * freq.n * to_offset("H") + freq.nanos # raises on non-fixed freq unit = delta_to_nanoseconds(to_offset(freq), self._creso) arr = np.array([self._value], dtype="i8") @@ -1829,7 +1835,7 @@ class Timedelta(_Timedelta): result = round_nsint64(arr, mode, unit)[0] except OverflowError as err: raise OutOfBoundsTimedelta( - f"Cannot round {self} to freq={freq} without overflow" + f"Cannot round {self} to freq={orig} without overflow" ) from err return Timedelta._from_value_and_reso(result, self._creso) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 398f26723b508..949e50ae1ecee 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -99,6 +99,7 @@ from pandas._libs.tslibs.np_datetime import ( OutOfBoundsTimedelta, ) +from pandas._libs.tslibs.offsets import Day from pandas._libs.tslibs.offsets cimport to_offset from pandas._libs.tslibs.timedeltas cimport ( _Timedelta, @@ -1673,6 +1674,9 @@ class Timestamp(_Timestamp): int64_t nanos freq = to_offset(freq) + if isinstance(freq, Day): + # In this context it is sufficiently clear that this means 24H + freq = freq.n * 24 * to_offset("H") freq.nanos # raises on non-fixed freq nanos = delta_to_nanoseconds(freq, self._creso) if nanos == 0: diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index e25e8388bc4cd..95d1bc5250dc5 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -53,6 +53,7 @@ from pandas.core.indexes.api import safe_sort_index from pandas.io.formats.printing import pprint_thing +from pandas._libs.tslibs import Day, Timedelta, Tick def assert_almost_equal( @@ -546,7 +547,7 @@ def assert_datetime_array_equal( assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray") if check_freq: - assert_attr_equal("freq", left, right, obj=obj) + assert_freq_equal(left.freq, right.freq) assert_attr_equal("tz", left, right, obj=obj) @@ -894,7 +895,7 @@ def assert_series_equal( if check_freq and isinstance(left.index, (DatetimeIndex, TimedeltaIndex)): lidx = left.index ridx = right.index - assert lidx.freq == ridx.freq, (lidx.freq, ridx.freq) + assert_freq_equal(lidx.freq, ridx.freq) if check_dtype: # We want to skip exact dtype checking when `check_categorical` @@ -1016,6 +1017,21 @@ def assert_series_equal( ) +def assert_freq_equal(left, right): + # TODO: sure we want to do this??? + if isinstance(left, Day): + if isinstance(right, Day): + assert left == right + elif isinstance(right, Tick): + assert right == Timedelta(days=left.n) + else: + assert left == right # will raise + elif isinstance(right, Day): + assert_freq_equal(right, left) + else: + assert left == right + + # This could be refactored to use the NDFrame.equals method def assert_frame_equal( left, @@ -1234,7 +1250,7 @@ def assert_equal(left, right, **kwargs) -> None: if isinstance(left, Index): assert_index_equal(left, right, **kwargs) if isinstance(left, (DatetimeIndex, TimedeltaIndex)): - assert left.freq == right.freq, (left.freq, right.freq) + assert_freq_equal(left.freq, right.freq) elif isinstance(left, Series): assert_series_equal(left, right, **kwargs) elif isinstance(left, DataFrame): diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1e9b5641aa5e0..c4f33ef3ef01a 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -40,6 +40,7 @@ Timestamp, astype_overflowsafe, delta_to_nanoseconds, + Day, get_unit_from_dtype, iNaT, ints_to_pydatetime, @@ -375,6 +376,8 @@ def __getitem__( # At this point we know the result is an array. result = cast(DatetimeLikeArrayT, result) result._freq = self._get_getitem_freq(key) + if self.dtype.kind == "m" and result._freq is not None: + assert isinstance(result._freq, Tick) return result def _get_getitem_freq(self, key) -> BaseOffset | None: @@ -886,9 +889,14 @@ def inferred_freq(self) -> str | None: if self.ndim != 1: return None try: - return frequencies.infer_freq(self) + res = frequencies.infer_freq(self) except ValueError: return None + if self.dtype.kind == "m" and res is not None and res.endswith("D"): + if res == "D": + return "24H" + res = str(int(res[:-1]) * 24) + "H" + return res @property # NB: override with cache_readonly in immutable subclasses def _resolution_obj(self) -> Resolution | None: @@ -1025,6 +1033,10 @@ def _get_arithmetic_result_freq(self, other) -> BaseOffset | None: elif isinstance(self.freq, Tick): # In these cases return self.freq + elif isinstance(self.freq, Day) and getattr(self, "tz", None) is None: + return self.freq + # TODO: are there tzaware cases when we can reliably preserve freq? + # We have a bunch of tests that seem to think so return None @final @@ -1122,6 +1134,8 @@ def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray: res_m8 = res_values.view(f"timedelta64[{self.unit}]") new_freq = self._get_arithmetic_result_freq(other) + if isinstance(new_freq, Day): + new_freq = new_freq.n * 24 * to_offset("H") # TODO: Day method for this? return TimedeltaArray._simple_new(res_m8, dtype=res_m8.dtype, freq=new_freq) @final @@ -1456,7 +1470,7 @@ def __iadd__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT: if not is_period_dtype(self.dtype): # restore freq, which is invalidated by setitem - self._freq = result.freq + self.freq = result.freq return self def __isub__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT: @@ -1465,7 +1479,7 @@ def __isub__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT: if not is_period_dtype(self.dtype): # restore freq, which is invalidated by setitem - self._freq = result.freq + self.freq = result.freq return self # -------------------------------------------------------------- @@ -1850,7 +1864,7 @@ def __init__( freq = to_offset(freq) NDArrayBacked.__init__(self, values=values, dtype=dtype) - self._freq = freq + self.freq = freq if inferred_freq is None and freq is not None: type(self)._validate_frequency(self, freq) @@ -1875,6 +1889,8 @@ def freq(self, value) -> None: if self.ndim > 1: raise ValueError("Cannot set freq with ndim > 1") + if self.dtype.kind == "m": + assert value is None or isinstance(value, Tick) self._freq = value @classmethod @@ -1995,7 +2011,10 @@ def _round(self, freq, mode, ambiguous, nonexistent): values = self.view("i8") values = cast(np.ndarray, values) - nanos = to_offset(freq).nanos # raises on non-fixed frequencies + freq = to_offset(freq) + if isinstance(freq, Day): + freq = freq.n * 24 * to_offset("H") + nanos = freq.nanos # raises on non-fixed frequencies nanos = delta_to_nanoseconds(to_offset(freq), self._creso) result_i8 = round_nsint64(values, mode, nanos) result = self._maybe_mask_results(result_i8, fill_value=iNaT) @@ -2066,11 +2085,19 @@ def _with_freq(self, freq): elif len(self) == 0 and isinstance(freq, BaseOffset): # Always valid. In the TimedeltaArray case, we assume this # is a Tick offset. + if self.dtype.kind == "m" and not isinstance(freq, Tick): + raise ValueError("TimedeltaIndex/Array freq must be a Tick") pass else: # As an internal method, we can ensure this assertion always holds assert freq == "infer" freq = to_offset(self.inferred_freq) + if isinstance(freq, Day) and self.dtype.kind == "m": + # FIXME: inferred_freq is wrong here + freq = freq.n * 24 * to_offset("H") + + if self.dtype.kind == "m" and freq is not None: + assert isinstance(freq, Tick) arr = self.view() arr._freq = freq diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1624870705b8f..730e914d230fa 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -442,8 +442,11 @@ def _generate_range( # type: ignore[override] if end is not None: end = end.tz_localize(None) - if isinstance(freq, Tick): - i8values = generate_regular_range(start, end, periods, freq, unit=unit) + if isinstance(freq, Tick) or (tz is None and isinstance(freq, Day)): + tfreq = freq + if isinstance(freq, Day): + tfreq = freq.n * 24 * to_offset("H") + i8values = generate_regular_range(start, end, periods, tfreq, unit=unit) else: xdr = _generate_range( start=start, end=end, periods=periods, offset=freq, unit=unit diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index afb6f02b1cd4a..e76460e990e92 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -36,6 +36,7 @@ from pandas._libs.tslibs.fields import isleapyear_arr from pandas._libs.tslibs.offsets import ( Tick, + Day, delta_to_tick, ) from pandas._libs.tslibs.period import ( @@ -698,6 +699,10 @@ def _addsub_int_array_or_scalar( def _add_offset(self, other: BaseOffset): assert not isinstance(other, Tick) + if isinstance(other, Day): + other = Timedelta(days=other.n) + return self + other + self._require_matching_freq(other, base=True) return self._addsub_int_array_or_scalar(other.n, operator.add) @@ -712,7 +717,7 @@ def _add_timedeltalike_scalar(self, other): ------- PeriodArray """ - if not isinstance(self.freq, Tick): + if not isinstance(self.freq, Tick) and not isinstance(self.freq, Day): # We cannot add timedelta-like to non-tick PeriodArray raise raise_on_incompatible(self, other) @@ -720,7 +725,10 @@ def _add_timedeltalike_scalar(self, other): # i.e. np.timedelta64("NaT") return super()._add_timedeltalike_scalar(other) - td = np.asarray(Timedelta(other).asm8) + if isinstance(other, Day): + td = np.asarray(Timedelta(days=other.n).asm8) + else: + td = np.asarray(Timedelta(other).asm8) return self._add_timedelta_arraylike(td) def _add_timedelta_arraylike( @@ -736,7 +744,7 @@ def _add_timedelta_arraylike( PeriodArray """ freq = self.freq - if not isinstance(freq, Tick): + if not isinstance(freq, (Tick, Day)): # We cannot add timedelta-like to non-tick PeriodArray raise TypeError( f"Cannot add or subtract timedelta64[ns] dtype from {self.dtype}" diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 329d6a937728a..1a07d7d2de571 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -19,6 +19,7 @@ BaseOffset, NaT, NaTType, + Day, Tick, Timedelta, astype_overflowsafe, @@ -201,6 +202,7 @@ def _simple_new( # type: ignore[override] assert not tslibs.is_unitless(dtype) assert isinstance(values, np.ndarray), type(values) assert dtype == values.dtype + assert freq is None or isinstance(freq, Tick) result = super()._simple_new(values=values, dtype=dtype) result._freq = freq @@ -240,7 +242,13 @@ def _from_sequence_not_strict( explicit_none = freq is None freq = freq if freq is not lib.no_default else None + if isinstance(freq, Day): + raise ValueError( + "Day offset object is not valid for TimedeltaIndex, pass e.g. 24H instead." + ) freq, freq_infer = dtl.maybe_infer_freq(freq) + if isinstance(freq, Day): + freq = freq.n * 24 * to_offset("H") data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit) freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer) @@ -259,7 +267,10 @@ def _from_sequence_not_strict( elif freq_infer: # Set _freq directly to bypass duplicative _validate_frequency # check. - result._freq = to_offset(result.inferred_freq) + res_freq = to_offset(result.inferred_freq) + if isinstance(res_freq, Day): + res_freq = res_freq.n * 24 * to_offset("H") + result._freq = res_freq return result diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a4dfb085c766f..21a1e344767a5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -40,6 +40,7 @@ from pandas._libs.tslibs import ( Period, Tick, + Day, Timestamp, to_offset, ) @@ -8986,7 +8987,7 @@ def first(self: NDFrameT, offset) -> NDFrameT: return self.copy(deep=False) offset = to_offset(offset) - if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): + if not isinstance(offset, (Tick, Day)) and offset.is_on_offset(self.index[0]): # GH#29623 if first value is end of period, remove offset with n = 1 # before adding the real offset end_date = end = self.index[0] - offset.base + offset @@ -8994,7 +8995,7 @@ def first(self: NDFrameT, offset) -> NDFrameT: end_date = end = self.index[0] + offset # Tick-like, e.g. 3 weeks - if isinstance(offset, Tick) and end_date in self.index: + if isinstance(offset, (Tick, Day)) and end_date in self.index: end = self.index.searchsorted(end_date, side="left") return self.iloc[:end] diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 17fac22f578db..4a8908b879ce0 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -28,6 +28,7 @@ BaseOffset, Resolution, Tick, + Day, parsing, to_offset, ) @@ -544,7 +545,18 @@ def _intersection(self, other: Index, sort: bool = False) -> Index: # At this point we should have result.dtype == self.dtype # and type(result) is type(self._data) result = self._wrap_setop_result(other, result) - return result._with_freq(None)._with_freq("infer") + result = result._with_freq(None)._with_freq("infer") + + # TODO: could share this with the union restore-Day code but + # at this point we have an Index here while we have a DTA/TDA theres + if isinstance(self.freq, Day) and isinstance(result.freq, Tick): + # If we infer a 24H-like freq but are D, restore "D" + td = Timedelta(result.freq) + div, mod = divmod(td.value, 24 * 3600 * 10**9) + if mod == 0: + freq = to_offset("D") * div + result._data._freq = freq + return result else: return self._fast_intersect(other, sort) @@ -666,7 +678,25 @@ def _union(self, other, sort): # that result.freq == self.freq return result else: - return super()._union(other, sort)._with_freq("infer") + result = super()._union(other, sort)._with_freq("infer") + if isinstance(self.freq, Day) and isinstance(result.freq, Tick): + # If we infer a 24H-like freq but are D, restore "D" + td = Timedelta(result.freq) + div, mod = divmod(td.value, 24 * 3600 * 10**9) + if mod == 0: + freq = to_offset("D") * div + result._freq = freq + return result + + def _maybe_restore_day(self, result: DatetimeArray | TimedeltaArray) -> DatetimeArray | TimedeltaArray: + if isinstance(self.freq, Day) and isinstance(result.freq, Tick): + # If we infer a 24H-like freq but are D, restore "D" + td = Timedelta(result.freq) + div, mod = divmod(td.value, 24 * 3600 * 10**9) + if mod == 0: + freq = to_offset("D") * div + result._freq = freq + return result # -------------------------------------------------------------------- # Join Methods diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index c3b7ba72b2e46..5f266813503f9 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -25,6 +25,7 @@ BaseOffset, Timedelta, Timestamp, + Day, to_offset, ) from pandas.errors import InvalidIndexError @@ -1094,6 +1095,8 @@ def interval_range( raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) from err + if isinstance(freq, Day) and (isinstance(start, Timedelta) or isinstance(end, Timedelta)): + freq = freq.n * 24 * to_offset("H") # verify type compatibility if not all( diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 0afb928e7bddb..5d5f530ca1d4f 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -11,6 +11,7 @@ Resolution, Timedelta, to_offset, + Day, ) from pandas.core.dtypes.common import ( @@ -271,14 +272,14 @@ def timedelta_range( -------- >>> pd.timedelta_range(start='1 day', periods=4) TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], - dtype='timedelta64[ns]', freq='D') + dtype='timedelta64[ns]', freq='24H') The ``closed`` parameter specifies which endpoint is included. The default behavior is to include both endpoints. >>> pd.timedelta_range(start='1 day', periods=4, closed='right') TimedeltaIndex(['2 days', '3 days', '4 days'], - dtype='timedelta64[ns]', freq='D') + dtype='timedelta64[ns]', freq='24H') The ``freq`` parameter specifies the frequency of the TimedeltaIndex. Only fixed frequencies can be passed, non-fixed frequencies such as @@ -304,10 +305,21 @@ def timedelta_range( '200001 days 00:00:00'], dtype='timedelta64[s]', freq='100000D') """ + orig = freq if freq is None and com.any_none(periods, start, end): - freq = "D" + freq = "24H" + + if isinstance(freq, Day): + # If a user specifically passes a Day *object* we disallow it, + # but if they pass a Day-like string we'll convert it to hourly below. + raise ValueError("Passing a Day offset to timedelta_range is not allowed, pass an hourly offset instead") freq, _ = dtl.maybe_infer_freq(freq) + if isinstance(freq, Day): + freq = 24 * freq.n * to_offset("H") + elif isinstance(freq, Timedelta): + freq = to_offset(freq) # FIXME: shouldn't happen right? + tdarr = TimedeltaArray._generate_range( start, end, periods, freq, closed=closed, unit=unit ) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 0ca01efe0c855..15014ed85ed23 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -24,6 +24,7 @@ Timedelta, Timestamp, to_offset, + Day, ) from pandas._typing import NDFrameT from pandas.compat.numpy import function as nv @@ -1277,9 +1278,15 @@ def _downsample(self, how, **kwargs): if not len(ax): # reset to the new freq + freq = self.freq + if isinstance(freq, Day) and obj.index.dtype.kind == "m": + freq = freq.n * 24 * to_offset("H") + if not isinstance(freq, Tick) and obj.index.dtype.kind == "m": + # FIXME: wrong in the status quo! + freq = None obj = obj.copy() - obj.index = obj.index._with_freq(self.freq) - assert obj.index.freq == self.freq, (obj.index.freq, self.freq) + obj.index = obj.index._with_freq(freq) + assert obj.index.freq == freq, (obj.index.freq, freq) return obj # do we have a regular frequency @@ -1826,22 +1833,29 @@ def _get_time_delta_bins(self, ax: TimedeltaIndex): f"an instance of {type(ax).__name__}" ) + freq = self.freq + if isinstance(freq, Day): + # TODO: are we super-duper sure this is safe? maybe we can unify conversion earlier? + freq = 24 * freq.n * to_offset("H") if not len(ax): - binner = labels = TimedeltaIndex(data=[], freq=self.freq, name=ax.name) + if not isinstance(freq, Tick): + # FIXME: this seems to be happening in the status quo + freq = None + binner = labels = TimedeltaIndex(data=[], freq=freq, name=ax.name) return binner, [], labels start, end = ax.min(), ax.max() if self.closed == "right": - end += self.freq + end += freq labels = binner = timedelta_range( - start=start, end=end, freq=self.freq, name=ax.name + start=start, end=end, freq=freq, name=ax.name ) end_stamps = labels if self.closed == "left": - end_stamps += self.freq + end_stamps += freq bins = ax.searchsorted(end_stamps, side=self.closed) @@ -2267,6 +2281,13 @@ def _asfreq_compat(index: DatetimeIndex | PeriodIndex | TimedeltaIndex, freq): elif isinstance(index, DatetimeIndex): new_index = DatetimeIndex([], dtype=index.dtype, freq=freq, name=index.name) elif isinstance(index, TimedeltaIndex): + if freq is not None: + freq = to_offset(freq) # TODO: do this earlier? + if isinstance(freq, Day): + freq = freq.n * 24 * to_offset("H") + if not isinstance(freq, Tick): + # FIXME: wrong in main + freq = None new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name) else: # pragma: no cover raise TypeError(type(index)) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b11ff11421ed4..477ef10aae177 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -24,6 +24,7 @@ from pandas._libs.tslibs import ( BaseOffset, to_offset, + Day, ) import pandas._libs.window.aggregations as window_aggregations from pandas.compat._optional import import_optional_dependency @@ -1777,7 +1778,12 @@ def _validate(self): self._on.freq.nanos / self._on.freq.n ) else: - self._win_freq_i8 = freq.nanos + if isinstance(freq, Day): + # In this context we treat Day as 24H + # TODO: will this cause trouble with tzaware cases? + self._win_freq_i8 = freq.n * 24 * 3600 * 10**9 + else: + self._win_freq_i8 = freq.nanos # min_periods must be an integer if self.min_periods is None: diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index b9c23d5029e22..ae2ac7fa996da 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -816,6 +816,8 @@ def test_dt64arr_add_timedeltalike_scalar( rng = date_range("2000-01-01", "2000-02-01", tz=tz) expected = date_range("2000-01-01 02:00", "2000-02-01 02:00", tz=tz) + if tz is not None: + expected = expected._with_freq(None) rng = tm.box_expected(rng, box_with_array) expected = tm.box_expected(expected, box_with_array) @@ -836,6 +838,8 @@ def test_dt64arr_sub_timedeltalike_scalar( rng = date_range("2000-01-01", "2000-02-01", tz=tz) expected = date_range("1999-12-31 22:00", "2000-01-31 22:00", tz=tz) + if tz is not None: + expected = expected._with_freq(None) rng = tm.box_expected(rng, box_with_array) expected = tm.box_expected(expected, box_with_array) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index eaf80f4768458..ed3aa00828ef0 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -20,6 +20,7 @@ TimedeltaIndex, array, ) +from pandas._libs.tslibs import Day import pandas._testing as tm from pandas.core import ops from pandas.core.computation import expressions as expr @@ -264,10 +265,16 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array index = tm.box_expected(index, box) expected = tm.box_expected(expected, box) - result = three_days / index - tm.assert_equal(result, expected) + if isinstance(three_days, Day): + # GH#41943 Day is no longer timedelta-like + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + three_days / index + else: + result = three_days / index + tm.assert_equal(result, expected) + msg = "cannot use operands with types dtype" - msg = "cannot use operands with types dtype" with pytest.raises(TypeError, match=msg): index / three_days diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 33fc63938407c..b2374368569f8 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -33,6 +33,7 @@ assert_invalid_comparison, get_upcast_box, ) +from pandas._libs.tslibs import Day def assert_dtype(obj, expected_dtype): @@ -624,27 +625,27 @@ def test_tdi_ops_attributes(self): result = rng + 1 * rng.freq exp = timedelta_range("4 days", periods=5, freq="2D", name="x") tm.assert_index_equal(result, exp) - assert result.freq == "2D" + assert result.freq == "48H" result = rng - 2 * rng.freq exp = timedelta_range("-2 days", periods=5, freq="2D", name="x") tm.assert_index_equal(result, exp) - assert result.freq == "2D" + assert result.freq == "48H" result = rng * 2 exp = timedelta_range("4 days", periods=5, freq="4D", name="x") tm.assert_index_equal(result, exp) - assert result.freq == "4D" + assert result.freq == "96H" result = rng / 2 exp = timedelta_range("1 days", periods=5, freq="D", name="x") tm.assert_index_equal(result, exp) - assert result.freq == "D" + assert result.freq == "24H" result = -rng exp = timedelta_range("-2 days", periods=5, freq="-2D", name="x") tm.assert_index_equal(result, exp) - assert result.freq == "-2D" + assert result.freq == "-48H" rng = timedelta_range("-2 days", periods=5, freq="D", name="x") @@ -1821,6 +1822,16 @@ def test_td64arr_mod_tdscalar(self, box_with_array, three_days): expected = TimedeltaIndex(["1 Day", "2 Days", "0 Days"] * 3) expected = tm.box_expected(expected, box_with_array) + if isinstance(three_days, Day): + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + tdarr % three_days + with pytest.raises(TypeError, match=msg): + divmod(tdarr, three_days) + with pytest.raises(TypeError, match=msg): + tdarr // three_days + return + result = tdarr % three_days tm.assert_equal(result, expected) @@ -1864,6 +1875,12 @@ def test_td64arr_rmod_tdscalar(self, box_with_array, three_days): expected = TimedeltaIndex(expected) expected = tm.box_expected(expected, box_with_array) + if isinstance(three_days, Day): + msg = "Cannot divide Day by TimedeltaArray" + with pytest.raises(TypeError, match=msg): + three_days % tdarr + return + result = three_days % tdarr tm.assert_equal(result, expected) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 8db056b8fef58..edfb3ca41c488 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -85,7 +85,7 @@ class SharedTests: def arr1d(self): """Fixture returning DatetimeArray with daily frequency.""" data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") return arr def test_compare_len1_raises(self, arr1d): @@ -175,7 +175,7 @@ def test_take(self): def test_take_fill_raises(self, fill_value): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got" with pytest.raises(TypeError, match=msg): @@ -184,7 +184,7 @@ def test_take_fill_raises(self, fill_value): def test_take_fill(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") result = arr.take([-1, 1], allow_fill=True, fill_value=None) assert result[0] is NaT @@ -219,7 +219,7 @@ def test_concat_same_type(self, arr1d): def test_unbox_scalar(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") result = arr._unbox_scalar(arr[0]) expected = arr._ndarray.dtype.type assert isinstance(result, expected) @@ -233,7 +233,7 @@ def test_unbox_scalar(self): def test_check_compatible_with(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") arr._check_compatible_with(arr[0]) arr._check_compatible_with(arr[:1]) @@ -241,13 +241,13 @@ def test_check_compatible_with(self): def test_scalar_from_string(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") result = arr._scalar_from_string(str(arr[0])) assert result == arr[0] def test_reduce_invalid(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") msg = "does not support reduction 'not a method'" with pytest.raises(TypeError, match=msg): @@ -256,7 +256,7 @@ def test_reduce_invalid(self): @pytest.mark.parametrize("method", ["pad", "backfill"]) def test_fillna_method_doesnt_change_orig(self, method): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") arr[4] = NaT fill_value = arr[3] if method == "pad" else arr[5] @@ -269,7 +269,7 @@ def test_fillna_method_doesnt_change_orig(self, method): def test_searchsorted(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") # scalar result = arr.searchsorted(arr[1]) @@ -407,7 +407,7 @@ def test_repr_2d(self, arr1d): def test_setitem(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") arr[0] = arr[1] expected = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 @@ -484,7 +484,7 @@ def test_setitem_categorical(self, arr1d, as_index): def test_setitem_raises(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") val = arr[0] with pytest.raises(IndexError, match="index 12 is out of bounds"): @@ -520,7 +520,7 @@ def test_setitem_numeric_raises(self, arr1d, box): def test_inplace_arithmetic(self): # GH#24115 check that iadd and isub are actually in-place data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") expected = arr + pd.Timedelta(days=1) arr += pd.Timedelta(days=1) @@ -533,7 +533,7 @@ def test_inplace_arithmetic(self): def test_shift_fill_int_deprecated(self): # GH#31971, enforced in 2.0 data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = self.array_cls(data, freq="D") + arr = self.array_cls(data, freq="24H") with pytest.raises(TypeError, match="value should be a"): arr.shift(1, fill_value=1) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index 69d92b1551e14..48578f725884a 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -237,7 +237,7 @@ def test_setitem_objects(self, obj): @pytest.mark.parametrize("index", [True, False]) def test_searchsorted_invalid_types(self, other, index): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 - arr = TimedeltaArray(data, freq="D") + arr = TimedeltaArray(data, freq="24H") if index: arr = pd.Index(arr) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index af1a94391a353..a8c2104a95206 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -19,17 +19,19 @@ class TestDatetimeIndex: def test_sub_datetime_preserves_freq(self, tz_naive_fixture): # GH#48818 + # GH#41943 we cannot reliably preserve non-tick freq when crossing + # DS dti = date_range("2016-01-01", periods=12, tz=tz_naive_fixture) res = dti - dti[0] expected = pd.timedelta_range("0 Days", "11 Days") tm.assert_index_equal(res, expected) - assert res.freq == expected.freq + if tz_naive_fixture is None: + assert res.freq == expected.freq + else: + # we _could_ preserve for UTC and fixed-offsets + assert res.freq is None - @pytest.mark.xfail( - reason="The inherited freq is incorrect bc dti.freq is incorrect " - "https://github.com/pandas-dev/pandas/pull/48818/files#r982793461" - ) def test_sub_datetime_preserves_freq_across_dst(self): # GH#48818 ts = Timestamp("2016-03-11", tz="US/Pacific") diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index f07a9dce5f6ae..314c2e8f7761d 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -251,7 +251,10 @@ def test_ceil_floor_edge(self, test_input, rounder, freq, expected): ) def test_round_int64(self, start, index_freq, periods, round_freq): dt = date_range(start=start, freq=index_freq, periods=periods) - unit = to_offset(round_freq).nanos + if round_freq == "1D": + unit = 24 * 3600 * 10**9 + else: + unit = to_offset(round_freq).nanos # test floor result = dt.floor(round_freq) diff --git a/pandas/tests/indexes/timedeltas/methods/test_insert.py b/pandas/tests/indexes/timedeltas/methods/test_insert.py index f8164102815f6..547eaaec0e5bf 100644 --- a/pandas/tests/indexes/timedeltas/methods/test_insert.py +++ b/pandas/tests/indexes/timedeltas/methods/test_insert.py @@ -136,7 +136,7 @@ def test_insert_empty(self): td = idx[0] result = idx[:0].insert(0, td) - assert result.freq == "D" + assert result.freq == "24H" with pytest.raises(IndexError, match="loc must be an integer between"): result = idx[:0].insert(1, td) diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py index 4e817ee708614..807486984efc2 100644 --- a/pandas/tests/indexes/timedeltas/test_constructors.py +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -208,7 +208,7 @@ def test_constructor_coverage(self): # non-conforming freq msg = ( "Inferred frequency None from passed values does not conform to " - "passed frequency D" + "passed frequency 24H" ) with pytest.raises(ValueError, match=msg): TimedeltaIndex(["1 days", "2 days", "4 days"], freq="D") diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py index 751f9e4cc9eee..78edaefa2aab4 100644 --- a/pandas/tests/indexes/timedeltas/test_formats.py +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -16,15 +16,15 @@ def test_representation(self, method): idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) - exp1 = "TimedeltaIndex([], dtype='timedelta64[ns]', freq='D')" + exp1 = "TimedeltaIndex([], dtype='timedelta64[ns]', freq='24H')" - exp2 = "TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='D')" + exp2 = "TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='24H')" - exp3 = "TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='D')" + exp3 = "TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='24H')" exp4 = ( "TimedeltaIndex(['1 days', '2 days', '3 days'], " - "dtype='timedelta64[ns]', freq='D')" + "dtype='timedelta64[ns]', freq='24H')" ) exp5 = ( @@ -76,13 +76,13 @@ def test_summary(self): idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D") idx5 = TimedeltaIndex(["1 days 00:00:01", "2 days", "3 days"]) - exp1 = "TimedeltaIndex: 0 entries\nFreq: D" + exp1 = "TimedeltaIndex: 0 entries\nFreq: 24H" - exp2 = "TimedeltaIndex: 1 entries, 1 days to 1 days\nFreq: D" + exp2 = "TimedeltaIndex: 1 entries, 1 days to 1 days\nFreq: 24H" - exp3 = "TimedeltaIndex: 2 entries, 1 days to 2 days\nFreq: D" + exp3 = "TimedeltaIndex: 2 entries, 1 days to 2 days\nFreq: 24H" - exp4 = "TimedeltaIndex: 3 entries, 1 days to 3 days\nFreq: D" + exp4 = "TimedeltaIndex: 3 entries, 1 days to 3 days\nFreq: 24H" exp5 = "TimedeltaIndex: 3 entries, 1 days 00:00:01 to 3 days 00:00:00" diff --git a/pandas/tests/indexes/timedeltas/test_freq_attr.py b/pandas/tests/indexes/timedeltas/test_freq_attr.py index 39b9c11aa833c..c86d1f5e8369f 100644 --- a/pandas/tests/indexes/timedeltas/test_freq_attr.py +++ b/pandas/tests/indexes/timedeltas/test_freq_attr.py @@ -11,7 +11,7 @@ class TestFreq: @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []]) - @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)]) + @pytest.mark.parametrize("freq", ["48H", Hour(48)]) def test_freq_setter(self, values, freq): # GH#20678 idx = TimedeltaIndex(values) @@ -30,12 +30,13 @@ def test_freq_setter_errors(self): idx = TimedeltaIndex(["0 days", "2 days", "4 days"]) # setting with an incompatible freq + # FIXME: should probably say "48H" rather than "2D"? msg = ( "Inferred frequency 2D from passed values does not conform to " - "passed frequency 5D" + "passed frequency 120H" ) with pytest.raises(ValueError, match=msg): - idx._data.freq = "5D" + idx._data.freq = "120H" # setting with a non-fixed frequency msg = r"<2 \* BusinessDays> is a non-fixed frequency" @@ -57,5 +58,5 @@ def test_freq_view_safe(self): assert tdi2.freq is None # Original was not altered - assert tdi.freq == "2D" - assert tda.freq == "2D" + assert tdi.freq == "48H" + assert tda.freq == "48H" diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index f6013baf86edc..6c3e9d0605d2a 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -11,4 +11,12 @@ def test_infer_freq(self, freq_sample): idx = timedelta_range("1", freq=freq_sample, periods=10) result = TimedeltaIndex(idx.asi8, freq="infer") tm.assert_index_equal(idx, result) - assert result.freq == freq_sample + + if freq_sample == "D": + assert result.freq == "24H" + elif freq_sample == "3D": + assert result.freq == "72H" + elif freq_sample == "-3D": + assert result.freq == "-72H" + else: + assert result.freq == freq_sample diff --git a/pandas/tests/indexes/timedeltas/test_setops.py b/pandas/tests/indexes/timedeltas/test_setops.py index cb6dce1e7ad80..650fb68cb2e07 100644 --- a/pandas/tests/indexes/timedeltas/test_setops.py +++ b/pandas/tests/indexes/timedeltas/test_setops.py @@ -90,7 +90,7 @@ def test_union_freq_infer(self): result = left.union(right) tm.assert_index_equal(result, tdi) - assert result.freq == "D" + assert result.freq == "24H" def test_intersection_bug_1708(self): index_1 = timedelta_range("1 day", periods=4, freq="h") diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 1170fd6a053d9..8aede79e922a8 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -25,9 +25,9 @@ class TestTimedeltaIndex(DatetimeLike): @pytest.fixture def simple_index(self) -> TimedeltaIndex: index = pd.to_timedelta(range(5), unit="d")._with_freq("infer") - assert index.freq == "D" + assert index.freq == "24H" ret = index + pd.offsets.Hour(1) - assert ret.freq == "D" + assert ret.freq == "24H" return ret @pytest.fixture diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 05fdddd7a4f4f..ec80c71b2b63d 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -30,7 +30,7 @@ def test_timedelta_range(self): result = timedelta_range("0 days", "10 days", freq="D") tm.assert_index_equal(result, expected) - expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day() + expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Timedelta(hours=24) result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D") tm.assert_index_equal(result, expected) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 3c46887dad859..3fe6e00f39683 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1801,6 +1801,11 @@ def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k, unit): dti = date_range("19910905 13:00", "19911005 07:00", freq=freq1).as_unit(unit) ser = Series(range(len(dti)), index=dti) + if freq2 == "D" and n2 % 1 != 0: + with pytest.raises(ValueError): + ser.resample(str(n2_) + freq2) + return + result1 = ser.resample(str(n1_) + freq1).mean() result2 = ser.resample(str(n2_) + freq2).mean() tm.assert_series_equal(result1, result2) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 6d0a56a947065..af7cda404af1c 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -826,15 +826,21 @@ def test_resample_with_only_nat(self): ) def test_resample_with_offset(self, start, end, start_freq, end_freq, offset): # GH 23882 & 31809 - s = Series(0, index=period_range(start, end, freq=start_freq)) - s = s + np.arange(len(s)) - result = s.resample(end_freq, offset=offset).mean() + pi = period_range(start, end, freq=start_freq) + ser = Series(np.arange(len(pi)), index=pi) + result = ser.resample(end_freq, offset=offset).mean() result = result.to_timestamp(end_freq) - expected = s.to_timestamp().resample(end_freq, offset=offset).mean() + expected = ser.to_timestamp().resample(end_freq, offset=offset).mean() if end_freq == "M": # TODO: is non-tick the relevant characteristic? (GH 33815) expected.index = expected.index._with_freq(None) + elif expected.index.freq.freqstr.endswith("H") and result.index.freq.freqstr.endswith("D"): + # TODO: this is a kludge introduced when implementing GH#41943 bc + # Tick comparison used to consider 24H==1D but no longer does. + # Implement a cleaner fix somewhere. + if expected.index.freq.n == 24 * result.index.freq.n: + expected.index.freq = result.index.freq tm.assert_series_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index b8a0a8068ba31..97b05606f094b 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -532,7 +532,7 @@ def test_period_cons_combined(self): with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq="-1H1D") - msg = "Frequency must be positive, because it represents span: 0D" + msg = "Frequency must be positive, because it represents span: 0H" with pytest.raises(ValueError, match=msg): Period("2011-01", freq="0D0H") with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 2dac346bc54d5..33a81dc7bf281 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -50,7 +50,7 @@ def test_overflow_offset_raises(self): with pytest.raises(OutOfBoundsTimedelta, match=lmsg2): stamp + offset_overflow - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=lmsg2): offset_overflow + stamp with pytest.raises(OutOfBoundsTimedelta, match=lmsg2): @@ -68,7 +68,7 @@ def test_overflow_offset_raises(self): with pytest.raises(OutOfBoundsTimedelta, match=lmsg3): stamp + offset_overflow - with pytest.raises(OverflowError, match=msg): + with pytest.raises(OutOfBoundsTimedelta, match=lmsg3): offset_overflow + stamp with pytest.raises(OutOfBoundsTimedelta, match=lmsg3): diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 2fee395886cff..19852de61e1c7 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -264,7 +264,10 @@ def test_round_int64(self, timestamp, freq): # check that all rounding modes are accurate to int64 precision # see GH#22591 dt = Timestamp(timestamp).as_unit("ns") - unit = to_offset(freq).nanos + if freq == "1D": + unit = 24 * 3600 * 10**9 + else: + unit = to_offset(freq).nanos # test floor result = dt.floor(freq) diff --git a/pandas/tests/tseries/frequencies/test_freq_code.py b/pandas/tests/tseries/frequencies/test_freq_code.py index e961fdc295c96..6e08cbf6abb51 100644 --- a/pandas/tests/tseries/frequencies/test_freq_code.py +++ b/pandas/tests/tseries/frequencies/test_freq_code.py @@ -56,7 +56,6 @@ def test_get_freq_roundtrip2(freq): ((1.04, "H"), (3744, "S")), ((1, "D"), (1, "D")), ((0.342931, "H"), (1234551600, "U")), - ((1.2345, "D"), (106660800, "L")), ], ) def test_resolution_bumping(args, expected): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index bfc5139c78b91..edc2d1e9bda57 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -44,6 +44,7 @@ FY5253, BaseOffset, BDay, + Day, BMonthEnd, BusinessHour, CustomBusinessDay, @@ -219,7 +220,7 @@ def test_offset_freqstr(self, offset_types): assert offset.rule_code == code def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=False): - if normalize and issubclass(offset, Tick): + if normalize and issubclass(offset, (Tick, Day)): # normalize=True disallowed for Tick subclasses GH#21427 return @@ -449,7 +450,7 @@ def test_is_on_offset(self, offset_types, expecteds): assert offset_s.is_on_offset(dt) # when normalize=True, is_on_offset checks time is 00:00:00 - if issubclass(offset_types, Tick): + if issubclass(offset_types, (Tick, Day)): # normalize=True disallowed for Tick subclasses GH#21427 return offset_n = _create_offset(offset_types, normalize=True) @@ -481,7 +482,7 @@ def test_add(self, offset_types, tz_naive_fixture, expecteds): assert result == expected_localize # normalize=True, disallowed for Tick subclasses GH#21427 - if issubclass(offset_types, Tick): + if issubclass(offset_types, (Tick, Day)): return offset_s = _create_offset(offset_types, normalize=True) expected = Timestamp(expected.date()) diff --git a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py index 69953955ebbce..22a5127078dd3 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -53,7 +53,7 @@ def test_delta_to_tick(): delta = timedelta(3) tick = delta_to_tick(delta) - assert tick == offsets.Day(3) + assert tick == offsets.Hour(72) td = Timedelta(nanoseconds=5) tick = delta_to_tick(td) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index a596d4a85074e..0b95f26237698 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -29,6 +29,7 @@ def test_namespace(): "NaTType", "iNaT", "nat_strings", + "Day", "OutOfBoundsDatetime", "OutOfBoundsTimedelta", "Period", diff --git a/pandas/tests/tslibs/test_to_offset.py b/pandas/tests/tslibs/test_to_offset.py index 27ddbb82f49a9..2ebd86b0e14ca 100644 --- a/pandas/tests/tslibs/test_to_offset.py +++ b/pandas/tests/tslibs/test_to_offset.py @@ -139,7 +139,7 @@ def test_to_offset_leading_plus(freqstr, expected): ({"days": -1, "seconds": 1}, offsets.Second(-86399)), ({"hours": 1, "minutes": 10}, offsets.Minute(70)), ({"hours": 1, "minutes": -10}, offsets.Minute(50)), - ({"weeks": 1}, offsets.Day(7)), + ({"weeks": 1}, offsets.Hour(168)), ({"hours": 1}, offsets.Hour(1)), ({"hours": 1}, to_offset("60min")), ({"microseconds": 1}, offsets.Micro(1)), From afa935d834f3331e8796f68223563de35061041f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Mar 2023 15:34:22 -0800 Subject: [PATCH 02/13] CLN --- pandas/_libs/tslibs/offsets.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 8fd990bce843e..86d7d1d237c61 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1109,11 +1109,11 @@ cdef class Day(SingleConstructorOffset): if isinstance(other, Day): # TODO: why isn't this handled in __add__? return Day(self.n + other.n) - return other + np.timedelta64(self.n, "D")# Timedelta(days=self.n).as_unit("s") + return other + np.timedelta64(self.n, "D") @apply_array_wraps def _apply_array(self, dtarr): - return dtarr + np.timedelta64(self.n, "D")#Timedelta(days=self.n).as_unit("s") + return dtarr + np.timedelta64(self.n, "D") @cache_readonly def freqstr(self) -> str: From a81ac610a0922181a3750f67e6009e51cc05c6c1 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 9 Mar 2023 18:37:49 -0800 Subject: [PATCH 03/13] fix remaining tests --- pandas/_libs/tslibs/offsets.pyx | 5 ++++ pandas/_libs/tslibs/timedeltas.pyx | 8 +++---- pandas/_libs/tslibs/timestamps.pyx | 7 ++---- pandas/_testing/asserters.py | 8 +++++-- pandas/core/arrays/datetimelike.py | 22 ++++++++++------- pandas/core/arrays/datetimes.py | 7 +++--- pandas/core/arrays/period.py | 4 ++-- pandas/core/arrays/timedeltas.py | 17 ++++++++----- pandas/core/indexes/datetimelike.py | 24 ++++--------------- pandas/core/indexes/period.py | 3 ++- pandas/core/resample.py | 21 +++++++++------- .../arrays/timedeltas/test_constructors.py | 2 +- pandas/tests/indexes/datetimes/test_misc.py | 3 ++- .../indexes/timedeltas/test_freq_attr.py | 3 +-- pandas/tests/indexing/test_partial.py | 6 ++--- pandas/tests/resample/test_datetime_index.py | 3 ++- .../scalar/timedelta/test_constructors.py | 8 ++++++- pandas/tests/tseries/offsets/test_dst.py | 2 +- 18 files changed, 81 insertions(+), 72 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 86d7d1d237c61..91ede1e042f56 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -787,6 +787,11 @@ cdef class BaseOffset: def nanos(self): raise ValueError(f"{self} is a non-fixed frequency") + def _maybe_to_hours(self): + if not isinstance(self, Day): + return self + return Hour(self.n * 24) + def is_anchored(self) -> bool: # TODO: Does this make sense for the general case? It would help # if there were a canonical docstring for what is_anchored means. diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 4a36b2684dbce..f7128e1c834a9 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1820,13 +1820,11 @@ class Timedelta(_Timedelta): int64_t result, unit ndarray[int64_t] arr - from pandas._libs.tslibs.offsets import to_offset, Day + from pandas._libs.tslibs.offsets import to_offset orig = freq - freq = to_offset(freq) - if isinstance(freq, Day): - # In this context it is clear D represents 24 hours - freq = 24 * freq.n * to_offset("H") + # In this context it is sufficiently clear that "D" this means 24H + freq = to_offset(freq)._maybe_to_hours() freq.nanos # raises on non-fixed freq unit = delta_to_nanoseconds(to_offset(freq), self._creso) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 949e50ae1ecee..158d0317f9644 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -99,7 +99,6 @@ from pandas._libs.tslibs.np_datetime import ( OutOfBoundsTimedelta, ) -from pandas._libs.tslibs.offsets import Day from pandas._libs.tslibs.offsets cimport to_offset from pandas._libs.tslibs.timedeltas cimport ( _Timedelta, @@ -1673,10 +1672,8 @@ class Timestamp(_Timestamp): cdef: int64_t nanos - freq = to_offset(freq) - if isinstance(freq, Day): - # In this context it is sufficiently clear that this means 24H - freq = freq.n * 24 * to_offset("H") + # In this context it is sufficiently clear that "D" this means 24H + freq = to_offset(freq)._maybe_to_hours() freq.nanos # raises on non-fixed freq nanos = delta_to_nanoseconds(freq, self._creso) if nanos == 0: diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 95d1bc5250dc5..d9974590f23ef 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -10,6 +10,11 @@ from pandas._libs.missing import is_matching_na from pandas._libs.sparse import SparseIndex import pandas._libs.testing as _testing +from pandas._libs.tslibs import ( + Day, + Tick, + Timedelta, +) from pandas.core.dtypes.common import ( is_bool, @@ -53,7 +58,6 @@ from pandas.core.indexes.api import safe_sort_index from pandas.io.formats.printing import pprint_thing -from pandas._libs.tslibs import Day, Timedelta, Tick def assert_almost_equal( @@ -1025,7 +1029,7 @@ def assert_freq_equal(left, right): elif isinstance(right, Tick): assert right == Timedelta(days=left.n) else: - assert left == right # will raise + assert left == right, (left, right) elif isinstance(right, Day): assert_freq_equal(right, left) else: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index c4f33ef3ef01a..07578f75a015a 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -30,6 +30,7 @@ from pandas._libs.arrays import NDArrayBacked from pandas._libs.tslibs import ( BaseOffset, + Day, IncompatibleFrequency, NaT, NaTType, @@ -40,7 +41,6 @@ Timestamp, astype_overflowsafe, delta_to_nanoseconds, - Day, get_unit_from_dtype, iNaT, ints_to_pydatetime, @@ -1134,8 +1134,9 @@ def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray: res_m8 = res_values.view(f"timedelta64[{self.unit}]") new_freq = self._get_arithmetic_result_freq(other) - if isinstance(new_freq, Day): - new_freq = new_freq.n * 24 * to_offset("H") # TODO: Day method for this? + if new_freq is not None: + # TODO: are we sure this is right? + new_freq = new_freq._maybe_to_hours() return TimedeltaArray._simple_new(res_m8, dtype=res_m8.dtype, freq=new_freq) @final @@ -1861,7 +1862,11 @@ def __init__( if copy: values = values.copy() if freq: + if values.dtype.kind == "m" and isinstance(freq, Day): + raise TypeError("TimedeltaArray freq must be a Tick or None") freq = to_offset(freq) + if values.dtype.kind == "m": + freq = freq._maybe_to_hours() NDArrayBacked.__init__(self, values=values, dtype=dtype) self.freq = freq @@ -2011,9 +2016,8 @@ def _round(self, freq, mode, ambiguous, nonexistent): values = self.view("i8") values = cast(np.ndarray, values) - freq = to_offset(freq) - if isinstance(freq, Day): - freq = freq.n * 24 * to_offset("H") + # In this context it is clear "D" means "24H"" + freq = to_offset(freq)._maybe_to_hours() nanos = freq.nanos # raises on non-fixed frequencies nanos = delta_to_nanoseconds(to_offset(freq), self._creso) result_i8 = round_nsint64(values, mode, nanos) @@ -2092,9 +2096,9 @@ def _with_freq(self, freq): # As an internal method, we can ensure this assertion always holds assert freq == "infer" freq = to_offset(self.inferred_freq) - if isinstance(freq, Day) and self.dtype.kind == "m": - # FIXME: inferred_freq is wrong here - freq = freq.n * 24 * to_offset("H") + if freq is not None and self.dtype.kind == "m": + # TODO: handle this in self.inferred_freq? + freq = freq._maybe_to_hours() if self.dtype.kind == "m" and freq is not None: assert isinstance(freq, Tick) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 730e914d230fa..5de6eeafa79da 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -443,10 +443,9 @@ def _generate_range( # type: ignore[override] end = end.tz_localize(None) if isinstance(freq, Tick) or (tz is None and isinstance(freq, Day)): - tfreq = freq - if isinstance(freq, Day): - tfreq = freq.n * 24 * to_offset("H") - i8values = generate_regular_range(start, end, periods, tfreq, unit=unit) + i8values = generate_regular_range( + start, end, periods, freq._maybe_to_hours(), unit=unit + ) else: xdr = _generate_range( start=start, end=end, periods=periods, offset=freq, unit=unit diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index e76460e990e92..49003870407b6 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -35,8 +35,8 @@ from pandas._libs.tslibs.dtypes import FreqGroup from pandas._libs.tslibs.fields import isleapyear_arr from pandas._libs.tslibs.offsets import ( - Tick, Day, + Tick, delta_to_tick, ) from pandas._libs.tslibs.period import ( @@ -794,7 +794,7 @@ def _check_timedeltalike_freq_compat(self, other): ------ IncompatibleFrequency """ - assert isinstance(self.freq, Tick) # checked by calling function + assert isinstance(self.freq, (Tick, Day)) # checked by calling function dtype = np.dtype(f"m8[{self.freq._td64_unit}]") diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 1a07d7d2de571..52c07848aa18e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -17,9 +17,9 @@ ) from pandas._libs.tslibs import ( BaseOffset, + Day, NaT, NaTType, - Day, Tick, Timedelta, astype_overflowsafe, @@ -244,11 +244,12 @@ def _from_sequence_not_strict( if isinstance(freq, Day): raise ValueError( - "Day offset object is not valid for TimedeltaIndex, pass e.g. 24H instead." + "Day offset object is not valid for TimedeltaIndex, " + "pass e.g. 24H instead." ) freq, freq_infer = dtl.maybe_infer_freq(freq) - if isinstance(freq, Day): - freq = freq.n * 24 * to_offset("H") + if freq is not None: + freq = freq._maybe_to_hours() data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit) freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer) @@ -268,8 +269,9 @@ def _from_sequence_not_strict( # Set _freq directly to bypass duplicative _validate_frequency # check. res_freq = to_offset(result.inferred_freq) - if isinstance(res_freq, Day): - res_freq = res_freq.n * 24 * to_offset("H") + if res_freq is not None: + # TODO: handle this in inferred_freq + res_freq = res_freq._maybe_to_hours() result._freq = res_freq return result @@ -284,6 +286,9 @@ def _generate_range( # type: ignore[override] if freq is None and any(x is None for x in [periods, start, end]): raise ValueError("Must provide freq argument if no data is supplied") + if isinstance(freq, Day): + raise TypeError("TimedeltaArray/Index freq must be a Tick or None") + if com.count_not_none(start, end, periods, freq) != 3: raise ValueError( "Of the four parameters: start, end, periods, " diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 4a8908b879ce0..f492151516551 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -26,9 +26,9 @@ ) from pandas._libs.tslibs import ( BaseOffset, + Day, Resolution, Tick, - Day, parsing, to_offset, ) @@ -546,16 +546,7 @@ def _intersection(self, other: Index, sort: bool = False) -> Index: # and type(result) is type(self._data) result = self._wrap_setop_result(other, result) result = result._with_freq(None)._with_freq("infer") - - # TODO: could share this with the union restore-Day code but - # at this point we have an Index here while we have a DTA/TDA theres - if isinstance(self.freq, Day) and isinstance(result.freq, Tick): - # If we infer a 24H-like freq but are D, restore "D" - td = Timedelta(result.freq) - div, mod = divmod(td.value, 24 * 3600 * 10**9) - if mod == 0: - freq = to_offset("D") * div - result._data._freq = freq + result = self._maybe_restore_day(result._data) return result else: @@ -679,16 +670,9 @@ def _union(self, other, sort): return result else: result = super()._union(other, sort)._with_freq("infer") - if isinstance(self.freq, Day) and isinstance(result.freq, Tick): - # If we infer a 24H-like freq but are D, restore "D" - td = Timedelta(result.freq) - div, mod = divmod(td.value, 24 * 3600 * 10**9) - if mod == 0: - freq = to_offset("D") * div - result._freq = freq - return result + return self._maybe_restore_day(result) - def _maybe_restore_day(self, result: DatetimeArray | TimedeltaArray) -> DatetimeArray | TimedeltaArray: + def _maybe_restore_day(self, result: _TDT) -> _TDT: if isinstance(self.freq, Day) and isinstance(result.freq, Tick): # If we infer a 24H-like freq but are D, restore "D" td = Timedelta(result.freq) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index faa1e9658fa80..ae850f0648187 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -14,6 +14,7 @@ from pandas._libs import index as libindex from pandas._libs.tslibs import ( BaseOffset, + Day, NaT, Period, Resolution, @@ -288,7 +289,7 @@ def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]: of self.freq. Note IncompatibleFrequency subclasses ValueError. """ if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)): - if isinstance(self.freq, Tick): + if isinstance(self.freq, (Tick, Day)): # _check_timedeltalike_freq_compat will raise if incompatible delta = self._data._check_timedeltalike_freq_compat(other) return delta diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 15014ed85ed23..49e0ba9e03585 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -24,7 +24,6 @@ Timedelta, Timestamp, to_offset, - Day, ) from pandas._typing import NDFrameT from pandas.compat.numpy import function as nv @@ -1775,7 +1774,6 @@ def _get_time_bins(self, ax: DatetimeIndex): ax_values = ax.asi8 binner, bin_edges = self._adjust_bin_edges(binner, ax_values) - # general version, knowing nothing about relative frequencies bins = lib.generate_bins_dt64( ax_values, bin_edges, self.closed, hasnans=ax.hasnans @@ -1835,7 +1833,8 @@ def _get_time_delta_bins(self, ax: TimedeltaIndex): freq = self.freq if isinstance(freq, Day): - # TODO: are we super-duper sure this is safe? maybe we can unify conversion earlier? + # TODO: are we super-duper sure this is safe? maybe we can unify + # conversion earlier? freq = 24 * freq.n * to_offset("H") if not len(ax): if not isinstance(freq, Tick): @@ -1849,9 +1848,7 @@ def _get_time_delta_bins(self, ax: TimedeltaIndex): if self.closed == "right": end += freq - labels = binner = timedelta_range( - start=start, end=end, freq=freq, name=ax.name - ) + labels = binner = timedelta_range(start=start, end=end, freq=freq, name=ax.name) end_stamps = labels if self.closed == "left": @@ -2024,7 +2021,7 @@ def _get_timestamp_range_edges( ------- A tuple of length 2, containing the adjusted pd.Timestamp objects. """ - if isinstance(freq, Tick): + if isinstance(freq, (Tick, Day)): index_tz = first.tz if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None): raise ValueError("The origin must have the same timezone as the index.") @@ -2034,6 +2031,8 @@ def _get_timestamp_range_edges( origin = Timestamp("1970-01-01", tz=index_tz) if isinstance(freq, Day): + # TODO: should we change behavior for next comment now that Day + # respects DST? # _adjust_dates_anchored assumes 'D' means 24H, but first/last # might contain a DST transition (23H, 24H, or 25H). # So "pretend" the dates are naive when adjusting the endpoints @@ -2043,7 +2042,13 @@ def _get_timestamp_range_edges( origin = origin.tz_localize(None) first, last = _adjust_dates_anchored( - first, last, freq, closed=closed, origin=origin, offset=offset, unit=unit + first, + last, + freq._maybe_to_hours(), + closed=closed, + origin=origin, + offset=offset, + unit=unit, ) if isinstance(freq, Day): first = first.tz_localize(index_tz) diff --git a/pandas/tests/arrays/timedeltas/test_constructors.py b/pandas/tests/arrays/timedeltas/test_constructors.py index 3a076a6828a98..76e9ec25e066d 100644 --- a/pandas/tests/arrays/timedeltas/test_constructors.py +++ b/pandas/tests/arrays/timedeltas/test_constructors.py @@ -23,7 +23,7 @@ def test_freq_validation(self): msg = ( "Inferred frequency None from passed values does not " - "conform to passed frequency D" + "conform to passed frequency 24H" ) with pytest.raises(ValueError, match=msg): TimedeltaArray(arr.view("timedelta64[ns]"), freq="D") diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index f13dfcd5c20bd..6946559932c88 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -302,4 +302,5 @@ def test_add_timedelta_preserves_freq(): freq="D", ) result = dti + Timedelta(days=1) - assert result.freq == dti.freq + # GH#??? with tzaware we can no longer retain "D" freq in addition + assert result.freq is None diff --git a/pandas/tests/indexes/timedeltas/test_freq_attr.py b/pandas/tests/indexes/timedeltas/test_freq_attr.py index c86d1f5e8369f..d2b5ec4254a0e 100644 --- a/pandas/tests/indexes/timedeltas/test_freq_attr.py +++ b/pandas/tests/indexes/timedeltas/test_freq_attr.py @@ -4,7 +4,6 @@ from pandas.tseries.offsets import ( DateOffset, - Day, Hour, ) @@ -32,7 +31,7 @@ def test_freq_setter_errors(self): # setting with an incompatible freq # FIXME: should probably say "48H" rather than "2D"? msg = ( - "Inferred frequency 2D from passed values does not conform to " + "Inferred frequency 48H from passed values does not conform to " "passed frequency 120H" ) with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index a2fcd18ba5bfe..508877240f7f5 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -335,9 +335,9 @@ def test_partial_setting2(self): np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"] ) - expected = pd.concat( - [df_orig, DataFrame({"A": 7}, index=dates[-1:] + dates.freq)], sort=True - ) + exp_index = dates[-1:] + dates.freq + exp_index.freq = dates.freq + expected = pd.concat([df_orig, DataFrame({"A": 7}, index=exp_index)], sort=True) df = df_orig.copy() df.loc[dates[-1] + dates.freq, "A"] = 7 tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 3fe6e00f39683..39d4da95d8556 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1802,7 +1802,8 @@ def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k, unit): ser = Series(range(len(dti)), index=dti) if freq2 == "D" and n2 % 1 != 0: - with pytest.raises(ValueError): + msg = "Invalid frequency: (0.25|0.5|0.75|1.0|1.5)D" + with pytest.raises(ValueError, match=msg): ser.resample(str(n2_) + freq2) return diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index ad9dd408fbeaf..78709fc49ec2c 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -90,7 +90,13 @@ def test_from_tick_reso(): assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value tick = offsets.Day() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value + msg = ( + "Value must be Timedelta, string, integer, float, timedelta or " + "convertible, not Day" + ) + with pytest.raises(ValueError, match=msg): + # TODO: should be TypeError? + Timedelta(tick) def test_construction(): diff --git a/pandas/tests/tseries/offsets/test_dst.py b/pandas/tests/tseries/offsets/test_dst.py index e00b7d0b78059..94977115e9914 100644 --- a/pandas/tests/tseries/offsets/test_dst.py +++ b/pandas/tests/tseries/offsets/test_dst.py @@ -173,7 +173,7 @@ def test_springforward_singular(self): QuarterEnd: ["11/2/2012", "12/31/2012"], BQuarterBegin: ["11/2/2012", "12/3/2012"], BQuarterEnd: ["11/2/2012", "12/31/2012"], - Day: ["11/4/2012", "11/4/2012 23:00"], + Day: ["11/4/2012", "11/5/2012"], }.items() @pytest.mark.parametrize("tup", offset_classes) From bd9a2356b141d5c7650797a6767822acceda7c94 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 10 Mar 2023 11:09:54 -0800 Subject: [PATCH 04/13] lint fixups --- doc/source/reference/offset_frequency.rst | 1 - pandas/_libs/tslibs/__init__.py | 4 ++-- pandas/_libs/tslibs/period.pyx | 7 +++++-- pandas/core/generic.py | 2 +- pandas/core/indexes/interval.py | 6 ++++-- pandas/core/indexes/timedeltas.py | 10 ++++++---- pandas/core/window/rolling.py | 2 +- pandas/tests/arithmetic/test_numeric.py | 3 ++- pandas/tests/arithmetic/test_timedelta64.py | 6 +++--- pandas/tests/indexes/timedeltas/test_formats.py | 4 +++- .../tests/indexes/timedeltas/test_timedelta_range.py | 9 ++++----- pandas/tests/resample/test_period_index.py | 4 +++- pandas/tests/scalar/timestamp/test_arithmetic.py | 5 ----- pandas/tests/tseries/offsets/test_offsets.py | 2 +- 14 files changed, 35 insertions(+), 30 deletions(-) diff --git a/doc/source/reference/offset_frequency.rst b/doc/source/reference/offset_frequency.rst index ab89fe74e7337..da88b8d42dd6c 100644 --- a/doc/source/reference/offset_frequency.rst +++ b/doc/source/reference/offset_frequency.rst @@ -1107,7 +1107,6 @@ Properties .. autosummary:: :toctree: api/ - Day.delta Day.freqstr Day.kwds Day.name diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 5f2e9235afa9e..39ad7a0d95122 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -33,7 +33,7 @@ "is_supported_unit", "npy_unit_to_abbrev", "get_supported_reso", - "Day" + "Day", ] from pandas._libs.tslibs import dtypes @@ -61,9 +61,9 @@ ) from pandas._libs.tslibs.offsets import ( BaseOffset, + Day, Tick, to_offset, - Day, ) from pandas._libs.tslibs.period import ( IncompatibleFrequency, diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index f66d73cef7b23..8898e556ae48f 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -23,8 +23,8 @@ from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, datetime, - timedelta, import_datetime, + timedelta, ) from libc.stdlib cimport ( free, @@ -106,7 +106,10 @@ from pandas._libs.tslibs.offsets cimport ( to_offset, ) -from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG, Day +from pandas._libs.tslibs.offsets import ( + INVALID_FREQ_ERR_MSG, + Day, +) cdef: enum: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 21a1e344767a5..29aec2c16410f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -38,9 +38,9 @@ from pandas._libs import lib from pandas._libs.lib import is_range_indexer from pandas._libs.tslibs import ( + Day, Period, Tick, - Day, Timestamp, to_offset, ) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 5f266813503f9..43c2274bd624f 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -23,9 +23,9 @@ ) from pandas._libs.tslibs import ( BaseOffset, + Day, Timedelta, Timestamp, - Day, to_offset, ) from pandas.errors import InvalidIndexError @@ -1095,7 +1095,9 @@ def interval_range( raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) from err - if isinstance(freq, Day) and (isinstance(start, Timedelta) or isinstance(end, Timedelta)): + if isinstance(freq, Day) and ( + isinstance(start, Timedelta) or isinstance(end, Timedelta) + ): freq = freq.n * 24 * to_offset("H") # verify type compatibility diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 5d5f530ca1d4f..0436ea64d9851 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -8,10 +8,10 @@ lib, ) from pandas._libs.tslibs import ( + Day, Resolution, Timedelta, to_offset, - Day, ) from pandas.core.dtypes.common import ( @@ -303,16 +303,18 @@ def timedelta_range( >>> pd.timedelta_range("1 Day", periods=3, freq="100000D", unit="s") TimedeltaIndex(['1 days 00:00:00', '100001 days 00:00:00', '200001 days 00:00:00'], - dtype='timedelta64[s]', freq='100000D') + dtype='timedelta64[s]', freq='2400000H') """ - orig = freq if freq is None and com.any_none(periods, start, end): freq = "24H" if isinstance(freq, Day): # If a user specifically passes a Day *object* we disallow it, # but if they pass a Day-like string we'll convert it to hourly below. - raise ValueError("Passing a Day offset to timedelta_range is not allowed, pass an hourly offset instead") + raise ValueError( + "Passing a Day offset to timedelta_range is not allowed, " + "pass an hourly offset instead" + ) freq, _ = dtl.maybe_infer_freq(freq) if isinstance(freq, Day): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 477ef10aae177..d1cf4ac9d92a8 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -23,8 +23,8 @@ from pandas._libs.tslibs import ( BaseOffset, - to_offset, Day, + to_offset, ) import pandas._libs.window.aggregations as window_aggregations from pandas.compat._optional import import_optional_dependency diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index ed3aa00828ef0..6fea2b90e3eac 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -11,6 +11,8 @@ import numpy as np import pytest +from pandas._libs.tslibs import Day + import pandas as pd from pandas import ( Index, @@ -20,7 +22,6 @@ TimedeltaIndex, array, ) -from pandas._libs.tslibs import Day import pandas._testing as tm from pandas.core import ops from pandas.core.computation import expressions as expr diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index b2374368569f8..bf44adc5c6673 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -8,6 +8,7 @@ import numpy as np import pytest +from pandas._libs.tslibs import Day from pandas.errors import ( OutOfBoundsDatetime, PerformanceWarning, @@ -33,7 +34,6 @@ assert_invalid_comparison, get_upcast_box, ) -from pandas._libs.tslibs import Day def assert_dtype(obj, expected_dtype): @@ -1009,7 +1009,7 @@ def test_td64arr_add_sub_datetimelike_scalar( ts = dt_scalar tdi = timedelta_range("1 day", periods=3) - expected = pd.date_range("2012-01-02", periods=3, tz=tz) + expected = pd.date_range("2012-01-02", periods=3, tz=tz, freq="24H") tdarr = tm.box_expected(tdi, box_with_array) expected = tm.box_expected(expected, box_with_array) @@ -1017,7 +1017,7 @@ def test_td64arr_add_sub_datetimelike_scalar( tm.assert_equal(ts + tdarr, expected) tm.assert_equal(tdarr + ts, expected) - expected2 = pd.date_range("2011-12-31", periods=3, freq="-1D", tz=tz) + expected2 = pd.date_range("2011-12-31", periods=3, freq="-24H", tz=tz) expected2 = tm.box_expected(expected2, box_with_array) tm.assert_equal(ts - tdarr, expected2) diff --git a/pandas/tests/indexes/timedeltas/test_formats.py b/pandas/tests/indexes/timedeltas/test_formats.py index 78edaefa2aab4..48c2b4e3db469 100644 --- a/pandas/tests/indexes/timedeltas/test_formats.py +++ b/pandas/tests/indexes/timedeltas/test_formats.py @@ -20,7 +20,9 @@ def test_representation(self, method): exp2 = "TimedeltaIndex(['1 days'], dtype='timedelta64[ns]', freq='24H')" - exp3 = "TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='24H')" + exp3 = ( + "TimedeltaIndex(['1 days', '2 days'], dtype='timedelta64[ns]', freq='24H')" + ) exp4 = ( "TimedeltaIndex(['1 days', '2 days', '3 days'], " diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index ec80c71b2b63d..a184aa8862cd7 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -8,10 +8,7 @@ ) import pandas._testing as tm -from pandas.tseries.offsets import ( - Day, - Second, -) +from pandas.tseries.offsets import Second class TestTimedeltas: @@ -30,7 +27,9 @@ def test_timedelta_range(self): result = timedelta_range("0 days", "10 days", freq="D") tm.assert_index_equal(result, expected) - expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Timedelta(hours=24) + expected = ( + to_timedelta(np.arange(5), unit="D") + Second(2) + Timedelta(hours=24) + ) result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D") tm.assert_index_equal(result, expected) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index af7cda404af1c..6645bfc22e4b3 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -835,7 +835,9 @@ def test_resample_with_offset(self, start, end, start_freq, end_freq, offset): if end_freq == "M": # TODO: is non-tick the relevant characteristic? (GH 33815) expected.index = expected.index._with_freq(None) - elif expected.index.freq.freqstr.endswith("H") and result.index.freq.freqstr.endswith("D"): + elif expected.index.freq.freqstr.endswith( + "H" + ) and result.index.freq.freqstr.endswith("D"): # TODO: this is a kludge introduced when implementing GH#41943 bc # Tick comparison used to consider 24H==1D but no longer does. # Implement a cleaner fix somewhere. diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 33a81dc7bf281..0d8ac419f1228 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -40,11 +40,6 @@ def test_overflow_offset_raises(self): stamp = Timestamp("2017-01-13 00:00:00").as_unit("ns") offset_overflow = 20169940 * offsets.Day(1) - msg = ( - "the add operation between " - r"\<-?\d+ \* Days\> and \d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2} " - "will overflow" - ) lmsg2 = r"Cannot cast -?20169940 days \+?00:00:00 to unit='ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=lmsg2): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index edc2d1e9bda57..f6894e4c15c43 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -44,7 +44,6 @@ FY5253, BaseOffset, BDay, - Day, BMonthEnd, BusinessHour, CustomBusinessDay, @@ -52,6 +51,7 @@ CustomBusinessMonthBegin, CustomBusinessMonthEnd, DateOffset, + Day, Easter, FY5253Quarter, LastWeekOfMonth, From eee39add7037565e14c506a20718cd3508ebf6ca Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 10 Mar 2023 13:06:08 -0800 Subject: [PATCH 05/13] remove extra to_hours --- pandas/_testing/asserters.py | 26 +++----------------- pandas/core/resample.py | 18 +++++++++++--- pandas/tests/resample/test_datetime_index.py | 6 +++-- pandas/tests/resample/test_resample_api.py | 2 +- 4 files changed, 22 insertions(+), 30 deletions(-) diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index d9974590f23ef..e25e8388bc4cd 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -10,11 +10,6 @@ from pandas._libs.missing import is_matching_na from pandas._libs.sparse import SparseIndex import pandas._libs.testing as _testing -from pandas._libs.tslibs import ( - Day, - Tick, - Timedelta, -) from pandas.core.dtypes.common import ( is_bool, @@ -551,7 +546,7 @@ def assert_datetime_array_equal( assert_numpy_array_equal(left._ndarray, right._ndarray, obj=f"{obj}._ndarray") if check_freq: - assert_freq_equal(left.freq, right.freq) + assert_attr_equal("freq", left, right, obj=obj) assert_attr_equal("tz", left, right, obj=obj) @@ -899,7 +894,7 @@ def assert_series_equal( if check_freq and isinstance(left.index, (DatetimeIndex, TimedeltaIndex)): lidx = left.index ridx = right.index - assert_freq_equal(lidx.freq, ridx.freq) + assert lidx.freq == ridx.freq, (lidx.freq, ridx.freq) if check_dtype: # We want to skip exact dtype checking when `check_categorical` @@ -1021,21 +1016,6 @@ def assert_series_equal( ) -def assert_freq_equal(left, right): - # TODO: sure we want to do this??? - if isinstance(left, Day): - if isinstance(right, Day): - assert left == right - elif isinstance(right, Tick): - assert right == Timedelta(days=left.n) - else: - assert left == right, (left, right) - elif isinstance(right, Day): - assert_freq_equal(right, left) - else: - assert left == right - - # This could be refactored to use the NDFrame.equals method def assert_frame_equal( left, @@ -1254,7 +1234,7 @@ def assert_equal(left, right, **kwargs) -> None: if isinstance(left, Index): assert_index_equal(left, right, **kwargs) if isinstance(left, (DatetimeIndex, TimedeltaIndex)): - assert_freq_equal(left.freq, right.freq) + assert left.freq == right.freq, (left.freq, right.freq) elif isinstance(left, Series): assert_series_equal(left, right, **kwargs) elif isinstance(left, DataFrame): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 49e0ba9e03585..22cc296a949ec 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1278,8 +1278,6 @@ def _downsample(self, how, **kwargs): if not len(ax): # reset to the new freq freq = self.freq - if isinstance(freq, Day) and obj.index.dtype.kind == "m": - freq = freq.n * 24 * to_offset("H") if not isinstance(freq, Tick) and obj.index.dtype.kind == "m": # FIXME: wrong in the status quo! freq = None @@ -1530,6 +1528,19 @@ def get_resampler(obj: Series | DataFrame, kind=None, **kwds) -> Resampler: """ Create a TimeGrouper and return our resampler. """ + freq = kwds.get("freq", None) + if freq is not None: + # TODO: same thing in get_resampler_for_grouping? + freq = to_offset(freq) + axis = kwds.get("axis", 0) + axis = obj._get_axis_number(axis) + ax = obj.axes[axis] + if isinstance(ax, TimedeltaIndex): + # TODO: could disallow/deprecate Day _object_ while still + # allowing "D" string? + freq = freq._maybe_to_hours() + kwds["freq"] = freq + tg = TimeGrouper(**kwds) return tg._get_resampler(obj, kind=kind) @@ -2288,8 +2299,7 @@ def _asfreq_compat(index: DatetimeIndex | PeriodIndex | TimedeltaIndex, freq): elif isinstance(index, TimedeltaIndex): if freq is not None: freq = to_offset(freq) # TODO: do this earlier? - if isinstance(freq, Day): - freq = freq.n * 24 * to_offset("H") + freq = freq._maybe_to_hours() if not isinstance(freq, Tick): # FIXME: wrong in main freq = None diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 39d4da95d8556..fda13976a6e1a 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -912,7 +912,7 @@ def test_resample_origin_epoch_with_tz_day_vs_24h(unit): result_1 = ts_1.resample("D", origin="epoch").mean() result_2 = ts_1.resample("24H", origin="epoch").mean() - tm.assert_series_equal(result_1, result_2) + tm.assert_series_equal(result_1, result_2, check_freq=False) # check that we have the same behavior with epoch even if we are not timezone aware ts_no_tz = ts_1.tz_localize(None) @@ -1809,7 +1809,9 @@ def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k, unit): result1 = ser.resample(str(n1_) + freq1).mean() result2 = ser.resample(str(n2_) + freq2).mean() - tm.assert_series_equal(result1, result2) + assert result1.index.freq == str(n1_) + freq1 + assert result2.index.freq == str(n2_) + freq2 + tm.assert_series_equal(result1, result2, check_freq=False) @pytest.mark.parametrize( diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 7ce4f482b6414..4b11873c88ffa 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -733,7 +733,7 @@ def test_resample_agg_readonly(): arr.setflags(write=False) ser = Series(arr, index=index) - rs = ser.resample("1D") + rs = ser.resample("24h") expected = Series([pd.Timestamp(0), pd.Timestamp(0)], index=index[::24]) From 9d346edb6f285c86604f954c559a9bc1def04acf Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 10 Mar 2023 13:58:47 -0800 Subject: [PATCH 06/13] simplify --- .python-version | 1 + pandas/core/arrays/timedeltas.py | 3 --- pandas/core/indexes/interval.py | 7 ++----- pandas/core/indexes/timedeltas.py | 6 ++---- pandas/core/resample.py | 8 ++++---- pandas/core/window/rolling.py | 10 +++------- pandas/tests/resample/test_period_index.py | 10 ++-------- 7 files changed, 14 insertions(+), 31 deletions(-) create mode 100644 .python-version diff --git a/.python-version b/.python-version new file mode 100644 index 0000000000000..1e3345683185f --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11.2 diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 52c07848aa18e..37201e09fb3bf 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -269,9 +269,6 @@ def _from_sequence_not_strict( # Set _freq directly to bypass duplicative _validate_frequency # check. res_freq = to_offset(result.inferred_freq) - if res_freq is not None: - # TODO: handle this in inferred_freq - res_freq = res_freq._maybe_to_hours() result._freq = res_freq return result diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 43c2274bd624f..f23a2eedd17ed 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -23,7 +23,6 @@ ) from pandas._libs.tslibs import ( BaseOffset, - Day, Timedelta, Timestamp, to_offset, @@ -1095,10 +1094,8 @@ def interval_range( raise ValueError( f"freq must be numeric or convertible to DateOffset, got {freq}" ) from err - if isinstance(freq, Day) and ( - isinstance(start, Timedelta) or isinstance(end, Timedelta) - ): - freq = freq.n * 24 * to_offset("H") + if isinstance(start, Timedelta) or isinstance(end, Timedelta): + freq = freq._maybe_to_hours() # verify type compatibility if not all( diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 0436ea64d9851..3c5042977956b 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -317,10 +317,8 @@ def timedelta_range( ) freq, _ = dtl.maybe_infer_freq(freq) - if isinstance(freq, Day): - freq = 24 * freq.n * to_offset("H") - elif isinstance(freq, Timedelta): - freq = to_offset(freq) # FIXME: shouldn't happen right? + if freq is not None: + freq = freq._maybe_to_hours() tdarr = TimedeltaArray._generate_range( start, end, periods, freq, closed=closed, unit=unit diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 22cc296a949ec..73ef67de79c9f 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1785,6 +1785,7 @@ def _get_time_bins(self, ax: DatetimeIndex): ax_values = ax.asi8 binner, bin_edges = self._adjust_bin_edges(binner, ax_values) + # general version, knowing nothing about relative frequencies bins = lib.generate_bins_dt64( ax_values, bin_edges, self.closed, hasnans=ax.hasnans @@ -1843,10 +1844,9 @@ def _get_time_delta_bins(self, ax: TimedeltaIndex): ) freq = self.freq - if isinstance(freq, Day): - # TODO: are we super-duper sure this is safe? maybe we can unify - # conversion earlier? - freq = 24 * freq.n * to_offset("H") + # TODO: are we super-duper sure this is safe? maybe we can unify + # conversion earlier? + freq = freq._maybe_to_hours() if not len(ax): if not isinstance(freq, Tick): # FIXME: this seems to be happening in the status quo diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index d1cf4ac9d92a8..70840433f1085 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -23,7 +23,6 @@ from pandas._libs.tslibs import ( BaseOffset, - Day, to_offset, ) import pandas._libs.window.aggregations as window_aggregations @@ -1778,12 +1777,9 @@ def _validate(self): self._on.freq.nanos / self._on.freq.n ) else: - if isinstance(freq, Day): - # In this context we treat Day as 24H - # TODO: will this cause trouble with tzaware cases? - self._win_freq_i8 = freq.n * 24 * 3600 * 10**9 - else: - self._win_freq_i8 = freq.nanos + # In this context we treat Day as 24H + # TODO: will this cause trouble with tzaware cases? + self._win_freq_i8 = freq._maybe_to_hours().nanos # min_periods must be an integer if self.min_periods is None: diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 6645bfc22e4b3..5361a32e36393 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -835,14 +835,8 @@ def test_resample_with_offset(self, start, end, start_freq, end_freq, offset): if end_freq == "M": # TODO: is non-tick the relevant characteristic? (GH 33815) expected.index = expected.index._with_freq(None) - elif expected.index.freq.freqstr.endswith( - "H" - ) and result.index.freq.freqstr.endswith("D"): - # TODO: this is a kludge introduced when implementing GH#41943 bc - # Tick comparison used to consider 24H==1D but no longer does. - # Implement a cleaner fix somewhere. - if expected.index.freq.n == 24 * result.index.freq.n: - expected.index.freq = result.index.freq + else: + result.index._data._freq = result.index.freq._maybe_to_hours() tm.assert_series_equal(result, expected) @pytest.mark.parametrize( From 5d905bd08d2323092d9e0bd0e20fc121ca365c45 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 10 Mar 2023 13:59:15 -0800 Subject: [PATCH 07/13] revert accidental --- .python-version | 1 - 1 file changed, 1 deletion(-) delete mode 100644 .python-version diff --git a/.python-version b/.python-version deleted file mode 100644 index 1e3345683185f..0000000000000 --- a/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.11.2 From 004869d9939dc27fd0cf3671f594e2afd42b75e1 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 10 Mar 2023 16:23:37 -0800 Subject: [PATCH 08/13] mypy, docs fixups, cleanups --- doc/source/user_guide/timedeltas.rst | 2 +- pandas/_libs/tslibs/offsets.pyi | 6 +++++- pandas/_libs/tslibs/period.pyx | 4 ++-- pandas/core/arrays/datetimelike.py | 16 ++++++++-------- pandas/core/arrays/period.py | 3 +-- pandas/core/arrays/timedeltas.py | 3 +-- pandas/core/indexes/datetimelike.py | 1 + pandas/tests/arithmetic/test_numeric.py | 4 +--- pandas/tests/arithmetic/test_timedelta64.py | 5 ++--- pandas/tests/indexes/datetimes/test_misc.py | 2 +- pandas/tests/resample/test_period_index.py | 8 ++++---- 11 files changed, 27 insertions(+), 27 deletions(-) diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst index 3a75aa0b39b1f..ae7edf87ad7ad 100644 --- a/doc/source/user_guide/timedeltas.rst +++ b/doc/source/user_guide/timedeltas.rst @@ -63,7 +63,7 @@ Further, operations among the scalars yield another scalar ``Timedelta``. .. ipython:: python - pd.Timedelta(pd.offsets.Day(2)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta( + pd.Timedelta(pd.offsets.Hour(48)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta( "00:00:00.000123" ) diff --git a/pandas/_libs/tslibs/offsets.pyi b/pandas/_libs/tslibs/offsets.pyi index f1aca4717665c..9571630d7d966 100644 --- a/pandas/_libs/tslibs/offsets.pyi +++ b/pandas/_libs/tslibs/offsets.pyi @@ -89,6 +89,7 @@ class BaseOffset: @property def nanos(self) -> int: ... def is_anchored(self) -> bool: ... + def _maybe_to_hours(self) -> BaseOffset: ... def _get_offset(name: str) -> BaseOffset: ... @@ -113,10 +114,13 @@ class Tick(SingleConstructorOffset): def delta(self) -> Timedelta: ... @property def nanos(self) -> int: ... + def _maybe_to_hours(self) -> Tick: ... def delta_to_tick(delta: timedelta) -> Tick: ... -class Day(Tick): ... +class Day(Tick): + def _maybe_to_hours(self) -> Hour: ... + class Hour(Tick): ... class Minute(Tick): ... class Second(Tick): ... diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 8898e556ae48f..618007dd8a75b 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -24,7 +24,6 @@ from cpython.datetime cimport ( PyDateTime_Check, datetime, import_datetime, - timedelta, ) from libc.stdlib cimport ( free, @@ -1749,7 +1748,8 @@ cdef class _Period(PeriodMixin): return NaT if isinstance(other, Day): - other = timedelta(days=other.n) + # Periods are timezone-naive, so we treat Day as Tick-like + other = np.timedelta64(other.n, "D") try: inc = delta_to_nanoseconds(other, reso=self.freq._creso, round_ok=False) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 07578f75a015a..2e482f91fd662 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -376,8 +376,6 @@ def __getitem__( # At this point we know the result is an array. result = cast(DatetimeLikeArrayT, result) result._freq = self._get_getitem_freq(key) - if self.dtype.kind == "m" and result._freq is not None: - assert isinstance(result._freq, Tick) return result def _get_getitem_freq(self, key) -> BaseOffset | None: @@ -893,6 +891,8 @@ def inferred_freq(self) -> str | None: except ValueError: return None if self.dtype.kind == "m" and res is not None and res.endswith("D"): + # TimedeltaArray freq must be a Tick, so we convert the inferred + # daily freq to hourly. if res == "D": return "24H" res = str(int(res[:-1]) * 24) + "H" @@ -1471,7 +1471,7 @@ def __iadd__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT: if not is_period_dtype(self.dtype): # restore freq, which is invalidated by setitem - self.freq = result.freq + self._freq = result.freq return self def __isub__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT: @@ -1480,7 +1480,7 @@ def __isub__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT: if not is_period_dtype(self.dtype): # restore freq, which is invalidated by setitem - self.freq = result.freq + self._freq = result.freq return self # -------------------------------------------------------------- @@ -1869,7 +1869,7 @@ def __init__( freq = freq._maybe_to_hours() NDArrayBacked.__init__(self, values=values, dtype=dtype) - self.freq = freq + self._freq = freq if inferred_freq is None and freq is not None: type(self)._validate_frequency(self, freq) @@ -1890,16 +1890,16 @@ def freq(self, value) -> None: if value is not None: value = to_offset(value) self._validate_frequency(self, value) + if self.dtype.kind == "m" and not isinstance(value, Tick): + raise TypeError("TimedeltaArray/Index freq must be a Tick or None") if self.ndim > 1: raise ValueError("Cannot set freq with ndim > 1") - if self.dtype.kind == "m": - assert value is None or isinstance(value, Tick) self._freq = value @classmethod - def _validate_frequency(cls, index, freq, **kwargs): + def _validate_frequency(cls, index, freq: BaseOffset, **kwargs): """ Validate that a frequency is compatible with the values of a given Datetime Array/Index or Timedelta Array/Index diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 49003870407b6..8592d770a572d 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -700,8 +700,7 @@ def _add_offset(self, other: BaseOffset): assert not isinstance(other, Tick) if isinstance(other, Day): - other = Timedelta(days=other.n) - return self + other + return self + np.timedelta64(other.n, "D") self._require_matching_freq(other, base=True) return self._addsub_int_array_or_scalar(other.n, operator.add) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 37201e09fb3bf..3c1e68654b11d 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -268,8 +268,7 @@ def _from_sequence_not_strict( elif freq_infer: # Set _freq directly to bypass duplicative _validate_frequency # check. - res_freq = to_offset(result.inferred_freq) - result._freq = res_freq + result._freq = to_offset(result.inferred_freq) return result diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f492151516551..2fa155122378a 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -411,6 +411,7 @@ class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, ABC): """ _data: DatetimeArray | TimedeltaArray + _freq: BaseOffset | None _comparables = ["name", "freq"] _attributes = ["name", "freq"] diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 6fea2b90e3eac..c21936e6525e8 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -11,8 +11,6 @@ import numpy as np import pytest -from pandas._libs.tslibs import Day - import pandas as pd from pandas import ( Index, @@ -266,7 +264,7 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array index = tm.box_expected(index, box) expected = tm.box_expected(expected, box) - if isinstance(three_days, Day): + if isinstance(three_days, pd.offsets.Day): # GH#41943 Day is no longer timedelta-like msg = "unsupported operand type" with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index bf44adc5c6673..3a3e8570bb591 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -8,7 +8,6 @@ import numpy as np import pytest -from pandas._libs.tslibs import Day from pandas.errors import ( OutOfBoundsDatetime, PerformanceWarning, @@ -1822,7 +1821,7 @@ def test_td64arr_mod_tdscalar(self, box_with_array, three_days): expected = TimedeltaIndex(["1 Day", "2 Days", "0 Days"] * 3) expected = tm.box_expected(expected, box_with_array) - if isinstance(three_days, Day): + if isinstance(three_days, offsets.Day): msg = "unsupported operand type" with pytest.raises(TypeError, match=msg): tdarr % three_days @@ -1875,7 +1874,7 @@ def test_td64arr_rmod_tdscalar(self, box_with_array, three_days): expected = TimedeltaIndex(expected) expected = tm.box_expected(expected, box_with_array) - if isinstance(three_days, Day): + if isinstance(three_days, offsets.Day): msg = "Cannot divide Day by TimedeltaArray" with pytest.raises(TypeError, match=msg): three_days % tdarr diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 6946559932c88..004d8d546093b 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -295,6 +295,7 @@ def test_iter_readonly(): def test_add_timedelta_preserves_freq(): # GH#37295 should hold for any DTI with freq=None or Tick freq + # GH#51874 changed this, with tzaware we can no longer retain "D" in addition tz = "Canada/Eastern" dti = date_range( start=Timestamp("2019-03-26 00:00:00-0400", tz=tz), @@ -302,5 +303,4 @@ def test_add_timedelta_preserves_freq(): freq="D", ) result = dti + Timedelta(days=1) - # GH#??? with tzaware we can no longer retain "D" freq in addition assert result.freq is None diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 5361a32e36393..cecf1cc66e257 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -826,12 +826,12 @@ def test_resample_with_only_nat(self): ) def test_resample_with_offset(self, start, end, start_freq, end_freq, offset): # GH 23882 & 31809 - pi = period_range(start, end, freq=start_freq) - ser = Series(np.arange(len(pi)), index=pi) - result = ser.resample(end_freq, offset=offset).mean() + s = Series(0, index=period_range(start, end, freq=start_freq)) + s = s + np.arange(len(s)) + result = s.resample(end_freq, offset=offset).mean() result = result.to_timestamp(end_freq) - expected = ser.to_timestamp().resample(end_freq, offset=offset).mean() + expected = s.to_timestamp().resample(end_freq, offset=offset).mean() if end_freq == "M": # TODO: is non-tick the relevant characteristic? (GH 33815) expected.index = expected.index._with_freq(None) From 2c9ffca0535f7fa0b9c7e64e26eedd6f60f29ee3 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 10 Mar 2023 17:42:01 -0800 Subject: [PATCH 09/13] remove unnecessary check --- pandas/core/arrays/datetimelike.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2e482f91fd662..20a7f2071884d 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -2096,9 +2096,6 @@ def _with_freq(self, freq): # As an internal method, we can ensure this assertion always holds assert freq == "infer" freq = to_offset(self.inferred_freq) - if freq is not None and self.dtype.kind == "m": - # TODO: handle this in self.inferred_freq? - freq = freq._maybe_to_hours() if self.dtype.kind == "m" and freq is not None: assert isinstance(freq, Tick) From d1851cff782b456cfec247ec3d548028d570517b Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 11 Mar 2023 14:10:24 -0800 Subject: [PATCH 10/13] CLN: unnecessary pass --- pandas/core/arrays/datetimelike.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 20a7f2071884d..eac9ed7284aa0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -2091,7 +2091,6 @@ def _with_freq(self, freq): # is a Tick offset. if self.dtype.kind == "m" and not isinstance(freq, Tick): raise ValueError("TimedeltaIndex/Array freq must be a Tick") - pass else: # As an internal method, we can ensure this assertion always holds assert freq == "infer" From 8e89064c89839e6db4985f00a0ad57ccae801746 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 13 Mar 2023 14:48:57 -0700 Subject: [PATCH 11/13] Fix doctest --- pandas/_libs/tslibs/offsets.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index d5d92926d8ba5..c8eb2b6f0d95b 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -4156,7 +4156,7 @@ cpdef to_offset(freq): <2 * BusinessDays> >>> to_offset(pd.Timedelta(days=1)) - <24Hour> + <24 * Hours> >>> to_offset(pd.offsets.Hour()) From 69e654be1d6ad4a7685a45f162c9a19162dad980 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 16 Mar 2023 10:42:05 -0700 Subject: [PATCH 12/13] mypy fixup --- pandas/core/indexes/datetimelike.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 37799d4a39316..37b3ae7c0984d 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -670,7 +670,7 @@ def _union(self, other, sort): result = super()._union(other, sort)._with_freq("infer") return self._maybe_restore_day(result) - def _maybe_restore_day(self, result: _TDT) -> _TDT: + def _maybe_restore_day(self, result: Self) -> Self: if isinstance(self.freq, Day) and isinstance(result.freq, Tick): # If we infer a 24H-like freq but are D, restore "D" td = Timedelta(result.freq) From 976d84b38b20beb12ca84e100fc0fc14c10b61b0 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 17 Mar 2023 10:12:42 -0700 Subject: [PATCH 13/13] troubleshoot docstring validation --- pandas/_libs/tslibs/offsets.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index b1eb41a17459b..f8fb01d5c0da8 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1092,7 +1092,6 @@ cdef class Tick(SingleConstructorOffset): cdef class Day(SingleConstructorOffset): _adjust_dst = True _attributes = tuple(["n", "normalize"]) - rule_code = "D" # used by parse_time_string _prefix = "D" _td64_unit = "D" _period_dtype_code = PeriodDtypeCode.D