From 891374a60a9ba20c6cfbe3317037d6f11e5098a5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 8 Jan 2018 13:55:17 -0800 Subject: [PATCH 01/10] remove EndMixin since it is only used in one place; make apply_index_days add timedelta64[ns] array instead of int64 array --- pandas/_libs/tslibs/offsets.pyx | 21 --------------------- pandas/tseries/offsets.py | 24 ++++++++++++++++++++---- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 585c904a601ed..2ea3c0a846fb7 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -290,27 +290,6 @@ class CacheableOffset(object): _cacheable = True -class EndMixin(object): - # helper for vectorized offsets - - def _end_apply_index(self, i, freq): - """Offsets index to end of Period frequency""" - - off = i.to_perioddelta('D') - - base, mult = get_freq_code(freq) - base_period = i.to_period(base) - if self.n > 0: - # when adding, dates on end roll to next - roll = np.where(base_period.to_timestamp(how='end') == i - off, - self.n, self.n - 1) - else: - roll = self.n - - base = (base_period + roll).to_timestamp(how='end') - return base + off - - # --------------------------------------------------------------------- # Base Classes diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 7c5fe2f0314e4..ef38e8cacea43 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -27,7 +27,6 @@ apply_index_wraps, roll_yearday, shift_month, - EndMixin, BaseOffset) @@ -1226,7 +1225,8 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): return roll def _apply_index_days(self, i, roll): - i += (roll % 2) * Timedelta(days=self.day_of_month).value + nanos = (roll % 2) * Timedelta(days=self.day_of_month).value + i += nanos.astype('timedelta64[ns]') return i + Timedelta(days=-1) @@ -1271,13 +1271,14 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): return roll def _apply_index_days(self, i, roll): - return i + (roll % 2) * Timedelta(days=self.day_of_month - 1).value + nanos = (roll % 2) * Timedelta(days=self.day_of_month - 1).value + return i + nanos.astype('timedelta64[ns]') # --------------------------------------------------------------------- # Week-Based Offset Classes -class Week(EndMixin, DateOffset): +class Week(DateOffset): """ Weekly offset @@ -1327,6 +1328,21 @@ def apply_index(self, i): else: return self._end_apply_index(i, self.freqstr) + def _end_apply_index(self, i, freq): + """Offsets index to end of Period frequency""" + off = i.to_perioddelta('D') + + base_period = i.to_period('W') + if self.n > 0: + # when adding, dates on end roll to next + roll = np.where(base_period.to_timestamp(how='end') == i - off, + self.n, self.n - 1) + else: + roll = self.n + + base = (base_period + roll).to_timestamp(how='end') + return base + off + def onOffset(self, dt): if self.normalize and not _is_normalized(dt): return False From 1b7d11abdf00d1bc2d7ab9d1c00f9e1f8622a676 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 8 Jan 2018 13:59:49 -0800 Subject: [PATCH 02/10] make default kwargs valid --- pandas/tseries/offsets.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index ef38e8cacea43..a818ece2c0c4a 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1328,14 +1328,15 @@ def apply_index(self, i): else: return self._end_apply_index(i, self.freqstr) - def _end_apply_index(self, i, freq): + def _end_apply_index(self, dtindex, freq): """Offsets index to end of Period frequency""" - off = i.to_perioddelta('D') + off = dtindex.to_perioddelta('D') - base_period = i.to_period('W') + base_period = dtindex.to_period('W') if self.n > 0: # when adding, dates on end roll to next - roll = np.where(base_period.to_timestamp(how='end') == i - off, + normed = dtindex - off + roll = np.where(base_period.to_timestamp(how='end') == normed, self.n, self.n - 1) else: roll = self.n @@ -1396,9 +1397,9 @@ class WeekOfMonth(_WeekOfMonthMixin, DateOffset): Parameters ---------- n : int - week : {0, 1, 2, 3, ...}, default None + week : {0, 1, 2, 3, ...}, default 0 0 is 1st week of month, 1 2nd week, etc. - weekday : {0, 1, ..., 6}, default None + weekday : {0, 1, ..., 6}, default 0 0: Mondays 1: Tuesdays 2: Wednesdays @@ -1410,7 +1411,7 @@ class WeekOfMonth(_WeekOfMonthMixin, DateOffset): _prefix = 'WOM' _adjust_dst = True - def __init__(self, n=1, normalize=False, week=None, weekday=None): + def __init__(self, n=1, normalize=False, week=0, weekday=0): self.n = self._validate_n(n) self.normalize = normalize self.weekday = weekday @@ -1473,7 +1474,7 @@ class LastWeekOfMonth(_WeekOfMonthMixin, DateOffset): Parameters ---------- n : int, default 1 - weekday : {0, 1, ..., 6}, default None + weekday : {0, 1, ..., 6}, default 0 0: Mondays 1: Tuesdays 2: Wednesdays @@ -1486,7 +1487,7 @@ class LastWeekOfMonth(_WeekOfMonthMixin, DateOffset): _prefix = 'LWOM' _adjust_dst = True - def __init__(self, n=1, normalize=False, weekday=None): + def __init__(self, n=1, normalize=False, weekday=0): self.n = self._validate_n(n) self.normalize = normalize self.weekday = weekday From cd080261d465b379b09b6c0a750aabe64ab79781 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 8 Jan 2018 14:03:34 -0800 Subject: [PATCH 03/10] use roll_convention where possible --- pandas/_libs/tslibs/offsets.pyx | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 2ea3c0a846fb7..16767a788d02c 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -654,11 +654,8 @@ def shift_months(int64_t[:] dtindex, int months, object day=None): months_to_roll = months compare_day = get_firstbday(dts.year, dts.month) - if months_to_roll > 0 and dts.day < compare_day: - months_to_roll -= 1 - elif months_to_roll <= 0 and dts.day > compare_day: - # as if rolled forward already - months_to_roll += 1 + months_to_roll = roll_convention(dts.day, months_to_roll, + compare_day) dts.year = year_add_months(dts, months_to_roll) dts.month = month_add_months(dts, months_to_roll) @@ -677,11 +674,8 @@ def shift_months(int64_t[:] dtindex, int months, object day=None): months_to_roll = months compare_day = get_lastbday(dts.year, dts.month) - if months_to_roll > 0 and dts.day < compare_day: - months_to_roll -= 1 - elif months_to_roll <= 0 and dts.day > compare_day: - # as if rolled forward already - months_to_roll += 1 + months_to_roll = roll_convention(dts.day, months_to_roll, + compare_day) dts.year = year_add_months(dts, months_to_roll) dts.month = month_add_months(dts, months_to_roll) From 38415c4938b5ca1ad6f57d8a7337368fd4f2f517 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 8 Jan 2018 14:13:09 -0800 Subject: [PATCH 04/10] make roll_convention nogil --- pandas/_libs/tslibs/offsets.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 16767a788d02c..5f16c15403512 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -796,7 +796,7 @@ cpdef int get_day_of_month(datetime other, day_opt) except? -1: raise ValueError(day_opt) -cpdef int roll_convention(int other, int n, int compare): +cpdef int roll_convention(int other, int n, int compare) nogil: """ Possibly increment or decrement the number of periods to shift based on rollforward/rollbackward conventions. From 11aa1090e8dfba538583065426525753a445280e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 12:00:55 -0800 Subject: [PATCH 05/10] fix unhelpful pytest info, arg for to_period --- pandas/tests/indexes/datetimes/test_arithmetic.py | 9 +++++---- pandas/tseries/offsets.py | 9 +++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index fb804266259dc..011b33a4d6f35 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -495,7 +495,7 @@ def test_dt64_with_DateOffsets_relativedelta(klass, assert_func): assert_func(klass([x - op for x in vec]), vec - op) -@pytest.mark.parametrize('cls_name', [ +@pytest.mark.parametrize('cls_and_kwargs', [ 'YearBegin', ('YearBegin', {'month': 5}), 'YearEnd', ('YearEnd', {'month': 5}), 'MonthBegin', 'MonthEnd', @@ -518,7 +518,7 @@ def test_dt64_with_DateOffsets_relativedelta(klass, assert_func): @pytest.mark.parametrize('klass,assert_func', [ (Series, tm.assert_series_equal), (DatetimeIndex, tm.assert_index_equal)]) -def test_dt64_with_DateOffsets(klass, assert_func, normalize, cls_name): +def test_dt64_with_DateOffsets(klass, assert_func, normalize, cls_and_kwargs): # GH#10699 # assert these are equal on a piecewise basis vec = klass([Timestamp('2000-01-05 00:15:00'), @@ -530,11 +530,12 @@ def test_dt64_with_DateOffsets(klass, assert_func, normalize, cls_name): Timestamp('2000-05-15'), Timestamp('2001-06-15')]) - if isinstance(cls_name, tuple): + if isinstance(cls_and_kwargs, tuple): # If cls_name param is a tuple, then 2nd entry is kwargs for # the offset constructor - cls_name, kwargs = cls_name + cls_name, kwargs = cls_and_kwargs else: + cls_name = cls_and_kwargs kwargs = {} offset_cls = getattr(pd.offsets, cls_name) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 6b9da83a12ca1..5884ade1c544a 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -16,7 +16,7 @@ from pandas._libs import tslib, Timestamp, OutOfBoundsDatetime, Timedelta from pandas.util._decorators import cache_readonly -from pandas._libs.tslibs import ccalendar +from pandas._libs.tslibs import ccalendar, frequencies as libfrequencies from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds import pandas._libs.tslibs.offsets as liboffsets from pandas._libs.tslibs.offsets import ( @@ -1333,13 +1333,14 @@ def apply_index(self, i): return ((i.to_period('W') + self.n).to_timestamp() + i.to_perioddelta('W')) else: - return self._end_apply_index(i, self.freqstr) + return self._end_apply_index(i) - def _end_apply_index(self, dtindex, freq): + def _end_apply_index(self, dtindex): """Offsets index to end of Period frequency""" off = dtindex.to_perioddelta('D') - base_period = dtindex.to_period('W') + base, mult = libfrequencies.get_freq_code(self.freqstr) + base_period = dtindex.to_period(base) if self.n > 0: # when adding, dates on end roll to next normed = dtindex - off From 39e00909947842c9b6c15d36e3df4b4356c40d44 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 18:00:48 -0800 Subject: [PATCH 06/10] docstrings --- pandas/tseries/offsets.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 5884ade1c544a..d7a88f8a1bf26 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1232,6 +1232,17 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): return roll def _apply_index_days(self, i, roll): + """Add days portion of offset to DatetimeIndex i + + Parameters + ---------- + i : DatetimeIndex + roll : ndarray[int64_t] + + Returns + ------- + result : DatetimeIndex + """ nanos = (roll % 2) * Timedelta(days=self.day_of_month).value i += nanos.astype('timedelta64[ns]') return i + Timedelta(days=-1) @@ -1278,6 +1289,17 @@ def _get_roll(self, i, before_day_of_month, after_day_of_month): return roll def _apply_index_days(self, i, roll): + """Add days portion of offset to DatetimeIndex i + + Parameters + ---------- + i : DatetimeIndex + roll : ndarray[int64_t] + + Returns + ------- + result : DatetimeIndex + """ nanos = (roll % 2) * Timedelta(days=self.day_of_month - 1).value return i + nanos.astype('timedelta64[ns]') From baa07b14b804852c013c8e76abe8db369ecea22c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 18:02:42 -0800 Subject: [PATCH 07/10] docstring --- pandas/tseries/offsets.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index d7a88f8a1bf26..e6b9f66c094c1 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1358,7 +1358,17 @@ def apply_index(self, i): return self._end_apply_index(i) def _end_apply_index(self, dtindex): - """Offsets index to end of Period frequency""" + """Add self to the given DatetimeIndex, specialized for case where + self.weekday is non-null. + + Parameters + ---------- + dtindex : DatetimeIndex + + Returns + ------- + result : DatetimeIndex + """ off = dtindex.to_perioddelta('D') base, mult = libfrequencies.get_freq_code(self.freqstr) From 4a1c28cdf5164d1b4ec60eddcd49c8b5e14a6649 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 10 Jan 2018 18:06:23 -0800 Subject: [PATCH 08/10] smoke test that default kwargs are valid --- pandas/tests/tseries/offsets/test_offsets.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 23e627aeba017..b086884ecd250 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -3087,6 +3087,13 @@ def test_get_offset_day_error(): DateOffset()._get_offset_day(datetime.now()) +def test_valid_default_arguments(offset_types): + # GH#19142 check that the calling the constructors without passing + # any keyword arguments produce valid offsets + cls = offset_types + cls() + + @pytest.mark.parametrize('kwd', sorted(list(liboffsets.relativedelta_kwds))) def test_valid_month_attributes(kwd, month_classes): # GH#18226 From 665867990ae95eab6fe721776508f158a3379a4c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 11 Jan 2018 10:35:08 -0800 Subject: [PATCH 09/10] whatsnew note --- doc/source/whatsnew/v0.23.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 92eeed89ada2a..456b104a2652d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -473,3 +473,4 @@ Other - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) - :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`) +- Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`) From 39b66b55c0adb4d7cf2244accb117d6716b8fcf2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 11 Jan 2018 17:18:37 -0800 Subject: [PATCH 10/10] move whatsnew to conversion --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index dff6ccdfb6b43..6f0ee0a2f4a51 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -381,6 +381,7 @@ Conversion - Fixed bug where comparing :class:`DatetimeIndex` failed to raise ``TypeError`` when attempting to compare timezone-aware and timezone-naive datetimelike objects (:issue:`18162`) - Bug in :class:`DatetimeIndex` where the repr was not showing high-precision time values at the end of a day (e.g., 23:59:59.999999999) (:issue:`19030`) - Bug where dividing a scalar timedelta-like object with :class:`TimedeltaIndex` performed the reciprocal operation (:issue:`19125`) +- Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`) - Indexing @@ -474,4 +475,3 @@ Other - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) - :func:`Timestamp.replace` will now handle Daylight Savings transitions gracefully (:issue:`18319`) -- Bug in :class:`WeekOfMonth` and :class:`LastWeekOfMonth` where default keyword arguments for constructor raised ``ValueError`` (:issue:`19142`)