From 9c1bae402d4c302a033da7159096d9df288506b0 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Fri, 11 Oct 2013 22:31:46 -0400 Subject: [PATCH] CLN/ENH: Stop instantiating all offsets on load. Use a more flexible lookup based on prefix then delegate creation to class' `from_name()` method. Totally get rid of hasOffsetName (not necessary). Finally, make some inheriting go on to simplify things. --- doc/source/release.rst | 4 + pandas/tseries/frequencies.py | 207 ++------------- pandas/tseries/offsets.py | 384 ++++++++++++++++----------- pandas/tseries/tests/test_offsets.py | 65 +++-- 4 files changed, 300 insertions(+), 360 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index b74b1f9252709..9be06d60f0f16 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -394,6 +394,10 @@ See :ref:`Internal Refactoring` bs4/lxml (:issue:`4770`). - Removed the ``keep_internal`` keyword parameter in ``pandas/core/groupby.py`` because it wasn't being used (:issue:`5102`). + - Base ``DateOffsets`` are no longer all instantiated on importing pandas, + instead they are generated and cached on the fly. The internal + representation and handling of DateOffsets has also been clarified. + (:issue:`5189`, related :issue:`5004`) .. _release.bug_fixes-0.13.0: diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index d1fd51c073f83..4878ebfccf915 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -12,7 +12,6 @@ import pandas.core.common as com import pandas.lib as lib import pandas.tslib as tslib -from pandas import _np_version_under1p7 class FreqGroup(object): @@ -125,162 +124,15 @@ def _get_freq_str(base, mult=1): MonthEnd, BMonthBegin, BMonthEnd, QuarterBegin, QuarterEnd, BQuarterBegin, BQuarterEnd, YearBegin, YearEnd, - BYearBegin, BYearEnd, + BYearBegin, BYearEnd, _make_offset ) try: cday = CDay() except NotImplementedError: cday = None -_offset_map = { - 'D': Day(), - 'C': cday, - 'B': BDay(), - 'H': Hour(), - 'T': Minute(), - 'S': Second(), - 'L': Milli(), - 'U': Micro(), - None: None, - - # Monthly - Calendar - 'M': MonthEnd(), - 'MS': MonthBegin(), - - # Monthly - Business - 'BM': BMonthEnd(), - 'BMS': BMonthBegin(), - - # Annual - Calendar - 'A-JAN': YearEnd(month=1), - 'A-FEB': YearEnd(month=2), - 'A-MAR': YearEnd(month=3), - 'A-APR': YearEnd(month=4), - 'A-MAY': YearEnd(month=5), - 'A-JUN': YearEnd(month=6), - 'A-JUL': YearEnd(month=7), - 'A-AUG': YearEnd(month=8), - 'A-SEP': YearEnd(month=9), - 'A-OCT': YearEnd(month=10), - 'A-NOV': YearEnd(month=11), - 'A-DEC': YearEnd(month=12), - - # Annual - Calendar (start) - 'AS-JAN': YearBegin(month=1), - 'AS-FEB': YearBegin(month=2), - 'AS-MAR': YearBegin(month=3), - 'AS-APR': YearBegin(month=4), - 'AS-MAY': YearBegin(month=5), - 'AS-JUN': YearBegin(month=6), - 'AS-JUL': YearBegin(month=7), - 'AS-AUG': YearBegin(month=8), - 'AS-SEP': YearBegin(month=9), - 'AS-OCT': YearBegin(month=10), - 'AS-NOV': YearBegin(month=11), - 'AS-DEC': YearBegin(month=12), - - # Annual - Business - 'BA-JAN': BYearEnd(month=1), - 'BA-FEB': BYearEnd(month=2), - 'BA-MAR': BYearEnd(month=3), - 'BA-APR': BYearEnd(month=4), - 'BA-MAY': BYearEnd(month=5), - 'BA-JUN': BYearEnd(month=6), - 'BA-JUL': BYearEnd(month=7), - 'BA-AUG': BYearEnd(month=8), - 'BA-SEP': BYearEnd(month=9), - 'BA-OCT': BYearEnd(month=10), - 'BA-NOV': BYearEnd(month=11), - 'BA-DEC': BYearEnd(month=12), - - # Annual - Business (Start) - 'BAS-JAN': BYearBegin(month=1), - 'BAS-FEB': BYearBegin(month=2), - 'BAS-MAR': BYearBegin(month=3), - 'BAS-APR': BYearBegin(month=4), - 'BAS-MAY': BYearBegin(month=5), - 'BAS-JUN': BYearBegin(month=6), - 'BAS-JUL': BYearBegin(month=7), - 'BAS-AUG': BYearBegin(month=8), - 'BAS-SEP': BYearBegin(month=9), - 'BAS-OCT': BYearBegin(month=10), - 'BAS-NOV': BYearBegin(month=11), - 'BAS-DEC': BYearBegin(month=12), - - # Quarterly - Calendar - # 'Q' : QuarterEnd(startingMonth=3), - 'Q-JAN': QuarterEnd(startingMonth=1), - 'Q-FEB': QuarterEnd(startingMonth=2), - 'Q-MAR': QuarterEnd(startingMonth=3), - 'Q-APR': QuarterEnd(startingMonth=4), - 'Q-MAY': QuarterEnd(startingMonth=5), - 'Q-JUN': QuarterEnd(startingMonth=6), - 'Q-JUL': QuarterEnd(startingMonth=7), - 'Q-AUG': QuarterEnd(startingMonth=8), - 'Q-SEP': QuarterEnd(startingMonth=9), - 'Q-OCT': QuarterEnd(startingMonth=10), - 'Q-NOV': QuarterEnd(startingMonth=11), - 'Q-DEC': QuarterEnd(startingMonth=12), - - # Quarterly - Calendar (Start) - 'QS': QuarterBegin(startingMonth=1), - 'QS-JAN': QuarterBegin(startingMonth=1), - 'QS-FEB': QuarterBegin(startingMonth=2), - 'QS-MAR': QuarterBegin(startingMonth=3), - 'QS-APR': QuarterBegin(startingMonth=4), - 'QS-MAY': QuarterBegin(startingMonth=5), - 'QS-JUN': QuarterBegin(startingMonth=6), - 'QS-JUL': QuarterBegin(startingMonth=7), - 'QS-AUG': QuarterBegin(startingMonth=8), - 'QS-SEP': QuarterBegin(startingMonth=9), - 'QS-OCT': QuarterBegin(startingMonth=10), - 'QS-NOV': QuarterBegin(startingMonth=11), - 'QS-DEC': QuarterBegin(startingMonth=12), - - # Quarterly - Business - 'BQ-JAN': BQuarterEnd(startingMonth=1), - 'BQ-FEB': BQuarterEnd(startingMonth=2), - 'BQ-MAR': BQuarterEnd(startingMonth=3), - - 'BQ': BQuarterEnd(startingMonth=12), - 'BQ-APR': BQuarterEnd(startingMonth=4), - 'BQ-MAY': BQuarterEnd(startingMonth=5), - 'BQ-JUN': BQuarterEnd(startingMonth=6), - 'BQ-JUL': BQuarterEnd(startingMonth=7), - 'BQ-AUG': BQuarterEnd(startingMonth=8), - 'BQ-SEP': BQuarterEnd(startingMonth=9), - 'BQ-OCT': BQuarterEnd(startingMonth=10), - 'BQ-NOV': BQuarterEnd(startingMonth=11), - 'BQ-DEC': BQuarterEnd(startingMonth=12), - - # Quarterly - Business (Start) - 'BQS-JAN': BQuarterBegin(startingMonth=1), - 'BQS': BQuarterBegin(startingMonth=1), - 'BQS-FEB': BQuarterBegin(startingMonth=2), - 'BQS-MAR': BQuarterBegin(startingMonth=3), - 'BQS-APR': BQuarterBegin(startingMonth=4), - 'BQS-MAY': BQuarterBegin(startingMonth=5), - 'BQS-JUN': BQuarterBegin(startingMonth=6), - 'BQS-JUL': BQuarterBegin(startingMonth=7), - 'BQS-AUG': BQuarterBegin(startingMonth=8), - 'BQS-SEP': BQuarterBegin(startingMonth=9), - 'BQS-OCT': BQuarterBegin(startingMonth=10), - 'BQS-NOV': BQuarterBegin(startingMonth=11), - 'BQS-DEC': BQuarterBegin(startingMonth=12), - - # Weekly - 'W-MON': Week(weekday=0), - 'W-TUE': Week(weekday=1), - 'W-WED': Week(weekday=2), - 'W-THU': Week(weekday=3), - 'W-FRI': Week(weekday=4), - 'W-SAT': Week(weekday=5), - 'W-SUN': Week(weekday=6), - -} - -if not _np_version_under1p7: - _offset_map['N'] = Nano() +#: cache of previously seen offsets +_offset_map = {} _offset_to_period_map = { 'WEEKDAY': 'D', @@ -386,15 +238,6 @@ def get_period_alias(offset_str): _legacy_reverse_map = dict((v, k) for k, v in reversed(sorted(compat.iteritems(_rule_aliases)))) -# for helping out with pretty-printing and name-lookups - -_offset_names = {} -for name, offset in compat.iteritems(_offset_map): - if offset is None: - continue - offset.name = name - _offset_names[offset] = name - def inferTimeRule(index): from pandas.tseries.index import DatetimeIndex @@ -513,22 +356,21 @@ def get_offset(name): else: if name in _rule_aliases: name = _rule_aliases[name] - - offset = _offset_map.get(name) - - if offset is not None: - return offset - else: - raise ValueError('Bad rule name requested: %s.' % name) + try: + if name not in _offset_map: + # generate and cache offset + offset = _make_offset(name) + _offset_map[name] = offset + return _offset_map[name] + except (ValueError, TypeError, KeyError): + # bad prefix or suffix + pass + raise ValueError('Bad rule name requested: %s.' % name) getOffset = get_offset -def hasOffsetName(offset): - return offset in _offset_names - - def get_offset_name(offset): """ Return rule name associated with a DateOffset object @@ -537,11 +379,18 @@ def get_offset_name(offset): -------- get_offset_name(BMonthEnd(1)) --> 'EOM' """ - name = _offset_names.get(offset) - - if name is not None: - return name - else: + if offset is None: + raise ValueError("Offset can't be none!") + # Hack because this is what it did before... + if isinstance(offset, BDay): + if offset.n != 1: + raise ValueError('Bad rule given: %s.' % 'BusinessDays') + else: + return offset.rule_code + try: + return offset.freqstr + except AttributeError: + # Bad offset, give useful error. raise ValueError('Bad rule given: %s.' % offset) @@ -549,7 +398,7 @@ def get_legacy_offset_name(offset): """ Return the pre pandas 0.8.0 name for the date offset """ - name = _offset_names.get(offset) + name = offset.name return _legacy_reverse_map.get(name, name) get_offset_name = get_offset_name @@ -652,7 +501,7 @@ def _period_alias_dictionary(): L_aliases = ["L", "MS", "MILLISECOND", "MILLISECONDLY"] U_aliases = ["U", "US", "MICROSECOND", "MICROSECONDLY"] N_aliases = ["N", "NS", "NANOSECOND", "NANOSECONDLY"] - + for k in M_aliases: alias_dict[k] = 'M' @@ -679,7 +528,7 @@ def _period_alias_dictionary(): for k in N_aliases: alias_dict[k] = 'N' - + A_prefixes = ["A", "Y", "ANN", "ANNUAL", "ANNUALLY", "YR", "YEAR", "YEARLY"] diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 309b6fbb9a51a..a9488f74cb65a 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -8,7 +8,6 @@ # import after tools, dateutil check from dateutil.relativedelta import relativedelta import pandas.tslib as tslib -import numpy as np from pandas import _np_version_under1p7 __all__ = ['Day', 'BusinessDay', 'BDay', 'CustomBusinessDay', 'CDay', @@ -21,6 +20,7 @@ #---------------------------------------------------------------------- # DateOffset + class ApplyTypeError(TypeError): # sentinel class for catching the apply error to return NotImplemented pass @@ -108,7 +108,7 @@ def _should_cache(self): def _params(self): attrs = [(k, v) for k, v in compat.iteritems(vars(self)) if k not in ['kwds', '_offset', 'name', 'normalize', - 'busdaycalendar']] + 'busdaycalendar', '_named']] attrs.extend(list(self.kwds.items())) attrs = sorted(set(attrs)) @@ -116,15 +116,14 @@ def _params(self): return params def __repr__(self): - if hasattr(self, 'name') and len(self.name): - return self.name - + if hasattr(self, '_named'): + return self._named className = getattr(self, '_outputName', type(self).__name__) exclude = set(['n', 'inc']) attrs = [] for attr in sorted(self.__dict__): if ((attr == 'kwds' and len(self.kwds) == 0) - or attr.startswith('_')): + or attr.startswith('_')): continue elif attr == 'kwds': kwds_new = {} @@ -152,6 +151,13 @@ def __repr__(self): out += '>' return out + @property + def name(self): + if hasattr(self, '_named'): + return self._named + else: + return self.rule_code + def __eq__(self, other): if other is None: return False @@ -234,9 +240,14 @@ def onOffset(self, dt): b = ((dt + self) - self) return a == b + # way to get around weirdness with rule_code + @property + def _prefix(self): + raise NotImplementedError('Prefix not defined') + @property def rule_code(self): - raise NotImplementedError + return self._prefix @property def freqstr(self): @@ -253,10 +264,20 @@ def freqstr(self): return fstr -class BusinessDay(CacheableOffset, DateOffset): +class SingleConstructorOffset(DateOffset): + @classmethod + def _from_name(cls, suffix=None): + # default _from_name calls cls with no args + if suffix: + raise ValueError("Bad freq suffix %s" % suffix) + return cls() + + +class BusinessDay(CacheableOffset, SingleConstructorOffset): """ DateOffset subclass representing possibly n business days """ + _prefix = 'B' def __init__(self, n=1, **kwds): self.n = int(n) @@ -264,14 +285,12 @@ def __init__(self, n=1, **kwds): self.offset = kwds.get('offset', timedelta(0)) self.normalize = kwds.get('normalize', False) - @property - def rule_code(self): - return 'B' - - def __repr__(self): #TODO: Figure out if this should be merged into DateOffset - if hasattr(self, 'name') and len(self.name): - return self.name - + # TODO: Combine this with DateOffset by defining a whitelisted set of + # attributes on each object rather than the existing behavior of iterating + # over internal ``__dict__`` + def __repr__(self): + if hasattr(self, '_named'): + return self._named className = getattr(self, '_outputName', self.__class__.__name__) attrs = [] @@ -411,6 +430,7 @@ class CustomBusinessDay(BusinessDay): """ _cacheable = False + _prefix = 'C' def __init__(self, n=1, **kwds): # Check we have the required numpy version @@ -450,10 +470,6 @@ def __setstate__(self, state): self.__dict__ = state self._set_busdaycalendar() - @property - def rule_code(self): - return 'C' - @staticmethod def _to_dt64(dt, dtype='datetime64'): if isinstance(dt, (datetime, compat.string_types)): @@ -503,11 +519,21 @@ def onOffset(self, dt): return np.is_busday(day64, busdaycal=self.busdaycalendar) -class MonthEnd(CacheableOffset, DateOffset): +class MonthOffset(SingleConstructorOffset): + @property + def name(self): + if self.isAnchored: + return self.rule_code + else: + return "%s-%s" % (self.rule_code, _int_to_month[self.n]) + + +class MonthEnd(CacheableOffset, MonthOffset): """DateOffset of one month end""" def apply(self, other): - other = datetime(other.year, other.month, other.day, tzinfo=other.tzinfo) + other = datetime(other.year, other.month, other.day, + tzinfo=other.tzinfo) n = self.n _, days_in_month = tslib.monthrange(other.year, other.month) @@ -523,12 +549,10 @@ def onOffset(cls, dt): days_in_month = tslib.monthrange(dt.year, dt.month)[1] return dt.day == days_in_month - @property - def rule_code(self): - return 'M' + _prefix = 'M' -class MonthBegin(CacheableOffset, DateOffset): +class MonthBegin(CacheableOffset, MonthOffset): """DateOffset of one month at beginning""" def apply(self, other): @@ -544,12 +568,10 @@ def apply(self, other): def onOffset(cls, dt): return dt.day == 1 - @property - def rule_code(self): - return 'MS' + _prefix = 'MS' -class BusinessMonthEnd(CacheableOffset, DateOffset): +class BusinessMonthEnd(CacheableOffset, MonthOffset): """DateOffset increments between business EOM dates""" def isAnchored(self): @@ -574,12 +596,10 @@ def apply(self, other): other = other - BDay() return other - @property - def rule_code(self): - return 'BM' + _prefix = 'BM' -class BusinessMonthBegin(CacheableOffset, DateOffset): +class BusinessMonthBegin(CacheableOffset, MonthOffset): """DateOffset of one business month at beginning""" def apply(self, other): @@ -611,9 +631,7 @@ def onOffset(cls, dt): else: return dt.day == 1 - @property - def rule_code(self): - return 'BMS' + _prefix = 'BMS' class Week(CacheableOffset, DateOffset): @@ -665,15 +683,25 @@ def apply(self, other): def onOffset(self, dt): return dt.weekday() == self.weekday + _prefix = 'W' + @property def rule_code(self): suffix = '' if self.weekday is not None: - suffix = '-%s' % (_weekday_dict[self.weekday]) - return 'W' + suffix + suffix = '-%s' % (_int_to_weekday[self.weekday]) + return self._prefix + suffix + + @classmethod + def _from_name(cls, suffix=None): + if not suffix: + weekday = None + else: + weekday = _weekday_to_int[suffix] + return cls(weekday=weekday) -_weekday_dict = { +_int_to_weekday = { 0: 'MON', 1: 'TUE', 2: 'WED', @@ -683,6 +711,8 @@ def rule_code(self): 6: 'SUN' } +_weekday_to_int = dict((v, k) for k, v in _int_to_weekday.items()) + class WeekOfMonth(CacheableOffset, DateOffset): """ @@ -736,7 +766,8 @@ def apply(self, other): else: months = self.n + 1 - return self.getOffsetOfMonth(other + relativedelta(months=months, day=1)) + return self.getOffsetOfMonth(other + relativedelta(months=months, + day=1)) def getOffsetOfMonth(self, dt): w = Week(weekday=self.weekday) @@ -754,27 +785,69 @@ def onOffset(self, dt): @property def rule_code(self): - suffix = '-%d%s' % (self.week + 1, _weekday_dict.get(self.weekday, '')) - return 'WOM' + suffix - + return '%s-%d%s' % (self._prefix, self.week + 1, + _int_to_weekday.get(self.weekday, '')) -class BQuarterEnd(CacheableOffset, DateOffset): - """DateOffset increments between business Quarter dates - startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ... - startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... - startingMonth = 3 corresponds to dates like 3/30/2007, 6/29/2007, ... - """ - _outputName = 'BusinessQuarterEnd' + _prefix = 'WOM' + @classmethod + def _from_name(cls, suffix=None): + if not suffix: + raise ValueError("Prefix %r requires a suffix." % (cls._prefix)) + # TODO: handle n here... + # only one digit weeks (1 --> week 0, 2 --> week 1, etc.) + week = int(suffix[0]) - 1 + weekday = _weekday_to_int[suffix[1:]] + return cls(week=week, weekday=weekday) + + +class QuarterOffset(DateOffset): + """Quarter representation - doesn't call super""" + + #: default month for __init__ + _default_startingMonth = None + #: default month in _from_name + _from_name_startingMonth = None + + # TODO: Consider combining QuarterOffset and YearOffset __init__ at some + # point def __init__(self, n=1, **kwds): self.n = n - self.startingMonth = kwds.get('startingMonth', 3) + self.startingMonth = kwds.get('startingMonth', + self._default_startingMonth) self.kwds = kwds def isAnchored(self): return (self.n == 1 and self.startingMonth is not None) + @classmethod + def _from_name(cls, suffix=None): + kwargs = {} + if suffix: + kwargs['startingMonth'] = _month_to_int[suffix] + else: + if cls._from_name_startingMonth is not None: + kwargs['startingMonth'] = cls._from_name_startingMonth + return cls(**kwargs) + + @property + def rule_code(self): + return '%s-%s' % (self._prefix, _int_to_month[self.startingMonth]) + + +class BQuarterEnd(CacheableOffset, QuarterOffset): + """DateOffset increments between business Quarter dates + startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ... + startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... + startingMonth = 3 corresponds to dates like 3/30/2007, 6/29/2007, ... + """ + _outputName = 'BusinessQuarterEnd' + _default_startingMonth = 3 + # 'BQ' + _from_name_startingMonth = 12 + _prefix = 'BQ' + def apply(self, other): n = self.n @@ -802,13 +875,8 @@ def onOffset(self, dt): modMonth = (dt.month - self.startingMonth) % 3 return BMonthEnd().onOffset(dt) and modMonth == 0 - @property - def rule_code(self): - suffix = '-%s' % _month_dict[self.startingMonth] - return 'BQ' + suffix - -_month_dict = { +_int_to_month = { 1: 'JAN', 2: 'FEB', 3: 'MAR', @@ -823,18 +891,16 @@ def rule_code(self): 12: 'DEC' } +_month_to_int = dict((v, k) for k, v in _int_to_month.items()) -class BQuarterBegin(CacheableOffset, DateOffset): - _outputName = "BusinessQuarterBegin" - - def __init__(self, n=1, **kwds): - self.n = n - self.startingMonth = kwds.get('startingMonth', 3) - self.kwds = kwds - - def isAnchored(self): - return (self.n == 1 and self.startingMonth is not None) +# TODO: This is basically the same as BQuarterEnd +class BQuarterBegin(CacheableOffset, QuarterOffset): + _outputName = "BusinessQuarterBegin" + # I suspect this is wrong for *all* of them. + _default_startingMonth = 3 + _from_name_startingMonth = 1 + _prefix = 'BQS' def apply(self, other): n = self.n @@ -864,19 +930,16 @@ def apply(self, other): other.microsecond) return result - @property - def rule_code(self): - suffix = '-%s' % _month_dict[self.startingMonth] - return 'BQS' + suffix - -class QuarterEnd(CacheableOffset, DateOffset): +class QuarterEnd(CacheableOffset, QuarterOffset): """DateOffset increments between business Quarter dates startingMonth = 1 corresponds to dates like 1/31/2007, 4/30/2007, ... startingMonth = 2 corresponds to dates like 2/28/2007, 5/31/2007, ... startingMonth = 3 corresponds to dates like 3/31/2007, 6/30/2007, ... """ _outputName = 'QuarterEnd' + _default_startingMonth = 3 + _prefix = 'Q' def __init__(self, n=1, **kwds): self.n = n @@ -907,20 +970,12 @@ def onOffset(self, dt): modMonth = (dt.month - self.startingMonth) % 3 return MonthEnd().onOffset(dt) and modMonth == 0 - @property - def rule_code(self): - suffix = '-%s' % _month_dict[self.startingMonth] - return 'Q' + suffix - -class QuarterBegin(CacheableOffset, DateOffset): +class QuarterBegin(CacheableOffset, QuarterOffset): _outputName = 'QuarterBegin' - - def __init__(self, n=1, **kwds): - self.n = n - self.startingMonth = kwds.get('startingMonth', 3) - - self.kwds = kwds + _default_startingMonth = 3 + _from_name_startingMonth = 1 + _prefix = 'QS' def isAnchored(self): return (self.n == 1 and self.startingMonth is not None) @@ -943,24 +998,36 @@ def apply(self, other): other = other + relativedelta(months=3 * n - monthsSince, day=1) return other - @property - def rule_code(self): - suffix = '-%s' % _month_dict[self.startingMonth] - return 'QS' + suffix - -class BYearEnd(CacheableOffset, DateOffset): - """DateOffset increments between business EOM dates""" - _outputName = 'BusinessYearEnd' +class YearOffset(DateOffset): + """DateOffset that just needs a month""" def __init__(self, n=1, **kwds): - self.month = kwds.get('month', 12) + self.month = kwds.get('month', self._default_month) if self.month < 1 or self.month > 12: raise ValueError('Month must go from 1 to 12') DateOffset.__init__(self, n=n, **kwds) + @classmethod + def _from_name(cls, suffix=None): + kwargs = {} + if suffix: + kwargs['month'] = _month_to_int[suffix] + return cls(**kwargs) + + @property + def rule_code(self): + return '%s-%s' % (self._prefix, _int_to_month[self.month]) + + +class BYearEnd(CacheableOffset, YearOffset): + """DateOffset increments between business EOM dates""" + _outputName = 'BusinessYearEnd' + _default_month = 12 + _prefix = 'BA' + def apply(self, other): n = self.n @@ -990,23 +1057,12 @@ def apply(self, other): return result - @property - def rule_code(self): - suffix = '-%s' % _month_dict[self.month] - return 'BA' + suffix - -class BYearBegin(CacheableOffset, DateOffset): +class BYearBegin(CacheableOffset, YearOffset): """DateOffset increments between business year begin dates""" _outputName = 'BusinessYearBegin' - - def __init__(self, n=1, **kwds): - self.month = kwds.get('month', 1) - - if self.month < 1 or self.month > 12: - raise ValueError('Month must go from 1 to 12') - - DateOffset.__init__(self, n=n, **kwds) + _default_month = 1 + _prefix = 'BAS' def apply(self, other): n = self.n @@ -1032,22 +1088,11 @@ def apply(self, other): first = _get_firstbday(wkday) return datetime(other.year, self.month, first) - @property - def rule_code(self): - suffix = '-%s' % _month_dict[self.month] - return 'BAS' + suffix - -class YearEnd(CacheableOffset, DateOffset): +class YearEnd(CacheableOffset, YearOffset): """DateOffset increments between calendar year ends""" - - def __init__(self, n=1, **kwds): - self.month = kwds.get('month', 12) - - if self.month < 1 or self.month > 12: - raise ValueError('Month must go from 1 to 12') - - DateOffset.__init__(self, n=n, **kwds) + _default_month = 12 + _prefix = 'A' def apply(self, other): def _increment(date): @@ -1074,8 +1119,8 @@ def _decrement(date): date.microsecond) def _rollf(date): - if (date.month != self.month or - date.day < tslib.monthrange(date.year, date.month)[1]): + if date.month != self.month or\ + date.day < tslib.monthrange(date.year, date.month)[1]: date = _increment(date) return date @@ -1099,22 +1144,11 @@ def onOffset(self, dt): wkday, days_in_month = tslib.monthrange(dt.year, self.month) return self.month == dt.month and dt.day == days_in_month - @property - def rule_code(self): - suffix = '-%s' % _month_dict[self.month] - return 'A' + suffix - -class YearBegin(CacheableOffset, DateOffset): +class YearBegin(CacheableOffset, YearOffset): """DateOffset increments between calendar year begin dates""" - - def __init__(self, n=1, **kwds): - self.month = kwds.get('month', 1) - - if self.month < 1 or self.month > 12: - raise ValueError('Month must go from 1 to 12') - - DateOffset.__init__(self, n=n, **kwds) + _default_month = 1 + _prefix = 'AS' def apply(self, other): def _increment(date): @@ -1127,7 +1161,7 @@ def _increment(date): def _decrement(date): year = date.year if date.month < self.month or (date.month == self.month and - date.day == 1): + date.day == 1): year -= 1 return datetime(year, self.month, 1, date.hour, date.minute, date.second, date.microsecond) @@ -1156,11 +1190,6 @@ def _rollf(date): def onOffset(self, dt): return dt.month == self.month and dt.day == 1 - @property - def rule_code(self): - suffix = '-%s' % _month_dict[self.month] - return 'AS' + suffix - #---------------------------------------------------------------------- # Ticks @@ -1175,7 +1204,7 @@ def f(self, other): return f -class Tick(DateOffset): +class Tick(SingleConstructorOffset): _inc = timedelta(microseconds=1000) __gt__ = _tick_comp(operator.gt) @@ -1242,11 +1271,7 @@ def apply(self, other): else: raise ApplyTypeError('Unhandled type: %s' % type(other).__name__) - _rule_base = 'undefined' - - @property - def rule_code(self): - return self._rule_base + _prefix = 'undefined' def isAnchored(self): return False @@ -1287,36 +1312,36 @@ def _delta_to_nanoseconds(delta): class Day(CacheableOffset, Tick): _inc = timedelta(1) - _rule_base = 'D' + _prefix = 'D' class Hour(Tick): _inc = timedelta(0, 3600) - _rule_base = 'H' + _prefix = 'H' class Minute(Tick): _inc = timedelta(0, 60) - _rule_base = 'T' + _prefix = 'T' class Second(Tick): _inc = timedelta(0, 1) - _rule_base = 'S' + _prefix = 'S' class Milli(Tick): - _rule_base = 'L' + _prefix = 'L' class Micro(Tick): _inc = timedelta(microseconds=1) - _rule_base = 'U' + _prefix = 'U' class Nano(Tick): _inc = np.timedelta64(1, 'ns') if not _np_version_under1p7 else 1 - _rule_base = 'N' + _prefix = 'N' BDay = BusinessDay @@ -1402,3 +1427,46 @@ def generate_range(start=None, end=None, periods=None, if next_date <= cur: raise ValueError('Offset %s did not increment date' % offset) cur = next_date + +prefix_mapping = dict((offset._prefix, offset) for offset in [ + YearBegin, # 'AS' + YearEnd, # 'A' + BYearBegin, # 'BAS' + BYearEnd, # 'BA' + BusinessDay, # 'B' + BusinessMonthBegin, # 'BMS' + BusinessMonthEnd, # 'BM' + BQuarterEnd, # 'BQ' + BQuarterBegin, # 'BQS' + CustomBusinessDay, # 'C' + MonthEnd, # 'M' + MonthBegin, # 'MS' + Week, # 'W' + Second, # 'S' + Minute, # 'T' + Micro, # 'U' + QuarterEnd, # 'Q' + QuarterBegin, # 'QS' + Milli, # 'L' + Hour, # 'H' + Day, # 'D' + WeekOfMonth, # 'WOM' +]) + +if not _np_version_under1p7: + # Only 1.7+ supports nanosecond resolution + prefix_mapping['N'] = Nano + + +def _make_offset(key): + """Gets offset based on key. KeyError if prefix is bad, ValueError if + suffix is bad. All handled by `get_offset` in tseries/frequencies. Not + public.""" + if key is None: + return None + split = key.replace('@', '-').split('-') + klass = prefix_mapping[split[0]] + # handles case where there's no suffix (and will TypeError if too many '-') + obj = klass._from_name(*split[1:]) + obj._named = key + return obj diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index 0f7a356e84664..8592a2c2d8d9c 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -13,7 +13,7 @@ DateOffset, Week, YearBegin, YearEnd, Hour, Minute, Second, Day, Micro, Milli, Nano, WeekOfMonth, format, ole2datetime, QuarterEnd, to_datetime, normalize_date, - get_offset, get_offset_name, hasOffsetName, get_standard_freq) + get_offset, get_offset_name, get_standard_freq) from pandas.tseries.frequencies import _offset_map from pandas.tseries.index import _to_m8, DatetimeIndex, _daterange_cache @@ -99,6 +99,7 @@ class TestDateOffset(unittest.TestCase): def setUp(self): self.d = Timestamp(datetime(2008, 1, 2)) + _offset_map.clear() def test_repr(self): repr(DateOffset()) @@ -1747,11 +1748,6 @@ def test_compare_ticks(): assert(kls(3) != kls(4)) -def test_hasOffsetName(): - assert hasOffsetName(BDay()) - assert not hasOffsetName(BDay(2)) - - def test_get_offset_name(): assertRaisesRegexp(ValueError, 'Bad rule.*BusinessDays', get_offset_name, BDay(2)) @@ -1766,17 +1762,17 @@ def test_get_offset_name(): def test_get_offset(): assertRaisesRegexp(ValueError, "rule.*GIBBERISH", get_offset, 'gibberish') + assertRaisesRegexp(ValueError, "rule.*QS-JAN-B", get_offset, 'QS-JAN-B') + pairs = [('B', BDay()), ('b', BDay()), ('bm', BMonthEnd()), + ('Bm', BMonthEnd()), ('W-MON', Week(weekday=0)), + ('W-TUE', Week(weekday=1)), ('W-WED', Week(weekday=2)), + ('W-THU', Week(weekday=3)), ('W-FRI', Week(weekday=4)), + ('w@Sat', Week(weekday=5))] - assert get_offset('B') == BDay() - assert get_offset('b') == BDay() - assert get_offset('bm') == BMonthEnd() - assert get_offset('Bm') == BMonthEnd() - assert get_offset('W-MON') == Week(weekday=0) - assert get_offset('W-TUE') == Week(weekday=1) - assert get_offset('W-WED') == Week(weekday=2) - assert get_offset('W-THU') == Week(weekday=3) - assert get_offset('W-FRI') == Week(weekday=4) - assert get_offset('w@Sat') == Week(weekday=5) + for name, expected in pairs: + offset = get_offset(name) + assert offset == expected, ("Expected %r to yield %r (actual: %r)" % + (name, expected, offset)) def test_parse_time_string(): @@ -1813,7 +1809,7 @@ def test_quarterly_dont_normalize(): class TestOffsetAliases(unittest.TestCase): def setUp(self): - pass + _offset_map.clear() def test_alias_equality(self): for k, v in compat.iteritems(_offset_map): @@ -1824,15 +1820,17 @@ def test_alias_equality(self): def test_rule_code(self): lst = ['M', 'MS', 'BM', 'BMS', 'D', 'B', 'H', 'T', 'S', 'L', 'U'] for k in lst: - assert k == _offset_map[k].rule_code - assert k == (_offset_map[k] * 3).rule_code + self.assertEqual(k, get_offset(k).rule_code) + # should be cached - this is kind of an internals test... + assert k in _offset_map + self.assertEqual(k, (get_offset(k) * 3).rule_code) suffix_lst = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] base = 'W' for v in suffix_lst: alias = '-'.join([base, v]) - assert alias == _offset_map[alias].rule_code - assert alias == (_offset_map[alias] * 5).rule_code + self.assertEqual(alias, get_offset(alias).rule_code) + self.assertEqual(alias, (get_offset(alias) * 5).rule_code) suffix_lst = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'] @@ -1840,8 +1838,8 @@ def test_rule_code(self): for base in base_lst: for v in suffix_lst: alias = '-'.join([base, v]) - assert alias == _offset_map[alias].rule_code - assert alias == (_offset_map[alias] * 5).rule_code + self.assertEqual(alias, get_offset(alias).rule_code) + self.assertEqual(alias, (get_offset(alias) * 5).rule_code) def test_apply_ticks(): @@ -1900,6 +1898,7 @@ def test_all_cacheableoffsets(self): def setUp(self): _daterange_cache.clear() + _offset_map.clear() def run_X_index_creation(self, cls): inst1 = cls() @@ -1927,6 +1926,26 @@ def test_week_of_month_index_creation(self): self.assertTrue(inst2 in _daterange_cache) +class TestReprNames(unittest.TestCase): + def test_str_for_named_is_name(self): + # look at all the amazing combinations! + month_prefixes = ['A', 'AS', 'BA', 'BAS', 'Q', 'BQ', 'BQS', 'QS'] + names = [prefix + '-' + month for prefix in month_prefixes + for month in ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', + 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC']] + days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] + names += ['W-' + day for day in days] + names += ['WOM-' + week + day for week in ('1', '2', '3', '4') + for day in days] + #singletons + names += ['S', 'T', 'U', 'BM', 'BMS', 'BQ', 'QS'] # No 'Q' + _offset_map.clear() + for name in names: + offset = get_offset(name) + self.assertEqual(repr(offset), name) + self.assertEqual(str(offset), name) + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False)