-
-
Notifications
You must be signed in to change notification settings - Fork 18.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: Cleanup backend for Offsets and Period #5148
Changes from all commits
4069b8a
780ad84
44b26cc
44a6ab1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -71,7 +71,7 @@ def get_freq(freq): | |
return freq | ||
|
||
|
||
def get_freq_code(freqstr): | ||
def get_freq_code(freqstr, as_periodstr=False): | ||
""" | ||
|
||
Parameters | ||
|
@@ -81,7 +81,13 @@ def get_freq_code(freqstr): | |
------- | ||
""" | ||
if isinstance(freqstr, DateOffset): | ||
freqstr = (get_offset_name(freqstr), freqstr.n) | ||
freqstr_raw = get_offset_name(freqstr) | ||
|
||
#if we can, convert to canonical period str | ||
if as_periodstr: | ||
freqstr_raw = get_period_alias(freqstr_raw) | ||
|
||
freqstr = (freqstr_raw, freqstr.n) | ||
|
||
if isinstance(freqstr, tuple): | ||
if (com.is_integer(freqstr[0]) and | ||
|
@@ -113,7 +119,7 @@ def _get_freq_str(base, mult=1): | |
code = _reverse_period_code_map.get(base) | ||
if mult == 1: | ||
return code | ||
return str(mult) + code | ||
return "%s%s" % (mult, code) | ||
|
||
|
||
#---------------------------------------------------------------------- | ||
|
@@ -157,6 +163,7 @@ def _get_freq_str(base, mult=1): | |
'H': 'H', | ||
'Q': 'Q', | ||
'A': 'A', | ||
'Y': 'A', | ||
'W': 'W', | ||
'M': 'M' | ||
} | ||
|
@@ -202,6 +209,9 @@ def get_period_alias(offset_str): | |
'Q@FEB': 'BQ-FEB', | ||
'Q@MAR': 'BQ-MAR', | ||
'Q': 'Q-DEC', | ||
'QS': 'QS-JAN', | ||
'BQ': 'BQ-DEC', | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are these new for Period? Do they need to be added to the docs? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought there was an issue for these, maybe not. I'll make sure to mention these in the docs. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These are to canonicalize the offsets such that if
Adding these aliases fixes that. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @cancan101 I'm +1 on equality, +0 on making names the same and -1 on making sure they are the same offset. It could happen to be that There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I agree with your votes here. I wasn't suggesting making the offsets
|
||
'BQS': 'BQS-JAN', | ||
|
||
'A': 'A-DEC', # YearEnd(month=12), | ||
'AS': 'AS-JAN', # YearBegin(month=1), | ||
|
@@ -387,19 +397,44 @@ def get_legacy_offset_name(offset): | |
name = offset.name | ||
return _legacy_reverse_map.get(name, name) | ||
|
||
def get_standard_freq(freq): | ||
def get_standard_freq(freq, as_periodstr=False): | ||
""" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you put an example in the docstring? |
||
Return the standardized frequency string | ||
Return the standardized frequency string. | ||
as_periodstr=True returns the string representing the period rather than | ||
the frequency. An example when these may differ is MonthBegin. | ||
MonthBegin and MonthEnd are two different frequencies but they define the | ||
same period. | ||
|
||
>>> get_standard_freq(pandas.tseries.offsets.MonthBegin(), as_periodstr=False) | ||
'L' | ||
>>> get_standard_freq(pandas.tseries.offsets.MonthEnd(), as_periodstr=False) | ||
'M' | ||
>>> get_standard_freq(pandas.tseries.offsets.MonthBegin(), as_periodstr=True) | ||
'M' | ||
>>> get_standard_freq(pandas.tseries.offsets.MonthEnd(), as_periodstr=True) | ||
'M' | ||
""" | ||
if freq is None: | ||
return None | ||
|
||
if isinstance(freq, DateOffset): | ||
return get_offset_name(freq) | ||
code, stride = get_freq_code(freq, as_periodstr=as_periodstr) | ||
|
||
code, stride = get_freq_code(freq) | ||
return _get_freq_str(code, stride) | ||
|
||
def _get_standard_period_freq_impl(freq): | ||
return get_standard_freq(freq, as_periodstr=True) | ||
|
||
def get_standard_period_freq(freq): | ||
if isinstance(freq, DateOffset): | ||
return freq.periodstr | ||
|
||
return _get_standard_period_freq_impl(freq) | ||
|
||
def _assert_mult_1(mult): | ||
if mult != 1: | ||
# TODO: Better error message - this is slightly confusing | ||
raise ValueError('Only mult == 1 supported') | ||
|
||
#---------------------------------------------------------------------- | ||
# Period codes | ||
|
||
|
@@ -629,7 +664,7 @@ def infer_freq(index, warn=True): | |
|
||
Returns | ||
------- | ||
freq : string or None | ||
freq : DateOffset object or None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So does this no longer accept a string at all? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What do you mean by "accept"? I changed the return type. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, you're right, I read too quickly. |
||
None if no discernible frequency | ||
TypeError if the index is not datetime-like | ||
""" | ||
|
@@ -650,7 +685,28 @@ def infer_freq(index, warn=True): | |
|
||
index = pd.DatetimeIndex(index) | ||
inferer = _FrequencyInferer(index, warn=warn) | ||
return inferer.get_freq() | ||
return to_offset(inferer.get_freq()) | ||
|
||
|
||
def infer_freqstr(index, warn=True): | ||
""" | ||
Infer the most likely frequency given the input index. If the frequency is | ||
uncertain, a warning will be printed | ||
|
||
Parameters | ||
---------- | ||
index : DatetimeIndex | ||
if passed a Series will use the values of the series (NOT THE INDEX) | ||
warn : boolean, default True | ||
|
||
Returns | ||
------- | ||
freq : string or None | ||
None if no discernible frequency | ||
TypeError if the index is not datetime-like | ||
""" | ||
return infer_freq(index, warn).freqstr | ||
|
||
|
||
_ONE_MICRO = long(1000) | ||
_ONE_MILLI = _ONE_MICRO * 1000 | ||
|
@@ -887,9 +943,11 @@ def is_subperiod(source, target): | |
------- | ||
is_subperiod : boolean | ||
""" | ||
source_raw = source | ||
if isinstance(source, offsets.DateOffset): | ||
source = source.rule_code | ||
|
||
target_raw = target | ||
if isinstance(target, offsets.DateOffset): | ||
target = target.rule_code | ||
|
||
|
@@ -918,6 +976,12 @@ def is_subperiod(source, target): | |
return source in ['T', 'S'] | ||
elif target == 'S': | ||
return source in ['S'] | ||
elif isinstance(source_raw, offsets._NonCythonPeriod): | ||
return source_raw.is_subperiod(target_raw) | ||
elif isinstance(target_raw, offsets._NonCythonPeriod): | ||
return target_raw.is_superperiod(source_raw) | ||
else: | ||
return False | ||
|
||
|
||
def is_superperiod(source, target): | ||
|
@@ -936,9 +1000,11 @@ def is_superperiod(source, target): | |
------- | ||
is_superperiod : boolean | ||
""" | ||
source_raw = source | ||
if isinstance(source, offsets.DateOffset): | ||
source = source.rule_code | ||
|
||
target_raw = target | ||
if isinstance(target, offsets.DateOffset): | ||
target = target.rule_code | ||
|
||
|
@@ -971,6 +1037,12 @@ def is_superperiod(source, target): | |
return target in ['T', 'S'] | ||
elif source == 'S': | ||
return target in ['S'] | ||
elif isinstance(source_raw, offsets._NonCythonPeriod): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do instance checks as opposed to a try/except that catches the relevant exception? Also, can we add an `else: return False`? It would just be a little clearer here. |
||
return source_raw.is_superperiod(target_raw) | ||
elif isinstance(target_raw, offsets._NonCythonPeriod): | ||
return target_raw.is_subperiod(source_raw) | ||
else: | ||
return False | ||
|
||
|
||
def _get_rule_month(source, default='DEC'): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,7 +14,7 @@ | |
from pandas.compat import u | ||
from pandas.tseries.frequencies import ( | ||
infer_freq, to_offset, get_period_alias, | ||
Resolution, get_reso_string, get_offset) | ||
Resolution, get_reso_string, get_offset, infer_freqstr) | ||
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay | ||
from pandas.tseries.tools import parse_time_string, normalize_date | ||
from pandas.util.decorators import cache_readonly | ||
|
@@ -792,8 +792,8 @@ def to_period(self, freq=None): | |
msg = "You must pass a freq argument as current index has none." | ||
raise ValueError(msg) | ||
|
||
if freq is None: | ||
freq = get_period_alias(self.freqstr) | ||
if freq is None: # No reason to convert to str; keep whatever freq is | ||
freq = self.freq | ||
|
||
return PeriodIndex(self.values, freq=freq, tz=self.tz) | ||
|
||
|
@@ -1427,6 +1427,13 @@ def inferred_freq(self): | |
except ValueError: | ||
return None | ||
|
||
@cache_readonly | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are you sure this can be cached like this? Is it possible that it will become inferred later on? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The |
||
def inferred_freqstr(self): | ||
try: | ||
return infer_freqstr(self) | ||
except ValueError: | ||
return None | ||
|
||
@property | ||
def freqstr(self): | ||
""" return the frequency object as a string if it's set, otherwise None """ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
needs to default to do what it did before. [because I think this is actually a public method]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jtratner Am I missing something here? The change should by default not change behavior.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
okay.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you update the docstring for parameters and returns?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@cancan101 please make sure to update docstring here