From 74c834493748ec31939b83013fed921cf57bced2 Mon Sep 17 00:00:00 2001 From: Bran Yang Date: Wed, 10 Feb 2016 12:35:12 -0500 Subject: [PATCH] Fix #12037 Error when Resampling using pd.tseries.offsets.Nano as period Closes #12037 Author: Bran Yang Closes #12270 from BranYang/nanosec and squashes the following commits: bff0c85 [Bran Yang] Add to whatsnew and some comments fd0b307 [Bran Yang] Fix #12037 Error when Resampling using pd.tseries.offsets.Nano as period --- doc/source/whatsnew/v0.18.0.txt | 3 ++- pandas/tseries/resample.py | 9 +++++++-- pandas/tseries/tests/test_resample.py | 23 +++++++++++++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index b1248eda37108..cf1a13d33e17f 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -828,6 +828,7 @@ Bug Fixes - Bug in ``Series.str.get_dummies`` when one of the variables was 'name' (:issue:`12180`) - Bug in ``pd.concat`` while concatenating tz-aware NaT series. (:issue:`11693`, :issue:`11755`) - Bug in ``pd.read_stata`` with version <= 108 files (:issue:`12232`) +- Bug in ``Series.resample`` using a frequency of ``Nano`` when the index is a ``DatetimeIndex`` and contains non-zero nanosecond parts (:issue:`12037`) - Bug in ``Timedelta.round`` with negative values (:issue:`11690`) @@ -845,7 +846,7 @@ Bug Fixes - Bug in ``DataFrame.query`` containing an assignment (:issue:`8664`) - Bug in ``from_msgpack`` where ``__contains__()`` fails for columns of the unpacked ``DataFrame``, if the ``DataFrame`` has object columns. 
(:issue:`11880`) -- Bug in ``df.resample()`` on categorical data with ``TimedeltaIndex`` (:issue:`12169`) +- Bug in ``.resample`` on categorical data with ``TimedeltaIndex`` (:issue:`12169`) - Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 4e7962686db59..a22f87cb90420 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -854,9 +854,14 @@ def _get_time_bins(self, ax): closed=self.closed, base=self.base) tz = ax.tz + # GH #12037 + # use first/last directly instead of calling replace() on them + # because replace() will swallow the nanosecond part + # thus the last bin may be slightly before the end if the end contains + # a nanosecond part, leading to a `Values falls after last bin` error binner = labels = DatetimeIndex(freq=self.freq, - start=first.replace(tzinfo=None), - end=last.replace(tzinfo=None), + start=first, + end=last, tz=tz, name=ax.name) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 233d795015089..1cece8b060377 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -1236,6 +1236,29 @@ def test_monthly_resample_error(self): # it works! ts.resample('M') + def test_nanosecond_resample_error(self): + # GH 12037 - Values falls after last bin when + # Resampling using pd.tseries.offsets.Nano as period + start = 1443707890427 + exp_start = 1443707890400 + indx = pd.date_range( + start=pd.to_datetime(start), + periods=10, + freq='100n' + ) + ts = pd.Series(range(len(indx)), index=indx) + r = ts.resample(pd.tseries.offsets.Nano(100)) + result = r.agg('mean') + + exp_indx = pd.date_range( + start=pd.to_datetime(exp_start), + periods=10, + freq='100n' + ) + exp = pd.Series(range(len(exp_indx)), index=exp_indx) + + assert_series_equal(result, exp) + def test_resample_anchored_intraday(self): # #1471, #1458