Skip to content

Commit

Permalink
BUG: GH15429 transform result of timedelta from datetime
Browse files Browse the repository at this point in the history
The transform() operation needs to return a like-indexed object. To facilitate this, transform starts with a copy of the original series. Then, after the computation for each group, it sets the appropriate elements of the copied series equal to the result. At that point it does a type comparison and discovers that the timedelta cannot be cast to a datetime.
  • Loading branch information
stephenrauch committed Feb 27, 2017
1 parent b3ae4c7 commit e0004e7
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 5 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -624,7 +624,8 @@ Bug Fixes
- Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`)



- Bug in ``groupby.transform`` where calculating a ``timedelta`` from a ``datetime`` caused a ``ValueError`` (:issue:`15429`)
- Bug in ``groupby.transform`` where calculating a ``float`` from a ``datetime`` returned a ``datetime`` instead of ``float`` (:issue:`10972`)


- Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`)
Expand Down
13 changes: 10 additions & 3 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
_ensure_object,
_ensure_categorical,
_ensure_float)
from pandas.types.cast import _possibly_downcast_to_dtype
from pandas.types.cast import _possibly_downcast_to_dtype, _find_common_type
from pandas.types.missing import isnull, notnull, _maybe_fill

from pandas.core.common import (_values_from_object, AbstractMethodError,
Expand Down Expand Up @@ -2906,8 +2906,15 @@ def transform(self, func, *args, **kwargs):
common_type = np.common_type(np.array(res), result)
if common_type != result.dtype:
result = result.astype(common_type)
except:
pass
except Exception as exc:
# date math can cause type of result to change
if i == 0 and (is_datetime64_dtype(result.dtype) or
is_timedelta64_dtype(result.dtype)):
try:
dtype = res.dtype
except Exception as exc:
dtype = type(res)
result = np.empty_like(result, dtype)

indexer = self._get_index(name)
result[indexer] = res
Expand Down
38 changes: 37 additions & 1 deletion pandas/tests/groupby/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pandas as pd
from pandas.util import testing as tm
from pandas import Series, DataFrame, Timestamp, MultiIndex, concat
from pandas import Series, DataFrame, Timestamp, MultiIndex, concat, date_range
from pandas.types.common import _ensure_platform_int
from .common import MixIn, assert_fp_equal

Expand Down Expand Up @@ -190,6 +190,42 @@ def test_transform_bug(self):
expected = Series(np.arange(5, 0, step=-1), name='B')
assert_series_equal(result, expected)

def test_transform_datetime_to_timedelta(self):
    # GH 15429
    # transforming a datetime column to a timedelta result
    #
    # Every row of 'A' holds the same timestamp, so there is a single
    # group and max() - min() within it is a zero timedelta.
    df = DataFrame(dict(A=Timestamp('20130101'), B=np.arange(5)))
    expected = Series([
        Timestamp('20130101') - Timestamp('20130101')] * 5, name='A')

    # this does date math without changing result type in transform:
    # the lambda adds base_time back so each group result is still a
    # datetime, and the subtraction to a timedelta happens afterwards
    base_time = df['A'][0]
    result = df.groupby('A')['A'].transform(
        lambda x: x.max() - x.min() + base_time) - base_time
    assert_series_equal(result, expected)

    # this does date math and causes the transform to return timedelta
    # directly, i.e. the group result dtype differs from the input dtype
    result = df.groupby('A')['A'].transform(lambda x: x.max() - x.min())
    assert_series_equal(result, expected)

def test_transform_datetime_to_numeric(self):
    # GH 10972
    # a transform over a datetime column whose per-group result is
    # numeric must come back as that numeric result

    def build_frame():
        # one group (a == 1) holding two consecutive days in column 'b'
        days = date_range('2015-01-01', periods=2, freq='D')
        return DataFrame({'a': 1, 'b': days})

    # convert dt to float: subtracting the group mean of dayofweek
    frame = build_frame()
    centered = frame.groupby('a').b.transform(
        lambda grp: grp.dt.dayofweek - grp.dt.dayofweek.mean())
    expected_float = Series([-0.5, 0.5], name='b')
    assert_series_equal(centered, expected_float)

    # convert dt to int: subtracting the group minimum of dayofweek
    frame = build_frame()
    shifted = frame.groupby('a').b.transform(
        lambda grp: grp.dt.dayofweek - grp.dt.dayofweek.min())
    expected_int = Series([0, 1], name='b')
    assert_series_equal(shifted, expected_int)

def test_transform_multiple(self):
grouped = self.ts.groupby([lambda x: x.year, lambda x: x.month])

Expand Down

0 comments on commit e0004e7

Please sign in to comment.