Skip to content

Commit

Permalink
Merge pull request #3595 from jreback/combine_first_timestamp
Browse files Browse the repository at this point in the history
BUG: (GH3593) fixed a bug in the incorrect conversion of datetime64[ns]  in combine_first
  • Loading branch information
jreback committed May 13, 2013
2 parents 5e9db38 + 6b5ca31 commit e6cdd46
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 14 deletions.
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ pandas 0.11.1
- ``combine_first`` not returning the same dtype in cases where it can (GH3552_)
- Fixed bug with ``Panel.transpose`` argument aliases (GH3556_)
- Fixed platform bug in ``PeriodIndex.take`` (GH3579_)
- Fixed bug in incorrect conversion of datetime64[ns] in ``combine_first`` (GH3593_)
- Fixed bug in reset_index with ``NaN`` in a multi-index (GH3586_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
Expand Down Expand Up @@ -145,6 +146,7 @@ pandas 0.11.1
.. _GH3586: https://github.com/pydata/pandas/issues/3586
.. _GH3493: https://github.com/pydata/pandas/issues/3493
.. _GH3579: https://github.com/pydata/pandas/issues/3579
.. _GH3593: https://github.com/pydata/pandas/issues/3593
.. _GH3556: https://github.com/pydata/pandas/issues/3556


Expand Down
34 changes: 34 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,33 @@ def _possibly_downcast_to_dtype(result, dtype):

return result

def _lcd_dtypes(a_dtype, b_dtype):
    """
    Return the lowest-common-denominator dtype able to hold both dtypes.

    Parameters
    ----------
    a_dtype, b_dtype : np.dtype
        The two dtypes to reconcile (``.itemsize`` is read in the numeric
        branches, so dtype instances are expected).

    Returns
    -------
    dtype-like
        * ``_NS_DTYPE`` if either side is datetime64
        * ``_TD_DTYPE`` if either side is timedelta64
        * a complex dtype if either side is complex and the other is numeric
        * an integer dtype for integer/integer
        * a float dtype for float/float and integer/float mixes
        * ``np.object_`` for anything else
    """
    if is_datetime64_dtype(a_dtype) or is_datetime64_dtype(b_dtype):
        return _NS_DTYPE
    elif is_timedelta64_dtype(a_dtype) or is_timedelta64_dtype(b_dtype):
        return _TD_DTYPE
    elif is_complex_dtype(a_dtype):
        if is_complex_dtype(b_dtype):
            if a_dtype.itemsize == b_dtype.itemsize:
                return a_dtype
            # differing widths: use the widest so neither side is truncated
            return np.complex128
        if is_float_dtype(b_dtype) or is_integer_dtype(b_dtype):
            # a real dtype fits in a complex one; returning float64 here
            # would silently discard the imaginary part of ``a``
            return np.complex128
    elif is_integer_dtype(a_dtype):
        if is_integer_dtype(b_dtype):
            if a_dtype.itemsize == b_dtype.itemsize:
                return a_dtype
            return np.int64
        if is_complex_dtype(b_dtype):
            # keep the imaginary part of ``b`` instead of casting to float64
            return np.complex128
        return np.float64
    elif is_float_dtype(a_dtype):
        if is_float_dtype(b_dtype):
            if a_dtype.itemsize == b_dtype.itemsize:
                return a_dtype
            return np.float64
        elif is_integer_dtype(b_dtype):
            # must be the dtype check: ``is_integer`` tests scalar values and
            # is never true for a dtype, which sent float/int mixes to object
            return np.float64
        elif is_complex_dtype(b_dtype):
            return np.complex128

    # ``np.object_`` is the actual scalar type; ``np.object`` was only an
    # alias of the builtin and no longer exists in current NumPy
    return np.object_

def _interp_wrapper(f, wrap_dtype, na_override=None):
def wrapper(arr, mask, limit=None):
view = arr.view(wrap_dtype)
Expand Down Expand Up @@ -1524,6 +1551,13 @@ def is_float_dtype(arr_or_dtype):
tipo = arr_or_dtype.dtype.type
return issubclass(tipo, np.floating)

def is_complex_dtype(arr_or_dtype):
    """
    Check whether the argument has (or is) a complex floating dtype.

    Parameters
    ----------
    arr_or_dtype : np.dtype, type, or object with a ``dtype`` attribute
        A dtype instance, a scalar type object (e.g. ``np.complex128`` or the
        builtin ``complex``), or an array-like exposing ``.dtype``.

    Returns
    -------
    bool
        True if the resolved scalar type subclasses ``np.complexfloating``.
    """
    if isinstance(arr_or_dtype, np.dtype):
        tipo = arr_or_dtype.type
    elif isinstance(arr_or_dtype, type):
        # type objects have no usable ``.dtype``; resolve them through
        # np.dtype so that e.g. ``np.complex64`` and ``complex`` are accepted
        tipo = np.dtype(arr_or_dtype).type
    else:
        tipo = arr_or_dtype.dtype.type
    return issubclass(tipo, np.complexfloating)


def is_list_like(arg):
return hasattr(arg, '__iter__') and not isinstance(arg, basestring) or hasattr(arg,'len')
Expand Down
47 changes: 41 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3738,8 +3738,11 @@ def combine(self, other, func, fill_value=None, overwrite=True):

result = {}
for col in new_columns:
series = this[col].values
otherSeries = other[col].values
series = this[col]
otherSeries = other[col]

this_dtype = series.dtype
other_dtype = otherSeries.dtype

this_mask = isnull(series)
other_mask = isnull(otherSeries)
Expand All @@ -3756,18 +3759,40 @@ def combine(self, other, func, fill_value=None, overwrite=True):
series[this_mask] = fill_value
otherSeries[other_mask] = fill_value

arr = func(series, otherSeries)
# if we have different dtypes, possibly promote
new_dtype = this_dtype
if this_dtype != other_dtype:
new_dtype = com._lcd_dtypes(this_dtype,other_dtype)
series = series.astype(new_dtype)
otherSeries = otherSeries.astype(new_dtype)

# see if we need to be represented as i8 (datetimelike)
# try to keep us at this dtype
needs_i8_conversion = com.needs_i8_conversion(new_dtype)
if needs_i8_conversion:
this_dtype = new_dtype
arr = func(series, otherSeries, True)
else:
arr = func(series, otherSeries)

if do_fill:
arr = com.ensure_float(arr)
arr[this_mask & other_mask] = NA

# try to downcast back to the original dtype
if needs_i8_conversion:
arr = com._possibly_cast_to_datetime(arr, this_dtype)
else:
arr = com._possibly_downcast_to_dtype(arr, this_dtype)

result[col] = arr

# convert_objects just in case
return self._constructor(result,
index=new_index,
columns=new_columns).convert_objects(copy=False)
columns=new_columns).convert_objects(
convert_dates=True,
copy=False)

def combine_first(self, other):
"""
Expand All @@ -3788,8 +3813,18 @@ def combine_first(self, other):
-------
combined : DataFrame
"""
def combiner(x, y):
return expressions.where(isnull(x), y, x, raise_on_error=True)
def combiner(x, y, needs_i8_conversion=False):
x_values = x.values if hasattr(x,'values') else x
y_values = y.values if hasattr(y,'values') else y
if needs_i8_conversion:
mask = isnull(x)
x_values = x_values.view('i8')
y_values = y_values.view('i8')
else:
mask = isnull(x_values)

return expressions.where(mask, y_values, x_values, raise_on_error=True)

return self.combine(other, combiner, overwrite=False)

def update(self, other, join='left', overwrite=True, filter_func=None,
Expand Down
16 changes: 13 additions & 3 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,14 +258,15 @@ def downcast(self, dtypes = None):

return blocks

def astype(self, dtype, copy = True, raise_on_error = True):
def astype(self, dtype, copy = True, raise_on_error = True, values = None):
"""
Coerce to the new type (if copy=True, return a new copy)
raise on an exception if raise_on_error is True
"""
try:
newb = make_block(com._astype_nansafe(self.values, dtype, copy = copy),
self.items, self.ref_items, fastpath=True)
if values is None:
values = com._astype_nansafe(self.values, dtype, copy = copy)
newb = make_block(values, self.items, self.ref_items, fastpath=True)
except:
if raise_on_error is True:
raise
Expand Down Expand Up @@ -708,6 +709,15 @@ def is_bool(self):
""" we can be a bool if we have only bool values but are of type object """
return lib.is_bool_array(self.values.ravel())

def astype(self, dtype, copy=True, raise_on_error=True, values=None):
    """
    Cast this object block to ``dtype``, coercing when the target is
    datetime64[ns] or timedelta64[ns].

    For those two targets the block's values are first run through
    ``com._possibly_convert_datetime`` and handed to the parent ``astype``
    as pre-converted ``values``; for every other target the parent receives
    ``values=None``.
    """
    dtype = np.dtype(dtype)
    is_datetimelike_target = dtype == _NS_DTYPE or dtype == _TD_DTYPE

    # NOTE: the ``values`` argument is overwritten on both paths, so a value
    # supplied by the caller is never forwarded to the parent implementation.
    values = (com._possibly_convert_datetime(self.values, dtype)
              if is_datetimelike_target else None)

    return super(ObjectBlock, self).astype(dtype=dtype,
                                           copy=copy,
                                           raise_on_error=raise_on_error,
                                           values=values)

def convert(self, convert_dates = True, convert_numeric = True, copy = True):
""" attempt to coerce any object types to better types
return a copy of the block (if copy = True)
Expand Down
13 changes: 9 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from pandas.core.common import (isnull, notnull, _is_bool_indexer,
_default_index, _maybe_promote, _maybe_upcast,
_asarray_tuplesafe, is_integer_dtype,
_infer_dtype_from_scalar, is_list_like)
_infer_dtype_from_scalar, is_list_like,
_NS_DTYPE, _TD_DTYPE)
from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
_ensure_index, _handle_legacy_indexes)
from pandas.core.indexing import _SeriesIndexer, _check_bool_indexer, _check_slice_bounds
Expand Down Expand Up @@ -929,9 +930,13 @@ def astype(self, dtype):
"""
See numpy.ndarray.astype
"""
casted = com._astype_nansafe(self.values, dtype)
return self._constructor(casted, index=self.index, name=self.name,
dtype=casted.dtype)
dtype = np.dtype(dtype)
if dtype == _NS_DTYPE or dtype == _TD_DTYPE:
values = com._possibly_cast_to_datetime(self.values,dtype)
else:
values = com._astype_nansafe(self.values, dtype)
return self._constructor(values, index=self.index, name=self.name,
dtype=values.dtype)

def convert_objects(self, convert_dates=True, convert_numeric=True, copy=True):
"""
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7907,6 +7907,25 @@ def test_combine_first_mixed_bug(self):
expected = Series([True,True,False])
assert_series_equal(result,expected)

# GH 3593, converting datetime64[ns] incorrectly
df0 = DataFrame({"a":[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]})
df1 = DataFrame({"a":[None, None, None]})
df2 = df1.combine_first(df0)
assert_frame_equal(df2,df0)

df2 = df0.combine_first(df1)
assert_frame_equal(df2,df0)

df0 = DataFrame({"a":[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]})
df1 = DataFrame({"a":[datetime(2000, 1, 2), None, None]})
df2 = df1.combine_first(df0)
result = df0.copy()
result.iloc[0,:] = df1.iloc[0,:]
assert_frame_equal(df2,result)

df2 = df0.combine_first(df1)
assert_frame_equal(df2,df0)

def test_update(self):
df = DataFrame([[1.5, nan, 3.],
[1.5, nan, 3.],
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1856,7 +1856,7 @@ def test_operators_timedelta64(self):
v1 = date_range('2012-1-1', periods=3, freq='D')
v2 = date_range('2012-1-2', periods=3, freq='D')
rs = Series(v2) - Series(v1)
xp = Series(1e9 * 3600 * 24, rs.index).astype('timedelta64[ns]')
xp = Series(1e9 * 3600 * 24, rs.index).astype('int64').astype('timedelta64[ns]')
assert_series_equal(rs, xp)
self.assert_(rs.dtype=='timedelta64[ns]')

Expand Down

0 comments on commit e6cdd46

Please sign in to comment.