Skip to content

Commit

Permalink
Merge pull request #3600 from jreback/modulo
Browse files Browse the repository at this point in the history
BUG:  Fix integer modulo and division to make integer and float dtypes work similarly for invalid values
  • Loading branch information
jreback committed May 14, 2013
2 parents 9ae47f9 + 555af4c commit a14cbd0
Show file tree
Hide file tree
Showing 7 changed files with 159 additions and 21 deletions.
4 changes: 4 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ pandas 0.11.1
to append an index with a different name than the existing
- support datelike columns with a timezone as data_columns (GH2852_)
- table writing performance improvements.
- Add modulo operator to Series, DataFrame

**API Changes**

Expand Down Expand Up @@ -111,6 +112,8 @@ pandas 0.11.1
is a ``list`` or ``tuple``.
- Fixed bug where a time-series was being selected in preference to an actual column name
in a frame (GH3594_)
- Fix modulo and integer division on Series, DataFrames to act similarly to ``float`` dtypes to return
``np.nan`` or ``np.inf`` as appropriate (GH3590_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
.. _GH2786: https://github.com/pydata/pandas/issues/2786
Expand Down Expand Up @@ -155,6 +158,7 @@ pandas 0.11.1
.. _GH3593: https://github.com/pydata/pandas/issues/3593
.. _GH3556: https://github.com/pydata/pandas/issues/3556
.. _GH3594: https://github.com/pydata/pandas/issues/3594
.. _GH3590: https://github.com/pydata/pandas/issues/3590
.. _GH3435: https://github.com/pydata/pandas/issues/3435


Expand Down
12 changes: 12 additions & 0 deletions doc/source/v0.11.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ enhancements along with a large number of bug fixes.
API changes
~~~~~~~~~~~

- Fix modulo and integer division on Series, DataFrames to act similarly to ``float`` dtypes to return
``np.nan`` or ``np.inf`` as appropriate (GH3590_). This corrects a numpy bug that treats ``integer``
and ``float`` dtypes differently.

.. ipython:: python

p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
p % 0
p % p
p / p
p / 0

Enhancements
~~~~~~~~~~~~
Expand All @@ -33,4 +44,5 @@ on GitHub for a complete list.
.. _GH3477: https://github.com/pydata/pandas/issues/3477
.. _GH3492: https://github.com/pydata/pandas/issues/3492
.. _GH3499: https://github.com/pydata/pandas/issues/3499
.. _GH3590: https://github.com/pydata/pandas/issues/3590
.. _GH3435: https://github.com/pydata/pandas/issues/3435
38 changes: 31 additions & 7 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,13 +793,16 @@ def changeit():

# try to directly set by expanding our array to full
# length of the boolean
om = other[mask]
om_at = om.astype(result.dtype)
if (om == om_at).all():
new_other = result.values.copy()
new_other[mask] = om_at
result[:] = new_other
return result, False
try:
om = other[mask]
om_at = om.astype(result.dtype)
if (om == om_at).all():
new_other = result.values.copy()
new_other[mask] = om_at
result[:] = new_other
return result, False
except:
pass

# we are forced to change the dtype of the result as the input isn't compatible
r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
Expand Down Expand Up @@ -948,6 +951,27 @@ def _lcd_dtypes(a_dtype, b_dtype):
return np.float64
return np.object

def _fill_zeros(result, y, fill):
    """
    Overwrite the positions of ``result`` that line up with zeros in an
    integer ``y`` with ``fill`` and return the outcome.

    ``result`` is handed back untouched when ``fill`` is None, when ``y``
    is not of an integer dtype, or when ``y`` holds no zeros.  A ``y``
    that is not an ndarray is first expanded into an array of
    ``result.shape`` filled with that value.
    """
    if fill is None:
        return result

    if isinstance(y, np.ndarray):
        divisor = y
    else:
        # expand the scalar so it can be masked exactly like an array operand
        scalar_dtype, scalar_value = _infer_dtype_from_scalar(y)
        divisor = pa.empty(result.shape, dtype=scalar_dtype)
        divisor.fill(scalar_value)

    if not is_integer_dtype(divisor):
        return result

    zero_mask = divisor.ravel() == 0
    if not zero_mask.any():
        return result

    # work on the flattened result, then restore the original shape;
    # the "changed" flag returned by the helper is not needed here
    original_shape = result.shape
    filled, _ = _maybe_upcast_putmask(result.ravel(), zero_mask, fill)
    return filled.reshape(original_shape)

def _interp_wrapper(f, wrap_dtype, na_override=None):
def wrapper(arr, mask, limit=None):
view = arr.view(wrap_dtype)
Expand Down
20 changes: 13 additions & 7 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,10 +189,12 @@ class DataConflictError(Exception):
# Factory helper methods


def _arith_method(op, name, str_rep = None, default_axis='columns'):
def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None):
def na_op(x, y):
try:
result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True)
result = com._fill_zeros(result,y,fill_zeros)

except TypeError:
xrav = x.ravel()
result = np.empty(x.size, dtype=x.dtype)
Expand Down Expand Up @@ -841,20 +843,23 @@ def __contains__(self, key):
__sub__ = _arith_method(operator.sub, '__sub__', '-', default_axis=None)
__mul__ = _arith_method(operator.mul, '__mul__', '*', default_axis=None)
__truediv__ = _arith_method(operator.truediv, '__truediv__', '/',
default_axis=None)
default_axis=None, fill_zeros=np.inf)
__floordiv__ = _arith_method(operator.floordiv, '__floordiv__',
default_axis=None)
default_axis=None, fill_zeros=np.inf)
__pow__ = _arith_method(operator.pow, '__pow__', '**', default_axis=None)

# modulo: do not hand '*' (the symbol __mul__ uses) to expressions.evaluate
# as the string form of this op; omit str_rep, as __floordiv__ does.
__mod__ = _arith_method(operator.mod, '__mod__', default_axis=None, fill_zeros=np.nan)

__radd__ = _arith_method(_radd_compat, '__radd__', default_axis=None)
__rmul__ = _arith_method(operator.mul, '__rmul__', default_axis=None)
__rsub__ = _arith_method(lambda x, y: y - x, '__rsub__', default_axis=None)
__rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__',
default_axis=None)
default_axis=None, fill_zeros=np.inf)
__rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__',
default_axis=None)
default_axis=None, fill_zeros=np.inf)
__rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__',
default_axis=None)
# reflected modulo: the frame is the right-hand operand, so the operands
# are swapped just like the other reflected operators (other % self)
__rmod__ = _arith_method(lambda x, y: y % x, '__rmod__', default_axis=None, fill_zeros=np.nan)

# boolean operators
__and__ = _arith_method(operator.and_, '__and__', '&')
Expand All @@ -863,9 +868,10 @@ def __contains__(self, key):

# Python 2 division methods
if not py3compat.PY3:
__div__ = _arith_method(operator.div, '__div__', '/', default_axis=None)
__div__ = _arith_method(operator.div, '__div__', '/',
default_axis=None, fill_zeros=np.inf)
__rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__',
default_axis=None)
default_axis=None, fill_zeros=np.inf)

def __neg__(self):
arr = operator.neg(self.values)
Expand Down
19 changes: 12 additions & 7 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,17 @@
# Wrapper function for Series arithmetic methods


def _arith_method(op, name):
def _arith_method(op, name, fill_zeros=None):
"""
Wrapper function for Series arithmetic operations, to avoid
code duplication.
"""
def na_op(x, y):
try:

result = op(x, y)
result = com._fill_zeros(result,y,fill_zeros)

except TypeError:
result = pa.empty(len(x), dtype=x.dtype)
if isinstance(y, pa.Array):
Expand Down Expand Up @@ -1258,16 +1261,18 @@ def iteritems(self):
__add__ = _arith_method(operator.add, '__add__')
__sub__ = _arith_method(operator.sub, '__sub__')
__mul__ = _arith_method(operator.mul, '__mul__')
__truediv__ = _arith_method(operator.truediv, '__truediv__')
__floordiv__ = _arith_method(operator.floordiv, '__floordiv__')
__truediv__ = _arith_method(operator.truediv, '__truediv__', fill_zeros=np.inf)
__floordiv__ = _arith_method(operator.floordiv, '__floordiv__', fill_zeros=np.inf)
__pow__ = _arith_method(operator.pow, '__pow__')
__mod__ = _arith_method(operator.mod, '__mod__', fill_zeros=np.nan)

__radd__ = _arith_method(_radd_compat, '__add__')
__rmul__ = _arith_method(operator.mul, '__mul__')
__rsub__ = _arith_method(lambda x, y: y - x, '__sub__')
__rtruediv__ = _arith_method(lambda x, y: y / x, '__truediv__')
__rfloordiv__ = _arith_method(lambda x, y: y // x, '__floordiv__')
__rtruediv__ = _arith_method(lambda x, y: y / x, '__truediv__', fill_zeros=np.inf)
__rfloordiv__ = _arith_method(lambda x, y: y // x, '__floordiv__', fill_zeros=np.inf)
__rpow__ = _arith_method(lambda x, y: y ** x, '__pow__')
# reflected modulo: the series is the right-hand operand, so the operands
# are swapped just like the other reflected operators (other % self)
__rmod__ = _arith_method(lambda x, y: y % x, '__mod__', fill_zeros=np.nan)

# comparisons
__gt__ = _comp_method(operator.gt, '__gt__')
Expand Down Expand Up @@ -1301,8 +1306,8 @@ def __invert__(self):

# Python 2 division operators
if not py3compat.PY3:
__div__ = _arith_method(operator.div, '__div__')
__rdiv__ = _arith_method(lambda x, y: y / x, '__div__')
__div__ = _arith_method(operator.div, '__div__', fill_zeros=np.inf)
__rdiv__ = _arith_method(lambda x, y: y / x, '__div__', fill_zeros=np.inf)
__idiv__ = __div__

#----------------------------------------------------------------------
Expand Down
44 changes: 44 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4011,6 +4011,50 @@ def test_operators_none_as_na(self):
result = op(df.fillna(7), df)
assert_frame_equal(result, expected)

def test_modulo(self):
# Checks DataFrame % DataFrame and DataFrame % 0 on integer columns
# where the divisor contains zeros: those positions must come back as NaN.

# GH3590, modulo as ints
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })

### this is technically wrong as the integer portion is coerced to float ###
expected = DataFrame({ 'first' : Series([0,0,0,0],dtype='float64'), 'second' : Series([np.nan,np.nan,np.nan,0]) })
result = p % p
assert_frame_equal(result,expected)

# numpy has a slightly different (wrong) treatment: compute on the raw
# values, then overwrite the first three rows of the second column
# (the zero divisors) with NaN before comparing
result2 = DataFrame(p.values % p.values,index=p.index,columns=p.columns,dtype='float64')
result2.iloc[0:3,1] = np.nan
assert_frame_equal(result2,expected)

# modulo by the scalar 0 is expected to give NaN everywhere
result = p % 0
expected = DataFrame(np.nan,index=p.index,columns=p.columns)
assert_frame_equal(result,expected)

# numpy has a slightly different (wrong) treatment for integers, so the
# reference is built from the values cast to float64
result2 = DataFrame(p.values.astype('float64') % 0,index=p.index,columns=p.columns)
assert_frame_equal(result2,expected)

def test_div(self):
# Checks DataFrame / DataFrame and DataFrame / 0 on integer columns
# where the divisor contains zeros: those positions must come back as inf.

# integer div, but deal with the 0's
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
result = p / p

### this is technically wrong as the integer portion is coerced to float ###
expected = DataFrame({ 'first' : Series([1,1,1,1],dtype='float64'), 'second' : Series([np.inf,np.inf,np.inf,1]) })
assert_frame_equal(result,expected)

# reference built from float64 values; the missing entries it produces
# are replaced by inf via fillna so it lines up with the expected frame
result2 = DataFrame(p.values.astype('float64')/p.values,index=p.index,columns=p.columns).fillna(np.inf)
assert_frame_equal(result2,expected)

# division by the scalar 0 is expected to give inf everywhere
result = p / 0
expected = DataFrame(np.inf,index=p.index,columns=p.columns)
assert_frame_equal(result,expected)

# numpy has a slightly different (wrong) treatment
result2 = DataFrame(p.values.astype('float64')/0,index=p.index,columns=p.columns).fillna(np.inf)
assert_frame_equal(result2,expected)

def test_logical_operators(self):
import operator

Expand Down
43 changes: 43 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1766,6 +1766,49 @@ def test_neg(self):
def test_invert(self):
assert_series_equal(-(self.series < 0), ~(self.series < 0))

def test_modulo(self):
# Checks Series % Series and Series % 0 for integer data with zero
# divisors (NaN expected there), then the same operation on float64 data.

# GH3590, modulo as ints
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
result = p['first'] % p['second']
# reference: raw numpy result as float64, with the first three entries
# (the zero divisors) overwritten by NaN
expected = Series(p['first'].values % p['second'].values,dtype='float64')
expected.iloc[0:3] = np.nan
assert_series_equal(result,expected)

# modulo by the scalar 0 is expected to give NaN everywhere
result = p['first'] % 0
expected = Series(np.nan,index=p.index)
assert_series_equal(result,expected)

# with float64 data the Series result must equal the plain numpy result
p = p.astype('float64')
result = p['first'] % p['second']
expected = Series(p['first'].values % p['second'].values)
assert_series_equal(result,expected)

def test_div(self):
# Checks Series / Series and Series / 0 for integer data with zero
# divisors (inf expected there), the same operation on float64 data,
# and division by an all-ones integer series.

# integer div, but deal with the 0's
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
result = p['first'] / p['second']
# reference: raw numpy result as float64, with the first three entries
# (the zero divisors) overwritten by inf
expected = Series(p['first'].values / p['second'].values,dtype='float64')
expected.iloc[0:3] = np.inf
assert_series_equal(result,expected)

# division by the scalar 0 is expected to give inf everywhere
result = p['first'] / 0
expected = Series(np.inf,index=p.index)
assert_series_equal(result,expected)

# with float64 data the Series result must equal the plain numpy result
p = p.astype('float64')
result = p['first'] / p['second']
expected = Series(p['first'].values / p['second'].values)
assert_series_equal(result,expected)

# no zeros in the divisor: on Python 3 the result is compared against the
# float64 cast of 'first', otherwise against 'first' unchanged
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [1,1,1,1] })
result = p['first'] / p['second']
if py3compat.PY3:
assert_series_equal(result,p['first'].astype('float64'))
else:
assert_series_equal(result,p['first'])

def test_operators(self):

def _check_op(series, other, op, pos_only=False):
Expand Down

0 comments on commit a14cbd0

Please sign in to comment.