diff --git a/RELEASE.rst b/RELEASE.rst index 31627cec01d1e..0b6ed0b4d2853 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -48,6 +48,7 @@ pandas 0.11.1 to append an index with a different name than the existing - support datelike columns with a timezone as data_columns (GH2852_) - table writing performance improvements. + - Add modulo operator to Series, DataFrame **API Changes** @@ -110,6 +111,8 @@ pandas 0.11.1 is a ``list`` or ``tuple``. - Fixed bug where a time-series was being selected in preference to an actual column name in a frame (GH3594_) + - Fix modulo and integer division on Series,DataFrames to act similarly to ``float`` dtypes to return + ``np.nan`` or ``np.inf`` as appropriate (GH3590_) .. _GH3164: https://github.com/pydata/pandas/issues/3164 .. _GH2786: https://github.com/pydata/pandas/issues/2786 @@ -153,6 +156,7 @@ pandas 0.11.1 .. _GH3593: https://github.com/pydata/pandas/issues/3593 .. _GH3556: https://github.com/pydata/pandas/issues/3556 .. _GH3594: https://github.com/pydata/pandas/issues/3594 +.. _GH3590: https://github.com/pydata/pandas/issues/3590 .. _GH3435: https://github.com/pydata/pandas/issues/3435 diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt index 74818f9542cae..3719d9eb09dee 100644 --- a/doc/source/v0.11.1.txt +++ b/doc/source/v0.11.1.txt @@ -9,6 +9,17 @@ enhancements along with a large number of bug fixes. API changes ~~~~~~~~~~~ + - Fix modulo and integer division on Series,DataFrames to act similarly to ``float`` dtypes to return + ``np.nan`` or ``np.inf`` as appropriate (GH3590_). This corrects a numpy bug that treats ``integer`` + and ``float`` dtypes differently. + + .. ipython:: python + + p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] }) + p % 0 + p % p + p / p + p / 0 Enhancements ~~~~~~~~~~~~ @@ -33,4 +44,5 @@ on GitHub for a complete list. .. _GH3477: https://github.com/pydata/pandas/issues/3477 .. _GH3492: https://github.com/pydata/pandas/issues/3492 .. 
_GH3499: https://github.com/pydata/pandas/issues/3499 +.. _GH3590: https://github.com/pydata/pandas/issues/3590 .. _GH3435: https://github.com/pydata/pandas/issues/3435 diff --git a/pandas/core/common.py b/pandas/core/common.py index 2da2db052cb93..6bb4b36862956 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -793,13 +793,16 @@ def changeit(): # try to directly set by expanding our array to full # length of the boolean - om = other[mask] - om_at = om.astype(result.dtype) - if (om == om_at).all(): - new_other = result.values.copy() - new_other[mask] = om_at - result[:] = new_other - return result, False + try: + om = other[mask] + om_at = om.astype(result.dtype) + if (om == om_at).all(): + new_other = result.values.copy() + new_other[mask] = om_at + result[:] = new_other + return result, False + except Exception: + pass # we are forced to change the dtype of the result as the input isn't compatible r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True) @@ -948,6 +951,27 @@ def _lcd_dtypes(a_dtype, b_dtype): return np.float64 return np.object +def _fill_zeros(result, y, fill): + """ if we have an integer value (or array in y) + and we have 0's, fill them with the fill, + return the result """ + + if fill is not None: + if not isinstance(y, np.ndarray): + dtype, value = _infer_dtype_from_scalar(y) + y = pa.empty(result.shape,dtype=dtype) + y.fill(value) + + if is_integer_dtype(y): + + mask = y.ravel() == 0 + if mask.any(): + shape = result.shape + result, changed = _maybe_upcast_putmask(result.ravel(),mask,fill) + result = result.reshape(shape) + + return result + def _interp_wrapper(f, wrap_dtype, na_override=None): def wrapper(arr, mask, limit=None): view = arr.view(wrap_dtype) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5b2dc6dd96efb..c1f2f38dabd8b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -189,10 +189,12 @@ class DataConflictError(Exception): # Factory helper methods -def _arith_method(op, 
name, str_rep = None, default_axis='columns'): +def _arith_method(op, name, str_rep = None, default_axis='columns', fill_zeros=None): def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True) + result = com._fill_zeros(result,y,fill_zeros) + except TypeError: xrav = x.ravel() result = np.empty(x.size, dtype=x.dtype) @@ -841,20 +843,23 @@ def __contains__(self, key): __sub__ = _arith_method(operator.sub, '__sub__', '-', default_axis=None) __mul__ = _arith_method(operator.mul, '__mul__', '*', default_axis=None) __truediv__ = _arith_method(operator.truediv, '__truediv__', '/', - default_axis=None) + default_axis=None, fill_zeros=np.inf) __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', - default_axis=None) + default_axis=None, fill_zeros=np.inf) __pow__ = _arith_method(operator.pow, '__pow__', '**', default_axis=None) + __mod__ = _arith_method(operator.mod, '__mod__', default_axis=None, fill_zeros=np.nan) + __radd__ = _arith_method(_radd_compat, '__radd__', default_axis=None) __rmul__ = _arith_method(operator.mul, '__rmul__', default_axis=None) __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__', default_axis=None) __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__', - default_axis=None) + default_axis=None, fill_zeros=np.inf) __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__', - default_axis=None) + default_axis=None, fill_zeros=np.inf) __rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__', default_axis=None) + __rmod__ = _arith_method(lambda x, y: y % x, '__rmod__', default_axis=None, fill_zeros=np.nan) # boolean operators __and__ = _arith_method(operator.and_, '__and__', '&') @@ -863,9 +868,10 @@ def __contains__(self, key): # Python 2 division methods if not py3compat.PY3: - __div__ = _arith_method(operator.div, '__div__', '/', default_axis=None) + __div__ = _arith_method(operator.div, '__div__', '/', + default_axis=None, fill_zeros=np.inf) __rdiv__ = _arith_method(lambda x, y: y / 
x, '__rdiv__', - default_axis=None) + default_axis=None, fill_zeros=np.inf) def __neg__(self): arr = operator.neg(self.values) diff --git a/pandas/core/series.py b/pandas/core/series.py index a2816d93d6f1e..e807cf3f1dfd4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -55,14 +55,17 @@ # Wrapper function for Series arithmetic methods -def _arith_method(op, name): +def _arith_method(op, name, fill_zeros=None): """ Wrapper function for Series arithmetic operations, to avoid code duplication. """ def na_op(x, y): try: + result = op(x, y) + result = com._fill_zeros(result,y,fill_zeros) + except TypeError: result = pa.empty(len(x), dtype=x.dtype) if isinstance(y, pa.Array): @@ -1258,16 +1261,18 @@ def iteritems(self): __add__ = _arith_method(operator.add, '__add__') __sub__ = _arith_method(operator.sub, '__sub__') __mul__ = _arith_method(operator.mul, '__mul__') - __truediv__ = _arith_method(operator.truediv, '__truediv__') - __floordiv__ = _arith_method(operator.floordiv, '__floordiv__') + __truediv__ = _arith_method(operator.truediv, '__truediv__', fill_zeros=np.inf) + __floordiv__ = _arith_method(operator.floordiv, '__floordiv__', fill_zeros=np.inf) __pow__ = _arith_method(operator.pow, '__pow__') + __mod__ = _arith_method(operator.mod, '__mod__', fill_zeros=np.nan) __radd__ = _arith_method(_radd_compat, '__add__') __rmul__ = _arith_method(operator.mul, '__mul__') __rsub__ = _arith_method(lambda x, y: y - x, '__sub__') - __rtruediv__ = _arith_method(lambda x, y: y / x, '__truediv__') - __rfloordiv__ = _arith_method(lambda x, y: y // x, '__floordiv__') + __rtruediv__ = _arith_method(lambda x, y: y / x, '__truediv__', fill_zeros=np.inf) + __rfloordiv__ = _arith_method(lambda x, y: y // x, '__floordiv__', fill_zeros=np.inf) __rpow__ = _arith_method(lambda x, y: y ** x, '__pow__') + __rmod__ = _arith_method(lambda x, y: y % x, '__mod__', fill_zeros=np.nan) # comparisons __gt__ = _comp_method(operator.gt, '__gt__') @@ -1301,8 +1306,8 @@ def __invert__(self): # 
Python 2 division operators if not py3compat.PY3: - __div__ = _arith_method(operator.div, '__div__') - __rdiv__ = _arith_method(lambda x, y: y / x, '__div__') + __div__ = _arith_method(operator.div, '__div__', fill_zeros=np.inf) + __rdiv__ = _arith_method(lambda x, y: y / x, '__div__', fill_zeros=np.inf) __idiv__ = __div__ #---------------------------------------------------------------------- diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index ce89dda63597f..f77503bd1487d 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4011,6 +4011,50 @@ def test_operators_none_as_na(self): result = op(df.fillna(7), df) assert_frame_equal(result, expected) + def test_modulo(self): + + # GH3590, modulo as ints + p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] }) + + ### this is technically wrong as the integer portion is coerced to float ### + expected = DataFrame({ 'first' : Series([0,0,0,0],dtype='float64'), 'second' : Series([np.nan,np.nan,np.nan,0]) }) + result = p % p + assert_frame_equal(result,expected) + + # numpy has a slightly different (wrong) treatement + result2 = DataFrame(p.values % p.values,index=p.index,columns=p.columns,dtype='float64') + result2.iloc[0:3,1] = np.nan + assert_frame_equal(result2,expected) + + result = p % 0 + expected = DataFrame(np.nan,index=p.index,columns=p.columns) + assert_frame_equal(result,expected) + + # numpy has a slightly different (wrong) treatement + result2 = DataFrame(p.values.astype('float64') % 0,index=p.index,columns=p.columns) + assert_frame_equal(result2,expected) + + def test_div(self): + + # integer div, but deal with the 0's + p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] }) + result = p / p + + ### this is technically wrong as the integer portion is coerced to float ### + expected = DataFrame({ 'first' : Series([1,1,1,1],dtype='float64'), 'second' : Series([np.inf,np.inf,np.inf,1]) }) + assert_frame_equal(result,expected) + + result2 = 
DataFrame(p.values.astype('float64')/p.values,index=p.index,columns=p.columns).fillna(np.inf) + assert_frame_equal(result2,expected) + + result = p / 0 + expected = DataFrame(np.inf,index=p.index,columns=p.columns) + assert_frame_equal(result,expected) + + # numpy has a slightly different (wrong) treatement + result2 = DataFrame(p.values.astype('float64')/0,index=p.index,columns=p.columns).fillna(np.inf) + assert_frame_equal(result2,expected) + def test_logical_operators(self): import operator diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index d98cfe3e385cb..11ede8d759b38 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1766,6 +1766,49 @@ def test_neg(self): def test_invert(self): assert_series_equal(-(self.series < 0), ~(self.series < 0)) + def test_modulo(self): + + # GH3590, modulo as ints + p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] }) + result = p['first'] % p['second'] + expected = Series(p['first'].values % p['second'].values,dtype='float64') + expected.iloc[0:3] = np.nan + assert_series_equal(result,expected) + + result = p['first'] % 0 + expected = Series(np.nan,index=p.index) + assert_series_equal(result,expected) + + p = p.astype('float64') + result = p['first'] % p['second'] + expected = Series(p['first'].values % p['second'].values) + assert_series_equal(result,expected) + + def test_div(self): + + # integer div, but deal with the 0's + p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] }) + result = p['first'] / p['second'] + expected = Series(p['first'].values / p['second'].values,dtype='float64') + expected.iloc[0:3] = np.inf + assert_series_equal(result,expected) + + result = p['first'] / 0 + expected = Series(np.inf,index=p.index) + assert_series_equal(result,expected) + + p = p.astype('float64') + result = p['first'] / p['second'] + expected = Series(p['first'].values / p['second'].values) + assert_series_equal(result,expected) + + p = DataFrame({ 'first' : [3,4,5,8], 
'second' : [1,1,1,1] }) + result = p['first'] / p['second'] + if py3compat.PY3: + assert_series_equal(result,p['first'].astype('float64')) + else: + assert_series_equal(result,p['first']) + def test_operators(self): def _check_op(series, other, op, pos_only=False):