BUG: Fix #9144 #8445 Fix how core.common._fill_zeros handles div and mod by zero
pandas-dev · Feb 16, 2015 · 85342ee · 85342ee
1 parent 76195fb
commit 85342ee
Show file tree

Hide file tree

Showing 6 changed files with 144 additions and 34 deletions.
diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt
@@ -131,6 +131,37 @@ methods (:issue:`9088`).
     dtype: int64
 
 
+- During division involving a ``Series`` or ``DataFrame``, ``0/0`` and ``0//0`` now give ``np.nan`` instead of ``np.inf``. (:issue:`9144`, :issue:`8445`)
+
+  Previous Behavior
+
+  .. code-block:: python
+
+        In [2]: p = pd.Series([0, 1])
+
+        In [3]: p / 0
+        Out[3]:
+        0    inf
+        1    inf
+        dtype: float64
+
+        In [4]: p // 0
+        Out[4]:
+        0    inf
+        1    inf
+        dtype: float64
+
+
+
+  New Behavior
+
+  .. ipython:: python
+
+     p = pd.Series([0, 1])
+     p / 0
+     p // 0
+
+
 
 Deprecations
 ~~~~~~~~~~~~

diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -1395,36 +1395,40 @@ def _fill_zeros(result, x, y, name, fill):
     mask the nan's from x
     """
 
-    if fill is not None:
+    if fill is None or is_float_dtype(result):
+        return result
+
+    if name.startswith(('r', '__r')):
+        x,y = y,x
 
-        if name.startswith('r'):
-            x,y = y,x
+    if np.isscalar(y):
+        y = np.array(y)
 
+    if is_integer_dtype(y):
 
-        if not isinstance(y, np.ndarray):
-            dtype, value = _infer_dtype_from_scalar(y)
-            y = np.empty(result.shape, dtype=dtype)
-            y.fill(value)
+        if (y == 0).any():
 
-        if is_integer_dtype(y):
+            # GH 7325, mask and nans must be broadcastable (also: PR 9308)
+            # Raveling and then reshaping makes np.putmask faster
+            mask = ((y == 0) & ~np.isnan(result)).ravel()
 
-            if (y.ravel() == 0).any():
-                shape = result.shape
-                result = result.ravel().astype('float64')
+            shape = result.shape
+            result = result.astype('float64', copy=False).ravel()
 
-                # GH 7325, mask and nans must be broadcastable
-                signs = np.sign(result)
-                mask = ((y == 0) & ~np.isnan(x)).ravel()
+            np.putmask(result, mask, fill)
 
-                np.putmask(result, mask, fill)
+            # if we have a fill of inf, then sign it correctly
+            # (GH 6178 and PR 9308)
+            if np.isinf(fill):
+                signs = np.sign(y if name.startswith(('r', '__r')) else x)
+                negative_inf_mask = (signs.ravel() < 0) & mask
+                np.putmask(result, negative_inf_mask, -fill)
 
-                # if we have a fill of inf, then sign it
-                # correctly
-                # GH 6178
-                if np.isinf(fill):
-                    np.putmask(result,(signs<0) & mask, -fill)
+            if "floordiv" in name:  # (PR 9308)
+                nan_mask = ((y == 0) & (x == 0)).ravel()
+                np.putmask(result, nan_mask, np.nan)
 
-                result = result.reshape(shape)
+            result = result.reshape(shape)
 
     return result
 

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -81,7 +81,8 @@ def names(x):
         rpow=arith_method(lambda x, y: y ** x, names('rpow'), op('**'),
                           default_axis=default_axis, reversed=True),
         rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'),
-                          default_axis=default_axis, reversed=True),
+                          default_axis=default_axis, fill_zeros=np.nan,
+                          reversed=True),
     )
     new_methods['div'] = new_methods['truediv']
     new_methods['rdiv'] = new_methods['rtruediv']

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -20,7 +20,7 @@
 )
 from pandas import compat
 
-from numpy import random, nan
+from numpy import random, nan, inf
 from numpy.random import randn
 import numpy as np
 import numpy.ma as ma
@@ -5138,23 +5138,26 @@ def test_modulo(self):
 
     def test_div(self):
 
-        # integer div, but deal with the 0's
+        # integer div, but deal with the 0's (GH 9144)
         p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
         result = p / p
 
-        ### this is technically wrong as the integer portion is coerced to float ###
-        expected = DataFrame({ 'first' : Series([1,1,1,1],dtype='float64'), 'second' : Series([np.inf,np.inf,np.inf,1]) })
+        expected = DataFrame({'first': Series([1.0, 1.0, 1.0, 1.0]),
+                              'second': Series([nan, nan, nan, 1])})
         assert_frame_equal(result,expected)
 
-        result2 = DataFrame(p.values.astype('float64')/p.values,index=p.index,columns=p.columns).fillna(np.inf)
+        result2 = DataFrame(p.values.astype('float') / p.values, index=p.index,
+                            columns=p.columns)
         assert_frame_equal(result2,expected)
 
         result = p / 0
-        expected = DataFrame(np.inf,index=p.index,columns=p.columns)
+        expected = DataFrame(inf, index=p.index, columns=p.columns)
+        expected.iloc[0:3, 1] = nan
         assert_frame_equal(result,expected)
 
         # numpy has a slightly different (wrong) treatement
-        result2 = DataFrame(p.values.astype('float64')/0,index=p.index,columns=p.columns).fillna(np.inf)
+        result2 = DataFrame(p.values.astype('float64') / 0, index=p.index,
+                            columns=p.columns)
         assert_frame_equal(result2,expected)
 
         p = DataFrame(np.random.randn(10, 5))
@@ -5604,7 +5607,7 @@ def test_arith_flex_series(self):
 
         # broadcasting issue in GH7325
         df = DataFrame(np.arange(3*2).reshape((3,2)),dtype='int64')
-        expected = DataFrame([[np.inf,np.inf],[1.0,1.5],[1.0,1.25]])
+        expected = DataFrame([[nan, inf], [1.0, 1.5], [1.0, 1.25]])
         result = df.div(df[0],axis='index')
         assert_frame_equal(result,expected)
 

diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -11,7 +11,7 @@
 
 import nose
 
-from numpy import nan
+from numpy import nan, inf
 import numpy as np
 import numpy.ma as ma
 import pandas as pd
@@ -2689,6 +2689,17 @@ def test_modulo(self):
         result2 = p['second'] % p['first']
         self.assertFalse(np.array_equal(result, result2))
 
+        # GH 9144
+        s = Series([0, 1])
+
+        result = s % 0
+        expected = Series([nan, nan])
+        assert_series_equal(result, expected)
+
+        result = 0 % s
+        expected = Series([nan, 0.0])
+        assert_series_equal(result, expected)
+
     def test_div(self):
 
         # no longer do integer div for any ops, but deal with the 0's
@@ -2730,6 +2741,21 @@ def test_div(self):
         result = p['second'] / p['first']
         assert_series_equal(result, expected)
 
+        # GH 9144
+        s = Series([-1, 0, 1])
+
+        result = 0 / s
+        expected = Series([0.0, nan, 0.0])
+        assert_series_equal(result, expected)
+
+        result = s / 0
+        expected = Series([-inf, nan, inf])
+        assert_series_equal(result, expected)
+
+        result = s // 0
+        expected = Series([-inf, nan, inf])
+        assert_series_equal(result, expected)
+
     def test_operators(self):
 
         def _check_op(series, other, op, pos_only=False):
@@ -6414,17 +6440,17 @@ def test_pct_change_shift_over_nas(self):
     def test_autocorr(self):
         # Just run the function
         corr1 = self.ts.autocorr()
-        
+
         # Now run it with the lag parameter
         corr2 = self.ts.autocorr(lag=1)
-        
+
         # corr() with lag needs Series of at least length 2
         if len(self.ts) <= 2:
             self.assertTrue(np.isnan(corr1))
             self.assertTrue(np.isnan(corr2))
         else:
             self.assertEqual(corr1, corr2)
-        
+
         # Choose a random lag between 1 and length of Series - 2
         # and compare the result with the Series corr() function
         n = 1 + np.random.randint(max(1, len(self.ts) - 2))

diff --git a/vb_suite/binary_ops.py b/vb_suite/binary_ops.py
@@ -72,6 +72,51 @@
     Benchmark("df * df2", setup, name='frame_mult_no_ne',cleanup="expr.set_use_numexpr(True)",
               start_date=datetime(2013, 2, 26))
 
+#----------------------------------------------------------------------
+# division (true and floor division; by zero exercises _fill_zeros)
+
+setup = common_setup + """
+df  = DataFrame(np.random.randn(1000, 1000))
+"""
+frame_float_div_by_zero = \
+    Benchmark("df / 0", setup, name='frame_float_div_by_zero')
+
+setup = common_setup + """
+df  = DataFrame(np.random.randn(1000, 1000))
+"""
+frame_float_floor_by_zero = \
+    Benchmark("df // 0", setup, name='frame_float_floor_by_zero')
+
+setup = common_setup + """
+df  = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
+"""
+frame_int_div_by_zero = \
+    Benchmark("df / 0", setup, name='frame_int_div_by_zero')
+
+setup = common_setup + """
+df  = DataFrame(np.random.randn(1000, 1000))
+df2 = DataFrame(np.random.randn(1000, 1000))
+"""
+frame_float_div = \
+    Benchmark("df // df2", setup, name='frame_float_div')
+
+#----------------------------------------------------------------------
+# modulo (the benchmarked statement must use %, not /)
+
+setup = common_setup + """
+df  = DataFrame(np.random.randn(1000, 1000))
+df2 = DataFrame(np.random.randn(1000, 1000))
+"""
+frame_float_mod = \
+    Benchmark("df % df2", setup, name='frame_float_mod')
+
+setup = common_setup + """
+df  = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
+df2 = DataFrame(np.random.random_integers(np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(1000, 1000)))
+"""
+frame_int_mod = \
+    Benchmark("df % df2", setup, name='frame_int_mod')
+
 #----------------------------------------------------------------------
 # multi and