BUG: Consistent division by zero behavior for Index/Series (#27321)

pandas-dev · Jul 11, 2019 · 3885575 · jorisvandenbossche · Jul 13, 2019 · TomAugspurger
1 parent 2d0b20b
commit 3885575
Show file tree

Hide file tree

Showing 8 changed files with 82 additions and 68 deletions.
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -1009,6 +1009,7 @@ Numeric
 - Bug in :meth:`~pandas.eval` when comparing floats with scalar operators, for example: ``x < -0.1`` (:issue:`25928`)
 - Fixed bug where casting all-boolean array to integer extension array failed (:issue:`25211`)
 - Bug in ``divmod`` with a :class:`Series` object containing zeros incorrectly raising ``AttributeError`` (:issue:`26987`)
+- Inconsistency in :class:`Series` floor-division (`//`) and ``divmod`` filling positive//zero with ``NaN`` instead of ``Inf`` (:issue:`27321`)
 -
 
 Conversion

diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
@@ -234,32 +234,6 @@ def _gen_eval_kwargs(name):
     return kwargs
 
 
-def _gen_fill_zeros(name):
-    """
-    Find the appropriate fill value to use when filling in undefined values
-    in the results of the given operation caused by operating on
-    (generally dividing by) zero.
-
-    Parameters
-    ----------
-    name : str
-
-    Returns
-    -------
-    fill_value : {None, np.nan, np.inf}
-    """
-    name = name.strip("__")
-    if "div" in name:
-        # truediv, floordiv, and reversed variants
-        fill_value = np.inf
-    elif "mod" in name:
-        # mod, rmod
-        fill_value = np.nan
-    else:
-        fill_value = None
-    return fill_value
-
-
 def _get_frame_op_default_axis(name):
     """
     Only DataFrame cares about default_axis, specifically:
@@ -1632,7 +1606,6 @@ def _arith_method_SERIES(cls, op, special):
     str_rep = _get_opstr(op, cls)
     op_name = _get_op_name(op, special)
     eval_kwargs = _gen_eval_kwargs(op_name)
-    fill_zeros = _gen_fill_zeros(op_name)
     construct_result = (
         _construct_divmod_result if op in [divmod, rdivmod] else _construct_result
     )
@@ -1663,7 +1636,7 @@ def na_op(x, y):
         except TypeError:
             result = masked_arith_op(x, y, op)
 
-        return missing.dispatch_fill_zeros(op, x, y, result, fill_zeros)
+        return missing.dispatch_fill_zeros(op, x, y, result)
 
     def wrapper(left, right):
         if isinstance(right, ABCDataFrame):
@@ -2154,7 +2127,6 @@ def _arith_method_FRAME(cls, op, special):
     str_rep = _get_opstr(op, cls)
     op_name = _get_op_name(op, special)
     eval_kwargs = _gen_eval_kwargs(op_name)
-    fill_zeros = _gen_fill_zeros(op_name)
     default_axis = _get_frame_op_default_axis(op_name)
 
     def na_op(x, y):
@@ -2165,7 +2137,7 @@ def na_op(x, y):
         except TypeError:
             result = masked_arith_op(x, y, op)
 
-        return missing.dispatch_fill_zeros(op, x, y, result, fill_zeros)
+        return missing.dispatch_fill_zeros(op, x, y, result)
 
     if op_name in _op_descriptions:
         # i.e. include "add" but not "__add__"

diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py
@@ -27,7 +27,7 @@
 
 from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_scalar
 
-from .roperator import rdivmod
+from .roperator import rdivmod, rfloordiv, rmod
 
 
 def fill_zeros(result, x, y, name, fill):
@@ -85,7 +85,7 @@ def fill_zeros(result, x, y, name, fill):
     return result
 
 
-def mask_zero_div_zero(x, y, result, copy=False):
+def mask_zero_div_zero(x, y, result):
     """
     Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes
     of the numerator or the denominator.
@@ -95,9 +95,6 @@ def mask_zero_div_zero(x, y, result, copy=False):
     x : ndarray
     y : ndarray
     result : ndarray
-    copy : bool (default False)
-        Whether to always create a new array or try to fill in the existing
-        array if possible.
 
     Returns
     -------
@@ -113,10 +110,19 @@ def mask_zero_div_zero(x, y, result, copy=False):
     >>> mask_zero_div_zero(x, y, result)
     array([ inf,  nan, -inf])
     """
+    if not isinstance(result, np.ndarray):
+        # FIXME: SparseArray would raise TypeError with np.putmask
+        return result
+
     if is_scalar(y):
         y = np.array(y)
 
     zmask = y == 0
+
+    if isinstance(zmask, bool):
+        # FIXME: numpy did not evaluate pointwise, seen in docs build
+        return result
+
     if zmask.any():
         shape = result.shape
 
@@ -125,12 +131,13 @@ def mask_zero_div_zero(x, y, result, copy=False):
         zpos_mask = zmask & ~zneg_mask
 
         nan_mask = (zmask & (x == 0)).ravel()
-        neginf_mask = ((zpos_mask & (x < 0)) | (zneg_mask & (x > 0))).ravel()
-        posinf_mask = ((zpos_mask & (x > 0)) | (zneg_mask & (x < 0))).ravel()
+        with np.errstate(invalid="ignore"):
+            neginf_mask = ((zpos_mask & (x < 0)) | (zneg_mask & (x > 0))).ravel()
+            posinf_mask = ((zpos_mask & (x > 0)) | (zneg_mask & (x < 0))).ravel()
 
         if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
             # Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
-            result = result.astype("float64", copy=copy).ravel()
+            result = result.astype("float64", copy=False).ravel()
 
             np.putmask(result, nan_mask, np.nan)
             np.putmask(result, posinf_mask, np.inf)
@@ -157,36 +164,45 @@ def dispatch_missing(op, left, right, result):
     -------
     result : ndarray
     """
-    opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__")
     if op is operator.floordiv:
         # Note: no need to do this for truediv; in py3 numpy behaves the way
         #  we want.
         result = mask_zero_div_zero(left, right, result)
     elif op is operator.mod:
-        result = fill_zeros(result, left, right, opstr, np.nan)
+        result = fill_zeros(result, left, right, "__mod__", np.nan)
     elif op is divmod:
         res0 = mask_zero_div_zero(left, right, result[0])
-        res1 = fill_zeros(result[1], left, right, opstr, np.nan)
+        res1 = fill_zeros(result[1], left, right, "__divmod__", np.nan)
         result = (res0, res1)
     return result
 
 
 # FIXME: de-duplicate with dispatch_missing
-def dispatch_fill_zeros(op, left, right, result, fill_value):
+def dispatch_fill_zeros(op, left, right, result):
     """
     Call fill_zeros with the appropriate fill value depending on the operation,
     with special logic for divmod and rdivmod.
     """
     if op is divmod:
         result = (
-            fill_zeros(result[0], left, right, "__floordiv__", np.inf),
+            mask_zero_div_zero(left, right, result[0]),
             fill_zeros(result[1], left, right, "__mod__", np.nan),
         )
     elif op is rdivmod:
         result = (
-            fill_zeros(result[0], left, right, "__rfloordiv__", np.inf),
+            mask_zero_div_zero(right, left, result[0]),
             fill_zeros(result[1], left, right, "__rmod__", np.nan),
         )
-    else:
-        result = fill_zeros(result, left, right, op.__name__, fill_value)
+    elif op is operator.floordiv:
+        # Note: no need to do this for truediv; in py3 numpy behaves the way
+        #  we want.
+        result = mask_zero_div_zero(left, right, result)
+    elif op is op is rfloordiv:
+        # Note: no need to do this for rtruediv; in py3 numpy behaves the way
+        #  we want.
+        result = mask_zero_div_zero(right, left, result)
+    elif op is operator.mod:
+        result = fill_zeros(result, left, right, "__mod__", np.nan)
+    elif op is rmod:
+        result = fill_zeros(result, left, right, "__rmod__", np.nan)
     return result
diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py
@@ -331,7 +331,12 @@ def test_ser_divmod_zero(self, dtype1, any_real_dtype):
         left = pd.Series([1, 1]).astype(dtype1)
         right = pd.Series([0, 2]).astype(dtype2)
 
+        # GH#27321 pandas convention is to set 1 // 0 to np.inf, as opposed
+        #  to numpy which sets to np.nan; patch `expected[0]` below
         expected = left // right, left % right
+        expected = list(expected)
+        expected[0] = expected[0].astype(np.float64)
+        expected[0][0] = np.inf
         result = divmod(left, right)
 
         tm.assert_series_equal(result[0], expected[0])
@@ -881,17 +886,16 @@ def check(series, other):
 
             _check_op(series, other, operator.pow, pos_only=True)
 
-            _check_op(series, other, lambda x, y: operator.add(y, x))
-            _check_op(series, other, lambda x, y: operator.sub(y, x))
-            _check_op(series, other, lambda x, y: operator.truediv(y, x))
-            _check_op(series, other, lambda x, y: operator.floordiv(y, x))
-            _check_op(series, other, lambda x, y: operator.mul(y, x))
-            _check_op(series, other, lambda x, y: operator.pow(y, x), pos_only=True)
-            _check_op(series, other, lambda x, y: operator.mod(y, x))
+            _check_op(series, other, ops.radd)
+            _check_op(series, other, ops.rsub)
+            _check_op(series, other, ops.rtruediv)
+            _check_op(series, other, ops.rfloordiv)
+            _check_op(series, other, ops.rmul)
+            _check_op(series, other, ops.rpow, pos_only=True)
+            _check_op(series, other, ops.rmod)
 
         tser = tm.makeTimeSeries().rename("ts")
         check(tser, tser * 2)
-        check(tser, tser * 0)
         check(tser, tser[::2])
         check(tser, 5)
 
@@ -931,13 +935,9 @@ def check(series, other):
 
         tser = tm.makeTimeSeries().rename("ts")
         check(tser, tser * 2)
-        check(tser, tser * 0)
         check(tser, tser[::2])
         check(tser, 5)
 
-    @pytest.mark.xfail(
-        reason="Series division does not yet fill 1/0 consistently; Index does."
-    )
     def test_series_divmod_zero(self):
         # Check that divmod uses pandas convention for division by zero,
         #  which does not match numpy.
@@ -950,8 +950,8 @@ def test_series_divmod_zero(self):
         other = tser * 0
 
         result = divmod(tser, other)
-        exp1 = pd.Series([np.inf] * len(tser), index=tser.index)
-        exp2 = pd.Series([np.nan] * len(tser), index=tser.index)
+        exp1 = pd.Series([np.inf] * len(tser), index=tser.index, name="ts")
+        exp2 = pd.Series([np.nan] * len(tser), index=tser.index, name="ts")
         tm.assert_series_equal(result[0], exp1)
         tm.assert_series_equal(result[1], exp2)
 

diff --git a/pandas/tests/arrays/sparse/test_arithmetics.py b/pandas/tests/arrays/sparse/test_arithmetics.py
@@ -49,6 +49,12 @@ def _check_numeric_ops(self, a, b, a_dense, b_dense, mix, op):
             else:
                 expected = op(a_dense, b_dense)
 
+            if op in [operator.floordiv, ops.rfloordiv]:
+                # Series sets 1//0 to np.inf, which SparseArray does not do (yet)
+                mask = np.isinf(expected)
+                if mask.any():
+                    expected[mask] = np.nan
+
             self._assert(result, expected)
 
     def _check_bool_result(self, res):

diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
@@ -179,6 +179,10 @@ def _check_op_float(self, result, expected, mask, s, op_name, other):
         # check comparisons that are resulting in float dtypes
 
         expected[mask] = np.nan
+        if "floordiv" in op_name:
+            # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
+            mask2 = np.isinf(expected) & np.isnan(result)
+            expected[mask2] = np.nan
         tm.assert_series_equal(result, expected)
 
     def _check_op_integer(self, result, expected, mask, s, op_name, other):

diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py
@@ -1,4 +1,5 @@
 import operator
+from types import LambdaType
 
 import numpy as np
 from numpy import nan
@@ -9,6 +10,7 @@
 
 import pandas as pd
 from pandas import DataFrame, Series, bdate_range, compat
+from pandas.core import ops
 from pandas.core.indexes.datetimes import DatetimeIndex
 from pandas.core.sparse import frame as spf
 from pandas.core.sparse.api import (
@@ -424,6 +426,13 @@ def _compare_to_dense(a, b, da, db, op):
             sparse_result = op(a, b)
             dense_result = op(da, db)
 
+            # catch lambdas but not non-lambdas e.g. operator.add
+            if op in [operator.floordiv, ops.rfloordiv] or isinstance(op, LambdaType):
+                # GH#27231 Series sets 1//0 to np.inf, which SparseArray
+                #  does not do (yet)
+                mask = np.isinf(dense_result) & ~np.isinf(sparse_result.to_dense())
+                dense_result[mask] = np.nan
+
             fill = sparse_result.default_fill_value
             dense_result = dense_result.to_sparse(fill_value=fill)
             tm.assert_sp_frame_equal(sparse_result, dense_result, exact_indices=False)
@@ -436,7 +445,6 @@ def _compare_to_dense(a, b, da, db, op):
                 )
 
         opnames = ["add", "sub", "mul", "truediv", "floordiv"]
-        ops = [getattr(operator, name) for name in opnames]
 
         fidx = frame.index
 
@@ -466,6 +474,7 @@ def _compare_to_dense(a, b, da, db, op):
                 f = lambda a, b: getattr(a, op)(b, axis="index")
                 _compare_to_dense(frame, s, frame.to_dense(), s.to_dense(), f)
 
+                # FIXME: dont leave commented-out
                 # rops are not implemented
                 # _compare_to_dense(s, frame, s.to_dense(),
                 #                   frame.to_dense(), f)
@@ -479,13 +488,14 @@ def _compare_to_dense(a, b, da, db, op):
             frame.xs(fidx[5])[:2],
         ]
 
-        for op in ops:
+        for name in opnames:
+            op = getattr(operator, name)
             for s in series:
                 _compare_to_dense(frame, s, frame.to_dense(), s, op)
                 _compare_to_dense(s, frame, s, frame.to_dense(), op)
 
         # it works!
-        result = frame + frame.loc[:, ["A", "B"]]  # noqa
+        frame + frame.loc[:, ["A", "B"]]
 
     def test_op_corners(self, float_frame, empty_frame):
         empty = empty_frame + empty_frame

diff --git a/pandas/tests/sparse/series/test_series.py b/pandas/tests/sparse/series/test_series.py
@@ -12,6 +12,7 @@
 
 import pandas as pd
 from pandas import DataFrame, Series, SparseDtype, SparseSeries, bdate_range, isna
+from pandas.core import ops
 from pandas.core.reshape.util import cartesian_product
 import pandas.core.sparse.frame as spf
 from pandas.tests.series.test_api import SharedWithSparse
@@ -563,6 +564,10 @@ def _check_op(a, b, op):
             adense = a.to_dense() if isinstance(a, SparseSeries) else a
             bdense = b.to_dense() if isinstance(b, SparseSeries) else b
             dense_result = op(adense, bdense)
+            if "floordiv" in op.__name__:
+                # Series sets 1//0 to np.inf, which SparseSeries does not do (yet)
+                mask = np.isinf(dense_result)
+                dense_result[mask] = np.nan
             tm.assert_almost_equal(sp_result.to_dense(), dense_result)
 
         def check(a, b):
@@ -572,11 +577,11 @@ def check(a, b):
             _check_op(a, b, operator.floordiv)
             _check_op(a, b, operator.mul)
 
-            _check_op(a, b, lambda x, y: operator.add(y, x))
-            _check_op(a, b, lambda x, y: operator.sub(y, x))
-            _check_op(a, b, lambda x, y: operator.truediv(y, x))
-            _check_op(a, b, lambda x, y: operator.floordiv(y, x))
-            _check_op(a, b, lambda x, y: operator.mul(y, x))
+            _check_op(a, b, ops.radd)
+            _check_op(a, b, ops.rsub)
+            _check_op(a, b, ops.rtruediv)
+            _check_op(a, b, ops.rfloordiv)
+            _check_op(a, b, ops.rmul)
 
             # FIXME: don't leave commented-out
             # NaN ** 0 = 1 in C?