Skip to content

Commit

Permalink
BUG: Consistent division by zero behavior for Index/Series (#27321)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Jul 11, 2019
1 parent 2d0b20b commit 3885575
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 68 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,7 @@ Numeric
- Bug in :meth:`~pandas.eval` when comparing floats with scalar operators, for example: ``x < -0.1`` (:issue:`25928`)
- Fixed bug where casting all-boolean array to integer extension array failed (:issue:`25211`)
- Bug in ``divmod`` with a :class:`Series` object containing zeros incorrectly raising ``AttributeError`` (:issue:`26987`)
- Inconsistency in :class:`Series` floor-division (`//`) and ``divmod`` filling positive//zero with ``NaN`` instead of ``Inf`` (:issue:`27321`)
-
Conversion
Expand Down
32 changes: 2 additions & 30 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,32 +234,6 @@ def _gen_eval_kwargs(name):
return kwargs


def _gen_fill_zeros(name):
"""
Find the appropriate fill value to use when filling in undefined values
in the results of the given operation caused by operating on
(generally dividing by) zero.
Parameters
----------
name : str
Returns
-------
fill_value : {None, np.nan, np.inf}
"""
name = name.strip("__")
if "div" in name:
# truediv, floordiv, and reversed variants
fill_value = np.inf
elif "mod" in name:
# mod, rmod
fill_value = np.nan
else:
fill_value = None
return fill_value


def _get_frame_op_default_axis(name):
"""
Only DataFrame cares about default_axis, specifically:
Expand Down Expand Up @@ -1632,7 +1606,6 @@ def _arith_method_SERIES(cls, op, special):
str_rep = _get_opstr(op, cls)
op_name = _get_op_name(op, special)
eval_kwargs = _gen_eval_kwargs(op_name)
fill_zeros = _gen_fill_zeros(op_name)
construct_result = (
_construct_divmod_result if op in [divmod, rdivmod] else _construct_result
)
Expand Down Expand Up @@ -1663,7 +1636,7 @@ def na_op(x, y):
except TypeError:
result = masked_arith_op(x, y, op)

return missing.dispatch_fill_zeros(op, x, y, result, fill_zeros)
return missing.dispatch_fill_zeros(op, x, y, result)

def wrapper(left, right):
if isinstance(right, ABCDataFrame):
Expand Down Expand Up @@ -2154,7 +2127,6 @@ def _arith_method_FRAME(cls, op, special):
str_rep = _get_opstr(op, cls)
op_name = _get_op_name(op, special)
eval_kwargs = _gen_eval_kwargs(op_name)
fill_zeros = _gen_fill_zeros(op_name)
default_axis = _get_frame_op_default_axis(op_name)

def na_op(x, y):
Expand All @@ -2165,7 +2137,7 @@ def na_op(x, y):
except TypeError:
result = masked_arith_op(x, y, op)

return missing.dispatch_fill_zeros(op, x, y, result, fill_zeros)
return missing.dispatch_fill_zeros(op, x, y, result)

if op_name in _op_descriptions:
# i.e. include "add" but not "__add__"
Expand Down
48 changes: 32 additions & 16 deletions pandas/core/ops/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_scalar

from .roperator import rdivmod
from .roperator import rdivmod, rfloordiv, rmod


def fill_zeros(result, x, y, name, fill):
Expand Down Expand Up @@ -85,7 +85,7 @@ def fill_zeros(result, x, y, name, fill):
return result


def mask_zero_div_zero(x, y, result, copy=False):
def mask_zero_div_zero(x, y, result):
"""
Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes
of the numerator or the denominator.
Expand All @@ -95,9 +95,6 @@ def mask_zero_div_zero(x, y, result, copy=False):
x : ndarray
y : ndarray
result : ndarray
copy : bool (default False)
Whether to always create a new array or try to fill in the existing
array if possible.
Returns
-------
Expand All @@ -113,10 +110,19 @@ def mask_zero_div_zero(x, y, result, copy=False):
>>> mask_zero_div_zero(x, y, result)
array([ inf, nan, -inf])
"""
if not isinstance(result, np.ndarray):
# FIXME: SparseArray would raise TypeError with np.putmask
return result

if is_scalar(y):
y = np.array(y)

zmask = y == 0

if isinstance(zmask, bool):
# FIXME: numpy did not evaluate pointwise, seen in docs build
return result

if zmask.any():
shape = result.shape

Expand All @@ -125,12 +131,13 @@ def mask_zero_div_zero(x, y, result, copy=False):
zpos_mask = zmask & ~zneg_mask

nan_mask = (zmask & (x == 0)).ravel()
neginf_mask = ((zpos_mask & (x < 0)) | (zneg_mask & (x > 0))).ravel()
posinf_mask = ((zpos_mask & (x > 0)) | (zneg_mask & (x < 0))).ravel()
with np.errstate(invalid="ignore"):
neginf_mask = ((zpos_mask & (x < 0)) | (zneg_mask & (x > 0))).ravel()
posinf_mask = ((zpos_mask & (x > 0)) | (zneg_mask & (x < 0))).ravel()

if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
# Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
result = result.astype("float64", copy=copy).ravel()
result = result.astype("float64", copy=False).ravel()

np.putmask(result, nan_mask, np.nan)
np.putmask(result, posinf_mask, np.inf)
Expand All @@ -157,36 +164,45 @@ def dispatch_missing(op, left, right, result):
-------
result : ndarray
"""
opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__")
if op is operator.floordiv:
# Note: no need to do this for truediv; in py3 numpy behaves the way
# we want.
result = mask_zero_div_zero(left, right, result)
elif op is operator.mod:
result = fill_zeros(result, left, right, opstr, np.nan)
result = fill_zeros(result, left, right, "__mod__", np.nan)
elif op is divmod:
res0 = mask_zero_div_zero(left, right, result[0])
res1 = fill_zeros(result[1], left, right, opstr, np.nan)
res1 = fill_zeros(result[1], left, right, "__divmod__", np.nan)
result = (res0, res1)
return result


# FIXME: de-duplicate with dispatch_missing
def dispatch_fill_zeros(op, left, right, result, fill_value):
def dispatch_fill_zeros(op, left, right, result):
"""
Call fill_zeros with the appropriate fill value depending on the operation,
with special logic for divmod and rdivmod.
"""
if op is divmod:
result = (
fill_zeros(result[0], left, right, "__floordiv__", np.inf),
mask_zero_div_zero(left, right, result[0]),
fill_zeros(result[1], left, right, "__mod__", np.nan),
)
elif op is rdivmod:
result = (
fill_zeros(result[0], left, right, "__rfloordiv__", np.inf),
mask_zero_div_zero(right, left, result[0]),
fill_zeros(result[1], left, right, "__rmod__", np.nan),
)
else:
result = fill_zeros(result, left, right, op.__name__, fill_value)
elif op is operator.floordiv:
# Note: no need to do this for truediv; in py3 numpy behaves the way
# we want.
result = mask_zero_div_zero(left, right, result)
elif op is op is rfloordiv:
# Note: no need to do this for rtruediv; in py3 numpy behaves the way
# we want.
result = mask_zero_div_zero(right, left, result)
elif op is operator.mod:
result = fill_zeros(result, left, right, "__mod__", np.nan)
elif op is rmod:
result = fill_zeros(result, left, right, "__rmod__", np.nan)
return result
28 changes: 14 additions & 14 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,12 @@ def test_ser_divmod_zero(self, dtype1, any_real_dtype):
left = pd.Series([1, 1]).astype(dtype1)
right = pd.Series([0, 2]).astype(dtype2)

# GH#27321 pandas convention is to set 1 // 0 to np.inf, as opposed
# to numpy which sets to np.nan; patch `expected[0]` below
expected = left // right, left % right
expected = list(expected)
expected[0] = expected[0].astype(np.float64)
expected[0][0] = np.inf
result = divmod(left, right)

tm.assert_series_equal(result[0], expected[0])
Expand Down Expand Up @@ -881,17 +886,16 @@ def check(series, other):

_check_op(series, other, operator.pow, pos_only=True)

_check_op(series, other, lambda x, y: operator.add(y, x))
_check_op(series, other, lambda x, y: operator.sub(y, x))
_check_op(series, other, lambda x, y: operator.truediv(y, x))
_check_op(series, other, lambda x, y: operator.floordiv(y, x))
_check_op(series, other, lambda x, y: operator.mul(y, x))
_check_op(series, other, lambda x, y: operator.pow(y, x), pos_only=True)
_check_op(series, other, lambda x, y: operator.mod(y, x))
_check_op(series, other, ops.radd)
_check_op(series, other, ops.rsub)
_check_op(series, other, ops.rtruediv)
_check_op(series, other, ops.rfloordiv)
_check_op(series, other, ops.rmul)
_check_op(series, other, ops.rpow, pos_only=True)
_check_op(series, other, ops.rmod)

tser = tm.makeTimeSeries().rename("ts")
check(tser, tser * 2)
check(tser, tser * 0)
check(tser, tser[::2])
check(tser, 5)

Expand Down Expand Up @@ -931,13 +935,9 @@ def check(series, other):

tser = tm.makeTimeSeries().rename("ts")
check(tser, tser * 2)
check(tser, tser * 0)
check(tser, tser[::2])
check(tser, 5)

@pytest.mark.xfail(
reason="Series division does not yet fill 1/0 consistently; Index does."
)
def test_series_divmod_zero(self):
# Check that divmod uses pandas convention for division by zero,
# which does not match numpy.
Expand All @@ -950,8 +950,8 @@ def test_series_divmod_zero(self):
other = tser * 0

result = divmod(tser, other)
exp1 = pd.Series([np.inf] * len(tser), index=tser.index)
exp2 = pd.Series([np.nan] * len(tser), index=tser.index)
exp1 = pd.Series([np.inf] * len(tser), index=tser.index, name="ts")
exp2 = pd.Series([np.nan] * len(tser), index=tser.index, name="ts")
tm.assert_series_equal(result[0], exp1)
tm.assert_series_equal(result[1], exp2)

Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/arrays/sparse/test_arithmetics.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ def _check_numeric_ops(self, a, b, a_dense, b_dense, mix, op):
else:
expected = op(a_dense, b_dense)

if op in [operator.floordiv, ops.rfloordiv]:
# Series sets 1//0 to np.inf, which SparseArray does not do (yet)
mask = np.isinf(expected)
if mask.any():
expected[mask] = np.nan

self._assert(result, expected)

def _check_bool_result(self, res):
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/arrays/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,10 @@ def _check_op_float(self, result, expected, mask, s, op_name, other):
# check comparisons that are resulting in float dtypes

expected[mask] = np.nan
if "floordiv" in op_name:
# Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)

This comment has been minimized.

Copy link
@jorisvandenbossche

jorisvandenbossche Jul 13, 2019

Member

is there an open issue for this?

This comment has been minimized.

Copy link
@TomAugspurger

TomAugspurger Jul 15, 2019

Contributor
mask2 = np.isinf(expected) & np.isnan(result)
expected[mask2] = np.nan
tm.assert_series_equal(result, expected)

def _check_op_integer(self, result, expected, mask, s, op_name, other):
Expand Down
16 changes: 13 additions & 3 deletions pandas/tests/sparse/frame/test_frame.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import operator
from types import LambdaType

import numpy as np
from numpy import nan
Expand All @@ -9,6 +10,7 @@

import pandas as pd
from pandas import DataFrame, Series, bdate_range, compat
from pandas.core import ops
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.sparse import frame as spf
from pandas.core.sparse.api import (
Expand Down Expand Up @@ -424,6 +426,13 @@ def _compare_to_dense(a, b, da, db, op):
sparse_result = op(a, b)
dense_result = op(da, db)

# catch lambdas but not non-lambdas e.g. operator.add
if op in [operator.floordiv, ops.rfloordiv] or isinstance(op, LambdaType):
# GH#27231 Series sets 1//0 to np.inf, which SparseArray
# does not do (yet)
mask = np.isinf(dense_result) & ~np.isinf(sparse_result.to_dense())
dense_result[mask] = np.nan

fill = sparse_result.default_fill_value
dense_result = dense_result.to_sparse(fill_value=fill)
tm.assert_sp_frame_equal(sparse_result, dense_result, exact_indices=False)
Expand All @@ -436,7 +445,6 @@ def _compare_to_dense(a, b, da, db, op):
)

opnames = ["add", "sub", "mul", "truediv", "floordiv"]
ops = [getattr(operator, name) for name in opnames]

fidx = frame.index

Expand Down Expand Up @@ -466,6 +474,7 @@ def _compare_to_dense(a, b, da, db, op):
f = lambda a, b: getattr(a, op)(b, axis="index")
_compare_to_dense(frame, s, frame.to_dense(), s.to_dense(), f)

# FIXME: dont leave commented-out
# rops are not implemented
# _compare_to_dense(s, frame, s.to_dense(),
# frame.to_dense(), f)
Expand All @@ -479,13 +488,14 @@ def _compare_to_dense(a, b, da, db, op):
frame.xs(fidx[5])[:2],
]

for op in ops:
for name in opnames:
op = getattr(operator, name)
for s in series:
_compare_to_dense(frame, s, frame.to_dense(), s, op)
_compare_to_dense(s, frame, s, frame.to_dense(), op)

# it works!
result = frame + frame.loc[:, ["A", "B"]] # noqa
frame + frame.loc[:, ["A", "B"]]

def test_op_corners(self, float_frame, empty_frame):
empty = empty_frame + empty_frame
Expand Down
15 changes: 10 additions & 5 deletions pandas/tests/sparse/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import pandas as pd
from pandas import DataFrame, Series, SparseDtype, SparseSeries, bdate_range, isna
from pandas.core import ops
from pandas.core.reshape.util import cartesian_product
import pandas.core.sparse.frame as spf
from pandas.tests.series.test_api import SharedWithSparse
Expand Down Expand Up @@ -563,6 +564,10 @@ def _check_op(a, b, op):
adense = a.to_dense() if isinstance(a, SparseSeries) else a
bdense = b.to_dense() if isinstance(b, SparseSeries) else b
dense_result = op(adense, bdense)
if "floordiv" in op.__name__:
# Series sets 1//0 to np.inf, which SparseSeries does not do (yet)
mask = np.isinf(dense_result)
dense_result[mask] = np.nan
tm.assert_almost_equal(sp_result.to_dense(), dense_result)

def check(a, b):
Expand All @@ -572,11 +577,11 @@ def check(a, b):
_check_op(a, b, operator.floordiv)
_check_op(a, b, operator.mul)

_check_op(a, b, lambda x, y: operator.add(y, x))
_check_op(a, b, lambda x, y: operator.sub(y, x))
_check_op(a, b, lambda x, y: operator.truediv(y, x))
_check_op(a, b, lambda x, y: operator.floordiv(y, x))
_check_op(a, b, lambda x, y: operator.mul(y, x))
_check_op(a, b, ops.radd)
_check_op(a, b, ops.rsub)
_check_op(a, b, ops.rtruediv)
_check_op(a, b, ops.rfloordiv)
_check_op(a, b, ops.rmul)

# FIXME: don't leave commented-out
# NaN ** 0 = 1 in C?
Expand Down

0 comments on commit 3885575

Please sign in to comment.