Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Consistent division by zero behavior for Index/Series #27321

Merged
merged 29 commits into from
Jul 11, 2019
Merged
Show file tree
Hide file tree
Changes from 27 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
6bd4fb1
TST: parametrize tests
jbrockmendel Jul 7, 2019
d9943be
for loop, preparing to parametrize
jbrockmendel Jul 7, 2019
eb8b6d7
parametrize/fixturize mix
jbrockmendel Jul 7, 2019
3edebac
fixture for op
jbrockmendel Jul 7, 2019
eb60fc3
blackify
jbrockmendel Jul 7, 2019
3cf8b1e
parametrize more
jbrockmendel Jul 7, 2019
c34a9cc
param scalar
jbrockmendel Jul 7, 2019
158b003
Merge branch 'master' of https://github.com/pandas-dev/pandas into sp…
jbrockmendel Jul 7, 2019
b59b81a
docstring, xfail
jbrockmendel Jul 7, 2019
f085d6b
remove defunct comment
jbrockmendel Jul 7, 2019
339ed84
Merge branch 'master' of https://github.com/pandas-dev/pandas into sp…
jbrockmendel Jul 8, 2019
f03bd93
implement all_arithmetic_functions fixture
jbrockmendel Jul 8, 2019
cd2f564
isort fixup
jbrockmendel Jul 9, 2019
e362a08
check early for non-scalar default_fill_value
jbrockmendel Jul 9, 2019
7c6e127
try to bring dispatch_fill_zeros, dispatch_mnissing in sync, remove u…
jbrockmendel Jul 8, 2019
97611c3
standardize calls
jbrockmendel Jul 8, 2019
c30b40c
use pandas convention for divmod
jbrockmendel Jul 9, 2019
1ed12b8
remove comment
jbrockmendel Jul 9, 2019
45ffbc2
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
jbrockmendel Jul 10, 2019
591eaaa
patch sparse and IntegerArray tests
jbrockmendel Jul 10, 2019
2841be0
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
jbrockmendel Jul 10, 2019
518e2e9
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
jbrockmendel Jul 10, 2019
2580048
fixup for docs
jbrockmendel Jul 10, 2019
f440c59
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
jbrockmendel Jul 10, 2019
188ac99
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
jbrockmendel Jul 11, 2019
6b411c4
requested comments
jbrockmendel Jul 11, 2019
77c2383
whatsnew
jbrockmendel Jul 11, 2019
7b8880c
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
jbrockmendel Jul 11, 2019
14010c8
dummy commit to force ci
jbrockmendel Jul 11, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1009,6 +1009,7 @@ Numeric
- Bug in :meth:`~pandas.eval` when comparing floats with scalar operators, for example: ``x < -0.1`` (:issue:`25928`)
- Fixed bug where casting all-boolean array to integer extension array failed (:issue:`25211`)
- Bug in ``divmod`` with a :class:`Series` object containing zeros incorrectly raising ``AttributeError`` (:issue:`26987`)
- Inconsistency in :class:`Series` floor-division (`//`) and ``divmod`` filling positive//zero with ``NaN`` instead of ``Inf`` (:issue:`27321`)
-

Conversion
Expand Down
32 changes: 2 additions & 30 deletions pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,32 +234,6 @@ def _gen_eval_kwargs(name):
return kwargs


def _gen_fill_zeros(name):
"""
Find the appropriate fill value to use when filling in undefined values
in the results of the given operation caused by operating on
(generally dividing by) zero.

Parameters
----------
name : str

Returns
-------
fill_value : {None, np.nan, np.inf}
"""
name = name.strip("__")
if "div" in name:
# truediv, floordiv, and reversed variants
fill_value = np.inf
elif "mod" in name:
# mod, rmod
fill_value = np.nan
else:
fill_value = None
return fill_value


def _get_frame_op_default_axis(name):
"""
Only DataFrame cares about default_axis, specifically:
Expand Down Expand Up @@ -1632,7 +1606,6 @@ def _arith_method_SERIES(cls, op, special):
str_rep = _get_opstr(op, cls)
op_name = _get_op_name(op, special)
eval_kwargs = _gen_eval_kwargs(op_name)
fill_zeros = _gen_fill_zeros(op_name)
construct_result = (
_construct_divmod_result if op in [divmod, rdivmod] else _construct_result
)
Expand Down Expand Up @@ -1663,7 +1636,7 @@ def na_op(x, y):
except TypeError:
result = masked_arith_op(x, y, op)

return missing.dispatch_fill_zeros(op, x, y, result, fill_zeros)
return missing.dispatch_fill_zeros(op, x, y, result)

def wrapper(left, right):
if isinstance(right, ABCDataFrame):
Expand Down Expand Up @@ -2154,7 +2127,6 @@ def _arith_method_FRAME(cls, op, special):
str_rep = _get_opstr(op, cls)
op_name = _get_op_name(op, special)
eval_kwargs = _gen_eval_kwargs(op_name)
fill_zeros = _gen_fill_zeros(op_name)
default_axis = _get_frame_op_default_axis(op_name)

def na_op(x, y):
Expand All @@ -2165,7 +2137,7 @@ def na_op(x, y):
except TypeError:
result = masked_arith_op(x, y, op)

return missing.dispatch_fill_zeros(op, x, y, result, fill_zeros)
return missing.dispatch_fill_zeros(op, x, y, result)

if op_name in _op_descriptions:
# i.e. include "add" but not "__add__"
Expand Down
48 changes: 32 additions & 16 deletions pandas/core/ops/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_scalar

from .roperator import rdivmod
from .roperator import rdivmod, rfloordiv, rmod


def fill_zeros(result, x, y, name, fill):
Expand Down Expand Up @@ -85,7 +85,7 @@ def fill_zeros(result, x, y, name, fill):
return result


def mask_zero_div_zero(x, y, result, copy=False):
def mask_zero_div_zero(x, y, result):
"""
Set results of 0 / 0 or 0 // 0 to np.nan, regardless of the dtypes
of the numerator or the denominator.
Expand All @@ -95,9 +95,6 @@ def mask_zero_div_zero(x, y, result, copy=False):
x : ndarray
y : ndarray
result : ndarray
copy : bool (default False)
Whether to always create a new array or try to fill in the existing
array if possible.

Returns
-------
Expand All @@ -113,10 +110,19 @@ def mask_zero_div_zero(x, y, result, copy=False):
>>> mask_zero_div_zero(x, y, result)
array([ inf, nan, -inf])
"""
if not isinstance(result, np.ndarray):
# FIXME: SparseArray would raise TypeError with np.putmask
return result

if is_scalar(y):
y = np.array(y)

zmask = y == 0

if isinstance(zmask, bool):
# FIXME: numpy did not evaluate pointwise, seen in docs build
return result

if zmask.any():
shape = result.shape

Expand All @@ -125,12 +131,13 @@ def mask_zero_div_zero(x, y, result, copy=False):
zpos_mask = zmask & ~zneg_mask

nan_mask = (zmask & (x == 0)).ravel()
neginf_mask = ((zpos_mask & (x < 0)) | (zneg_mask & (x > 0))).ravel()
posinf_mask = ((zpos_mask & (x > 0)) | (zneg_mask & (x < 0))).ravel()
with np.errstate(invalid="ignore"):
neginf_mask = ((zpos_mask & (x < 0)) | (zneg_mask & (x > 0))).ravel()
posinf_mask = ((zpos_mask & (x > 0)) | (zneg_mask & (x < 0))).ravel()

if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
# Fill negative/0 with -inf, positive/0 with +inf, 0/0 with NaN
result = result.astype("float64", copy=copy).ravel()
result = result.astype("float64", copy=False).ravel()

np.putmask(result, nan_mask, np.nan)
np.putmask(result, posinf_mask, np.inf)
Expand All @@ -157,36 +164,45 @@ def dispatch_missing(op, left, right, result):
-------
result : ndarray
"""
opstr = "__{opname}__".format(opname=op.__name__).replace("____", "__")
if op is operator.floordiv:
# Note: no need to do this for truediv; in py3 numpy behaves the way
# we want.
result = mask_zero_div_zero(left, right, result)
elif op is operator.mod:
result = fill_zeros(result, left, right, opstr, np.nan)
result = fill_zeros(result, left, right, "__mod__", np.nan)
elif op is divmod:
res0 = mask_zero_div_zero(left, right, result[0])
res1 = fill_zeros(result[1], left, right, opstr, np.nan)
res1 = fill_zeros(result[1], left, right, "__divmod__", np.nan)
result = (res0, res1)
return result


# FIXME: de-duplicate with dispatch_missing
def dispatch_fill_zeros(op, left, right, result, fill_value):
def dispatch_fill_zeros(op, left, right, result):
"""
Call fill_zeros with the appropriate fill value depending on the operation,
with special logic for divmod and rdivmod.
"""
if op is divmod:
result = (
fill_zeros(result[0], left, right, "__floordiv__", np.inf),
mask_zero_div_zero(left, right, result[0]),
fill_zeros(result[1], left, right, "__mod__", np.nan),
)
elif op is rdivmod:
result = (
fill_zeros(result[0], left, right, "__rfloordiv__", np.inf),
mask_zero_div_zero(right, left, result[0]),
fill_zeros(result[1], left, right, "__rmod__", np.nan),
)
else:
result = fill_zeros(result, left, right, op.__name__, fill_value)
elif op is operator.floordiv:
# Note: no need to do this for truediv; in py3 numpy behaves the way
# we want.
result = mask_zero_div_zero(left, right, result)
elif op is op is rfloordiv:
# Note: no need to do this for rtruediv; in py3 numpy behaves the way
# we want.
result = mask_zero_div_zero(right, left, result)
elif op is operator.mod:
result = fill_zeros(result, left, right, "__mod__", np.nan)
elif op is rmod:
result = fill_zeros(result, left, right, "__rmod__", np.nan)
return result
28 changes: 14 additions & 14 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,12 @@ def test_ser_divmod_zero(self, dtype1, any_real_dtype):
left = pd.Series([1, 1]).astype(dtype1)
right = pd.Series([0, 2]).astype(dtype2)

# GH#27321 pandas convention is to set 1 // 0 to np.inf, as opposed
# to numpy which sets to np.nan; patch `expected[0]` below
expected = left // right, left % right
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in future PR can you give some comments on what you ar doing here (filling np.inf on the integer divide)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure

expected = list(expected)
expected[0] = expected[0].astype(np.float64)
expected[0][0] = np.inf
result = divmod(left, right)

tm.assert_series_equal(result[0], expected[0])
Expand Down Expand Up @@ -881,17 +886,16 @@ def check(series, other):

_check_op(series, other, operator.pow, pos_only=True)

_check_op(series, other, lambda x, y: operator.add(y, x))
_check_op(series, other, lambda x, y: operator.sub(y, x))
_check_op(series, other, lambda x, y: operator.truediv(y, x))
_check_op(series, other, lambda x, y: operator.floordiv(y, x))
_check_op(series, other, lambda x, y: operator.mul(y, x))
_check_op(series, other, lambda x, y: operator.pow(y, x), pos_only=True)
_check_op(series, other, lambda x, y: operator.mod(y, x))
_check_op(series, other, ops.radd)
_check_op(series, other, ops.rsub)
_check_op(series, other, ops.rtruediv)
_check_op(series, other, ops.rfloordiv)
_check_op(series, other, ops.rmul)
_check_op(series, other, ops.rpow, pos_only=True)
_check_op(series, other, ops.rmod)

tser = tm.makeTimeSeries().rename("ts")
check(tser, tser * 2)
check(tser, tser * 0)
check(tser, tser[::2])
check(tser, 5)

Expand Down Expand Up @@ -931,13 +935,9 @@ def check(series, other):

tser = tm.makeTimeSeries().rename("ts")
check(tser, tser * 2)
check(tser, tser * 0)
check(tser, tser[::2])
check(tser, 5)

@pytest.mark.xfail(
reason="Series division does not yet fill 1/0 consistently; Index does."
)
def test_series_divmod_zero(self):
# Check that divmod uses pandas convention for division by zero,
# which does not match numpy.
Expand All @@ -950,8 +950,8 @@ def test_series_divmod_zero(self):
other = tser * 0

result = divmod(tser, other)
exp1 = pd.Series([np.inf] * len(tser), index=tser.index)
exp2 = pd.Series([np.nan] * len(tser), index=tser.index)
exp1 = pd.Series([np.inf] * len(tser), index=tser.index, name="ts")
exp2 = pd.Series([np.nan] * len(tser), index=tser.index, name="ts")
tm.assert_series_equal(result[0], exp1)
tm.assert_series_equal(result[1], exp2)

Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/arrays/sparse/test_arithmetics.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ def _check_numeric_ops(self, a, b, a_dense, b_dense, mix, op):
else:
expected = op(a_dense, b_dense)

if op in [operator.floordiv, ops.rfloordiv]:
# Series sets 1//0 to np.inf, which SparseArray does not do (yet)
mask = np.isinf(expected)
if mask.any():
expected[mask] = np.nan

self._assert(result, expected)

def _check_bool_result(self, res):
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/arrays/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,10 @@ def _check_op_float(self, result, expected, mask, s, op_name, other):
# check comparisons that are resulting in float dtypes

expected[mask] = np.nan
if "floordiv" in op_name:
# Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
mask2 = np.isinf(expected) & np.isnan(result)
expected[mask2] = np.nan
tm.assert_series_equal(result, expected)

def _check_op_integer(self, result, expected, mask, s, op_name, other):
Expand Down
16 changes: 13 additions & 3 deletions pandas/tests/sparse/frame/test_frame.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import operator
from types import LambdaType

import numpy as np
from numpy import nan
Expand All @@ -9,6 +10,7 @@

import pandas as pd
from pandas import DataFrame, Series, bdate_range, compat
from pandas.core import ops
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.sparse import frame as spf
from pandas.core.sparse.api import (
Expand Down Expand Up @@ -424,6 +426,13 @@ def _compare_to_dense(a, b, da, db, op):
sparse_result = op(a, b)
dense_result = op(da, db)

# catch lambdas but not non-lambdas e.g. operator.add
if op in [operator.floordiv, ops.rfloordiv] or isinstance(op, LambdaType):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this not just callable(op)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because we don't want e.g. operator.add

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

k can you add that as a comment (future PR ok), as its not obvious

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure

# GH#27231 Series sets 1//0 to np.inf, which SparseArray
# does not do (yet)
mask = np.isinf(dense_result) & ~np.isinf(sparse_result.to_dense())
dense_result[mask] = np.nan

fill = sparse_result.default_fill_value
dense_result = dense_result.to_sparse(fill_value=fill)
tm.assert_sp_frame_equal(sparse_result, dense_result, exact_indices=False)
Expand All @@ -436,7 +445,6 @@ def _compare_to_dense(a, b, da, db, op):
)

opnames = ["add", "sub", "mul", "truediv", "floordiv"]
ops = [getattr(operator, name) for name in opnames]

fidx = frame.index

Expand Down Expand Up @@ -466,6 +474,7 @@ def _compare_to_dense(a, b, da, db, op):
f = lambda a, b: getattr(a, op)(b, axis="index")
_compare_to_dense(frame, s, frame.to_dense(), s.to_dense(), f)

# FIXME: dont leave commented-out
# rops are not implemented
# _compare_to_dense(s, frame, s.to_dense(),
# frame.to_dense(), f)
Expand All @@ -479,13 +488,14 @@ def _compare_to_dense(a, b, da, db, op):
frame.xs(fidx[5])[:2],
]

for op in ops:
for name in opnames:
op = getattr(operator, name)
for s in series:
_compare_to_dense(frame, s, frame.to_dense(), s, op)
_compare_to_dense(s, frame, s, frame.to_dense(), op)

# it works!
result = frame + frame.loc[:, ["A", "B"]] # noqa
frame + frame.loc[:, ["A", "B"]]

def test_op_corners(self, float_frame, empty_frame):
empty = empty_frame + empty_frame
Expand Down
15 changes: 10 additions & 5 deletions pandas/tests/sparse/series/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import pandas as pd
from pandas import DataFrame, Series, SparseDtype, SparseSeries, bdate_range, isna
from pandas.core import ops
from pandas.core.reshape.util import cartesian_product
import pandas.core.sparse.frame as spf
from pandas.tests.series.test_api import SharedWithSparse
Expand Down Expand Up @@ -563,6 +564,10 @@ def _check_op(a, b, op):
adense = a.to_dense() if isinstance(a, SparseSeries) else a
bdense = b.to_dense() if isinstance(b, SparseSeries) else b
dense_result = op(adense, bdense)
if "floordiv" in op.__name__:
# Series sets 1//0 to np.inf, which SparseSeries does not do (yet)
mask = np.isinf(dense_result)
dense_result[mask] = np.nan
tm.assert_almost_equal(sp_result.to_dense(), dense_result)

def check(a, b):
Expand All @@ -572,11 +577,11 @@ def check(a, b):
_check_op(a, b, operator.floordiv)
_check_op(a, b, operator.mul)

_check_op(a, b, lambda x, y: operator.add(y, x))
_check_op(a, b, lambda x, y: operator.sub(y, x))
_check_op(a, b, lambda x, y: operator.truediv(y, x))
_check_op(a, b, lambda x, y: operator.floordiv(y, x))
_check_op(a, b, lambda x, y: operator.mul(y, x))
_check_op(a, b, ops.radd)
_check_op(a, b, ops.rsub)
_check_op(a, b, ops.rtruediv)
_check_op(a, b, ops.rfloordiv)
_check_op(a, b, ops.rmul)

# FIXME: don't leave commented-out
# NaN ** 0 = 1 in C?
Expand Down