Skip to content

Commit

Permalink
fix and test index division by zero
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Jan 22, 2018
1 parent b6acf5e commit 969f342
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 18 deletions.
33 changes: 33 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4044,6 +4044,8 @@ def _evaluate_numeric_binop(self, other):
attrs = self._maybe_update_attributes(attrs)
with np.errstate(all='ignore'):
result = op(values, other)

result = dispatch_missing(op, values, other, result)
return constructor(result, **attrs)

return _evaluate_numeric_binop
Expand Down Expand Up @@ -4167,6 +4169,37 @@ def invalid_op(self, other=None):
Index._add_comparison_methods()


def dispatch_missing(op, left, right, result):
    """
    Fill nulls caused by division by zero, casting to a different dtype
    if necessary.

    Only the division-like operators (div/truediv/floordiv, mod and divmod)
    are post-processed; for any other ``op`` the ``result`` is returned
    unchanged.

    Parameters
    ----------
    op : function (operator.add, operator.div, ...)
    left : object, usually Index
    right : object
    result : ndarray
        The raw outcome of ``op(left, right)``.  For ``divmod`` this is
        indexed as ``result[0]`` and ``result[1]``.

    Returns
    -------
    result : ndarray
        For ``divmod`` a 2-tuple of the two individually filled results.
    """
    opstr = '__{opname}__'.format(opname=op.__name__).replace('____', '__')

    # ``operator.div`` only exists on Python 2.  Look it up defensively so
    # that merely building this list does not raise AttributeError on
    # Python 3 (which would break every operator, not just division).
    division_ops = [operator.truediv, operator.floordiv,
                    getattr(operator, 'div', None)]

    if op in division_ops:
        result = missing.mask_zero_div_zero(left, right, result)
    elif op is operator.mod:
        result = missing.fill_zeros(result, left, right,
                                    opstr, np.nan)
    elif op is divmod:
        # divmod yields two results; each one is filled separately using
        # the same operands and the same ``__divmod__`` name.
        res0 = missing.fill_zeros(result[0], left, right,
                                  opstr, np.nan)
        res1 = missing.fill_zeros(result[1], left, right,
                                  opstr, np.nan)
        result = (res0, res1)
    return result


def _ensure_index_from_sequences(sequences, names=None):
"""Construct an index from sequences of data.
Expand Down
31 changes: 13 additions & 18 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ def __getitem__(self, key):
return super_getitem(key)

def __floordiv__(self, other):
if is_integer(other):
if is_integer(other) and other != 0:
if (len(self) == 0 or
self._start % other == 0 and
self._step % other == 0):
Expand Down Expand Up @@ -592,26 +592,27 @@ def _evaluate_numeric_binop(self, other):
attrs = self._get_attributes_dict()
attrs = self._maybe_update_attributes(attrs)

left, right = self, other
if reversed:
self, other = other, self
left, right = right, left

try:
# apply if we have an override
if step:
with np.errstate(all='ignore'):
rstep = step(self._step, other)
rstep = step(left._step, right)

# we don't have a representable op
# so return a base index
if not is_integer(rstep) or not rstep:
raise ValueError

else:
rstep = self._step
rstep = left._step

with np.errstate(all='ignore'):
rstart = op(self._start, other)
rstop = op(self._stop, other)
rstart = op(left._start, right)
rstop = op(left._stop, right)

result = RangeIndex(rstart,
rstop,
Expand All @@ -627,18 +628,12 @@ def _evaluate_numeric_binop(self, other):

return result

except (ValueError, TypeError, AttributeError):
pass

# convert to Int64Index ops
if isinstance(self, RangeIndex):
self = self.values
if isinstance(other, RangeIndex):
other = other.values

with np.errstate(all='ignore'):
results = op(self, other)
return Index(results, **attrs)
except (ValueError, TypeError, AttributeError,
ZeroDivisionError):
# Defer to Int64Index implementation
if reversed:
return op(other, self._int64index)
return op(self._int64index, other)

return _evaluate_numeric_binop

Expand Down
38 changes: 38 additions & 0 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,44 @@ def fill_zeros(result, x, y, name, fill):
return result


def mask_zero_div_zero(x, y, result):
    """
    Set results of 0 / 0 or 0 // 0 to np.nan and results of dividing a
    non-zero value by zero to +/-np.inf, regardless of the dtypes
    of the numerator or the denominator.

    Parameters
    ----------
    x : ndarray
        The numerator.
    y : ndarray
        The denominator; a scalar is wrapped in a 0-dim array.
    result : ndarray
        The raw outcome of the division.

    Returns
    -------
    filled_result : ndarray
        ``result`` itself when no fill is needed; otherwise a float64
        array with the original shape.
    """
    if is_scalar(y):
        y = np.array(y)

    zmask = y == 0
    if zmask.any():
        shape = result.shape

        # -0.0 compares equal to 0 but dividing by it flips the sign of the
        # resulting infinity, so negative zeros are tracked separately.
        zneg_mask = zmask & np.signbit(np.asarray(y))
        zpos_mask = zmask & ~zneg_mask

        x_lt0 = x < 0
        x_gt0 = x > 0
        nan_mask = (zmask & (x == 0)).ravel()
        neginf_mask = ((zpos_mask & x_lt0) | (zneg_mask & x_gt0)).ravel()
        posinf_mask = ((zpos_mask & x_gt0) | (zneg_mask & x_lt0)).ravel()

        if nan_mask.any() or neginf_mask.any() or posinf_mask.any():
            # Integer results cannot hold nan/inf, hence the float64 cast.
            result = result.astype('float64', copy=False).ravel()

            np.putmask(result, nan_mask, np.nan)
            np.putmask(result, posinf_mask, np.inf)
            np.putmask(result, neginf_mask, -np.inf)

            result = result.reshape(shape)

    return result


def _interp_limit(invalid, fw_limit, bw_limit):
"""Get idx of values that won't be filled b/c they exceed the limits.
Expand Down
40 changes: 40 additions & 0 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
import operator

import pytest

Expand All @@ -17,6 +18,11 @@

from pandas.tests.indexes.common import Base

# Divisors (or dividends) for the division-by-zero tests on Indexes:
# the scalar zeros and length-5 vector zeros produced by ``tm.gen_zeros``,
# with the Series variants dropped.
zeros = [x for x in tm.gen_zeros(5) if not isinstance(x, Series)]


def full_like(array, value):
"""Compatibility for numpy<1.8.0
Expand Down Expand Up @@ -157,6 +163,40 @@ def test_divmod_series(self):
for r, e in zip(result, expected):
tm.assert_series_equal(r, e)

# ``operator.div`` exists only on Python 2; include it only when available so
# that evaluating this decorator does not raise AttributeError on Python 3.
@pytest.mark.parametrize('op', [
    op for op in (getattr(operator, 'div', None), operator.truediv)
    if op is not None])
@pytest.mark.parametrize('zero', zeros)
def test_div_zero(self, zero, op):
    # Dividing the index by each kind of zero is expected to give NaN in
    # the first position and +inf in the remaining four.
    # NOTE(review): this presumes ``create_index`` returns five
    # non-negative values starting at 0 -- confirm against the test class.
    idx = self.create_index()

    expected = Index([np.nan, np.inf, np.inf, np.inf, np.inf],
                     dtype=np.float64)
    result = op(idx, zero)
    tm.assert_index_equal(result, expected)

    # The same operation on int64 Series/array operands must agree with
    # the Index result.
    ser_compat = op(Series(idx).astype('i8'), np.array(zero).astype('i8'))
    tm.assert_series_equal(ser_compat, Series(result))

@pytest.mark.parametrize('zero', zeros)
def test_floordiv_zero(self, zero):
    # Floor-dividing the index by each kind of zero is expected to give NaN
    # in the first position and +inf in the remaining four.
    index = self.create_index()
    expected = Index([np.nan] + [np.inf] * 4, dtype=np.float64)

    result = index // zero
    tm.assert_index_equal(result, expected)

    # The same operation on int64 Series/array operands must agree with
    # the Index result.
    int_series = Series(index).astype('i8')
    int_zero = np.array(zero).astype('i8')
    ser_compat = int_series // int_zero
    tm.assert_series_equal(ser_compat, Series(result))

@pytest.mark.parametrize('zero', zeros)
def test_mod_zero(self, zero):
    # Taking the index modulo each kind of zero is expected to give NaN in
    # all five positions.
    index = self.create_index()
    expected = Index([np.nan] * 5, dtype=np.float64)

    result = index % zero
    tm.assert_index_equal(result, expected)

    # The same operation on int64 Series/array operands must agree with
    # the Index result.
    int_series = Series(index).astype('i8')
    int_zero = np.array(zero).astype('i8')
    ser_compat = int_series % int_zero
    tm.assert_series_equal(ser_compat, Series(result))

def test_explicit_conversions(self):

# GH 8608
Expand Down
26 changes: 26 additions & 0 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1964,6 +1964,32 @@ def add_nans_panel4d(panel4d):
return panel4d


def gen_zeros(arr_len):
    """
    Generate variants of scalar zeros and all-zero arrays with the given
    length.

    Intended for testing division by (or of) zero for Series or Indexes
    with the given length.  The returned list holds, in order:

    * all-zero ``pd.Series``, ``pd.Index`` and ``np.array`` objects of
      length ``arr_len``, each in int64, uint64 and float64;
    * 0-dim ``np.array`` zeros in int64, uint64 and float64;
    * the Python scalars ``0``, ``0.0`` and a ``long`` zero.

    Parameters
    ----------
    arr_len : int

    Returns
    -------
    zeros : list
    """
    dtypes = [np.int64, np.uint64, np.float64]

    zeros = [box([0] * arr_len, dtype=dtype)
             for box in [pd.Series, pd.Index, np.array]
             for dtype in dtypes]
    zeros.extend([np.array(0, dtype=dtype) for dtype in dtypes])

    # ``long`` is a builtin only on Python 2.  Use it when it is in scope
    # (builtin or compat import); otherwise fall back to a plain int zero
    # instead of raising NameError.
    try:
        long_zero = long(0)  # noqa: F821
    except NameError:
        long_zero = 0
    zeros.extend([0, 0.0, long_zero])
    return zeros


class TestSubDict(dict):

def __init__(self, *args, **kwargs):
Expand Down

0 comments on commit 969f342

Please sign in to comment.