Skip to content

Commit

Permalink
BUG: Fix #9144 #8445 Fix how core.common._fill_zeros handles div and …
Browse files Browse the repository at this point in the history
…mod by zero
  • Loading branch information
Garrett-R committed Feb 16, 2015
1 parent 76195fb commit 85342ee
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 34 deletions.
31 changes: 31 additions & 0 deletions doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,37 @@ methods (:issue:`9088`).
dtype: int64


- During division involving a ``Series`` or ``DataFrame``, ``0/0`` and ``0//0`` now give ``np.nan`` instead of ``np.inf``. (:issue:`9144`, :issue:`8445`)

Previous Behavior

.. code-block:: python

In [2]: p = pd.Series([0, 1])

In [3]: p / 0
Out[3]:
0 inf
1 inf
dtype: float64

In [4]: p // 0
Out[4]:
0 inf
1 inf
dtype: float64



New Behavior

.. ipython:: python

p = pd.Series([0, 1])
p / 0
p // 0



Deprecations
~~~~~~~~~~~~
Expand Down
46 changes: 25 additions & 21 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1395,36 +1395,40 @@ def _fill_zeros(result, x, y, name, fill):
mask the nan's from x
"""

if fill is not None:
if fill is None or is_float_dtype(result):
return result

if name.startswith(('r', '__r')):
x,y = y,x

if name.startswith('r'):
x,y = y,x
if np.isscalar(y):
y = np.array(y)

if is_integer_dtype(y):

if not isinstance(y, np.ndarray):
dtype, value = _infer_dtype_from_scalar(y)
y = np.empty(result.shape, dtype=dtype)
y.fill(value)
if (y == 0).any():

if is_integer_dtype(y):
# GH 7325, mask and nans must be broadcastable (also: PR 9308)
# Raveling and then reshaping makes np.putmask faster
mask = ((y == 0) & ~np.isnan(result)).ravel()

if (y.ravel() == 0).any():
shape = result.shape
result = result.ravel().astype('float64')
shape = result.shape
result = result.astype('float64', copy=False).ravel()

# GH 7325, mask and nans must be broadcastable
signs = np.sign(result)
mask = ((y == 0) & ~np.isnan(x)).ravel()
np.putmask(result, mask, fill)

np.putmask(result, mask, fill)
# if we have a fill of inf, then sign it correctly
# (GH 6178 and PR 9308)
if np.isinf(fill):
signs = np.sign(y if name.startswith(('r', '__r')) else x)
negative_inf_mask = (signs.ravel() < 0) & mask
np.putmask(result, negative_inf_mask, -fill)

# if we have a fill of inf, then sign it
# correctly
# GH 6178
if np.isinf(fill):
np.putmask(result,(signs<0) & mask, -fill)
if "floordiv" in name: # (PR 9308)
nan_mask = ((y == 0) & (x == 0)).ravel()
np.putmask(result, nan_mask, np.nan)

result = result.reshape(shape)
result = result.reshape(shape)

return result

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def names(x):
rpow=arith_method(lambda x, y: y ** x, names('rpow'), op('**'),
default_axis=default_axis, reversed=True),
rmod=arith_method(lambda x, y: y % x, names('rmod'), op('%'),
default_axis=default_axis, reversed=True),
default_axis=default_axis, fill_zeros=np.nan,
reversed=True),
)
new_methods['div'] = new_methods['truediv']
new_methods['rdiv'] = new_methods['rtruediv']
Expand Down
19 changes: 11 additions & 8 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
)
from pandas import compat

from numpy import random, nan
from numpy import random, nan, inf
from numpy.random import randn
import numpy as np
import numpy.ma as ma
Expand Down Expand Up @@ -5138,23 +5138,26 @@ def test_modulo(self):

def test_div(self):

# integer div, but deal with the 0's
# integer div, but deal with the 0's (GH 9144)
p = DataFrame({ 'first' : [3,4,5,8], 'second' : [0,0,0,3] })
result = p / p

### this is technically wrong as the integer portion is coerced to float ###
expected = DataFrame({ 'first' : Series([1,1,1,1],dtype='float64'), 'second' : Series([np.inf,np.inf,np.inf,1]) })
expected = DataFrame({'first': Series([1.0, 1.0, 1.0, 1.0]),
'second': Series([nan, nan, nan, 1])})
assert_frame_equal(result,expected)

result2 = DataFrame(p.values.astype('float64')/p.values,index=p.index,columns=p.columns).fillna(np.inf)
result2 = DataFrame(p.values.astype('float') / p.values, index=p.index,
columns=p.columns)
assert_frame_equal(result2,expected)

result = p / 0
expected = DataFrame(np.inf,index=p.index,columns=p.columns)
expected = DataFrame(inf, index=p.index, columns=p.columns)
expected.iloc[0:3, 1] = nan
assert_frame_equal(result,expected)

# numpy has a slightly different (wrong) treatment
result2 = DataFrame(p.values.astype('float64')/0,index=p.index,columns=p.columns).fillna(np.inf)
result2 = DataFrame(p.values.astype('float64') / 0, index=p.index,
columns=p.columns)
assert_frame_equal(result2,expected)

p = DataFrame(np.random.randn(10, 5))
Expand Down Expand Up @@ -5604,7 +5607,7 @@ def test_arith_flex_series(self):

# broadcasting issue in GH7325
df = DataFrame(np.arange(3*2).reshape((3,2)),dtype='int64')
expected = DataFrame([[np.inf,np.inf],[1.0,1.5],[1.0,1.25]])
expected = DataFrame([[nan, inf], [1.0, 1.5], [1.0, 1.25]])
result = df.div(df[0],axis='index')
assert_frame_equal(result,expected)

Expand Down
34 changes: 30 additions & 4 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

import nose

from numpy import nan
from numpy import nan, inf
import numpy as np
import numpy.ma as ma
import pandas as pd
Expand Down Expand Up @@ -2689,6 +2689,17 @@ def test_modulo(self):
result2 = p['second'] % p['first']
self.assertFalse(np.array_equal(result, result2))

# GH 9144
s = Series([0, 1])

result = s % 0
expected = Series([nan, nan])
assert_series_equal(result, expected)

result = 0 % s
expected = Series([nan, 0.0])
assert_series_equal(result, expected)

def test_div(self):

# no longer do integer div for any ops, but deal with the 0's
Expand Down Expand Up @@ -2730,6 +2741,21 @@ def test_div(self):
result = p['second'] / p['first']
assert_series_equal(result, expected)

# GH 9144
s = Series([-1, 0, 1])

result = 0 / s
expected = Series([0.0, nan, 0.0])
assert_series_equal(result, expected)

result = s / 0
expected = Series([-inf, nan, inf])
assert_series_equal(result, expected)

result = s // 0
expected = Series([-inf, nan, inf])
assert_series_equal(result, expected)

def test_operators(self):

def _check_op(series, other, op, pos_only=False):
Expand Down Expand Up @@ -6414,17 +6440,17 @@ def test_pct_change_shift_over_nas(self):
def test_autocorr(self):
# Just run the function
corr1 = self.ts.autocorr()

# Now run it with the lag parameter
corr2 = self.ts.autocorr(lag=1)

# corr() with lag needs Series of at least length 2
if len(self.ts) <= 2:
self.assertTrue(np.isnan(corr1))
self.assertTrue(np.isnan(corr2))
else:
self.assertEqual(corr1, corr2)

# Choose a random lag between 1 and length of Series - 2
# and compare the result with the Series corr() function
n = 1 + np.random.randint(max(1, len(self.ts) - 2))
Expand Down
45 changes: 45 additions & 0 deletions vb_suite/binary_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,51 @@
Benchmark("df * df2", setup, name='frame_mult_no_ne',cleanup="expr.set_use_numexpr(True)",
start_date=datetime(2013, 2, 26))

#----------------------------------------------------------------------
# division

# Each benchmark below pairs a setup snippet (executed once, not timed)
# with the single statement whose run time is measured.

# float frame, true division by scalar zero
setup = common_setup + """
df = DataFrame(np.random.randn(1000, 1000))
"""
frame_float_div_by_zero = \
    Benchmark("df / 0", setup, name='frame_float_div_by_zero')

# float frame, floor division by scalar zero
setup = common_setup + """
df = DataFrame(np.random.randn(1000, 1000))
"""
frame_float_floor_by_zero = \
    Benchmark("df // 0", setup, name='frame_float_floor_by_zero')

# integer frame, true division by scalar zero.
# The shape has to be passed as ``size``; handing the tuple over as the first
# positional argument makes it the ``low`` bound instead of the output shape.
setup = common_setup + """
df = DataFrame(np.random.randint(np.iinfo(np.int16).min,
                                 np.iinfo(np.int16).max,
                                 size=(1000, 1000)))
"""
frame_int_div_by_zero = \
    Benchmark("df / 0", setup, name='frame_int_div_by_zero')

# float frame divided element-wise by another float frame.
# NOTE(review): despite the name, the timed statement is floor division
# (``//``); kept as-is so the recorded benchmark keeps measuring the same op.
setup = common_setup + """
df = DataFrame(np.random.randn(1000, 1000))
df2 = DataFrame(np.random.randn(1000, 1000))
"""
frame_float_div = \
    Benchmark("df // df2", setup, name='frame_float_div')

#----------------------------------------------------------------------
# modulo

# float frame modulo another float frame
setup = common_setup + """
df = DataFrame(np.random.randn(1000, 1000))
df2 = DataFrame(np.random.randn(1000, 1000))
"""
frame_float_mod = \
    Benchmark("df % df2", setup, name='frame_float_mod')

# integer frame modulo another integer frame; the int16 value range includes
# zero, so the divisor can contain zeros.
setup = common_setup + """
df = DataFrame(np.random.randint(np.iinfo(np.int16).min,
                                 np.iinfo(np.int16).max,
                                 size=(1000, 1000)))
df2 = DataFrame(np.random.randint(np.iinfo(np.int16).min,
                                  np.iinfo(np.int16).max,
                                  size=(1000, 1000)))
"""
frame_int_mod = \
    Benchmark("df % df2", setup, name='frame_int_mod')

#----------------------------------------------------------------------
# multi and

Expand Down

0 comments on commit 85342ee

Please sign in to comment.