From 044a99eceb481d620e4c81a4fc1fbb7ae0c9a461 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 12 Oct 2018 15:13:30 -0500 Subject: [PATCH 01/12] BUG/PERF: Avoid listifying in dispatch_to_extension_op This simplifies dispatch_to_extension_op. The remaining logic is simply unboxing Series / Indexes in favor of their underlying arrays. This forced two additional changes 1. Move some logic that IntegerArray relied on down to the IntegerArray ops. Things like handling of 0-dim ndarrays was previously broken on IntegerArray ops, but work with Serires[IntegerArray] 2. Fix pandas handling of 1 ** NA. --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/arrays/integer.py | 29 +++++++++---- pandas/core/ops.py | 33 ++++++-------- pandas/tests/arrays/test_integer.py | 60 +++++++++++++++++++++++++- pandas/tests/extension/test_integer.py | 1 - 5 files changed, 93 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 5811a8c4c45ff..4a438f0cd8862 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -845,6 +845,7 @@ Numeric - Bug in :meth:`DataFrame.apply` where, when supplied with a string argument and additional positional or keyword arguments (e.g. ``df.apply('sum', min_count=1)``), a ``TypeError`` was wrongly raised (:issue:`22376`) - Bug in :meth:`DataFrame.astype` to extension dtype may raise ``AttributeError`` (:issue:`22578`) - Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype arithmetic operations with ``ndarray`` with integer dtype incorrectly treating the narray as ``timedelta64[ns]`` dtype (:issue:`23114`) +- Bug in :meth:`Series.rpow` with object dtype ``NaN`` for ``1 ** NA`` instead of ``1`` (:issue:`22922`). Strings ^^^^^^^ diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 9917045f2f7d2..81a0da9f1e8fd 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -280,6 +280,8 @@ def _coerce_to_ndarray(self): data[self._mask] = self._na_value return data + __array_priority__ = 1 # higher than ndarray so ops dispatch to us + def __array__(self, dtype=None): """ the array interface, return my values @@ -288,12 +290,6 @@ def __array__(self, dtype=None): return self._coerce_to_ndarray() def __iter__(self): - """Iterate over elements of the array. - - """ - # This needs to be implemented so that pandas recognizes extension - # arrays as list-like. The default implementation makes successive - # calls to ``__getitem__``, which may be slower than necessary. for i in range(len(self)): if self._mask[i]: yield self.dtype.na_value @@ -504,8 +500,13 @@ def cmp_method(self, other): op_name = op.__name__ mask = None + if isinstance(other, IntegerArray): other, mask = other._data, other._mask + + elif getattr(other, 'ndim', None) == 0: + other = other.item() + elif is_list_like(other): other = np.asarray(other) if other.ndim > 0 and len(self) != len(other): @@ -586,14 +587,20 @@ def integer_arithmetic_method(self, other): op_name = op.__name__ mask = None + + if getattr(other, 'ndim', 0) > 1: + raise NotImplementedError( + "can only perform ops with 1-d structures") + if isinstance(other, (ABCSeries, ABCIndexClass)): other = getattr(other, 'values', other) if isinstance(other, IntegerArray): other, mask = other._data, other._mask - elif getattr(other, 'ndim', 0) > 1: - raise NotImplementedError( - "can only perform ops with 1-d structures") + + elif getattr(other, 'ndim', None) == 0: + other = other.item() + elif is_list_like(other): other = np.asarray(other) if not other.ndim: @@ -612,6 +619,10 @@ def integer_arithmetic_method(self, other): else: mask = self._mask | mask + if op_name == 'rpow': + # 1 ** np.nan is 1. So we have to unmask those. + mask = np.where(other == 1, False, mask) + with np.errstate(all='ignore'): result = op(self._data, other) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 640b2812d3e85..60c84973b2e11 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -862,10 +862,16 @@ def masked_arith_op(x, y, op): # mask is only meaningful for x result = np.empty(x.size, dtype=x.dtype) mask = notna(xrav) + if mask.any(): with np.errstate(all='ignore'): result[mask] = op(xrav[mask], y) + if op == pow: + result = np.where(~mask, x, result) + elif op == rpow: + result = np.where(~mask, y, result) + result, changed = maybe_upcast_putmask(result, ~mask, np.nan) result = result.reshape(x.shape) # 2D compat return result @@ -1202,29 +1208,16 @@ def dispatch_to_extension_op(op, left, right): # The op calls will raise TypeError if the op is not defined # on the ExtensionArray - # TODO(jreback) - # we need to listify to avoid ndarray, or non-same-type extension array - # dispatching - - if is_extension_array_dtype(left): - - new_left = left.values - if isinstance(right, np.ndarray): - - # handle numpy scalars, this is a PITA - # TODO(jreback) - new_right = lib.item_from_zerodim(right) - if is_scalar(new_right): - new_right = [new_right] - new_right = list(new_right) - elif is_extension_array_dtype(right) and type(left) != type(right): - new_right = list(right) - else: - new_right = right + # unbox Series and Index to arrays + if isinstance(left, (ABCSeries, ABCIndexClass)): + new_left = left._values else: + new_left = left - new_left = list(left.values) + if isinstance(right, (ABCSeries, ABCIndexClass)): + new_right = right._values + else: new_right = right res_values = op(new_left, new_right) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 23ee8d217bd59..293a9da0e8a86 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -218,25 +218,68 @@ def test_arith_integer_array(self, data, all_arithmetic_operators): def test_arith_series_with_scalar(self, data, all_arithmetic_operators): # scalar op = all_arithmetic_operators + if op == '__rpow__': + raise pytest.skip("__rpow__ tested separately.") s = pd.Series(data) self._check_op(s, op, 1, exc=TypeError) + def test_arith_series_with_scalar_rpow(self, data): + s = pd.Series(data) + # 1^x is 1.0 for all x, so test separately + result = 1 ** s + expected = pd.Series(1, index=s.index, dtype=data.dtype.numpy_dtype) + assert result.dtype == data.dtype + result = result.astype(data.dtype.numpy_dtype) + self.assert_series_equal(result, expected) + + # test other bases regularly + self._check_op(s, '__rpow__', 2, exc=None) + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar op = all_arithmetic_operators + if op == '__rpow__': + raise pytest.skip("__rpow__ tested separately.") + df = pd.DataFrame({'A': data}) self._check_op(df, op, 1, exc=TypeError) + def test_arith_frame_with_scalar_rpow(self, data): + df = pd.DataFrame({"A": data}) + result = 1.0 ** df + expected = pd.DataFrame(1.0, index=df.index, columns=df.columns) + self.assert_frame_equal(result, expected) + + # test other bases regularly + self._check_op(df, '__rpow__', 2, exc=TypeError) + def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op = all_arithmetic_operators + if op == '__rpow__': + raise pytest.skip("__rpow__ tested separately.") + s = pd.Series(data) other = np.ones(len(s), dtype=s.dtype.type) self._check_op(s, op, other, exc=TypeError) + def test_arith_series_with_array_rpow(self, data): + s = pd.Series(data) + other = np.ones(len(s), dtype=data.dtype.numpy_dtype) + expected = pd.Series(1, index=s.index, dtype=data.dtype.numpy_dtype) + result = other ** s + + assert result.dtype == data.dtype + result = result.astype(data.dtype.numpy_dtype) + + self.assert_series_equal(result, expected) + + other = 2 * np.ones(len(s), dtype=s.dtype.type) + self._check_op(s, '__rpow__', other, exc=TypeError) + def test_arith_coerce_scalar(self, data, all_arithmetic_operators): op = all_arithmetic_operators @@ -248,13 +291,20 @@ def test_arith_coerce_scalar(self, data, all_arithmetic_operators): @pytest.mark.parametrize("other", [1., 1.0, np.array(1.), np.array([1.])]) def test_arithmetic_conversion(self, all_arithmetic_operators, other): # if we have a float operand we should have a float result - # if if that is equal to an integer + # if that is equal to an integer op = self.get_op_from_name(all_arithmetic_operators) s = pd.Series([1, 2, 3], dtype='Int64') result = op(s, other) assert result.dtype is np.dtype('float') + @pytest.mark.parametrize("other", [0, 0.5]) + def test_arith_zero_dim_ndarray(self, other): + arr = integer_array([1, None, 2]) + result = arr + np.array(other) + expected = arr + other + tm.assert_equal(result, expected) + def test_error(self, data, all_arithmetic_operators): # invalid ops @@ -323,6 +373,14 @@ def test_compare_array(self, data, all_compare_operators): other = pd.Series([0] * len(data)) self._compare_other(s, data, op_name, other) + def test_rpow_one_to_na(self): + # https://github.com/pandas-dev/pandas/issues/22022 + # NumPy says 1 ** nan is 1. + arr = integer_array([np.nan, np.nan]) + result = np.array([1.0, 2.0]) ** arr + expected = np.array([1.0, np.nan]) + tm.assert_numpy_array_equal(result, expected) + class TestCasting(object): pass diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 89c36bbe7b325..668939e775148 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -143,7 +143,6 @@ def test_error(self, data, all_arithmetic_operators): # other specific errors tested in the integer array specific tests pass - @pytest.mark.xfail(reason="EA is listified. GH-22922", strict=True) def test_add_series_with_extension_array(self, data): super(TestArithmeticOps, self).test_add_series_with_extension_array( data From fe693d6ee5f0ea7e5d7273eab9615a4b21a84fd2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 14 Oct 2018 16:41:22 -0500 Subject: [PATCH 02/12] array priorities, docs --- doc/source/extending.rst | 12 ++++++++++++ pandas/core/arrays/integer.py | 2 +- pandas/tests/extension/decimal/array.py | 1 + pandas/tests/extension/json/array.py | 1 + 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/doc/source/extending.rst b/doc/source/extending.rst index ab940384594bc..8ae327cac57c5 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -173,6 +173,18 @@ or not that succeeds depends on whether the operation returns a result that's valid for the ``ExtensionArray``. If an ``ExtensionArray`` cannot be reconstructed, an ndarray containing the scalars returned instead. +For ease of implementation and consistency with operations between pandas +and NumPy ndarrays, we recommend *not* handling Series and DataFrame in +your binary ops. Instead, you should detect these cases and return ``NotImplemented``. +When pandas encounters an operation like ``op(Series, ExtensionArray)``, pandas +will + +1. unbox the array from the ``Series`` (roughly ``Series.values``) +2. call ``result = op(values, ExtensionArray)`` +3. re-box the result in a ``Series`` + +Similar for DataFrame. + .. _extending.extension.testing: Testing Extension Arrays diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 81a0da9f1e8fd..6ee43074a768d 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -280,7 +280,7 @@ def _coerce_to_ndarray(self): data[self._mask] = self._na_value return data - __array_priority__ = 1 # higher than ndarray so ops dispatch to us + __array_priority__ = 1000 # higher than ndarray so ops dispatch to us def __array__(self, dtype=None): """ diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 53a598559393c..fe07aae61c5e2 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -47,6 +47,7 @@ def _is_numeric(self): class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin): + __array_priority__ = 1000 def __init__(self, values, dtype=None, copy=False, context=None): for val in values: diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 976511941042d..5c63e50c3eaaa 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -55,6 +55,7 @@ def construct_from_string(cls, string): class JSONArray(ExtensionArray): dtype = JSONDtype() + __array_priority__ = 1000 def __init__(self, values, dtype=None, copy=False): for val in values: From cf945ee4f08a7c3e4406d6cb8d15b74e3d90163e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 15 Oct 2018 08:30:20 -0500 Subject: [PATCH 03/12] REF: Return NotImplemented for ops with series / index --- doc/source/extending.rst | 6 ++---- pandas/core/arrays/base.py | 6 ++++++ pandas/core/arrays/integer.py | 11 ++++++++--- pandas/core/sparse/array.py | 25 +++++++++++++++++++++---- 4 files changed, 37 insertions(+), 11 deletions(-) diff --git a/doc/source/extending.rst b/doc/source/extending.rst index 8ae327cac57c5..a5348dc4d7091 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -174,8 +174,8 @@ that's valid for the ``ExtensionArray``. If an ``ExtensionArray`` cannot be reconstructed, an ndarray containing the scalars returned instead. For ease of implementation and consistency with operations between pandas -and NumPy ndarrays, we recommend *not* handling Series and DataFrame in -your binary ops. Instead, you should detect these cases and return ``NotImplemented``. +and NumPy ndarrays, we recommend *not* handling Series and Indexes in your binary ops. +Instead, you should detect these cases and return ``NotImplemented``. When pandas encounters an operation like ``op(Series, ExtensionArray)``, pandas will @@ -183,8 +183,6 @@ will 2. call ``result = op(values, ExtensionArray)`` 3. re-box the result in a ``Series`` -Similar for DataFrame. - .. _extending.extension.testing: Testing Extension Arrays diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index b745569d5bd76..cff93091c1a41 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -9,6 +9,7 @@ import operator +from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass from pandas.errors import AbstractMethodError from pandas.compat.numpy import function as nv from pandas.compat import set_function_name, PY3 @@ -825,6 +826,11 @@ def convert_values(param): else: # Assume its an object ovalues = [param] * len(self) return ovalues + + if isinstance(other, (ABCSeries, ABCIndexClass)): + # rely on pandas to unbox and dispatch to us + return NotImplemented + lvalues = self rvalues = convert_values(other) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 6ee43074a768d..75a0c39af991a 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -501,6 +501,10 @@ def cmp_method(self, other): op_name = op.__name__ mask = None + if isinstance(other, (ABCSeries, ABCIndexClass)): + # Rely on pandas to unbox and dispatch to us. + return NotImplemented + if isinstance(other, IntegerArray): other, mask = other._data, other._mask @@ -588,13 +592,14 @@ def integer_arithmetic_method(self, other): op_name = op.__name__ mask = None + if isinstance(other, (ABCSeries, ABCIndexClass)): + # Rely on pandas to unbox and dispatch to us. + return NotImplemented + if getattr(other, 'ndim', 0) > 1: raise NotImplementedError( "can only perform ops with 1-d structures") - if isinstance(other, (ABCSeries, ABCIndexClass)): - other = getattr(other, 'values', other) - if isinstance(other, IntegerArray): other, mask = other._data, other._mask diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 15b5118db2230..cac0a9da4278d 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -1223,7 +1223,21 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): 'power': 'pow', 'remainder': 'mod', 'divide': 'div', + 'equal': 'eq', + 'not_equal': 'ne', + 'less': 'lt', + 'less_equal': 'le', + 'greater': 'gt', + 'greater_equal': 'ge', } + + flipped = { + 'lt': '__gt__', + 'le': '__ge__', + 'gt': '__lt__', + 'ge': '__le__', + } + op_name = ufunc.__name__ op_name = aliases.get(op_name, op_name) @@ -1231,7 +1245,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if isinstance(inputs[0], type(self)): return getattr(self, '__{}__'.format(op_name))(inputs[1]) else: - return getattr(self, '__r{}__'.format(op_name))(inputs[0]) + name = flipped.get(op_name, '__r{}__'.format(op_name)) + return getattr(self, name)(inputs[0]) if len(inputs) == 1: # No alignment necessary. @@ -1280,7 +1295,8 @@ def sparse_arithmetic_method(self, other): op_name = op.__name__ if isinstance(other, (ABCSeries, ABCIndexClass)): - other = getattr(other, 'values', other) + # Rely on pandas to dispatch to us. + return NotImplemented if isinstance(other, SparseArray): return _sparse_array_op(self, other, op, op_name) @@ -1325,10 +1341,11 @@ def cmp_method(self, other): op_name = op_name[:-1] if isinstance(other, (ABCSeries, ABCIndexClass)): - other = getattr(other, 'values', other) + # Rely on pandas to unbox and dispatch to us. + return NotImplemented if not is_scalar(other) and not isinstance(other, type(self)): - # convert list-like to ndarary + # convert list-like to ndarray other = np.asarray(other) if isinstance(other, np.ndarray): From b7906ea2f9448652409114a9b0c51badb7945cd8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 15 Oct 2018 10:17:46 -0500 Subject: [PATCH 04/12] fix eq, ne --- pandas/core/sparse/array.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index cac0a9da4278d..f29eec97ad5b4 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -1236,6 +1236,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): 'le': '__ge__', 'gt': '__lt__', 'ge': '__le__', + 'eq': '__eq__', + 'ne': '__ne__', } op_name = ufunc.__name__ From 01256692c31ff6dd9745fa0ec84f91d06a832def Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 15 Oct 2018 10:33:30 -0500 Subject: [PATCH 05/12] added base test --- pandas/tests/extension/base/ops.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 9c1cfcdfeacc3..7baa6284e398f 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -107,6 +107,18 @@ def test_error(self, data, all_arithmetic_operators): with pytest.raises(AttributeError): getattr(data, op_name) + def test_direct_arith_with_series_returns_not_implemented(self, data): + # EAs should return NotImplemented for ops with Series. + # Pandas takes care of unboxing the series and calling the EA's op. + other = pd.Series(data) + if hasattr(data, '__add__'): + result = data.__add__(other) + assert result is NotImplemented + else: + raise pytest.skip( + "{} does not implement add".format(data.__class__.__name__) + ) + class BaseComparisonOpsTests(BaseOpsUtil): """Various Series and DataFrame comparison ops methods.""" @@ -140,3 +152,15 @@ def test_compare_array(self, data, all_compare_operators): s = pd.Series(data) other = pd.Series([data[0]] * len(data)) self._compare_other(s, data, op_name, other) + + def test_direct_arith_with_series_returns_not_implemented(self, data): + # EAs should return NotImplemented for ops with Series. + # Pandas takes care of unboxing the series and calling the EA's op. + other = pd.Series(data) + if hasattr(data, '__eq__'): + result = data.__eq__(other) + assert result is NotImplemented + else: + raise pytest.skip( + "{} does not implement __eq__".format(data.__class__.__name__) + ) From 07a632eaf6491ee7be621438e0c3b15eea664131 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 17:48:46 -0500 Subject: [PATCH 06/12] zerodim --- pandas/core/arrays/integer.py | 16 +++++---- pandas/tests/arrays/test_integer.py | 54 ++++++----------------------- 2 files changed, 20 insertions(+), 50 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 75a0c39af991a..f43b840fad2ad 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -3,7 +3,8 @@ import copy import numpy as np -from pandas._libs.lib import infer_dtype + +from pandas._libs import lib from pandas.util._decorators import cache_readonly from pandas.compat import u, range, string_types from pandas.compat import set_function_name @@ -171,7 +172,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False): values = np.array(values, copy=copy) if is_object_dtype(values): - inferred_type = infer_dtype(values) + inferred_type = lib.infer_dtype(values) if inferred_type not in ['floating', 'integer', 'mixed-integer', 'mixed-integer-float']: raise TypeError("{} cannot be converted to an IntegerDtype".format( @@ -508,14 +509,13 @@ def cmp_method(self, other): if isinstance(other, IntegerArray): other, mask = other._data, other._mask - elif getattr(other, 'ndim', None) == 0: - other = other.item() - elif is_list_like(other): other = np.asarray(other) if other.ndim > 0 and len(self) != len(other): raise ValueError('Lengths must match to compare') + other = lib.item_from_zerodim(other) + # numpy will show a DeprecationWarning on invalid elementwise # comparisons, this will raise in the future with warnings.catch_warnings(): @@ -624,7 +624,11 @@ def integer_arithmetic_method(self, other): else: mask = self._mask | mask - if op_name == 'rpow': + if op_name == 'pow': + # 1 ** np.nan is 1. So we have to unmask those. + mask = np.where(self == 1, False, mask) + + elif op_name == 'rpow': # 1 ** np.nan is 1. So we have to unmask those. mask = np.where(other == 1, False, mask) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 293a9da0e8a86..4cdd5a214d1d5 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -128,6 +128,9 @@ def _check_op(self, s, op_name, other, exc=None): if omask is not None: mask |= omask + if op_name == '__rpow__': + mask = np.where(other == 1, False, mask) + # float result type or float op if ((is_float_dtype(other) or is_float(other) or op_name in ['__rtruediv__', '__truediv__', @@ -171,7 +174,6 @@ def _check_op_integer(self, result, expected, mask, s, op_name, other): else: expected[(s.values == 0) & ((expected == 0) | expected.isna())] = 0 - try: expected[(expected == np.inf) | (expected == -np.inf)] = fill_value original = expected @@ -218,68 +220,25 @@ def test_arith_integer_array(self, data, all_arithmetic_operators): def test_arith_series_with_scalar(self, data, all_arithmetic_operators): # scalar op = all_arithmetic_operators - if op == '__rpow__': - raise pytest.skip("__rpow__ tested separately.") s = pd.Series(data) self._check_op(s, op, 1, exc=TypeError) - def test_arith_series_with_scalar_rpow(self, data): - s = pd.Series(data) - # 1^x is 1.0 for all x, so test separately - result = 1 ** s - expected = pd.Series(1, index=s.index, dtype=data.dtype.numpy_dtype) - assert result.dtype == data.dtype - result = result.astype(data.dtype.numpy_dtype) - self.assert_series_equal(result, expected) - - # test other bases regularly - self._check_op(s, '__rpow__', 2, exc=None) - def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar op = all_arithmetic_operators - if op == '__rpow__': - raise pytest.skip("__rpow__ tested separately.") - df = pd.DataFrame({'A': data}) self._check_op(df, op, 1, exc=TypeError) - def test_arith_frame_with_scalar_rpow(self, data): - df = pd.DataFrame({"A": data}) - result = 1.0 ** df - expected = pd.DataFrame(1.0, index=df.index, columns=df.columns) - self.assert_frame_equal(result, expected) - - # test other bases regularly - self._check_op(df, '__rpow__', 2, exc=TypeError) - def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op = all_arithmetic_operators - if op == '__rpow__': - raise pytest.skip("__rpow__ tested separately.") - s = pd.Series(data) other = np.ones(len(s), dtype=s.dtype.type) self._check_op(s, op, other, exc=TypeError) - def test_arith_series_with_array_rpow(self, data): - s = pd.Series(data) - other = np.ones(len(s), dtype=data.dtype.numpy_dtype) - expected = pd.Series(1, index=s.index, dtype=data.dtype.numpy_dtype) - result = other ** s - - assert result.dtype == data.dtype - result = result.astype(data.dtype.numpy_dtype) - - self.assert_series_equal(result, expected) - - other = 2 * np.ones(len(s), dtype=s.dtype.type) - self._check_op(s, '__rpow__', other, exc=TypeError) - def test_arith_coerce_scalar(self, data, all_arithmetic_operators): op = all_arithmetic_operators @@ -335,6 +294,13 @@ def test_error(self, data, all_arithmetic_operators): with pytest.raises(NotImplementedError): opa(np.arange(len(s)).reshape(-1, len(s))) + def test_pow(self): + a = pd.core.arrays.integer_array([1, None, None, 1]) + b = pd.core.arrays.integer_array([1, None, 1, None]) + result = a ** b + expected = pd.core.arrays.integer_array([1, None, None, 1]) + tm.assert_extension_array_equal(result, expected) + class TestComparisonOps(BaseOpsUtil): From e378c7da240d88fe4dc21c338feca6e03516e867 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 20:48:34 -0500 Subject: [PATCH 07/12] move --- pandas/tests/arrays/test_integer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 4cdd5a214d1d5..a423d50ecb877 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -301,6 +301,14 @@ def test_pow(self): expected = pd.core.arrays.integer_array([1, None, None, 1]) tm.assert_extension_array_equal(result, expected) + def test_rpow_one_to_na(self): + # https://github.com/pandas-dev/pandas/issues/22022 + # NumPy says 1 ** nan is 1. + arr = integer_array([np.nan, np.nan]) + result = np.array([1.0, 2.0]) ** arr + expected = np.array([1.0, np.nan]) + tm.assert_numpy_array_equal(result, expected) + class TestComparisonOps(BaseOpsUtil): @@ -339,14 +347,6 @@ def test_compare_array(self, data, all_compare_operators): other = pd.Series([0] * len(data)) self._compare_other(s, data, op_name, other) - def test_rpow_one_to_na(self): - # https://github.com/pandas-dev/pandas/issues/22022 - # NumPy says 1 ** nan is 1. - arr = integer_array([np.nan, np.nan]) - result = np.array([1.0, 2.0]) ** arr - expected = np.array([1.0, np.nan]) - tm.assert_numpy_array_equal(result, expected) - class TestCasting(object): pass From 42b91d0e16cfb9463581bf406150ff962081e30a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 11:33:31 -0500 Subject: [PATCH 08/12] mask first --- pandas/core/arrays/integer.py | 3 +-- pandas/core/ops.py | 11 ++++++----- pandas/tests/series/test_arithmetic.py | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index f43b840fad2ad..52762514d00c2 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -624,12 +624,11 @@ def integer_arithmetic_method(self, other): else: mask = self._mask | mask + # 1 ** np.nan is 1. So we have to unmask those. if op_name == 'pow': - # 1 ** np.nan is 1. So we have to unmask those. mask = np.where(self == 1, False, mask) elif op_name == 'rpow': - # 1 ** np.nan is 1. So we have to unmask those. mask = np.where(other == 1, False, mask) with np.errstate(all='ignore'): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 4d455a5547492..daa87ad173772 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -863,15 +863,16 @@ def masked_arith_op(x, y, op): result = np.empty(x.size, dtype=x.dtype) mask = notna(xrav) + # 1 ** np.nan is 1. So we have to unmask those. + if op == pow: + mask = np.where(x == 1, False, mask) + elif op == rpow: + mask = np.where(y == 1, False, mask) + if mask.any(): with np.errstate(all='ignore'): result[mask] = op(xrav[mask], y) - if op == pow: - result = np.where(~mask, x, result) - elif op == rpow: - result = np.where(~mask, y, result) - result, changed = maybe_upcast_putmask(result, ~mask, np.nan) result = result.reshape(x.shape) # 2D compat return result diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 37ba1c91368b3..43b24930f6303 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import operator +import numpy as np import pytest from pandas import Series @@ -66,3 +67,18 @@ def test_ser_cmp_result_names(self, names, op): ser = Series(cidx).rename(names[1]) result = op(ser, cidx) assert result.name == names[2] + + +def test_pow_ops_object(): + # 22922 + # pow is weird with masking & 1, so testing here + a = Series([1, np.nan, 1, np.nan], dtype=object) + b = Series([1, np.nan, np.nan, 1], dtype=object) + result = a ** b + expected = Series(a.values ** b.values, dtype=object) + tm.assert_series_equal(result, expected) + + result = b ** a + expected = Series(b.values ** a.values, dtype=object) + + tm.assert_series_equal(result, expected) From f03e66ba9e7eb5ad8a0cd6f4021efb4af6fe7cc6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 11:40:21 -0500 Subject: [PATCH 09/12] test none nan --- pandas/tests/arrays/test_integer.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index a423d50ecb877..72e4907d6108b 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -128,7 +128,11 @@ def _check_op(self, s, op_name, other, exc=None): if omask is not None: mask |= omask - if op_name == '__rpow__': + # 1 ** na is na, so need to unmask those + if op_name == '__pow__': + mask = np.where(s == 1, False, mask) + + elif op_name == '__rpow__': mask = np.where(other == 1, False, mask) # float result type or float op @@ -295,8 +299,8 @@ def test_error(self, data, all_arithmetic_operators): opa(np.arange(len(s)).reshape(-1, len(s))) def test_pow(self): - a = pd.core.arrays.integer_array([1, None, None, 1]) - b = pd.core.arrays.integer_array([1, None, 1, None]) + a = integer_array([1, None, None, 1]) + b = integer_array([1, None, 1, None]) result = a ** b expected = pd.core.arrays.integer_array([1, None, None, 1]) tm.assert_extension_array_equal(result, expected) @@ -521,6 +525,18 @@ def test_integer_array_constructor(): IntegerArray(values) +@pytest.mark.parametrize('a, b', [ + ([1, None], [1, np.nan]), + pytest.param([None], [np.nan], + marks=pytest.mark.xfail(reason='infer object dtype.', + strict=True)), +]) +def test_integer_array_constructor_none_is_nan(a, b): + result = integer_array(a) + expected = integer_array(b) + tm.assert_extension_array_equal(result, expected) + + def test_integer_array_constructor_copy(): values = np.array([1, 2, 3, 4], dtype='int64') mask = np.array([False, False, False, True], dtype='bool') From f14cf0b752307a473a268857f0ec25e9e464acb5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 11:43:26 -0500 Subject: [PATCH 10/12] comments --- pandas/tests/arrays/test_integer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 72e4907d6108b..66d2baac8c91c 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -299,15 +299,15 @@ def test_error(self, data, all_arithmetic_operators): opa(np.arange(len(s)).reshape(-1, len(s))) def test_pow(self): - a = integer_array([1, None, None, 1]) - b = integer_array([1, None, 1, None]) + # https://github.com/pandas-dev/pandas/issues/22022 + a = integer_array([1, np.nan, np.nan, 1]) + b = integer_array([1, np.nan, 1, np.nan]) result = a ** b - expected = pd.core.arrays.integer_array([1, None, None, 1]) + expected = pd.core.arrays.integer_array([1, np.nan, np.nan, 1]) tm.assert_extension_array_equal(result, expected) def test_rpow_one_to_na(self): # https://github.com/pandas-dev/pandas/issues/22022 - # NumPy says 1 ** nan is 1. arr = integer_array([np.nan, np.nan]) result = np.array([1.0, 2.0]) ** arr expected = np.array([1.0, np.nan]) @@ -528,7 +528,7 @@ def test_integer_array_constructor(): @pytest.mark.parametrize('a, b', [ ([1, None], [1, np.nan]), pytest.param([None], [np.nan], - marks=pytest.mark.xfail(reason='infer object dtype.', + marks=pytest.mark.xfail(reason='GH-23224', strict=True)), ]) def test_integer_array_constructor_none_is_nan(a, b): From 4fc1d1b57c4faa483ee983c8b24656ffa6160d6f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 12:44:33 -0500 Subject: [PATCH 11/12] Note __array_ufunc__ and priority [ci skip] --- doc/source/extending.rst | 8 ++++++++ pandas/core/arrays/base.py | 29 +++++++++++++++++++++++------ 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/doc/source/extending.rst b/doc/source/extending.rst index a5348dc4d7091..d143b87347595 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -135,6 +135,14 @@ There are two approaches for providing operator support for your ExtensionArray: 2. Use an operator implementation from pandas that depends on operators that are already defined on the underlying elements (scalars) of the ExtensionArray. +.. note:: + + Regardless of the approach, you may want to implement ``__array_ufunc__`` + or set ``__array_priority__`` if you want your implementation + to be called when involved in binary operations with NumPy + arrays. See the `numpy documentation `__ + for more. + For the first approach, you define selected operators, e.g., ``__add__``, ``__le__``, etc. that you want your ``ExtensionArray`` subclass to support. diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index cff93091c1a41..237a6d9993a7e 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -110,6 +110,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): compatible with the ExtensionArray. copy : boolean, default False If True, copy the underlying data. + Returns ------- ExtensionArray @@ -725,7 +726,14 @@ def _reduce(self, name, skipna=True, **kwargs): class ExtensionOpsMixin(object): """ - A base class for linking the operators to their dunder names + A base class for linking the operators to their dunder names. + + .. note:: + + You may want to implement ``__array_ufunc__`` + or set ``__array_priority__`` if you want your implementation + to be called when involved in binary operations with NumPy + arrays. """ @classmethod @@ -762,12 +770,14 @@ def _add_comparison_ops(cls): class ExtensionScalarOpsMixin(ExtensionOpsMixin): - """A mixin for defining the arithmetic and logical operations on - an ExtensionArray class, where it is assumed that the underlying objects - have the operators already defined. + """ + A mixin for defining ops on an ExtensionArray. + + It is assumed that the underlying scalar objects have the operators + already defined. - Usage - ------ + Notes + ----- If you have defined a subclass MyExtensionArray(ExtensionArray), then use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to get the arithmetic operators. After the definition of MyExtensionArray, @@ -777,6 +787,13 @@ class ExtensionScalarOpsMixin(ExtensionOpsMixin): MyExtensionArray._add_comparison_ops() to link the operators to your class. + + .. note:: + + You may want to implement ``__array_ufunc__`` + or set ``__array_priority__`` if you want your implementation + to be called when involved in binary operations with NumPy + arrays. """ @classmethod From 03a367e047456de392d9af0bfcd0e58839dff9fe Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 16:26:40 -0500 Subject: [PATCH 12/12] DOC: clarify maybe [skip ci] --- doc/source/extending.rst | 8 +++----- pandas/core/arrays/base.py | 14 ++++++-------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/doc/source/extending.rst b/doc/source/extending.rst index d143b87347595..1e8a8e50dd9e3 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -137,11 +137,9 @@ There are two approaches for providing operator support for your ExtensionArray: .. note:: - Regardless of the approach, you may want to implement ``__array_ufunc__`` - or set ``__array_priority__`` if you want your implementation - to be called when involved in binary operations with NumPy - arrays. See the `numpy documentation `__ - for more. + Regardless of the approach, you may want to set ``__array_priority__`` + if you want your implementation to be called when involved in binary operations + with NumPy arrays. For the first approach, you define selected operators, e.g., ``__add__``, ``__le__``, etc. that you want your ``ExtensionArray`` subclass to support. diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 237a6d9993a7e..f842d1237cb14 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -730,10 +730,9 @@ class ExtensionOpsMixin(object): .. note:: - You may want to implement ``__array_ufunc__`` - or set ``__array_priority__`` if you want your implementation - to be called when involved in binary operations with NumPy - arrays. + You may want to set ``__array_priority__`` if you want your + implementation to be called when involved in binary operations + with NumPy arrays. """ @classmethod @@ -790,10 +789,9 @@ class ExtensionScalarOpsMixin(ExtensionOpsMixin): .. note:: - You may want to implement ``__array_ufunc__`` - or set ``__array_priority__`` if you want your implementation - to be called when involved in binary operations with NumPy - arrays. + You may want to set ``__array_priority__`` if you want your + implementation to be called when involved in binary operations + with NumPy arrays. """ @classmethod