diff --git a/doc/source/extending.rst b/doc/source/extending.rst index ab940384594bc..1e8a8e50dd9e3 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -135,6 +135,12 @@ There are two approaches for providing operator support for your ExtensionArray: 2. Use an operator implementation from pandas that depends on operators that are already defined on the underlying elements (scalars) of the ExtensionArray. +.. note:: + + Regardless of the approach, you may want to set ``__array_priority__`` + if you want your implementation to be called when involved in binary operations + with NumPy arrays. + For the first approach, you define selected operators, e.g., ``__add__``, ``__le__``, etc. that you want your ``ExtensionArray`` subclass to support. @@ -173,6 +179,16 @@ or not that succeeds depends on whether the operation returns a result that's valid for the ``ExtensionArray``. If an ``ExtensionArray`` cannot be reconstructed, an ndarray containing the scalars returned instead. +For ease of implementation and consistency with operations between pandas +and NumPy ndarrays, we recommend *not* handling Series and Indexes in your binary ops. +Instead, you should detect these cases and return ``NotImplemented``. +When pandas encounters an operation like ``op(Series, ExtensionArray)``, pandas +will + +1. unbox the array from the ``Series`` (roughly ``Series.values``) +2. call ``result = op(values, ExtensionArray)`` +3. re-box the result in a ``Series`` + .. _extending.extension.testing: Testing Extension Arrays diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 2c142bdd7185b..6858073e46b07 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -875,6 +875,7 @@ Numeric - Bug in :meth:`DataFrame.apply` where, when supplied with a string argument and additional positional or keyword arguments (e.g. ``df.apply('sum', min_count=1)``), a ``TypeError`` was wrongly raised (:issue:`22376`) - Bug in :meth:`DataFrame.astype` to extension dtype may raise ``AttributeError`` (:issue:`22578`) - Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype arithmetic operations with ``ndarray`` with integer dtype incorrectly treating the narray as ``timedelta64[ns]`` dtype (:issue:`23114`) +- Bug in :meth:`Series.rpow` with object dtype ``NaN`` for ``1 ** NA`` instead of ``1`` (:issue:`22922`). Strings ^^^^^^^ diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index b745569d5bd76..f842d1237cb14 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -9,6 +9,7 @@ import operator +from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass from pandas.errors import AbstractMethodError from pandas.compat.numpy import function as nv from pandas.compat import set_function_name, PY3 @@ -109,6 +110,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): compatible with the ExtensionArray. copy : boolean, default False If True, copy the underlying data. + Returns ------- ExtensionArray @@ -724,7 +726,13 @@ def _reduce(self, name, skipna=True, **kwargs): class ExtensionOpsMixin(object): """ - A base class for linking the operators to their dunder names + A base class for linking the operators to their dunder names. + + .. note:: + + You may want to set ``__array_priority__`` if you want your + implementation to be called when involved in binary operations + with NumPy arrays. """ @classmethod @@ -761,12 +769,14 @@ def _add_comparison_ops(cls): class ExtensionScalarOpsMixin(ExtensionOpsMixin): - """A mixin for defining the arithmetic and logical operations on - an ExtensionArray class, where it is assumed that the underlying objects - have the operators already defined. + """ + A mixin for defining ops on an ExtensionArray. + + It is assumed that the underlying scalar objects have the operators + already defined. - Usage - ------ + Notes + ----- If you have defined a subclass MyExtensionArray(ExtensionArray), then use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to get the arithmetic operators. After the definition of MyExtensionArray, @@ -776,6 +786,12 @@ class ExtensionScalarOpsMixin(ExtensionOpsMixin): MyExtensionArray._add_comparison_ops() to link the operators to your class. + + .. note:: + + You may want to set ``__array_priority__`` if you want your + implementation to be called when involved in binary operations + with NumPy arrays. """ @classmethod @@ -825,6 +841,11 @@ def convert_values(param): else: # Assume its an object ovalues = [param] * len(self) return ovalues + + if isinstance(other, (ABCSeries, ABCIndexClass)): + # rely on pandas to unbox and dispatch to us + return NotImplemented + lvalues = self rvalues = convert_values(other) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 9917045f2f7d2..52762514d00c2 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -3,7 +3,8 @@ import copy import numpy as np -from pandas._libs.lib import infer_dtype + +from pandas._libs import lib from pandas.util._decorators import cache_readonly from pandas.compat import u, range, string_types from pandas.compat import set_function_name @@ -171,7 +172,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False): values = np.array(values, copy=copy) if is_object_dtype(values): - inferred_type = infer_dtype(values) + inferred_type = lib.infer_dtype(values) if inferred_type not in ['floating', 'integer', 'mixed-integer', 'mixed-integer-float']: raise TypeError("{} cannot be converted to an IntegerDtype".format( @@ -280,6 +281,8 @@ def _coerce_to_ndarray(self): data[self._mask] = self._na_value return data + __array_priority__ = 1000 # higher than ndarray so ops dispatch to us + def __array__(self, dtype=None): """ the array interface, return my values @@ -288,12 +291,6 @@ def __array__(self, dtype=None): return self._coerce_to_ndarray() def __iter__(self): - """Iterate over elements of the array. - - """ - # This needs to be implemented so that pandas recognizes extension - # arrays as list-like. The default implementation makes successive - # calls to ``__getitem__``, which may be slower than necessary. for i in range(len(self)): if self._mask[i]: yield self.dtype.na_value @@ -504,13 +501,21 @@ def cmp_method(self, other): op_name = op.__name__ mask = None + + if isinstance(other, (ABCSeries, ABCIndexClass)): + # Rely on pandas to unbox and dispatch to us. + return NotImplemented + if isinstance(other, IntegerArray): other, mask = other._data, other._mask + elif is_list_like(other): other = np.asarray(other) if other.ndim > 0 and len(self) != len(other): raise ValueError('Lengths must match to compare') + other = lib.item_from_zerodim(other) + # numpy will show a DeprecationWarning on invalid elementwise # comparisons, this will raise in the future with warnings.catch_warnings(): @@ -586,14 +591,21 @@ def integer_arithmetic_method(self, other): op_name = op.__name__ mask = None + if isinstance(other, (ABCSeries, ABCIndexClass)): - other = getattr(other, 'values', other) + # Rely on pandas to unbox and dispatch to us. + return NotImplemented - if isinstance(other, IntegerArray): - other, mask = other._data, other._mask - elif getattr(other, 'ndim', 0) > 1: + if getattr(other, 'ndim', 0) > 1: raise NotImplementedError( "can only perform ops with 1-d structures") + + if isinstance(other, IntegerArray): + other, mask = other._data, other._mask + + elif getattr(other, 'ndim', None) == 0: + other = other.item() + elif is_list_like(other): other = np.asarray(other) if not other.ndim: @@ -612,6 +624,13 @@ def integer_arithmetic_method(self, other): else: mask = self._mask | mask + # 1 ** np.nan is 1. So we have to unmask those. + if op_name == 'pow': + mask = np.where(self == 1, False, mask) + + elif op_name == 'rpow': + mask = np.where(other == 1, False, mask) + with np.errstate(all='ignore'): result = op(self._data, other) diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index e6ca7b8de83e4..b853d8fc0c528 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -1471,7 +1471,23 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): 'power': 'pow', 'remainder': 'mod', 'divide': 'div', + 'equal': 'eq', + 'not_equal': 'ne', + 'less': 'lt', + 'less_equal': 'le', + 'greater': 'gt', + 'greater_equal': 'ge', } + + flipped = { + 'lt': '__gt__', + 'le': '__ge__', + 'gt': '__lt__', + 'ge': '__le__', + 'eq': '__eq__', + 'ne': '__ne__', + } + op_name = ufunc.__name__ op_name = aliases.get(op_name, op_name) @@ -1479,7 +1495,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): if isinstance(inputs[0], type(self)): return getattr(self, '__{}__'.format(op_name))(inputs[1]) else: - return getattr(self, '__r{}__'.format(op_name))(inputs[0]) + name = flipped.get(op_name, '__r{}__'.format(op_name)) + return getattr(self, name)(inputs[0]) if len(inputs) == 1: # No alignment necessary. @@ -1528,7 +1545,8 @@ def sparse_arithmetic_method(self, other): op_name = op.__name__ if isinstance(other, (ABCSeries, ABCIndexClass)): - other = getattr(other, 'values', other) + # Rely on pandas to dispatch to us. + return NotImplemented if isinstance(other, SparseArray): return _sparse_array_op(self, other, op, op_name) @@ -1573,10 +1591,11 @@ def cmp_method(self, other): op_name = op_name[:-1] if isinstance(other, (ABCSeries, ABCIndexClass)): - other = getattr(other, 'values', other) + # Rely on pandas to unbox and dispatch to us. + return NotImplemented if not is_scalar(other) and not isinstance(other, type(self)): - # convert list-like to ndarary + # convert list-like to ndarray other = np.asarray(other) if isinstance(other, np.ndarray): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 9791354de7ffa..daa87ad173772 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -862,6 +862,13 @@ def masked_arith_op(x, y, op): # mask is only meaningful for x result = np.empty(x.size, dtype=x.dtype) mask = notna(xrav) + + # 1 ** np.nan is 1. So we have to unmask those. + if op == pow: + mask = np.where(x == 1, False, mask) + elif op == rpow: + mask = np.where(y == 1, False, mask) + if mask.any(): with np.errstate(all='ignore'): result[mask] = op(xrav[mask], y) @@ -1202,29 +1209,16 @@ def dispatch_to_extension_op(op, left, right): # The op calls will raise TypeError if the op is not defined # on the ExtensionArray - # TODO(jreback) - # we need to listify to avoid ndarray, or non-same-type extension array - # dispatching - - if is_extension_array_dtype(left): - - new_left = left.values - if isinstance(right, np.ndarray): - - # handle numpy scalars, this is a PITA - # TODO(jreback) - new_right = lib.item_from_zerodim(right) - if is_scalar(new_right): - new_right = [new_right] - new_right = list(new_right) - elif is_extension_array_dtype(right) and type(left) != type(right): - new_right = list(right) - else: - new_right = right + # unbox Series and Index to arrays + if isinstance(left, (ABCSeries, ABCIndexClass)): + new_left = left._values else: + new_left = left - new_left = list(left.values) + if isinstance(right, (ABCSeries, ABCIndexClass)): + new_right = right._values + else: new_right = right res_values = op(new_left, new_right) diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 23ee8d217bd59..66d2baac8c91c 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -128,6 +128,13 @@ def _check_op(self, s, op_name, other, exc=None): if omask is not None: mask |= omask + # 1 ** na is na, so need to unmask those + if op_name == '__pow__': + mask = np.where(s == 1, False, mask) + + elif op_name == '__rpow__': + mask = np.where(other == 1, False, mask) + # float result type or float op if ((is_float_dtype(other) or is_float(other) or op_name in ['__rtruediv__', '__truediv__', @@ -171,7 +178,6 @@ def _check_op_integer(self, result, expected, mask, s, op_name, other): else: expected[(s.values == 0) & ((expected == 0) | expected.isna())] = 0 - try: expected[(expected == np.inf) | (expected == -np.inf)] = fill_value original = expected @@ -248,13 +254,20 @@ def test_arith_coerce_scalar(self, data, all_arithmetic_operators): @pytest.mark.parametrize("other", [1., 1.0, np.array(1.), np.array([1.])]) def test_arithmetic_conversion(self, all_arithmetic_operators, other): # if we have a float operand we should have a float result - # if if that is equal to an integer + # if that is equal to an integer op = self.get_op_from_name(all_arithmetic_operators) s = pd.Series([1, 2, 3], dtype='Int64') result = op(s, other) assert result.dtype is np.dtype('float') + @pytest.mark.parametrize("other", [0, 0.5]) + def test_arith_zero_dim_ndarray(self, other): + arr = integer_array([1, None, 2]) + result = arr + np.array(other) + expected = arr + other + tm.assert_equal(result, expected) + def test_error(self, data, all_arithmetic_operators): # invalid ops @@ -285,6 +298,21 @@ def test_error(self, data, all_arithmetic_operators): with pytest.raises(NotImplementedError): opa(np.arange(len(s)).reshape(-1, len(s))) + def test_pow(self): + # https://github.com/pandas-dev/pandas/issues/22022 + a = integer_array([1, np.nan, np.nan, 1]) + b = integer_array([1, np.nan, 1, np.nan]) + result = a ** b + expected = pd.core.arrays.integer_array([1, np.nan, np.nan, 1]) + tm.assert_extension_array_equal(result, expected) + + def test_rpow_one_to_na(self): + # https://github.com/pandas-dev/pandas/issues/22022 + arr = integer_array([np.nan, np.nan]) + result = np.array([1.0, 2.0]) ** arr + expected = np.array([1.0, np.nan]) + tm.assert_numpy_array_equal(result, expected) + class TestComparisonOps(BaseOpsUtil): @@ -497,6 +525,18 @@ def test_integer_array_constructor(): IntegerArray(values) +@pytest.mark.parametrize('a, b', [ + ([1, None], [1, np.nan]), + pytest.param([None], [np.nan], + marks=pytest.mark.xfail(reason='GH-23224', + strict=True)), +]) +def test_integer_array_constructor_none_is_nan(a, b): + result = integer_array(a) + expected = integer_array(b) + tm.assert_extension_array_equal(result, expected) + + def test_integer_array_constructor_copy(): values = np.array([1, 2, 3, 4], dtype='int64') mask = np.array([False, False, False, True], dtype='bool') diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 9c1cfcdfeacc3..7baa6284e398f 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -107,6 +107,18 @@ def test_error(self, data, all_arithmetic_operators): with pytest.raises(AttributeError): getattr(data, op_name) + def test_direct_arith_with_series_returns_not_implemented(self, data): + # EAs should return NotImplemented for ops with Series. + # Pandas takes care of unboxing the series and calling the EA's op. + other = pd.Series(data) + if hasattr(data, '__add__'): + result = data.__add__(other) + assert result is NotImplemented + else: + raise pytest.skip( + "{} does not implement add".format(data.__class__.__name__) + ) + class BaseComparisonOpsTests(BaseOpsUtil): """Various Series and DataFrame comparison ops methods.""" @@ -140,3 +152,15 @@ def test_compare_array(self, data, all_compare_operators): s = pd.Series(data) other = pd.Series([data[0]] * len(data)) self._compare_other(s, data, op_name, other) + + def test_direct_arith_with_series_returns_not_implemented(self, data): + # EAs should return NotImplemented for ops with Series. + # Pandas takes care of unboxing the series and calling the EA's op. + other = pd.Series(data) + if hasattr(data, '__eq__'): + result = data.__eq__(other) + assert result is NotImplemented + else: + raise pytest.skip( + "{} does not implement __eq__".format(data.__class__.__name__) + ) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 53a598559393c..fe07aae61c5e2 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -47,6 +47,7 @@ def _is_numeric(self): class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin): + __array_priority__ = 1000 def __init__(self, values, dtype=None, copy=False, context=None): for val in values: diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 976511941042d..5c63e50c3eaaa 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -55,6 +55,7 @@ def construct_from_string(cls, string): class JSONArray(ExtensionArray): dtype = JSONDtype() + __array_priority__ = 1000 def __init__(self, values, dtype=None, copy=False): for val in values: diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 89c36bbe7b325..668939e775148 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -143,7 +143,6 @@ def test_error(self, data, all_arithmetic_operators): # other specific errors tested in the integer array specific tests pass - @pytest.mark.xfail(reason="EA is listified. GH-22922", strict=True) def test_add_series_with_extension_array(self, data): super(TestArithmeticOps, self).test_add_series_with_extension_array( data diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 37ba1c91368b3..43b24930f6303 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import operator +import numpy as np import pytest from pandas import Series @@ -66,3 +67,18 @@ def test_ser_cmp_result_names(self, names, op): ser = Series(cidx).rename(names[1]) result = op(ser, cidx) assert result.name == names[2] + + +def test_pow_ops_object(): + # 22922 + # pow is weird with masking & 1, so testing here + a = Series([1, np.nan, 1, np.nan], dtype=object) + b = Series([1, np.nan, np.nan, 1], dtype=object) + result = a ** b + expected = Series(a.values ** b.values, dtype=object) + tm.assert_series_equal(result, expected) + + result = b ** a + expected = Series(b.values ** a.values, dtype=object) + + tm.assert_series_equal(result, expected)