pandas-dev · jreback · Jun 29, 2018 · May 30, 2018 · May 31, 2018 · May 31, 2018
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -14,6 +14,7 @@ Other Enhancements
 ^^^^^^^^^^^^^^^^^^
 - :func:`to_datetime` now supports the ``%Z`` and ``%z`` directive when passed into ``format`` (:issue:`13486`)
 - :func:`to_csv` now supports ``compression`` keyword when a file handle is passed. (:issue:`21227`)
+- ``ExtensionArray`` has an ``ExtensionOpsMixin`` factory that allows default operators to be defined (:issue:`20659`, :issue:`19577`)
 -
 
 .. _whatsnew_0240.api_breaking:

diff --git a/pandas/api/extensions/__init__.py b/pandas/api/extensions/__init__.py
@@ -3,5 +3,5 @@
                                   register_index_accessor,
                                   register_series_accessor)
 from pandas.core.algorithms import take  # noqa
-from pandas.core.arrays.base import ExtensionArray  # noqa
+from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin  # noqa
 from pandas.core.dtypes.dtypes import ExtensionDtype  # noqa
diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py
@@ -1,2 +1,2 @@
-from .base import ExtensionArray  # noqa
+from .base import ExtensionArray, ExtensionOpsMixin  # noqa
 from .categorical import Categorical  # noqa
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -9,6 +9,11 @@
 
 from pandas.errors import AbstractMethodError
 from pandas.compat.numpy import function as nv
+from pandas.compat import set_function_name, PY3
+import pandas.core.common as com
+from pandas.core.dtypes.common import (
+    is_extension_array_dtype,
+    is_list_like)
 
 _not_implemented_message = "{} does not implement {}."
 
@@ -610,3 +615,91 @@ def _ndarray_values(self):
         used for interacting with our indexers.
         """
         return np.array(self)
+
+
+def ExtensionOpsMixin(include_arith_ops, include_logic_ops):
+    """A mixin factory for creating default arithmetic and logical operators,
+    which are based on the underlying dtype backing the ExtensionArray
+
+    Parameters
+    ----------
+    include_arith_ops : boolean indicating whether arithmetic ops should be
+                        created
+    include_logic_ops : boolean indicating whether logical ops should be
+                        created
+
+    Returns
+    -------
+    A mixin class that has the associated operators defined.
+
+    Usage
+    ------
+    If you have defined a subclass MyClass(ExtensionArray), then
+    use MyClass(ExtensionArray, ExtensionOpsMixin(True, True)) to
+    get both the arithmetic and logical operators
+    """
+    class _ExtensionOpsMixin(object):
+        pass
+
+    def create_method(op_name):
+        def _binop(self, other):
+            def convert_values(parm):
+                if isinstance(parm, ExtensionArray):
+                    ovalues = list(parm)
+                elif is_extension_array_dtype(parm):
+                    ovalues = parm.values
+                elif is_list_like(parm):
+                    ovalues = parm
+                else:  # Assume it's an object
+                    ovalues = [parm] * len(self)
+                return ovalues
+            lvalues = convert_values(self)
+            rvalues = convert_values(other)
+
+            # Get the method for each object.
+            def callfunc(a, b):
+                f = getattr(a, op_name, None)
+                if f is not None:
+                    return f(b)
+                else:
+                    return NotImplemented
+            res = [callfunc(a, b) for (a, b) in zip(lvalues, rvalues)]
+
+            # We can't use (NotImplemented in res) because the
+            # results might be objects that have overridden __eq__
+            if any(isinstance(r, type(NotImplemented)) for r in res):
+                msg = "invalid operation {opn} between {one} and {two}"
+                raise TypeError(msg.format(opn=op_name,
+                                           one=type(lvalues),
+                                           two=type(rvalues)))
+
+            res_values = com._values_from_object(res)
+
+            try:
+                res_values = self._from_sequence(res_values)
+            except TypeError:
+                pass
+
+            return res_values
+
+        name = '__{name}__'.format(name=op_name)
+        return set_function_name(_binop, name, _ExtensionOpsMixin)
+
+    if include_arith_ops:
+        arithops = ['__add__', '__radd__', '__sub__', '__rsub__', '__mul__',
+                    '__rmul__', '__pow__', '__rpow__', '__mod__', '__rmod__',
+                    '__floordiv__', '__rfloordiv__', '__truediv__',
+                    '__rtruediv__', '__divmod__', '__rdivmod__']
+        if not PY3:
+            arithops.extend(['__div__', '__rdiv__'])
+
+        for op_name in arithops:
+            setattr(_ExtensionOpsMixin, op_name, create_method(op_name))
+
+    if include_logic_ops:
+        logicops = ['__eq__', '__ne__', '__lt__', '__gt__',
+                    '__le__', '__ge__']
+        for op_name in logicops:
+            setattr(_ExtensionOpsMixin, op_name, create_method(op_name))
+
+    return _ExtensionOpsMixin
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2972,16 +2972,20 @@ def get_value(self, series, key):
         # use this, e.g. DatetimeIndex
         s = getattr(series, '_values', None)
         if isinstance(s, (ExtensionArray, Index)) and is_scalar(key):
-            # GH 20825
+            # GH 20882, 21257
             # Unify Index and ExtensionArray treatment
             # First try to convert the key to a location
-            # If that fails, see if key is an integer, and
+            # If that fails, re-raise the KeyError for a non-empty integer
+            # or boolean index; otherwise, see if key is an integer, and
             # try that
             try:
                 iloc = self.get_loc(key)
                 return s[iloc]
             except KeyError:
-                if is_integer(key):
+                if (len(self) > 0 and
+                        self.inferred_type in ['integer', 'boolean']):
+                    raise
+                elif is_integer(key):
                     return s[key]
 
         s = com._values_from_object(series)

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -30,6 +30,7 @@
     is_bool_dtype,
     is_list_like,
     is_scalar,
+    is_extension_array_dtype,
     _ensure_object)
 from pandas.core.dtypes.cast import (
     maybe_upcast_putmask, find_common_type,
@@ -990,6 +991,20 @@ def _construct_divmod_result(left, result, index, name, dtype):
     )
 
 
+def dispatch_to_extension_op(left, right, op_name):
+    """
+    Assume that left is a Series backed by an ExtensionArray,
+    apply the operator defined by op_name.
+    """
+
+    method = getattr(left.values, op_name, None)
+    res_values = method(right)
+
+    res_name = get_op_result_name(left, right)
+    return left._constructor(res_values, index=left.index,
+                             name=res_name)
+
+
 def _arith_method_SERIES(cls, op, special):
     """
     Wrapper function for Series arithmetic operations, to avoid
@@ -1058,6 +1073,9 @@ def wrapper(left, right):
             raise TypeError("{typ} cannot perform the operation "
                             "{op}".format(typ=type(left).__name__, op=str_rep))
 
+        elif is_extension_array_dtype(left):
+            return dispatch_to_extension_op(left, right, op_name)
+
         lvalues = left.values
         rvalues = right
         if isinstance(rvalues, ABCSeries):
@@ -1208,6 +1226,9 @@ def wrapper(self, other, axis=None):
             return self._constructor(res_values, index=self.index,
                                      name=res_name)
 
+        elif is_extension_array_dtype(self):
+            return dispatch_to_extension_op(self, other, op_name)
+
         elif isinstance(other, ABCSeries):
             # By this point we have checked that self._indexed_same(other)
             res_values = na_op(self.values, other.values)

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2196,23 +2196,22 @@ def _binop(self, other, func, level=None, fill_value=None):
             result.name = None
         return result
 
-    def combine(self, other, func, fill_value=np.nan):
+    def combine(self, other, func, fill_value=None):
         """
         Perform elementwise binary operation on two Series using given function
         with optional fill value when an index is missing from one Series or
         the other
-
         Parameters
         ----------
         other : Series or scalar value
         func : function
             Function that takes two scalars as inputs and return a scalar
         fill_value : scalar value
-
+            The default specifies to use the appropriate NaN value for
+            the underlying dtype of the Series
         Returns
         -------
         result : Series
-
         Examples
         --------
         >>> s1 = Series([1, 2])
@@ -2221,26 +2220,36 @@ def combine(self, other, func, fill_value=np.nan):
         0    0
         1    2
         dtype: int64
-
         See Also
         --------
         Series.combine_first : Combine Series values, choosing the calling
             Series's values first
         """
+        self_is_ext = is_extension_array_dtype(self.values)
+        if fill_value is None:
+            fill_value = na_value_for_dtype(self.dtype, False)
+
         if isinstance(other, Series):
             new_index = self.index.union(other.index)
             new_name = ops.get_op_result_name(self, other)
-            new_values = np.empty(len(new_index), dtype=self.dtype)
-            for i, idx in enumerate(new_index):
+            new_values = []
+            for idx in new_index:
                 lv = self.get(idx, fill_value)
                 rv = other.get(idx, fill_value)
                 with np.errstate(all='ignore'):
-                    new_values[i] = func(lv, rv)
+                    new_values.append(func(lv, rv))
         else:
             new_index = self.index
             with np.errstate(all='ignore'):
-                new_values = func(self._values, other)
+                new_values = [func(lv, other) for lv in self._values]
             new_name = self.name
+
+        if self_is_ext and not is_categorical_dtype(self.values):
+            try:
+                new_values = self._values._from_sequence(new_values)
+            except TypeError:
+                pass
+
         return self._constructor(new_values, index=new_index, name=new_name)
 
     def combine_first(self, other):

diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
@@ -130,7 +130,7 @@ def test_get(self, data):
         expected = s.iloc[[0, 1]]
         self.assert_series_equal(result, expected)
 
-        assert s.get(-1) == s.iloc[-1]
+        assert s.get(-1) is None
         assert s.get(s.index.max() + 1) is None
 
         s = pd.Series(data[:6], index=list('abcdef'))

diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
@@ -6,7 +6,7 @@
 import numpy as np
 
 import pandas as pd
-from pandas.core.arrays import ExtensionArray
+from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
 from pandas.core.dtypes.base import ExtensionDtype
 
 
@@ -24,11 +24,13 @@ def construct_from_string(cls, string):
                             "'{}'".format(cls, string))
 
 
-class DecimalArray(ExtensionArray):
+class DecimalArray(ExtensionArray, ExtensionOpsMixin(True, True)):
     dtype = DecimalDtype()
 
     def __init__(self, values):
-        assert all(isinstance(v, decimal.Decimal) for v in values)
+        for val in values:
+            if not isinstance(val, self.dtype.type):
+                raise TypeError
         values = np.asarray(values, dtype=object)
 
         self._data = values

diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
@@ -7,6 +7,9 @@
 
 from pandas.tests.extension import base
 
+from pandas.tests.series.test_operators import TestSeriesOperators
+from pandas.util._decorators import cache_readonly
+
 from .array import DecimalDtype, DecimalArray, make_data
 
 
@@ -183,3 +186,36 @@ def test_dataframe_constructor_with_different_dtype_raises():
     xpr = "Cannot coerce extension array to dtype 'int64'. "
     with tm.assert_raises_regex(ValueError, xpr):
         pd.DataFrame({"A": arr}, dtype='int64')
+
+
+_ts = pd.Series(DecimalArray(make_data()))
+
+
+class TestOperator(BaseDecimal, TestSeriesOperators):
+    @cache_readonly
+    def ts(self):
+        ts = _ts.copy()
+        ts.name = 'ts'
+        return ts
+
+    def test_operators(self):
+        def absfunc(v):
+            if isinstance(v, pd.Series):
+                vals = v.values
+                return pd.Series(vals._from_sequence([abs(i) for i in vals]))
+            else:
+                return abs(v)
+        context = decimal.getcontext()
+        divbyzerotrap = context.traps[decimal.DivisionByZero]
+        invalidoptrap = context.traps[decimal.InvalidOperation]
+        context.traps[decimal.DivisionByZero] = 0
+        context.traps[decimal.InvalidOperation] = 0
+        super(TestOperator, self).test_operators(absfunc)
+        context.traps[decimal.DivisionByZero] = divbyzerotrap
+        context.traps[decimal.InvalidOperation] = invalidoptrap
+
+    def test_operators_corner(self):
+        pytest.skip("Cannot add empty Series of float64 to DecimalArray")
+
+    def test_divmod(self):
+        pytest.skip("divmod not appropriate for Decimal type")
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
@@ -1216,11 +1216,11 @@ def test_neg(self):
     def test_invert(self):
         assert_series_equal(-(self.series < 0), ~(self.series < 0))
 
-    def test_operators(self):
+    def test_operators(self, absfunc=np.abs):
         def _check_op(series, other, op, pos_only=False,
                       check_dtype=True):
-            left = np.abs(series) if pos_only else series
-            right = np.abs(other) if pos_only else other
+            left = absfunc(series) if pos_only else series
+            right = absfunc(other) if pos_only else other
 
             cython_or_numpy = op(left, right)
             python = left.combine(right, op)

diff --git a/pandas/util/testing.py b/pandas/util/testing.py
@@ -30,7 +30,8 @@
     is_categorical_dtype,
     is_interval_dtype,
     is_sequence,
-    is_list_like)
+    is_list_like,
+    is_extension_array_dtype)
 from pandas.io.formats.printing import pprint_thing
 from pandas.core.algorithms import take_1d
 import pandas.core.common as com
@@ -1225,6 +1226,10 @@ def assert_series_equal(left, right, check_dtype=True,
         right = pd.IntervalIndex(right)
         assert_index_equal(left, right, obj='{obj}.index'.format(obj=obj))
 
+    elif (is_extension_array_dtype(left) and not is_categorical_dtype(left) and
+          is_extension_array_dtype(right) and not is_categorical_dtype(right)):
+        return assert_extension_array_equal(left.values, right.values)
+
     else:
         _testing.assert_almost_equal(left.get_values(), right.get_values(),
                                      check_less_precise=check_less_precise,