Skip to content

Commit

Permalink
Implement NA.__array_ufunc__ (#30245)
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger authored and jreback committed Jan 5, 2020
1 parent ea73e0b commit 97153bf
Show file tree
Hide file tree
Showing 8 changed files with 256 additions and 113 deletions.
4 changes: 2 additions & 2 deletions doc/source/getting_started/dsintro.rst
Original file line number Diff line number Diff line change
Expand Up @@ -676,11 +676,11 @@ similar to an ndarray:
# only show the first 5 rows
df[:5].T
.. _dsintro.numpy_interop:

DataFrame interoperability with NumPy functions
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. _dsintro.numpy_interop:

Elementwise NumPy ufuncs (log, exp, sqrt, ...) and various other NumPy functions
can be used with no issues on Series and DataFrame, assuming the data within
are numeric:
Expand Down
26 changes: 26 additions & 0 deletions doc/source/user_guide/missing_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -920,3 +920,29 @@ filling missing values beforehand.

A similar situation occurs when using Series or DataFrame objects in ``if``
statements, see :ref:`gotchas.truth`.

NumPy ufuncs
------------

:attr:`pandas.NA` implements NumPy's ``__array_ufunc__`` protocol. Most ufuncs
work with ``NA``, and generally return ``NA``:

.. ipython:: python

   np.log(pd.NA)
   np.add(pd.NA, 1)

.. warning::

   Currently, ufuncs involving an ndarray and ``NA`` will return an
   object-dtype ndarray filled with NA values.

   .. ipython:: python

      a = np.array([1, 2, 3])
      np.greater(a, pd.NA)

   The return type here may change to return a different array type
   in the future.

See :ref:`dsintro.numpy_interop` for more on ufuncs.
53 changes: 48 additions & 5 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ from pandas._libs.tslibs.np_datetime cimport (
get_timedelta64_value, get_datetime64_value)
from pandas._libs.tslibs.nattype cimport (
checknull_with_nat, c_NaT as NaT, is_null_datetimelike)
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op

from pandas.compat import is_platform_32bit

Expand Down Expand Up @@ -290,16 +291,29 @@ cdef inline bint is_null_period(v):
# Implementation of NA singleton


def _create_binary_propagating_op(name, divmod=False):
def _create_binary_propagating_op(name, is_divmod=False):

def method(self, other):
if (other is C_NA or isinstance(other, str)
or isinstance(other, (numbers.Number, np.bool_))):
if divmod:
or isinstance(other, (numbers.Number, np.bool_))
or isinstance(other, np.ndarray) and not other.shape):
# Need the other.shape clause to handle NumPy scalars,
# since we do a setitem on `out` below, which
# won't work for NumPy scalars.
if is_divmod:
return NA, NA
else:
return NA

elif isinstance(other, np.ndarray):
out = np.empty(other.shape, dtype=object)
out[:] = NA

if is_divmod:
return out, out.copy()
else:
return out

return NotImplemented

method.__name__ = name
Expand Down Expand Up @@ -369,8 +383,8 @@ class NAType(C_NAType):
__rfloordiv__ = _create_binary_propagating_op("__rfloordiv__")
__mod__ = _create_binary_propagating_op("__mod__")
__rmod__ = _create_binary_propagating_op("__rmod__")
__divmod__ = _create_binary_propagating_op("__divmod__", divmod=True)
__rdivmod__ = _create_binary_propagating_op("__rdivmod__", divmod=True)
__divmod__ = _create_binary_propagating_op("__divmod__", is_divmod=True)
__rdivmod__ = _create_binary_propagating_op("__rdivmod__", is_divmod=True)
# __lshift__ and __rshift__ are not implemented

__eq__ = _create_binary_propagating_op("__eq__")
Expand All @@ -397,6 +411,8 @@ class NAType(C_NAType):
return type(other)(1)
else:
return NA
elif isinstance(other, np.ndarray):
return np.where(other == 0, other.dtype.type(1), NA)

return NotImplemented

Expand All @@ -408,6 +424,8 @@ class NAType(C_NAType):
return other
else:
return NA
elif isinstance(other, np.ndarray):
return np.where((other == 1) | (other == -1), other, NA)

return NotImplemented

Expand Down Expand Up @@ -440,6 +458,31 @@ class NAType(C_NAType):

__rxor__ = __xor__

# NOTE(review): presumably a high priority so that NumPy defers to NA's
# reflected operators in mixed ndarray/NA operations -- confirm.
__array_priority__ = 1000
# Input types that __array_ufunc__ accepts (it adds NAType itself to this
# tuple); any other input type makes __array_ufunc__ return NotImplemented.
_HANDLED_TYPES = (np.ndarray, numbers.Number, str, np.bool_)

def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """
    Apply a NumPy ufunc to inputs that include ``NA``.

    Parameters
    ----------
    ufunc : Callable
        The NumPy ufunc being applied.
    method : str
        The ufunc method being invoked; only ``"__call__"`` is supported.
    *inputs
        The ufunc operands. Each must be an instance of one of
        ``_HANDLED_TYPES`` or of ``NAType``.
    **kwargs
        Extra keyword arguments, forwarded to the dunder-op dispatcher.

    Returns
    -------
    result
        ``NotImplemented`` if any input has an unhandled type; otherwise
        the result of the matching dunder method, or an NA-valued fallback
        for ufuncs that have no dunder equivalent.

    Raises
    ------
    ValueError
        If ``method`` is anything other than ``"__call__"``.
    """
    accepted = self._HANDLED_TYPES + (NAType,)
    if not all(isinstance(operand, accepted) for operand in inputs):
        return NotImplemented

    if method != "__call__":
        raise ValueError(f"ufunc method '{method}' not supported for NA")

    dispatched = maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if dispatched is not NotImplemented:
        return dispatched

    # For a NumPy ufunc that's not a binop, like np.logaddexp:
    # broadcast all inputs against each other and keep the one that
    # corresponds to the (first) NA operand.
    na_positions = [pos for pos, operand in enumerate(inputs) if operand is NA]
    first_na = na_positions[0]
    result = np.broadcast_arrays(*inputs)[first_na]
    if result.ndim == 0:
        # Zero-dimensional result: unwrap it to a plain scalar.
        result = result.item()
    if ufunc.nout > 1:
        # Multi-output ufuncs get one NA per output.
        result = (NA,) * ufunc.nout
    return result


C_NA = NAType() # C-visible
NA = C_NA # Python-visible
94 changes: 94 additions & 0 deletions pandas/_libs/ops_dispatch.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
# Operator names (dunder names without the surrounding underscores) that
# maybe_dispatch_ufunc_to_dunder_op forwards to the matching dunder method.
# "remainder" is kept for backward compatibility; UFUNC_ALIASES rewrites it
# to "mod" before the membership test.
DISPATCHED_UFUNCS = {
    "add",
    "sub",
    "mul",
    "pow",
    "mod",
    "floordiv",
    "truediv",
    "divmod",
    "eq",
    "ne",
    "lt",
    "gt",
    "le",
    "ge",
    "remainder",
    "matmul",
    "or",
    "xor",
    "and",
}
# Maps a ufunc's ``__name__`` to the operator name used in DISPATCHED_UFUNCS.
# Every value here must be a member of DISPATCHED_UFUNCS, otherwise the
# aliased ufunc can never be dispatched.
UFUNC_ALIASES = {
    "subtract": "sub",
    "multiply": "mul",
    "floor_divide": "floordiv",
    "true_divide": "truediv",
    "power": "pow",
    "remainder": "mod",
    # Python 3 has no ``__div__``; a ufunc named "divide" is true division.
    "divide": "truediv",
    "equal": "eq",
    "not_equal": "ne",
    "less": "lt",
    "less_equal": "le",
    "greater": "gt",
    "greater_equal": "ge",
    "bitwise_or": "or",
    "bitwise_and": "and",
    "bitwise_xor": "xor",
}

# For op(., Array) -> Array.__r{op}__
# Comparisons have no ``__r{op}__`` form: the reflected call uses the
# mirrored comparison instead (``other < arr`` becomes ``arr > other``).
REVERSED_NAMES = {
    "lt": "__gt__",
    "le": "__ge__",
    "gt": "__lt__",
    "ge": "__le__",
    "eq": "__eq__",
    "ne": "__ne__",
}


def maybe_dispatch_ufunc_to_dunder_op(
    object self, object ufunc, str method, *inputs, **kwargs
):
    """
    Dispatch a ufunc to the equivalent dunder method.

    Parameters
    ----------
    self : ArrayLike
        The array whose dunder method we dispatch to
    ufunc : Callable
        A NumPy ufunc
    method : {'reduce', 'accumulate', 'reduceat', 'outer', 'at', '__call__'}
    inputs : ArrayLike
        The input arrays.
    kwargs : Any
        The additional keyword arguments, e.g. ``out``.

    Returns
    -------
    result : Any
        The result of applying the ufunc, or ``NotImplemented`` when the
        call is not dispatched (method other than ``'__call__'``, an
        operator not in ``DISPATCHED_UFUNCS``, or ``out`` was given).
    """
    # Normalise the ufunc name to the operator name used by the dunders.
    ufunc_name = ufunc.__name__
    op_name = UFUNC_ALIASES.get(ufunc_name, ufunc_name)

    def not_implemented(*args, **kwargs):
        return NotImplemented

    if method != "__call__":
        return NotImplemented
    if op_name not in DISPATCHED_UFUNCS:
        return NotImplemented
    if kwargs.get("out") is not None:
        return NotImplemented

    if isinstance(inputs[0], type(self)):
        # ``self`` is the left operand: use the forward dunder.
        handler = getattr(self, f"__{op_name}__", not_implemented)
        return handler(inputs[1])

    # ``self`` is the right operand: use the reflected (or mirrored
    # comparison) dunder and pass the left operand to it.
    reflected = REVERSED_NAMES.get(op_name, f"__r{op_name}__")
    handler = getattr(self, reflected, not_implemented)
    result = handler(inputs[0])
    return result
2 changes: 1 addition & 1 deletion pandas/core/ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import numpy as np

from pandas._libs import Timedelta, Timestamp, lib
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401
from pandas.util._decorators import Appender

from pandas.core.dtypes.common import is_list_like, is_timedelta64_dtype
Expand All @@ -31,7 +32,6 @@
)
from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY # noqa:F401
from pandas.core.ops.common import unpack_zerodim_and_defer
from pandas.core.ops.dispatch import maybe_dispatch_ufunc_to_dunder_op # noqa:F401
from pandas.core.ops.dispatch import should_series_dispatch
from pandas.core.ops.docstrings import (
_arith_doc_FRAME,
Expand Down
95 changes: 1 addition & 94 deletions pandas/core/ops/dispatch.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
"""
Functions for defining unary operations.
"""
from typing import Any, Callable, Union
from typing import Any, Union

import numpy as np

from pandas._typing import ArrayLike

from pandas.core.dtypes.common import (
is_datetime64_dtype,
is_extension_array_dtype,
Expand Down Expand Up @@ -126,94 +124,3 @@ def dispatch_to_extension_op(
# on the ExtensionArray
res_values = op(left, right)
return res_values


def maybe_dispatch_ufunc_to_dunder_op(
    self: ArrayLike, ufunc: Callable, method: str, *inputs: ArrayLike, **kwargs: Any
):
    """
    Dispatch a ufunc to the equivalent dunder method.

    Parameters
    ----------
    self : ArrayLike
        The array whose dunder method we dispatch to
    ufunc : Callable
        A NumPy ufunc
    method : {'reduce', 'accumulate', 'reduceat', 'outer', 'at', '__call__'}
    inputs : ArrayLike
        The input arrays.
    kwargs : Any
        The additional keyword arguments, e.g. ``out``.

    Returns
    -------
    result : Any
        The result of applying the ufunc, or ``NotImplemented`` when the
        call is not dispatched (method other than ``'__call__'``, an
        operator not in ``special``, or ``out`` was given).
    """
    # special has the ufuncs we dispatch to the dunder op on
    special = {
        "add",
        "sub",
        "mul",
        "pow",
        "mod",
        "floordiv",
        "truediv",
        "divmod",
        "eq",
        "ne",
        "lt",
        "gt",
        "le",
        "ge",
        "remainder",
        "matmul",
        "or",
        "xor",
        "and",
    }
    # Maps a ufunc's ``__name__`` to the operator name used in ``special``.
    aliases = {
        "subtract": "sub",
        "multiply": "mul",
        "floor_divide": "floordiv",
        "true_divide": "truediv",
        "power": "pow",
        "remainder": "mod",
        # Python 3 has no ``__div__`` and "div" is not in ``special``, so
        # the previous "div" alias meant a ufunc named "divide" was never
        # dispatched. It is true division.
        "divide": "truediv",
        "equal": "eq",
        "not_equal": "ne",
        "less": "lt",
        "less_equal": "le",
        "greater": "gt",
        "greater_equal": "ge",
        "bitwise_or": "or",
        "bitwise_and": "and",
        "bitwise_xor": "xor",
    }

    # For op(., Array) -> Array.__r{op}__
    # Comparisons have no reflected dunder; use the mirrored comparison.
    flipped = {
        "lt": "__gt__",
        "le": "__ge__",
        "gt": "__lt__",
        "ge": "__le__",
        "eq": "__eq__",
        "ne": "__ne__",
    }

    op_name = ufunc.__name__
    op_name = aliases.get(op_name, op_name)

    def not_implemented(*args, **kwargs):
        return NotImplemented

    if method == "__call__" and op_name in special and kwargs.get("out") is None:
        if isinstance(inputs[0], type(self)):
            # ``self`` is the left operand: forward dunder.
            name = f"__{op_name}__"
            return getattr(self, name, not_implemented)(inputs[1])
        else:
            # ``self`` is the right operand: reflected dunder.
            name = flipped.get(op_name, f"__r{op_name}__")
            return getattr(self, name, not_implemented)(inputs[0])
    else:
        return NotImplemented
Loading

0 comments on commit 97153bf

Please sign in to comment.