-
-
Notifications
You must be signed in to change notification settings - Fork 18.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG/PERF: Avoid listifying in dispatch_to_extension_op #23155
Changes from all commits
044a99e
fe693d6
cf945ee
6507f43
b7906ea
0125669
f1dd665
07a632e
e378c7d
b2f9243
42b91d0
f03e66b
f14cf0b
32757f6
4fc1d1b
03a367e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,8 @@ | |
import copy | ||
import numpy as np | ||
|
||
from pandas._libs.lib import infer_dtype | ||
|
||
from pandas._libs import lib | ||
from pandas.util._decorators import cache_readonly | ||
from pandas.compat import u, range, string_types | ||
from pandas.compat import set_function_name | ||
|
@@ -171,7 +172,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False): | |
|
||
values = np.array(values, copy=copy) | ||
if is_object_dtype(values): | ||
inferred_type = infer_dtype(values) | ||
inferred_type = lib.infer_dtype(values) | ||
if inferred_type not in ['floating', 'integer', | ||
'mixed-integer', 'mixed-integer-float']: | ||
raise TypeError("{} cannot be converted to an IntegerDtype".format( | ||
|
@@ -280,6 +281,8 @@ def _coerce_to_ndarray(self): | |
data[self._mask] = self._na_value | ||
return data | ||
|
||
__array_priority__ = 1000 # higher than ndarray so ops dispatch to us | ||
|
||
def __array__(self, dtype=None): | ||
""" | ||
the array interface, return my values | ||
|
@@ -288,12 +291,6 @@ def __array__(self, dtype=None): | |
return self._coerce_to_ndarray() | ||
|
||
def __iter__(self): | ||
"""Iterate over elements of the array. | ||
|
||
""" | ||
# This needs to be implemented so that pandas recognizes extension | ||
# arrays as list-like. The default implementation makes successive | ||
# calls to ``__getitem__``, which may be slower than necessary. | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
for i in range(len(self)): | ||
if self._mask[i]: | ||
yield self.dtype.na_value | ||
|
@@ -504,13 +501,21 @@ def cmp_method(self, other): | |
|
||
op_name = op.__name__ | ||
mask = None | ||
|
||
if isinstance(other, (ABCSeries, ABCIndexClass)): | ||
# Rely on pandas to unbox and dispatch to us. | ||
return NotImplemented | ||
|
||
if isinstance(other, IntegerArray): | ||
other, mask = other._data, other._mask | ||
|
||
elif is_list_like(other): | ||
other = np.asarray(other) | ||
if other.ndim > 0 and len(self) != len(other): | ||
raise ValueError('Lengths must match to compare') | ||
|
||
other = lib.item_from_zerodim(other) | ||
|
||
# numpy will show a DeprecationWarning on invalid elementwise | ||
# comparisons, this will raise in the future | ||
with warnings.catch_warnings(): | ||
|
@@ -586,14 +591,21 @@ def integer_arithmetic_method(self, other): | |
|
||
op_name = op.__name__ | ||
mask = None | ||
|
||
if isinstance(other, (ABCSeries, ABCIndexClass)): | ||
other = getattr(other, 'values', other) | ||
# Rely on pandas to unbox and dispatch to us. | ||
return NotImplemented | ||
|
||
if isinstance(other, IntegerArray): | ||
other, mask = other._data, other._mask | ||
elif getattr(other, 'ndim', 0) > 1: | ||
if getattr(other, 'ndim', 0) > 1: | ||
raise NotImplementedError( | ||
"can only perform ops with 1-d structures") | ||
|
||
if isinstance(other, IntegerArray): | ||
other, mask = other._data, other._mask | ||
|
||
elif getattr(other, 'ndim', None) == 0: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is moved from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Had to keep this one in an elif, so that we avoid the |
||
other = other.item() | ||
|
||
elif is_list_like(other): | ||
other = np.asarray(other) | ||
if not other.ndim: | ||
|
@@ -612,6 +624,13 @@ def integer_arithmetic_method(self, other): | |
else: | ||
mask = self._mask | mask | ||
|
||
# 1 ** np.nan is 1. So we have to unmask those. | ||
if op_name == 'pow': | ||
mask = np.where(self == 1, False, mask) | ||
|
||
elif op_name == 'rpow': | ||
mask = np.where(other == 1, False, mask) | ||
|
||
with np.errstate(all='ignore'): | ||
result = op(self._data, other) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1471,15 +1471,32 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): | |
'power': 'pow', | ||
'remainder': 'mod', | ||
'divide': 'div', | ||
'equal': 'eq', | ||
'not_equal': 'ne', | ||
'less': 'lt', | ||
'less_equal': 'le', | ||
'greater': 'gt', | ||
'greater_equal': 'ge', | ||
} | ||
|
||
flipped = { | ||
'lt': '__gt__', | ||
'le': '__ge__', | ||
'gt': '__lt__', | ||
'ge': '__le__', | ||
'eq': '__eq__', | ||
'ne': '__ne__', | ||
} | ||
|
||
op_name = ufunc.__name__ | ||
op_name = aliases.get(op_name, op_name) | ||
|
||
if op_name in special and kwargs.get('out') is None: | ||
if isinstance(inputs[0], type(self)): | ||
return getattr(self, '__{}__'.format(op_name))(inputs[1]) | ||
else: | ||
return getattr(self, '__r{}__'.format(op_name))(inputs[0]) | ||
name = flipped.get(op_name, '__r{}__'.format(op_name)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. note to @jbrockmendel since we do this a couple of times iIRC, we should have a more generic way of doing this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And if we implement I think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That doesn't quite work since the comparison ops don't define |
||
return getattr(self, name)(inputs[0]) | ||
|
||
if len(inputs) == 1: | ||
# No alignment necessary. | ||
|
@@ -1528,7 +1545,8 @@ def sparse_arithmetic_method(self, other): | |
op_name = op.__name__ | ||
|
||
if isinstance(other, (ABCSeries, ABCIndexClass)): | ||
other = getattr(other, 'values', other) | ||
# Rely on pandas to dispatch to us. | ||
return NotImplemented | ||
|
||
if isinstance(other, SparseArray): | ||
return _sparse_array_op(self, other, op, op_name) | ||
|
@@ -1573,10 +1591,11 @@ def cmp_method(self, other): | |
op_name = op_name[:-1] | ||
|
||
if isinstance(other, (ABCSeries, ABCIndexClass)): | ||
other = getattr(other, 'values', other) | ||
# Rely on pandas to unbox and dispatch to us. | ||
return NotImplemented | ||
|
||
if not is_scalar(other) and not isinstance(other, type(self)): | ||
# convert list-like to ndarary | ||
# convert list-like to ndarray | ||
other = np.asarray(other) | ||
|
||
if isinstance(other, np.ndarray): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should we just put this in the base class? (for the ops mixin)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That seems a little too invasive for a base class. I’d rather leave that up to the subclasser.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
so what arithmetic subclass would not want this set?
is there an example?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To clarify, I'm not sure if there's a way to unset it, if you don't want to set it in a subclass (you don't want to opt into numpy's array stuff at all).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just find this a detail which would likely be forgotten in any subclass, I don't see a harm and much upset in setting it onthe base class (you can always unset if you really really think you need to).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you unset it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't really know if setting
__array_priority__ = 0
is enough to "unset" it, and I don't know what all setting__array_priority__
in the first place opts you into.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you document this in the Mixin itself though (if you are not going to set it by defaulrt). It is so non-obvious that you need to do this.