-
-
Notifications
You must be signed in to change notification settings - Fork 18.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
dispatch scalar DataFrame ops to Series #22163
Changes from 11 commits
7681092
b226abf
3fd46bc
0dff3a1
caf2da0
6ff8705
0513e0b
3a7b782
6636565
edc3792
3c65f93
2cdc58b
4703db7
c090713
d683cb3
8c64cf6
dbdea1a
f1edec4
9b62135
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1311,7 +1311,7 @@ def na_op(x, y): | |
with np.errstate(all='ignore'): | ||
result = method(y) | ||
if result is NotImplemented: | ||
raise TypeError("invalid type comparison") | ||
return invalid_comparison(x, y, op) | ||
else: | ||
result = op(x, y) | ||
|
||
|
@@ -1327,6 +1327,10 @@ def wrapper(self, other, axis=None): | |
|
||
res_name = get_op_result_name(self, other) | ||
|
||
if isinstance(other, list): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ATM the |
||
# TODO: same for tuples? | ||
other = np.asarray(other) | ||
|
||
if isinstance(other, ABCDataFrame): # pragma: no cover | ||
# Defer to DataFrame implementation; fail early | ||
return NotImplemented | ||
|
@@ -1426,8 +1430,6 @@ def wrapper(self, other, axis=None): | |
|
||
else: | ||
values = self.get_values() | ||
if isinstance(other, list): | ||
other = np.asarray(other) | ||
|
||
with np.errstate(all='ignore'): | ||
res = na_op(values, other) | ||
|
@@ -1706,7 +1708,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): | |
if fill_value is not None: | ||
self = self.fillna(fill_value) | ||
|
||
return self._combine_const(other, na_op, try_cast=True) | ||
pass_op = op if lib.is_scalar(other) else na_op | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you are checking for a scalar here and above? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's kind of annoying. If This PR handles only scalars since that is a relatively easy case. A few PRs down the road we'll have all these ops dispatch to series, at which point this won't be necessary. |
||
return self._combine_const(other, pass_op, try_cast=True) | ||
|
||
f.__name__ = op_name | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -273,6 +273,8 @@ def test_getitem_boolean(self): | |
# test df[df > 0] | ||
for df in [self.tsframe, self.mixed_frame, | ||
self.mixed_float, self.mixed_int]: | ||
if compat.PY3 and df is self.mixed_frame: | ||
continue | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's strip out the mixed_frame to another function (even though that duplicates some code), bonus can parametrize this test. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I don't think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these need to be made fixtures. this becomes so much easier. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree, am starting to implement this in the test_arithmetic sequence of PRs. Will update this test when that lands. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. k thanks |
||
|
||
data = df._get_numeric_data() | ||
bif = df[df > 0] | ||
|
@@ -2468,8 +2470,11 @@ def test_boolean_indexing_mixed(self): | |
assert_frame_equal(df2, expected) | ||
|
||
df['foo'] = 'test' | ||
with tm.assert_raises_regex(TypeError, 'boolean setting ' | ||
'on mixed-type'): | ||
msg = ("boolean setting on mixed-type|" | ||
"not supported between|" | ||
"unorderable types") | ||
with tm.assert_raises_regex(TypeError, msg): | ||
# TODO: This message should be the same in PY2/PY3 | ||
df[df > 0.3] = 1 | ||
|
||
def test_where(self): | ||
|
@@ -2502,6 +2507,10 @@ def _check_get(df, cond, check_dtypes=True): | |
# check getting | ||
for df in [default_frame, self.mixed_frame, | ||
self.mixed_float, self.mixed_int]: | ||
if compat.PY3 and df is self.mixed_frame: | ||
with pytest.raises(TypeError): | ||
df > 0 | ||
continue | ||
cond = df > 0 | ||
_check_get(df, cond) | ||
|
||
|
@@ -2549,6 +2558,10 @@ def _check_align(df, cond, other, check_dtypes=True): | |
assert (rs.dtypes == df.dtypes).all() | ||
|
||
for df in [self.mixed_frame, self.mixed_float, self.mixed_int]: | ||
if compat.PY3 and df is self.mixed_frame: | ||
with pytest.raises(TypeError): | ||
df > 0 | ||
continue | ||
|
||
# other is a frame | ||
cond = (df > 0)[1:] | ||
|
@@ -2594,6 +2607,10 @@ def _check_set(df, cond, check_dtypes=True): | |
|
||
for df in [default_frame, self.mixed_frame, self.mixed_float, | ||
self.mixed_int]: | ||
if compat.PY3 and df is self.mixed_frame: | ||
with pytest.raises(TypeError): | ||
df > 0 | ||
continue | ||
|
||
cond = df > 0 | ||
_check_set(df, cond) | ||
|
@@ -2759,9 +2776,14 @@ def test_where_datetime(self): | |
C=np.random.randn(5))) | ||
|
||
stamp = datetime(2013, 1, 3) | ||
result = df[df > stamp] | ||
with pytest.raises(TypeError): | ||
df > stamp | ||
|
||
result = df[df.iloc[:, :-1] > stamp] | ||
|
||
expected = df.copy() | ||
expected.loc[[0, 1], 'A'] = np.nan | ||
expected.loc[:, 'C'] = np.nan | ||
assert_frame_equal(result, expected) | ||
|
||
def test_where_none(self): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -578,7 +578,6 @@ def test_timedelta_ops_with_missing_values(self): | |
scalar1 = pd.to_timedelta('00:00:01') | ||
scalar2 = pd.to_timedelta('00:00:02') | ||
timedelta_NaT = pd.to_timedelta('NaT') | ||
NA = np.nan | ||
|
||
actual = scalar1 + scalar1 | ||
assert actual == scalar2 | ||
|
@@ -646,10 +645,10 @@ def test_timedelta_ops_with_missing_values(self): | |
actual = df1 - timedelta_NaT | ||
tm.assert_frame_equal(actual, dfn) | ||
|
||
actual = df1 + NA | ||
tm.assert_frame_equal(actual, dfn) | ||
actual = df1 - NA | ||
tm.assert_frame_equal(actual, dfn) | ||
with pytest.raises(TypeError): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this raising? this is a big change if you don't allow nan to act as NaT in ops There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the current behavior for Series and Index. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this needs a subsection in the whatsnew then, marked as an api change. |
||
df1 + np.nan | ||
with pytest.raises(TypeError): | ||
df1 - np.nan | ||
|
||
actual = df1 + pd.NaT # NaT is datetime, not timedelta | ||
tm.assert_frame_equal(actual, dfn) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1235,16 +1235,31 @@ def test_binop_other(self, op, value, dtype): | |
(operator.truediv, 'bool'), | ||
(operator.mod, 'i8'), | ||
(operator.mod, 'complex128'), | ||
(operator.mod, '<M8[ns]'), | ||
(operator.mod, '<m8[ns]'), | ||
(operator.pow, 'bool')} | ||
if (op, dtype) in skip: | ||
pytest.skip("Invalid combination {},{}".format(op, dtype)) | ||
|
||
e = DummyElement(value, dtype) | ||
s = pd.DataFrame({"A": [e.value, e.value]}, dtype=e.dtype) | ||
result = op(s, e).dtypes | ||
expected = op(s, value).dtypes | ||
assert_series_equal(result, expected) | ||
|
||
invalid = {(operator.pow, '<M8[ns]'), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pull this out and parametrize There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test already has two layers of parametrization; it isn't clear how to pull this out without making it more verbose+repetitive. Let me give this some thought and circle back. |
||
(operator.mod, '<M8[ns]'), | ||
(operator.truediv, '<M8[ns]'), | ||
(operator.mul, '<M8[ns]'), | ||
(operator.add, '<M8[ns]'), | ||
(operator.pow, '<m8[ns]'), | ||
(operator.mod, '<m8[ns]'), | ||
(operator.mul, '<m8[ns]')} | ||
|
||
if (op, dtype) in invalid: | ||
with pytest.raises(TypeError): | ||
result = op(s, e.value) | ||
else: | ||
# FIXME: Since dispatching to Series, this test no longer | ||
# asserts anything meaningful | ||
result = op(s, e.value).dtypes | ||
expected = op(s, value).dtypes | ||
assert_series_equal(result, expected) | ||
|
||
|
||
@pytest.mark.parametrize('typestr, holder', [ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is is pretty annoything that we have to do this, I would make an explict function maybe
is_any_scalar
I think as we have these types of checks all over. pls make an issue for this.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.