From 7681092bcbffafd01bed83621318cc5b8208e4e9 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 1 Aug 2018 18:32:33 -0700 Subject: [PATCH 01/13] dispatch scalar DataFrame ops to Series --- pandas/core/frame.py | 14 ++++ pandas/core/ops.py | 7 +- pandas/tests/frame/test_arithmetic.py | 6 +- pandas/tests/frame/test_indexing.py | 7 +- pandas/tests/frame/test_operators.py | 13 +++- .../indexes/timedeltas/test_arithmetic.py | 9 +-- pandas/tests/internals/test_internals.py | 25 +++++-- pandas/tests/test_arithmetic.py | 74 ++++++------------- pandas/tests/test_expressions.py | 4 +- 9 files changed, 86 insertions(+), 73 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 834cc3d188b39..0583c9cb97387 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4940,6 +4940,20 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True): return self._constructor(new_data) def _combine_const(self, other, func, errors='raise', try_cast=True): + if lib.is_scalar(other) or np.ndim(other) == 0: + new_data = {i: func(self.iloc[:, i], other) + for i, col in enumerate(self.columns)} + + result = self._constructor(new_data, index=self.index, copy=False) + result.columns = self.columns + return result + elif np.ndim(other) == 2 and other.shape == self.shape: + new_data = {i: func(self.iloc[:, i], other[:, i]) + for i in range(len(self.columns))} + result = self._constructor(new_data, index=self.index, copy=False) + result.columns = self.columns + return result + new_data = self._data.eval(func=func, other=other, errors=errors, try_cast=try_cast) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index f7d863bba82a7..7b6f273ff4304 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1311,7 +1311,7 @@ def na_op(x, y): with np.errstate(all='ignore'): result = method(y) if result is NotImplemented: - raise TypeError("invalid type comparison") + return invalid_comparison(x, y, op) else: result = op(x, y) @@ -1706,7 +1706,10 @@ def 
f(self, other, axis=default_axis, level=None, fill_value=None): if fill_value is not None: self = self.fillna(fill_value) - return self._combine_const(other, na_op, try_cast=True) + pass_func = na_op + if is_scalar(lib.item_from_zerodim(other)): + pass_func = op + return self._combine_const(other, pass_func, try_cast=True) f.__name__ = op_name diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 3d03a70553d2d..d889ba70a4148 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -27,8 +27,8 @@ def test_df_float_none_comparison(self): df = pd.DataFrame(np.random.randn(8, 3), index=range(8), columns=['A', 'B', 'C']) - with pytest.raises(TypeError): - df.__eq__(None) + result = df == None + assert not result.any().any() def test_df_string_comparison(self): df = pd.DataFrame([{"a": 1, "b": "foo"}, {"a": 2, "b": "bar"}]) @@ -201,8 +201,6 @@ def test_df_div_zero_series_does_not_commute(self): class TestFrameArithmetic(object): - @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano', - strict=True) def test_df_sub_datetime64_not_ns(self): df = pd.DataFrame(pd.date_range('20130101', periods=3)) dt64 = np.datetime64('2013-01-01') diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 5f229aca5c25b..81e5a132b4f19 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2759,9 +2759,14 @@ def test_where_datetime(self): C=np.random.randn(5))) stamp = datetime(2013, 1, 3) - result = df[df > stamp] + with pytest.raises(TypeError): + df > stamp + + result = df[df.iloc[:, :-1] > stamp] + expected = df.copy() expected.loc[[0, 1], 'A'] = np.nan + expected.loc[:, 'C'] = np.nan assert_frame_equal(result, expected) def test_where_none(self): diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index a11d673fd5d7f..1bc98fbb44d9d 100644 --- 
a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -206,10 +206,15 @@ def test_timestamp_compare(self): right_f = getattr(operator, right) # no nats - expected = left_f(df, Timestamp('20010109')) - result = right_f(Timestamp('20010109'), df) - assert_frame_equal(result, expected) - + if left in ['eq', 'ne']: + expected = left_f(df, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), df) + assert_frame_equal(result, expected) + else: + with pytest.raises(TypeError): + left_f(df, Timestamp('20010109')) + with pytest.raises(TypeError): + right_f(Timestamp('20010109'), df) # nats expected = left_f(df, Timestamp('nat')) result = right_f(Timestamp('nat'), df) diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index a5e75de2a267e..eafa1e886657b 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -898,7 +898,6 @@ def test_timedelta_ops_with_missing_values(self): scalar1 = pd.to_timedelta('00:00:01') scalar2 = pd.to_timedelta('00:00:02') timedelta_NaT = pd.to_timedelta('NaT') - NA = np.nan actual = scalar1 + scalar1 assert actual == scalar2 @@ -966,10 +965,10 @@ def test_timedelta_ops_with_missing_values(self): actual = df1 - timedelta_NaT tm.assert_frame_equal(actual, dfn) - actual = df1 + NA - tm.assert_frame_equal(actual, dfn) - actual = df1 - NA - tm.assert_frame_equal(actual, dfn) + with pytest.raises(TypeError): + actual = df1 + np.nan + with pytest.raises(TypeError): + actual = df1 - np.nan actual = df1 + pd.NaT # NaT is datetime, not timedelta tm.assert_frame_equal(actual, dfn) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 0b06775326ab1..34f22513106ba 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1235,16 +1235,31 @@ def test_binop_other(self, op, value, dtype): 
(operator.truediv, 'bool'), (operator.mod, 'i8'), (operator.mod, 'complex128'), - (operator.mod, ' Date: Wed, 1 Aug 2018 18:39:17 -0700 Subject: [PATCH 02/13] flake8 fixup --- pandas/tests/frame/test_arithmetic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index d889ba70a4148..89b8f747a3d26 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -27,7 +27,7 @@ def test_df_float_none_comparison(self): df = pd.DataFrame(np.random.randn(8, 3), index=range(8), columns=['A', 'B', 'C']) - result = df == None + result = df.__eq__(None) assert not result.any().any() def test_df_string_comparison(self): From 3fd46bc5a52c016b1bf0f828e74fff5d25771739 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 1 Aug 2018 20:45:32 -0700 Subject: [PATCH 03/13] kludge-fix indexing errors --- pandas/tests/frame/test_indexing.py | 23 +++++++++++++++++++++-- pandas/tests/test_arithmetic.py | 1 - 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 81e5a132b4f19..8d53f09401849 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -273,6 +273,9 @@ def test_getitem_boolean(self): # test df[df > 0] for df in [self.tsframe, self.mixed_frame, self.mixed_float, self.mixed_int]: + if compat.PY3 and df is self.mixed_frame: + # FIXME: This should not be 2/3-dependent + continue data = df._get_numeric_data() bif = df[df > 0] @@ -2468,8 +2471,9 @@ def test_boolean_indexing_mixed(self): assert_frame_equal(df2, expected) df['foo'] = 'test' - with tm.assert_raises_regex(TypeError, 'boolean setting ' - 'on mixed-type'): + msg = "boolean setting on mixed-type|not supported between" + with tm.assert_raises_regex(TypeError, msg): + # FIXME: This message should be the same in PY2/PY3 df[df > 0.3] = 1 def test_where(self): @@ -2502,6 
+2506,11 @@ def _check_get(df, cond, check_dtypes=True): # check getting for df in [default_frame, self.mixed_frame, self.mixed_float, self.mixed_int]: + if compat.PY3 and df is self.mixed_frame: + # FIXME: this should not be PY2/PY3-dependent + with pytest.raises(TypeError): + df > 0 + continue cond = df > 0 _check_get(df, cond) @@ -2549,6 +2558,11 @@ def _check_align(df, cond, other, check_dtypes=True): assert (rs.dtypes == df.dtypes).all() for df in [self.mixed_frame, self.mixed_float, self.mixed_int]: + if compat.PY3 and df is self.mixed_frame: + # FIXME: This should not be PY2/PY3-dependent + with pytest.raises(TypeError): + df > 0 + continue # other is a frame cond = (df > 0)[1:] @@ -2594,6 +2608,11 @@ def _check_set(df, cond, check_dtypes=True): for df in [default_frame, self.mixed_frame, self.mixed_float, self.mixed_int]: + if compat.PY3 and df is self.mixed_frame: + # FIXME: This should not be PY2/PY3-dependent + with pytest.raises(TypeError): + df > 0 + continue cond = df > 0 _check_set(df, cond) diff --git a/pandas/tests/test_arithmetic.py b/pandas/tests/test_arithmetic.py index 486458525ffc7..6ba6698d6d12e 100644 --- a/pandas/tests/test_arithmetic.py +++ b/pandas/tests/test_arithmetic.py @@ -12,7 +12,6 @@ from pandas.core import ops from pandas.errors import NullFrequencyError -from pandas._libs.tslibs import IncompatibleFrequency from pandas import ( Timedelta, Timestamp, NaT, Series, TimedeltaIndex, DatetimeIndex) From caf2da04d51b8e144495cb722bc4152d049c2687 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 2 Aug 2018 22:20:41 -0700 Subject: [PATCH 04/13] scale back py2/py3 compat goals --- pandas/core/frame.py | 6 ------ pandas/core/ops.py | 11 +++++------ pandas/tests/frame/test_indexing.py | 6 +----- pandas/tests/test_arithmetic.py | 4 +++- 4 files changed, 9 insertions(+), 18 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5ce89315a42b7..b8c10b5008e1b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ 
-4956,12 +4956,6 @@ def _combine_const(self, other, func, errors='raise', try_cast=True): result = self._constructor(new_data, index=self.index, copy=False) result.columns = self.columns return result - elif np.ndim(other) == 2 and other.shape == self.shape: - new_data = {i: func(self.iloc[:, i], other[:, i]) - for i in range(len(self.columns))} - result = self._constructor(new_data, index=self.index, copy=False) - result.columns = self.columns - return result new_data = self._data.eval(func=func, other=other, errors=errors, diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 7b6f273ff4304..7a0385e759f0f 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1327,6 +1327,10 @@ def wrapper(self, other, axis=None): res_name = get_op_result_name(self, other) + if isinstance(other, list): + # TODO: same for tuples? + other = np.asarray(other) + if isinstance(other, ABCDataFrame): # pragma: no cover # Defer to DataFrame implementation; fail early return NotImplemented @@ -1426,8 +1430,6 @@ def wrapper(self, other, axis=None): else: values = self.get_values() - if isinstance(other, list): - other = np.asarray(other) with np.errstate(all='ignore'): res = na_op(values, other) @@ -1706,10 +1708,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if fill_value is not None: self = self.fillna(fill_value) - pass_func = na_op - if is_scalar(lib.item_from_zerodim(other)): - pass_func = op - return self._combine_const(other, pass_func, try_cast=True) + return self._combine_const(other, na_op, try_cast=True) f.__name__ = op_name diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index c3ed7f41d1f3a..7b014e4c40015 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -274,7 +274,6 @@ def test_getitem_boolean(self): for df in [self.tsframe, self.mixed_frame, self.mixed_float, self.mixed_int]: if compat.PY3 and df is self.mixed_frame: - # FIXME: This should not be 
2/3-dependent continue data = df._get_numeric_data() @@ -2473,7 +2472,7 @@ def test_boolean_indexing_mixed(self): df['foo'] = 'test' msg = "boolean setting on mixed-type|not supported between" with tm.assert_raises_regex(TypeError, msg): - # FIXME: This message should be the same in PY2/PY3 + # TODO: This message should be the same in PY2/PY3 df[df > 0.3] = 1 def test_where(self): @@ -2507,7 +2506,6 @@ def _check_get(df, cond, check_dtypes=True): for df in [default_frame, self.mixed_frame, self.mixed_float, self.mixed_int]: if compat.PY3 and df is self.mixed_frame: - # FIXME: this should not be PY2/PY3-dependent with pytest.raises(TypeError): df > 0 continue @@ -2559,7 +2557,6 @@ def _check_align(df, cond, other, check_dtypes=True): for df in [self.mixed_frame, self.mixed_float, self.mixed_int]: if compat.PY3 and df is self.mixed_frame: - # FIXME: This should not be PY2/PY3-dependent with pytest.raises(TypeError): df > 0 continue @@ -2609,7 +2606,6 @@ def _check_set(df, cond, check_dtypes=True): for df in [default_frame, self.mixed_frame, self.mixed_float, self.mixed_int]: if compat.PY3 and df is self.mixed_frame: - # FIXME: This should not be PY2/PY3-dependent with pytest.raises(TypeError): df > 0 continue diff --git a/pandas/tests/test_arithmetic.py b/pandas/tests/test_arithmetic.py index 6ba6698d6d12e..5fccb9c01d33e 100644 --- a/pandas/tests/test_arithmetic.py +++ b/pandas/tests/test_arithmetic.py @@ -188,7 +188,9 @@ def test_td64arr_sub_timestamp_raises(self, box): idx = TimedeltaIndex(['1 day', '2 day']) idx = tm.box_expected(idx, box) - msg = "cannot subtract a datelike from|Could not operate" + msg = ("cannot subtract a datelike from|" + "Could not operate|" + "cannot perform operation") with tm.assert_raises_regex(TypeError, msg): idx - Timestamp('2011-01-01') From 0513e0b572889ff5373781c49afe2ec76138b742 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 3 Aug 2018 12:52:07 -0700 Subject: [PATCH 05/13] update error message --- 
pandas/tests/frame/test_indexing.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 7b014e4c40015..6a4cf1ffc6071 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -2470,7 +2470,9 @@ def test_boolean_indexing_mixed(self): assert_frame_equal(df2, expected) df['foo'] = 'test' - msg = "boolean setting on mixed-type|not supported between" + msg = ("boolean setting on mixed-type|" + "not supported between|" + "unorderable types") with tm.assert_raises_regex(TypeError, msg): # TODO: This message should be the same in PY2/PY3 df[df > 0.3] = 1 From 3a7b7826787db33d4f45b917174f8d20d5a40c87 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 3 Aug 2018 12:58:19 -0700 Subject: [PATCH 06/13] try to fix test_expressions failure going down the wrong path --- pandas/core/ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 7a0385e759f0f..4eb68dad09813 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1708,7 +1708,8 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): if fill_value is not None: self = self.fillna(fill_value) - return self._combine_const(other, na_op, try_cast=True) + pass_op = op if lib.is_scalar(other) else na_op + return self._combine_const(other, pass_op, try_cast=True) f.__name__ = op_name From 6636565d5566a6667bcd8e616fbaeefa177ed7bf Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 3 Aug 2018 21:36:50 -0700 Subject: [PATCH 07/13] dummy commit to force CI --- pandas/tests/indexes/timedeltas/test_arithmetic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/timedeltas/test_arithmetic.py b/pandas/tests/indexes/timedeltas/test_arithmetic.py index eafa1e886657b..9a44cc91b29f7 100644 --- a/pandas/tests/indexes/timedeltas/test_arithmetic.py +++ 
b/pandas/tests/indexes/timedeltas/test_arithmetic.py @@ -966,9 +966,9 @@ def test_timedelta_ops_with_missing_values(self): tm.assert_frame_equal(actual, dfn) with pytest.raises(TypeError): - actual = df1 + np.nan + df1 + np.nan with pytest.raises(TypeError): - actual = df1 - np.nan + df1 - np.nan actual = df1 + pd.NaT # NaT is datetime, not timedelta tm.assert_frame_equal(actual, dfn) From 3c65f939fab779dbd9fedd676a663a837783beba Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 7 Aug 2018 11:57:40 -0700 Subject: [PATCH 08/13] post-merge cleanup --- pandas/tests/test_arithmetic.py | 92 ++++----------------------------- 1 file changed, 9 insertions(+), 83 deletions(-) diff --git a/pandas/tests/test_arithmetic.py b/pandas/tests/test_arithmetic.py index 0821ffbfd4ee3..d01311e94dd1c 100644 --- a/pandas/tests/test_arithmetic.py +++ b/pandas/tests/test_arithmetic.py @@ -77,11 +77,6 @@ def box_df_fail(request): class TestNumericArraylikeArithmeticWithTimedeltaScalar(object): - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ]) @pytest.mark.parametrize('index', [ pd.Int64Index(range(1, 11)), pd.UInt64Index(range(1, 11)), @@ -175,11 +170,6 @@ def test_td64arr_add_sub_float(self, box, op, other): with pytest.raises(TypeError): op(tdi, other) - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ], ids=lambda x: x.__name__) @pytest.mark.parametrize('freq', [None, 'H']) def test_td64arr_sub_period(self, box, freq): # GH#13078 @@ -219,11 +209,6 @@ def test_td64arr_sub_pi(self, box, tdi_freq, pi_freq): # ------------------------------------------------------------- # Binary operations td64 arraylike and datetime-like - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ], ids=lambda x: x.__name__) def test_td64arr_sub_timestamp_raises(self, box): idx = TimedeltaIndex(['1 day', '2 day']) idx = tm.box_expected(idx, box) @@ -234,11 +219,6 @@ def test_td64arr_sub_timestamp_raises(self, box): with 
tm.assert_raises_regex(TypeError, msg): idx - Timestamp('2011-01-01') - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ], ids=lambda x: x.__name__) def test_td64arr_add_timestamp(self, box): idx = TimedeltaIndex(['1 day', '2 day']) expected = DatetimeIndex(['2011-01-02', '2011-01-03']) @@ -249,11 +229,6 @@ def test_td64arr_add_timestamp(self, box): result = idx + Timestamp('2011-01-01') tm.assert_equal(result, expected) - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ], ids=lambda x: x.__name__) def test_td64_radd_timestamp(self, box): idx = TimedeltaIndex(['1 day', '2 day']) expected = DatetimeIndex(['2011-01-02', '2011-01-03']) @@ -265,11 +240,6 @@ def test_td64_radd_timestamp(self, box): result = Timestamp('2011-01-01') + idx tm.assert_equal(result, expected) - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ], ids=lambda x: x.__name__) def test_td64arr_add_sub_timestamp(self, box): # GH#11925 ts = Timestamp('2012-01-01') @@ -786,8 +756,7 @@ class TestTimedeltaArraylikeMulDivOps(object): # Multiplication # organized with scalar others first, then array-like - def test_td64arr_mul_int(self, box_df_fail): - box = box_df_fail # DataFrame op returns object instead of m8[ns] + def test_td64arr_mul_int(self, box): idx = TimedeltaIndex(np.arange(5, dtype='int64')) idx = tm.box_expected(idx, box) @@ -806,8 +775,7 @@ def test_td64arr_mul_tdlike_scalar_raises(self, delta, box): with pytest.raises(TypeError): rng * delta - def test_tdi_mul_int_array_zerodim(self, box_df_fail): - box = box_df_fail # DataFrame op returns object dtype + def test_tdi_mul_int_array_zerodim(self, box): rng5 = np.arange(5, dtype='int64') idx = TimedeltaIndex(rng5) expected = TimedeltaIndex(rng5 * 5) @@ -883,24 +851,21 @@ def test_tdi_rmul_arraylike(self, other, box_df_fail): # ------------------------------------------------------------------ # __div__ - def test_td64arr_div_nat_invalid(self, box_df_fail): + def 
test_td64arr_div_nat_invalid(self, box): # don't allow division by NaT (maybe could in the future) - box = box_df_fail # DataFrame returns all-NaT instead of raising rng = timedelta_range('1 days', '10 days', name='foo') rng = tm.box_expected(rng, box) with pytest.raises(TypeError): rng / pd.NaT - def test_td64arr_div_int(self, box_df_fail): - box = box_df_fail # DataFrame returns object dtype instead of m8[ns] + def test_td64arr_div_int(self, box): idx = TimedeltaIndex(np.arange(5, dtype='int64')) idx = tm.box_expected(idx, box) result = idx / 1 tm.assert_equal(result, idx) - def test_tdi_div_tdlike_scalar(self, delta, box_df_fail): - box = box_df_fail # DataFrame op returns m8[ns] instead of float64 + def test_tdi_div_tdlike_scalar(self, delta, box): rng = timedelta_range('1 days', '10 days', name='foo') expected = pd.Float64Index((np.arange(10) + 1) * 12, name='foo') @@ -910,8 +875,7 @@ def test_tdi_div_tdlike_scalar(self, delta, box_df_fail): result = rng / delta tm.assert_equal(result, expected) - def test_tdi_div_tdlike_scalar_with_nat(self, delta, box_df_fail): - box = box_df_fail # DataFrame op returns m8[ns] instead of float64 + def test_tdi_div_tdlike_scalar_with_nat(self, delta, box): rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') expected = pd.Float64Index([12, np.nan, 24], name='foo') @@ -924,11 +888,6 @@ def test_tdi_div_tdlike_scalar_with_nat(self, delta, box_df_fail): # ------------------------------------------------------------------ # __floordiv__, __rfloordiv__ - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ], ids=lambda x: x.__name__) def test_td64arr_floordiv_tdscalar(self, box, scalar_td): # GH#18831 td1 = Series([timedelta(minutes=5, seconds=3)] * 3) @@ -942,11 +901,6 @@ def test_td64arr_floordiv_tdscalar(self, box, scalar_td): result = td1 // scalar_td tm.assert_equal(result, expected) - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ], ids=lambda x: x.__name__) def 
test_td64arr_rfloordiv_tdscalar(self, box, scalar_td): # GH#18831 if box is pd.DataFrame and isinstance(scalar_td, np.timedelta64): @@ -963,11 +917,6 @@ def test_td64arr_rfloordiv_tdscalar(self, box, scalar_td): result = scalar_td // td1 tm.assert_equal(result, expected) - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ], ids=lambda x: x.__name__) def test_td64arr_rfloordiv_tdscalar_explicit(self, box, scalar_td): # GH#18831 td1 = Series([timedelta(minutes=5, seconds=3)] * 3) @@ -983,15 +932,13 @@ def test_td64arr_rfloordiv_tdscalar_explicit(self, box, scalar_td): result = td1.__rfloordiv__(scalar_td) tm.assert_equal(result, expected) - def test_td64arr_floordiv_int(self, box_df_fail): - box = box_df_fail # DataFrame returns object dtype + def test_td64arr_floordiv_int(self, box): idx = TimedeltaIndex(np.arange(5, dtype='int64')) idx = tm.box_expected(idx, box) result = idx // 1 tm.assert_equal(result, idx) - def test_td64arr_floordiv_tdlike_scalar(self, delta, box_df_fail): - box = box_df_fail # DataFrame returns m8[ns] instead of int64 dtype + def test_td64arr_floordiv_tdlike_scalar(self, delta, box): tdi = timedelta_range('1 days', '10 days', name='foo') expected = pd.Int64Index((np.arange(10) + 1) * 12, name='foo') @@ -1007,9 +954,8 @@ def test_td64arr_floordiv_tdlike_scalar(self, delta, box_df_fail): Timedelta('10m7s'), Timedelta('10m7s').to_timedelta64() ], ids=lambda x: type(x).__name__) - def test_td64arr_rfloordiv_tdlike_scalar(self, scalar_td, box_df_fail): + def test_td64arr_rfloordiv_tdlike_scalar(self, scalar_td, box): # GH#19125 - box = box_df_fail # DataFrame op returns m8[ns] instead of f8 dtype tdi = TimedeltaIndex(['00:05:03', '00:05:03', pd.NaT], freq=None) expected = pd.Index([2.0, 2.0, np.nan]) @@ -1028,11 +974,6 @@ def test_td64arr_rfloordiv_tdlike_scalar(self, scalar_td, box_df_fail): # ------------------------------------------------------------------ # Operations with invalid others - 
@pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ], ids=lambda x: x.__name__) def test_td64arr_mul_tdscalar_invalid(self, box, scalar_td): td1 = Series([timedelta(minutes=5, seconds=3)] * 3) td1.iloc[2] = np.nan @@ -1065,11 +1006,6 @@ def test_td64arr_mul_td64arr_raises(self, box): # ------------------------------------------------------------------ # Operations with numeric others - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ], ids=lambda x: x.__name__) @pytest.mark.parametrize('one', [1, np.array(1), 1.0, np.array(1.0)]) def test_td64arr_mul_numeric_scalar(self, box, one, tdser): # GH#4521 @@ -1094,11 +1030,6 @@ def test_td64arr_mul_numeric_scalar(self, box, one, tdser): result = (2 * one) * tdser tm.assert_equal(result, expected) - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ], ids=lambda x: x.__name__) @pytest.mark.parametrize('two', [2, 2.0, np.array(2), np.array(2.0)]) def test_td64arr_div_numeric_scalar(self, box, two, tdser): # GH#4521 @@ -1238,11 +1169,6 @@ def test_float_series_rdiv_td64arr(self, box, names): class TestTimedeltaArraylikeInvalidArithmeticOps(object): - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pd.DataFrame - ], ids=lambda x: x.__name__) @pytest.mark.parametrize('scalar_td', [ timedelta(minutes=5, seconds=4), Timedelta('5m4s'), From 4703db7863625f77a4a7d779202c46aa144b55be Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 8 Aug 2018 11:31:58 -0700 Subject: [PATCH 09/13] mark tests with GH Issues --- pandas/tests/arithmetic/test_datetime64.py | 47 +++++++++++++----- pandas/tests/frame/test_arithmetic.py | 56 ++++++++++++++++++++++ pandas/tests/test_arithmetic.py | 1 + 3 files changed, 91 insertions(+), 13 deletions(-) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index b09d3c3183803..4d1f295f8667b 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ 
b/pandas/tests/arithmetic/test_datetime64.py @@ -61,6 +61,15 @@ def test_tz_aware_scalar_comparison(self, timestamps): expected = pd.DataFrame({'test': [False, False]}) tm.assert_frame_equal(df == -1, expected) + def test_dt64_nat_comparison(self): + # GH#22242, GH#22163 DataFrame considered NaT == ts incorrectly + ts = pd.Timestamp.now() + df = pd.DataFrame([ts, pd.NaT]) + expected = pd.DataFrame([True, False]) + + result = df == ts + tm.assert_frame_equal(result, expected) + class TestDatetime64SeriesComparison(object): def test_dt64_ser_cmp_date_warning(self): @@ -571,10 +580,22 @@ def test_dti_cmp_object_dtype(self): # Arithmetic class TestFrameArithmetic(object): + def test_dt64arr_sub_dtscalar(self, box): + # GH#8554, GH#22163 DataFrame op should _not_ return dt64 dtype + idx = pd.date_range('2013-01-01', periods=3) + idx = tm.box_expected(idx, box) + + ts = pd.Timestamp('2013-01-01') + # TODO: parametrize over scalar types + + expected = pd.TimedeltaIndex(['0 Days', '1 Day', '2 Days']) + expected = tm.box_expected(expected, box) + + result = idx - ts + tm.assert_equal(result, expected) - @pytest.mark.xfail(reason='GH#7996 datetime64 units not converted to nano', - strict=True) def test_df_sub_datetime64_not_ns(self): + # GH#7996, GH#22163 ensure non-nano datetime64 is converted to nano df = pd.DataFrame(pd.date_range('20130101', periods=3)) dt64 = np.datetime64('2013-01-01') assert dt64.dtype == 'datetime64[D]' @@ -616,9 +637,11 @@ def test_dti_add_sub_float(self, op, other): with pytest.raises(TypeError): op(dti, other) - def test_dti_add_timestamp_raises(self): + def test_dti_add_timestamp_raises(self, box): + # GH#22163 ensure DataFrame doesn't cast Timestamp to i8 idx = DatetimeIndex(['2011-01-01', '2011-01-02']) - msg = "cannot add DatetimeIndex and Timestamp" + idx = tm.box_expected(idx, box) + msg = "cannot add" with tm.assert_raises_regex(TypeError, msg): idx + Timestamp('2011-01-01') @@ -714,13 +737,17 @@ def test_dti_add_intarray_no_freq(self, 
box): # ------------------------------------------------------------- # Binary operations DatetimeIndex and timedelta-like - def test_dti_add_timedeltalike(self, tz_naive_fixture, delta): + def test_dti_add_timedeltalike(self, tz_naive_fixture, delta, box): + # GH#22005, GH#22163 check DataFrame doesn't raise TypeError tz = tz_naive_fixture rng = pd.date_range('2000-01-01', '2000-02-01', tz=tz) + rng = tm.box_expected(rng, box) + result = rng + delta expected = pd.date_range('2000-01-01 02:00', '2000-02-01 02:00', tz=tz) - tm.assert_index_equal(result, expected) + expected = tm.box_expected(expected, box) + tm.assert_equal(result, expected) def test_dti_iadd_timedeltalike(self, tz_naive_fixture, delta): tz = tz_naive_fixture @@ -1198,14 +1225,8 @@ def test_dti_with_offset_series(self, tz_naive_fixture, names): res3 = dti - other tm.assert_series_equal(res3, expected_sub) - @pytest.mark.parametrize('box', [ - pd.Index, - pd.Series, - pytest.param(pd.DataFrame, - marks=pytest.mark.xfail(reason="Returns object dtype", - strict=True)) - ], ids=lambda x: x.__name__) def test_dti_add_offset_tzaware(self, tz_aware_fixture, box): + # GH#21610, GH#22163 ensure DataFrame doesn't return object-dtype timezone = tz_aware_fixture if timezone == 'US/Pacific': dates = date_range('2012-11-01', periods=3, tz=timezone) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index eea8b868bb2ff..995bd7d9db483 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -12,6 +12,53 @@ # Comparisons class TestFrameComparisons(object): + def test_flex_comparison_nat(self): + # GH#15697, GH#22163 df.eq(pd.NaT) should behave like df == pd.NaT, + # and _definitely_ not be NaN + df = pd.DataFrame([pd.NaT]) + + result = df == pd.NaT + # result.iloc[0, 0] is a np.bool_ object + assert result.iloc[0, 0].item() is False + + result = df.eq(pd.NaT) + assert result.iloc[0, 0].item() is False + + result = df != pd.NaT + assert 
result.iloc[0, 0].item() is True + + result = df.ne(pd.NaT) + assert result.iloc[0, 0].item() is True + + def test_mixed_comparison(self): + # GH#13128, GH#22163 != datetime64 vs non-dt64 should be False, + # not raise TypeError + # (this appears to be fixed before #22163, not sure when) + df = pd.DataFrame([['1989-08-01', 1], ['1989-08-01', 2]]) + other = pd.DataFrame([['a', 'b'], ['c', 'd']]) + + result = df == other + assert not result.any().any() + + result = df != other + assert result.all().all() + + def test_df_numeric_cmp_dt64_raises(self): + # GH#8932, GH#22163 + ts = pd.Timestamp.now() + df = pd.DataFrame({'x': range(5)}) + with pytest.raises(TypeError): + df > ts + with pytest.raises(TypeError): + df < ts + with pytest.raises(TypeError): + ts < df + with pytest.raises(TypeError): + ts > df + + assert not (df == ts).any().any() + assert (df != ts).all().all() + def test_df_boolean_comparison_error(self): # GH#4576 # boolean comparisons with a tuple/list give unexpected results @@ -81,6 +128,15 @@ def test_df_add_flex_filled_mixed_dtypes(self): class TestFrameArithmetic(object): + def test_df_bool_mul_int(self): + # GH#22047, GH#22163 multiplication by 1 should result in int dtype, + # not object dtype + df = pd.DataFrame([[False, True], [False, False]]) + result = df * 1 + assert (result.dtypes == np.int64).all() + + result = 1 * df + assert (result.dtypes == np.int64).all() @pytest.mark.parametrize('data', [ [1, 2, 3], diff --git a/pandas/tests/test_arithmetic.py b/pandas/tests/test_arithmetic.py index d01311e94dd1c..2f3321a28c042 100644 --- a/pandas/tests/test_arithmetic.py +++ b/pandas/tests/test_arithmetic.py @@ -866,6 +866,7 @@ def test_td64arr_div_int(self, box): tm.assert_equal(result, idx) def test_tdi_div_tdlike_scalar(self, delta, box): + # GH#20088, GH#22163 ensure DataFrame returns correct dtype rng = timedelta_range('1 days', '10 days', name='foo') expected = pd.Float64Index((np.arange(10) + 1) * 12, name='foo') From 
c090713ced2708737f4517f6cfd343b6448786d1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 8 Aug 2018 11:47:00 -0700 Subject: [PATCH 10/13] whatsnew? everything is new --- doc/source/whatsnew/v0.24.0.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 7c46a1c7b7f27..d398144251516 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -452,6 +452,7 @@ Datetimelike API Changes - :class:`DateOffset` objects are now immutable. Attempting to alter one of these will now raise ``AttributeError`` (:issue:`21341`) - :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`) - :func:`cut` and :func:`qcut` now returns a :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`) +- :class:`DataFrame` with ``timedelta64[ns]`` dtypes addition or subtraction of ``NaN`` will raise ``TypeError`` instead of returning all-``NaT``; this is consistent with the behavior of :class:`Series` and :class:`Index` operations (:issue:`22163`) .. 
_whatsnew_0240.api.other: @@ -537,6 +538,16 @@ Datetimelike - Bug in :class:`DatetimeIndex` comparisons where string comparisons incorrectly raises ``TypeError`` (:issue:`22074`) - Bug in :class:`DatetimeIndex` comparisons when comparing against ``timedelta64[ns]`` dtyped arrays; in some cases ``TypeError`` was incorrectly raised, in others it incorrectly failed to raise (:issue:`22074`) - Bug in :class:`DatetimeIndex` comparisons when comparing against object-dtyped arrays (:issue:`22074`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``Timedelta``-like objects (:issue:`22005`,:issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype addition and subtraction with ``DateOffset`` objects returning an ``object`` dtype instead of ``datetime64[ns]`` dtype (:issue:`21610`,:issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype comparing against ``NaT`` incorrectly (:issue:`22242`,:issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``Timestamp``-like object incorrectly returned ``datetime64[ns]`` dtype instead of ``timedelta64[ns]`` dtype (:issue:`8554`,:issue:`22163`) +- Bug in :class:`DataFrame` with ``datetime64[ns]`` dtype subtracting ``np.datetime64`` object with non-nanosecond unit failing to convert to nanoseconds (:issue:`18874`,:issue:`22163`) +- Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`,:issue:`22163`) +- Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`) +- Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`) +- Bug in :class:`DataFrame` with ``timedelta64[ns]`` dtype division by ``Timedelta``-like scalar incorrectly returning 
``timedelta64[ns]`` dtype instead of ``float64`` dtype (:issue:`20088`,:issue:`22163`) +- Timedelta ^^^^^^^^^ @@ -584,6 +595,7 @@ Numeric when supplied with a list of functions and ``axis=1`` (e.g. ``df.apply(['sum', 'mean'], axis=1)``), a ``TypeError`` was wrongly raised. For all three methods such calculation are now done correctly. (:issue:`16679`). - Bug in :class:`Series` comparison against datetime-like scalars and arrays (:issue:`22074`) +- Bug in :class:`DataFrame` multiplication between boolean dtype and integer returning ``object`` dtype instead of integer dtype (:issue:`22047`,:issue:`22163`) - Strings From d683cb3b5691202297772ba3c538a798bf653b9f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 8 Aug 2018 22:23:26 -0700 Subject: [PATCH 11/13] edit test for appveyor compat --- pandas/tests/frame/test_arithmetic.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 995bd7d9db483..0b69e29256ee8 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -133,10 +133,15 @@ def test_df_bool_mul_int(self): # not object dtype df = pd.DataFrame([[False, True], [False, False]]) result = df * 1 - assert (result.dtypes == np.int64).all() + + # On appveyor this comes back as np.int32 instead of np.int64, + # so we check dtype.kind instead of just dtype + kinds = result.dtypes.apply(lambda x: x.kind) + assert (kinds == 'i').all() result = 1 * df - assert (result.dtypes == np.int64).all() + kinds = result.dtypes.apply(lambda x: x.kind) + assert (kinds == 'i').all() @pytest.mark.parametrize('data', [ [1, 2, 3], From dbdea1af168e0fd7dfd35d3d6b1df1d2705638b3 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 9 Aug 2018 12:45:26 -0700 Subject: [PATCH 12/13] API Changes section for DataFrame[timedelta64] - np.nan --- doc/source/whatsnew/v0.24.0.txt | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 
2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 55dc887c920c1..a09e32d8a315d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -216,7 +216,7 @@ New Behavior: idx = pd.interval_range(0, 4) idx.values -This mirrors ``CateogricalIndex.values``, which returns a ``Categorical``. +This mirrors ``CategoricalIndex.values``, which returns a ``Categorical``. For situations where you need an ``ndarray`` of ``Interval`` objects, use :meth:`numpy.asarray` or ``idx.astype(object)``. @@ -406,6 +406,34 @@ Previous Behavior: In [3]: pi - pi[0] Out[3]: Int64Index([0, 1, 2], dtype='int64') + +.. _whatsnew_0240.api.timedelta64_subtract_nan: + +Addition/Subtraction of ``NaN`` from :class:`DataFrame` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Adding or subtracting ``NaN`` from a :class:`DataFrame` column with +``timedelta64[ns]`` dtype will now raise a ``TypeError`` instead of returning +all-``NaT``. This is for compatibility with ``TimedeltaIndex`` and +``Series`` behavior (:issue:`22163`) + +.. ipython:: python + + df = pd.DataFrame([pd.Timedelta(days=1)]) + df - np.nan + +Previous Behavior: + +.. code-block:: ipython + + In [4]: df = pd.DataFrame([pd.Timedelta(days=1)]) + + In [5]: df - np.nan + Out[5]: + 0 + 0 NaT + + .. _whatsnew_0240.api.extension: ExtensionType Changes @@ -453,7 +481,6 @@ Datetimelike API Changes - :class:`DateOffset` objects are now immutable.
Attempting to alter one of these will now raise ``AttributeError`` (:issue:`21341`) - :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`) - :func:`cut` and :func:`qcut` now returns a :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`) -- :class:`DataFrame` with ``timedelta64[ns]`` dtypes addition or subtraction of ``NaN`` will raise ``TypeError`` instead of returning all-``NaT``; this is consistent with the behavior of :class:`Series` and :class:`Index` operations (:issue:`22163`) .. _whatsnew_0240.api.other: From f1edec44426110a53eb89bc20bf8c62cb30b07d7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 9 Aug 2018 14:00:43 -0700 Subject: [PATCH 13/13] un-xfail --- pandas/tests/arithmetic/test_numeric.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 44aaba2885bf7..8c1450650c0a5 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -55,13 +55,6 @@ def test_ops_series(self): tm.assert_series_equal(expected, td * other) tm.assert_series_equal(expected, other * td) - @pytest.mark.parametrize('box', [ - pd.Index, - Series, - pytest.param(pd.DataFrame, - marks=pytest.mark.xfail(reason="block.eval incorrect", - strict=True)) - ]) @pytest.mark.parametrize('index', [ pd.Int64Index(range(1, 11)), pd.UInt64Index(range(1, 11)), @@ -76,7 +69,7 @@ def test_ops_series(self): def test_numeric_arr_mul_tdscalar(self, scalar_td, index, box): # GH#19333 - if (box is Series and + if (box in [Series, pd.DataFrame] and type(scalar_td) is timedelta and index.dtype == 'f8'): raise pytest.xfail(reason="Cannot multiply timedelta by float")