diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 305d7b97816341..925eaac45045d2 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -8,7 +8,6 @@ from pandas import DataFrame, Index, MultiIndex, Series, concat, merge from pandas.tests.reshape.merge.test_merge import NGROUPS, N, get_test_data import pandas.util.testing as tm -from pandas.util.testing import assert_frame_equal a_ = np.array @@ -194,7 +193,7 @@ def test_join_on(self): expected = DataFrame( {"key": ["a", "a", "b", "b", "c"], "value": [0, 0, 1, 1, 2]} ) - assert_frame_equal(joined, expected) + tm.assert_frame_equal(joined, expected) # Test when some are missing df_a = DataFrame([[1], [2], [3]], index=["a", "b", "c"], columns=["one"]) @@ -281,7 +280,7 @@ def test_join_on_pass_vector(self): join_col = self.target.pop("C") result = self.target.join(self.source, on=join_col) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_join_with_len0(self): # nothing to merge @@ -314,12 +313,12 @@ def test_join_on_singlekey_list(self): joined = df.join(df2, on=["key"]) expected = df.join(df2, on="key") - assert_frame_equal(joined, expected) + tm.assert_frame_equal(joined, expected) def test_join_on_series(self): result = self.target.join(self.source["MergedA"], on="C") expected = self.target.join(self.source[["MergedA"]], on="C") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_join_on_series_buglet(self): # GH #638 @@ -341,11 +340,11 @@ def test_join_index_mixed(self, join_type): joined = df1.join(df2, how=join_type) expected = _join_by_hand(df1, df2, how=join_type) - assert_frame_equal(joined, expected) + tm.assert_frame_equal(joined, expected) joined = df2.join(df1, how=join_type) expected = _join_by_hand(df2, df1, how=join_type) - assert_frame_equal(joined, expected) + tm.assert_frame_equal(joined, expected) def test_join_index_mixed_overlap(self): df1 = DataFrame( @@ -377,7 +376,7 @@ def test_join_index_mixed_overlap(self): df1.columns = expected_columns[:4] df2.columns = expected_columns[4:] expected = _join_by_hand(df1, df2) - assert_frame_equal(joined, expected) + tm.assert_frame_equal(joined, expected) def test_join_empty_bug(self): # generated an exception in 0.4.3 @@ -416,7 +415,7 @@ def test_join_multiindex(self): ex_index = Index(index1.values).union(Index(index2.values)) expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) expected.index.names = index1.names - assert_frame_equal(joined, expected) + tm.assert_frame_equal(joined, expected) assert joined.index.names == index1.names df1 = df1.sort_index(level=1) @@ -427,7 +426,7 @@ def test_join_multiindex(self): expected = df1.reindex(ex_index).join(df2.reindex(ex_index)) expected.index.names = index1.names - assert_frame_equal(joined, expected) + tm.assert_frame_equal(joined, expected) assert joined.index.names == index1.names def test_join_inner_multiindex(self): @@ -475,7 +474,7 @@ def test_join_inner_multiindex(self): how="inner", sort=False, ) - assert_frame_equal(joined, expected2.reindex_like(joined)) + tm.assert_frame_equal(joined, expected2.reindex_like(joined)) expected2 = merge( to_join, @@ -490,7 +489,7 @@ def test_join_inner_multiindex(self): expected.index = joined.index assert joined.index.is_monotonic - assert_frame_equal(joined, expected) + tm.assert_frame_equal(joined, expected) # _assert_same_contents(expected, expected2.loc[:, expected.columns]) @@ -528,7 +527,7 @@ def test_join_float64_float32(self): assert rs.dtypes["md"] == "float32" xp = xpdf.merge(s, left_on="a", right_index=True) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_join_many_non_unique_index(self): df1 = DataFrame({"a": [1, 1], "b": [1, 1], "c": [10, 20]}) @@ -547,7 +546,7 @@ def test_join_many_non_unique_index(self): expected = expected[result.columns] expected["a"] = expected.a.astype("int64") expected["b"] = expected.b.astype("int64") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) df1 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 1], "c": [10, 20, 30]}) df2 = DataFrame({"a": [1, 1, 1], "b": [1, 1, 2], "d": [100, 200, 300]}) @@ -562,7 +561,7 @@ def test_join_many_non_unique_index(self): result = result.reset_index() - assert_frame_equal(result, expected.loc[:, result.columns]) + tm.assert_frame_equal(result, expected.loc[:, result.columns]) # GH 11519 df = DataFrame( @@ -580,9 +579,9 @@ def test_join_many_non_unique_index(self): outer = df.join(s, how="outer") left = df.join(s, how="left") right = df.join(s, how="right") - assert_frame_equal(inner, outer) - assert_frame_equal(inner, left) - assert_frame_equal(inner, right) + tm.assert_frame_equal(inner, outer) + tm.assert_frame_equal(inner, left) + tm.assert_frame_equal(inner, right) def test_join_sort(self): left = DataFrame({"key": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 4]}) @@ -597,7 +596,7 @@ def test_join_sort(self): }, index=[1, 2, 0, 3], ) - assert_frame_equal(joined, expected) + tm.assert_frame_equal(joined, expected) # smoke test joined = left.join(right, on="key", sort=False) @@ -684,7 +683,7 @@ def test_join_many_mixed(self): df3 = df.loc[:, ["key"]] result = df1.join([df2, df3]) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) def test_join_dups(self): @@ -702,7 +701,7 @@ def test_join_dups(self): expected = concat([df, df], axis=1) result = df.join(df, rsuffix="_2") result.columns = expected.columns - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # GH 4975, invalid join on dups w = DataFrame(np.random.randn(4, 2), columns=["x", "y"]) @@ -716,7 +715,7 @@ def test_join_dups(self): dta = dta.merge(w, left_index=True, right_index=True) expected = concat([x, y, z, w], axis=1) expected.columns = ["x_x", "y_x", "x_y", "y_y", "x_x", "y_x", "x_y", "y_y"] - assert_frame_equal(dta, expected) + tm.assert_frame_equal(dta, expected) def test_join_multi_to_multi(self, join_type): # GH 20475 @@ -736,7 +735,7 @@ def test_join_multi_to_multi(self, join_type): .merge(right.reset_index(), on=["abc", "xy"], how=join_type) .set_index(["abc", "xy", "num"]) ) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) msg = ( r"len\(left_on\) must equal the number of levels in the index" ' of "right"' @@ -769,7 +768,7 @@ def test_join_on_tz_aware_datetimeindex(self): result = df1.join(df2.set_index("date"), on="date") expected = df1.copy() expected["vals_2"] = pd.Series([np.nan] * 2 + list("tuv"), dtype=object) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def _check_join(left, right, result, join_col, how="left", lsuffix="_x", rsuffix="_y"): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 19555a0c7e4c37..37c0b57bc75812 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -29,7 +29,6 @@ from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import MergeError, merge import pandas.util.testing as tm -from pandas.util.testing import assert_frame_equal, assert_series_equal N = 50 NGROUPS = 8 @@ -128,7 +127,7 @@ def test_merge_inner_join_empty(self): df_a = pd.DataFrame({"a": [1, 2]}, index=[0, 1], dtype="int64") result = pd.merge(df_empty, df_a, left_index=True, right_index=True) expected = pd.DataFrame({"a": []}, index=[], dtype="int64") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_merge_common(self): joined = merge(self.df, self.df2) @@ -142,7 +141,7 @@ def test_merge_index_as_on_arg(self): right = self.df2.set_index("key1") result = merge(left, right, on="key1") expected = merge(self.df, self.df2, on="key1").set_index("key1") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_merge_index_singlekey_right_vs_left(self): left = DataFrame( @@ -156,7 +155,7 @@ def test_merge_index_singlekey_right_vs_left(self): merged2 = merge( right, left, right_on="key", left_index=True, how="right", sort=False ) - assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) + tm.assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) merged1 = merge( left, right, left_on="key", right_index=True, how="left", sort=True @@ -164,7 +163,7 @@ def test_merge_index_singlekey_right_vs_left(self): merged2 = merge( right, left, right_on="key", left_index=True, how="right", sort=True ) - assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) + tm.assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) def test_merge_index_singlekey_inner(self): left = DataFrame( @@ -175,11 +174,11 @@ def test_merge_index_singlekey_inner(self): # inner join result = merge(left, right, left_on="key", right_index=True, how="inner") expected = left.join(right, on="key").loc[result.index] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = merge(right, left, right_on="key", left_index=True, how="inner") expected = left.join(right, on="key").loc[result.index] - assert_frame_equal(result, expected.loc[:, result.columns]) + tm.assert_frame_equal(result, expected.loc[:, result.columns]) def test_merge_misspecified(self): msg = "Must pass right_on or right_index=True" @@ -296,7 +295,7 @@ def test_intelligently_handle_join_key(self): }, columns=["value", "key", "rvalue"], ) - assert_frame_equal(joined, expected) + tm.assert_frame_equal(joined, expected) def test_merge_join_key_dtype_cast(self): # #8596 @@ -331,7 +330,7 @@ def test_handle_join_key_pass_array(self): merged = merge(left, right, left_on="key", right_on=key, how="outer") merged2 = merge(right, left, left_on=key, right_on="key", how="outer") - assert_series_equal(merged["key"], merged2["key"]) + tm.assert_series_equal(merged["key"], merged2["key"]) assert merged["key"].notna().all() assert merged2["key"].notna().all() @@ -406,10 +405,10 @@ def test_left_merge_empty_dataframe(self): right = DataFrame({"key": []}) result = merge(left, right, on="key", how="left") - assert_frame_equal(result, left) + tm.assert_frame_equal(result, left) result = merge(right, left, on="key", how="right") - assert_frame_equal(result, left) + tm.assert_frame_equal(result, left) @pytest.mark.parametrize( "kwarg", @@ -540,7 +539,7 @@ def test_merge_empty_frame(self, series_of_dtype, series_of_dtype2): columns=["value_x", "key", "value_y"], ) actual = df_empty.merge(df, on="key") - assert_frame_equal(actual, expected) + tm.assert_frame_equal(actual, expected) def test_merge_all_na_column(self, series_of_dtype, series_of_dtype_all_na): # GH 25183 @@ -561,7 +560,7 @@ def test_merge_all_na_column(self, series_of_dtype, series_of_dtype_all_na): columns=["key", "value_x", "value_y"], ) actual = df_left.merge(df_right, on="key") - assert_frame_equal(actual, expected) + tm.assert_frame_equal(actual, expected) def test_merge_nosort(self): # GH#2098, TODO: anything to do? @@ -589,7 +588,7 @@ def test_merge_nosort(self): result = df.merge(new, on="var3", sort=False) exp = merge(df, new, on="var3", sort=False) - assert_frame_equal(result, exp) + tm.assert_frame_equal(result, exp) assert (df.var3.unique() == result.var3.unique()).all() @@ -610,7 +609,7 @@ def test_merge_nan_right(self): .set_index(None) .reset_index()[["i1", "i2", "i1_", "i3"]] ) - assert_frame_equal(result, expected, check_dtype=False) + tm.assert_frame_equal(result, expected, check_dtype=False) df1 = DataFrame({"i1": [0, 1], "i2": [0.5, 1.5]}) df2 = DataFrame({"i1": [0], "i3": [0.7]}) @@ -623,7 +622,7 @@ def test_merge_nan_right(self): "i3": {0: 0.69999999999999996, 1: np.nan}, } )[["i1", "i2", "i1_", "i3"]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_merge_type(self): class NotADataFrame(DataFrame): @@ -650,7 +649,7 @@ def test_join_append_timedeltas(self): "t": [timedelta(0, 22500), timedelta(0, 22500)], } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) td = np.timedelta64(300000000) lhs = DataFrame(Series([td, td], index=["A", "B"])) @@ -663,7 +662,7 @@ def test_join_append_timedeltas(self): "0r": Series([td, pd.NaT], index=list("AB")), } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_other_datetime_unit(self): # GH 13389 @@ -729,7 +728,7 @@ def test_overlapping_columns_error_message(self): } ) expected.columns = ["key", "foo", "foo", "bar", "bar"] - assert_frame_equal(merge(df, df2), expected) + tm.assert_frame_equal(merge(df, df2), expected) # #2649, #10639 df2.columns = ["key1", "foo", "foo"] @@ -761,7 +760,7 @@ def test_merge_on_datetime64tz(self): } ) result = pd.merge(left, right, on="key", how="outer") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) left = pd.DataFrame( { @@ -785,7 +784,7 @@ def test_merge_on_datetime64tz(self): } ) result = pd.merge(left, right, on="key", how="outer") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) assert result["value_x"].dtype == "datetime64[ns, US/Eastern]" assert result["value_y"].dtype == "datetime64[ns, US/Eastern]" @@ -844,7 +843,7 @@ def test_merge_datetime64tz_with_dst_transition(self): "value_y": [np.nan] * 4 + [2] * 3, } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_merge_non_unique_period_index(self): # GH #16871 @@ -878,7 +877,7 @@ def test_merge_on_periods(self): } ) result = pd.merge(left, right, on="key", how="outer") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) left = pd.DataFrame( {"key": [1, 2], "value": pd.period_range("20151010", periods=2, freq="D")} @@ -897,7 +896,7 @@ def test_merge_on_periods(self): } ) result = pd.merge(left, right, on="key", how="outer") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) assert result["value_x"].dtype == "Period[D]" assert result["value_y"].dtype == "Period[D]" @@ -950,13 +949,13 @@ def test_indicator(self): ] test = merge(df1, df2, on="col1", how="outer", indicator=True) - assert_frame_equal(test, df_result) + tm.assert_frame_equal(test, df_result) test = df1.merge(df2, on="col1", how="outer", indicator=True) - assert_frame_equal(test, df_result) + tm.assert_frame_equal(test, df_result) # No side effects - assert_frame_equal(df1, df1_copy) - assert_frame_equal(df2, df2_copy) + tm.assert_frame_equal(df1, df1_copy) + tm.assert_frame_equal(df2, df2_copy) # Check with custom name df_result_custom_name = df_result @@ -967,11 +966,11 @@ def test_indicator(self): test_custom_name = merge( df1, df2, on="col1", how="outer", indicator="custom_name" ) - assert_frame_equal(test_custom_name, df_result_custom_name) + tm.assert_frame_equal(test_custom_name, df_result_custom_name) test_custom_name = df1.merge( df2, on="col1", how="outer", indicator="custom_name" ) - assert_frame_equal(test_custom_name, df_result_custom_name) + tm.assert_frame_equal(test_custom_name, df_result_custom_name) # Check only accepts strings and booleans msg = "indicator option can only accept boolean or string arguments" @@ -1043,9 +1042,9 @@ def test_indicator(self): ) test5 = merge(df3, df4, on=["col1", "col2"], how="outer", indicator=True) - assert_frame_equal(test5, hand_coded_result) + tm.assert_frame_equal(test5, hand_coded_result) test5 = df3.merge(df4, on=["col1", "col2"], how="outer", indicator=True) - assert_frame_equal(test5, hand_coded_result) + tm.assert_frame_equal(test5, hand_coded_result) def test_validation(self): left = DataFrame( @@ -1066,8 +1065,8 @@ def test_validation(self): right_copy = right.copy() result = merge(left, right, left_index=True, right_index=True, validate="1:1") - assert_frame_equal(left, left_copy) - assert_frame_equal(right, right_copy) + tm.assert_frame_equal(left, left_copy) + tm.assert_frame_equal(right, right_copy) # make sure merge still correct expected = DataFrame( @@ -1084,7 +1083,7 @@ def test_validation(self): result = merge( left, right, left_index=True, right_index=True, validate="one_to_one" ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected_2 = DataFrame( { @@ -1096,12 +1095,12 @@ def test_validation(self): ) result = merge(left, right, on="a", validate="1:1") - assert_frame_equal(left, left_copy) - assert_frame_equal(right, right_copy) - assert_frame_equal(result, expected_2) + tm.assert_frame_equal(left, left_copy) + tm.assert_frame_equal(right, right_copy) + tm.assert_frame_equal(result, expected_2) result = merge(left, right, on="a", validate="one_to_one") - assert_frame_equal(result, expected_2) + tm.assert_frame_equal(result, expected_2) # One index, one column expected_3 = DataFrame( @@ -1122,7 +1121,7 @@ def test_validation(self): right_on="a", validate="one_to_one", ) - assert_frame_equal(result, expected_3) + tm.assert_frame_equal(result, expected_3) # Dups on right right_w_dups = right.append(pd.DataFrame({"a": ["e"], "c": ["moo"]}, index=[4])) @@ -1231,7 +1230,7 @@ def test_validation(self): merge(left, right, on="a", validate="1:1") result = merge(left, right, on=["a", "b"], validate="1:1") - assert_frame_equal(result, expected_multi) + tm.assert_frame_equal(result, expected_multi) def test_merge_two_empty_df_no_division_error(self): # GH17776, PR #17846 @@ -1293,7 +1292,7 @@ def test_merge_on_index_with_more_values(self, how, index, expected_index): columns=["a", "key", "b"], ) expected.set_index(expected_index, inplace=True) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_merge_right_index_right(self): # Note: the expected output here is probably incorrect. @@ -1354,7 +1353,7 @@ def _check_merge(x, y): expected = expected.set_index("index") # TODO check_names on merge? - assert_frame_equal(result, expected, check_names=False) + tm.assert_frame_equal(result, expected, check_names=False) class TestMergeDtypes: @@ -1432,10 +1431,10 @@ def test_merge_on_ints_floats(self, int_vals, float_vals, exp_vals): expected = DataFrame(exp_vals) result = A.merge(B, left_on="X", right_on="Y") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = B.merge(A, left_on="Y", right_on="X") - assert_frame_equal(result, expected[["Y", "X"]]) + tm.assert_frame_equal(result, expected[["Y", "X"]]) def test_merge_key_dtype_cast(self): # GH 17044 @@ -1459,18 +1458,18 @@ def test_merge_on_ints_floats_warning(self): with tm.assert_produces_warning(UserWarning): result = A.merge(B, left_on="X", right_on="Y") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) with tm.assert_produces_warning(UserWarning): result = B.merge(A, left_on="Y", right_on="X") - assert_frame_equal(result, expected[["Y", "X"]]) + tm.assert_frame_equal(result, expected[["Y", "X"]]) # test no warning if float has NaNs B = DataFrame({"Y": [np.nan, np.nan, 3.0]}) with tm.assert_produces_warning(None): result = B.merge(A, left_on="Y", right_on="X") - assert_frame_equal(result, expected[["Y", "X"]]) + tm.assert_frame_equal(result, expected[["Y", "X"]]) def test_merge_incompat_infer_boolean_object(self): # GH21119: bool + object bool merge OK @@ -1479,9 +1478,9 @@ def test_merge_incompat_infer_boolean_object(self): expected = DataFrame({"key": [True, False]}, dtype=object) result = pd.merge(df1, df2, on="key") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = pd.merge(df2, df1, on="key") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # with missing value df1 = DataFrame({"key": Series([True, False, np.nan], dtype=object)}) @@ -1489,9 +1488,9 @@ def test_merge_incompat_infer_boolean_object(self): expected = DataFrame({"key": [True, False]}, dtype=object) result = pd.merge(df1, df2, on="key") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = pd.merge(df2, df1, on="key") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "df1_vals, df2_vals", @@ -1600,7 +1599,7 @@ def test_identical(self, left): [CategoricalDtype(), np.dtype("O"), np.dtype("O")], index=["X", "Y_x", "Y_y"], ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_basic(self, left, right): # we have matching Categorical dtypes in X @@ -1611,7 +1610,7 @@ def test_basic(self, left, right): [CategoricalDtype(), np.dtype("O"), np.dtype("int64")], index=["X", "Y", "Z"], ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_merge_categorical(self): # GH 9426 @@ -1679,7 +1678,7 @@ def tests_merge_categorical_unordered_equal(self): "Right": ["A1", "B1", "C1"], } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_other_columns(self, left, right): # non-merge columns should preserve if possible @@ -1691,7 +1690,7 @@ def test_other_columns(self, left, right): [CategoricalDtype(), np.dtype("O"), CategoricalDtype()], index=["X", "Y", "Z"], ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # categories are preserved assert left.X.values.is_dtype_equal(merged.X.values) @@ -1720,7 +1719,7 @@ def test_dtype_on_merged_different(self, change, join_type, left, right): expected = Series( [np.dtype("O"), np.dtype("O"), np.dtype("int64")], index=["X", "Y", "Z"] ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_self_join_multiple_categories(self): # GH 16767 @@ -1760,7 +1759,7 @@ def test_self_join_multiple_categories(self): # self-join should equal ourselves result = pd.merge(df, df, on=list(df.columns)) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) def test_dtype_on_categorical_dates(self): # GH 16900 @@ -1785,13 +1784,13 @@ def test_dtype_on_categorical_dates(self): columns=["date", "num2", "num4"], ) result_outer = pd.merge(df, df2, how="outer", on=["date"]) - assert_frame_equal(result_outer, expected_outer) + tm.assert_frame_equal(result_outer, expected_outer) expected_inner = pd.DataFrame( [[pd.Timestamp("2001-01-01"), 1.1, 1.3]], columns=["date", "num2", "num4"] ) result_inner = pd.merge(df, df2, how="inner", on=["date"]) - assert_frame_equal(result_inner, expected_inner) + tm.assert_frame_equal(result_inner, expected_inner) @pytest.mark.parametrize("ordered", [True, False]) @pytest.mark.parametrize( @@ -1815,7 +1814,7 @@ def test_merging_with_bool_or_int_cateorical_column( {"id": [2, 4], "cat": expected_categories, "num": [1, 9]} ) expected["cat"] = expected["cat"].astype(CDT(categories, ordered=ordered)) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) def test_merge_on_int_array(self): # GH 23020 @@ -1824,7 +1823,7 @@ def test_merge_on_int_array(self): expected = pd.DataFrame( {"A": pd.Series([1, 2, np.nan], dtype="Int64"), "B_x": 1, "B_y": 1} ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.fixture @@ -1915,7 +1914,7 @@ def test_merge_index_types(index): expected = DataFrame( OrderedDict([("left_data", [1, 2]), ("right_data", [1.0, 2.0])]), index=index ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -2111,7 +2110,7 @@ def test_merge_on_cat_and_ext_array(): result = pd.merge(left, right, how="inner", on="a") expected = right.copy() - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_merge_multiindex_columns(): diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 2e9ae803231597..e12aad870f1c12 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -7,7 +7,7 @@ import pandas as pd from pandas import Timedelta, merge_asof, read_csv, to_datetime from pandas.core.reshape.merge import MergeError -from pandas.util.testing import assert_frame_equal +import pandas.util.testing as tm class TestAsOfMerge: @@ -44,7 +44,7 @@ def test_examples1(self): ) result = pd.merge_asof(left, right, on="a") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_examples2(self): """ doc-string examples """ @@ -131,7 +131,7 @@ def test_examples2(self): tolerance=pd.Timedelta("10ms"), allow_exact_matches=False, ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_examples3(self): """ doc-string examples """ @@ -145,7 +145,7 @@ def test_examples3(self): ) result = pd.merge_asof(left, right, on="a", direction="forward") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_examples4(self): """ doc-string examples """ @@ -159,7 +159,7 @@ def test_examples4(self): ) result = pd.merge_asof(left, right, on="a", direction="nearest") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_basic(self): @@ -168,7 +168,7 @@ def test_basic(self): quotes = self.quotes result = merge_asof(trades, quotes, on="time", by="ticker") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_basic_categorical(self): @@ -180,7 +180,7 @@ def test_basic_categorical(self): expected.ticker = expected.ticker.astype("category") result = merge_asof(trades, quotes, on="time", by="ticker") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_basic_left_index(self): @@ -196,7 +196,7 @@ def test_basic_left_index(self): expected.index = result.index # time column appears after left"s columns expected = expected[result.columns] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_basic_right_index(self): @@ -207,7 +207,7 @@ def test_basic_right_index(self): result = merge_asof( trades, quotes, left_on="time", right_index=True, by="ticker" ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_basic_left_index_right_index(self): @@ -218,7 +218,7 @@ def test_basic_left_index_right_index(self): result = merge_asof( trades, quotes, left_index=True, right_index=True, by="ticker" ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_multi_index(self): @@ -260,7 +260,7 @@ def test_basic_left_by_right_by(self): result = merge_asof( trades, quotes, on="time", left_by="ticker", right_by="ticker" ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_missing_right_by(self): @@ -271,7 +271,7 @@ def test_missing_right_by(self): q = quotes[quotes.ticker != "MSFT"] result = merge_asof(trades, q, on="time", by="ticker") expected.loc[expected.ticker == "MSFT", ["bid", "ask"]] = np.nan - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_multiby(self): # GH13936 @@ -336,7 +336,7 @@ def test_multiby(self): ) result = pd.merge_asof(trades, quotes, on="time", by=["ticker", "exch"]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_multiby_heterogeneous_types(self): # GH13936 @@ -401,7 +401,7 @@ def test_multiby_heterogeneous_types(self): ) result = pd.merge_asof(trades, quotes, on="time", by=["ticker", "exch"]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_multiby_indexed(self): # GH15676 @@ -439,7 +439,7 @@ def test_multiby_indexed(self): left, right, left_index=True, right_index=True, by=["k1", "k2"] ) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) with pytest.raises(MergeError): pd.merge_asof( @@ -458,7 +458,7 @@ def test_basic2(self, datapath): quotes = self.read_data(datapath, "quotes2.csv", dedupe=True) result = merge_asof(trades, quotes, on="time", by="ticker") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_basic_no_by(self): f = ( @@ -473,7 +473,7 @@ def test_basic_no_by(self): quotes = f(self.quotes) result = merge_asof(trades, quotes, on="time") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_valid_join_keys(self): @@ -498,7 +498,7 @@ def test_with_duplicates(self, datapath): ) result = merge_asof(self.trades, q, on="time", by="ticker") expected = self.read_data(datapath, "asof.csv") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_with_duplicates_no_on(self): @@ -508,7 +508,7 @@ def test_with_duplicates_no_on(self): expected = pd.DataFrame( {"key": [1, 1, 3], "left_val": [1, 2, 3], "right_val": [1, 1, 3]} ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_valid_allow_exact_matches(self): @@ -602,7 +602,7 @@ def test_tolerance(self, tolerance): result = merge_asof(trades, quotes, on="time", by="ticker", tolerance=tolerance) expected = self.tolerance - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_tolerance_forward(self): # GH14887 @@ -615,7 +615,7 @@ def test_tolerance_forward(self): ) result = pd.merge_asof(left, right, on="a", direction="forward", tolerance=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_tolerance_nearest(self): # GH14887 @@ -628,7 +628,7 @@ def test_tolerance_nearest(self): ) result = pd.merge_asof(left, right, on="a", direction="nearest", tolerance=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_tolerance_tz(self): # GH 14844 @@ -668,7 +668,7 @@ def test_tolerance_tz(self): "value2": list("BCDEE"), } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_tolerance_float(self): # GH22981 @@ -686,7 +686,7 @@ def test_tolerance_float(self): ) result = pd.merge_asof(left, right, on="a", direction="nearest", tolerance=0.5) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_index_tolerance(self): # GH 15135 @@ -702,7 +702,7 @@ def test_index_tolerance(self): by="ticker", tolerance=pd.Timedelta("1day"), ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_allow_exact_matches(self): @@ -710,7 +710,7 @@ def test_allow_exact_matches(self): self.trades, self.quotes, on="time", by="ticker", allow_exact_matches=False ) expected = self.allow_exact_matches - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_allow_exact_matches_forward(self): # GH14887 @@ -725,7 +725,7 @@ def test_allow_exact_matches_forward(self): result = pd.merge_asof( left, right, on="a", direction="forward", allow_exact_matches=False ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_allow_exact_matches_nearest(self): # GH14887 @@ -740,7 +740,7 @@ def test_allow_exact_matches_nearest(self): result = pd.merge_asof( left, right, on="a", direction="nearest", allow_exact_matches=False ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_allow_exact_matches_and_tolerance(self): @@ -753,7 +753,7 @@ def test_allow_exact_matches_and_tolerance(self): allow_exact_matches=False, ) expected = self.allow_exact_matches_and_tolerance - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_allow_exact_matches_and_tolerance2(self): # GH 13695 @@ -777,7 +777,7 @@ def test_allow_exact_matches_and_tolerance2(self): "version": [2], } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = pd.merge_asof(df1, df2, on="time", allow_exact_matches=False) expected = pd.DataFrame( @@ -787,7 +787,7 @@ def test_allow_exact_matches_and_tolerance2(self): "version": [1], } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = pd.merge_asof( df1, @@ -803,7 +803,7 @@ def test_allow_exact_matches_and_tolerance2(self): "version": [np.nan], } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_allow_exact_matches_and_tolerance3(self): # GH 13709 @@ -840,7 +840,7 @@ def test_allow_exact_matches_and_tolerance3(self): "version": [np.nan, np.nan], } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_allow_exact_matches_and_tolerance_forward(self): # GH14887 @@ -860,7 +860,7 @@ def test_allow_exact_matches_and_tolerance_forward(self): allow_exact_matches=False, tolerance=1, ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_allow_exact_matches_and_tolerance_nearest(self): # GH14887 @@ -880,7 +880,7 @@ def test_allow_exact_matches_and_tolerance_nearest(self): allow_exact_matches=False, tolerance=1, ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_forward_by(self): # GH14887 @@ -910,7 +910,7 @@ def test_forward_by(self): ) result = pd.merge_asof(left, right, on="a", by="b", direction="forward") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_nearest_by(self): # GH14887 @@ -940,7 +940,7 @@ def test_nearest_by(self): ) result = pd.merge_asof(left, right, on="a", by="b", direction="nearest") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_by_int(self): # we specialize by type, so test that this is correct @@ -1001,7 +1001,7 @@ def test_by_int(self): columns=["time", "key", "value1", "value2"], ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_on_float(self): # mimics how to determine the minimum-price variation @@ -1031,7 +1031,7 @@ def test_on_float(self): columns=["symbol", "price", "mpv"], ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_on_specialized_type(self, any_real_dtype): # see gh-13936 @@ -1062,7 +1062,7 @@ def test_on_specialized_type(self, any_real_dtype): ) expected.value = dtype(expected.value) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_on_specialized_type_by_int(self, any_real_dtype): # see gh-13936 @@ -1098,7 +1098,7 @@ def test_on_specialized_type_by_int(self, any_real_dtype): ) expected.value = dtype(expected.value) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_on_float_by_int(self): # type specialize both "by" and "on" parameters @@ -1155,7 +1155,7 @@ def test_on_float_by_int(self): columns=["symbol", "exch", "price", "mpv"], ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_merge_datatype_error_raises(self): msg = r"incompatible merge keys \[0\] .*, must be the same type" @@ -1224,7 +1224,7 @@ def test_merge_by_col_tz_aware(self): [[pd.Timestamp("2018-01-01", tz="UTC"), 2, "a", "b"]], columns=["by_col", "on_col", "values_x", "values_y"], ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_by_mixed_tz_aware(self): # GH 26649 @@ -1250,7 +1250,7 @@ def test_by_mixed_tz_aware(self): columns=["by_col1", "by_col2", "on_col", "value_x"], ) expected["value_y"] = np.array([np.nan], dtype=object) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_timedelta_tolerance_nearest(self): # GH 27642 @@ -1286,7 +1286,7 @@ def test_timedelta_tolerance_nearest(self): left, right, on="time", tolerance=Timedelta("1ms"), direction="nearest" ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_int_type_tolerance(self, any_int_dtype): # GH #28870 @@ -1302,4 +1302,4 @@ def test_int_type_tolerance(self, any_int_dtype): expected["a"] = expected["a"].astype(any_int_dtype) result = pd.merge_asof(left, right, on="a", tolerance=10) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_merge_index_as_string.py b/pandas/tests/reshape/merge/test_merge_index_as_string.py index 5e3bf03a0a4eca..4e0f570567c072 100644 --- a/pandas/tests/reshape/merge/test_merge_index_as_string.py +++ b/pandas/tests/reshape/merge/test_merge_index_as_string.py @@ -2,7 +2,7 @@ import pytest from pandas import DataFrame -from pandas.util.testing import assert_frame_equal +import pandas.util.testing as tm @pytest.fixture @@ -136,7 +136,7 @@ def test_merge_indexes_and_columns_on(left_df, right_df, on, how): # Perform merge result = left_df.merge(right_df, on=on, how=how) - assert_frame_equal(result, expected, check_like=True) + tm.assert_frame_equal(result, expected, check_like=True) @pytest.mark.parametrize( @@ -159,7 +159,7 @@ def test_merge_indexes_and_columns_lefton_righton( # Perform merge result = left_df.merge(right_df, left_on=left_on, right_on=right_on, how=how) - assert_frame_equal(result, expected, check_like=True) + tm.assert_frame_equal(result, expected, check_like=True) @pytest.mark.parametrize("left_index", ["inner", ["inner", "outer"]]) @@ -185,4 +185,4 @@ def test_join_indexes_and_columns_on(df1, df2, left_index, join_type): right_df, on=["outer", "inner"], how=join_type, lsuffix="_x", rsuffix="_y" ) - assert_frame_equal(result, expected, check_like=True) + tm.assert_frame_equal(result, expected, check_like=True) diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index a9f23313a83b94..6d6429fb4e6b56 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -3,7 +3,7 @@ import pandas as pd from pandas import DataFrame, merge_ordered -from pandas.util.testing import assert_frame_equal +import pandas.util.testing as tm class TestMergeOrdered: @@ -22,7 +22,7 @@ def test_basic(self): } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_ffill(self): result = merge_ordered(self.left, self.right, on="key", fill_method="ffill") @@ -33,7 +33,7 @@ def test_ffill(self): "rvalue": [np.nan, 1, 2, 3, 3, 4], } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_multigroup(self): left = pd.concat([self.left, self.left], ignore_index=True) @@ -52,12 +52,12 @@ def test_multigroup(self): ) expected["group"] = ["a"] * 6 + ["b"] * 6 - assert_frame_equal(result, expected.loc[:, result.columns]) + tm.assert_frame_equal(result, expected.loc[:, result.columns]) result2 = merge_ordered( self.right, left, on="key", right_by="group", fill_method="ffill" ) - assert_frame_equal(result, result2.loc[:, result.columns]) + tm.assert_frame_equal(result, result2.loc[:, result.columns]) result = merge_ordered(left, self.right, on="key", left_by="group") assert result["group"].notna().all() @@ -114,4 +114,4 @@ def test_doc_example(self): } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index eda7bc0ec4df7e..5c930e01c735d9 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -29,8 +29,7 @@ ) import pandas.core.common as com from pandas.tests.extension.decimal import to_decimal -from pandas.util import testing as tm -from pandas.util.testing import assert_frame_equal, makeCustomDataframe as mkdf +import pandas.util.testing as tm @pytest.fixture(params=[True, False]) @@ -860,7 +859,7 @@ def test_append_length0_frame(self, sort): df5 = df.append(df3, sort=sort) expected = DataFrame(index=[0, 1], columns=["A", "B", "C"]) - assert_frame_equal(df5, expected) + tm.assert_frame_equal(df5, expected) def test_append_records(self): arr1 = np.zeros((2,), dtype=("i4,f4,a10")) @@ -874,7 +873,7 @@ def test_append_records(self): result = df1.append(df2, ignore_index=True) expected = DataFrame(np.concatenate((arr1, arr2))) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # rewrite sort fixture, since we also want to test default of None def test_append_sorts(self, sort_with_none): @@ -981,7 +980,7 @@ def test_append_same_columns_type(self, index): expected = pd.DataFrame( [[1.0, 2.0, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # ser wider than df ser_index = index @@ -994,7 +993,7 @@ def test_append_same_columns_type(self, index): index=[0, 1, 2], columns=ser_index, ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "df_columns, series_index", @@ -1021,7 +1020,7 @@ def test_append_different_columns_types(self, df_columns, series_index): index=[0, 1, 2], columns=combined_columns, ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "index_can_append", indexes_can_append, ids=lambda x: x.__class__.__name__ @@ -1109,7 +1108,7 @@ def test_append_dtype_coerce(self, sort): else: expected = expected[["start_time", "end_time"]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_append_missing_column_proper_upcast(self, sort): df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")}) @@ -1138,7 +1137,7 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self): # These columns get cast to object after append expected["a"] = expected["a"].astype(float) expected["b"] = expected["b"].astype(float) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) class TestConcatenate: @@ -1364,7 +1363,7 @@ def test_concat_multiindex_with_none_in_index_names(self): expected = pd.DataFrame( {"col": list(range(5)) * 2}, index=index, dtype=np.int32 ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = concat([df, df[:2]], keys=[1, 2], names=["level2"]) level2 = [1] * 5 + [2] * 2 @@ -1373,7 +1372,7 @@ def test_concat_multiindex_with_none_in_index_names(self): tuples = list(zip(level2, level1, no_name)) index = pd.MultiIndex.from_tuples(tuples, names=["level2", "level1", None]) expected = pd.DataFrame({"col": no_name}, index=index, dtype=np.int32) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_concat_keys_and_levels(self): df = DataFrame(np.random.randn(1, 3)) @@ -1494,12 +1493,12 @@ def test_dups_index(self): ) result = concat([df, df], axis=1) - assert_frame_equal(result.iloc[:, :4], df) - assert_frame_equal(result.iloc[:, 4:], df) + tm.assert_frame_equal(result.iloc[:, :4], df) + tm.assert_frame_equal(result.iloc[:, 4:], df) result = concat([df, df], axis=0) - assert_frame_equal(result.iloc[:10], df) - assert_frame_equal(result.iloc[10:], df) + tm.assert_frame_equal(result.iloc[:10], df) + tm.assert_frame_equal(result.iloc[10:], df) # multi dtypes df = concat( @@ -1513,23 +1512,23 @@ def test_dups_index(self): ) result = concat([df, df], axis=1) - assert_frame_equal(result.iloc[:, :6], df) - assert_frame_equal(result.iloc[:, 6:], df) + tm.assert_frame_equal(result.iloc[:, :6], df) + tm.assert_frame_equal(result.iloc[:, 6:], df) result = concat([df, df], axis=0) - assert_frame_equal(result.iloc[:10], df) - assert_frame_equal(result.iloc[10:], df) + tm.assert_frame_equal(result.iloc[:10], df) + tm.assert_frame_equal(result.iloc[10:], df) # append result = df.iloc[0:8, :].append(df.iloc[8:]) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) result = df.iloc[0:8, :].append(df.iloc[8:9]).append(df.iloc[9:10]) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) expected = concat([df, df], axis=0) result = df.append(df) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_with_mixed_tuples(self, sort): # 10697 @@ -1563,14 +1562,14 @@ def test_handle_empty_objects(self, sort): ) empty = DataFrame() result = concat([df, empty], axis=1) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) result = concat([empty, df], axis=1) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) result = concat([df, empty]) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) result = concat([empty, df]) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) def test_concat_mixed_objs(self): @@ -1588,25 +1587,25 @@ def test_concat_mixed_objs(self): np.repeat(arr, 2).reshape(-1, 2), index=index, columns=[0, 0] ) result = concat([df, df], axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = DataFrame( np.repeat(arr, 2).reshape(-1, 2), index=index, columns=[0, 1] ) result = concat([s1, s2], axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = DataFrame( np.repeat(arr, 3).reshape(-1, 3), index=index, columns=[0, 1, 2] ) result = concat([s1, s2, s1], axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = DataFrame( np.repeat(arr, 5).reshape(-1, 5), index=index, columns=[0, 0, 1, 2, 3] ) result = concat([s1, df, s2, s2, s1], axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # with names s1.name = "foo" @@ -1614,32 +1613,32 @@ def test_concat_mixed_objs(self): np.repeat(arr, 3).reshape(-1, 3), index=index, columns=["foo", 0, 0] ) result = concat([s1, df, s2], axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) s2.name = "bar" expected = DataFrame( np.repeat(arr, 3).reshape(-1, 3), index=index, columns=["foo", 0, "bar"] ) result = concat([s1, df, s2], axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # ignore index expected = DataFrame( np.repeat(arr, 3).reshape(-1, 3), index=index, columns=[0, 1, 2] ) result = concat([s1, df, s2], axis=1, ignore_index=True) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # axis 0 expected = DataFrame( np.tile(arr, 3).reshape(-1, 1), index=index.tolist() * 3, columns=[0] ) result = concat([s1, df, s2]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0]) result = concat([s1, df, s2], ignore_index=True) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_empty_dtype_coerce(self): @@ -1705,11 +1704,11 @@ def test_concat_series_axis1(self, sort=sort): result = concat(pieces, axis=1) expected = DataFrame(pieces).T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = concat(pieces, keys=["A", "B", "C"], axis=1) expected = DataFrame(pieces, index=["A", "B", "C"]).T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # preserve series names, #2489 s = Series(randn(5), name="A") @@ -1717,7 +1716,7 @@ def test_concat_series_axis1(self, sort=sort): result = concat([s, s2], axis=1) expected = DataFrame({"A": s, "B": s2}) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) s2.name = None result = concat([s, s2], axis=1) @@ -1728,7 +1727,7 @@ def test_concat_series_axis1(self, sort=sort): s2 = Series(randn(4), index=["d", "a", "b", "c"], name="B") result = concat([s, s2], axis=1, sort=sort) expected = DataFrame({"A": s, "B": s2}) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_concat_series_axis1_names_applied(self): # ensure names argument is not ignored on axis=1, #23490 @@ -1738,14 +1737,14 @@ def test_concat_series_axis1_names_applied(self): expected = DataFrame( [[1, 4], [2, 5], [3, 6]], columns=pd.Index(["a", "b"], name="A") ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = concat([s, s2], axis=1, keys=[("a", 1), ("b", 2)], names=["A", "B"]) expected = DataFrame( [[1, 4], [2, 5], [3, 6]], columns=MultiIndex.from_tuples([("a", 1), ("b", 2)], names=["A", "B"]), ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_concat_single_with_key(self): df = DataFrame(np.random.randn(10, 4)) @@ -1818,7 +1817,7 @@ def test_concat_bug_2972(self): expected = DataFrame({0: ts0, 1: ts1}) expected.columns = ["same name", "same name"] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_concat_bug_3602(self): @@ -1844,7 +1843,7 @@ def test_concat_bug_3602(self): expected.columns = ["firmNo", "prc", "stringvar", "C", "misc", "prc"] result = concat([df1, df2], axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_concat_inner_join_empty(self): # GH 15328 @@ -1854,7 +1853,7 @@ def test_concat_inner_join_empty(self): for how, expected in [("inner", df_expected), ("outer", df_a)]: result = pd.concat([df_a, df_empty], axis=1, join=how) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_concat_series_axis1_same_names_ignore_index(self): dates = date_range("01-Jan-2013", "01-Jan-2014", freq="MS")[0:-1] @@ -1872,12 +1871,12 @@ def test_concat_iterables(self): df1 = DataFrame([1, 2, 3]) df2 = DataFrame([4, 5, 6]) expected = DataFrame([1, 2, 3, 4, 5, 6]) - assert_frame_equal(concat((df1, df2), ignore_index=True), expected) - assert_frame_equal(concat([df1, df2], ignore_index=True), expected) - assert_frame_equal( + tm.assert_frame_equal(concat((df1, df2), ignore_index=True), expected) + tm.assert_frame_equal(concat([df1, df2], ignore_index=True), expected) + tm.assert_frame_equal( concat((df for df in (df1, df2)), ignore_index=True), expected ) - assert_frame_equal(concat(deque((df1, df2)), ignore_index=True), expected) + tm.assert_frame_equal(concat(deque((df1, df2)), ignore_index=True), expected) class CustomIterator1: def __len__(self): @@ -1889,19 +1888,19 @@ def __getitem__(self, index): except KeyError: raise IndexError - assert_frame_equal(pd.concat(CustomIterator1(), ignore_index=True), expected) + tm.assert_frame_equal(pd.concat(CustomIterator1(), ignore_index=True), expected) class CustomIterator2(abc.Iterable): def __iter__(self): yield df1 yield df2 - assert_frame_equal(pd.concat(CustomIterator2(), ignore_index=True), expected) + tm.assert_frame_equal(pd.concat(CustomIterator2(), ignore_index=True), expected) def test_concat_invalid(self): # trying to concat a ndframe with a non-ndframe - df1 = mkdf(10, 2) + df1 = tm.makeCustomDataframe(10, 2) msg = ( "cannot concatenate object of type '{}';" " only Series and DataFrame objs are valid" @@ -1911,8 +1910,8 @@ def test_concat_invalid(self): concat([df1, obj]) def test_concat_invalid_first_argument(self): - df1 = mkdf(10, 2) - df2 = mkdf(10, 2) + df1 = tm.makeCustomDataframe(10, 2) + df2 = tm.makeCustomDataframe(10, 2) msg = ( "first argument must be an iterable of pandas " 'objects, you passed an object of type "DataFrame"' @@ -1937,7 +1936,7 @@ def test_concat_invalid_first_argument(self): reader = read_csv(StringIO(data), chunksize=1) result = concat(reader, ignore_index=True) expected = read_csv(StringIO(data)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_concat_NaT_series(self): # GH 11693 @@ -1981,7 +1980,7 @@ def test_concat_tz_frame(self): # concat df3 = pd.concat([df2.A.to_frame(), df2.B.to_frame()], axis=1) - assert_frame_equal(df2, df3) + tm.assert_frame_equal(df2, df3) def test_concat_tz_series(self): # gh-11755: tz and no tz @@ -2087,7 +2086,7 @@ def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s): if tz1 != tz2: expected = expected.astype(object) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("tz1", [None, "UTC"]) @pytest.mark.parametrize("tz2", [None, "UTC"]) @@ -2103,7 +2102,7 @@ def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2): } ) result = pd.concat([first, second], axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("tz1", [None, "UTC"]) @pytest.mark.parametrize("tz2", [None, "UTC"]) @@ -2132,7 +2131,7 @@ def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2): expected = expected.astype(object) result = pd.concat([first, second]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("tz", [None, "UTC"]) def test_concat_NaT_dataframes(self, tz): @@ -2154,7 +2153,7 @@ def test_concat_NaT_dataframes(self, tz): ) result = pd.concat([first, second], axis=0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_concat_period_series(self): x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D")) @@ -2237,7 +2236,7 @@ def test_concat_empty_series_timelike(self, tz, values): } ) result = concat([first, second], axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_default_index(self): # is_series and ignore_index @@ -2619,7 +2618,7 @@ def test_concat_empty_and_non_empty_frame_regression(): df2 = pd.DataFrame({"foo": []}) expected = pd.DataFrame({"foo": [1.0]}) result = pd.concat([df1, df2]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_concat_empty_and_non_empty_series_regression(): diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 66a24fa57a68c5..2e94eeba1d05b0 100644 --- a/pandas/tests/reshape/test_reshape.py +++ b/pandas/tests/reshape/test_reshape.py @@ -9,7 +9,6 @@ from pandas import Categorical, DataFrame, Index, Series, get_dummies from pandas.core.arrays.sparse import SparseArray, SparseDtype import pandas.util.testing as tm -from pandas.util.testing import assert_frame_equal class TestGetDummies: @@ -48,14 +47,14 @@ def test_basic(self, sparse, dtype): if sparse: expected = expected.apply(pd.SparseArray, fill_value=0.0) result = get_dummies(s_list, sparse=sparse, dtype=dtype) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = get_dummies(s_series, sparse=sparse, dtype=dtype) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected.index = list("ABC") result = get_dummies(s_series_index, sparse=sparse, dtype=dtype) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_basic_types(self, sparse, dtype): # GH 10531 @@ -134,7 +133,7 @@ def test_include_na(self, sparse, dtype): ) if sparse: exp = exp.apply(pd.SparseArray, fill_value=0.0) - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) # Sparse dataframes do not allow nan labelled columns, see #GH8822 res_na = get_dummies(s, dummy_na=True, sparse=sparse, dtype=dtype) @@ -147,7 +146,7 @@ def test_include_na(self, sparse, dtype): exp_na.columns = res_na.columns if sparse: exp_na = exp_na.apply(pd.SparseArray, fill_value=0.0) - assert_frame_equal(res_na, exp_na) + tm.assert_frame_equal(res_na, exp_na) res_just_na = get_dummies([np.nan], dummy_na=True, sparse=sparse, dtype=dtype) exp_just_na = DataFrame( @@ -169,7 +168,7 @@ def test_unicode(self, sparse): ) if sparse: exp = exp.apply(pd.SparseArray, fill_value=0) - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) def test_dataframe_dummies_all_obj(self, df, sparse): df = df[["A", "B"]] @@ -188,7 +187,7 @@ def test_dataframe_dummies_all_obj(self, df, sparse): } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_dataframe_dummies_mix_default(self, df, sparse, dtype): result = get_dummies(df, sparse=sparse, dtype=dtype) @@ -208,7 +207,7 @@ def test_dataframe_dummies_mix_default(self, df, sparse, dtype): } ) expected = expected[["C", "A_a", "A_b", "B_b", "B_c"]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_dataframe_dummies_prefix_list(self, df, sparse): prefixes = ["from_A", "from_B"] @@ -229,7 +228,7 @@ def test_dataframe_dummies_prefix_list(self, df, sparse): typ = pd.SparseArray if sparse else pd.Series expected[cols] = expected[cols].apply(lambda x: typ(x)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_dataframe_dummies_prefix_str(self, df, sparse): # not that you should do this... @@ -255,7 +254,7 @@ def test_dataframe_dummies_prefix_str(self, df, sparse): axis=1, ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_dataframe_dummies_subset(self, df, sparse): result = get_dummies(df, prefix=["from_A"], columns=["A"], sparse=sparse) @@ -272,7 +271,7 @@ def test_dataframe_dummies_subset(self, df, sparse): if sparse: cols = ["from_A_a", "from_A_b"] expected[cols] = expected[cols].astype(pd.SparseDtype("uint8", 0)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_dataframe_dummies_prefix_sep(self, df, sparse): result = get_dummies(df, prefix_sep="..", sparse=sparse) @@ -292,14 +291,14 @@ def test_dataframe_dummies_prefix_sep(self, df, sparse): cols = ["A..a", "A..b", "B..b", "B..c"] expected[cols] = expected[cols].astype(pd.SparseDtype("uint8", 0)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = get_dummies(df, prefix_sep=["..", "__"], sparse=sparse) expected = expected.rename(columns={"B..b": "B__b", "B..c": "B__c"}) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = get_dummies(df, prefix_sep={"A": "..", "B": "__"}, sparse=sparse) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_dataframe_dummies_prefix_bad_length(self, df, sparse): with pytest.raises(ValueError): @@ -329,7 +328,7 @@ def test_dataframe_dummies_prefix_dict(self, sparse): if sparse: expected[columns] = expected[columns].astype(pd.SparseDtype("uint8", 0)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_dataframe_dummies_with_na(self, df, sparse, dtype): df.loc[3, :] = [np.nan, np.nan, np.nan] @@ -356,11 +355,11 @@ def test_dataframe_dummies_with_na(self, df, sparse, dtype): } ).sort_index(axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = get_dummies(df, dummy_na=False, sparse=sparse, dtype=dtype) expected = expected[["C", "A_a", "A_b", "B_b", "B_c"]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_dataframe_dummies_with_categorical(self, df, sparse, dtype): df["cat"] = pd.Categorical(["x", "y", "y"]) @@ -384,7 +383,7 @@ def test_dataframe_dummies_with_categorical(self, df, sparse, dtype): } ).sort_index(axis=1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "get_dummies_kwargs,expected", @@ -411,7 +410,7 @@ def test_dataframe_dummies_unicode(self, get_dummies_kwargs, expected): # GH22084 pd.get_dummies incorrectly encodes unicode characters # in dataframe column names result = get_dummies(**get_dummies_kwargs) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_basic_drop_first(self, sparse): # GH12402 Add a new parameter `drop_first` to avoid collinearity @@ -425,14 +424,14 @@ def test_basic_drop_first(self, sparse): result = get_dummies(s_list, drop_first=True, sparse=sparse) if sparse: expected = expected.apply(pd.SparseArray, fill_value=0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = get_dummies(s_series, drop_first=True, sparse=sparse) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected.index = list("ABC") result = get_dummies(s_series_index, drop_first=True, sparse=sparse) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_basic_drop_first_one_level(self, sparse): # Test the case that categorical variable only has one level. @@ -443,14 +442,14 @@ def test_basic_drop_first_one_level(self, sparse): expected = DataFrame(index=np.arange(3)) result = get_dummies(s_list, drop_first=True, sparse=sparse) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = get_dummies(s_series, drop_first=True, sparse=sparse) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = DataFrame(index=list("ABC")) result = get_dummies(s_series_index, drop_first=True, sparse=sparse) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_basic_drop_first_NA(self, sparse): # Test NA handling together with drop_first @@ -460,7 +459,7 @@ def test_basic_drop_first_NA(self, sparse): if sparse: exp = exp.apply(pd.SparseArray, fill_value=0) - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) res_na = get_dummies(s_NA, dummy_na=True, drop_first=True, sparse=sparse) exp_na = DataFrame({"b": [0, 1, 0], np.nan: [0, 0, 1]}, dtype=np.uint8).reindex( @@ -468,13 +467,13 @@ def test_basic_drop_first_NA(self, sparse): ) if sparse: exp_na = exp_na.apply(pd.SparseArray, fill_value=0) - assert_frame_equal(res_na, exp_na) + tm.assert_frame_equal(res_na, exp_na) res_just_na = get_dummies( [np.nan], dummy_na=True, drop_first=True, sparse=sparse ) exp_just_na = DataFrame(index=np.arange(1)) - assert_frame_equal(res_just_na, exp_just_na) + tm.assert_frame_equal(res_just_na, exp_just_na) def test_dataframe_dummies_drop_first(self, df, sparse): df = df[["A", "B"]] @@ -482,7 +481,7 @@ def test_dataframe_dummies_drop_first(self, df, sparse): expected = DataFrame({"A_b": [0, 1, 0], "B_c": [0, 0, 1]}, dtype=np.uint8) if sparse: expected = expected.apply(pd.SparseArray, fill_value=0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_dataframe_dummies_drop_first_with_categorical(self, df, sparse, dtype): df["cat"] = pd.Categorical(["x", "y", "y"]) @@ -496,7 +495,7 @@ def test_dataframe_dummies_drop_first_with_categorical(self, df, sparse, dtype): if sparse: for col in cols: expected[col] = pd.SparseArray(expected[col]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_dataframe_dummies_drop_first_with_na(self, df, sparse): df.loc[3, :] = [np.nan, np.nan, np.nan] @@ -519,11 +518,11 @@ def test_dataframe_dummies_drop_first_with_na(self, df, sparse): for col in cols: expected[col] = pd.SparseArray(expected[col]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = get_dummies(df, dummy_na=False, drop_first=True, sparse=sparse) expected = expected[["C", "A_b", "B_c"]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_int_int(self): data = Series([1, 2, 1])