diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index b9ae4fd4abd4c4..04d27f4c12c59d 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -9,11 +9,7 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, date_range from pandas.core.computation.check import _NUMEXPR_INSTALLED -from pandas.util.testing import ( - assert_frame_equal, - assert_series_equal, - makeCustomDataframe as mkdf, -) +import pandas.util.testing as tm PARSERS = "python", "pandas" ENGINES = "python", pytest.param("numexpr", marks=td.skip_if_no_ne) @@ -46,34 +42,34 @@ def test_query_default(self): # this should always work, whether _NUMEXPR_INSTALLED or not df = self.df result = df.query("A>0") - assert_frame_equal(result, self.expected1) + tm.assert_frame_equal(result, self.expected1) result = df.eval("A+1") - assert_series_equal(result, self.expected2, check_names=False) + tm.assert_series_equal(result, self.expected2, check_names=False) def test_query_None(self): df = self.df result = df.query("A>0", engine=None) - assert_frame_equal(result, self.expected1) + tm.assert_frame_equal(result, self.expected1) result = df.eval("A+1", engine=None) - assert_series_equal(result, self.expected2, check_names=False) + tm.assert_series_equal(result, self.expected2, check_names=False) def test_query_python(self): df = self.df result = df.query("A>0", engine="python") - assert_frame_equal(result, self.expected1) + tm.assert_frame_equal(result, self.expected1) result = df.eval("A+1", engine="python") - assert_series_equal(result, self.expected2, check_names=False) + tm.assert_series_equal(result, self.expected2, check_names=False) def test_query_numexpr(self): df = self.df if _NUMEXPR_INSTALLED: result = df.query("A>0", engine="numexpr") - assert_frame_equal(result, self.expected1) + tm.assert_frame_equal(result, self.expected1) result = df.eval("A+1", engine="numexpr") - assert_series_equal(result, self.expected2, check_names=False) + tm.assert_series_equal(result, self.expected2, check_names=False) else: with pytest.raises(ImportError): df.query("A>0", engine="numexpr") @@ -109,17 +105,17 @@ def test_ops(self): # ops as strings result = eval("m{op}df".format(op=op_str)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # these are commutative if op in ["+", "*"]: result = getattr(df, op)(m) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # these are not elif op in ["-", "/"]: result = getattr(df, rop)(m) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # GH7192: Note we need a large number of rows to ensure this # goes through the numexpr path @@ -127,7 +123,7 @@ def test_ops(self): df.iloc[0:5] = np.nan expected = 1 - np.isnan(df.iloc[0:25]) result = (1 - np.isnan(df)).iloc[0:25] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_query_non_str(self): # GH 11485 @@ -172,41 +168,41 @@ def test_query_with_named_multiindex(self, parser, engine): res1 = df.query('color == "red"', parser=parser, engine=engine) res2 = df.query('"red" == color', parser=parser, engine=engine) exp = df[ind == "red"] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) # inequality res1 = df.query('color != "red"', parser=parser, engine=engine) res2 = df.query('"red" != color', parser=parser, engine=engine) exp = df[ind != "red"] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('color == ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] == color', parser=parser, engine=engine) exp = df[ind.isin(["red"])] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) res1 = df.query('color != ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] != color', parser=parser, engine=engine) exp = df[~ind.isin(["red"])] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["red"] in color', parser=parser, engine=engine) res2 = df.query('"red" in color', parser=parser, engine=engine) exp = df[ind.isin(["red"])] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) res1 = df.query('["red"] not in color', parser=parser, engine=engine) res2 = df.query('"red" not in color', parser=parser, engine=engine) exp = df[~ind.isin(["red"])] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) def test_query_with_unnamed_multiindex(self, parser, engine): skip_if_no_pandas_parser(parser) @@ -219,82 +215,82 @@ def test_query_with_unnamed_multiindex(self, parser, engine): res1 = df.query('ilevel_0 == "red"', parser=parser, engine=engine) res2 = df.query('"red" == ilevel_0', parser=parser, engine=engine) exp = df[ind == "red"] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) # inequality res1 = df.query('ilevel_0 != "red"', parser=parser, engine=engine) res2 = df.query('"red" != ilevel_0', parser=parser, engine=engine) exp = df[ind != "red"] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('ilevel_0 == ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] == ilevel_0', parser=parser, engine=engine) exp = df[ind.isin(["red"])] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) res1 = df.query('ilevel_0 != ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] != ilevel_0', parser=parser, engine=engine) exp = df[~ind.isin(["red"])] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["red"] in ilevel_0', parser=parser, engine=engine) res2 = df.query('"red" in ilevel_0', parser=parser, engine=engine) exp = df[ind.isin(["red"])] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) res1 = df.query('["red"] not in ilevel_0', parser=parser, engine=engine) res2 = df.query('"red" not in ilevel_0', parser=parser, engine=engine) exp = df[~ind.isin(["red"])] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) # ## LEVEL 1 ind = Series(df.index.get_level_values(1).values, index=index) res1 = df.query('ilevel_1 == "eggs"', parser=parser, engine=engine) res2 = df.query('"eggs" == ilevel_1', parser=parser, engine=engine) exp = df[ind == "eggs"] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) # inequality res1 = df.query('ilevel_1 != "eggs"', parser=parser, engine=engine) res2 = df.query('"eggs" != ilevel_1', parser=parser, engine=engine) exp = df[ind != "eggs"] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('ilevel_1 == ["eggs"]', parser=parser, engine=engine) res2 = df.query('["eggs"] == ilevel_1', parser=parser, engine=engine) exp = df[ind.isin(["eggs"])] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) res1 = df.query('ilevel_1 != ["eggs"]', parser=parser, engine=engine) res2 = df.query('["eggs"] != ilevel_1', parser=parser, engine=engine) exp = df[~ind.isin(["eggs"])] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["eggs"] in ilevel_1', parser=parser, engine=engine) res2 = df.query('"eggs" in ilevel_1', parser=parser, engine=engine) exp = df[ind.isin(["eggs"])] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) res1 = df.query('["eggs"] not in ilevel_1', parser=parser, engine=engine) res2 = df.query('"eggs" not in ilevel_1', parser=parser, engine=engine) exp = df[~ind.isin(["eggs"])] - assert_frame_equal(res1, exp) - assert_frame_equal(res2, exp) + tm.assert_frame_equal(res1, exp) + tm.assert_frame_equal(res2, exp) def test_query_with_partially_named_multiindex(self, parser, engine): skip_if_no_pandas_parser(parser) @@ -308,27 +304,29 @@ def test_query_with_partially_named_multiindex(self, parser, engine): df.index.get_level_values("rating").values, index=index, name="rating" ) exp = df[ind == 1] - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) res = df.query("rating != 1", parser=parser, engine=engine) ind = Series( df.index.get_level_values("rating").values, index=index, name="rating" ) exp = df[ind != 1] - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) res = df.query('ilevel_0 == "red"', parser=parser, engine=engine) ind = Series(df.index.get_level_values(0).values, index=index) exp = df[ind == "red"] - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) res = df.query('ilevel_0 != "red"', parser=parser, engine=engine) ind = Series(df.index.get_level_values(0).values, index=index) exp = df[ind != "red"] - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) def test_query_multiindex_get_index_resolvers(self): - df = mkdf(10, 3, r_idx_nlevels=2, r_idx_names=["spam", "eggs"]) + df = tm.makeCustomDataframe( + 10, 3, r_idx_nlevels=2, r_idx_names=["spam", "eggs"] + ) resolvers = df._get_index_resolvers() def to_series(mi, level): @@ -349,7 +347,7 @@ def to_series(mi, level): if isinstance(v, Index): assert v.is_(expected[k]) elif isinstance(v, Series): - assert_series_equal(v, expected[k]) + tm.assert_series_equal(v, expected[k]) else: raise AssertionError("object must be a Series or Index") @@ -376,7 +374,7 @@ def test_date_query_with_attribute_access(self): "@df.dates1 < 20130101 < @df.dates3", engine=engine, parser=parser ) expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_date_query_no_attribute_access(self): engine, parser = self.engine, self.parser @@ -386,7 +384,7 @@ def test_date_query_no_attribute_access(self): df["dates3"] = date_range("1/1/2014", periods=5) res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser) expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_date_query_with_NaT(self): engine, parser = self.engine, self.parser @@ -399,7 +397,7 @@ def test_date_query_with_NaT(self): df.loc[np.random.rand(n) > 0.5, "dates3"] = pd.NaT res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser) expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_date_index_query(self): engine, parser = self.engine, self.parser @@ -410,7 +408,7 @@ def test_date_index_query(self): df.set_index("dates1", inplace=True, drop=True) res = df.query("index < 20130101 < dates3", engine=engine, parser=parser) expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_date_index_query_with_NaT(self): engine, parser = self.engine, self.parser @@ -422,7 +420,7 @@ def test_date_index_query_with_NaT(self): df.set_index("dates1", inplace=True, drop=True) res = df.query("index < 20130101 < dates3", engine=engine, parser=parser) expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_date_index_query_with_NaT_duplicates(self): engine, parser = self.engine, self.parser @@ -435,7 +433,7 @@ def test_date_index_query_with_NaT_duplicates(self): df.set_index("dates1", inplace=True, drop=True) res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser) expec = df[(df.index.to_series() < "20130101") & ("20130101" < df.dates3)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_date_query_with_non_date(self): engine, parser = self.engine, self.parser @@ -449,7 +447,7 @@ def test_date_query_with_non_date(self): assert len(result) == 0 result = df.query("dates != nondate", parser=parser, engine=engine) - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) for op in ["<", ">", "<=", ">="]: with pytest.raises(TypeError): @@ -474,11 +472,11 @@ def test_query_scope(self): a, b = 1, 2 # noqa res = df.query("a > b", engine=engine, parser=parser) expected = df[df.a > df.b] - assert_frame_equal(res, expected) + tm.assert_frame_equal(res, expected) res = df.query("@a > b", engine=engine, parser=parser) expected = df[a > df.b] - assert_frame_equal(res, expected) + tm.assert_frame_equal(res, expected) # no local variable c with pytest.raises(UndefinedVariableError): @@ -516,10 +514,10 @@ def test_query(self): engine, parser = self.engine, self.parser df = DataFrame(np.random.randn(10, 3), columns=["a", "b", "c"]) - assert_frame_equal( + tm.assert_frame_equal( df.query("a < b", engine=engine, parser=parser), df[df.a < df.b] ) - assert_frame_equal( + tm.assert_frame_equal( df.query("a + b > b * c", engine=engine, parser=parser), df[df.a + df.b > df.b * df.c], ) @@ -533,12 +531,12 @@ def test_query_index_with_name(self): ) res = df.query("(blob < 5) & (a < b)", engine=engine, parser=parser) expec = df[(df.index < 5) & (df.a < df.b)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) res = df.query("blob < b", engine=engine, parser=parser) expec = df[df.index < df.b] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_query_index_without_name(self): engine, parser = self.engine, self.parser @@ -551,12 +549,12 @@ def test_query_index_without_name(self): # "index" should refer to the index res = df.query("index < b", engine=engine, parser=parser) expec = df[df.index < df.b] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # test against a scalar res = df.query("index < 5", engine=engine, parser=parser) expec = df[df.index < 5] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_nested_scope(self): engine = self.engine @@ -569,20 +567,20 @@ def test_nested_scope(self): expected = df[(df > 0) & (df2 > 0)] result = df.query("(@df > 0) & (@df2 > 0)", engine=engine, parser=parser) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = pd.eval("df[df > 0 and df2 > 0]", engine=engine, parser=parser) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = pd.eval( "df[df > 0 and df2 > 0 and df[df > 0] > 0]", engine=engine, parser=parser ) expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = pd.eval("df[(df>0) & (df2>0)]", engine=engine, parser=parser) expected = df.query("(@df>0) & (@df2>0)", engine=engine, parser=parser) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_nested_raises_on_local_self_reference(self): from pandas.core.computation.ops import UndefinedVariableError @@ -601,11 +599,11 @@ def test_local_syntax(self): b = 1 expect = df[df.a < b] result = df.query("a < @b", engine=engine, parser=parser) - assert_frame_equal(result, expect) + tm.assert_frame_equal(result, expect) expect = df[df.a < df.b] result = df.query("a < b", engine=engine, parser=parser) - assert_frame_equal(result, expect) + tm.assert_frame_equal(result, expect) def test_chained_cmp_and_in(self): skip_if_no_pandas_parser(self.parser) @@ -619,7 +617,7 @@ def test_chained_cmp_and_in(self): (df.a < df.b) & (df.b < df.c) & ~df.b.isin(df.a) & ~df.c.isin(df.b) ) # noqa expec = df[ind] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_local_variable_with_in(self): engine, parser = self.engine, self.parser @@ -630,12 +628,12 @@ def test_local_variable_with_in(self): expected = df.loc[(df.b - 1).isin(a)] result = df.query("b - 1 in a", engine=engine, parser=parser) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) b = Series(np.random.randint(10, size=15), name="b") expected = df.loc[(b - 1).isin(a)] result = df.query("@b - 1 in a", engine=engine, parser=parser) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) def test_at_inside_string(self): engine, parser = self.engine, self.parser @@ -644,7 +642,7 @@ def test_at_inside_string(self): df = DataFrame({"a": ["a", "a", "b", "b", "@c", "@c"]}) result = df.query('a == "@c"', engine=engine, parser=parser) expected = df[df.a == "@c"] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_query_undefined_local(self): from pandas.core.computation.ops import UndefinedVariableError @@ -666,22 +664,22 @@ def test_index_resolvers_come_after_columns_with_the_same_name(self): df.index.name = "index" result = df.query("index > 5", engine=self.engine, parser=self.parser) expected = df[df["index"] > 5] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) df = DataFrame({"index": a, "b": np.random.randn(a.size)}) result = df.query("ilevel_0 > 5", engine=self.engine, parser=self.parser) expected = df.loc[df.index[df.index > 5]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) df = DataFrame({"a": a, "b": np.random.randn(a.size)}) df.index.name = "a" result = df.query("a > 5", engine=self.engine, parser=self.parser) expected = df[df.a > 5] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.query("index > 5", engine=self.engine, parser=self.parser) expected = df.loc[df.index[df.index > 5]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_inf(self): n = 10 @@ -693,7 +691,7 @@ def test_inf(self): q = "a {op} inf".format(op=op) expected = df[f(df.a, np.inf)] result = df.query(q, engine=self.engine, parser=self.parser) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @td.skip_if_no_ne @@ -714,7 +712,7 @@ def test_date_query_no_attribute_access(self): "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser ) expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_date_query_with_NaT(self): engine, parser = self.engine, self.parser @@ -729,7 +727,7 @@ def test_date_query_with_NaT(self): "(dates1 < 20130101) & (20130101 < dates3)", engine=engine, parser=parser ) expec = df[(df.dates1 < "20130101") & ("20130101" < df.dates3)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_date_index_query(self): engine, parser = self.engine, self.parser @@ -742,7 +740,7 @@ def test_date_index_query(self): "(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser ) expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_date_index_query_with_NaT(self): engine, parser = self.engine, self.parser @@ -756,7 +754,7 @@ def test_date_index_query_with_NaT(self): "(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser ) expec = df[(df.index < "20130101") & ("20130101" < df.dates3)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_date_index_query_with_NaT_duplicates(self): engine, parser = self.engine, self.parser @@ -791,13 +789,13 @@ def test_nested_scope(self): expected = df[(df > 0) & (df2 > 0)] result = pd.eval("df[(df > 0) & (df2 > 0)]", engine=engine, parser=parser) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)] result = pd.eval( "df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]", engine=engine, parser=parser ) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) class TestDataFrameQueryPythonPandas(TestDataFrameQueryNumExprPandas): @@ -816,7 +814,7 @@ def test_query_builtin(self): df.index.name = "sin" expected = df[df.index > 5] result = df.query("sin > 5", engine=engine, parser=parser) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) class TestDataFrameQueryPythonPython(TestDataFrameQueryNumExprPython): @@ -834,7 +832,7 @@ def test_query_builtin(self): df.index.name = "sin" expected = df[df.index > 5] result = df.query("sin > 5", engine=engine, parser=parser) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) class TestDataFrameQueryStrings: @@ -865,19 +863,19 @@ def test_str_query_method(self, parser, engine): ) else: res = df.query('"a" == strings', engine=engine, parser=parser) - assert_frame_equal(res, expect) + tm.assert_frame_equal(res, expect) res = df.query('strings == "a"', engine=engine, parser=parser) - assert_frame_equal(res, expect) - assert_frame_equal(res, df[df.strings.isin(["a"])]) + tm.assert_frame_equal(res, expect) + tm.assert_frame_equal(res, df[df.strings.isin(["a"])]) expect = df[df.strings != "a"] res = df.query('strings != "a"', engine=engine, parser=parser) - assert_frame_equal(res, expect) + tm.assert_frame_equal(res, expect) res = df.query('"a" != strings', engine=engine, parser=parser) - assert_frame_equal(res, expect) - assert_frame_equal(res, df[~df.strings.isin(["a"])]) + tm.assert_frame_equal(res, expect) + tm.assert_frame_equal(res, df[~df.strings.isin(["a"])]) def test_str_list_query_method(self, parser, engine): df = DataFrame(np.random.randn(10, 1), columns=["b"]) @@ -900,18 +898,18 @@ def test_str_list_query_method(self, parser, engine): df.query(ex, engine=engine, parser=parser) else: res = df.query('strings == ["a", "b"]', engine=engine, parser=parser) - assert_frame_equal(res, expect) + tm.assert_frame_equal(res, expect) res = df.query('["a", "b"] == strings', engine=engine, parser=parser) - assert_frame_equal(res, expect) + tm.assert_frame_equal(res, expect) expect = df[~df.strings.isin(["a", "b"])] res = df.query('strings != ["a", "b"]', engine=engine, parser=parser) - assert_frame_equal(res, expect) + tm.assert_frame_equal(res, expect) res = df.query('["a", "b"] != strings', engine=engine, parser=parser) - assert_frame_equal(res, expect) + tm.assert_frame_equal(res, expect) def test_query_with_string_columns(self, parser, engine): df = DataFrame( @@ -925,11 +923,11 @@ def test_query_with_string_columns(self, parser, engine): if parser == "pandas": res = df.query("a in b", parser=parser, engine=engine) expec = df[df.a.isin(df.b)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) res = df.query("a in b and c < d", parser=parser, engine=engine) expec = df[df.a.isin(df.b) & (df.c < df.d)] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) else: with pytest.raises(NotImplementedError): df.query("a in b", parser=parser, engine=engine) @@ -948,11 +946,11 @@ def test_object_array_eq_ne(self, parser, engine): ) res = df.query("a == b", parser=parser, engine=engine) exp = df[df.a == df.b] - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) res = df.query("a != b", parser=parser, engine=engine) exp = df[df.a != df.b] - assert_frame_equal(res, exp) + tm.assert_frame_equal(res, exp) def test_query_with_nested_strings(self, parser, engine): skip_if_no_pandas_parser(parser) @@ -975,14 +973,14 @@ def test_query_with_nested_strings(self, parser, engine): ) expected = df[df.event == '"page 1 load"'] res = df.query("""'"page 1 load"' in event""", parser=parser, engine=engine) - assert_frame_equal(expected, res) + tm.assert_frame_equal(expected, res) def test_query_with_nested_special_character(self, parser, engine): skip_if_no_pandas_parser(parser) df = DataFrame({"a": ["a", "b", "test & test"], "b": [1, 2, 3]}) res = df.query('a == "test & test"', parser=parser, engine=engine) expec = df[df.a == "test & test"] - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_query_lex_compare_strings(self, parser, engine): @@ -995,7 +993,7 @@ def test_query_lex_compare_strings(self, parser, engine): for op, func in ops.items(): res = df.query('X %s "d"' % op, engine=engine, parser=parser) expected = df[func(df.X, "d")] - assert_frame_equal(res, expected) + tm.assert_frame_equal(res, expected) def test_query_single_element_booleans(self, parser, engine): columns = "bid", "bidsize", "ask", "asksize" @@ -1003,7 +1001,7 @@ def test_query_single_element_booleans(self, parser, engine): df = DataFrame(data, columns=columns) res = df.query("bid & ask", engine=engine, parser=parser) expected = df[df.bid & df.ask] - assert_frame_equal(res, expected) + tm.assert_frame_equal(res, expected) def test_query_string_scalar_variable(self, parser, engine): skip_if_no_pandas_parser(parser) @@ -1016,7 +1014,7 @@ def test_query_string_scalar_variable(self, parser, engine): e = df[df.Symbol == "BUD US"] symb = "BUD US" # noqa r = df.query("Symbol == @symb", parser=parser, engine=engine) - assert_frame_equal(e, r) + tm.assert_frame_equal(e, r) class TestDataFrameEvalWithFrame: @@ -1029,12 +1027,12 @@ def teardown_method(self, method): def test_simple_expr(self, parser, engine): res = self.frame.eval("a + b", engine=engine, parser=parser) expect = self.frame.a + self.frame.b - assert_series_equal(res, expect) + tm.assert_series_equal(res, expect) def test_bool_arith_expr(self, parser, engine): res = self.frame.eval("a[a < 1] + b", engine=engine, parser=parser) expect = self.frame.a[self.frame.a < 1] + self.frame.b - assert_series_equal(res, expect) + tm.assert_series_equal(res, expect) @pytest.mark.parametrize("op", ["+", "-", "*", "/"]) def test_invalid_type_for_operator_raises(self, parser, engine, op): @@ -1061,39 +1059,39 @@ def df(self): def test_single_backtick_variable_query(self, df): res = df.query("1 < `B B`") expect = df[1 < df["B B"]] - assert_frame_equal(res, expect) + tm.assert_frame_equal(res, expect) def test_two_backtick_variables_query(self, df): res = df.query("1 < `B B` and 4 < `C C`") expect = df[(1 < df["B B"]) & (4 < df["C C"])] - assert_frame_equal(res, expect) + tm.assert_frame_equal(res, expect) def test_single_backtick_variable_expr(self, df): res = df.eval("A + `B B`") expect = df["A"] + df["B B"] - assert_series_equal(res, expect) + tm.assert_series_equal(res, expect) def test_two_backtick_variables_expr(self, df): res = df.eval("`B B` + `C C`") expect = df["B B"] + df["C C"] - assert_series_equal(res, expect) + tm.assert_series_equal(res, expect) def test_already_underscore_variable(self, df): res = df.eval("`C_C` + A") expect = df["C_C"] + df["A"] - assert_series_equal(res, expect) + tm.assert_series_equal(res, expect) def test_same_name_but_underscores(self, df): res = df.eval("C_C + `C C`") expect = df["C_C"] + df["C C"] - assert_series_equal(res, expect) + tm.assert_series_equal(res, expect) def test_mixed_underscores_and_spaces(self, df): res = df.eval("A + `D_D D`") expect = df["A"] + df["D_D D"] - assert_series_equal(res, expect) + tm.assert_series_equal(res, expect) def backtick_quote_name_with_no_spaces(self, df): res = df.eval("A + `C_C`") expect = df["A"] + df["C_C"] - assert_series_equal(res, expect) + tm.assert_series_equal(res, expect) diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index fd9c53c7d9f5bb..be1a423c22aeab 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -5,7 +5,6 @@ from pandas import DataFrame, Series import pandas.util.testing as tm -from pandas.util.testing import assert_frame_equal class TestRank: @@ -231,7 +230,7 @@ def test_rank_descending(self, method, dtype): res = df.rank(ascending=False) expected = (df.max() - df).rank() - assert_frame_equal(res, expected) + tm.assert_frame_equal(res, expected) if method == "first" and dtype == "O": return @@ -240,10 +239,10 @@ def test_rank_descending(self, method, dtype): if dtype != "O": res2 = df.rank(method=method, ascending=False, numeric_only=True) - assert_frame_equal(res2, expected) + tm.assert_frame_equal(res2, expected) res3 = df.rank(method=method, ascending=False, numeric_only=False) - assert_frame_equal(res3, expected) + tm.assert_frame_equal(res3, expected) @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("dtype", [None, object]) @@ -258,7 +257,7 @@ def _check2d(df, expected, method="average", axis=0): exp_df = exp_df.T result = df.rank(method=method, axis=axis) - assert_frame_equal(result, exp_df) + tm.assert_frame_equal(result, exp_df) disabled = {(object, "first")} if (dtype, method) in disabled: diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index fdb450da53137d..5eb2416d0dcd78 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -8,7 +8,7 @@ import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, date_range -from pandas.util.testing import assert_frame_equal, assert_series_equal +import pandas.util.testing as tm @pytest.fixture @@ -28,7 +28,7 @@ def test_replace_inplace(self, datetime_frame, float_string_frame): tsframe = datetime_frame.copy() tsframe.replace(np.nan, 0, inplace=True) - assert_frame_equal(tsframe, datetime_frame.fillna(0)) + tm.assert_frame_equal(tsframe, datetime_frame.fillna(0)) # mixed type mf = float_string_frame @@ -37,11 +37,11 @@ def test_replace_inplace(self, datetime_frame, float_string_frame): result = float_string_frame.replace(np.nan, 0) expected = float_string_frame.fillna(value=0) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) tsframe = datetime_frame.copy() tsframe.replace([np.nan], [0], inplace=True) - assert_frame_equal(tsframe, datetime_frame.fillna(0)) + tm.assert_frame_equal(tsframe, datetime_frame.fillna(0)) def test_regex_replace_scalar(self, mix_ab): obj = {"a": list("ab.."), "b": list("efgh")} @@ -52,11 +52,11 @@ def test_regex_replace_scalar(self, mix_ab): # regex -> value # obj frame res = dfobj.replace(r"\s*\.\s*", np.nan, regex=True) - assert_frame_equal(dfobj, res.fillna(".")) + tm.assert_frame_equal(dfobj, res.fillna(".")) # mixed res = dfmix.replace(r"\s*\.\s*", np.nan, regex=True) - assert_frame_equal(dfmix, res.fillna(".")) + tm.assert_frame_equal(dfmix, res.fillna(".")) # regex -> regex # obj frame @@ -64,22 +64,22 @@ def test_regex_replace_scalar(self, mix_ab): objc = obj.copy() objc["a"] = ["a", "b", "...", "..."] expec = DataFrame(objc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # with mixed res = dfmix.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) mixc = mix_ab.copy() mixc["b"] = ["a", "b", "...", "..."] expec = DataFrame(mixc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # everything with compiled regexs as well res = dfobj.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) - assert_frame_equal(dfobj, res.fillna(".")) + tm.assert_frame_equal(dfobj, res.fillna(".")) # mixed res = dfmix.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) - assert_frame_equal(dfmix, res.fillna(".")) + tm.assert_frame_equal(dfmix, res.fillna(".")) # regex -> regex # obj frame @@ -87,26 +87,26 @@ def test_regex_replace_scalar(self, mix_ab): objc = obj.copy() objc["a"] = ["a", "b", "...", "..."] expec = DataFrame(objc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # with mixed res = dfmix.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") mixc = mix_ab.copy() mixc["b"] = ["a", "b", "...", "..."] expec = DataFrame(mixc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) res = dfmix.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1") mixc = mix_ab.copy() mixc["b"] = ["a", "b", "...", "..."] expec = DataFrame(mixc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) res = dfmix.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1") mixc = mix_ab.copy() mixc["b"] = ["a", "b", "...", "..."] expec = DataFrame(mixc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_regex_replace_scalar_inplace(self, mix_ab): obj = {"a": list("ab.."), "b": list("efgh")} @@ -118,12 +118,12 @@ def test_regex_replace_scalar_inplace(self, mix_ab): # obj frame res = dfobj.copy() res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) - assert_frame_equal(dfobj, res.fillna(".")) + tm.assert_frame_equal(dfobj, res.fillna(".")) # mixed res = dfmix.copy() res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) - assert_frame_equal(dfmix, res.fillna(".")) + tm.assert_frame_equal(dfmix, res.fillna(".")) # regex -> regex # obj frame @@ -132,7 +132,7 @@ def test_regex_replace_scalar_inplace(self, mix_ab): objc = obj.copy() objc["a"] = ["a", "b", "...", "..."] expec = DataFrame(objc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # with mixed res = dfmix.copy() @@ -140,17 +140,17 @@ def test_regex_replace_scalar_inplace(self, mix_ab): mixc = mix_ab.copy() mixc["b"] = ["a", "b", "...", "..."] expec = DataFrame(mixc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # everything with compiled regexs as well res = dfobj.copy() res.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True) - assert_frame_equal(dfobj, res.fillna(".")) + tm.assert_frame_equal(dfobj, res.fillna(".")) # mixed res = dfmix.copy() res.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True) - assert_frame_equal(dfmix, res.fillna(".")) + tm.assert_frame_equal(dfmix, res.fillna(".")) # regex -> regex # obj frame @@ -159,7 +159,7 @@ def test_regex_replace_scalar_inplace(self, mix_ab): objc = obj.copy() objc["a"] = ["a", "b", "...", "..."] expec = DataFrame(objc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # with mixed res = dfmix.copy() @@ -167,16 +167,16 @@ def test_regex_replace_scalar_inplace(self, mix_ab): mixc = mix_ab.copy() mixc["b"] = ["a", "b", "...", "..."] expec = DataFrame(mixc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) res = dfobj.copy() res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) - assert_frame_equal(dfobj, res.fillna(".")) + tm.assert_frame_equal(dfobj, res.fillna(".")) # mixed res = dfmix.copy() res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) - assert_frame_equal(dfmix, res.fillna(".")) + tm.assert_frame_equal(dfmix, res.fillna(".")) # regex -> regex # obj frame @@ -185,7 +185,7 @@ def test_regex_replace_scalar_inplace(self, mix_ab): objc = obj.copy() objc["a"] = ["a", "b", "...", "..."] expec = DataFrame(objc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # with mixed res = dfmix.copy() @@ -193,17 +193,17 @@ def test_regex_replace_scalar_inplace(self, mix_ab): mixc = mix_ab.copy() mixc["b"] = ["a", "b", "...", "..."] expec = DataFrame(mixc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # everything with compiled regexs as well res = dfobj.copy() res.replace(regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True) - assert_frame_equal(dfobj, res.fillna(".")) + tm.assert_frame_equal(dfobj, res.fillna(".")) # mixed res = dfmix.copy() res.replace(regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True) - assert_frame_equal(dfmix, res.fillna(".")) + tm.assert_frame_equal(dfmix, res.fillna(".")) # regex -> regex # obj frame @@ -212,7 +212,7 @@ def test_regex_replace_scalar_inplace(self, mix_ab): objc = obj.copy() objc["a"] = ["a", "b", "...", "..."] expec = DataFrame(objc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # with mixed res = dfmix.copy() @@ -220,7 +220,7 @@ def test_regex_replace_scalar_inplace(self, mix_ab): mixc = mix_ab.copy() mixc["b"] = ["a", "b", "...", "..."] expec = DataFrame(mixc) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_regex_replace_list_obj(self): obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")} @@ -238,7 +238,7 @@ def test_regex_replace_list_obj(self): "c": ["h", "crap", "l", "o"], } ) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] to_replace_res = [r"\s*(\.)\s*", r"(e|f|g)"] @@ -251,7 +251,7 @@ def test_regex_replace_list_obj(self): "c": ["h", "e_crap", "l", "o"], } ) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN # or vN)] @@ -265,7 +265,7 @@ def test_regex_replace_list_obj(self): "c": ["h", "crap", "l", "o"], } ) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) to_replace_res = [r"\s*(\.)\s*", r"e"] values = [r"\1\1", r"crap"] @@ -277,7 +277,7 @@ def test_regex_replace_list_obj(self): "c": ["h", "crap", "l", "o"], } ) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_regex_replace_list_obj_inplace(self): # same as above with inplace=True @@ -298,7 +298,7 @@ def test_regex_replace_list_obj_inplace(self): "c": ["h", "crap", "l", "o"], } ) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] to_replace_res = [r"\s*(\.)\s*", r"(e|f|g)"] @@ -312,7 +312,7 @@ def test_regex_replace_list_obj_inplace(self): "c": ["h", "e_crap", "l", "o"], } ) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN # or vN)] @@ -327,7 +327,7 @@ def test_regex_replace_list_obj_inplace(self): "c": ["h", "crap", "l", "o"], } ) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) to_replace_res = [r"\s*(\.)\s*", r"e"] values = [r"\1\1", r"crap"] @@ -340,7 +340,7 @@ def test_regex_replace_list_obj_inplace(self): "c": ["h", "crap", "l", "o"], } ) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_regex_replace_list_mixed(self, mix_ab): # mixed frame to make sure this doesn't break things @@ -360,14 +360,14 @@ def test_regex_replace_list_mixed(self, mix_ab): "c": ["h", "crap", "l", "o"], } ) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] to_replace_res = [r"\s*(\.)\s*", r"(a|b)"] values = [r"\1\1", r"\1_crap"] res = dfmix.replace(to_replace_res, values, regex=True) expec = DataFrame({"a": mix_ab["a"], "b": ["a_crap", "b_crap", "..", ".."]}) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN # or vN)] @@ -375,13 +375,13 @@ def test_regex_replace_list_mixed(self, mix_ab): values = [r"\1\1", r"crap", r"\1_crap"] res = dfmix.replace(to_replace_res, values, regex=True) expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] values = [r"\1\1", r"crap", r"\1_crap"] res = dfmix.replace(regex=to_replace_res, value=values) expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_regex_replace_list_mixed_inplace(self, mix_ab): dfmix = DataFrame(mix_ab) @@ -393,7 +393,7 @@ def test_regex_replace_list_mixed_inplace(self, mix_ab): res = dfmix.copy() res.replace(to_replace_res, values, inplace=True, regex=True) expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b", np.nan, np.nan]}) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [re1, re2, .., reN] to_replace_res = [r"\s*(\.)\s*", r"(a|b)"] @@ -401,7 +401,7 @@ def test_regex_replace_list_mixed_inplace(self, mix_ab): res = dfmix.copy() res.replace(to_replace_res, values, inplace=True, regex=True) expec = DataFrame({"a": mix_ab["a"], "b": ["a_crap", "b_crap", "..", ".."]}) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # list of [re1, re2, ..., reN] -> [(re1 or v1), (re2 or v2), ..., (reN # or vN)] @@ -410,14 +410,14 @@ def test_regex_replace_list_mixed_inplace(self, mix_ab): res = dfmix.copy() res.replace(to_replace_res, values, inplace=True, regex=True) expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) to_replace_res = [r"\s*(\.)\s*", r"a", r"(b)"] values = [r"\1\1", r"crap", r"\1_crap"] res = dfmix.copy() res.replace(regex=to_replace_res, value=values, inplace=True) expec = DataFrame({"a": mix_ab["a"], "b": ["crap", "b_crap", "..", ".."]}) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_regex_replace_dict_mixed(self, mix_abc): dfmix = DataFrame(mix_abc) @@ -434,8 +434,8 @@ def test_regex_replace_dict_mixed(self, mix_abc): expec = DataFrame( {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]} ) - assert_frame_equal(res, expec) - assert_frame_equal(res2, expec) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) # list of dicts {re1: re11, re2: re12, ..., reN: re1N}, search the # whole frame @@ -445,8 +445,8 @@ def test_regex_replace_dict_mixed(self, mix_abc): expec = DataFrame( {"a": mix_abc["a"], "b": ["a", "b", ".ty", ".ty"], "c": mix_abc["c"]} ) - assert_frame_equal(res, expec) - assert_frame_equal(res2, expec) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) res = dfmix.replace(regex={"b": r"\s*(\.)\s*"}, value={"b": r"\1ty"}) res2 = dfmix.copy() @@ -454,8 +454,8 @@ def test_regex_replace_dict_mixed(self, mix_abc): expec = DataFrame( {"a": mix_abc["a"], "b": ["a", "b", ".ty", ".ty"], "c": mix_abc["c"]} ) - assert_frame_equal(res, expec) - assert_frame_equal(res2, expec) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) # scalar -> dict # to_replace regex, {value: value} @@ -465,8 +465,8 @@ def test_regex_replace_dict_mixed(self, mix_abc): res = dfmix.replace("a", {"b": np.nan}, regex=True) res2 = dfmix.copy() res2.replace("a", {"b": np.nan}, regex=True, inplace=True) - assert_frame_equal(res, expec) - assert_frame_equal(res2, expec) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) res = dfmix.replace("a", {"b": np.nan}, regex=True) res2 = dfmix.copy() @@ -474,8 +474,8 @@ def test_regex_replace_dict_mixed(self, mix_abc): expec = DataFrame( {"a": mix_abc["a"], "b": [np.nan, "b", ".", "."], "c": mix_abc["c"]} ) - assert_frame_equal(res, expec) - assert_frame_equal(res2, expec) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) def test_regex_replace_dict_nested(self, mix_abc): # nested dicts will not work until this is implemented for Series @@ -489,23 +489,23 @@ def test_regex_replace_dict_nested(self, mix_abc): expec = DataFrame( {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]} ) - assert_frame_equal(res, expec) - assert_frame_equal(res2, expec) - assert_frame_equal(res3, expec) - assert_frame_equal(res4, expec) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) + tm.assert_frame_equal(res4, expec) def test_regex_replace_dict_nested_non_first_character(self): # GH 25259 df = pd.DataFrame({"first": ["abc", "bca", "cab"]}) expected = pd.DataFrame({"first": [".bc", "bc.", "c.b"]}) result = df.replace({"a": "."}, regex=True) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_regex_replace_dict_nested_gh4115(self): df = pd.DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2}) expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2}) result = df.replace({"Type": {"Q": 0, "T": 1}}) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_regex_replace_list_to_scalar(self, mix_abc): df = DataFrame(mix_abc) @@ -521,9 +521,9 @@ def test_regex_replace_list_to_scalar(self, mix_abc): res3 = df.copy() res2.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True) res3.replace(regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True) - assert_frame_equal(res, expec) - assert_frame_equal(res2, expec) - assert_frame_equal(res3, expec) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) def test_regex_replace_str_to_numeric(self, mix_abc): # what happens when you try to replace a numeric value with a regex? @@ -534,9 +534,9 @@ def test_regex_replace_str_to_numeric(self, mix_abc): res3 = df.copy() res3.replace(regex=r"\s*\.\s*", value=0, inplace=True) expec = DataFrame({"a": mix_abc["a"], "b": ["a", "b", 0, 0], "c": mix_abc["c"]}) - assert_frame_equal(res, expec) - assert_frame_equal(res2, expec) - assert_frame_equal(res3, expec) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) def test_regex_replace_regex_list_to_numeric(self, mix_abc): df = DataFrame(mix_abc) @@ -548,9 +548,9 @@ def test_regex_replace_regex_list_to_numeric(self, mix_abc): expec = DataFrame( {"a": mix_abc["a"], "b": ["a", 0, 0, 0], "c": ["a", 0, np.nan, "d"]} ) - assert_frame_equal(res, expec) - assert_frame_equal(res2, expec) - assert_frame_equal(res3, expec) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) def test_regex_replace_series_of_regexes(self, mix_abc): df = DataFrame(mix_abc) @@ -564,15 +564,15 @@ def test_regex_replace_series_of_regexes(self, mix_abc): expec = DataFrame( {"a": mix_abc["a"], "b": ["a", "b", np.nan, np.nan], "c": mix_abc["c"]} ) - assert_frame_equal(res, expec) - assert_frame_equal(res2, expec) - assert_frame_equal(res3, expec) + tm.assert_frame_equal(res, expec) + tm.assert_frame_equal(res2, expec) + tm.assert_frame_equal(res3, expec) def test_regex_replace_numeric_to_object_conversion(self, mix_abc): df = DataFrame(mix_abc) expec = DataFrame({"a": ["a", 1, 2, 3], "b": mix_abc["b"], "c": mix_abc["c"]}) res = df.replace(0, "a") - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) assert res.a.dtype == np.object_ @pytest.mark.parametrize("metachar", ["[]", "()", r"\d", r"\w", r"\s"]) @@ -580,15 +580,15 @@ def test_replace_regex_metachar(self, metachar): df = DataFrame({"a": [metachar, "else"]}) result = df.replace({"a": {metachar: "paren"}}) expected = DataFrame({"a": ["paren", "else"]}) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_replace(self, datetime_frame): datetime_frame["A"][:5] = np.nan datetime_frame["A"][-5:] = np.nan zero_filled = datetime_frame.replace(np.nan, -1e8) - assert_frame_equal(zero_filled, datetime_frame.fillna(-1e8)) - assert_frame_equal(zero_filled.replace(-1e8, np.nan), datetime_frame) + tm.assert_frame_equal(zero_filled, datetime_frame.fillna(-1e8)) + tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), datetime_frame) datetime_frame["A"][:5] = np.nan datetime_frame["A"][-5:] = np.nan @@ -596,7 +596,7 @@ def test_replace(self, datetime_frame): # empty df = DataFrame(index=["a", "b"]) - assert_frame_equal(df, df.replace(5, 7)) + tm.assert_frame_equal(df, df.replace(5, 7)) # GH 11698 # test for mixed data types. @@ -607,7 +607,7 @@ def test_replace(self, datetime_frame): expected_df = pd.DataFrame( [(np.nan, pd.to_datetime("20150101")), ("a", pd.to_datetime("20150102"))] ) - assert_frame_equal(df1, expected_df) + tm.assert_frame_equal(df1, expected_df) def test_replace_list(self): obj = {"a": list("ab.."), "b": list("efgh"), "c": list("helo")} @@ -625,7 +625,7 @@ def test_replace_list(self): "c": ["h", "crap", "l", "o"], } ) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) # list of [v1, v2, ..., vN] -> [v1, v2, .., vN] to_replace_res = [r".", r"f"] @@ -638,7 +638,7 @@ def test_replace_list(self): "c": ["h", "e", "l", "o"], } ) - assert_frame_equal(res, expec) + tm.assert_frame_equal(res, expec) def test_replace_with_empty_list(self): # GH 21977 @@ -646,7 +646,7 @@ def test_replace_with_empty_list(self): df = pd.DataFrame({"col": s}) expected = df result = df.replace([], np.nan) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # GH 19266 with pytest.raises(ValueError, match="cannot assign mismatch"): @@ -659,20 +659,20 @@ def test_replace_series_dict(self): df = DataFrame({"zero": {"a": 0.0, "b": 1}, "one": {"a": 2.0, "b": 0}}) result = df.replace(0, {"zero": 0.5, "one": 1.0}) expected = DataFrame({"zero": {"a": 0.5, "b": 1}, "one": {"a": 2.0, "b": 1.0}}) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.replace(0, df.mean()) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # series to series/dict df = DataFrame({"zero": {"a": 0.0, "b": 1}, "one": {"a": 2.0, "b": 0}}) s = Series({"zero": 0.0, "one": 2.0}) result = df.replace(s, {"zero": 0.5, "one": 1.0}) expected = DataFrame({"zero": {"a": 0.5, "b": 1}, "one": {"a": 1.0, "b": 0.0}}) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.replace(s, df.mean()) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_replace_convert(self): # gh 3907 @@ -681,7 +681,7 @@ def test_replace_convert(self): rep = df.replace(m) expec = Series([np.int64] * 3) res = rep.dtypes - assert_series_equal(expec, res) + tm.assert_series_equal(expec, res) def test_replace_mixed(self, float_string_frame): mf = float_string_frame @@ -690,13 +690,13 @@ def test_replace_mixed(self, float_string_frame): result = float_string_frame.replace(np.nan, -18) expected = float_string_frame.fillna(value=-18) - assert_frame_equal(result, expected) - assert_frame_equal(result.replace(-18, np.nan), float_string_frame) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result.replace(-18, np.nan), float_string_frame) result = float_string_frame.replace(np.nan, -1e8) expected = float_string_frame.fillna(value=-1e8) - assert_frame_equal(result, expected) - assert_frame_equal(result.replace(-1e8, np.nan), float_string_frame) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result.replace(-1e8, np.nan), float_string_frame) # int block upcasting df = DataFrame( @@ -712,10 +712,10 @@ def test_replace_mixed(self, float_string_frame): } ) result = df.replace(0, 0.5) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) df.replace(0, 0.5, inplace=True) - assert_frame_equal(df, expected) + tm.assert_frame_equal(df, expected) # int block splitting df = DataFrame( @@ -733,7 +733,7 @@ def test_replace_mixed(self, float_string_frame): } ) result = df.replace(0, 0.5) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # to object block upcasting df = DataFrame( @@ -749,7 +749,7 @@ def test_replace_mixed(self, float_string_frame): } ) result = df.replace(2, "foo") - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) expected = DataFrame( { @@ -758,7 +758,7 @@ def test_replace_mixed(self, float_string_frame): } ) result = df.replace([1, 2], ["foo", "bar"]) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # test case from df = DataFrame( @@ -769,28 +769,28 @@ def test_replace_mixed(self, float_string_frame): m = df.mean() expected.iloc[0, 0] = m[0] expected.iloc[1, 1] = m[1] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_replace_simple_nested_dict(self): df = DataFrame({"col": range(1, 5)}) expected = DataFrame({"col": ["a", 2, 3, "b"]}) result = df.replace({"col": {1: "a", 4: "b"}}) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) # in this case, should be the same as the not nested version result = df.replace({1: "a", 4: "b"}) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) def test_replace_simple_nested_dict_with_nonexistent_value(self): df = DataFrame({"col": range(1, 5)}) expected = DataFrame({"col": ["a", 2, 3, "b"]}) result = df.replace({-1: "-", 1: "a", 4: "b"}) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) result = df.replace({"col": {-1: "-", 1: "a", 4: "b"}}) - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) def test_replace_value_is_none(self, datetime_frame): orig_value = datetime_frame.iloc[0, 0] @@ -801,14 +801,14 @@ def test_replace_value_is_none(self, datetime_frame): result = datetime_frame.replace(to_replace={np.nan: 0}) expected = datetime_frame.T.replace(to_replace={np.nan: 0}).T - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = datetime_frame.replace(to_replace={np.nan: 0, 1: -1e8}) tsframe = datetime_frame.copy() tsframe.iloc[0, 0] = 0 tsframe.iloc[1, 0] = -1e8 expected = tsframe - assert_frame_equal(expected, result) + tm.assert_frame_equal(expected, result) datetime_frame.iloc[0, 0] = orig_value datetime_frame.iloc[1, 0] = orig2 @@ -820,8 +820,8 @@ def test_replace_for_new_dtypes(self, datetime_frame): tsframe["A"][-5:] = np.nan zero_filled = tsframe.replace(np.nan, -1e8) - assert_frame_equal(zero_filled, tsframe.fillna(-1e8)) - assert_frame_equal(zero_filled.replace(-1e8, np.nan), tsframe) + tm.assert_frame_equal(zero_filled, tsframe.fillna(-1e8)) + tm.assert_frame_equal(zero_filled.replace(-1e8, np.nan), tsframe) tsframe["A"][:5] = np.nan tsframe["A"][-5:] = np.nan @@ -831,7 +831,7 @@ def test_replace_for_new_dtypes(self, datetime_frame): b[b == -1e8] = np.nan tsframe["B"] = b result = tsframe.fillna(method="bfill") - assert_frame_equal(result, tsframe.fillna(method="bfill")) + tm.assert_frame_equal(result, tsframe.fillna(method="bfill")) @pytest.mark.parametrize( "frame, to_replace, value, expected", @@ -908,7 +908,7 @@ def test_replace_for_new_dtypes(self, datetime_frame): ) def test_replace_dtypes(self, frame, to_replace, value, expected): result = getattr(frame, "replace")(to_replace, value) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_replace_input_formats_listlike(self): # both dicts @@ -919,13 +919,13 @@ def test_replace_input_formats_listlike(self): ) filled = df.replace(to_rep, values) expected = {k: v.replace(to_rep[k], values[k]) for k, v in df.items()} - assert_frame_equal(filled, DataFrame(expected)) + tm.assert_frame_equal(filled, DataFrame(expected)) result = df.replace([0, 2, 5], [5, 2, 0]) expected = DataFrame( {"A": [np.nan, 5, np.inf], "B": [5, 2, 0], "C": ["", "asdf", "fd"]} ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # scalar to dict values = {"A": 0, "B": -1, "C": "missing"} @@ -934,7 +934,7 @@ def test_replace_input_formats_listlike(self): ) filled = df.replace(np.nan, values) expected = {k: v.replace(np.nan, values[k]) for k, v in df.items()} - assert_frame_equal(filled, DataFrame(expected)) + tm.assert_frame_equal(filled, DataFrame(expected)) # list to list to_rep = [np.nan, 0, ""] @@ -943,7 +943,7 @@ def test_replace_input_formats_listlike(self): expected = df.copy() for i in range(len(to_rep)): expected.replace(to_rep[i], values[i], inplace=True) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) msg = r"Replacement lists must match in length\. Expecting 3 got 2" with pytest.raises(ValueError, match=msg): @@ -958,7 +958,7 @@ def test_replace_input_formats_scalar(self): to_rep = {"A": np.nan, "B": 0, "C": ""} filled = df.replace(to_rep, 0) expected = {k: v.replace(to_rep[k], 0) for k, v in df.items()} - assert_frame_equal(filled, DataFrame(expected)) + tm.assert_frame_equal(filled, DataFrame(expected)) msg = "value argument must be scalar, dict, or Series" with pytest.raises(TypeError, match=msg): @@ -970,7 +970,7 @@ def test_replace_input_formats_scalar(self): expected = df.copy() for i in range(len(to_rep)): expected.replace(to_rep[i], -1, inplace=True) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_replace_limit(self): pass @@ -994,7 +994,7 @@ def test_replace_dict_no_regex(self): } expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1}) result = answer.replace(weights) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_replace_series_no_regex(self): answer = Series( @@ -1017,7 +1017,7 @@ def test_replace_series_no_regex(self): ) expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1}) result = answer.replace(weights) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_replace_dict_tuple_list_ordering_remains_the_same(self): df = DataFrame(dict(A=[np.nan, 1])) @@ -1026,9 +1026,9 @@ def test_replace_dict_tuple_list_ordering_remains_the_same(self): res3 = df.replace(to_replace=[1, np.nan], value=[-1e8, 0]) expected = DataFrame({"A": [0, -1e8]}) - assert_frame_equal(res1, res2) - assert_frame_equal(res2, res3) - assert_frame_equal(res3, expected) + tm.assert_frame_equal(res1, res2) + tm.assert_frame_equal(res2, res3) + tm.assert_frame_equal(res3, expected) def test_replace_doesnt_replace_without_regex(self): raw = """fol T_opp T_Dir T_Enh @@ -1038,24 +1038,24 @@ def test_replace_doesnt_replace_without_regex(self): 3 3 0 bt 0""" df = pd.read_csv(StringIO(raw), sep=r"\s+") res = df.replace({r"\D": 1}) - assert_frame_equal(df, res) + tm.assert_frame_equal(df, res) def test_replace_bool_with_string(self): df = DataFrame({"a": [True, False], "b": list("ab")}) result = df.replace(True, "a") expected = DataFrame({"a": ["a", False], "b": df.b}) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_replace_pure_bool_with_string_no_op(self): df = DataFrame(np.random.rand(2, 2) > 0.5) result = df.replace("asdf", "fdsa") - assert_frame_equal(df, result) + tm.assert_frame_equal(df, result) def test_replace_bool_with_bool(self): df = DataFrame(np.random.rand(2, 2) > 0.5) result = df.replace(False, True) expected = DataFrame(np.ones((2, 2), dtype=bool)) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_replace_with_dict_with_bool_keys(self): df = DataFrame({0: [True, False], 1: [False, True]}) @@ -1066,7 +1066,7 @@ def test_replace_truthy(self): df = DataFrame({"a": [True, True]}) r = df.replace([np.inf, -np.inf], np.nan) e = df - assert_frame_equal(r, e) + tm.assert_frame_equal(r, e) def test_nested_dict_overlapping_keys_replace_int(self): # GH 27660 keep behaviour consistent for simple dictionary and @@ -1075,7 +1075,7 @@ def test_nested_dict_overlapping_keys_replace_int(self): result = df.replace({"a": dict(zip(range(1, 5), range(2, 6)))}) expected = df.replace(dict(zip(range(1, 5), range(2, 6)))) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_nested_dict_overlapping_keys_replace_str(self): # GH 27660 @@ -1085,18 +1085,18 @@ def test_nested_dict_overlapping_keys_replace_str(self): df = DataFrame({"a": astr}) result = df.replace(dict(zip(astr, bstr))) expected = df.replace({"a": dict(zip(astr, bstr))}) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_replace_swapping_bug(self): df = pd.DataFrame({"a": [True, False, True]}) res = df.replace({"a": {True: "Y", False: "N"}}) expect = pd.DataFrame({"a": ["Y", "N", "Y"]}) - assert_frame_equal(res, expect) + tm.assert_frame_equal(res, expect) df = pd.DataFrame({"a": [0, 1, 0]}) res = df.replace({"a": {0: "Y", 1: "N"}}) expect = pd.DataFrame({"a": ["Y", "N", "Y"]}) - assert_frame_equal(res, expect) + tm.assert_frame_equal(res, expect) def test_replace_period(self): d = { @@ -1132,7 +1132,7 @@ def test_replace_period(self): {"fname": [d["fname"][k] for k in df.fname.values]}, dtype=object ) result = df.replace(d) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_replace_datetime(self): d = { @@ -1162,7 +1162,7 @@ def test_replace_datetime(self): assert set(df.fname.values) == set(d["fname"].keys()) expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]}) result = df.replace(d) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_replace_datetimetz(self): @@ -1181,10 +1181,10 @@ def test_replace_datetimetz(self): "B": Series([0, 1, 2], dtype="float64"), } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.fillna(1) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.replace(0, np.nan) expected = DataFrame( @@ -1193,7 +1193,7 @@ def test_replace_datetimetz(self): "B": [np.nan, np.nan, 2], } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.replace( Timestamp("20130102", tz="US/Eastern"), @@ -1209,12 +1209,12 @@ def test_replace_datetimetz(self): "B": [0, np.nan, 2], } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.copy() result.iloc[1, 0] = np.nan result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Eastern")) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # coerce to object result = df.copy() @@ -1230,7 +1230,7 @@ def test_replace_datetimetz(self): "B": [0, np.nan, 2], } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = df.copy() result.iloc[1, 0] = np.nan @@ -1245,16 +1245,16 @@ def test_replace_datetimetz(self): "B": [0, np.nan, 2], } ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_replace_with_empty_dictlike(self, mix_abc): # GH 15289 df = DataFrame(mix_abc) - assert_frame_equal(df, df.replace({})) - assert_frame_equal(df, df.replace(Series([]))) + tm.assert_frame_equal(df, df.replace({})) + tm.assert_frame_equal(df, df.replace(Series([]))) - assert_frame_equal(df, df.replace({"b": {}})) - assert_frame_equal(df, df.replace(Series({"b": {}}))) + tm.assert_frame_equal(df, df.replace({"b": {}})) + tm.assert_frame_equal(df, df.replace(Series({"b": {}}))) @pytest.mark.parametrize( "to_replace, method, expected", @@ -1294,4 +1294,4 @@ def test_replace_method(self, to_replace, method, expected): result = df.replace(to_replace=to_replace, value=None, method=method) expected = DataFrame(expected) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 5ce811712b9891..5d2c115ce8eb53 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -7,7 +7,6 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Period, Series, Timedelta, date_range import pandas.util.testing as tm -from pandas.util.testing import assert_frame_equal, assert_series_equal class TestDataFrameReshape: @@ -82,7 +81,7 @@ def test_pivot_index_none(self): ) expected.index.name, expected.columns.name = "index", "columns" - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # omit values result = frame.pivot(columns="columns") @@ -110,13 +109,13 @@ def test_stack_unstack(self, float_frame): unstacked = stacked.unstack() unstacked_df = stacked_df.unstack() - assert_frame_equal(unstacked, df) - assert_frame_equal(unstacked_df["bar"], df) + tm.assert_frame_equal(unstacked, df) + tm.assert_frame_equal(unstacked_df["bar"], df) unstacked_cols = stacked.unstack(0) unstacked_cols_df = stacked_df.unstack(0) - assert_frame_equal(unstacked_cols.T, df) - assert_frame_equal(unstacked_cols_df["bar"].T, df) + tm.assert_frame_equal(unstacked_cols.T, df) + tm.assert_frame_equal(unstacked_cols_df["bar"].T, df) def test_stack_mixed_level(self): # GH 18310 @@ -126,7 +125,7 @@ def test_stack_mixed_level(self): df = DataFrame(1, index=levels[0], columns=levels[1]) result = df.stack() expected = Series(1, index=MultiIndex.from_product(levels[:2])) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # MultiIndex columns: df = DataFrame(1, index=levels[0], columns=MultiIndex.from_product(levels[1:])) @@ -134,12 +133,12 @@ def test_stack_mixed_level(self): expected = DataFrame( 1, index=MultiIndex.from_product([levels[0], levels[2]]), columns=levels[1] ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # as above, but used labels in level are actually of homogeneous type result = df[["a", "b"]].stack(1) expected = expected[["a", "b"]] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_unstack_fill(self): @@ -156,14 +155,14 @@ def test_unstack_fill(self): expected = DataFrame( {"a": [1, -1, 5], "b": [2, 4, -1]}, index=["x", "y", "z"], dtype=np.int16 ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # From a series with incorrect data type for fill_value result = data.unstack(fill_value=0.5) expected = DataFrame( {"a": [1, 0.5, 5], "b": [2, 4, 0.5]}, index=["x", "y", "z"], dtype=np.float ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # GH #13971: fill_value when unstacking multiple levels: df = DataFrame( @@ -173,20 +172,20 @@ def test_unstack_fill(self): key = ("w", "b", "j") expected = unstacked[key] result = pd.Series([0, 0, 2], index=unstacked.index, name=key) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) stacked = unstacked.stack(["x", "y"]) stacked.index = stacked.index.reorder_levels(df.index.names) # Workaround for GH #17886 (unnecessarily casts to float): stacked = stacked.astype(np.int64) result = stacked.loc[df.index] - assert_frame_equal(result, df) + tm.assert_frame_equal(result, df) # From a series s = df["w"] result = s.unstack(["x", "y"], fill_value=0) expected = unstacked["w"] - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_unstack_fill_frame(self): @@ -204,7 +203,7 @@ def test_unstack_fill_frame(self): expected.columns = MultiIndex.from_tuples( [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # From a mixed type dataframe df["A"] = df["A"].astype(np.int16) @@ -213,7 +212,7 @@ def test_unstack_fill_frame(self): result = df.unstack(fill_value=-1) expected["A"] = expected["A"].astype(np.int16) expected["B"] = expected["B"].astype(np.float64) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # From a dataframe with incorrect data type for fill_value result = df.unstack(fill_value=0.5) @@ -223,7 +222,7 @@ def test_unstack_fill_frame(self): expected.columns = MultiIndex.from_tuples( [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")] ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_unstack_fill_frame_datetime(self): @@ -239,14 +238,14 @@ def test_unstack_fill_frame_datetime(self): {"a": [dv[0], pd.NaT, dv[3]], "b": [dv[1], dv[2], pd.NaT]}, index=["x", "y", "z"], ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = data.unstack(fill_value=dv[0]) expected = DataFrame( {"a": [dv[0], dv[0], dv[3]], "b": [dv[1], dv[2], dv[0]]}, index=["x", "y", "z"], ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_unstack_fill_frame_timedelta(self): @@ -262,14 +261,14 @@ def test_unstack_fill_frame_timedelta(self): {"a": [td[0], pd.NaT, td[3]], "b": [td[1], td[2], pd.NaT]}, index=["x", "y", "z"], ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = data.unstack(fill_value=td[1]) expected = DataFrame( {"a": [td[0], td[1], td[3]], "b": [td[1], td[2], td[1]]}, index=["x", "y", "z"], ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_unstack_fill_frame_period(self): @@ -290,7 +289,7 @@ def test_unstack_fill_frame_period(self): {"a": [periods[0], None, periods[3]], "b": [periods[1], periods[2], None]}, index=["x", "y", "z"], ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) result = data.unstack(fill_value=periods[1]) expected = DataFrame( @@ -300,7 +299,7 @@ def test_unstack_fill_frame_period(self): }, index=["x", "y", "z"], ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_unstack_fill_frame_categorical(self): @@ -319,7 +318,7 @@ def test_unstack_fill_frame_categorical(self): }, index=list("xyz"), ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Fill with non-category results in a TypeError msg = r"'fill_value' \('d'\) is not in" @@ -335,7 +334,7 @@ def test_unstack_fill_frame_categorical(self): }, index=list("xyz"), ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_unstack_preserve_dtypes(self): # Checks fix for #11847 @@ -358,7 +357,7 @@ def test_unstack_preserve_dtypes(self): def unstack_and_compare(df, column_name): unstacked1 = df.unstack([column_name]) unstacked2 = df.unstack(column_name) - assert_frame_equal(unstacked1, unstacked2) + tm.assert_frame_equal(unstacked1, unstacked2) df1 = df.set_index(["state", "index"]) unstack_and_compare(df1, "index") @@ -383,13 +382,15 @@ def test_stack_ints(self): columns = MultiIndex.from_tuples(list(itertools.product(range(3), repeat=3))) df = DataFrame(np.random.randn(30, 27), columns=columns) - assert_frame_equal(df.stack(level=[1, 2]), df.stack(level=1).stack(level=1)) - assert_frame_equal(df.stack(level=[-2, -1]), df.stack(level=1).stack(level=1)) + tm.assert_frame_equal(df.stack(level=[1, 2]), df.stack(level=1).stack(level=1)) + tm.assert_frame_equal( + df.stack(level=[-2, -1]), df.stack(level=1).stack(level=1) + ) df_named = df.copy() df_named.columns.set_names(range(3), inplace=True) - assert_frame_equal( + tm.assert_frame_equal( df_named.stack(level=[1, 2]), df_named.stack(level=1).stack(level=1) ) @@ -413,10 +414,10 @@ def test_stack_mixed_levels(self): # the level numbers df2 = df.copy() df2.columns.names = ["exp", "animal", 1] - assert_frame_equal( + tm.assert_frame_equal( df2.stack(level=["animal", 1]), animal_hair_stacked, check_names=False ) - assert_frame_equal( + tm.assert_frame_equal( df2.stack(level=["exp", 1]), exp_hair_stacked, check_names=False ) @@ -433,7 +434,7 @@ def test_stack_mixed_levels(self): # strange error about lexsort depth df3 = df.copy() df3.columns.names = ["exp", "animal", 0] - assert_frame_equal( + tm.assert_frame_equal( df3.stack(level=["animal", 0]), animal_hair_stacked, check_names=False ) @@ -455,24 +456,28 @@ def test_stack_int_level_names(self): df2 = df.copy() df2.columns.names = [0, 1, 2] - assert_frame_equal( + tm.assert_frame_equal( df2.stack(level=[1, 2]), animal_hair_stacked, check_names=False ) - assert_frame_equal( + tm.assert_frame_equal( df2.stack(level=[0, 1]), exp_animal_stacked, check_names=False ) - assert_frame_equal(df2.stack(level=[0, 2]), exp_hair_stacked, check_names=False) + tm.assert_frame_equal( + df2.stack(level=[0, 2]), exp_hair_stacked, check_names=False + ) # Out-of-order int column names df3 = df.copy() df3.columns.names = [2, 0, 1] - assert_frame_equal( + tm.assert_frame_equal( df3.stack(level=[0, 1]), animal_hair_stacked, check_names=False ) - assert_frame_equal( + tm.assert_frame_equal( df3.stack(level=[2, 0]), exp_animal_stacked, check_names=False ) - assert_frame_equal(df3.stack(level=[2, 1]), exp_hair_stacked, check_names=False) + tm.assert_frame_equal( + df3.stack(level=[2, 1]), exp_hair_stacked, check_names=False + ) def test_unstack_bool(self): df = DataFrame( @@ -486,7 +491,7 @@ def test_unstack_bool(self): index=["a", "b"], columns=MultiIndex.from_arrays([["col", "col"], ["c", "l"]]), ) - assert_frame_equal(rs, xp) + tm.assert_frame_equal(rs, xp) def test_unstack_level_binding(self): # GH9856 @@ -512,7 +517,7 @@ def test_unstack_level_binding(self): columns=pd.Index(["a", "b"], name="third"), ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_unstack_to_series(self, float_frame): # check reversibility @@ -520,7 +525,7 @@ def test_unstack_to_series(self, float_frame): assert isinstance(data, Series) undo = data.unstack().T - assert_frame_equal(undo, float_frame) + tm.assert_frame_equal(undo, float_frame) # check NA handling data = DataFrame({"x": [1, 2, np.NaN], "y": [3.0, 4, np.NaN]}) @@ -533,13 +538,13 @@ def test_unstack_to_series(self, float_frame): ) expected = Series([1, 2, np.NaN, 3, 4, np.NaN], index=midx) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # check composability of unstack old_data = data.copy() for _ in range(4): data = data.unstack() - assert_frame_equal(old_data, data) + tm.assert_frame_equal(old_data, data) def test_unstack_dtypes(self): @@ -549,7 +554,7 @@ def test_unstack_dtypes(self): df = DataFrame(rows, columns=list("ABCD")) result = df.dtypes expected = Series([np.dtype("int64")] * 4, index=list("ABCD")) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # single dtype df2 = df.set_index(["A", "B"]) @@ -561,7 +566,7 @@ def test_unstack_dtypes(self): [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B") ), ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # mixed df2 = df.set_index(["A", "B"]) @@ -574,7 +579,7 @@ def test_unstack_dtypes(self): [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B") ), ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) df2["D"] = "foo" df3 = df2.unstack("B") result = df3.dtypes @@ -584,7 +589,7 @@ def test_unstack_dtypes(self): [["C", "C", "D", "D"], [1, 2, 1, 2]], names=(None, "B") ), ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) # GH7405 for c, d in ( @@ -707,7 +712,7 @@ def verify(df): left = df.set_index(["jim", "joe"]).unstack()["jolie"] right = df.set_index(["joe", "jim"]).unstack()["jolie"].T - assert_frame_equal(left, right) + tm.assert_frame_equal(left, right) for idx in itertools.permutations(df.columns[:2]): mi = df.set_index(list(idx)) @@ -785,7 +790,7 @@ def verify(df): ) right = DataFrame(vals, columns=cols, index=idx) - assert_frame_equal(left, right) + tm.assert_frame_equal(left, right) df = DataFrame({"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)}) df.iloc[2, 1] = np.NaN @@ -797,7 +802,7 @@ def verify(df): ) idx = Index([np.nan, 0, 1, 2, 3], name="B") right = DataFrame(vals, columns=cols, index=idx) - assert_frame_equal(left, right) + tm.assert_frame_equal(left, right) df = pd.DataFrame( {"A": list("aaaabbbb"), "B": list(range(4)) * 2, "C": range(8)} @@ -811,7 +816,7 @@ def verify(df): ) idx = Index([np.nan, 0, 1, 2, 3], name="B") right = DataFrame(vals, columns=cols, index=idx) - assert_frame_equal(left, right) + tm.assert_frame_equal(left, right) # GH7401 df = pd.DataFrame( @@ -834,7 +839,7 @@ def verify(df): ) right = DataFrame(vals, columns=cols, index=idx) - assert_frame_equal(left, right) + tm.assert_frame_equal(left, right) # GH4862 vals = [ @@ -871,10 +876,10 @@ def verify(df): ) right = DataFrame(vals, columns=cols, index=idx) - assert_frame_equal(left, right) + tm.assert_frame_equal(left, right) left = df.loc[17264:].copy().set_index(["s_id", "dosage", "agent"]) - assert_frame_equal(left.unstack(), right) + tm.assert_frame_equal(left.unstack(), right) # GH9497 - multiple unstack with nulls df = DataFrame( @@ -907,7 +912,7 @@ def test_stack_datetime_column_multiIndex(self): eidx = MultiIndex.from_product([(0, 1, 2, 3), ("B",)]) ecols = MultiIndex.from_tuples([(t, "A")]) expected = DataFrame([1, 2, 3, 4], index=eidx, columns=ecols) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_stack_partial_multiIndex(self): # GH 8844 @@ -925,18 +930,18 @@ def _test_stack_with_multiindex(multiindex): # as df.stack(level=level, dropna=True). expected = df.stack(level=level, dropna=True) if isinstance(expected, Series): - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) else: - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) df.columns = MultiIndex.from_tuples( df.columns.to_numpy(), names=df.columns.names ) expected = df.stack(level=level, dropna=False) if isinstance(expected, Series): - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) else: - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) full_multiindex = MultiIndex.from_tuples( [("B", "x"), ("B", "z"), ("A", "y"), ("C", "x"), ("C", "u")], @@ -973,7 +978,7 @@ def _test_stack_with_multiindex(multiindex): columns=Index(["B", "C"], name="Upper"), dtype=df.dtypes[0], ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("ordered", [False, True]) @pytest.mark.parametrize("labels", [list("yxz"), list("yxy")]) @@ -1075,14 +1080,14 @@ def test_unstack_fill_frame_object(): expected = pd.DataFrame( {"a": ["a", np.nan, "a"], "b": ["b", "c", np.nan]}, index=list("xyz") ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) # Fill with any value replaces missing values as expected result = data.unstack(fill_value="d") expected = pd.DataFrame( {"a": ["a", "d", "a"], "b": ["b", "c", "d"]}, index=list("xyz") ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_unstack_timezone_aware_values(): @@ -1106,7 +1111,7 @@ def test_unstack_timezone_aware_values(): names=[None, "b"], ), ) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_stack_timezone_aware_values(): @@ -1122,4 +1127,4 @@ def test_stack_timezone_aware_values(): levels=[["a", "b", "c"], ["A"]], codes=[[0, 1, 2], [0, 0, 0]] ), ) - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected)