From 3e5002f235610fc551c447b949ec6e9bbfa9ba62 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> Date: Tue, 18 May 2021 23:01:15 -0400 Subject: [PATCH] TST/CLN: parameterize/dedup replace test2 (#41501) --- pandas/tests/frame/methods/test_replace.py | 239 +++------------------ 1 file changed, 35 insertions(+), 204 deletions(-) diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 645de6f193750d..d2f02be43daceb 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1430,213 +1430,44 @@ def test_replace_bytes(self, frame_or_series): class TestDataFrameReplaceRegex: - def test_regex_replace_scalar(self, mix_ab): - obj = {"a": list("ab.."), "b": list("efgh")} - dfobj = DataFrame(obj) - dfmix = DataFrame(mix_ab) - - # simplest cases - # regex -> value - # obj frame - res = dfobj.replace(r"\s*\.\s*", np.nan, regex=True) - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.replace(r"\s*\.\s*", np.nan, regex=True) - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True) - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - # everything with compiled regexs as well - res = dfobj.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True) - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1") - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - res = dfmix.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1") - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - res = dfmix.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1") - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - def test_regex_replace_scalar_inplace(self, mix_ab): - obj = {"a": list("ab.."), "b": list("efgh")} - dfobj = DataFrame(obj) - dfmix = DataFrame(mix_ab) - - # simplest cases - # regex -> value - # obj frame - res = dfobj.copy() - return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) - assert return_value is None - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.copy() - return_value = res.replace(r"\s*\.\s*", np.nan, regex=True, inplace=True) - assert return_value is None - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.copy() - return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True) - assert return_value is None - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.copy() - return_value = res.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True, inplace=True) - assert return_value is None - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - # everything with compiled regexs as well - res = dfobj.copy() - return_value = res.replace( - re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True - ) - assert return_value is None - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.copy() - return_value = res.replace( - re.compile(r"\s*\.\s*"), np.nan, regex=True, inplace=True - ) - assert return_value is None - tm.assert_frame_equal(dfmix, res.fillna(".")) - - # regex -> regex - # obj frame - res = dfobj.copy() - return_value = res.replace( - re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True - ) - assert return_value is None - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) - - # with mixed - res = dfmix.copy() - return_value = res.replace( - re.compile(r"\s*(\.)\s*"), r"\1\1\1", regex=True, inplace=True - ) - assert return_value is None - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) - - res = dfobj.copy() - return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) - assert return_value is None - tm.assert_frame_equal(dfobj, res.fillna(".")) - - # mixed - res = dfmix.copy() - return_value = res.replace(regex=r"\s*\.\s*", value=np.nan, inplace=True) - assert return_value is None - tm.assert_frame_equal(dfmix, res.fillna(".")) + @pytest.mark.parametrize( + "data", + [ + {"a": list("ab.."), "b": list("efgh")}, + {"a": list("ab.."), "b": list(range(4))}, + ], + ) + @pytest.mark.parametrize( + "to_replace,value", [(r"\s*\.\s*", np.nan), (r"\s*(\.)\s*", r"\1\1\1")] + ) + @pytest.mark.parametrize("compile_regex", [True, False]) + @pytest.mark.parametrize("regex_kwarg", [True, False]) + @pytest.mark.parametrize("inplace", [True, False]) + def test_regex_replace_scalar( + self, data, to_replace, value, compile_regex, regex_kwarg, inplace + ): + df = DataFrame(data) + expected = df.copy() - # regex -> regex - # obj frame - res = dfobj.copy() - return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True) - assert return_value is None - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) + if compile_regex: + to_replace = re.compile(to_replace) - # with mixed - res = dfmix.copy() - return_value = res.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1", inplace=True) - assert return_value is None - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) + if regex_kwarg: + regex = to_replace + to_replace = None + else: + regex = True - # everything with compiled regexs as well - res = dfobj.copy() - return_value = res.replace( - regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True - ) - assert return_value is None - tm.assert_frame_equal(dfobj, res.fillna(".")) + result = df.replace(to_replace, value, inplace=inplace, regex=regex) - # mixed - res = dfmix.copy() - return_value = res.replace( - regex=re.compile(r"\s*\.\s*"), value=np.nan, inplace=True - ) - assert return_value is None - tm.assert_frame_equal(dfmix, res.fillna(".")) + if inplace: + assert result is None + result = df - # regex -> regex - # obj frame - res = dfobj.copy() - return_value = res.replace( - regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True - ) - assert return_value is None - objc = obj.copy() - objc["a"] = ["a", "b", "...", "..."] - expec = DataFrame(objc) - tm.assert_frame_equal(res, expec) + if value is np.nan: + expected_replace_val = np.nan + else: + expected_replace_val = "..." - # with mixed - res = dfmix.copy() - return_value = res.replace( - regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1", inplace=True - ) - assert return_value is None - mixc = mix_ab.copy() - mixc["b"] = ["a", "b", "...", "..."] - expec = DataFrame(mixc) - tm.assert_frame_equal(res, expec) + expected.loc[expected["a"] == ".", "a"] = expected_replace_val + tm.assert_frame_equal(result, expected)