Skip to content

Commit

Permalink
TST/CLN: parameterize/dedup replace test2 (pandas-dev#41501)
Browse files Browse the repository at this point in the history
  • Loading branch information
mzeitlin11 authored and JulianWgs committed Jul 3, 2021
1 parent 213a00d commit 3e5002f
Showing 1 changed file with 35 additions and 204 deletions.
239 changes: 35 additions & 204 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -1430,213 +1430,44 @@ def test_replace_bytes(self, frame_or_series):


class TestDataFrameReplaceRegex:
def test_regex_replace_scalar(self, mix_ab):
obj = {"a": list("ab.."), "b": list("efgh")}
dfobj = DataFrame(obj)
dfmix = DataFrame(mix_ab)

# simplest cases
# regex -> value
# obj frame
res = dfobj.replace(r"\s*\.\s*", np.nan, regex=True)
tm.assert_frame_equal(dfobj, res.fillna("."))

# mixed
res = dfmix.replace(r"\s*\.\s*", np.nan, regex=True)
tm.assert_frame_equal(dfmix, res.fillna("."))

# regex -> regex
# obj frame
res = dfobj.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True)
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)

# with mixed
res = dfmix.replace(r"\s*(\.)\s*", r"\1\1\1", regex=True)
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

# everything with compiled regexs as well
res = dfobj.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True)
tm.assert_frame_equal(dfobj, res.fillna("."))

# mixed
res = dfmix.replace(re.compile(r"\s*\.\s*"), np.nan, regex=True)
tm.assert_frame_equal(dfmix, res.fillna("."))

# regex -> regex
# obj frame
res = dfobj.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1")
objc = obj.copy()
objc["a"] = ["a", "b", "...", "..."]
expec = DataFrame(objc)
tm.assert_frame_equal(res, expec)

# with mixed
res = dfmix.replace(re.compile(r"\s*(\.)\s*"), r"\1\1\1")
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

res = dfmix.replace(regex=re.compile(r"\s*(\.)\s*"), value=r"\1\1\1")
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

res = dfmix.replace(regex=r"\s*(\.)\s*", value=r"\1\1\1")
mixc = mix_ab.copy()
mixc["b"] = ["a", "b", "...", "..."]
expec = DataFrame(mixc)
tm.assert_frame_equal(res, expec)

def test_regex_replace_scalar_inplace(self, mix_ab):
    """
    Check in-place scalar regex replacement on an all-string and a mixed frame.

    Every combination of the following is exercised, on a copy of each frame:

    * pattern passed positionally with ``regex=True`` / through ``regex=``
    * pattern as a plain string / as a compiled pattern
    * regex -> NaN / regex -> backreference template

    Parameters
    ----------
    mix_ab : dict
        Fixture data for the mixed frame.  The expectations below assume
        its column "b" holds ``["a", "b", ".", "."]`` -- confirm against
        the fixture definition.
    """
    obj = {"a": list("ab.."), "b": list("efgh")}

    # (data the frame is built from, column holding the "." cells)
    frames = [(obj, "a"), (mix_ab, "b")]
    # (pattern, replacement value)
    replacements = [(r"\s*\.\s*", np.nan), (r"\s*(\.)\s*", r"\1\1\1")]

    for regex_kwarg in (False, True):
        for compile_regex in (False, True):
            for pattern, value in replacements:
                if compile_regex:
                    pattern = re.compile(pattern)

                for data, column in frames:
                    original = DataFrame(data)
                    res = original.copy()

                    if regex_kwarg:
                        return_value = res.replace(
                            regex=pattern, value=value, inplace=True
                        )
                    else:
                        return_value = res.replace(
                            pattern, value, regex=True, inplace=True
                        )
                    # inplace=True mutates ``res`` and returns nothing
                    assert return_value is None

                    if value is np.nan:
                        # refilling the NaN cells with "." restores the input
                        tm.assert_frame_equal(original, res.fillna("."))
                    else:
                        # each "." cell is rewritten to "..."
                        expected = data.copy()
                        expected[column] = ["a", "b", "...", "..."]
                        tm.assert_frame_equal(res, DataFrame(expected))


# NOTE(review): an earlier fixture-based test in this class carries the same
# name; per the commit title ("parameterize/dedup") this parametrized version
# appears to be its replacement -- confirm only one definition is kept.
@pytest.mark.parametrize(
    "data",
    [
        {"a": list("ab.."), "b": list("efgh")},
        {"a": list("ab.."), "b": list(range(4))},
    ],
)
@pytest.mark.parametrize(
    "to_replace,value", [(r"\s*\.\s*", np.nan), (r"\s*(\.)\s*", r"\1\1\1")]
)
@pytest.mark.parametrize("compile_regex", [True, False])
@pytest.mark.parametrize("regex_kwarg", [True, False])
@pytest.mark.parametrize("inplace", [True, False])
def test_regex_replace_scalar(
    self, data, to_replace, value, compile_regex, regex_kwarg, inplace
):
    """
    Check scalar regex replacement across all call styles.

    Parameters
    ----------
    data : dict
        Frame contents; column "a" holds the "." cells to be replaced.
    to_replace : str
        Regex pattern matching the "." cells.
    value : float or str
        Replacement: ``np.nan`` or a backreference template.
    compile_regex : bool
        Whether the pattern is compiled with ``re.compile`` first.
    regex_kwarg : bool
        Whether the pattern is passed via ``regex=`` (with
        ``to_replace=None``) instead of positionally with ``regex=True``.
    inplace : bool
        Whether the replacement is done in place.
    """
    df = DataFrame(data)
    expected = df.copy()

    if compile_regex:
        to_replace = re.compile(to_replace)

    if regex_kwarg:
        regex = to_replace
        to_replace = None
    else:
        regex = True

    result = df.replace(to_replace, value, inplace=inplace, regex=regex)

    if inplace:
        # the in-place call returns None; the mutated frame is the result
        assert result is None
        result = df

    if value is np.nan:
        expected_replace_val = np.nan
    else:
        # r"\1\1\1" repeats the captured "." three times
        expected_replace_val = "..."

    expected.loc[expected["a"] == ".", "a"] = expected_replace_val
    tm.assert_frame_equal(result, expected)

0 comments on commit 3e5002f

Please sign in to comment.