Skip to content

Commit

Permalink
worked on review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
iynehz committed May 12, 2021
1 parent 527a587 commit 29a2c43
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 37 deletions.
18 changes: 12 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7927,6 +7927,10 @@ def explode(
For multiple columns, specify a non-empty list in which each element
is a str or tuple; for all specified columns, the list-like data
on the same row of the frame must have matching length.
.. versionadded:: 1.3.0
Multi-column explode
ignore_index : bool, default False
If True, the resulting index will be labeled 0, 1, …, n - 1.
Expand Down Expand Up @@ -7973,6 +7977,8 @@ def explode(
2 [] 1 []
3 [3, 4] 1 [d, e]
Single-column explode.
>>> df.explode('A')
A B C
0 0 1 [a, b, c]
Expand All @@ -7983,6 +7989,8 @@ def explode(
3 3 1 [d, e]
3 4 1 [d, e]
Multi-column explode.
>>> df.explode(list('AC'))
A B C
0 0 1 a
Expand All @@ -7998,14 +8006,12 @@ def explode(

columns: list[str | tuple]
if is_scalar(column) or isinstance(column, tuple):
# mypy: List item 0 has incompatible type "Union[str, Tuple[Any, ...],
# List[Union[str, Tuple[Any, ...]]]]"; expected
# "Union[str, Tuple[Any, ...]]"
columns = [column] # type: ignore[list-item]
assert isinstance(column, (str, tuple))
columns = [column]
elif isinstance(column, list) and all(
map(lambda c: is_scalar(c) or isinstance(c, tuple), column)
):
if len(column) == 0:
if not column:
raise ValueError("column must be nonempty")
if len(column) > len(set(column)):
raise ValueError("column must be unique")
Expand All @@ -8015,7 +8021,7 @@ def explode(

df = self.reset_index(drop=True)
if len(columns) == 1:
result = df[column].explode()
result = df[columns[0]].explode()
else:
mylen = lambda x: len(x) if is_list_like(x) else -1
counts0 = self[columns[0]].apply(mylen)
Expand Down
128 changes: 97 additions & 31 deletions pandas/tests/frame/methods/test_explode.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,47 @@ def test_error():
with pytest.raises(ValueError, match="columns must be unique"):
df.explode("A")

# GH 39240
df1 = df.assign(C=[["a", "b", "c"], "foo", [], ["d", "e", "f"]])
df1.columns = list("ABC")
with pytest.raises(ValueError, match="columns must have matching element counts"):
df1.explode(list("AC"))

# GH 39240
with pytest.raises(ValueError, match="column must be nonempty"):
df1.explode([])

@pytest.mark.parametrize(
"input_dict, input_index, input_subset, error_message",
[
(
{
"A": [[0, 1, 2], np.nan, [], (3, 4)],
"B": 1,
"C": [["a", "b", "c"], "foo", [], ["d", "e", "f"]],
},
list("abcd"),
list("AC"),
"columns must have matching element counts",
),
(
{
"A": [[0, 1, 2], np.nan, [], (3, 4)],
"B": 1,
"C": [["a", "b", "c"], "foo", [], ["d", "e", "f"]],
},
list("abcd"),
[],
"column must be nonempty",
),
(
{
"A": [[0, 1, 2], np.nan, [], (3, 4)],
"B": 1,
"C": [["a", "b", "c"], "foo", [], "d"],
},
list("abcd"),
list("AC"),
"columns must have matching element counts",
),
],
)
def test_error_multi_columns(input_dict, input_index, input_subset, error_message):
# GH 39240
df2 = df.assign(C=[["a", "b", "c"], "foo", [], "d"])
df2.columns = list("ABC")
with pytest.raises(ValueError, match="columns must have matching element counts"):
df2.explode(list("AC"))
df = pd.DataFrame(input_dict, index=input_index)
with pytest.raises(ValueError, match=error_message):
df.explode(input_subset)


def test_basic():
Expand Down Expand Up @@ -203,23 +229,63 @@ def test_explode_sets():
tm.assert_frame_equal(result, expected)


def test_multi_columns():
@pytest.mark.parametrize(
"input_dict, input_index, input_subset, expected_dict, expected_index",
[
(
{
"A": [[0, 1, 2], np.nan, [], (3, 4), np.nan],
"B": 1,
"C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan],
},
list("abcde"),
list("AC"),
{
"A": pd.Series(
[0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
index=list("aaabcdde"),
dtype=object,
),
"B": 1,
"C": ["a", "b", "c", "foo", np.nan, "d", "e", np.nan],
},
list("aaabcdde"),
),
(
{
"A": [[0, 1, 2], np.nan, [], (3, 4), np.nan],
"B": 1,
"C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan],
},
list("abcde"),
list("A"),
{
"A": pd.Series(
[0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
index=list("aaabcdde"),
dtype=object,
),
"B": 1,
"C": [
["a", "b", "c"],
["a", "b", "c"],
["a", "b", "c"],
"foo",
[],
["d", "e"],
["d", "e"],
np.nan,
],
},
list("aaabcdde"),
),
],
)
def test_multi_columns(
input_dict, input_index, input_subset, expected_dict, expected_index
):
# GH 39240
df = pd.DataFrame(
{
"A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")),
"B": 1,
"C": [["a", "b", "c"], "foo", [], ["d", "e"]],
}
)
result = df.explode(list("AC"))
expected = pd.DataFrame(
{
"A": pd.Series(
[0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object
),
"B": 1,
"C": ["a", "b", "c", "foo", np.nan, "d", "e"],
}
)
df = pd.DataFrame(input_dict, index=input_index)
result = df.explode(input_subset)
expected = pd.DataFrame(expected_dict, expected_index)
tm.assert_frame_equal(result, expected)

0 comments on commit 29a2c43

Please sign in to comment.