worked on review comments

pandas-dev · May 12, 2021 · 29a2c43 · 29a2c43
1 parent 527a587
commit 29a2c43
Show file tree

Hide file tree

Showing 2 changed files with 109 additions and 37 deletions.
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -7927,6 +7927,10 @@ def explode(
             For multiple columns, specify a non-empty list where each element
             is a str or tuple, and the list-like data of all specified columns
             on the same row of the frame must have matching length.
+
+            .. versionadded:: 1.3.0
+                Multi-column explode
+
         ignore_index : bool, default False
             If True, the resulting index will be labeled 0, 1, …, n - 1.
 
@@ -7973,6 +7977,8 @@ def explode(
         2         []  1         []
         3     [3, 4]  1     [d, e]
 
+        Single-column explode.
+
         >>> df.explode('A')
              A  B          C
         0    0  1  [a, b, c]
@@ -7983,6 +7989,8 @@ def explode(
         3    3  1     [d, e]
         3    4  1     [d, e]
 
+        Multi-column explode.
+
         >>> df.explode(list('AC'))
              A  B    C
         0    0  1    a
@@ -7998,14 +8006,12 @@ def explode(
 
         columns: list[str | tuple]
         if is_scalar(column) or isinstance(column, tuple):
-            # mypy: List item 0 has incompatible type "Union[str, Tuple[Any, ...],
-            # List[Union[str, Tuple[Any, ...]]]]"; expected
-            # "Union[str, Tuple[Any, ...]]"
-            columns = [column]  # type: ignore[list-item]
+            assert isinstance(column, (str, tuple))
+            columns = [column]
         elif isinstance(column, list) and all(
             map(lambda c: is_scalar(c) or isinstance(c, tuple), column)
         ):
-            if len(column) == 0:
+            if not column:
                 raise ValueError("column must be nonempty")
             if len(column) > len(set(column)):
                 raise ValueError("column must be unique")
@@ -8015,7 +8021,7 @@ def explode(
 
         df = self.reset_index(drop=True)
         if len(columns) == 1:
-            result = df[column].explode()
+            result = df[columns[0]].explode()
         else:
             mylen = lambda x: len(x) if is_list_like(x) else -1
             counts0 = self[columns[0]].apply(mylen)

diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py
@@ -21,21 +21,47 @@ def test_error():
     with pytest.raises(ValueError, match="columns must be unique"):
         df.explode("A")
 
-    # GH 39240
-    df1 = df.assign(C=[["a", "b", "c"], "foo", [], ["d", "e", "f"]])
-    df1.columns = list("ABC")
-    with pytest.raises(ValueError, match="columns must have matching element counts"):
-        df1.explode(list("AC"))
-
-    # GH 39240
-    with pytest.raises(ValueError, match="column must be nonempty"):
-        df1.explode([])
 
+@pytest.mark.parametrize(
+    "input_dict, input_index, input_subset, error_message",
+    [
+        (
+            {
+                "A": [[0, 1, 2], np.nan, [], (3, 4)],
+                "B": 1,
+                "C": [["a", "b", "c"], "foo", [], ["d", "e", "f"]],
+            },
+            list("abcd"),
+            list("AC"),
+            "columns must have matching element counts",
+        ),
+        (
+            {
+                "A": [[0, 1, 2], np.nan, [], (3, 4)],
+                "B": 1,
+                "C": [["a", "b", "c"], "foo", [], ["d", "e", "f"]],
+            },
+            list("abcd"),
+            [],
+            "column must be nonempty",
+        ),
+        (
+            {
+                "A": [[0, 1, 2], np.nan, [], (3, 4)],
+                "B": 1,
+                "C": [["a", "b", "c"], "foo", [], "d"],
+            },
+            list("abcd"),
+            list("AC"),
+            "columns must have matching element counts",
+        ),
+    ],
+)
+def test_error_multi_columns(input_dict, input_index, input_subset, error_message):
     # GH 39240
-    df2 = df.assign(C=[["a", "b", "c"], "foo", [], "d"])
-    df2.columns = list("ABC")
-    with pytest.raises(ValueError, match="columns must have matching element counts"):
-        df2.explode(list("AC"))
+    df = pd.DataFrame(input_dict, index=input_index)
+    with pytest.raises(ValueError, match=error_message):
+        df.explode(input_subset)
 
 
 def test_basic():
@@ -203,23 +229,63 @@ def test_explode_sets():
     tm.assert_frame_equal(result, expected)
 
 
-def test_multi_columns():
+@pytest.mark.parametrize(
+    "input_dict, input_index, input_subset, expected_dict, expected_index",
+    [
+        (
+            {
+                "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan],
+                "B": 1,
+                "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan],
+            },
+            list("abcde"),
+            list("AC"),
+            {
+                "A": pd.Series(
+                    [0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
+                    index=list("aaabcdde"),
+                    dtype=object,
+                ),
+                "B": 1,
+                "C": ["a", "b", "c", "foo", np.nan, "d", "e", np.nan],
+            },
+            list("aaabcdde"),
+        ),
+        (
+            {
+                "A": [[0, 1, 2], np.nan, [], (3, 4), np.nan],
+                "B": 1,
+                "C": [["a", "b", "c"], "foo", [], ["d", "e"], np.nan],
+            },
+            list("abcde"),
+            list("A"),
+            {
+                "A": pd.Series(
+                    [0, 1, 2, np.nan, np.nan, 3, 4, np.nan],
+                    index=list("aaabcdde"),
+                    dtype=object,
+                ),
+                "B": 1,
+                "C": [
+                    ["a", "b", "c"],
+                    ["a", "b", "c"],
+                    ["a", "b", "c"],
+                    "foo",
+                    [],
+                    ["d", "e"],
+                    ["d", "e"],
+                    np.nan,
+                ],
+            },
+            list("aaabcdde"),
+        ),
+    ],
+)
+def test_multi_columns(
+    input_dict, input_index, input_subset, expected_dict, expected_index
+):
     # GH 39240
-    df = pd.DataFrame(
-        {
-            "A": pd.Series([[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd")),
-            "B": 1,
-            "C": [["a", "b", "c"], "foo", [], ["d", "e"]],
-        }
-    )
-    result = df.explode(list("AC"))
-    expected = pd.DataFrame(
-        {
-            "A": pd.Series(
-                [0, 1, 2, np.nan, np.nan, 3, 4], index=list("aaabcdd"), dtype=object
-            ),
-            "B": 1,
-            "C": ["a", "b", "c", "foo", np.nan, "d", "e"],
-        }
-    )
+    df = pd.DataFrame(input_dict, index=input_index)
+    result = df.explode(input_subset)
+    expected = pd.DataFrame(expected_dict, expected_index)
     tm.assert_frame_equal(result, expected)