ENH: DataFrame.pivot accepts a list of values (pandas-dev#18636)

dworvos · Apr 2, 2018 · 4d0ae9c · 4d0ae9c
1 parent 340d594
commit 4d0ae9c
Show file tree

Hide file tree

Showing 4 changed files with 115 additions and 15 deletions.
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -326,6 +326,7 @@ Other Enhancements
 - ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method.
   Previously, calls to ``pipe`` were diverted to  the ``mean`` method (:issue:`17905`).
 - :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`).
+- :func:`DataFrame.pivot` now accepts a list for the ``values=`` kwarg (:issue:`17160`).
 - Added :func:`pandas.api.extensions.register_dataframe_accessor`,
   :func:`pandas.api.extensions.register_series_accessor`, and
   :func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -5050,11 +5050,14 @@ def pivot(self, index=None, columns=None, values=None):
             existing index.
         columns : string or object
             Column to use to make new frame's columns.
-        values : string or object, optional
-            Column to use for populating new frame's values. If not
+        values : string, object or a list of the previous, optional
+            Column(s) to use for populating new frame's values. If not
             specified, all remaining columns will be used and the result will
             have hierarchically indexed columns.
 
+            .. versionchanged:: 0.23.0
+               Also accepts a list of column names.
+
         Returns
         -------
         DataFrame
@@ -5083,15 +5086,16 @@ def pivot(self, index=None, columns=None, values=None):
         >>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two',
         ...                            'two'],
         ...                    'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
-        ...                    'baz': [1, 2, 3, 4, 5, 6]})
+        ...                    'baz': [1, 2, 3, 4, 5, 6],
+        ...                    'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
         >>> df
-            foo   bar  baz
-        0   one   A    1
-        1   one   B    2
-        2   one   C    3
-        3   two   A    4
-        4   two   B    5
-        5   two   C    6
+            foo   bar  baz  zoo
+        0   one   A    1    x
+        1   one   B    2    y
+        2   one   C    3    z
+        3   two   A    4    q
+        4   two   B    5    w
+        5   two   C    6    t
 
         >>> df.pivot(index='foo', columns='bar', values='baz')
         bar  A   B   C
@@ -5105,6 +5109,13 @@ def pivot(self, index=None, columns=None, values=None):
         one  1   2   3
         two  4   5   6
 
+        >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo'])
+              baz       zoo
+        bar   A  B  C   A  B  C
+        foo
+        one   1  2  3   x  y  z
+        two   4  5  6   q  w  t
+
         A ValueError is raised if there are any duplicates.
 
         >>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'],

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
@@ -392,16 +392,21 @@ def pivot(self, index=None, columns=None, values=None):
         cols = [columns] if index is None else [index, columns]
         append = index is None
         indexed = self.set_index(cols, append=append)
-        return indexed.unstack(columns)
     else:
         if index is None:
             index = self.index
         else:
             index = self[index]
-        indexed = self._constructor_sliced(
-            self[values].values,
-            index=MultiIndex.from_arrays([index, self[columns]]))
-        return indexed.unstack(columns)
+        index = MultiIndex.from_arrays([index, self[columns]])
+
+        if is_list_like(values) and not isinstance(values, tuple):
+            # Exclude tuple because it is seen as a single column name
+            indexed = self._constructor(self[values].values, index=index,
+                                        columns=values)
+        else:
+            indexed = self._constructor_sliced(self[values].values,
+                                               index=index)
+    return indexed.unstack(columns)
 
 
 def pivot_simple(index, columns, values):

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -371,6 +371,89 @@ def test_pivot_periods(self):
         pv = df.pivot(index='p1', columns='p2', values='data1')
         tm.assert_frame_equal(pv, expected)
 
+    @pytest.mark.parametrize('values', [
+        ['baz', 'zoo'], np.array(['baz', 'zoo']),
+        pd.Series(['baz', 'zoo']), pd.Index(['baz', 'zoo'])
+    ])
+    def test_pivot_with_list_like_values(self, values):
+        # issue #17160
+        df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
+                           'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+                           'baz': [1, 2, 3, 4, 5, 6],
+                           'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
+
+        result = df.pivot(index='foo', columns='bar', values=values)
+
+        data = [[1, 2, 3, 'x', 'y', 'z'],
+                [4, 5, 6, 'q', 'w', 't']]
+        index = Index(data=['one', 'two'], name='foo')
+        columns = MultiIndex(levels=[['baz', 'zoo'], ['A', 'B', 'C']],
+                             labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
+                             names=[None, 'bar'])
+        expected = DataFrame(data=data, index=index,
+                             columns=columns, dtype='object')
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize('values', [
+        ['bar', 'baz'], np.array(['bar', 'baz']),
+        pd.Series(['bar', 'baz']), pd.Index(['bar', 'baz'])
+    ])
+    def test_pivot_with_list_like_values_nans(self, values):
+        # issue #17160
+        df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
+                           'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+                           'baz': [1, 2, 3, 4, 5, 6],
+                           'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
+
+        result = df.pivot(index='zoo', columns='foo', values=values)
+
+        data = [[np.nan, 'A', np.nan, 4],
+                [np.nan, 'C', np.nan, 6],
+                [np.nan, 'B', np.nan, 5],
+                ['A', np.nan, 1, np.nan],
+                ['B', np.nan, 2, np.nan],
+                ['C', np.nan, 3, np.nan]]
+        index = Index(data=['q', 't', 'w', 'x', 'y', 'z'], name='zoo')
+        columns = MultiIndex(levels=[['bar', 'baz'], ['one', 'two']],
+                             labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
+                             names=[None, 'foo'])
+        expected = DataFrame(data=data, index=index,
+                             columns=columns, dtype='object')
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.xfail(reason='MultiIndexed unstack with tuple names fails '
+                              'with KeyError #19966')
+    def test_pivot_with_multiindex(self):
+        # issue #17160
+        index = Index(data=[0, 1, 2, 3, 4, 5])
+        data = [['one', 'A', 1, 'x'],
+                ['one', 'B', 2, 'y'],
+                ['one', 'C', 3, 'z'],
+                ['two', 'A', 4, 'q'],
+                ['two', 'B', 5, 'w'],
+                ['two', 'C', 6, 't']]
+        columns = MultiIndex(levels=[['bar', 'baz'], ['first', 'second']],
+                             labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
+        df = DataFrame(data=data, index=index, columns=columns, dtype='object')
+        result = df.pivot(index=('bar', 'first'), columns=('bar', 'second'),
+                          values=('baz', 'first'))
+
+        data = {'A': Series([1, 4], index=['one', 'two']),
+                'B': Series([2, 5], index=['one', 'two']),
+                'C': Series([3, 6], index=['one', 'two'])}
+        expected = DataFrame(data)
+        tm.assert_frame_equal(result, expected)
+
+    def test_pivot_with_tuple_of_values(self):
+        # issue #17160
+        df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
+                           'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
+                           'baz': [1, 2, 3, 4, 5, 6],
+                           'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
+        with pytest.raises(KeyError):
+            # tuple is seen as a single column name
+            df.pivot(index='zoo', columns='foo', values=('bar', 'baz'))
+
     def test_margins(self):
         def _check_output(result, values_col, index=['A', 'B'],
                           columns=['C'],