Skip to content

Commit

Permalink
ENH: DataFrame.pivot accepts a list of values (pandas-dev#18636)
Browse files Browse the repository at this point in the history
  • Loading branch information
ibrahimsharaf authored and Andrew Bui committed Apr 2, 2018
1 parent 340d594 commit 4d0ae9c
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 15 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ Other Enhancements
- ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method.
Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`).
- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`).
- :func:`DataFrame.pivot` now accepts a list for the ``values=`` kwarg (:issue:`17160`).
- Added :func:`pandas.api.extensions.register_dataframe_accessor`,
:func:`pandas.api.extensions.register_series_accessor`, and
:func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas
Expand Down
31 changes: 21 additions & 10 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5050,11 +5050,14 @@ def pivot(self, index=None, columns=None, values=None):
existing index.
columns : string or object
Column to use to make new frame's columns.
values : string or object, optional
Column to use for populating new frame's values. If not
values : string, object or a list of the previous, optional
Column(s) to use for populating new frame's values. If not
specified, all remaining columns will be used and the result will
have hierarchically indexed columns.
.. versionchanged :: 0.23.0
Also accept list of column names.
Returns
-------
DataFrame
Expand Down Expand Up @@ -5083,15 +5086,16 @@ def pivot(self, index=None, columns=None, values=None):
>>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two',
... 'two'],
... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
... 'baz': [1, 2, 3, 4, 5, 6]})
... 'baz': [1, 2, 3, 4, 5, 6],
... 'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
>>> df
foo bar baz
0 one A 1
1 one B 2
2 one C 3
3 two A 4
4 two B 5
5 two C 6
foo bar baz zoo
0 one A 1 x
1 one B 2 y
2 one C 3 z
3 two A 4 q
4 two B 5 w
5 two C 6 t
>>> df.pivot(index='foo', columns='bar', values='baz')
bar A B C
Expand All @@ -5105,6 +5109,13 @@ def pivot(self, index=None, columns=None, values=None):
one 1 2 3
two 4 5 6
>>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo'])
baz zoo
bar A B C A B C
foo
one 1 2 3 x y z
two 4 5 6 q w t
A ValueError is raised if there are any duplicates.
>>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'],
Expand Down
15 changes: 10 additions & 5 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,16 +392,21 @@ def pivot(self, index=None, columns=None, values=None):
cols = [columns] if index is None else [index, columns]
append = index is None
indexed = self.set_index(cols, append=append)
return indexed.unstack(columns)
else:
if index is None:
index = self.index
else:
index = self[index]
indexed = self._constructor_sliced(
self[values].values,
index=MultiIndex.from_arrays([index, self[columns]]))
return indexed.unstack(columns)
index = MultiIndex.from_arrays([index, self[columns]])

if is_list_like(values) and not isinstance(values, tuple):
# Exclude tuple because it is seen as a single column name
indexed = self._constructor(self[values].values, index=index,
columns=values)
else:
indexed = self._constructor_sliced(self[values].values,
index=index)
return indexed.unstack(columns)


def pivot_simple(index, columns, values):
Expand Down
83 changes: 83 additions & 0 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,89 @@ def test_pivot_periods(self):
pv = df.pivot(index='p1', columns='p2', values='data1')
tm.assert_frame_equal(pv, expected)

@pytest.mark.parametrize('values', [
['baz', 'zoo'], np.array(['baz', 'zoo']),
pd.Series(['baz', 'zoo']), pd.Index(['baz', 'zoo'])
])
def test_pivot_with_list_like_values(self, values):
# issue #17160
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
'baz': [1, 2, 3, 4, 5, 6],
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})

result = df.pivot(index='foo', columns='bar', values=values)

data = [[1, 2, 3, 'x', 'y', 'z'],
[4, 5, 6, 'q', 'w', 't']]
index = Index(data=['one', 'two'], name='foo')
columns = MultiIndex(levels=[['baz', 'zoo'], ['A', 'B', 'C']],
labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
names=[None, 'bar'])
expected = DataFrame(data=data, index=index,
columns=columns, dtype='object')
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize('values', [
['bar', 'baz'], np.array(['bar', 'baz']),
pd.Series(['bar', 'baz']), pd.Index(['bar', 'baz'])
])
def test_pivot_with_list_like_values_nans(self, values):
# issue #17160
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
'baz': [1, 2, 3, 4, 5, 6],
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})

result = df.pivot(index='zoo', columns='foo', values=values)

data = [[np.nan, 'A', np.nan, 4],
[np.nan, 'C', np.nan, 6],
[np.nan, 'B', np.nan, 5],
['A', np.nan, 1, np.nan],
['B', np.nan, 2, np.nan],
['C', np.nan, 3, np.nan]]
index = Index(data=['q', 't', 'w', 'x', 'y', 'z'], name='zoo')
columns = MultiIndex(levels=[['bar', 'baz'], ['one', 'two']],
labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
names=[None, 'foo'])
expected = DataFrame(data=data, index=index,
columns=columns, dtype='object')
tm.assert_frame_equal(result, expected)

@pytest.mark.xfail(reason='MultiIndexed unstack with tuple names fails'
'with KeyError #19966')
def test_pivot_with_multiindex(self):
# issue #17160
index = Index(data=[0, 1, 2, 3, 4, 5])
data = [['one', 'A', 1, 'x'],
['one', 'B', 2, 'y'],
['one', 'C', 3, 'z'],
['two', 'A', 4, 'q'],
['two', 'B', 5, 'w'],
['two', 'C', 6, 't']]
columns = MultiIndex(levels=[['bar', 'baz'], ['first', 'second']],
labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
df = DataFrame(data=data, index=index, columns=columns, dtype='object')
result = df.pivot(index=('bar', 'first'), columns=('bar', 'second'),
values=('baz', 'first'))

data = {'A': Series([1, 4], index=['one', 'two']),
'B': Series([2, 5], index=['one', 'two']),
'C': Series([3, 6], index=['one', 'two'])}
expected = DataFrame(data)
tm.assert_frame_equal(result, expected)

def test_pivot_with_tuple_of_values(self):
# issue #17160
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
'baz': [1, 2, 3, 4, 5, 6],
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
with pytest.raises(KeyError):
# tuple is seen as a single column name
df.pivot(index='zoo', columns='foo', values=('bar', 'baz'))

def test_margins(self):
def _check_output(result, values_col, index=['A', 'B'],
columns=['C'],
Expand Down

0 comments on commit 4d0ae9c

Please sign in to comment.