Skip to content

Commit

Permalink
ENH: Add set_index to Series
Browse files Browse the repository at this point in the history
  • Loading branch information
h-vetinari committed Sep 18, 2018
1 parent 8a1c8ad commit 7355d16
Show file tree
Hide file tree
Showing 5 changed files with 344 additions and 78 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ Other Enhancements
The default compression for ``to_csv``, ``to_json``, and ``to_pickle`` methods has been updated to ``'infer'`` (:issue:`22004`).
- :func:`to_timedelta` now supports ISO-formatted timedelta strings (:issue:`21877`)
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
- :class:`Series` has gained the method :meth:`Series.set_index`, which works like its :class:`DataFrame` counterpart :meth:`DataFrame.set_index` (:issue:`21684`)
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
- :meth:`Series.resample` and :meth:`DataFrame.resample` have gained the :meth:`Resampler.quantile` (:issue:`15023`).
Expand Down
72 changes: 8 additions & 64 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
from pandas.core.dtypes.concat import _get_sliced_frame_result_type
from pandas.core.dtypes.missing import isna, notna


from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.index import (Index, MultiIndex, ensure_index,
ensure_index_from_sequences)
Expand Down Expand Up @@ -3843,6 +3842,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
necessary. Setting to False will improve the performance of this
method
Returns
-------
reindexed : DataFrame if inplace is False, else None
Examples
--------
>>> df = pd.DataFrame({'month': [1, 4, 7, 10],
Expand Down Expand Up @@ -3883,73 +3886,14 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
2 2014 4 40
3 2013 7 84
4 2014 10 31
Returns
-------
dataframe : DataFrame
"""
inplace = validate_bool_kwarg(inplace, 'inplace')
if not isinstance(keys, list):
keys = [keys]

if inplace:
frame = self
else:
frame = self.copy()

arrays = []
names = []
if append:
names = [x for x in self.index.names]
if isinstance(self.index, MultiIndex):
for i in range(self.index.nlevels):
arrays.append(self.index._get_level_values(i))
else:
arrays.append(self.index)

to_remove = []
for col in keys:
if isinstance(col, MultiIndex):
# append all but the last column so we don't have to modify
# the end of this loop
for n in range(col.nlevels - 1):
arrays.append(col._get_level_values(n))

level = col._get_level_values(col.nlevels - 1)
names.extend(col.names)
elif isinstance(col, Series):
level = col._values
names.append(col.name)
elif isinstance(col, Index):
level = col
names.append(col.name)
elif isinstance(col, (list, np.ndarray, Index)):
level = col
names.append(None)
else:
level = frame[col]._values
names.append(col)
if drop:
to_remove.append(col)
arrays.append(level)

index = ensure_index_from_sequences(arrays, names)

if verify_integrity and not index.is_unique:
duplicates = index[index.duplicated()].unique()
raise ValueError('Index has duplicate keys: {dup}'.format(
dup=duplicates))

for c in to_remove:
del frame[c]

# clear up memory usage
index._cleanup()

frame.index = index

if not inplace:
return frame
vi = verify_integrity
return super(DataFrame, self).set_index(keys=keys, drop=drop,
append=append, inplace=inplace,
verify_integrity=vi)

def reset_index(self, level=None, drop=False, inplace=False, col_level=0,
col_fill=''):
Expand Down
133 changes: 130 additions & 3 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
from pandas.core.dtypes.inference import is_hashable
from pandas.core.dtypes.missing import isna, notna
from pandas.core.dtypes.generic import ABCSeries, ABCPanel, ABCDataFrame
from pandas.core.dtypes.generic import (ABCIndexClass, ABCMultiIndex, ABCPanel,
ABCSeries, ABCDataFrame)

from pandas.core.base import PandasObject, SelectionMixin
from pandas.core.index import (Index, MultiIndex, ensure_index,
InvalidIndexError, RangeIndex)
from pandas.core.index import (Index, MultiIndex,
InvalidIndexError, RangeIndex,
ensure_index, ensure_index_from_sequences)
import pandas.core.indexing as indexing
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.period import PeriodIndex, Period
Expand Down Expand Up @@ -663,6 +665,131 @@ def _set_axis(self, axis, labels):
y : same as input
"""

def set_index(self, keys, drop=True, append=False, inplace=False,
              verify_integrity=False):
    """
    Set the Series/DataFrame index (row labels) using one or more given
    arrays (or column labels in case of DataFrame).

    By default yields a new object.

    Parameters
    ----------
    keys : column label or list of column labels / arrays
        For the Series case, only an array or a list of arrays is allowed.
        A value that is not a list is treated as a single key.
    drop : boolean, default True
        Delete columns to be used as the new index (only for DataFrame).
    append : boolean, default False
        Whether to append columns to existing index
    inplace : boolean, default False
        Modify the Series/DataFrame in place (do not create a new object)
    verify_integrity : boolean, default False
        Check the new index for duplicates. Otherwise defer the check until
        necessary. Setting to False will improve the performance of this
        method

    Returns
    -------
    reindexed : Series/DataFrame if inplace is False, else None

    Raises
    ------
    ValueError
        If ``verify_integrity`` is True and the new index contains
        duplicate keys.

    Examples
    --------
    >>> df = pd.DataFrame({'month': [1, 4, 7, 10],
    ...                    'year': [2012, 2014, 2013, 2014],
    ...                    'sale': [55, 40, 84, 31]})
    >>> df
       month  sale  year
    0      1    55  2012
    1      4    40  2014
    2      7    84  2013
    3     10    31  2014

    Set the index to become the 'month' column:

    >>> df.set_index('month')
           sale  year
    month
    1        55  2012
    4        40  2014
    7        84  2013
    10       31  2014

    Create a multi-index using columns 'year' and 'month':

    >>> df.set_index(['year', 'month'])
                sale
    year month
    2012 1        55
    2014 4        40
    2013 7        84
    2014 10       31

    Create a multi-index using a set of values and a column:

    >>> df.set_index([[1, 2, 3, 4], 'year'])
            month  sale
      year
    1 2012      1    55
    2 2014      4    40
    3 2013      7    84
    4 2014     10    31
    """
    inplace = validate_bool_kwarg(inplace, 'inplace')

    # A bare label or array must be handled as ONE key; iterating it
    # directly would split e.g. the label 'month' into single characters.
    if not isinstance(keys, list):
        keys = [keys]

    obj = self if inplace else self.copy()

    arrays = []
    names = []
    if append:
        # start from the existing index levels so new ones are added after
        names = list(self.index.names)
        if isinstance(self.index, ABCMultiIndex):
            arrays.extend(self.index._get_level_values(i)
                          for i in range(self.index.nlevels))
        else:
            arrays.append(self.index)

    to_remove = []
    for col in keys:
        if isinstance(col, ABCMultiIndex):
            # every level of a MultiIndex becomes its own index level
            arrays.extend(col._get_level_values(n)
                          for n in range(col.nlevels))
            names.extend(col.names)
        elif isinstance(col, ABCIndexClass):
            # Index but not MultiIndex (treated above)
            arrays.append(col)
            names.append(col.name)
        elif isinstance(col, ABCSeries):
            arrays.append(col._values)
            names.append(col.name)
        elif isinstance(col, (list, np.ndarray)):
            arrays.append(col)
            names.append(None)
        else:
            # from here, col can only be a column label (and obj a
            # DataFrame); see checks in Series.set_index and
            # DataFrame.set_index
            arrays.append(obj[col]._values)
            names.append(col)
            if drop:
                to_remove.append(col)

    index = ensure_index_from_sequences(arrays, names)

    if verify_integrity and not index.is_unique:
        duplicates = list(index[index.duplicated()])
        raise ValueError('Index has duplicate keys: {dup}'.format(
            dup=duplicates))

    # The same label may appear more than once in keys; delete each column
    # only once, otherwise the second ``del`` would raise a KeyError.
    for c in set(to_remove):
        del obj[c]

    # clear up memory usage
    index._cleanup()

    obj.index = index

    if not inplace:
        return obj

@Appender(_shared_docs['transpose'] % _shared_doc_kwargs)
def transpose(self, *args, **kwargs):

Expand Down
84 changes: 82 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
_is_unorderable_exception,
ensure_platform_int,
pandas_dtype)
from pandas.core.dtypes.generic import (
ABCSparseArray, ABCDataFrame, ABCIndexClass)
from pandas.core.dtypes.generic import (ABCDataFrame, ABCIndexClass,
ABCSeries, ABCSparseArray)
from pandas.core.dtypes.cast import (
maybe_upcast, infer_dtype_from_scalar,
maybe_convert_platform,
Expand Down Expand Up @@ -1093,6 +1093,86 @@ def _set_value(self, label, value, takeable=False):
return self
_set_value.__doc__ = set_value.__doc__

def set_index(self, arrays, append=False, inplace=False,
              verify_integrity=False):
    """
    Set the Series index (row labels) using one or more arrays.

    By default yields a new object.

    Parameters
    ----------
    arrays : array or list of arrays
        Either a Series, Index, MultiIndex, list, np.ndarray or a list
        containing only Series, Index, MultiIndex, list, np.ndarray.
        A list containing only scalars is treated as a single array.
    append : boolean, default False
        Whether to append the arrays to the existing index
    inplace : boolean, default False
        Modify the Series in place (do not create a new object)
    verify_integrity : boolean, default False
        Check the new index for duplicates. Otherwise defer the check until
        necessary. Setting to False will improve the performance of this
        method

    Returns
    -------
    reindexed : Series if inplace is False, else None

    Raises
    ------
    TypeError
        If ``arrays`` (or any element of it, when a list of arrays is
        passed) is not a Series, Index, MultiIndex, list or np.ndarray.

    Examples
    --------
    >>> s = pd.Series(range(10, 13))
    >>> s
    0    10
    1    11
    2    12
    dtype: int64

    Set the index to become `['a', 'b', 'c']`:

    >>> s.set_index(['a', 'b', 'c'])
    a    10
    b    11
    c    12
    dtype: int64

    Create a multi-index by appending to the existing index:

    >>> s.set_index(['a', 'b', 'c'], append=True)
    0  a    10
    1  b    11
    2  c    12
    dtype: int64

    Create a multi-index by passing a list of arrays:

    >>> t = s.set_index([['a', 'b', 'c'], ['I', 'II', 'III']]) ** 2
    >>> t
    a  I      100
    b  II     121
    c  III    144
    dtype: int64

    Apply index from another object (of the same length!):

    >>> s.set_index(t.index)
    a  I      10
    b  II     11
    c  III    12
    dtype: int64
    """
    # A single array-like, or a flat list of scalars, describes exactly one
    # index level; wrap it so that ``arrays`` is always a list of levels.
    if not isinstance(arrays, list) or all(is_scalar(x) for x in arrays):
        arrays = [arrays]

    # Column labels make no sense for a Series, so only array-likes may
    # reach the shared implementation.
    allowed = (ABCSeries, ABCIndexClass, list, np.ndarray)
    if not all(isinstance(x, allowed) for x in arrays):
        raise TypeError('arrays must be Series, Index, MultiIndex, list, '
                        'np.ndarray or list containing only Series, '
                        'Index, MultiIndex, list, np.ndarray')

    # drop=False: there are no columns that could be removed from a Series
    return super(Series, self).set_index(keys=arrays, drop=False,
                                         append=append, inplace=inplace,
                                         verify_integrity=verify_integrity)

def reset_index(self, level=None, drop=False, name=None, inplace=False):
"""
Generate a new DataFrame or Series with the index reset.
Expand Down
Loading

0 comments on commit 7355d16

Please sign in to comment.