-
-
Notifications
You must be signed in to change notification settings - Fork 18.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
API: Added axis argument to rename, reindex #17800
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -65,6 +65,7 @@ | |
_values_from_object, | ||
_maybe_box_datetimelike, | ||
_dict_compat, | ||
_all_not_none, | ||
standardize_mapping) | ||
from pandas.core.generic import NDFrame, _shared_docs | ||
from pandas.core.index import (Index, MultiIndex, _ensure_index, | ||
|
@@ -111,7 +112,13 @@ | |
optional_by=""" | ||
by : str or list of str | ||
Name or list of names which refer to the axis items.""", | ||
versionadded_to_excel='') | ||
versionadded_to_excel='', | ||
optional_labels="""labels : array-like, optional | ||
New labels / index to conform the axis specified by 'axis' to.""", | ||
optional_axis="""axis : int or str, optional | ||
Axis to target. Can be either the axis name ('index', 'columns') | ||
or number (0, 1).""", | ||
) | ||
|
||
_numeric_only_doc = """numeric_only : boolean, default None | ||
Include only float, int, boolean data. If None, will attempt to use | ||
|
@@ -2776,6 +2783,47 @@ def reindexer(value): | |
|
||
return np.atleast_2d(np.asarray(value)) | ||
|
||
def _validate_axis_style_args(self, arg, arg_name, index, columns, | ||
axis, method_name): | ||
if axis is not None: | ||
# Using "axis" style, along with a positional arg | ||
# Both index and columns should be None then | ||
axis = self._get_axis_name(axis) | ||
if index is not None or columns is not None: | ||
msg = ( | ||
"Can't specify both 'axis' and 'index' or 'columns'. " | ||
"Specify either\n" | ||
"\t.{method_name}.rename({arg_name}, axis=axis), or\n" | ||
"\t.{method_name}.rename(index=index, columns=columns)" | ||
).format(arg_name=arg_name, method_name=method_name) | ||
raise TypeError(msg) | ||
if axis == 'index': | ||
index = arg | ||
elif axis == 'columns': | ||
columns = arg | ||
|
||
elif _all_not_none(arg, index, columns): | ||
msg = ( | ||
"Cannot specify all of '{arg_name}', 'index', and 'columns'. " | ||
"Specify either {arg_name} and 'axis', or 'index' and " | ||
"'columns'." | ||
).format(arg_name=arg_name) | ||
raise TypeError(msg) | ||
|
||
elif _all_not_none(arg, index): | ||
# This is the "ambiguous" case, so emit a warning | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe worth factoring this function out if its common with the drop changes? not sure There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe put in pandas/util/_validators.py with all other arg validation code |
||
msg = ( | ||
"Interpreting call to '.{method_name}(a, b)' as " | ||
"'.{method_name}(index=a, columns=b)'. " | ||
"Use keyword arguments to remove any ambiguity." | ||
).format(method_name=method_name) | ||
warnings.warn(msg, stacklevel=3) | ||
index, columns = arg, index | ||
elif index is None: | ||
# This is for the default axis, like reindex([0, 1]) | ||
index = arg | ||
return index, columns | ||
|
||
@property | ||
def _series(self): | ||
result = {} | ||
|
@@ -2902,7 +2950,11 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, | |
broadcast_axis=broadcast_axis) | ||
|
||
@Appender(_shared_docs['reindex'] % _shared_doc_kwargs) | ||
def reindex(self, index=None, columns=None, **kwargs): | ||
def reindex(self, labels=None, index=None, columns=None, axis=None, | ||
**kwargs): | ||
index, columns = self._validate_axis_style_args(labels, 'labels', | ||
index, columns, | ||
axis, 'reindex') | ||
return super(DataFrame, self).reindex(index=index, columns=columns, | ||
**kwargs) | ||
|
||
|
@@ -2914,8 +2966,84 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, | |
method=method, level=level, copy=copy, | ||
limit=limit, fill_value=fill_value) | ||
|
||
@Appender(_shared_docs['rename'] % _shared_doc_kwargs) | ||
def rename(self, index=None, columns=None, **kwargs): | ||
def rename(self, mapper=None, index=None, columns=None, axis=None, | ||
**kwargs): | ||
"""Alter axes labels. | ||
|
||
Function / dict values must be unique (1-to-1). Labels not contained in | ||
a dict / Series will be left as-is. Extra labels listed don't throw an | ||
error. | ||
|
||
See the :ref:`user guide <basics.rename>` for more. | ||
|
||
Parameters | ||
---------- | ||
mapper, index, columns : dict-like or function, optional | ||
dict-like or functions transformations to apply to | ||
that axis' values. Use either ``mapper`` and ``axis`` to | ||
specify the axis to target with ``mapper``, or ``index`` and | ||
``columns``. | ||
axis : int or str, optional | ||
Axis to target with ``mapper``. Can be either the axis name | ||
('index', 'columns') or number (0, 1). The default is 'index'. | ||
copy : boolean, default True | ||
Also copy underlying data | ||
inplace : boolean, default False | ||
Whether to return a new %(klass)s. If True then value of copy is | ||
ignored. | ||
level : int or level name, default None | ||
In case of a MultiIndex, only rename labels in the specified | ||
level. | ||
|
||
Returns | ||
------- | ||
renamed : DataFrame | ||
|
||
See Also | ||
-------- | ||
pandas.DataFrame.rename_axis | ||
|
||
Examples | ||
-------- | ||
|
||
``DataFrame.rename`` supports two calling conventions | ||
|
||
* ``(index=index_mapper, columns=columns_mapper, ...) | ||
* ``(mapper, axis={'index', 'columns'}, ...) | ||
|
||
We *highly* recommend using keyword arguments to clarify your | ||
intent. | ||
|
||
>>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) | ||
>>> df.rename(index=str, columns={"A": "a", "B": "c"}) | ||
a c | ||
0 1 4 | ||
1 2 5 | ||
2 3 6 | ||
|
||
>>> df.rename(index=str, columns={"A": "a", "C": "c"}) | ||
a B | ||
0 1 4 | ||
1 2 5 | ||
2 3 6 | ||
|
||
Using axis-style parameters | ||
|
||
>>> df.rename(str.lower, axis='columns') | ||
a b | ||
0 1 4 | ||
1 2 5 | ||
2 3 6 | ||
|
||
>>> df.rename({1: 2, 2: 4}, axis='index') | ||
A B | ||
0 1 4 | ||
2 2 5 | ||
4 3 6 | ||
""" | ||
index, columns = self._validate_axis_style_args(mapper, 'mapper', | ||
index, columns, | ||
axis, 'rename') | ||
return super(DataFrame, self).rename(index=index, columns=columns, | ||
**kwargs) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suppose it would make it a lot harder if the default
`axis=0`
is used instead of `None`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right now I raise on
If I change the default axis to 0, then I can't detect those cases. I could go either way here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you raise on
`df.reindex(labels, axis=0)`?
That seems perfectly valid?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's valid, just the
`axis=0`
is redundant. Likewise with `columns=labels, axis=1`
(I currently raise). Happy to adjust that though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Those are two different things, because you can either specify mapper/axis or index/columns.
`df.reindex(labels, axis=0)`
is a case of mapper/axis, and is thus perfectly valid (and even more explicit than leaving out `axis=0`; although it is the default, I don't think we should raise on this), while `df.reindex(columns=labels, axis=1)`
is a mixture of columns and axis, so it is OK to raise on that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`df.reindex(index=labels, axis=0)`
is a bit of a tricky case, as this one is indeed redundant and mixes the two. So in principle I would also raise here, like for `df.reindex(columns=labels, axis=1)`.
But if it is easier implementation-wise to allow that, I think that is OK (although it mixes both idioms, it is at least consistent about which axis is meant, compared to e.g.
`df.reindex(index=labels, axis=1)`).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, #17800 (comment) was incorrect.
`.reindex(labels, axis=0)`
should clearly work!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do raise on the "mixing" case, so I think we agree on what should happen? And I think the current implementation does that. I'll ensure there are tests for all this.