-
-
Notifications
You must be signed in to change notification settings - Fork 18.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH: Add Index.fillna #11343
ENH: Add Index.fillna #11343
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1367,6 +1367,31 @@ with duplicates dropped. | |
idx1.sym_diff(idx2) | ||
idx1 ^ idx2 | ||
|
||
Missing values | ||
~~~~~~~~~~~~~~ | ||
|
||
.. _indexing.missing: | ||
|
||
.. versionadded:: 0.17.1 | ||
|
||
.. important:: | ||
|
||
Even though ``Index`` can hold missing values (``NaN``), it should be avoided | ||
if you do not want any unexpected results. For example, some operations | ||
exclude missing values implicitly. | ||
|
||
``Index.fillna`` fills missing values with specified scalar value. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you make this a link to the API docs with |
||
|
||
.. ipython:: python | ||
|
||
idx1 = pd.Index([1, np.nan, 3, 4]) | ||
idx1 | ||
idx1.fillna(2) | ||
|
||
idx2 = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')]) | ||
idx2 | ||
idx2.fillna(pd.Timestamp('2011-01-02')) | ||
|
||
Set / Reset Index | ||
----------------- | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,7 +15,8 @@ | |
from pandas.compat import range, zip, lrange, lzip, u, map | ||
from pandas import compat | ||
from pandas.core import algorithms | ||
from pandas.core.base import PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin, _shared_docs, PandasDelegate | ||
from pandas.core.base import PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin, PandasDelegate | ||
import pandas.core.base as base | ||
from pandas.util.decorators import (Appender, Substitution, cache_readonly, | ||
deprecate, deprecate_kwarg) | ||
import pandas.core.common as com | ||
|
@@ -29,8 +30,6 @@ | |
from pandas.io.common import PerformanceWarning | ||
|
||
|
||
|
||
|
||
# simplify | ||
default_pprint = lambda x, max_seq_items=None: com.pprint_thing(x, | ||
escape_chars=('\t', '\r', '\n'), | ||
|
@@ -45,6 +44,7 @@ | |
|
||
_index_doc_kwargs = dict(klass='Index', inplace='', | ||
duplicated='np.array') | ||
_index_shared_docs = dict() | ||
|
||
|
||
def _try_get_item(x): | ||
|
@@ -108,6 +108,7 @@ class Index(IndexOpsMixin, PandasObject): | |
_allow_datetime_index_ops = False | ||
_allow_period_index_ops = False | ||
_is_numeric_dtype = False | ||
_can_hold_na = True | ||
|
||
_engine_type = _index.ObjectEngine | ||
|
||
|
@@ -1236,6 +1237,43 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None): | |
taken = self.values.take(indices) | ||
return self._shallow_copy(taken) | ||
|
||
@cache_readonly | ||
def _isnan(self): | ||
""" return if each value is nan""" | ||
if self._can_hold_na: | ||
return isnull(self) | ||
else: | ||
# shouldn't reach to this condition by checking hasnans beforehand | ||
values = np.empty(len(self), dtype=np.bool_) | ||
values.fill(False) | ||
return values | ||
|
||
@cache_readonly | ||
def _nan_idxs(self): | ||
if self._can_hold_na: | ||
w, = self._isnan.nonzero() | ||
return w | ||
else: | ||
return np.array([], dtype=np.int64) | ||
|
||
@cache_readonly | ||
def hasnans(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is override There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I haven't notice |
||
""" return if I have any nans; enables various perf speedups """ | ||
if self._can_hold_na: | ||
return self._isnan.any() | ||
else: | ||
return False | ||
|
||
def _convert_for_op(self, value): | ||
""" Convert value to be insertable to ndarray """ | ||
return value | ||
|
||
def _assert_can_do_op(self, value): | ||
""" Check value is valid for scalar op """ | ||
if not lib.isscalar(value): | ||
msg = "'value' must be a scalar, passed: {0}" | ||
raise TypeError(msg.format(type(value).__name__)) | ||
|
||
def putmask(self, mask, value): | ||
""" | ||
return a new Index of the values set with the mask | ||
|
@@ -1245,8 +1283,12 @@ def putmask(self, mask, value): | |
numpy.ndarray.putmask | ||
""" | ||
values = self.values.copy() | ||
np.putmask(values, mask, value) | ||
return self._shallow_copy(values) | ||
try: | ||
np.putmask(values, mask, self._convert_for_op(value)) | ||
return self._shallow_copy(values) | ||
except (ValueError, TypeError): | ||
# coerces to object | ||
return self.astype(object).putmask(mask, value) | ||
|
||
def format(self, name=False, formatter=None, **kwargs): | ||
""" | ||
|
@@ -2766,15 +2808,45 @@ def drop(self, labels, errors='raise'): | |
return self.delete(indexer) | ||
|
||
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) | ||
@Appender(_shared_docs['drop_duplicates'] % _index_doc_kwargs) | ||
@Appender(base._shared_docs['drop_duplicates'] % _index_doc_kwargs) | ||
def drop_duplicates(self, keep='first'): | ||
return super(Index, self).drop_duplicates(keep=keep) | ||
|
||
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) | ||
@Appender(_shared_docs['duplicated'] % _index_doc_kwargs) | ||
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) | ||
def duplicated(self, keep='first'): | ||
return super(Index, self).duplicated(keep=keep) | ||
|
||
_index_shared_docs['fillna'] = """ | ||
Fill NA/NaN values with the specified value | ||
|
||
Parameters | ||
---------- | ||
value : scalar | ||
Scalar value to use to fill holes (e.g. 0). | ||
This value cannot be a list-likes. | ||
downcast : dict, default is None | ||
a dict of item->dtype of what to downcast if possible, | ||
or the string 'infer' which will try to downcast to an appropriate | ||
equal type (e.g. float64 to int64 if possible) | ||
|
||
Returns | ||
------- | ||
filled : Index | ||
""" | ||
|
||
@Appender(_index_shared_docs['fillna']) | ||
def fillna(self, value=None, downcast=None): | ||
self._assert_can_do_op(value) | ||
if self.hasnans: | ||
result = self.putmask(self._isnan, value) | ||
if downcast is None: | ||
# no need to care metadata other than name | ||
# because it can't have freq if | ||
return Index(result, name=self.name) | ||
|
||
return self._shallow_copy() | ||
|
||
def _evaluate_with_timedelta_like(self, other, op, opstr): | ||
raise TypeError("can only perform ops with timedelta like values") | ||
|
||
|
@@ -3200,6 +3272,16 @@ def __array__(self, dtype=None): | |
""" the array interface, return my values """ | ||
return np.array(self._data, dtype=dtype) | ||
|
||
@cache_readonly | ||
def _isnan(self): | ||
""" return if each value is nan""" | ||
return self._data.codes == -1 | ||
|
||
@Appender(_index_shared_docs['fillna']) | ||
def fillna(self, value, downcast=None): | ||
self._assert_can_do_op(value) | ||
return CategoricalIndex(self._data.fillna(value), name=self.name) | ||
|
||
def argsort(self, *args, **kwargs): | ||
return self.values.argsort(*args, **kwargs) | ||
|
||
|
@@ -3214,7 +3296,7 @@ def is_unique(self): | |
return not self.duplicated().any() | ||
|
||
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) | ||
@Appender(_shared_docs['duplicated'] % _index_doc_kwargs) | ||
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) | ||
def duplicated(self, keep='first'): | ||
from pandas.hashtable import duplicated_int64 | ||
return duplicated_int64(self.codes.astype('i8'), keep) | ||
|
@@ -3612,6 +3694,8 @@ class Int64Index(NumericIndex): | |
_inner_indexer = _algos.inner_join_indexer_int64 | ||
_outer_indexer = _algos.outer_join_indexer_int64 | ||
|
||
_can_hold_na = False | ||
|
||
_engine_type = _index.Int64Engine | ||
|
||
def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, **kwargs): | ||
|
@@ -3646,11 +3730,6 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, * | |
def inferred_type(self): | ||
return 'integer' | ||
|
||
@cache_readonly | ||
def hasnans(self): | ||
# by definition | ||
return False | ||
|
||
@property | ||
def asi8(self): | ||
# do not cache or you'll create a memory leak | ||
|
@@ -3872,19 +3951,6 @@ def is_all_dates(self): | |
""" | ||
return False | ||
|
||
@cache_readonly | ||
def _nan_idxs(self): | ||
w, = self._isnan.nonzero() | ||
return w | ||
|
||
@cache_readonly | ||
def _isnan(self): | ||
return np.isnan(self.values) | ||
|
||
@cache_readonly | ||
def hasnans(self): | ||
return self._isnan.any() | ||
|
||
@cache_readonly | ||
def is_unique(self): | ||
return super(Float64Index, self).is_unique and self._nan_idxs.size < 2 | ||
|
@@ -4409,7 +4475,7 @@ def is_unique(self): | |
return not self.duplicated().any() | ||
|
||
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last', False: 'first'}) | ||
@Appender(_shared_docs['duplicated'] % _index_doc_kwargs) | ||
@Appender(base._shared_docs['duplicated'] % _index_doc_kwargs) | ||
def duplicated(self, keep='first'): | ||
from pandas.core.groupby import get_group_index | ||
from pandas.hashtable import duplicated_int64 | ||
|
@@ -4419,6 +4485,11 @@ def duplicated(self, keep='first'): | |
|
||
return duplicated_int64(ids, keep) | ||
|
||
@Appender(_index_shared_docs['fillna']) | ||
def fillna(self, value=None, downcast=None): | ||
# isnull is not implemented for MultiIndex | ||
raise NotImplementedError('isnull is not defined for MultiIndex') | ||
|
||
def get_value(self, series, key): | ||
# somewhat broken encapsulation | ||
from pandas.core.indexing import maybe_droplevels | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you put this above the title?