From 379e145e2733fcee8ed055dec31f282efba1bd79 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 13 Feb 2015 22:38:12 -0500 Subject: [PATCH 1/2] promote consistency among type typetesting routines now all are is_* --- pandas/core/categorical.py | 4 --- pandas/core/common.py | 21 ++++++++++------ pandas/core/frame.py | 4 +-- pandas/core/groupby.py | 4 +-- pandas/core/index.py | 47 +++++++++++++++++------------------ pandas/core/indexing.py | 50 +++++++++++++++++++------------------- pandas/core/internals.py | 16 ++++++------ pandas/core/nanops.py | 24 +++++++++--------- pandas/core/series.py | 10 ++++---- pandas/tools/merge.py | 2 +- pandas/tseries/index.py | 2 +- pandas/tseries/period.py | 4 +-- pandas/tseries/tdi.py | 2 +- 13 files changed, 97 insertions(+), 93 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index a06ff5b492879..4032a8e552418 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -78,10 +78,6 @@ def f(self, other): return f -def _is_categorical(array): - """ return if we are a categorical possibility """ - return isinstance(array, Categorical) or isinstance(array.dtype, CategoricalDtype) - def _maybe_to_categorical(array): """ coerce to a categorical if a series is given """ if isinstance(array, ABCSeries): diff --git a/pandas/core/common.py b/pandas/core/common.py index 581ed31b9819b..05739a11cc8ab 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -368,7 +368,7 @@ def notnull(obj): return not res return ~res -def _is_null_datelike_scalar(other): +def is_null_datelike_scalar(other): """ test whether the object is a null datelike, e.g. Nat but guard against passing a non-scalar """ if other is pd.NaT or other is None: @@ -2084,7 +2084,7 @@ def _try_timedelta(v): return value -def _is_bool_indexer(key): +def is_bool_indexer(key): if isinstance(key, (ABCSeries, np.ndarray)): if key.dtype == np.object_: key = np.asarray(_values_from_object(key)) @@ -2363,6 +2363,9 @@ def _maybe_make_list(obj): return [obj] return obj +######################## +##### TYPE TESTING ##### +######################## is_bool = lib.is_bool @@ -2431,7 +2434,7 @@ def _get_dtype_type(arr_or_dtype): return arr_or_dtype.dtype.type -def _is_any_int_dtype(arr_or_dtype): +def is_any_int_dtype(arr_or_dtype): tipo = _get_dtype_type(arr_or_dtype) return issubclass(tipo, np.integer) @@ -2442,7 +2445,7 @@ def is_integer_dtype(arr_or_dtype): not issubclass(tipo, (np.datetime64, np.timedelta64))) -def _is_int_or_datetime_dtype(arr_or_dtype): +def is_int_or_datetime_dtype(arr_or_dtype): tipo = _get_dtype_type(arr_or_dtype) return (issubclass(tipo, np.integer) or issubclass(tipo, (np.datetime64, np.timedelta64))) @@ -2467,12 +2470,12 @@ def is_timedelta64_ns_dtype(arr_or_dtype): return tipo == _TD_DTYPE -def _is_datetime_or_timedelta_dtype(arr_or_dtype): +def is_datetime_or_timedelta_dtype(arr_or_dtype): tipo = _get_dtype_type(arr_or_dtype) return issubclass(tipo, (np.datetime64, np.timedelta64)) -needs_i8_conversion = _is_datetime_or_timedelta_dtype +needs_i8_conversion = is_datetime_or_timedelta_dtype def i8_boxer(arr_or_dtype): """ return the scalar boxer for the dtype """ @@ -2493,7 +2496,7 @@ def is_float_dtype(arr_or_dtype): return issubclass(tipo, np.floating) -def _is_floating_dtype(arr_or_dtype): +def is_floating_dtype(arr_or_dtype): tipo = _get_dtype_type(arr_or_dtype) return isinstance(tipo, np.floating) @@ -2502,6 +2505,10 @@ def is_bool_dtype(arr_or_dtype): tipo = _get_dtype_type(arr_or_dtype) return issubclass(tipo, np.bool_) +def is_categorical(array): + """ return if we are a categorical possibility """ + return isinstance(array, ABCCategorical) or isinstance(array.dtype, CategoricalDtype) + def is_categorical_dtype(arr_or_dtype): if hasattr(arr_or_dtype,'dtype'): arr_or_dtype = arr_or_dtype.dtype diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 46f284f22c82e..dbb4e83ed7d53 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1798,7 +1798,7 @@ def _getitem_slice(self, key): def _getitem_array(self, key): # also raises Exception if object array with NA values - if com._is_bool_indexer(key): + if com.is_bool_indexer(key): # warning here just in case -- previously __setitem__ was # reindexing but __getitem__ was not; it seems more reasonable to # go with the __setitem__ behavior since that is more consistent @@ -2115,7 +2115,7 @@ def _setitem_slice(self, key, value): def _setitem_array(self, key, value): # also raises Exception if object array with NA values - if com._is_bool_indexer(key): + if com.is_bool_indexer(key): if len(key) != len(self.index): raise ValueError('Item wrong length %d instead of %d!' % (len(key), len(self.index))) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 9d5fde5600be3..7c5a75d868d84 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -25,7 +25,7 @@ notnull, _DATELIKE_DTYPES, is_numeric_dtype, is_timedelta64_dtype, is_datetime64_dtype, is_categorical_dtype, _values_from_object, - _is_datetime_or_timedelta_dtype, is_bool_dtype) + is_datetime_or_timedelta_dtype, is_bool_dtype) from pandas.core.config import option_context import pandas.lib as lib from pandas.lib import Timestamp @@ -1491,7 +1491,7 @@ def aggregate(self, values, how, axis=0): is_numeric = is_numeric_dtype(values.dtype) - if _is_datetime_or_timedelta_dtype(values.dtype): + if is_datetime_or_timedelta_dtype(values.dtype): values = values.view('int64') elif is_bool_dtype(values.dtype): values = _algos.ensure_float64(values) diff --git a/pandas/core/index.py b/pandas/core/index.py index 75a4e0c9647df..c62f18f24b565 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -19,7 +19,8 @@ from pandas.core.common import isnull, array_equivalent import pandas.core.common as com from pandas.core.common import (_values_from_object, is_float, is_integer, - ABCSeries, _ensure_object, _ensure_int64) + ABCSeries, _ensure_object, _ensure_int64, is_bool_indexer, + is_list_like, is_bool_dtype, is_integer_dtype) from pandas.core.config import get_option from pandas.io.common import PerformanceWarning @@ -55,7 +56,7 @@ def wrapper(self, other): # technically we could support bool dtyped Index # for now just return the indexing array directly - if com.is_bool_dtype(result): + if is_bool_dtype(result): return result try: return Index(result) @@ -160,7 +161,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, return Int64Index(data, copy=copy, dtype=dtype, name=name) elif issubclass(data.dtype.type, np.floating): return Float64Index(data, copy=copy, dtype=dtype, name=name) - elif issubclass(data.dtype.type, np.bool) or com.is_bool_dtype(data): + elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data): subarr = data.astype('object') else: subarr = com._asarray_tuplesafe(data, dtype=object) @@ -510,15 +511,15 @@ def set_names(self, names, level=None, inplace=False): if level is not None and self.nlevels == 1: raise ValueError('Level must be None for non-MultiIndex') - if level is not None and not com.is_list_like(level) and com.is_list_like(names): + if level is not None and not is_list_like(level) and is_list_like(names): raise TypeError("Names must be a string") - if not com.is_list_like(names) and level is None and self.nlevels > 1: + if not is_list_like(names) and level is None and self.nlevels > 1: raise TypeError("Must pass list-like as `names`.") - if not com.is_list_like(names): + if not is_list_like(names): names = [names] - if level is not None and not com.is_list_like(level): + if level is not None and not is_list_like(level): level = [level] if inplace: @@ -768,7 +769,7 @@ def _convert_list_indexer_for_mixed(self, keyarr, typ=None): and we have a mixed index (e.g. number/labels). figure out the indexer. return None if we can't help """ - if (typ is None or typ in ['iloc','ix']) and (com.is_integer_dtype(keyarr) and not self.is_floating()): + if (typ is None or typ in ['iloc','ix']) and (is_integer_dtype(keyarr) and not self.is_floating()): if self.inferred_type != 'integer': keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr) @@ -929,7 +930,7 @@ def __getitem__(self, key): # pessimization of basic indexing. return promote(getitem(key)) - if com._is_bool_indexer(key): + if is_bool_indexer(key): key = np.asarray(key) key = _values_from_object(key) @@ -2104,7 +2105,7 @@ def get_slice_bound(self, label, side): if isinstance(slc, np.ndarray): # get_loc may return a boolean array or an array of indices, which # is OK as long as they are representable by a slice. - if com.is_bool_dtype(slc): + if is_bool_dtype(slc): slc = lib.maybe_booleans_to_slice(slc.view('u1')) else: slc = lib.maybe_indices_to_slice(slc.astype('i8')) @@ -2882,15 +2883,15 @@ def set_levels(self, levels, level=None, inplace=False, verify_integrity=True): labels=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[u'foo', u'bar']) """ - if level is not None and not com.is_list_like(level): - if not com.is_list_like(levels): + if level is not None and not is_list_like(level): + if not is_list_like(levels): raise TypeError("Levels must be list-like") - if com.is_list_like(levels[0]): + if is_list_like(levels[0]): raise TypeError("Levels must be list-like") level = [level] levels = [levels] - elif level is None or com.is_list_like(level): - if not com.is_list_like(levels) or not com.is_list_like(levels[0]): + elif level is None or is_list_like(level): + if not is_list_like(levels) or not is_list_like(levels[0]): raise TypeError("Levels must be list of lists-like") if inplace: @@ -2980,15 +2981,15 @@ def set_labels(self, labels, level=None, inplace=False, verify_integrity=True): labels=[[1, 0, 1, 0], [0, 0, 1, 1]], names=[u'foo', u'bar']) """ - if level is not None and not com.is_list_like(level): - if not com.is_list_like(labels): + if level is not None and not is_list_like(level): + if not is_list_like(labels): raise TypeError("Labels must be list-like") - if com.is_list_like(labels[0]): + if is_list_like(labels[0]): raise TypeError("Labels must be list-like") level = [level] labels = [labels] - elif level is None or com.is_list_like(level): - if not com.is_list_like(labels) or not com.is_list_like(labels[0]): + elif level is None or is_list_like(level): + if not is_list_like(labels) or not is_list_like(labels[0]): raise TypeError("Labels must be list of lists-like") if inplace: @@ -3642,7 +3643,7 @@ def __getitem__(self, key): return tuple(retval) else: - if com._is_bool_indexer(key): + if is_bool_indexer(key): key = np.asarray(key) sortorder = self.sortorder else: @@ -4404,14 +4405,14 @@ def _convert_indexer(r): ranges = [] for i,k in enumerate(tup): - if com._is_bool_indexer(k): + if is_bool_indexer(k): # a boolean indexer, must be the same length! k = np.asarray(k) if len(k) != len(self): raise ValueError("cannot index with a boolean indexer that is" " not the same length as the index") ranges.append(k) - elif com.is_list_like(k): + elif is_list_like(k): # a collection of labels to include from this level (these are or'd) indexers = [] for x in k: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1ce9decd178a0..56446d0566fa7 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -5,10 +5,10 @@ from pandas.compat import range, zip import pandas.compat as compat import pandas.core.common as com -from pandas.core.common import (_is_bool_indexer, is_integer_dtype, +from pandas.core.common import (is_bool_indexer, is_integer_dtype, _asarray_tuplesafe, is_list_like, isnull, ABCSeries, ABCDataFrame, ABCPanel, is_float, - _values_from_object, _infer_fill_value) + _values_from_object, _infer_fill_value, is_integer) import pandas.lib as lib import numpy as np @@ -188,7 +188,7 @@ def _has_valid_positional_setitem_indexer(self, indexer): elif is_list_like(i): # should check the elements? pass - elif com.is_integer(i): + elif is_integer(i): if i >= len(ax): raise IndexError("{0} cannot enlarge its target object" .format(self.name)) @@ -342,7 +342,7 @@ def _setitem_with_indexer(self, indexer, value): value = self._align_series(indexer, value) info_idx = indexer[info_axis] - if com.is_integer(info_idx): + if is_integer(info_idx): info_idx = [info_idx] labels = item_labels[info_idx] @@ -479,7 +479,7 @@ def can_do_equal_len(): # if we are setting on the info axis ONLY # set using those methods to avoid block-splitting # logic here - if len(indexer) > info_axis and com.is_integer(indexer[info_axis]) and all( + if len(indexer) > info_axis and is_integer(indexer[info_axis]) and all( _is_null_slice(idx) for i, idx in enumerate(indexer) if i != info_axis): self.obj[item_labels[indexer[info_axis]]] = value return @@ -728,7 +728,7 @@ def _multi_take_opportunity(self, tup): for indexer, ax in zip(tup, self.obj._data.axes): if isinstance(ax, MultiIndex): return False - elif com._is_bool_indexer(indexer): + elif is_bool_indexer(indexer): return False elif not ax.is_unique: return False @@ -752,7 +752,7 @@ def _multi_take(self, tup): def _convert_for_reindex(self, key, axis=0): labels = self.obj._get_axis(axis) - if com._is_bool_indexer(key): + if is_bool_indexer(key): key = _check_bool_indexer(labels, key) return labels[key] else: @@ -907,7 +907,7 @@ def _getitem_axis(self, key, axis=0): return self._getitem_iterable(key, axis=axis) else: - if com.is_integer(key): + if is_integer(key): if axis == 0 and isinstance(labels, MultiIndex): try: return self._get_label(key, axis=axis) @@ -945,7 +945,7 @@ def _reindex(keys, level=None): return result - if com._is_bool_indexer(key): + if is_bool_indexer(key): key = _check_bool_indexer(labels, key) inds, = key.nonzero() return self.obj.take(inds, axis=axis, convert=False) @@ -1053,7 +1053,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): # see if we are positional in nature is_int_index = labels.is_integer() - is_int_positional = com.is_integer(obj) and not is_int_index + is_int_positional = is_integer(obj) and not is_int_index # if we are a label return me try: @@ -1094,7 +1094,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): elif _is_nested_tuple(obj, labels): return labels.get_locs(obj) elif _is_list_like(obj): - if com._is_bool_indexer(obj): + if is_bool_indexer(obj): obj = _check_bool_indexer(labels, obj) inds, = obj.nonzero() return inds @@ -1174,7 +1174,7 @@ def _has_valid_type(self, key, axis): if isinstance(key, slice): return True - elif com._is_bool_indexer(key): + elif is_bool_indexer(key): return True elif _is_list_like(key): @@ -1261,7 +1261,7 @@ def _has_valid_type(self, key, axis): (key.stop, self.obj._get_axis_name(axis)) ) - elif com._is_bool_indexer(key): + elif is_bool_indexer(key): return True elif _is_list_like(key): @@ -1308,7 +1308,7 @@ def _getitem_axis(self, key, axis=0): if isinstance(key, slice): self._has_valid_type(key, axis) return self._get_slice_axis(key, axis=axis) - elif com._is_bool_indexer(key): + elif is_bool_indexer(key): return self._getbool_axis(key, axis=axis) elif _is_list_like(key): @@ -1348,7 +1348,7 @@ class _iLocIndexer(_LocationIndexer): _exception = IndexError def _has_valid_type(self, key, axis): - if com._is_bool_indexer(key): + if is_bool_indexer(key): if hasattr(key, 'index') and isinstance(key.index, Index): if key.index.inferred_type == 'integer': raise NotImplementedError( @@ -1361,9 +1361,9 @@ def _has_valid_type(self, key, axis): if isinstance(key, slice): return True - elif com.is_integer(key): + elif is_integer(key): return self._is_valid_integer(key, axis) - elif (_is_list_like(key)): + elif _is_list_like(key): return self._is_valid_list_like(key, axis) return False @@ -1438,7 +1438,7 @@ def _getitem_axis(self, key, axis=0): self._has_valid_type(key, axis) return self._get_slice_axis(key, axis=axis) - elif com._is_bool_indexer(key): + elif is_bool_indexer(key): self._has_valid_type(key, axis) return self._getbool_axis(key, axis=axis) @@ -1456,7 +1456,7 @@ def _getitem_axis(self, key, axis=0): else: key = self._convert_scalar_indexer(key, axis) - if not com.is_integer(key): + if not is_integer(key): raise TypeError("Cannot index by location index with a " "non-integer key") @@ -1526,11 +1526,11 @@ def _convert_key(self, key, is_setter=False): for ax, i in zip(self.obj.axes, key): if ax.is_integer(): - if not com.is_integer(i): + if not is_integer(i): raise ValueError("At based indexing on an integer index can only have integer " "indexers") else: - if com.is_integer(i): + if is_integer(i): raise ValueError("At based indexing on an non-integer index can only have non-integer " "indexers") return key @@ -1546,7 +1546,7 @@ def _has_valid_setitem_indexer(self, indexer): def _convert_key(self, key, is_setter=False): """ require integer args (and convert to label arguments) """ for a, i in zip(self.obj.axes, key): - if not com.is_integer(i): + if not is_integer(i): raise ValueError("iAt based indexing can only have integer " "indexers") return key @@ -1608,7 +1608,7 @@ def _convert_to_index_sliceable(obj, key): def _is_index_slice(obj): def _is_valid_index(x): - return (com.is_integer(x) or com.is_float(x) + return (is_integer(x) or is_float(x) and np.allclose(x, int(x), rtol=_eps, atol=0)) def _crit(v): @@ -1623,7 +1623,7 @@ def _check_bool_indexer(ax, key): # boolean indexing, need to check that the data are aligned, otherwise # disallowed - # this function assumes that com._is_bool_indexer(key) == True + # this function assumes that is_bool_indexer(key) == True result = key if isinstance(key, ABCSeries) and not key.index.equals(ax): @@ -1635,7 +1635,7 @@ def _check_bool_indexer(ax, key): result = result.astype(bool).values else: - # com._is_bool_indexer has already checked for nulls in the case of an + # is_bool_indexer has already checked for nulls in the case of an # object array key, so no check needed here result = np.asarray(result, dtype=bool) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6cf7fa5888539..6c0b8f5ec0af5 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -11,13 +11,13 @@ from pandas.core.common import (_possibly_downcast_to_dtype, isnull, _NS_DTYPE, _TD_DTYPE, ABCSeries, is_list_like, ABCSparseSeries, _infer_dtype_from_scalar, - _is_null_datelike_scalar, _maybe_promote, + is_null_datelike_scalar, _maybe_promote, is_timedelta64_dtype, is_datetime64_dtype, _possibly_infer_to_datetimelike, array_equivalent, - _maybe_convert_string_to_object) + _maybe_convert_string_to_object, is_categorical) from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import (_maybe_convert_indices, _length_of_indexer) -from pandas.core.categorical import Categorical, _maybe_to_categorical, _is_categorical +from pandas.core.categorical import Categorical, _maybe_to_categorical import pandas.core.common as com from pandas.sparse.array import _maybe_to_sparse, SparseArray import pandas.lib as lib @@ -1324,7 +1324,7 @@ def masker(v): values = masker(values) - if _is_null_datelike_scalar(other): + if is_null_datelike_scalar(other): other = np.nan elif isinstance(other, (np.timedelta64, Timedelta, timedelta)): other = _coerce_scalar_to_timedelta_type(other, unit='s', box=False).item() @@ -1799,7 +1799,7 @@ def _try_coerce_args(self, values, other): we are going to compare vs i8, so coerce to integer values is always ndarra like, other may not be """ values = values.view('i8') - if _is_null_datelike_scalar(other): + if is_null_datelike_scalar(other): other = tslib.iNaT elif isinstance(other, datetime): other = lib.Timestamp(other).asm8.view('i8') @@ -2072,7 +2072,7 @@ def make_block(values, placement, klass=None, ndim=None, klass = DatetimeBlock elif issubclass(vtype, np.complexfloating): klass = ComplexBlock - elif _is_categorical(values): + elif is_categorical(values): klass = CategoricalBlock else: @@ -2947,7 +2947,7 @@ def set(self, item, value, check=False): # can prob also fix the various if tests for sparse/categorical value_is_sparse = isinstance(value, SparseArray) - value_is_cat = _is_categorical(value) + value_is_cat = is_categorical(value) value_is_nonconsolidatable = value_is_sparse or value_is_cat if value_is_sparse: @@ -3594,7 +3594,7 @@ def form_blocks(arrays, names, axes): int_items.append((i, k, v)) elif v.dtype == np.bool_: bool_items.append((i, k, v)) - elif _is_categorical(v): + elif is_categorical(v): cat_items.append((i, k, v)) else: object_items.append((i, k, v)) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 9587d0d4a9043..602850d859d27 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -19,12 +19,12 @@ ensure_float, _ensure_float64, _ensure_int64, _ensure_object, is_float, is_integer, is_complex, - is_float_dtype, _is_floating_dtype, + is_float_dtype, is_floating_dtype, is_complex_dtype, is_integer_dtype, is_bool_dtype, is_object_dtype, is_datetime64_dtype, is_timedelta64_dtype, - _is_datetime_or_timedelta_dtype, - _is_int_or_datetime_dtype, _is_any_int_dtype) + is_datetime_or_timedelta_dtype, + is_int_or_datetime_dtype, is_any_int_dtype) class disallow(object): @@ -105,7 +105,7 @@ def f(values, axis=None, skipna=True, **kwds): def _bn_ok_dtype(dt, name): # Bottleneck chokes on datetime64 if (not is_object_dtype(dt) and - not _is_datetime_or_timedelta_dtype(dt)): + not is_datetime_or_timedelta_dtype(dt)): # bottleneck does not properly upcast during the sum # so can overflow @@ -198,7 +198,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, def _isfinite(values): - if _is_datetime_or_timedelta_dtype(values): + if is_datetime_or_timedelta_dtype(values): return isnull(values) if (is_complex_dtype(values) or is_float_dtype(values) or is_integer_dtype(values) or is_bool_dtype(values)): @@ -207,11 +207,11 @@ def _isfinite(values): def _na_ok_dtype(dtype): - return not _is_int_or_datetime_dtype(dtype) + return not is_int_or_datetime_dtype(dtype) def _view_if_needed(values): - if _is_datetime_or_timedelta_dtype(values): + if is_datetime_or_timedelta_dtype(values): return values.view(np.int64) return values @@ -332,7 +332,7 @@ def _get_counts_nanvar(mask, axis, ddof): def _nanvar(values, axis=None, skipna=True, ddof=1): # private nanvar calculator mask = isnull(values) - if not _is_floating_dtype(values): + if not is_floating_dtype(values): values = values.astype('f8') count, d = _get_counts_nanvar(mask, axis, ddof) @@ -367,7 +367,7 @@ def nansem(values, axis=None, skipna=True, ddof=1): var = nanvar(values, axis, skipna, ddof=ddof) mask = isnull(values) - if not _is_floating_dtype(values): + if not is_floating_dtype(values): values = values.astype('f8') count, _ = _get_counts_nanvar(mask, axis, ddof) @@ -461,7 +461,7 @@ def nanargmin(values, axis=None, skipna=True): def nanskew(values, axis=None, skipna=True): mask = isnull(values) - if not _is_floating_dtype(values): + if not is_floating_dtype(values): values = values.astype('f8') count = _get_counts(mask, axis) @@ -496,7 +496,7 @@ def nanskew(values, axis=None, skipna=True): def nankurt(values, axis=None, skipna=True): mask = isnull(values) - if not _is_floating_dtype(values): + if not is_floating_dtype(values): values = values.astype('f8') count = _get_counts(mask, axis) @@ -533,7 +533,7 @@ def nankurt(values, axis=None, skipna=True): @disallow('M8','m8') def nanprod(values, axis=None, skipna=True): mask = isnull(values) - if skipna and not _is_any_int_dtype(values): + if skipna and not is_any_int_dtype(values): values = values.copy() values[mask] = 1 result = values.prod(axis) diff --git a/pandas/core/series.py b/pandas/core/series.py index e3129f1819271..8e859c06c564d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,7 +13,7 @@ import numpy as np import numpy.ma as ma -from pandas.core.common import (isnull, notnull, _is_bool_indexer, +from pandas.core.common import (isnull, notnull, is_bool_indexer, _default_index, _maybe_upcast, _asarray_tuplesafe, _infer_dtype_from_scalar, is_list_like, _values_from_object, @@ -531,7 +531,7 @@ def __getitem__(self, key): pass elif key is Ellipsis: return self - elif _is_bool_indexer(key): + elif is_bool_indexer(key): pass else: @@ -547,7 +547,7 @@ def __getitem__(self, key): if com.is_iterator(key): key = list(key) - if _is_bool_indexer(key): + if is_bool_indexer(key): key = _check_bool_indexer(self.index, key) return self._get_with(key) @@ -640,7 +640,7 @@ def setitem(key, value): elif key is Ellipsis: self[:] = value return - elif _is_bool_indexer(key): + elif is_bool_indexer(key): pass elif com.is_timedelta64_dtype(self.dtype): # reassign a null value to iNaT @@ -665,7 +665,7 @@ def setitem(key, value): if 'unorderable' in str(e): # pragma: no cover raise IndexError(key) - if _is_bool_indexer(key): + if is_bool_indexer(key): key = _check_bool_indexer(self.index, key) try: self.where(~key, value, inplace=True) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 27e4845e3faee..454b0f79310e4 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -609,7 +609,7 @@ def _right_outer_join(x, y, max_groups): def _factorize_keys(lk, rk, sort=True): - if com._is_int_or_datetime_dtype(lk) and com._is_int_or_datetime_dtype(rk): + if com.is_int_or_datetime_dtype(lk) and com.is_int_or_datetime_dtype(rk): klass = _hash.Int64Factorizer lk = com._ensure_int64(lk) rk = com._ensure_int64(rk) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 65414fe39d18c..34cbfe0a3abda 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1348,7 +1348,7 @@ def __getitem__(self, key): val = getitem(key) return Timestamp(val, offset=self.offset, tz=self.tz) else: - if com._is_bool_indexer(key): + if com.is_bool_indexer(key): key = np.asarray(key) if key.all(): key = slice(0,None,None) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index fbea7a3e1af67..58d2606ec41f1 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -112,7 +112,7 @@ def __init__(self, value=None, freq=None, ordinal=None, converted = other.asfreq(freq) self.ordinal = converted.ordinal - elif com._is_null_datelike_scalar(value) or value in tslib._nat_strings: + elif com.is_null_datelike_scalar(value) or value in tslib._nat_strings: self.ordinal = tslib.iNaT if freq is None: raise ValueError("If value is NaT, freq cannot be None " @@ -1113,7 +1113,7 @@ def __getitem__(self, key): val = getitem(key) return Period(ordinal=val, freq=self.freq) else: - if com._is_bool_indexer(key): + if com.is_bool_indexer(key): key = np.asarray(key) result = getitem(key) diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index db23c42294fd5..cf32d4ccc33e6 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -752,7 +752,7 @@ def __getitem__(self, key): val = getitem(key) return Timedelta(val) else: - if com._is_bool_indexer(key): + if com.is_bool_indexer(key): key = np.asarray(key) if key.all(): key = slice(0,None,None) From 00dbf3de095e3658c63163f0a15d366429cbed81 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 13 Feb 2015 22:57:15 -0500 Subject: [PATCH 2/2] clean up indexing/index method names to make all module public (rather than _ leading) --- pandas/core/categorical.py | 9 ++- pandas/core/common.py | 6 +- pandas/core/frame.py | 20 +++--- pandas/core/index.py | 34 +++++---- pandas/core/indexing.py | 140 +++++++++++++++++-------------------- pandas/core/internals.py | 12 ++-- pandas/core/panel.py | 10 +-- pandas/core/series.py | 8 +-- pandas/sparse/frame.py | 1 - pandas/tests/test_frame.py | 3 +- 10 files changed, 118 insertions(+), 125 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 4032a8e552418..fd9ef5d2e319a 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -10,7 +10,6 @@ from pandas.core.algorithms import factorize from pandas.core.base import PandasObject, PandasDelegate from pandas.core.index import Index, _ensure_index -from pandas.core.indexing import _is_null_slice from pandas.tseries.period import PeriodIndex import pandas.core.common as com from pandas.util.decorators import cache_readonly @@ -18,7 +17,7 @@ from pandas.core.common import (CategoricalDtype, ABCSeries, isnull, notnull, is_categorical_dtype, is_integer_dtype, is_object_dtype, _possibly_infer_to_datetimelike, get_dtype_kinds, - is_list_like, is_sequence, + is_list_like, is_sequence, is_null_slice, _ensure_platform_int, _ensure_object, _ensure_int64, _coerce_indexer_dtype, _values_from_object, take_1d) from pandas.util.terminal import get_terminal_size @@ -78,7 +77,7 @@ def f(self, other): return f -def _maybe_to_categorical(array): +def maybe_to_categorical(array): """ coerce to a categorical if a series is given """ if isinstance(array, ABCSeries): return array.values @@ -1116,7 +1115,7 @@ def _slice(self, slicer): # only allow 1 dimensional slicing, but can # in a 2-d case be passd (slice(None),....) if isinstance(slicer, tuple) and len(slicer) == 2: - if not _is_null_slice(slicer[0]): + if not is_null_slice(slicer[0]): raise AssertionError("invalid slicing for a 1-ndim categorical") slicer = slicer[1] @@ -1263,7 +1262,7 @@ def __setitem__(self, key, value): # only allow 1 dimensional slicing, but can # in a 2-d case be passd (slice(None),....) if len(key) == 2: - if not _is_null_slice(key[0]): + if not is_null_slice(key[0]): raise AssertionError("invalid slicing for a 1-ndim categorical") key = key[1] elif len(key) == 1: diff --git a/pandas/core/common.py b/pandas/core/common.py index 05739a11cc8ab..b48e73ca7c85c 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2544,9 +2544,13 @@ def is_re_compilable(obj): def is_list_like(arg): - return (hasattr(arg, '__iter__') and + return (hasattr(arg, '__iter__') and not isinstance(arg, compat.string_and_binary_types)) +def is_null_slice(obj): + return (isinstance(obj, slice) and obj.start is None and + obj.stop is None and obj.step is None) + def is_hashable(arg): """Return True if hash(arg) will succeed, False otherwise. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dbb4e83ed7d53..733de1fc202e5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -30,9 +30,9 @@ is_categorical_dtype) from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.core.indexing import (_maybe_droplevels, - _convert_to_index_sliceable, - _check_bool_indexer) +from pandas.core.indexing import (maybe_droplevels, + convert_to_index_sliceable, + check_bool_indexer) from pandas.core.internals import (BlockManager, create_block_manager_from_arrays, create_block_manager_from_blocks) @@ -1765,7 +1765,7 @@ def __getitem__(self, key): pass # see if we can slice the rows - indexer = _convert_to_index_sliceable(self, key) + indexer = convert_to_index_sliceable(self, key) if indexer is not None: return self._getitem_slice(indexer) @@ -1809,9 +1809,9 @@ def _getitem_array(self, key): elif len(key) != len(self.index): raise ValueError('Item wrong length %d instead of %d.' % (len(key), len(self.index))) - # _check_bool_indexer will throw exception if Series key cannot + # check_bool_indexer will throw exception if Series key cannot # be reindexed to match DataFrame rows - key = _check_bool_indexer(self.index, key) + key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] return self.take(indexer, axis=0, convert=False) else: @@ -1822,7 +1822,7 @@ def _getitem_multilevel(self, key): loc = self.columns.get_loc(key) if isinstance(loc, (slice, Series, np.ndarray, Index)): new_columns = self.columns[loc] - result_columns = _maybe_droplevels(new_columns, key) + result_columns = maybe_droplevels(new_columns, key) if self._is_mixed_type: result = self.reindex(columns=new_columns) result.columns = result_columns @@ -2097,7 +2097,7 @@ def _box_col_values(self, values, items): def __setitem__(self, key, value): # see if we can slice the rows - indexer = _convert_to_index_sliceable(self, key) + indexer = convert_to_index_sliceable(self, key) if indexer is not None: return self._setitem_slice(indexer, value) @@ -2119,7 +2119,7 @@ def _setitem_array(self, key, value): if len(key) != len(self.index): raise ValueError('Item wrong length %d instead of %d!' % (len(key), len(self.index))) - key = _check_bool_indexer(self.index, key) + key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] self._check_setitem_copy() self.ix._setitem_with_indexer(indexer, value) @@ -2246,7 +2246,7 @@ def reindexer(value): if isinstance(self.columns, MultiIndex) and key in self.columns: loc = self.columns.get_loc(key) if isinstance(loc, (slice, Series, np.ndarray, Index)): - cols = _maybe_droplevels(self.columns[loc], key) + cols = maybe_droplevels(self.columns[loc], key) if len(cols) and not cols.equals(value.columns): value = value.reindex_axis(cols, axis=1) # now align rows diff --git a/pandas/core/index.py b/pandas/core/index.py index c62f18f24b565..2444014ac9779 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -20,7 +20,7 @@ import pandas.core.common as com from pandas.core.common import (_values_from_object, is_float, is_integer, ABCSeries, _ensure_object, _ensure_int64, is_bool_indexer, - is_list_like, is_bool_dtype, is_integer_dtype) + is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype) from pandas.core.config import get_option from pandas.io.common import PerformanceWarning @@ -720,7 +720,7 @@ def validate(v): def is_int(v): return v is None or is_integer(v) - is_null_slice = start is None and stop is None + is_null_slicer = start is None and stop is None is_index_slice = is_int(start) and is_int(stop) is_positional = is_index_slice and not self.is_integer() @@ -742,7 +742,7 @@ def is_int(v): if self.inferred_type == 'mixed-integer-float': raise - if is_null_slice: + if is_null_slicer: indexer = key elif is_positional: indexer = key @@ -2615,7 +2615,7 @@ def get_value(self, series, key): if not np.isscalar(key): raise InvalidIndexError - from pandas.core.indexing import _maybe_droplevels + from pandas.core.indexing import maybe_droplevels from pandas.core.series import Series k = _values_from_object(key) @@ -2626,7 +2626,7 @@ def get_value(self, series, key): return new_values new_index = self[loc] - new_index = _maybe_droplevels(new_index, k) + new_index = maybe_droplevels(new_index, k) return Series(new_values, index=new_index, name=series.name) def equals(self, other): @@ -3245,7 +3245,7 @@ def duplicated(self, take_last=False): def get_value(self, series, key): # somewhat broken encapsulation - from pandas.core.indexing import _maybe_droplevels + from pandas.core.indexing import maybe_droplevels from pandas.core.series import Series # Label-based @@ -3257,7 +3257,7 @@ def _try_mi(k): loc = self.get_loc(k) new_values = series.values[loc] new_index = self[loc] - new_index = _maybe_droplevels(new_index, k) + new_index = maybe_droplevels(new_index, k) return Series(new_values, index=new_index, name=series.name) try: @@ -4192,7 +4192,7 @@ def get_loc_level(self, key, level=0, drop_level=True): ------- loc : int or slice object """ - def _maybe_drop_levels(indexer, levels, drop_level): + def maybe_droplevels(indexer, levels, drop_level): if not drop_level: return self[indexer] # kludgearound @@ -4221,7 +4221,7 @@ def _maybe_drop_levels(indexer, levels, drop_level): result = loc if result is None else result & loc - return result, _maybe_drop_levels(result, level, drop_level) + return result, maybe_droplevels(result, level, drop_level) level = self._get_level_number(level) @@ -4234,7 +4234,7 @@ def _maybe_drop_levels(indexer, levels, drop_level): try: if key in self.levels[0]: indexer = self._get_level_indexer(key, level=level) - new_index = _maybe_drop_levels(indexer, [0], drop_level) + new_index = maybe_droplevels(indexer, [0], drop_level) return indexer, new_index except TypeError: pass @@ -4248,8 +4248,8 @@ def partial_selection(key, indexer=None): indexer = self.get_loc(key) ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] - return indexer, _maybe_drop_levels(indexer, ilevels, - drop_level) + return indexer, maybe_droplevels(indexer, ilevels, + drop_level) if len(key) == self.nlevels: @@ -4307,11 +4307,11 @@ def partial_selection(key, indexer=None): indexer = slice(None, None) ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)] - return indexer, _maybe_drop_levels(indexer, ilevels, - drop_level) + return indexer, maybe_droplevels(indexer, ilevels, + drop_level) else: indexer = self._get_level_indexer(key, level=level) - return indexer, _maybe_drop_levels(indexer, [level], drop_level) + return indexer, maybe_droplevels(indexer, [level], drop_level) def _get_level_indexer(self, key, level=0): # return a boolean indexer or a slice showing where the key is @@ -4388,8 +4388,6 @@ def get_locs(self, tup): for passing to iloc """ - from pandas.core.indexing import _is_null_slice - # must be lexsorted to at least as many levels if not self.is_lexsorted_for_tuple(tup): raise KeyError('MultiIndex Slicing requires the index to be fully lexsorted' @@ -4427,7 +4425,7 @@ def _convert_indexer(r): else: ranges.append(np.zeros(self.labels[i].shape, dtype=bool)) - elif _is_null_slice(k): + elif is_null_slice(k): # empty slice pass diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 56446d0566fa7..426fce0797ad2 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -7,6 +7,7 @@ import pandas.core.common as com from pandas.core.common import (is_bool_indexer, is_integer_dtype, _asarray_tuplesafe, is_list_like, isnull, + is_null_slice, ABCSeries, ABCDataFrame, ABCPanel, is_float, _values_from_object, _infer_fill_value, is_integer) import pandas.lib as lib @@ -140,7 +141,7 @@ def _should_validate_iterable(self, axis=0): def _is_nested_tuple_indexer(self, tup): if any([ isinstance(ax, MultiIndex) for ax in self.obj.axes ]): - return any([ _is_nested_tuple(tup,ax) for ax in self.obj.axes ]) + return any([ is_nested_tuple(tup,ax) for ax in self.obj.axes ]) return False def _convert_tuple(self, key, is_setter=False): @@ -185,7 +186,7 @@ def _has_valid_positional_setitem_indexer(self, indexer): if isinstance(i, slice): # should check the stop slice? pass - elif is_list_like(i): + elif is_list_like_indexer(i): # should check the elements? pass elif is_integer(i): @@ -214,7 +215,7 @@ def _setitem_with_indexer(self, indexer, value): # reindex the axis to the new value # and set inplace - key, _ = _convert_missing_indexer(idx) + key, _ = convert_missing_indexer(idx) # if this is the items axes, then take the main missing # path first @@ -232,7 +233,7 @@ def _setitem_with_indexer(self, indexer, value): if _i != i ] if any([not l for l in len_non_info_axes]): - if not is_list_like(value): + if not is_list_like_indexer(value): raise ValueError("cannot set a frame with no " "defined index and a scalar") self.obj[key] = value @@ -242,7 +243,7 @@ def _setitem_with_indexer(self, indexer, value): # add a new item with the dtype setup self.obj[key] = _infer_fill_value(value) - new_indexer = _convert_from_missing_indexer_tuple( + new_indexer = convert_from_missing_indexer_tuple( indexer, self.obj.axes) self._setitem_with_indexer(new_indexer, value) return self.obj @@ -252,7 +253,7 @@ def _setitem_with_indexer(self, indexer, value): # just replacing the block manager here # so the object is the same index = self.obj._get_axis(i) - labels = _safe_append_to_index(index, key) + labels = safe_append_to_index(index, key) self.obj._data = self.obj.reindex_axis(labels, i)._data self.obj._maybe_update_cacher(clear=True) self.obj.is_copy=None @@ -265,7 +266,7 @@ def _setitem_with_indexer(self, indexer, value): indexer = tuple(nindexer) else: - indexer, missing = _convert_missing_indexer(indexer) + indexer, missing = convert_missing_indexer(indexer) if missing: @@ -276,7 +277,7 @@ def _setitem_with_indexer(self, indexer, value): if len(index) == 0: new_index = Index([indexer]) else: - new_index = _safe_append_to_index(index, indexer) + new_index = safe_append_to_index(index, indexer) # this preserves dtype of the value new_values = Series([value]).values @@ -307,7 +308,7 @@ def _setitem_with_indexer(self, indexer, value): else: # must have conforming columns - if com.is_list_like(value): + if is_list_like_indexer(value): if len(value) != len(self.obj.columns): raise ValueError( "cannot set a row with mismatched columns" @@ -356,11 +357,11 @@ def _setitem_with_indexer(self, indexer, value): idx = indexer[:info_axis][0] plane_indexer = tuple([idx]) + indexer[info_axis + 1:] - lplane_indexer = _length_of_indexer(plane_indexer[0], index) + lplane_indexer = length_of_indexer(plane_indexer[0], index) # require that we are setting the right number of values that # we are indexing - if is_list_like(value) and np.iterable(value) and lplane_indexer != len(value): + if is_list_like_indexer(value) and np.iterable(value) and lplane_indexer != len(value): if len(obj[idx]) != len(value): raise ValueError( @@ -384,8 +385,8 @@ def _setitem_with_indexer(self, indexer, value): plane_indexer = indexer[:info_axis] + indexer[info_axis + 1:] if info_axis > 0: plane_axis = self.obj.axes[:info_axis][0] - lplane_indexer = _length_of_indexer(plane_indexer[0], - plane_axis) + lplane_indexer = length_of_indexer(plane_indexer[0], + plane_axis) else: lplane_indexer = 0 @@ -397,7 +398,7 @@ def setter(item, v): # as we have a null slice which means essentially reassign to the columns # of a multi-dim object # GH6149 - if isinstance(pi, tuple) and all(_is_null_slice(idx) for idx in pi): + if isinstance(pi, tuple) and all(is_null_slice(idx) for idx in pi): s = v else: # set the item, possibly having a dtype change @@ -427,7 +428,7 @@ def can_do_equal_len(): # we need an interable, with a ndim of at least 1 # eg. don't pass thru np.array(0) - if _is_list_like(value) and getattr(value,'ndim',1) > 0: + if is_list_like_indexer(value) and getattr(value,'ndim',1) > 0: # we have an equal len Frame if isinstance(value, ABCDataFrame) and value.ndim > 1: @@ -474,13 +475,13 @@ def can_do_equal_len(): else: if isinstance(indexer, tuple): - indexer = _maybe_convert_ix(*indexer) + indexer = maybe_convert_ix(*indexer) # if we are setting on the info axis ONLY # set using those methods to avoid block-splitting # logic here if len(indexer) > info_axis and is_integer(indexer[info_axis]) and all( - _is_null_slice(idx) for i, idx in enumerate(indexer) if i != info_axis): + is_null_slice(idx) for i, idx in enumerate(indexer) if i != info_axis): self.obj[item_labels[indexer[info_axis]]] = value return @@ -511,7 +512,7 @@ def _align_series(self, indexer, ser): ravel = lambda i: i.ravel() if isinstance(i, np.ndarray) else i indexer = tuple(map(ravel, indexer)) - aligners = [not _is_null_slice(idx) for idx in indexer] + aligners = [not is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 is_frame = self.obj.ndim == 2 @@ -550,10 +551,10 @@ def _align_series(self, indexer, ser): # multiple aligners (or null slices) if com.is_sequence(idx) or isinstance(idx, slice): - if single_aligner and _is_null_slice(idx): + if single_aligner and is_null_slice(idx): continue new_ix = ax[idx] - if not is_list_like(new_ix): + if not is_list_like_indexer(new_ix): new_ix = Index([new_ix]) else: new_ix = Index(new_ix) @@ -611,7 +612,7 @@ def _align_frame(self, indexer, df): if isinstance(indexer, tuple): - aligners = [not _is_null_slice(idx) for idx in indexer] + aligners = [not is_null_slice(idx) for idx in indexer] sum_aligners = sum(aligners) single_aligner = sum_aligners == 1 @@ -652,7 +653,7 @@ def _align_frame(self, indexer, df): val = df.reindex(idx, columns=cols).values return val - elif ((isinstance(indexer, slice) or com.is_list_like(indexer)) + elif ((isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame): ax = self.obj.index[indexer] if df.index.equals(ax): @@ -707,7 +708,7 @@ def _getitem_tuple(self, tup): if i >= self.obj.ndim: raise IndexingError('Too many indexers') - if _is_null_slice(key): + if is_null_slice(key): continue retval = getattr(retval, self.name)._getitem_axis(key, axis=i) @@ -721,7 +722,7 @@ def _multi_take_opportunity(self, tup): if not isinstance(self.obj, NDFrame): return False - if not all(_is_list_like(x) for x in tup): + if not all(is_list_like_indexer(x) for x in tup): return False # just too complicated @@ -753,7 +754,7 @@ def _convert_for_reindex(self, key, axis=0): labels = self.obj._get_axis(axis) if is_bool_indexer(key): - key = _check_bool_indexer(labels, key) + key = check_bool_indexer(labels, key) return labels[key] else: if isinstance(key, Index): @@ -814,11 +815,11 @@ def _getitem_lowerdim(self, tup): # df.ix[d1:d2, 0] -> columns first (True) # df.ix[0, ['C', 'B', A']] -> rows first (False) for i, key in enumerate(tup): - if _is_label_like(key) or isinstance(key, tuple): + if is_label_like(key) or isinstance(key, tuple): section = self._getitem_axis(key, axis=i) # we have yielded a scalar ? - if not _is_list_like(section): + if not is_list_like_indexer(section): return section elif section.ndim == self.ndim: @@ -865,7 +866,7 @@ def _getitem_nested_tuple(self, tup): axis = 0 for i, key in enumerate(tup): - if _is_null_slice(key): + if is_null_slice(key): axis += 1 continue @@ -899,8 +900,8 @@ def _getitem_axis(self, key, axis=0): labels = self.obj._get_axis(axis) if isinstance(key, slice): return self._get_slice_axis(key, axis=axis) - elif _is_list_like(key) and not (isinstance(key, tuple) and - isinstance(labels, MultiIndex)): + elif is_list_like_indexer(key) and not (isinstance(key, tuple) and + isinstance(labels, MultiIndex)): if hasattr(key, 'ndim') and key.ndim > 1: raise ValueError('Cannot index with multidimensional key') @@ -946,7 +947,7 @@ def _reindex(keys, level=None): return result if is_bool_indexer(key): - key = _check_bool_indexer(labels, key) + key = check_bool_indexer(labels, key) inds, = key.nonzero() return self.obj.take(inds, axis=axis, convert=False) else: @@ -1091,11 +1092,11 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): if isinstance(obj, slice): return self._convert_slice_indexer(obj, axis) - elif _is_nested_tuple(obj, labels): + elif is_nested_tuple(obj, labels): return labels.get_locs(obj) - elif _is_list_like(obj): + elif is_list_like_indexer(obj): if is_bool_indexer(obj): - obj = _check_bool_indexer(labels, obj) + obj = check_bool_indexer(labels, obj) inds, = obj.nonzero() return inds else: @@ -1144,7 +1145,7 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): return labels.get_loc(obj) except KeyError: # allow a not found key only if we are a setter - if not is_list_like(obj) and is_setter: + if not is_list_like_indexer(obj) and is_setter: return {'key': obj} raise @@ -1156,7 +1157,7 @@ def _tuplify(self, loc): def _get_slice_axis(self, slice_obj, axis=0): obj = self.obj - if not _need_slice(slice_obj): + if not need_slice(slice_obj): return obj indexer = self._convert_slice_indexer(slice_obj, axis) @@ -1177,7 +1178,7 @@ def _has_valid_type(self, key, axis): elif is_bool_indexer(key): return True - elif _is_list_like(key): + elif is_list_like_indexer(key): return True else: @@ -1201,7 +1202,7 @@ def _getitem_axis(self, key, axis=0): def _getbool_axis(self, key, axis=0): labels = self.obj._get_axis(axis) - key = _check_bool_indexer(labels, key) + key = check_bool_indexer(labels, key) inds, = key.nonzero() try: return self.obj.take(inds, axis=axis, convert=False) @@ -1211,7 +1212,7 @@ def _getbool_axis(self, key, axis=0): def _get_slice_axis(self, slice_obj, axis=0): """ this is pretty simple as we just have to deal with labels """ obj = self.obj - if not _need_slice(slice_obj): + if not need_slice(slice_obj): return obj labels = obj._get_axis(axis) @@ -1264,7 +1265,7 @@ def _has_valid_type(self, key, axis): elif is_bool_indexer(key): return True - elif _is_list_like(key): + elif is_list_like_indexer(key): # mi is just a passthru if isinstance(key, tuple) and isinstance(ax, MultiIndex): @@ -1310,7 +1311,7 @@ def _getitem_axis(self, key, axis=0): return self._get_slice_axis(key, axis=axis) elif is_bool_indexer(key): return self._getbool_axis(key, axis=axis) - elif _is_list_like(key): + elif is_list_like_indexer(key): # GH 7349 # possibly convert a list-like into a nested tuple @@ -1329,7 +1330,7 @@ def _getitem_axis(self, key, axis=0): return self._getitem_iterable(key, axis=axis) # nested tuple slicing - if _is_nested_tuple(key, labels): + if is_nested_tuple(key, labels): locs = labels.get_locs(key) indexer = [ slice(None) ] * self.ndim indexer[axis] = locs @@ -1363,7 +1364,7 @@ def _has_valid_type(self, key, axis): return True elif is_integer(key): return self._is_valid_integer(key, axis) - elif _is_list_like(key): + elif is_list_like_indexer(key): return self._is_valid_list_like(key, axis) return False @@ -1405,7 +1406,7 @@ def _getitem_tuple(self, tup): if i >= self.obj.ndim: raise IndexingError('Too many indexers') - if _is_null_slice(key): + if is_null_slice(key): axis += 1 continue @@ -1423,7 +1424,7 @@ def _getitem_tuple(self, tup): def _get_slice_axis(self, slice_obj, axis=0): obj = self.obj - if not _need_slice(slice_obj): + if not need_slice(slice_obj): return obj slice_obj = self._convert_slice_indexer(slice_obj, axis) @@ -1445,7 +1446,7 @@ def _getitem_axis(self, key, axis=0): # a single integer or a list of integers else: - if _is_list_like(key): + if is_list_like_indexer(key): # validate list bounds self._is_valid_list_like(key, axis) @@ -1493,7 +1494,7 @@ def __getitem__(self, key): if not isinstance(key, tuple): # we could have a convertible item here (e.g. Timestamp) - if not _is_list_like(key): + if not is_list_like_indexer(key): key = tuple([key]) else: raise ValueError('Invalid call for scalar access (getting)!') @@ -1555,7 +1556,7 @@ def _convert_key(self, key, is_setter=False): _eps = np.finfo('f4').eps -def _length_of_indexer(indexer, target=None): +def length_of_indexer(indexer, target=None): """return the length of a single non-tuple indexer which could be a slice """ if target is not None and isinstance(indexer, slice): @@ -1578,12 +1579,12 @@ def _length_of_indexer(indexer, target=None): return (stop - start) / step elif isinstance(indexer, (ABCSeries, Index, np.ndarray, list)): return len(indexer) - elif not is_list_like(indexer): + elif not is_list_like_indexer(indexer): return 1 raise AssertionError("cannot find the length of the indexer") -def _convert_to_index_sliceable(obj, key): +def convert_to_index_sliceable(obj, key): """if we are index sliceable, then return my slicer, otherwise return None """ idx = obj.index @@ -1606,7 +1607,7 @@ def _convert_to_index_sliceable(obj, key): return None -def _is_index_slice(obj): +def is_index_slice(obj): def _is_valid_index(x): return (is_integer(x) or is_float(x) and np.allclose(x, int(x), rtol=_eps, atol=0)) @@ -1619,7 +1620,7 @@ def _crit(v): return not both_none and (_crit(obj.start) and _crit(obj.stop)) -def _check_bool_indexer(ax, key): +def check_bool_indexer(ax, key): # boolean indexing, need to check that the data are aligned, otherwise # disallowed @@ -1642,7 +1643,7 @@ def _check_bool_indexer(ax, key): return result -def _convert_missing_indexer(indexer): +def convert_missing_indexer(indexer): """ reverse convert a missing indexer, which is a dict return the scalar indexer and a boolean indicating if we converted """ @@ -1658,7 +1659,7 @@ def _convert_missing_indexer(indexer): return indexer, False -def _convert_from_missing_indexer_tuple(indexer, axes): +def convert_from_missing_indexer_tuple(indexer, axes): """ create a filtered indexer that doesn't have any missing indexers """ def get_indexer(_i, _idx): return (axes[_i].get_loc(_idx['key']) @@ -1666,7 +1667,7 @@ def get_indexer(_i, _idx): return tuple([get_indexer(_i, _idx) for _i, _idx in enumerate(indexer)]) -def _safe_append_to_index(index, key): +def safe_append_to_index(index, key): """ a safe append to an index, if incorrect type, then catch and recreate """ try: @@ -1679,7 +1680,7 @@ def _safe_append_to_index(index, key): "{1}".format(index.__class__.__name__, key)) -def _maybe_convert_indices(indices, n): +def maybe_convert_indices(indices, n): """ if we have negative indicies, translate to postive here if have indicies that are out-of-bounds, raise an IndexError """ @@ -1699,7 +1700,7 @@ def _maybe_convert_indices(indices, n): return indices -def _maybe_convert_ix(*args): +def maybe_convert_ix(*args): """ We likely want to take the cross-product """ @@ -1715,7 +1716,7 @@ def _maybe_convert_ix(*args): return args -def _is_nested_tuple(tup, labels): +def is_nested_tuple(tup, labels): # check for a compatiable nested tuple and multiindexes among the axes if not isinstance(tup, tuple): return False @@ -1728,31 +1729,22 @@ def _is_nested_tuple(tup, labels): return False +def is_list_like_indexer(key): + # allow a list_like, but exclude NamedTuples which can be indexers + return is_list_like(key) and not (isinstance(key, tuple) and type(key) is not tuple) -def _is_null_slice(obj): - return (isinstance(obj, slice) and obj.start is None and - obj.stop is None and obj.step is None) - - -def _is_label_like(key): +def is_label_like(key): # select a label or row - return not isinstance(key, slice) and not _is_list_like(key) - - -def _is_list_like(obj): - # Consider namedtuples to be not list like as they are useful as indices - return (hasattr(obj, '__iter__') - and not isinstance(obj, compat.string_types) - and not (isinstance(obj, tuple) and type(obj) is not tuple)) + return not isinstance(key, slice) and not is_list_like_indexer(key) -def _need_slice(obj): +def need_slice(obj): return (obj.start is not None or obj.stop is not None or (obj.step is not None and obj.step != 1)) -def _maybe_droplevels(index, key): +def maybe_droplevels(index, key): # drop levels original_index = index if isinstance(key, tuple): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6c0b8f5ec0af5..359463b10d3d0 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -16,8 +16,8 @@ _possibly_infer_to_datetimelike, array_equivalent, _maybe_convert_string_to_object, is_categorical) from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.core.indexing import (_maybe_convert_indices, _length_of_indexer) -from pandas.core.categorical import Categorical, _maybe_to_categorical +from pandas.core.indexing import maybe_convert_indices, length_of_indexer +from pandas.core.categorical import Categorical, maybe_to_categorical import pandas.core.common as com from pandas.sparse.array import _maybe_to_sparse, SparseArray import pandas.lib as lib @@ -560,7 +560,7 @@ def setitem(self, indexer, value): elif isinstance(indexer, slice): if is_list_like(value) and l: - if len(value) != _length_of_indexer(indexer, values): + if len(value) != length_of_indexer(indexer, values): raise ValueError("cannot set using a slice indexer with a " "different length than the value") @@ -1638,7 +1638,7 @@ def __init__(self, values, placement, fastpath=False, **kwargs): # coerce to categorical if we can - super(CategoricalBlock, self).__init__(_maybe_to_categorical(values), + super(CategoricalBlock, self).__init__(maybe_to_categorical(values), fastpath=True, placement=placement, **kwargs) @@ -3262,7 +3262,7 @@ def take(self, indexer, axis=1, verify=True, convert=True): n = self.shape[axis] if convert: - indexer = _maybe_convert_indices(indexer, n) + indexer = maybe_convert_indices(indexer, n) if verify: if ((indexer == -1) | (indexer >= n)).any(): @@ -4449,5 +4449,5 @@ def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill): else: indexer = np.asanyarray(slice_or_indexer, dtype=np.int64) if not allow_fill: - indexer = _maybe_convert_indices(indexer, length) + indexer = maybe_convert_indices(indexer, length) return 'fancy', indexer, len(indexer) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index df3e6c0195be3..2b2d28d62f758 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -10,11 +10,11 @@ import warnings import numpy as np from pandas.core.common import (PandasError, _try_sort, _default_index, - _infer_dtype_from_scalar, notnull) + _infer_dtype_from_scalar, notnull, is_list_like) from pandas.core.categorical import Categorical from pandas.core.index import (Index, MultiIndex, _ensure_index, _get_combined_index) -from pandas.core.indexing import _maybe_droplevels, _is_list_like +from pandas.core.indexing import maybe_droplevels from pandas.core.internals import (BlockManager, create_block_manager_from_arrays, create_block_manager_from_blocks) @@ -253,7 +253,7 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): def __getitem__(self, key): if isinstance(self._info_axis, MultiIndex): return self._getitem_multilevel(key) - if not (_is_list_like(key) or isinstance(key, slice)): + if not (is_list_like(key) or isinstance(key, slice)): return super(Panel, self).__getitem__(key) return self.ix[key] @@ -262,7 +262,7 @@ def _getitem_multilevel(self, key): loc = info.get_loc(key) if isinstance(loc, (slice, np.ndarray)): new_index = info[loc] - result_index = _maybe_droplevels(new_index, key) + result_index = maybe_droplevels(new_index, key) slices = [loc] + [slice(None) for x in range( self._AXIS_LEN - 1)] new_values = self.values[slices] @@ -806,7 +806,7 @@ def _ixs(self, i, axis=0): # xs cannot handle a non-scalar key, so just reindex here # if we have a multi-index and a single tuple, then its a reduction (GH 7516) if not (isinstance(ax, MultiIndex) and isinstance(key, tuple)): - if _is_list_like(key): + if is_list_like(key): indexer = {self._get_axis_name(axis): key} return self.reindex(**indexer) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8e859c06c564d..901faef484377 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -24,7 +24,7 @@ _maybe_box_datetimelike, ABCDataFrame) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, _ensure_index) -from pandas.core.indexing import _check_bool_indexer, _maybe_convert_indices +from pandas.core.indexing import check_bool_indexer, maybe_convert_indices from pandas.core import generic, base from pandas.core.internals import SingleBlockManager from pandas.core.categorical import Categorical, CategoricalAccessor @@ -548,7 +548,7 @@ def __getitem__(self, key): key = list(key) if is_bool_indexer(key): - key = _check_bool_indexer(self.index, key) + key = check_bool_indexer(self.index, key) return self._get_with(key) @@ -666,7 +666,7 @@ def setitem(key, value): raise IndexError(key) if is_bool_indexer(key): - key = _check_bool_indexer(self.index, key) + key = check_bool_indexer(self.index, key) try: self.where(~key, value, inplace=True) return @@ -2183,7 +2183,7 @@ def take(self, indices, axis=0, convert=True, is_copy=False): """ # check/convert indicies here if convert: - indices = _maybe_convert_indices( + indices = maybe_convert_indices( indices, len(self._get_axis(axis))) indices = com._ensure_platform_int(indices) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index bd34c7e5f02b2..821720f4035a8 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -13,7 +13,6 @@ from pandas.core.common import (isnull, notnull, _pickle_array, _unpickle_array, _try_sort) from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.core.indexing import _maybe_convert_indices from pandas.core.series import Series from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray, _default_index) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 563e9d4dae57c..82e5d68187b1e 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -13300,7 +13300,8 @@ def test_index_namedtuple(self): index = Index([idx1, idx2], name="composite_index", tupleize_cols=False) df = DataFrame([(1, 2), (3, 4)], index=index, columns=["A", "B"]) - self.assertEqual(df.ix[IndexType("foo", "bar")]["A"], 1) + result = df.ix[IndexType("foo", "bar")]["A"] + self.assertEqual(result, 1) def test_empty_nonzero(self): df = DataFrame([1, 2, 3])