Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add allow_sets-kwarg to is_list_like #23065

Merged
merged 21 commits into from
Oct 18, 2018
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ Other API Changes
- :class:`Index` subtraction will attempt to operate element-wise instead of raising ``TypeError`` (:issue:`19369`)
- :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`)
- :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`)
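- :meth:`dtypes.common.is_list_like` has gained a ``strict`` keyword argument, which is ``None`` by default. Leaving it unset currently behaves like ``strict=False`` but emits a ``FutureWarning`` when a set is passed. If set to ``True``, sets are not considered list-like. (:issue:`22397`)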
- :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`)

.. _whatsnew_0240.deprecations:
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,11 +396,11 @@ def isin(comps, values):
boolean array same length as comps
"""

if not is_list_like(comps):
if not is_list_like(comps, strict=False):
raise TypeError("only list-like objects are allowed to be passed"
" to isin(), you passed a [{comps_type}]"
.format(comps_type=type(comps).__name__))
if not is_list_like(values):
if not is_list_like(values, strict=False):
raise TypeError("only list-like objects are allowed to be passed"
" to isin(), you passed a [{values_type}]"
.format(values_type=type(values).__name__))
Expand Down Expand Up @@ -1178,7 +1178,7 @@ class SelectNFrame(SelectN):

def __init__(self, obj, n, keep, columns):
super(SelectNFrame, self).__init__(obj, n, keep)
if not is_list_like(columns):
if not is_list_like(columns, strict=False):
columns = [columns]
columns = list(columns)
self.columns = columns
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def get_result(self):
""" compute the results """

# dispatch to agg
if is_list_like(self.f) or is_dict_like(self.f):
if is_list_like(self.f, strict=False) or is_dict_like(self.f):
return self.obj.aggregate(self.f, axis=self.axis,
*self.args, **self.kwds)

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,8 @@ def _create_method(cls, op, coerce_to_dtype=True):

def _binop(self, other):
def convert_values(param):
if isinstance(param, ExtensionArray) or is_list_like(param):
if (isinstance(param, ExtensionArray)
or is_list_like(param, strict=False)):
ovalues = param
else: # Assume its an object
ovalues = [param] * len(self)
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1017,7 +1017,7 @@ def add_categories(self, new_categories, inplace=False):
set_categories
"""
inplace = validate_bool_kwarg(inplace, 'inplace')
if not is_list_like(new_categories):
if not is_list_like(new_categories, strict=False):
new_categories = [new_categories]
already_included = set(new_categories) & set(self.dtype.categories)
if len(already_included) != 0:
Expand Down Expand Up @@ -1065,7 +1065,7 @@ def remove_categories(self, removals, inplace=False):
set_categories
"""
inplace = validate_bool_kwarg(inplace, 'inplace')
if not is_list_like(removals):
if not is_list_like(removals, strict=False):
removals = [removals]

removal_set = set(list(removals))
Expand Down Expand Up @@ -1981,7 +1981,7 @@ def __setitem__(self, key, value):
raise ValueError("Cannot set a Categorical with another, "
"without identical categories")

rvalue = value if is_list_like(value) else [value]
rvalue = value if is_list_like(value, strict=False) else [value]

from pandas import Index
to_add = Index(rvalue).difference(self.categories)
Expand Down Expand Up @@ -2350,7 +2350,7 @@ def isin(self, values):
array([ True, False, True, False, True, False])
"""
from pandas.core.series import _sanitize_array
if not is_list_like(values):
if not is_list_like(values, strict=False):
raise TypeError("only list-like objects are allowed to be passed"
" to isin(), you passed a [{values_type}]"
.format(values_type=type(values).__name__))
Expand Down Expand Up @@ -2523,7 +2523,7 @@ def _factorize_from_iterable(values):
"""
from pandas.core.indexes.category import CategoricalIndex

if not is_list_like(values):
if not is_list_like(values, strict=False):
raise TypeError("Input must be list-like")

if is_categorical(values):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ def _evaluate_compare(self, other, op):

if not isinstance(other, type(self)):
# coerce to a similar object
if not is_list_like(other):
if not is_list_like(other, strict=False):
# scalar
other = [other]
elif lib.is_scalar(lib.item_from_zerodim(other)):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ def cmp_method(self, other):
mask = None
if isinstance(other, IntegerArray):
other, mask = other._data, other._mask
elif is_list_like(other):
elif is_list_like(other, strict=False):
other = np.asarray(other)
if other.ndim > 0 and len(self) != len(other):
raise ValueError('Lengths must match to compare')
Expand Down Expand Up @@ -568,7 +568,7 @@ def integer_arithmetic_method(self, other):
elif getattr(other, 'ndim', 0) > 1:
raise NotImplementedError(
"can only perform ops with 1-d structures")
elif is_list_like(other):
elif is_list_like(other, strict=False):
other = np.asarray(other)
if not other.ndim:
other = other.item()
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def wrapper(self, other):
if isna(other):
result.fill(nat_result)

elif not is_list_like(other):
elif not is_list_like(other, strict=False):
raise TypeError(msg.format(cls=type(self).__name__,
typ=type(other).__name__))
else:
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,8 @@ def is_any_frame():
name=getattr(self, 'name', None))

return result, True
elif is_list_like(arg) and arg not in compat.string_types:
elif (is_list_like(arg, strict=False)
and arg not in compat.string_types):
# we require a list, but not an 'str'
return self._aggregate_multiple_funcs(arg,
_level=_level,
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/computation/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def _in(x, y):
try:
return x.isin(y)
except AttributeError:
if is_list_like(x):
if is_list_like(x, strict=False):
try:
return y.isin(x)
except AttributeError:
Expand All @@ -249,7 +249,7 @@ def _not_in(x, y):
try:
return ~x.isin(y)
except AttributeError:
if is_list_like(x):
if is_list_like(x, strict=False):
try:
return ~y.isin(x)
except AttributeError:
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/computation/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def pr(left, right):

def conform(self, rhs):
""" inplace conform rhs """
if not is_list_like(rhs):
if not is_list_like(rhs, strict=False):
rhs = [rhs]
if isinstance(rhs, np.ndarray):
rhs = rhs.ravel()
Expand Down Expand Up @@ -472,7 +472,8 @@ def _validate_where(w):
TypeError : An invalid data type was passed in for w (e.g. dict).
"""

if not (isinstance(w, (Expr, string_types)) or is_list_like(w)):
if not (isinstance(w, (Expr, string_types))
or is_list_like(w, strict=False)):
raise TypeError("where must be passed as a string, Expr, "
"or list-like of Exprs")

Expand Down
6 changes: 3 additions & 3 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,7 @@ def infer_dtype_from_array(arr, pandas_dtype=False):
if isinstance(arr, np.ndarray):
return arr.dtype, arr

if not is_list_like(arr):
if not is_list_like(arr, strict=False):
arr = [arr]

if pandas_dtype and is_extension_type(arr):
Expand Down Expand Up @@ -518,7 +518,7 @@ def maybe_infer_dtype_type(element):
tipo = None
if hasattr(element, 'dtype'):
tipo = element.dtype
elif is_list_like(element):
elif is_list_like(element, strict=False):
element = np.asarray(element)
tipo = element.dtype
return tipo
Expand Down Expand Up @@ -914,7 +914,7 @@ def maybe_infer_to_datetimelike(value, convert_dates=False):

v = value

if not is_list_like(v):
if not is_list_like(v, strict=False):
v = [v]
v = np.array(v, copy=False)

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,8 +327,8 @@ def is_offsetlike(arr_or_obj):
"""
if isinstance(arr_or_obj, ABCDateOffset):
return True
elif (is_list_like(arr_or_obj) and len(arr_or_obj) and
is_object_dtype(arr_or_obj)):
elif (is_list_like(arr_or_obj, strict=False) and len(arr_or_obj)
and is_object_dtype(arr_or_obj)):
return all(isinstance(x, ABCDateOffset) for x in arr_or_obj)
return False

Expand Down
32 changes: 22 additions & 10 deletions pandas/core/dtypes/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pandas.compat import (PY2, string_types, text_type,
string_and_binary_types, re_type)
from pandas._libs import lib
import warnings

is_bool = lib.is_bool

Expand Down Expand Up @@ -247,7 +248,7 @@ def is_re_compilable(obj):
return True


def is_list_like(obj):
def is_list_like(obj, strict=None):
"""
Check if the object is list-like.

Expand All @@ -259,6 +260,8 @@ def is_list_like(obj):
Parameters
----------
obj : The object to check.
strict : boolean, default None
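If True, `set` is not counted as list-like; if False, it is. The default None currently behaves like False but emits a FutureWarning when `obj` is a set.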

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a versionadded tag

Returns
-------
Expand All @@ -282,12 +285,20 @@ def is_list_like(obj):
>>> is_list_like(np.array(2))
False
"""

return (isinstance(obj, compat.Iterable) and
# we do not count strings/unicode/bytes as list-like
not isinstance(obj, string_and_binary_types) and
# exclude zero-dimensional numpy arrays, effectively scalars
not (isinstance(obj, np.ndarray) and obj.ndim == 0))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aside from adding the kwarg everywhere, this is the only substantial change of this PR.

if strict is None and isinstance(obj, set):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the isinstance checks should be against something like collections.abc.Set to catch more exotic set variations:

In [2]: x = list('aabbc')

In [3]: s1 = set(x)

In [4]: s2 = frozenset(x)

In [5]: isinstance(s2, set)
Out[5]: False

In [6]: isinstance(s2, collections.abc.Set)
Out[6]: True

In [7]: isinstance(s1, collections.abc.Set)
Out[7]: True

# only raise warning if necessary
warnings.warn('is_list_like will in the future return False for sets. '
Copy link
Member

@jschendel jschendel Oct 9, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a consensus that we want this to be the default future behavior instead of a non-default option? I'm not convinced. Would be interesting to see how many of the existing uses of is_list_like are valid vs. invalid for sets.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This whole idea was just that: an idea.
I don't think we actually want to add a kwarg at all.

The idea was to change it within pandas only (if this is useful),
and not actually change the external API at all.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since list is ordered in python, the name is_list_like suggests the same property. If that's not the case it's good ("explicit is better than implicit") to have to specify it.

Don't see why that shouldn't apply to external API as well (with deprecation cycle etc.)

'To keep the previous behavior, pass `strict=False`. To '
'adopt the future behavior and silence this warning, '
'pass `strict=True`', FutureWarning)
strict = False if strict is None else strict

list_like = (isinstance(obj, compat.Iterable)
# we do not count strings/unicode/bytes as list-like
and not isinstance(obj, string_and_binary_types)
# exclude zero-dimensional numpy arrays, effectively scalars
and not (isinstance(obj, np.ndarray) and obj.ndim == 0))
return list_like and (not strict or not isinstance(obj, set))


def is_array_like(obj):
Expand Down Expand Up @@ -320,7 +331,7 @@ def is_array_like(obj):
False
"""

return is_list_like(obj) and hasattr(obj, "dtype")
return is_list_like(obj, strict=False) and hasattr(obj, "dtype")


def is_nested_list_like(obj):
Expand Down Expand Up @@ -363,8 +374,9 @@ def is_nested_list_like(obj):
--------
is_list_like
"""
return (is_list_like(obj) and hasattr(obj, '__len__') and
len(obj) > 0 and all(is_list_like(item) for item in obj))
return (is_list_like(obj, strict=False) and hasattr(obj, '__len__')
and len(obj) > 0 and all(is_list_like(item, strict=False)
for item in obj))


def is_dict_like(obj):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,7 @@ def _infer_fill_value(val):
element to provide proper block construction
"""

if not is_list_like(val):
if not is_list_like(val, strict=False):
val = [val]
val = np.array(val, copy=False)
if is_datetimelike(val):
Expand Down
19 changes: 11 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
if not isinstance(data, compat.Sequence):
data = list(data)
if len(data) > 0:
if is_list_like(data[0]) and getattr(data[0], 'ndim', 1) == 1:
if (is_list_like(data[0], strict=False)
and getattr(data[0], 'ndim', 1) == 1):
if is_named_tuple(data[0]) and columns is None:
columns = data[0]._fields
arrays, columns = _to_arrays(data, columns, dtype=dtype)
Expand Down Expand Up @@ -2790,7 +2791,8 @@ def __getitem__(self, key):

# We are left with two options: a single key, and a collection of keys,
# We interpret tuples as collections only for non-MultiIndex
is_single_key = isinstance(key, tuple) or not is_list_like(key)
is_single_key = (isinstance(key, tuple)
or not is_list_like(key, strict=False))

if is_single_key:
if self.columns.nlevels > 1:
Expand Down Expand Up @@ -3152,9 +3154,9 @@ def select_dtypes(self, include=None, exclude=None):
5 False 2.0
"""

if not is_list_like(include):
if not is_list_like(include, strict=False):
include = (include,) if include is not None else ()
if not is_list_like(exclude):
if not is_list_like(exclude, strict=False):
exclude = (exclude,) if exclude is not None else ()

selection = tuple(map(frozenset, (include, exclude)))
Expand Down Expand Up @@ -3279,7 +3281,7 @@ def _ensure_valid_index(self, value):
passed value
"""
# GH5632, make sure that we are a Series convertible
if not len(self.index) and is_list_like(value):
if not len(self.index) and is_list_like(value, strict=False):
try:
value = Series(value)
except (ValueError, NotImplementedError, TypeError):
Expand Down Expand Up @@ -7661,7 +7663,7 @@ def isin(self, values):
"a duplicate axis.")
return self.eq(values.reindex_like(self))
else:
if not is_list_like(values):
if not is_list_like(values, strict=False):
raise TypeError("only list-like or dict-like objects are "
"allowed to be passed to DataFrame.isin(), "
"you passed a "
Expand Down Expand Up @@ -7731,7 +7733,7 @@ def extract_index(data):
elif isinstance(v, dict):
have_dicts = True
indexes.append(list(v.keys()))
elif is_list_like(v) and getattr(v, 'ndim', 1) == 1:
elif is_list_like(v, strict=False) and getattr(v, 'ndim', 1) == 1:
have_raw_arrays = True
raw_lengths.append(len(v))

Expand Down Expand Up @@ -7774,7 +7776,8 @@ def convert(v):
# this is equiv of np.asarray, but does object conversion
# and platform dtype preservation
try:
if is_list_like(values[0]) or hasattr(values[0], 'len'):
if (is_list_like(values[0], strict=False)
or hasattr(values[0], 'len')):
values = np.array([convert(v) for v in values])
elif isinstance(values[0], np.ndarray) and values[0].ndim == 0:
# GH#21861
Expand Down
Loading