diff --git a/pandas/core/common.py b/pandas/core/common.py index e138668b369fe..90a1ceb3cf0b9 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -217,7 +217,7 @@ def _isnull_new(obj): return _isnull_ndarraylike(obj) elif isinstance(obj, ABCGeneric): return obj._constructor(obj._data.isnull(func=isnull)) - elif isinstance(obj, list) or hasattr(obj, '__array__'): + elif isinstance(obj, list) or is_array_like(obj): return _isnull_ndarraylike(np.asarray(obj)) else: return obj is None @@ -243,7 +243,7 @@ def _isnull_old(obj): return _isnull_ndarraylike_old(obj) elif isinstance(obj, ABCGeneric): return obj._constructor(obj._data.isnull(func=_isnull_old)) - elif isinstance(obj, list) or hasattr(obj, '__array__'): + elif isinstance(obj, list) or is_array_like(obj): return _isnull_ndarraylike_old(np.asarray(obj)) else: return obj is None @@ -2266,7 +2266,7 @@ def _asarray_tuplesafe(values, dtype=None): from pandas.core.index import Index if not (isinstance(values, (list, tuple)) - or hasattr(values, '__array__')): + or is_array_like(values)): values = list(values) elif isinstance(values, Index): return values.values @@ -2489,6 +2489,43 @@ def is_list_like(arg): return (hasattr(arg, '__iter__') and not isinstance(arg, compat.string_and_binary_types)) +def is_array_like(obj): + """ + Check if object provides access to a data buffer via one of the numpy + array apis. + + http://docs.scipy.org/doc/numpy/reference/arrays.classes.html + http://docs.scipy.org/doc/numpy/reference/arrays.interface.html + + Parameters + ---------- + obj : object + + Returns + ------- + bool + True if `obj` exposes any of the three numpy array protocols. + + Notes + ----- + Remember that ndarrays and NDFrames are array-like. 
+ """ + # numpy ndarray subclass api + tmp = getattr(obj, '__array__', None) + if callable(tmp): + return True + + # Python side + # __array_interface__ is a dict + tmp = getattr(obj, '__array_interface__', None) + if isinstance(tmp, dict): + return True + + # C-struct access + if hasattr(obj, '__array_struct__'): + return True + + return False def _is_sequence(x): try: @@ -3105,3 +3142,44 @@ def _maybe_match_name(a, b): if a_name == b_name: return a_name return None + +def _unhandled_array_interface(obj): + """ + Checks whether an object: + 1) Implements the array interface + 2) Is not an object type that pandas handles natively + + #2 is a moving target. Essentially any 3rd party module can implement the + NumPy Array Interface and should be treated as array-like. For example, + the rpy2 SexpVector implements `__array_struct__` which we do not + explicitly handle. + + In the future, if we add explicit handling for the SexpVector, this + function would have to account for that. + + Parameters + ---------- + obj : object + + Returns + ------- + bool + True if `obj` is array-like but neither an ndarray nor a PandasObject. + + Usage + ----- + + ``` + if com._unhandled_array_interface(data): + data = np.asarray(data) + ``` + + """ + if isinstance(obj, np.ndarray): + return False + + import pandas.core.base as base + if isinstance(obj, base.PandasObject): + return False + + return is_array_like(obj) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4654ceee9896b..e909ff3dcd763 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -197,6 +197,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, if dtype is not None: dtype = self._validate_dtype(dtype) + # convert unhandled array-like objects + if com._unhandled_array_interface(data): + data = np.asarray(data) + if isinstance(data, DataFrame): data = data._data diff --git a/pandas/core/index.py b/pandas/core/index.py index 23f4cfd442a59..fede864462848 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -166,7 +166,7 @@ def __new__(cls, data=None, dtype=None, 
copy=False, name=None, fastpath=False, if copy: subarr = subarr.copy() - elif hasattr(data, '__array__'): + elif com.is_array_like(data): return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs) elif data is None or np.isscalar(data): diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 95d279add172c..7b7a9b636a8e1 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -145,6 +145,10 @@ def _init_data(self, data, copy, dtype, **kwargs): if dtype is not None: dtype = self._validate_dtype(dtype) + # convert unhandled array-like objects + if com._unhandled_array_interface(data): + data = np.asarray(data) + passed_axes = [kwargs.get(a) for a in self._AXIS_ORDERS] axes = None if isinstance(data, BlockManager): diff --git a/pandas/core/series.py b/pandas/core/series.py index 078bf0def241e..2289433d2aeff 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -190,6 +190,9 @@ def __init__(self, data=None, index=None, dtype=None, name=None, raise TypeError("{0!r} type is unordered" "".format(data.__class__.__name__)) else: + # unhandled array-like objects + if com._unhandled_array_interface(data): + data = np.asarray(data) # handle sparse passed here (and force conversion) if isinstance(data, ABCSparseArray): diff --git a/pandas/rpy/tests/test_common.py b/pandas/rpy/tests/test_common.py index a2e6d08d07b58..c87cde58b0479 100644 --- a/pandas/rpy/tests/test_common.py +++ b/pandas/rpy/tests/test_common.py @@ -207,6 +207,31 @@ def test_factor(self): result = com.load_data(name) assert np.equal(result, factors) + def test_pandas_constructor_compat(self): + """ + test that rpy2 SexpVector get handled by Pandas object constructors + """ + types = [pd.Series, pd.DataFrame, pd.Panel] + rnorm = r['rnorm'] + for typ in types: + shape = typ._AXIS_LEN * [10] + N = 10 ** typ._AXIS_LEN + + # create array on the R side + r_cmd = "test_arr = rnorm({N}); dim(test_arr) = c({shape});test_arr" + r_cmd = r_cmd.format(N=N, shape=','.join(map(str, shape))) + 
test_arr = r(r_cmd) + + # numpy.array handles array interfaces correctly + npy_arr = np.array(test_arr) + assert npy_arr.ndim == typ._AXIS_LEN + assert npy_arr.size == N + + assert isinstance(test_arr, robj.SexpVector) + pobj = typ(test_arr) + tm.assert_almost_equal(pobj.values, np.array(test_arr)) + tm.assert_almost_equal(pobj.values, npy_arr) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], # '--with-coverage', '--cover-package=pandas.core'], diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 5e91adbe1a2fa..59a5f9edda951 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -42,7 +42,7 @@ def test_get_callable_name(): from functools import partial getname = com._get_callable_name - def fn(x): + def fn(x): return x lambda_ = lambda x: x part1 = partial(fn) @@ -890,6 +890,69 @@ def test_2d_datetime64(self): expected[:, [2, 4]] = datetime(2007, 1, 1) tm.assert_almost_equal(result, expected) +class FakeArrArray(object): + def __init__(self, arr): + self.arr = arr + + def __array__(self): + return self.arr.__array__() + +class FakeArrInterface(object): + def __init__(self, arr): + self.arr = arr + + @property + def __array_interface__(self): + return self.arr.__array_interface__ + +class FakeArrStruct(object): + def __init__(self, arr): + self.arr = arr + + @property + def __array_struct__(self): + return self.arr.__array_struct__ + +def test_is_array_like(): + """ + Test interface from: + http://docs.scipy.org/doc/numpy/reference/arrays.interface.html + + Different from ndarray subclass + """ + arr = np.arange(10) + assert com.is_array_like(arr) is True + + # __array__ + arr_array = FakeArrArray(arr) + assert com.is_array_like(arr_array) is True + + # __array_interface__ + arr_interface = FakeArrInterface(arr) + assert com.is_array_like(arr_interface) is True + + # __array_struct__ + arr_struct = FakeArrStruct(arr) + assert com.is_array_like(arr_struct) is True + +def 
test_unhandled_array_interface(): + """Only foreign array-interface objects should be flagged as unhandled. + """ + # skip the structures we already explicitly handle + arr = np.arange(10) + series = Series(arr) + frame = tm.makeDataFrame() + assert not com._unhandled_array_interface(series) + assert not com._unhandled_array_interface(frame) + assert not com._unhandled_array_interface(arr) + + # __array_interface__ + arr_interface = FakeArrInterface(arr) + assert com._unhandled_array_interface(arr_interface) is True + + # __array_struct__ + arr_struct = FakeArrStruct(arr) + assert com._unhandled_array_interface(arr_struct) is True if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 0734da1ab09aa..a0a232157e72f 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -227,6 +227,41 @@ def f(dtype): f('float64') f('M8[ns]') + def test_constructor_array_interface(self): + """ + Test that objects implementing NumPy Array Interface get treated + like arrays in constructor + """ + class FakeArrInterface(object): + def __init__(self, arr): + self.arr = arr + + @property + def __array_interface__(self): + return self.arr.__array_interface__ + + class FakeArrStruct(object): + def __init__(self, arr): + self.arr = arr + + @property + def __array_struct__(self): + return self.arr.__array_struct__ + + shape = [10] * self._ndim + arr = np.random.randn(*shape) + fai = FakeArrInterface(arr) + pobj = self._typ(fai) + assert_almost_equal(pobj.values, arr) + assert_almost_equal(pobj.values, np.array(fai)) + + arr = np.random.randn(*shape) + fas = FakeArrStruct(arr) + pobj2 = self._typ(fas) + assert_almost_equal(pobj2.values, arr) + assert_almost_equal(pobj2.values, np.array(fas)) + + def check_metadata(self, x, y=None): for m in x._metadata: v = getattr(x,m,None)