From eaadcbcacbd01067601a0cec42007a4d4c0c803c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 26 Sep 2018 15:24:26 -0500 Subject: [PATCH 001/132] WIP: PeriodArray --- pandas/core/arrays/__init__.py | 2 +- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/datetimelike.py | 12 +- pandas/core/arrays/period.py | 240 ++++++++++++++++-- pandas/core/dtypes/common.py | 8 +- pandas/core/dtypes/dtypes.py | 26 +- pandas/core/indexes/accessors.py | 12 +- pandas/core/indexes/base.py | 11 + pandas/core/indexes/datetimelike.py | 13 +- pandas/core/indexes/period.py | 194 +++++++------- pandas/core/series.py | 2 +- pandas/tests/arrays/test_datetimelike.py | 6 +- pandas/tests/dtypes/test_common.py | 4 +- pandas/tests/extension/conftest.py | 4 +- pandas/tests/extension/test_period.py | 127 +++++++++ .../tests/indexes/period/test_construction.py | 42 ++- pandas/tests/indexes/period/test_formats.py | 18 +- 17 files changed, 529 insertions(+), 194 deletions(-) create mode 100644 pandas/tests/extension/test_period.py diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index 29f258bf1b29e..dcf018ce6610d 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -4,7 +4,7 @@ from .categorical import Categorical # noqa from .datetimes import DatetimeArrayMixin # noqa from .interval import IntervalArray # noqa -from .period import PeriodArrayMixin # noqa +from .period import PeriodArray # noqa from .timedeltas import TimedeltaArrayMixin # noqa from .integer import ( # noqa IntegerArray, integer_array) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 7bf13fb2fecc0..00f2753547ac4 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -118,7 +118,7 @@ def _from_factorized(cls, values, original): Parameters ---------- values : ndarray - An integer ndarray with the factorized values. + An ndarray with the unique factorized values. 
original : ExtensionArray The original ExtensionArray that factorize was called on. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 91c119808db52..8d0c7e3bb3612 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -11,7 +11,9 @@ from pandas._libs.tslibs.period import ( Period, DIFFERENT_FREQ_INDEX, IncompatibleFrequency) -from pandas.errors import NullFrequencyError, PerformanceWarning +from pandas.errors import ( + NullFrequencyError, PerformanceWarning, AbstractMethodError +) from pandas import compat from pandas.tseries import frequencies @@ -76,12 +78,10 @@ class AttributesMixin(object): @property def _attributes(self): # Inheriting subclass should implement _attributes as a list of strings - from pandas.errors import AbstractMethodError raise AbstractMethodError(self) @classmethod def _simple_new(cls, values, **kwargs): - from pandas.errors import AbstractMethodError raise AbstractMethodError(cls) def _get_attributes_dict(self): @@ -118,7 +118,7 @@ def _box_func(self): """ box function to get object from internal representation """ - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _box_values(self, values): """ @@ -351,13 +351,13 @@ def _add_datelike(self, other): typ=type(other).__name__)) def _sub_datelike(self, other): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _sub_period(self, other): return NotImplemented def _add_offset(self, offset): - raise com.AbstractMethodError(self) + raise AbstractMethodError(self) def _add_delta(self, other): return NotImplemented diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 481d5313f0e25..510dd648da1a9 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -17,15 +17,21 @@ from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( - is_integer_dtype, is_float_dtype, is_period_dtype) + 
is_integer_dtype, is_float_dtype, is_period_dtype, + is_float, is_integer, pandas_dtype, is_scalar, + is_datetime64_dtype, + ensure_object +) from pandas.core.dtypes.dtypes import PeriodDtype -from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.generic import ABCSeries, ABCIndex import pandas.core.common as com from pandas.tseries import frequencies +from pandas.tseries.frequencies import get_freq_code as _gfc from pandas.tseries.offsets import Tick, DateOffset +from pandas.core.arrays import ExtensionArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin @@ -49,13 +55,16 @@ def _period_array_cmp(cls, op): def wrapper(self, other): op = getattr(self._ndarray_values, opname) + if isinstance(other, (ABCSeries, ABCIndex)): + other = other.values + if isinstance(other, Period): if other.freq != self.freq: msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) result = op(other.ordinal) - elif isinstance(other, PeriodArrayMixin): + elif isinstance(other, PeriodArray): if other.freq != self.freq: msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) @@ -70,6 +79,9 @@ def wrapper(self, other): elif other is NaT: result = np.empty(len(self._ndarray_values), dtype=bool) result.fill(nat_result) + elif isinstance(other, (list, np.ndarray)): + # XXX: is this correct? + return NotImplemented else: other = Period(other, freq=self.freq) result = op(other.ordinal) @@ -82,7 +94,186 @@ def wrapper(self, other): return compat.set_function_name(wrapper, opname, cls) -class PeriodArrayMixin(DatetimeLikeArrayMixin): +class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): + """ + Pandas ExtensionArray for Period data. + + There are two components to a PeriodArray + + - ordinals + - freq + + The values are physically stored as an ndarray of integers. These are + called "ordinals" and represent some kind of offset from a base. 
+ + The `freq` indicates the span covered by each element of the array. + All elements in the PeriodArray have the same `freq`. + """ + _attributes = ["freq"] + # -------------------------------------------------------------------- + # Constructors + + def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, + periods=None, tz=None, dtype=None, copy=False, + **fields): + from pandas import PeriodIndex, DatetimeIndex, Int64Index + + # copy-pase from PeriodIndex.__new__ with slight adjustments. + # + # - removed all uses of name + valid_field_set = {'year', 'month', 'day', 'quarter', + 'hour', 'minute', 'second'} + + if not set(fields).issubset(valid_field_set): + raise TypeError('__new__() got an unexpected keyword argument {}'. + format(list(set(fields) - valid_field_set)[0])) + + if periods is not None: + if is_float(periods): + periods = int(periods) + elif not is_integer(periods): + msg = 'periods must be a number, got {periods}' + raise TypeError(msg.format(periods=periods)) + + if dtype is not None: + dtype = pandas_dtype(dtype) + if not is_period_dtype(dtype): + raise ValueError('dtype must be PeriodDtype') + if freq is None: + freq = dtype.freq + elif freq != dtype.freq: + msg = 'specified freq and dtype are different' + raise IncompatibleFrequency(msg) + + # coerce freq to freq object, otherwise it can be coerced elementwise + # which is slow + if freq: + freq = Period._maybe_convert_freq(freq) + + if data is None: + if ordinal is not None: + data = np.asarray(ordinal, dtype=np.int64) + else: + data, freq = cls._generate_range(start, end, periods, + freq, fields) + return cls._from_ordinals(data, freq=freq) + + if isinstance(data, (PeriodArray, PeriodIndex)): + if freq is None or freq == data.freq: # no freq change + freq = data.freq + data = data._ndarray_values + else: + base1, _ = _gfc(data.freq) + base2, _ = _gfc(freq) + data = libperiod.period_asfreq_arr(data._ndarray_values, + base1, base2, 1) + return cls._simple_new(data, 
freq=freq) + + # not array / index + if not isinstance(data, (np.ndarray, PeriodIndex, + DatetimeIndex, Int64Index)): + if is_scalar(data) or isinstance(data, Period): + # XXX + cls._scalar_data_error(data) + + # other iterable of some kind + if not isinstance(data, (list, tuple)): + data = list(data) + + data = np.asarray(data) + + # datetime other than period + if is_datetime64_dtype(data.dtype): + data = dt64arr_to_periodarr(data, freq, tz) + return cls._from_ordinals(data, freq=freq) + + # check not floats + if lib.infer_dtype(data) == 'floating' and len(data) > 0: + raise TypeError("PeriodIndex does not allow " + "floating point in construction") + + # anything else, likely an array of strings or periods + data = ensure_object(data) + freq = freq or libperiod.extract_freq(data) + data = libperiod.extract_ordinals(data, freq) + return cls._from_ordinals(data, freq=freq) + + @property + def asi8(self): + return self._data.view("i8") + + @classmethod + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars, dtype=dtype, copy=copy) + + @classmethod + def _from_factorized(cls, values, original): + return cls(values, dtype=original.dtype) + + def __repr__(self): + return '\n{}\nLength: {}, dtype: {}'.format( + [str(s) for s in self], + len(self), + self.dtype + ) + + def __len__(self): + return len(self._data) + + def isna(self): + return self._data == iNaT + + def take(self, indices, allow_fill=False, fill_value=None): + from pandas.core.algorithms import take + + if fill_value is None: + fill_value = iNaT + elif isinstance(fill_value, Period): + fill_value = fill_value.ordinal + elif fill_value is NaT: + fill_value = iNaT + elif fill_value != self.dtype.na_value: + raise ValueError("Expected a Period.") + + new_values = take(self._data, + indices, + allow_fill=allow_fill, + fill_value=fill_value) + + return self._from_ordinals(new_values, self.freq) + + @property + def nbytes(self): + return self._data.nbytes + + def copy(self, deep=False): 
+ return self._from_ordinals(self._data.copy(), freq=self.freq) + + @classmethod + def _concat_same_type(cls, to_concat): + freq = {x.freq for x in to_concat} + assert len(freq) == 1 + freq = list(freq)[0] + values = np.concatenate([x._data for x in to_concat]) + return cls._from_ordinals(values, freq=freq) + + def value_counts(self, dropna=False): + from pandas.core.algorithms import value_counts + from pandas.core.indexes.period import PeriodIndex + + if dropna: + values = self[~self.isna()]._data + else: + values = self._data + + result = value_counts(values) + index = PeriodIndex._from_ordinals(result.index, + name=result.index.name, + freq=self.freq) + return type(result)(result.values, + index=index, + name=result.name) + @property def _box_func(self): return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq) @@ -114,21 +305,6 @@ def freq(self, value): FutureWarning, stacklevel=2) self._freq = value - # -------------------------------------------------------------------- - # Constructors - - _attributes = ["freq"] - - def __new__(cls, values, freq=None, **kwargs): - if is_period_dtype(values): - # PeriodArray, PeriodIndex - if freq is not None and values.freq != freq: - raise IncompatibleFrequency(freq, values.freq) - freq = values.freq - values = values.asi8 - - return cls._simple_new(values, freq, **kwargs) - @classmethod def _simple_new(cls, values, freq=None, **kwargs): """ @@ -264,7 +440,7 @@ def asfreq(self, freq=None, how='E'): if self.hasnans: new_data[self._isnan] = iNaT - return self._simple_new(new_data, self.name, freq=freq) + return self._simple_new(new_data, freq=freq) # ------------------------------------------------------------------ # Arithmetic Methods @@ -319,7 +495,7 @@ def _add_delta(self, other): ordinal_delta = self._maybe_convert_timedelta(other) return self.shift(ordinal_delta) - def shift(self, n): + def shift(self, periods=1): """ Specialized shift which produces an Period Array/Index @@ -332,7 +508,8 @@ def shift(self, n): 
------- shifted : Period Array/Index """ - values = self._ndarray_values + n * self.freq.n + # TODO: ensure we match EA semantics, not PeriodIndex + values = self._ndarray_values + periods * self.freq.n if self.hasnans: values[self._isnan] = iNaT return self._shallow_copy(values=values) @@ -384,9 +561,15 @@ def _maybe_convert_timedelta(self, other): raise IncompatibleFrequency(msg.format(cls=type(self).__name__, freqstr=self.freqstr)) + @classmethod + def _scalar_data_error(cls, data): + raise TypeError('{0}(...) must be called with a collection of some ' + 'kind, {1} was passed'.format(cls.__name__, + repr(data))) + -PeriodArrayMixin._add_comparison_ops() -PeriodArrayMixin._add_datetimelike_methods() +PeriodArray._add_comparison_ops() +PeriodArray._add_datetimelike_methods() # ------------------------------------------------------------------- @@ -486,3 +669,12 @@ def _make_field_arrays(*fields): else np.repeat(x, length) for x in fields] return arrays + + +def dt64arr_to_periodarr(data, freq, tz): + if data.dtype != np.dtype('M8[ns]'): + raise ValueError('Wrong dtype: %s' % data.dtype) + + freq = Period._maybe_convert_freq(freq) + base, mult = _gfc(freq) + return libperiod.dt64arr_to_periodarr(data.view('i8'), base, tz) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index e2b9e246aee50..94596abd78085 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -4,11 +4,11 @@ from pandas.compat import (string_types, text_type, binary_type, PY3, PY36) from pandas._libs import algos, lib -from pandas._libs.tslibs import conversion +from pandas._libs.tslibs import conversion, Period from pandas.core.dtypes.dtypes import ( registry, CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, - DatetimeTZDtypeType, PeriodDtype, PeriodDtypeType, IntervalDtype, + DatetimeTZDtypeType, PeriodDtype, IntervalDtype, IntervalDtypeType, PandasExtensionDtype, ExtensionDtype, _pandas_registry) from pandas.core.dtypes.generic import ( @@ 
-1909,14 +1909,14 @@ def _get_dtype_type(arr_or_dtype): elif isinstance(arr_or_dtype, IntervalDtype): return IntervalDtypeType elif isinstance(arr_or_dtype, PeriodDtype): - return PeriodDtypeType + return arr_or_dtype.type elif isinstance(arr_or_dtype, string_types): if is_categorical_dtype(arr_or_dtype): return CategoricalDtypeType elif is_datetime64tz_dtype(arr_or_dtype): return DatetimeTZDtypeType elif is_period_dtype(arr_or_dtype): - return PeriodDtypeType + return Period elif is_interval_dtype(arr_or_dtype): return IntervalDtypeType return _get_dtype_type(np.dtype(arr_or_dtype)) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index d879ded4f0f09..00c66b4136fa9 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -4,6 +4,7 @@ import numpy as np from pandas import compat from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex +from pandas._libs.tslibs import Period, iNaT from .base import ExtensionDtype, _DtypeOpsMixin @@ -583,20 +584,13 @@ def __eq__(self, other): str(self.tz) == str(other.tz)) -class PeriodDtypeType(type): - """ - the type of PeriodDtype, this metaclass determines subclass ability - """ - pass - - -class PeriodDtype(PandasExtensionDtype): +class PeriodDtype(ExtensionDtype): """ A Period duck-typed class, suitable for holding a period with freq dtype. THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.int64. 
""" - type = PeriodDtypeType + type = Period kind = 'O' str = '|O08' base = np.dtype('O') @@ -666,11 +660,15 @@ def construct_from_string(cls, string): raise TypeError("could not construct PeriodDtype") def __unicode__(self): - return "period[{freq}]".format(freq=self.freq.freqstr) + return self.name @property def name(self): - return str(self) + return u"period[{freq}]".format(freq=self.freq.freqstr) + + @property + def na_value(self): + return iNaT def __hash__(self): # make myself hashable @@ -704,6 +702,12 @@ def is_dtype(cls, dtype): return False return super(PeriodDtype, cls).is_dtype(dtype) + @classmethod + def construct_array_type(cls): + from pandas.core.arrays import PeriodArray + + return PeriodArray + class IntervalDtypeType(type): """ diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index a1868980faed3..02e659cadc3ae 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -270,12 +270,12 @@ def freq(self): return self._get_values().inferred_freq -@delegate_names(delegate=PeriodIndex, - accessors=PeriodIndex._datetimelike_ops, - typ="property") -@delegate_names(delegate=PeriodIndex, - accessors=PeriodIndex._datetimelike_methods, - typ="method") +# @delegate_names(delegate=PeriodIndex, +# accessors=PeriodIndex._datetimelike_ops, +# typ="property") +# @delegate_names(delegate=PeriodIndex, +# accessors=PeriodIndex._datetimelike_methods, +# typ="method") class PeriodProperties(Properties): """ Accessor object for datetimelike properties of the Series values. 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b42bbdafcab45..b569eaabbbc7c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -303,8 +303,19 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, else: return result + elif (is_period_dtype(data) or + (dtype is not None and is_period_dtype(dtype))): + # TODO: ensure is_period_dtype for PeriodArray + from pandas import PeriodIndex + result = PeriodIndex(data, copy=copy, name=name, **kwargs) + if dtype is not None and _o_dtype == dtype: + return Index(result.to_pytimedelta(), dtype=_o_dtype) + else: + return result + # extension dtype elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): + import pdb; pdb.set_trace() data = np.asarray(data) if not (dtype is None or is_object_dtype(dtype)): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 578167a7db500..012e7f8ed1db1 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -364,6 +364,9 @@ def sort_values(self, return_indexer=False, ascending=True): if not ascending: sorted_values = sorted_values[::-1] + sorted_values = self._maybe_box_as_values(sorted_values, + **attribs) + return self._simple_new(sorted_values, **attribs) @Appender(_index_shared_docs['take'] % _index_doc_kwargs) @@ -676,8 +679,13 @@ def _concat_same_dtype(self, to_concat, name): return _concat._concat_datetimetz(to_concat, name) else: new_data = np.concatenate([c.asi8 for c in to_concat]) + + new_data = self._maybe_box_as_values(new_data, **attribs) return self._simple_new(new_data, **attribs) + def _maybe_box_as_values(self, values, **attribs): + return values + def astype(self, dtype, copy=True): if is_object_dtype(dtype): return self._box_values_as_index() @@ -697,10 +705,13 @@ def astype(self, dtype, copy=True): def _ensure_datetimelike_to_i8(other): """ helper for coercing an input scalar or array to i8 """ + from 
pandas.core.arrays import PeriodArray + if is_scalar(other) and isna(other): other = iNaT - elif isinstance(other, ABCIndexClass): + elif isinstance(other, (PeriodArray, ABCIndexClass)): # convert tz if needed + # TODO: Ensure PeriodArray.tz_localize if getattr(other, 'tz', None) is not None: other = other.tz_localize(None).asi8 else: diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0f86e18103e3c..f5da4784565be 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -31,7 +31,7 @@ _validate_end_alias) from pandas._libs.tslibs import resolution, period -from pandas.core.arrays.period import PeriodArrayMixin +from pandas.core.arrays.period import PeriodArray from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, ensure_index @@ -45,7 +45,7 @@ def _wrap_field_accessor(name): - fget = getattr(PeriodArrayMixin, name).fget + fget = getattr(PeriodArray, name).fget def f(self): result = fget(self) @@ -55,15 +55,6 @@ def f(self): f.__doc__ = fget.__doc__ return property(f) - -def dt64arr_to_periodarr(data, freq, tz): - if data.dtype != np.dtype('M8[ns]'): - raise ValueError('Wrong dtype: %s' % data.dtype) - - freq = Period._maybe_convert_freq(freq) - base, mult = _gfc(freq) - return period.dt64arr_to_periodarr(data.view('i8'), base, tz) - # --- Period index sketch @@ -74,7 +65,7 @@ def _new_PeriodIndex(cls, **d): return cls._from_ordinals(values=values, **d) -class PeriodIndex(PeriodArrayMixin, DatelikeOps, DatetimeIndexOpsMixin, +class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): """ Immutable ndarray holding ordinal values indicating regular periods in @@ -178,102 +169,68 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, periods=None, tz=None, dtype=None, copy=False, name=None, **fields): - valid_field_set = {'year', 'month', 'day', 'quarter', - 'hour', 'minute', 'second'} - - if not set(fields).issubset(valid_field_set): - 
raise TypeError('__new__() got an unexpected keyword argument {}'. - format(list(set(fields) - valid_field_set)[0])) - - if periods is not None: - if is_float(periods): - periods = int(periods) - elif not is_integer(periods): - msg = 'periods must be a number, got {periods}' - raise TypeError(msg.format(periods=periods)) - if name is None and hasattr(data, 'name'): name = data.name - if dtype is not None: - dtype = pandas_dtype(dtype) - if not is_period_dtype(dtype): - raise ValueError('dtype must be PeriodDtype') - if freq is None: - freq = dtype.freq - elif freq != dtype.freq: - msg = 'specified freq and dtype are different' - raise IncompatibleFrequency(msg) + data = PeriodArray(data=data, ordinal=ordinal, freq=freq, + start=start, end=end, periods=periods, + tz=tz, dtype=dtype, copy=copy, **fields) + return cls._simple_new(data, name=name) - # coerce freq to freq object, otherwise it can be coerced elementwise - # which is slow - if freq: - freq = Period._maybe_convert_freq(freq) + @property + def _ndarray_values(self): + return self.values.values - if data is None: - if ordinal is not None: - data = np.asarray(ordinal, dtype=np.int64) - else: - data, freq = cls._generate_range(start, end, periods, - freq, fields) - return cls._from_ordinals(data, name=name, freq=freq) - - if isinstance(data, PeriodIndex): - if freq is None or freq == data.freq: # no freq change - freq = data.freq - data = data._ndarray_values + # ------------------------------------------------------------------------ + # Mixin Candidates + # err. maybe not... 
+ + @property + def _box_func(self): + def func(x): + if isinstance(x, Period) or x is tslib.NaT: + return x else: - base1, _ = _gfc(data.freq) - base2, _ = _gfc(freq) - data = period.period_asfreq_arr(data._ndarray_values, - base1, base2, 1) - return cls._simple_new(data, name=name, freq=freq) - - # not array / index - if not isinstance(data, (np.ndarray, PeriodIndex, - DatetimeIndex, Int64Index)): - if is_scalar(data) or isinstance(data, Period): - cls._scalar_data_error(data) - - # other iterable of some kind - if not isinstance(data, (list, tuple)): - data = list(data) - - data = np.asarray(data) - - # datetime other than period - if is_datetime64_dtype(data.dtype): - data = dt64arr_to_periodarr(data, freq, tz) - return cls._from_ordinals(data, name=name, freq=freq) - - # check not floats - if infer_dtype(data) == 'floating' and len(data) > 0: - raise TypeError("PeriodIndex does not allow " - "floating point in construction") - - # anything else, likely an array of strings or periods - data = ensure_object(data) - freq = freq or period.extract_freq(data) - data = period.extract_ordinals(data, freq) - return cls._from_ordinals(data, name=name, freq=freq) + return Period._from_ordinal(ordinal=x, freq=self.freq) + return func + + # ------------------------------------------------------------------------ + # Straight Dispatch + + @property + def freq(self): + """Return the frequency object if it is set, otherwise None""" + return self.values.freq + + # ------------------------------------------------------------------------ + # Dispatch and Wrap + + def asfreq(self, freq=None, how='E'): + result = self.values.asfreq(freq=freq, how=how) + return self._simple_new(result, name=self.name) + # ------------------------------------------------------------------------ + @cache_readonly def _engine(self): - return self._engine_type(lambda: self, len(self)) + return self._engine_type(lambda: self._ndarray_values, len(self)) + + @property + def asi8(self): + return 
self.values.asi8 @classmethod def _simple_new(cls, values, name=None, freq=None, **kwargs): + # TODO: clean up signature. """ Values can be any type that can be coerced to Periods. Ordinals in an ndarray are fastpath-ed to `_from_ordinals` """ - if not is_integer_dtype(values): - values = np.array(values, copy=False) - if len(values) > 0 and is_float_dtype(values): - raise TypeError("PeriodIndex can't take floats") - return cls(values, name=name, freq=freq, **kwargs) - - return cls._from_ordinals(values, name, freq, **kwargs) + assert isinstance(values, PeriodArray) + result = object.__new__(cls) + result._data = values + result.name = name + return result @classmethod def _from_ordinals(cls, values, name=None, freq=None, **kwargs): @@ -281,12 +238,42 @@ def _from_ordinals(cls, values, name=None, freq=None, **kwargs): Values should be int ordinals `__new__` & `_simple_new` cooerce to ordinals and call this method """ - result = super(PeriodIndex, cls)._from_ordinals(values, freq) - - result.name = name + data = PeriodArray._from_ordinals(values, freq=freq) + result = cls._simple_new(data, name=name) result._reset_identity() return result + def _shallow_copy(self, values=None, **kwargs): + # TODO: update take to do this? + if values is None: + # Note: this is the Index implementation. + # slightly different from AttributesMixin implementation which + # defaults to self._ndarray_values + values = self.values + else: + # this differs too + if not isinstance(values, PeriodArray): + try: + values = PeriodArray._from_ordinals(values, freq=self.freq) + except TypeError: + # TODO: this is probably ambiguous for some oridinals. 
+ values = PeriodArray(values, freq=self.freq) + + attributes = self._get_attributes_dict() + attributes.update(kwargs) + if not len(values) and 'dtype' not in kwargs: + attributes['dtype'] = self.dtype + return self._simple_new(values, **attributes) + + def _maybe_box_as_values(self, values, **attribs): + freq = attribs['freq'] + return PeriodArray._from_ordinals(values, freq=freq) + + def shift(self, n): + # TODO: May need to + result = self.values.shift(n) + return self._simple_new(result, name=self.name) + def _shallow_copy_with_infer(self, values=None, **kwargs): """ we always want to return a PeriodIndex """ return self._shallow_copy(values=values, **kwargs) @@ -323,7 +310,7 @@ def _int64index(self): @property def values(self): - return self.astype(object).values + return self._data def __array__(self, dtype=None): if is_integer_dtype(dtype): @@ -389,6 +376,15 @@ def shape(self): def _formatter_func(self): return lambda x: "'%s'" % x + def _format_attrs(self): + # TODO: this is (always?) 
redundant with dtype + attrs = super(DatetimeIndexOpsMixin, self)._format_attrs() + freq = self.freqstr + if freq is not None: + freq = "'%s'" % freq + attrs.append(('freq', freq)) + return attrs + def asof_locs(self, where, mask): """ where : array of timestamps @@ -811,9 +807,9 @@ def __setstate__(self, state): _unpickle_compat = __setstate__ -PeriodIndex._add_comparison_ops() -PeriodIndex._add_numeric_methods_disabled() -PeriodIndex._add_logical_methods_disabled() +# PeriodIndex._add_comparison_ops() +# PeriodIndex._add_numeric_methods_disabled() +# PeriodIndex._add_logical_methods_disabled() PeriodIndex._add_datetimelike_methods() diff --git a/pandas/core/series.py b/pandas/core/series.py index 83f80c305c5eb..5c91a91209d14 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2326,7 +2326,7 @@ def combine(self, other, func, fill_value=None): # if the type is compatible with the calling EA try: new_values = self._values._from_sequence(new_values) - except TypeError: + except Exception: pass return self._constructor(new_values, index=new_index, name=new_name) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 24f34884dc077..54abe19ef8d31 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -5,7 +5,7 @@ from pandas.core.arrays.datetimes import DatetimeArrayMixin from pandas.core.arrays.timedeltas import TimedeltaArrayMixin -from pandas.core.arrays.period import PeriodArrayMixin +from pandas.core.arrays.period import PeriodArray class TestDatetimeArray(object): @@ -55,7 +55,7 @@ class TestPeriodArray(object): def test_from_pi(self): pi = pd.period_range('2016', freq='Q', periods=3) - arr = PeriodArrayMixin(pi) + arr = PeriodArray(pi) assert list(arr) == list(pi) # Check that Index.__new__ knows what to do with TimedeltaArray @@ -65,7 +65,7 @@ def test_from_pi(self): def test_astype_object(self): pi = pd.period_range('2016', freq='Q', periods=3) - arr 
= PeriodArrayMixin(pi) + arr = PeriodArray(pi) asobj = arr.astype('O') assert isinstance(asobj, np.ndarray) assert asobj.dtype == 'O' diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index a7a9faa9e77eb..b745a1e5d454a 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -611,8 +611,8 @@ def test__get_dtype_fails(input_param): ('datetime64[ns, Europe/London]', com.DatetimeTZDtypeType), (pd.SparseSeries([1, 2], dtype='int32'), np.int32), (pd.SparseSeries([1, 2], dtype='int32').dtype, np.int32), - (PeriodDtype(freq='D'), com.PeriodDtypeType), - ('period[D]', com.PeriodDtypeType), + (PeriodDtype(freq='D'), pd.Period), + ('period[D]', pd.Period), (IntervalDtype(), com.IntervalDtypeType), (None, type(None)), (1, type(None)), diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index 4bbbb7df2f399..ecd904c170750 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -31,11 +31,11 @@ def all_data(request, data, data_missing): @pytest.fixture -def data_repeated(): +def data_repeated(data): """Return different versions of data for count times""" def gen(count): for _ in range(count): - yield NotImplementedError + yield data yield gen diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py new file mode 100644 index 0000000000000..0ba929f24c2ce --- /dev/null +++ b/pandas/tests/extension/test_period.py @@ -0,0 +1,127 @@ +import pytest +import numpy as np + +import pandas as pd +from pandas.tests.extension import base +from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.core.arrays import PeriodArray + + +@pytest.fixture +def dtype(): + return PeriodDtype(freq='D') + + +@pytest.fixture +def data(dtype): + return PeriodArray(np.arange(1970, 2070), dtype=dtype) + + +@pytest.fixture +def data_for_sorting(dtype): + return PeriodArray([2018, 2019, 2017], dtype=dtype) + + +@pytest.fixture +def 
data_missing(dtype): + return PeriodArray([None, 2017], dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return PeriodArray([2018, None, 2017], dtype=dtype) + + +@pytest.fixture +def data_for_grouping(dtype): + B = 2018 + NA = None + A = 2017 + C = 2019 + return PeriodArray([B, B, NA, NA, A, A, B, C], dtype=dtype) + + +@pytest.fixture +def na_value(): + return pd.NaT + + +class BasePeriodTests(object): + pass + + +class TestPeriodDtype(BasePeriodTests, base.BaseDtypeTests): + pass + + +class TestConstructors(BasePeriodTests, base.BaseConstructorsTests): + pass + + +class TestGetitem(BasePeriodTests, base.BaseGetitemTests): + pass + + +class TestMethods(BasePeriodTests, base.BaseMethodsTests): + + def test_combine_add(self, data_repeated): + pass + + def test_container_shift(self): + raise pytest.xfail('todo') + + +class TestInterface(BasePeriodTests, base.BaseInterfaceTests): + def test_no_values_attribute(self, data): + pass + + +class TestArithmeticOps(BasePeriodTests, base.BaseArithmeticOpsTests): + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + op_name = all_arithmetic_operators + if op_name in ('__sub__', '__rsub__'): + s = pd.Series(data) + self.check_opname(s, op_name, s.iloc[0], exc=None) + else: + super().test_arith_series_with_scalar(data, + all_arithmetic_operators) + + def test_error(self): + pass + + +class TestCasting(BasePeriodTests, base.BaseCastingTests): + pass + + +class TestComparisonOps(BasePeriodTests, base.BaseComparisonOpsTests): + def test_compare_scalar(self): + pass + + +class TestMissing(BasePeriodTests, base.BaseMissingTests): + + @pytest.mark.xfail(reason="__setitem__") + def test_fillna_scalar(self, data_missing): + super().test_fillna_scalar(data_missing) + + @pytest.mark.xfail(reason="__setitem__") + def test_fillna_series(self, data_missing): + super().test_fillna_series(data_missing) + + @pytest.mark.xfail(reason="__setitem__") + def test_fillna_frame(self, data_missing): + 
super().test_fillna_frame(data_missing) + + +class TestReshaping(BasePeriodTests, base.BaseReshapingTests): + pass + + +# class TestGroupby(BasePeriodTests, base.BaseGroupbyTests): +# pass +# +# +# class TestSetitem(BasePeriodTests, base.BaseSetitemTests): +# pass diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index be741592ec7a2..448aa5e272661 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -7,6 +7,7 @@ from pandas.compat import lrange, PY3, text_type, lmap from pandas import (Period, PeriodIndex, period_range, offsets, date_range, Series, Index) +from pandas.core.dtypes.dtypes import PeriodDtype class TestPeriodIndex(object): @@ -264,36 +265,28 @@ def test_constructor_mixed(self): def test_constructor_simple_new(self): idx = period_range('2007-01', name='p', periods=2, freq='M') - result = idx._simple_new(idx, 'p', freq=idx.freq) - tm.assert_index_equal(result, idx) - - result = idx._simple_new(idx.astype('i8'), 'p', freq=idx.freq) - tm.assert_index_equal(result, idx) - - result = idx._simple_new([pd.Period('2007-01', freq='M'), - pd.Period('2007-02', freq='M')], - 'p', freq=idx.freq) - tm.assert_index_equal(result, idx) - - result = idx._simple_new(np.array([pd.Period('2007-01', freq='M'), - pd.Period('2007-02', freq='M')]), - 'p', freq=idx.freq) + result = idx._simple_new(idx.values, 'p', freq=idx.freq) tm.assert_index_equal(result, idx) + # _simple_new has type: (PeriodArray, name, Optional[Freq]) + # TODO: Add tests to PeriodArray._simple_new for + # - [ ] int + # - [ ] List[Period] + # - [ ] ndarray[Period] def test_constructor_simple_new_empty(self): # GH13079 idx = PeriodIndex([], freq='M', name='p') - result = idx._simple_new(idx, name='p', freq='M') + result = idx._simple_new(idx.values, name='p', freq='M') tm.assert_index_equal(result, idx) - @pytest.mark.parametrize('floats', [[1.1, 2.1], np.array([1.1, 2.1])]) - def 
test_constructor_floats(self, floats): - # GH#13079 - with pytest.raises(TypeError): - pd.PeriodIndex._simple_new(floats, freq='M') - - with pytest.raises(TypeError): - pd.PeriodIndex(floats, freq='M') + # @pytest.mark.parametrize('floats', [[1.1, 2.1], np.array([1.1, 2.1])]) + # def test_constructor_floats(self, floats): + # # GH#13079 + # with pytest.raises(TypeError): + # pd.PeriodIndex._simple_new(floats, freq='M') + # + # with pytest.raises(TypeError): + # pd.PeriodIndex(floats, freq='M') def test_constructor_nat(self): pytest.raises(ValueError, period_range, start='NaT', @@ -474,6 +467,7 @@ def test_map_with_string_constructor(self): class TestSeriesPeriod(object): + # TODO: many more tests def setup_method(self, method): self.series = Series(period_range('2000-01-01', periods=10, freq='D')) @@ -484,6 +478,6 @@ def test_constructor_cant_cast_period(self): dtype=float) def test_constructor_cast_object(self): - s = Series(period_range('1/1/2000', periods=10), dtype=object) + s = Series(period_range('1/1/2000', periods=10), dtype=PeriodDtype("D")) exp = Series(period_range('1/1/2000', periods=10)) tm.assert_series_equal(s, exp) diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py index daf44a559cf5c..2107055f454ef 100644 --- a/pandas/tests/indexes/period/test_formats.py +++ b/pandas/tests/indexes/period/test_formats.py @@ -116,41 +116,41 @@ def test_representation_to_series(self): idx8 = pd.period_range('2013Q1', periods=2, freq="Q") idx9 = pd.period_range('2013Q1', periods=3, freq="Q") - exp1 = """Series([], dtype: object)""" + exp1 = """Series([], dtype: period[D])""" exp2 = """0 2011-01-01 -dtype: object""" +dtype: period[D]""" exp3 = """0 2011-01-01 1 2011-01-02 -dtype: object""" +dtype: period[D]""" exp4 = """0 2011-01-01 1 2011-01-02 2 2011-01-03 -dtype: object""" +dtype: period[D]""" exp5 = """0 2011 1 2012 2 2013 -dtype: object""" +dtype: period[A-DEC]""" exp6 = """0 2011-01-01 09:00 1 2012-02-01 10:00 2 
NaT -dtype: object""" +dtype: period[H]""" exp7 = """0 2013Q1 -dtype: object""" +dtype: period[Q-DEC]""" exp8 = """0 2013Q1 1 2013Q2 -dtype: object""" +dtype: period[Q-DEC]""" exp9 = """0 2013Q1 1 2013Q2 2 2013Q3 -dtype: object""" +dtype: period[Q-DEC]""" for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, idx6, idx7, idx8, idx9], From a05928a295e7f3f474ec547678bd612bb11b7355 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 27 Sep 2018 14:46:24 -0500 Subject: [PATCH 002/132] WIP * remove debug --- pandas/core/indexes/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b569eaabbbc7c..e82265facb871 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -315,7 +315,6 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, # extension dtype elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): - import pdb; pdb.set_trace() data = np.asarray(data) if not (dtype is None or is_object_dtype(dtype)): From 3c0d9ee994f417849ab99ce5ddecdba224dc210e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 27 Sep 2018 15:04:41 -0500 Subject: [PATCH 003/132] Just moves --- pandas/core/indexes/period.py | 157 ++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 62 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f5da4784565be..e64e04814c999 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -162,6 +162,7 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, _infer_as_myclass = True _freq = None + _data = None # type: PeriodArray _engine_type = libindex.PeriodEngine @@ -177,47 +178,18 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, tz=tz, dtype=dtype, copy=copy, **fields) return cls._simple_new(data, name=name) + # ------------------------------------------------------------------------ + # Data @property def 
_ndarray_values(self): return self.values.values - # ------------------------------------------------------------------------ - # Mixin Candidates - # err. maybe not... - - @property - def _box_func(self): - def func(x): - if isinstance(x, Period) or x is tslib.NaT: - return x - else: - return Period._from_ordinal(ordinal=x, freq=self.freq) - return func - - # ------------------------------------------------------------------------ - # Straight Dispatch - @property - def freq(self): - """Return the frequency object if it is set, otherwise None""" - return self.values.freq + def values(self): + return self._data # ------------------------------------------------------------------------ - # Dispatch and Wrap - - def asfreq(self, freq=None, how='E'): - result = self.values.asfreq(freq=freq, how=how) - return self._simple_new(result, name=self.name) - # ------------------------------------------------------------------------ - - - @cache_readonly - def _engine(self): - return self._engine_type(lambda: self._ndarray_values, len(self)) - - @property - def asi8(self): - return self.values.asi8 + # Index Constructors @classmethod def _simple_new(cls, values, name=None, freq=None, **kwargs): @@ -265,28 +237,67 @@ def _shallow_copy(self, values=None, **kwargs): attributes['dtype'] = self.dtype return self._simple_new(values, **attributes) + def _shallow_copy_with_infer(self, values=None, **kwargs): + """ we always want to return a PeriodIndex """ + return self._shallow_copy(values=values, **kwargs) + + # ------------------------------------------------------------------------ + # Boxing + # err. maybe not... 
+ + @property + def _box_func(self): + def func(x): + if isinstance(x, Period) or x is tslib.NaT: + return x + else: + return Period._from_ordinal(ordinal=x, freq=self.freq) + return func + def _maybe_box_as_values(self, values, **attribs): + """Box an array of ordinals to a PeriodArray + + This is purely for compatibility between PeriodIndex + and Datetime/TimedeltaIndex. Once these are all backed by + an ExtensionArray, this can be removed + """ freq = attribs['freq'] return PeriodArray._from_ordinals(values, freq=freq) - def shift(self, n): - # TODO: May need to - result = self.values.shift(n) - return self._simple_new(result, name=self.name) + # ------------------------------------------------------------------------ + # Straight Dispatch - def _shallow_copy_with_infer(self, values=None, **kwargs): - """ we always want to return a PeriodIndex """ - return self._shallow_copy(values=values, **kwargs) + @property + def freq(self): + """Return the frequency object if it is set, otherwise None""" + return self._data.freq - def _coerce_scalar_to_index(self, item): - """ - we need to coerce a scalar to a compat for our index type + @property + def asi8(self): + return self._data.asi8 - Parameters - ---------- - item : scalar item to coerce - """ - return PeriodIndex([item], **self._get_attributes_dict()) + @property + def size(self): + # Avoid materializing self._values + return self._data.size + + @property + def shape(self): + # Avoid materializing self._values + return self._data.shape + # ------------------------------------------------------------------------ + # Dispatch and Wrap + + def asfreq(self, freq=None, how='E'): + result = self._data.asfreq(freq=freq, how=how) + return self._simple_new(result, name=self.name) + + # ------------------------------------------------------------------------ + # Indexing + @cache_readonly + def _engine(self): + # TODO: understand indexing before just changing this. 
+ return self._engine_type(lambda: self._ndarray_values, len(self)) @Appender(_index_shared_docs['__contains__']) def __contains__(self, key): @@ -308,9 +319,41 @@ def __contains__(self, key): def _int64index(self): return Int64Index(self.asi8, name=self.name, fastpath=True) - @property - def values(self): - return self._data + # ------------------------------------------------------------------------ + # Index Methods + + def shift(self, n): + """ + Specialized shift which produces a PeriodIndex + + Parameters + ---------- + n : int + Periods to shift by + + Returns + ------- + shifted : PeriodIndex + """ + # TODO: docs + # Note, this differs from the definition of ExtensionArray.shift + # 1. EA.shift takes a single `periods` argument, this accepts array + # 2. This accepts a `freq` argument + # so we don't dispatch + values = self._ndarray_values + n * self.freq.n + if self.hasnans: + values[self._isnan] = tslib.iNaT + return self._shallow_copy(values=values) + + def _coerce_scalar_to_index(self, item): + """ + we need to coerce a scalar to a compat for our index type + + Parameters + ---------- + item : scalar item to coerce + """ + return PeriodIndex([item], **self._get_attributes_dict()) def __array__(self, dtype=None): if is_integer_dtype(dtype): @@ -362,16 +405,6 @@ def _to_embed(self, keep_tz=False, dtype=None): return self.astype(object).values - @property - def size(self): - # Avoid materializing self._values - return self._ndarray_values.size - - @property - def shape(self): - # Avoid materializing self._values - return self._ndarray_values.shape - @property def _formatter_func(self): return lambda x: "'%s'" % x From 63fc3fa00a8bbc229b9ea8ae3e23e5635fa5da5b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 27 Sep 2018 15:04:49 -0500 Subject: [PATCH 004/132] PeriodArray.shift definition --- pandas/core/arrays/period.py | 9 ++++----- pandas/core/indexes/period.py | 2 -- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git 
a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 510dd648da1a9..76c85690c0c06 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -508,11 +508,10 @@ def shift(self, periods=1): ------- shifted : Period Array/Index """ - # TODO: ensure we match EA semantics, not PeriodIndex - values = self._ndarray_values + periods * self.freq.n - if self.hasnans: - values[self._isnan] = iNaT - return self._shallow_copy(values=values) + # TODO: remove from DatetimeLikeArrayMixin + # The semantics for Index.shift differ from EA.shift + # then just call super. + return ExtensionArray.shift(self, periods) def _maybe_convert_timedelta(self, other): """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index e64e04814c999..14e81258764ee 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -337,8 +337,6 @@ def shift(self, n): """ # TODO: docs # Note, this differs from the definition of ExtensionArray.shift - # 1. EA.shift takes a single `periods` argument, this accepts array - # 2. 
This accepts a `freq` argument # so we don't dispatch values = self._ndarray_values + n * self.freq.n if self.hasnans: From 7d5d71ce78983ce300c813be6fa3054e953b2243 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 27 Sep 2018 15:05:11 -0500 Subject: [PATCH 005/132] _data type --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e82265facb871..24dc8c60c28c4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -229,7 +229,7 @@ class Index(IndexOpsMixin, PandasObject): _outer_indexer = libjoin.outer_join_indexer_object _typ = 'index' - _data = None + _data = None # type: Union[np.ndarray, ExtensionArray] _id = None name = None asi8 = None From e5caac67c5ff33522728f022028605a72d144027 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 27 Sep 2018 15:18:40 -0500 Subject: [PATCH 006/132] clean --- pandas/core/indexes/period.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 14e81258764ee..ac04851a6a08a 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -407,15 +407,6 @@ def _to_embed(self, keep_tz=False, dtype=None): def _formatter_func(self): return lambda x: "'%s'" % x - def _format_attrs(self): - # TODO: this is (always?) 
redundant with dtype - attrs = super(DatetimeIndexOpsMixin, self)._format_attrs() - freq = self.freqstr - if freq is not None: - freq = "'%s'" % freq - attrs.append(('freq', freq)) - return attrs - def asof_locs(self, where, mask): """ where : array of timestamps From c1944077f988d475069dd0b94b714b9e89a169f1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 27 Sep 2018 16:17:30 -0500 Subject: [PATCH 007/132] accessor wip --- pandas/core/accessor.py | 15 +++++--- pandas/core/arrays/period.py | 60 +++++++++++++++++++++++++++++++- pandas/core/indexes/accessors.py | 25 ++++++++----- pandas/core/indexes/period.py | 52 +++++---------------------- 4 files changed, 94 insertions(+), 58 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index eab529584d1fb..34ed49fca890e 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -68,6 +68,7 @@ def _add_delegate_accessors(cls, delegate, accessors, typ, overwrite : boolean, default False overwrite the method/property in the target class if it exists """ + delegated_to = getattr(delegate, '_delegated_to', delegate) def _create_delegator_property(name): @@ -81,7 +82,7 @@ def _setter(self, new_values): _setter.__name__ = name return property(fget=_getter, fset=_setter, - doc=getattr(delegate, name).__doc__) + doc=getattr(delegated_to, name).__doc__) def _create_delegator_method(name): @@ -89,7 +90,7 @@ def f(self, *args, **kwargs): return self._delegate_method(name, *args, **kwargs) f.__name__ = name - f.__doc__ = getattr(delegate, name).__doc__ + f.__doc__ = getattr(delegated_to, name).__doc__ return f @@ -107,9 +108,13 @@ def f(self, *args, **kwargs): def delegate_names(delegate, accessors, typ, overwrite=False): """ - Add delegated names to a class using a class decorator. This provides - an alternative usage to directly calling `_add_delegate_accessors` - below a class definition. + Add delegated names to a class using a class decorator. 
+ + Methods and attributes are delegated to ... + + + This provides an alternative usage to directly calling + `_add_delegate_accessors` below a class definition. Parameters ---------- diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 76c85690c0c06..9ef2ca349721c 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -4,11 +4,13 @@ import numpy as np +from pandas._libs import Timedelta from pandas._libs import lib from pandas._libs.tslib import NaT, iNaT from pandas._libs.tslibs.period import ( Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX, - get_period_field_arr, period_asfreq_arr) + get_period_field_arr, period_asfreq_arr, + _validate_end_alias) from pandas._libs.tslibs import period as libperiod from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas._libs.tslibs.fields import isleapyear_arr @@ -566,6 +568,62 @@ def _scalar_data_error(cls, data): 'kind, {1} was passed'.format(cls.__name__, repr(data))) + def _format_native_types(self, na_rep=u'NaT', date_format=None): + values = self.astype(object) + + if date_format: + formatter = lambda dt: dt.strftime(date_format) + else: + formatter = lambda dt: u'%s' % dt + + if self.hasnans: + mask = self._isnan + values[mask] = na_rep + imask = ~mask + values[imask] = np.array([formatter(dt) for dt + in values[imask]]) + else: + values = np.array([formatter(dt) for dt in values]) + return values + + # Delegation... 
+ def strftime(self, date_format): + return self._format_native_types(date_format=date_format) + + def to_timestamp(self, freq=None, how='start'): + how = _validate_end_alias(how) + + end = how == 'E' + if end: + if freq == 'B': + # roll forward to ensure we land on B date + adjust = Timedelta(1, 'D') - Timedelta(1, 'ns') + return self.to_timestamp(how='start') + adjust + else: + adjust = Timedelta(1, 'ns') + return (self + 1).to_timestamp(how='start') - adjust + + if freq is None: + base, mult = _gfc(self.freq) + freq = frequencies.get_to_timestamp_base(base) + else: + freq = Period._maybe_convert_freq(freq) + + base, mult = _gfc(freq) + new_data = self.asfreq(freq, how) + + new_data = libperiod.periodarr_to_dt64arr(new_data._ndarray_values, + base) + return new_data + + @property + def start_time(self): + return self.to_timestamp(how='start') + + @property + def end_time(self): + return self.to_timestamp(how='end') + PeriodArray._add_comparison_ops() PeriodArray._add_datetimelike_methods() diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 02e659cadc3ae..781e235a2e123 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -1,5 +1,12 @@ """ datetimelike delegation + +Hmm OK. + +We want a couple of dispatches... 
+ +- + """ import numpy as np @@ -15,7 +22,7 @@ from pandas.core.accessor import PandasDelegate, delegate_names from pandas.core.base import NoNewAttributesMixin, PandasObject from pandas.core.indexes.datetimes import DatetimeIndex -from pandas.core.indexes.period import PeriodIndex +from pandas.core.indexes.period import PeriodIndex, PeriodArray from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.algorithms import take_1d @@ -46,7 +53,7 @@ def _get_values(self): else: if is_period_arraylike(data): - return PeriodIndex(data, copy=False, name=self.name) + return PeriodArray(data, copy=False, name=self.name) if is_datetime_arraylike(data): return DatetimeIndex(data, copy=False, name=self.name) @@ -270,12 +277,12 @@ def freq(self): return self._get_values().inferred_freq -# @delegate_names(delegate=PeriodIndex, -# accessors=PeriodIndex._datetimelike_ops, -# typ="property") -# @delegate_names(delegate=PeriodIndex, -# accessors=PeriodIndex._datetimelike_methods, -# typ="method") +@delegate_names(delegate=PeriodIndex, + accessors=PeriodIndex._datetimelike_ops, + typ="property") +@delegate_names(delegate=PeriodIndex, + accessors=PeriodIndex._datetimelike_methods, + typ="method") class PeriodProperties(Properties): """ Accessor object for datetimelike properties of the Series values. @@ -289,6 +296,8 @@ class PeriodProperties(Properties): Returns a Series indexed like the original Series. Raises TypeError if the Series does not contain datetimelike values. 
""" + def _delegate_method(self, name, *args, **kwargs): + pass class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index ac04851a6a08a..0cead28324c2c 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -27,8 +27,7 @@ from pandas._libs.lib import infer_dtype from pandas._libs import tslib, index as libindex, Timedelta from pandas._libs.tslibs.period import (Period, IncompatibleFrequency, - DIFFERENT_FREQ_INDEX, - _validate_end_alias) + DIFFERENT_FREQ_INDEX) from pandas._libs.tslibs import resolution, period from pandas.core.arrays.period import PeriodArray @@ -146,6 +145,7 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, """ _typ = 'periodindex' _attributes = ['name', 'freq'] + _delegated_to = PeriodArray # define my properties & methods for delegation _other_ops = [] @@ -285,6 +285,10 @@ def size(self): def shape(self): # Avoid materializing self._values return self._data.shape + + def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): + return self._data._format_native_types(na_rep=na_rep, + date_format=date_format) # ------------------------------------------------------------------------ # Dispatch and Wrap @@ -515,29 +519,8 @@ def to_timestamp(self, freq=None, how='start'): ------- DatetimeIndex """ - how = _validate_end_alias(how) - - end = how == 'E' - if end: - if freq == 'B': - # roll forward to ensure we land on B date - adjust = Timedelta(1, 'D') - Timedelta(1, 'ns') - return self.to_timestamp(how='start') + adjust - else: - adjust = Timedelta(1, 'ns') - return (self + 1).to_timestamp(how='start') - adjust - - if freq is None: - base, mult = _gfc(self.freq) - freq = frequencies.get_to_timestamp_base(base) - else: - freq = Period._maybe_convert_freq(freq) - - base, mult = _gfc(freq) - new_data = self.asfreq(freq, how) - - new_data = period.periodarr_to_dt64arr(new_data._ndarray_values, base) - 
return DatetimeIndex(new_data, freq='infer', name=self.name) + result = self._data.to_timestamp(freq=freq, how=how) + return DatetimeIndex(result, freq='infer', name=self.name) @property def inferred_type(self): @@ -781,25 +764,6 @@ def _apply_meta(self, rawarr): name=self.name) return rawarr - def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): - - values = self.astype(object).values - - if date_format: - formatter = lambda dt: dt.strftime(date_format) - else: - formatter = lambda dt: u'%s' % dt - - if self.hasnans: - mask = self._isnan - values[mask] = na_rep - imask = ~mask - values[imask] = np.array([formatter(dt) for dt - in values[imask]]) - else: - values = np.array([formatter(dt) for dt in values]) - return values - def __setstate__(self, state): """Necessary for making this object picklable""" From eb4506b2beaaf50ee421a306310a78dbb611b2b5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 27 Sep 2018 16:53:37 -0500 Subject: [PATCH 008/132] some more wip --- pandas/core/arrays/period.py | 1 + pandas/core/dtypes/common.py | 8 +++----- pandas/core/dtypes/generic.py | 2 ++ pandas/core/indexes/accessors.py | 9 +-------- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 9ef2ca349721c..72983dc6d020c 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -112,6 +112,7 @@ class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): All elements in the PeriodArray have the same `freq`. 
""" _attributes = ["freq"] + _typ = "period" # ABCPeriodArray # -------------------------------------------------------------------- # Constructors diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 94596abd78085..fc7bf2613a302 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -14,7 +14,7 @@ from pandas.core.dtypes.generic import ( ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries, ABCCategoricalIndex, ABCIndexClass, - ABCDateOffset) + ABCDateOffset, ABCPeriodArray) from pandas.core.dtypes.inference import ( # noqa:F401 is_bool, is_integer, is_hashable, is_iterator, is_float, is_dict_like, is_scalar, is_string_like, is_list_like, is_number, @@ -498,7 +498,6 @@ def is_period_dtype(arr_or_dtype): >>> is_period_dtype(pd.PeriodIndex([], freq="A")) True """ - # TODO: Consider making Period an instance of PeriodDtype if arr_or_dtype is None: return False @@ -636,11 +635,10 @@ def is_period_arraylike(arr): >>> is_period_arraylike(pd.PeriodIndex(["2017-01-01"], freq="D")) True """ - - if isinstance(arr, ABCPeriodIndex): + if isinstance(arr, (ABCPeriodIndex, ABCPeriodArray)): return True elif isinstance(arr, (np.ndarray, ABCSeries)): - return arr.dtype == object and lib.infer_dtype(arr) == 'period' + return is_period_dtype(arr.dtype) return getattr(arr, 'inferred_type', None) == 'period' diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index cb54c94d29205..8d1dfbb89535f 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -53,6 +53,8 @@ def _check(cls, inst): ('sparse_array', 'sparse_series')) ABCCategorical = create_pandas_abc_type("ABCCategorical", "_typ", ("categorical")) +ABCPeriodArray = create_pandas_abc_type("ABCPeriodArray", "_typ", + ("period", )) ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period", )) ABCDateOffset = create_pandas_abc_type("ABCDateOffset", "_typ", ("dateoffset",)) diff --git 
a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 781e235a2e123..6b583728bb3ce 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -1,12 +1,5 @@ """ datetimelike delegation - -Hmm OK. - -We want a couple of dispatches... - -- - """ import numpy as np @@ -53,7 +46,7 @@ def _get_values(self): else: if is_period_arraylike(data): - return PeriodArray(data, copy=False, name=self.name) + return PeriodArray(data, copy=False) if is_datetime_arraylike(data): return DatetimeIndex(data, copy=False, name=self.name) From 1b9fd7aa0018b7e615c58bbe9b98228171978570 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Sep 2018 06:18:47 -0500 Subject: [PATCH 009/132] tshift, shift --- pandas/core/accessor.py | 10 +++---- pandas/core/arrays/datetimelike.py | 7 ++++- pandas/core/arrays/period.py | 42 +++++++++++++++++++++++++++++- pandas/core/indexes/accessors.py | 10 +++---- pandas/core/indexes/period.py | 40 ++++++++++++++-------------- 5 files changed, 75 insertions(+), 34 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 34ed49fca890e..6b6e56cc98254 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -68,8 +68,6 @@ def _add_delegate_accessors(cls, delegate, accessors, typ, overwrite : boolean, default False overwrite the method/property in the target class if it exists """ - delegated_to = getattr(delegate, '_delegated_to', delegate) - def _create_delegator_property(name): def _getter(self): @@ -82,7 +80,7 @@ def _setter(self, new_values): _setter.__name__ = name return property(fget=_getter, fset=_setter, - doc=getattr(delegated_to, name).__doc__) + doc=getattr(delegate, name).__doc__) def _create_delegator_method(name): @@ -90,7 +88,7 @@ def f(self, *args, **kwargs): return self._delegate_method(name, *args, **kwargs) f.__name__ = name - f.__doc__ = getattr(delegated_to, name).__doc__ + f.__doc__ = getattr(delegate, name).__doc__ return f @@ -107,12 +105,10 @@ 
def f(self, *args, **kwargs): def delegate_names(delegate, accessors, typ, overwrite=False): + # type (type, list, str, bool) -> Callable """ Add delegated names to a class using a class decorator. - Methods and attributes are delegated to ... - - This provides an alternative usage to directly calling `_add_delegate_accessors` below a class definition. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8d0c7e3bb3612..3125cf0bd7238 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -553,6 +553,9 @@ def shift(self, periods, freq=None): -------- Index.shift : Shift values of Index. """ + return self._tshift(periods, freq=freq) + + def _tshift(self, periods, freq=None): if freq is not None and freq != self.freq: if isinstance(freq, compat.string_types): freq = frequencies.to_offset(freq) @@ -600,7 +603,9 @@ def __add__(self, other): elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these - result = self.shift(other) + # TODO: make a _shift method that's consistent between + # Index and EA + result = self._tshift(other) # array-like others elif is_timedelta64_dtype(other): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 72983dc6d020c..40b3866b9a58e 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -113,9 +113,32 @@ class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): """ _attributes = ["freq"] _typ = "period" # ABCPeriodArray + + # Names others delegate to us on + _other_ops = [] + _bool_ops = ['is_leap_year'] + _object_ops = ['start_time', 'end_time', 'freq'] + _field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second', + 'weekofyear', 'weekday', 'week', 'dayofweek', + 'dayofyear', 'quarter', 'qyear', + 'days_in_month', 'daysinmonth'] + _datetimelike_ops = _field_ops + _object_ops + _bool_ops + _datetimelike_methods = ['strftime', 'to_timestamp', 
'asfreq'] + # -------------------------------------------------------------------- # Constructors + @property + def _foo(self): + return 'foo!' + + @_foo.setter + def _foo(self, value): + print("setting foo to ", value) + + def _bar(self, arg, kwarg=1): + print(arg, kwarg) + def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, periods=None, tz=None, dtype=None, copy=False, **fields): @@ -511,11 +534,27 @@ def shift(self, periods=1): ------- shifted : Period Array/Index """ + # We have two kinds of shift. + # 1. ExtensionArray.shift: move positions of each value, + # fill NA on the end + # 2. Datelike.tshift: move each value through time + # Each Datelike array will implement both. It's up to the + # caller to call the correct one. + return self._ea_shift(periods=periods) + + def _ea_shift(self, periods=1): # TODO: remove from DatetimeLikeArrayMixin # The semantics for Index.shift differ from EA.shift # then just call super. return ExtensionArray.shift(self, periods) + def _tshift(self, n, freq=None): + # TODO: docs + values = self.values + n * self.freq.n + if self.hasnans: + values[self._isnan] = iNaT + return self._simple_new(values, freq=self.freq) + def _maybe_convert_timedelta(self, other): """ Convert timedelta-like input to an integer multiple of self.freq @@ -628,7 +667,8 @@ def end_time(self): PeriodArray._add_comparison_ops() PeriodArray._add_datetimelike_methods() - +# PeriodArray._add_numeric_methods_disabled() +# PeriodArray._add_logical_methods_disabled() # ------------------------------------------------------------------- # Constructor Helpers diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 6b583728bb3ce..4755e6d38b7df 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -270,11 +270,11 @@ def freq(self): return self._get_values().inferred_freq -@delegate_names(delegate=PeriodIndex, - accessors=PeriodIndex._datetimelike_ops, 
+@delegate_names(delegate=PeriodArray, + accessors=PeriodArray._datetimelike_ops, typ="property") -@delegate_names(delegate=PeriodIndex, - accessors=PeriodIndex._datetimelike_methods, +@delegate_names(delegate=PeriodArray, + accessors=PeriodArray._datetimelike_methods, typ="method") class PeriodProperties(Properties): """ @@ -289,8 +289,6 @@ class PeriodProperties(Properties): Returns a Series indexed like the original Series. Raises TypeError if the Series does not contain datetimelike values. """ - def _delegate_method(self, name, *args, **kwargs): - pass class CombinedDatetimelikeProperties(DatetimeProperties, TimedeltaProperties): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0cead28324c2c..42cf3c71eb26c 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -1,6 +1,7 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime import numpy as np +import operator import warnings from pandas.core import common as com @@ -20,6 +21,7 @@ import pandas.tseries.frequencies as frequencies from pandas.tseries.frequencies import get_freq_code as _gfc +from pandas.core.accessor import PandasDelegate, delegate_names from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin from pandas.core.tools.datetimes import parse_time_string @@ -64,8 +66,23 @@ def _new_PeriodIndex(cls, **d): return cls._from_ordinals(values=values, **d) +class PeriodDelegateMixin(PandasDelegate): + """ + Delegate from PeriodIndex to PeriodArray. 
+ """ + def _delegate_property_get(self, name, *args, **kwargs): + return getattr(self._data, name) + + def _delegate_property_set(self, name, value, *args, **kwargs): + setattr(self._data, name, value) + + def _delegate_method(self, name, *args, **kwargs): + return operator.methodcaller(name, *args, **kwargs)(self._data) + + +# @delegate_names(PeriodArray, PeriodArray._datetimelike_methods, typ="method") class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, - Int64Index): + Int64Index, PeriodDelegateMixin): """ Immutable ndarray holding ordinal values indicating regular periods in time such as particular years, quarters, months, etc. @@ -145,19 +162,9 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, """ _typ = 'periodindex' _attributes = ['name', 'freq'] - _delegated_to = PeriodArray + # _delegated_to = PeriodArray # define my properties & methods for delegation - _other_ops = [] - _bool_ops = ['is_leap_year'] - _object_ops = ['start_time', 'end_time', 'freq'] - _field_ops = ['year', 'month', 'day', 'hour', 'minute', 'second', - 'weekofyear', 'weekday', 'week', 'dayofweek', - 'dayofyear', 'quarter', 'qyear', - 'days_in_month', 'daysinmonth'] - _datetimelike_ops = _field_ops + _object_ops + _bool_ops - _datetimelike_methods = ['strftime', 'to_timestamp', 'asfreq'] - _is_numeric_dtype = False _infer_as_myclass = True @@ -339,13 +346,8 @@ def shift(self, n): ------- shifted : PeriodIndex """ - # TODO: docs - # Note, this differs from the definition of ExtensionArray.shift - # so we don't dispatch - values = self._ndarray_values + n * self.freq.n - if self.hasnans: - values[self._isnan] = tslib.iNaT - return self._shallow_copy(values=values) + i8values = self._data._tshift(n) + return self._simple_new(i8values, name=self.name, freq=self.freq) def _coerce_scalar_to_index(self, item): """ From 0fa0ed10d13ec7a953bcdc04f1565e67b228c07e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Sep 2018 08:24:51 -0500 Subject: [PATCH 010/132] Arithmetic --- 
pandas/core/indexes/period.py | 36 ++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 42cf3c71eb26c..c58db32c5ef53 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -23,7 +23,7 @@ from pandas.core.accessor import PandasDelegate, delegate_names from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index -from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin +from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op from pandas.core.tools.datetimes import parse_time_string from pandas._libs.lib import infer_dtype @@ -794,6 +794,40 @@ def __setstate__(self, state): _unpickle_compat = __setstate__ + @classmethod + def _add_datetimelike_methods(cls): + """ + add in the datetimelike methods (as we may have to override the + superclass) + """ + # TODO(DatetimeArray): move this up to DatetimeArrayMixin + + def __add__(self, other): + # dispatch to ExtensionArray implementation + result = self._data.__add__(other) + return wrap_arithmetic_op(self, other, result) + + cls.__add__ = __add__ + + def __radd__(self, other): + # alias for __add__ + return self.__add__(other) + cls.__radd__ = __radd__ + + def __sub__(self, other): + # dispatch to ExtensionArray implementation + result = self._data.__sub__(other) + return wrap_arithmetic_op(self, other, result) + + cls.__sub__ = __sub__ + + def __rsub__(self, other): + result = self._data.__rsub__(other) + return wrap_arithmetic_op(self, other, result) + + cls.__rsub__ = __rsub__ + + # PeriodIndex._add_comparison_ops() # PeriodIndex._add_numeric_methods_disabled() From 3247ea8f5d5ebe13b49c7930111d0836b50bae06 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Sep 2018 08:31:50 -0500 Subject: [PATCH 011/132] repr changes --- pandas/tests/indexes/period/test_formats.py | 36 ++++++++++----------- 1 
file changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/tests/indexes/period/test_formats.py b/pandas/tests/indexes/period/test_formats.py index 2107055f454ef..2a893ae16e30d 100644 --- a/pandas/tests/indexes/period/test_formats.py +++ b/pandas/tests/indexes/period/test_formats.py @@ -118,38 +118,38 @@ def test_representation_to_series(self): exp1 = """Series([], dtype: period[D])""" - exp2 = """0 2011-01-01 + exp2 = """0 2011-01-01 dtype: period[D]""" - exp3 = """0 2011-01-01 -1 2011-01-02 + exp3 = """0 2011-01-01 +1 2011-01-02 dtype: period[D]""" - exp4 = """0 2011-01-01 -1 2011-01-02 -2 2011-01-03 + exp4 = """0 2011-01-01 +1 2011-01-02 +2 2011-01-03 dtype: period[D]""" - exp5 = """0 2011 -1 2012 -2 2013 + exp5 = """0 2011 +1 2012 +2 2013 dtype: period[A-DEC]""" - exp6 = """0 2011-01-01 09:00 -1 2012-02-01 10:00 -2 NaT + exp6 = """0 2011-01-01 09:00 +1 2012-02-01 10:00 +2 NaT dtype: period[H]""" - exp7 = """0 2013Q1 + exp7 = """0 2013Q1 dtype: period[Q-DEC]""" - exp8 = """0 2013Q1 -1 2013Q2 + exp8 = """0 2013Q1 +1 2013Q2 dtype: period[Q-DEC]""" - exp9 = """0 2013Q1 -1 2013Q2 -2 2013Q3 + exp9 = """0 2013Q1 +1 2013Q2 +2 2013Q3 dtype: period[Q-DEC]""" for idx, expected in zip([idx1, idx2, idx3, idx4, idx5, From c162cdd6a4668e05c4f220a69113899ea57fbcf3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Sep 2018 10:22:32 -0500 Subject: [PATCH 012/132] wip --- pandas/core/arrays/period.py | 11 ++++++++++- pandas/core/indexes/period.py | 25 +++++++++++++++++++++++++ pandas/tests/indexes/period/test_ops.py | 11 +++++++---- pandas/tseries/frequencies.py | 2 ++ 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 40b3866b9a58e..6a99f081e468b 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -329,6 +329,10 @@ def freq(self, value): 'The {cls}.freq setter is not guaranteed to work.') warnings.warn(msg.format(cls=type(self).__name__), FutureWarning, 
stacklevel=2) + if value is not None: + value = frequencies.to_offset(value) + self._validate_frequency(self, value) + self._freq = value @classmethod @@ -631,6 +635,8 @@ def strftime(self, date_format): return self._format_native_types(date_format=date_format) def to_timestamp(self, freq=None, how='start'): + from pandas import DatetimeIndex + how = _validate_end_alias(how) end = how == 'E' @@ -654,7 +660,10 @@ def to_timestamp(self, freq=None, how='start'): new_data = libperiod.periodarr_to_dt64arr(new_data._ndarray_values, base) - return new_data + # TODO: what should the return type of this be? + # Eventually a DatetimeArray makes sense. + # But for now let's do a DatetimeIndex? + return DatetimeIndex(new_data) @property def start_time(self): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index c58db32c5ef53..a17657bcbef1b 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -163,6 +163,13 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, _typ = 'periodindex' _attributes = ['name', 'freq'] # _delegated_to = PeriodArray + # # TODO: do this in a decorator + # _other_ops = PeriodArray._other_ops + # _bool_ops = PeriodArray._bool_ops + # _object_ops = PeriodArray._object_ops + # _field_ops = PeriodArray._field_ops + # _datetimelike_ops = PeriodArray._datetimelike_ops + # _datetimelike_methods = PeriodArray._datetimelike_methods # define my properties & methods for delegation _is_numeric_dtype = False @@ -277,8 +284,14 @@ def _maybe_box_as_values(self, values, **attribs): @property def freq(self): """Return the frequency object if it is set, otherwise None""" + # TODO(DatetimeArray): remove return self._data.freq + @freq.setter + def freq(self, value): + # TODO(DatetimeArray): remove + self._data.freq = value + @property def asi8(self): return self._data.asi8 @@ -296,6 +309,10 @@ def shape(self): def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): return 
self._data._format_native_types(na_rep=na_rep, date_format=date_format) + + @property + def is_leap_year(self): + return self._data.is_leap_year # ------------------------------------------------------------------------ # Dispatch and Wrap @@ -303,6 +320,14 @@ def asfreq(self, freq=None, how='E'): result = self._data.asfreq(freq=freq, how=how) return self._simple_new(result, name=self.name) + def _nat_new(self, box=True): + # TODO(DatetimeArray): remove this + result = self._data._nat_new(box=box) + if box: + result = self._simple_new(result, name=self.name) + return result + + # ------------------------------------------------------------------------ # Indexing @cache_readonly diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 85aa3f6a38fb3..0b97391da76a1 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -8,6 +8,7 @@ from pandas import (DatetimeIndex, PeriodIndex, Series, Period, _np_version_under1p10, Index) +from pandas.core.arrays import PeriodArray from pandas.tests.test_base import Ops @@ -22,9 +23,9 @@ def setup_method(self, method): def test_ops_properties(self): f = lambda x: isinstance(x, PeriodIndex) - self.check_ops_properties(PeriodIndex._field_ops, f) - self.check_ops_properties(PeriodIndex._object_ops, f) - self.check_ops_properties(PeriodIndex._bool_ops, f) + self.check_ops_properties(PeriodArray._field_ops, f) + self.check_ops_properties(PeriodArray._object_ops, f) + self.check_ops_properties(PeriodArray._bool_ops, f) def test_minmax(self): @@ -392,7 +393,9 @@ def test_equals(self, freq): assert not idx.equals(pd.Series(idx2)) # same internal, different tz - idx3 = pd.PeriodIndex._simple_new(idx.asi8, freq='H') + idx3 = pd.PeriodIndex._simple_new( + idx.values._simple_new(idx.values.asi8, freq="H") + ) tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) assert not idx.equals(idx3) assert not idx.equals(idx3.copy()) diff --git 
a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index d6e4824575468..292208d130432 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -233,6 +233,7 @@ def get_offset(name): def infer_freq(index, warn=True): + # type: (Union[Series, Index, PeriodArray]) -> Freq """ Infer the most likely frequency given the input index. If the frequency is uncertain, a warning will be printed. @@ -250,6 +251,7 @@ def infer_freq(index, warn=True): TypeError if the index is not datetime-like ValueError if there are less than three values. """ + import pdb; pdb.set_trace() import pandas as pd if isinstance(index, ABCSeries): From 611d37838765fb1231246724c059e3dab0fe2daa Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Sep 2018 10:24:51 -0500 Subject: [PATCH 013/132] freq setter --- pandas/core/arrays/period.py | 8 ++------ pandas/tseries/frequencies.py | 1 - 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 6a99f081e468b..e185fbcad41d2 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -327,12 +327,8 @@ def freq(self, value): msg = ('Setting {cls}.freq has been deprecated and will be ' 'removed in a future version; use {cls}.asfreq instead. ' 'The {cls}.freq setter is not guaranteed to work.') - warnings.warn(msg.format(cls=type(self).__name__), - FutureWarning, stacklevel=2) - if value is not None: - value = frequencies.to_offset(value) - self._validate_frequency(self, value) - + warnings.warn(msg.format(cls='PeriodIndex'), + FutureWarning, stacklevel=3) self._freq = value @classmethod diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 292208d130432..22c7447768fff 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -251,7 +251,6 @@ def infer_freq(index, warn=True): TypeError if the index is not datetime-like ValueError if there are less than three values. 
""" - import pdb; pdb.set_trace() import pandas as pd if isinstance(index, ABCSeries): From fb2ff8266cb01a33c5f06c3ebc4bab02c4678666 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Sep 2018 10:29:12 -0500 Subject: [PATCH 014/132] Added disabled ops --- pandas/core/indexes/period.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a17657bcbef1b..4de550e81e3ae 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -327,7 +327,6 @@ def _nat_new(self, box=True): result = self._simple_new(result, name=self.name) return result - # ------------------------------------------------------------------------ # Indexing @cache_readonly @@ -855,8 +854,8 @@ def __rsub__(self, other): # PeriodIndex._add_comparison_ops() -# PeriodIndex._add_numeric_methods_disabled() -# PeriodIndex._add_logical_methods_disabled() +PeriodIndex._add_numeric_methods_disabled() +PeriodIndex._add_logical_methods_disabled() PeriodIndex._add_datetimelike_methods() From 25a380f0a9389e88f1dc70cf98a5ce76d64b177e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Sep 2018 10:33:39 -0500 Subject: [PATCH 015/132] copy --- pandas/core/arrays/period.py | 2 ++ pandas/tests/indexes/common.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index e185fbcad41d2..cbea53c6ac4db 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -193,6 +193,8 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, base2, _ = _gfc(freq) data = libperiod.period_asfreq_arr(data._ndarray_values, base1, base2, 1) + if copy: + data = data.copy() return cls._simple_new(data, freq=freq) # not array / index diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 49a247608ab0b..6fe97c49d1f68 100644 --- a/pandas/tests/indexes/common.py +++ 
b/pandas/tests/indexes/common.py @@ -309,7 +309,8 @@ def test_ensure_copied_data(self): index_type = index.__class__ result = index_type(index.values, copy=True, **init_kwargs) tm.assert_index_equal(index, result) - tm.assert_numpy_array_equal(index.values, result.values, + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, check_same='copy') if isinstance(index, PeriodIndex): From 1b2c4ec17377334fa45876331f3aae26bf53a428 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Sep 2018 12:39:21 -0500 Subject: [PATCH 016/132] Support concat --- pandas/core/dtypes/concat.py | 10 ++++++---- pandas/core/indexes/base.py | 1 - 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index c1aab961dcc9f..dac972f40b733 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -468,10 +468,12 @@ def _concat_datetime(to_concat, axis=0, typs=None): axis=axis).view(_TD_DTYPE) elif any(typ.startswith('period') for typ in typs): - # PeriodIndex must be handled by PeriodIndex, - # Thus can't meet this condition ATM - # Must be changed when we adding PeriodDtype - raise NotImplementedError("unable to concat PeriodDtype") + assert len(typs) == 1 + # TODO: Need a generic way to say "concatenate these by + # concatenating the underlying EA and wrapping. 
+ cls = to_concat[0] + new_values = cls._concat_same_type(to_concat) + return new_values def _convert_datetimelike_to_object(x): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 24dc8c60c28c4..97e4d4f9ceb37 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -305,7 +305,6 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, elif (is_period_dtype(data) or (dtype is not None and is_period_dtype(dtype))): - # TODO: ensure is_period_dtype for PeriodArray from pandas import PeriodIndex result = PeriodIndex(data, copy=copy, name=name, **kwargs) if dtype is not None and _o_dtype == dtype: From d04293e1b0707f1a94153feda2ddd01021e76c52 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Sep 2018 12:50:33 -0500 Subject: [PATCH 017/132] object ctor --- pandas/core/indexes/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 97e4d4f9ceb37..44a945da8aaec 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -261,7 +261,6 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, return cls._simple_new(data, name) from .range import RangeIndex - # range if isinstance(data, RangeIndex): return RangeIndex(start=data, copy=copy, dtype=dtype, name=name) @@ -303,7 +302,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, else: return result - elif (is_period_dtype(data) or + elif ((is_period_dtype(data) and not is_object_dtype(dtype)) or (dtype is not None and is_period_dtype(dtype))): from pandas import PeriodIndex result = PeriodIndex(data, copy=copy, name=name, **kwargs) From eacad390134a31661afcca89d6508e192ce77130 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Sep 2018 15:32:58 -0500 Subject: [PATCH 018/132] Updates --- pandas/core/arrays/categorical.py | 3 +- pandas/core/arrays/period.py | 44 ++++++++++++++++------ pandas/core/dtypes/dtypes.py | 4 +- 
pandas/core/dtypes/generic.py | 7 +++- pandas/core/indexes/period.py | 2 +- pandas/tests/indexes/period/test_period.py | 13 ++++--- pandas/util/testing.py | 14 ++++++- 7 files changed, 62 insertions(+), 25 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 216bccf7d6309..2413b762cf690 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1259,7 +1259,8 @@ def __array__(self, dtype=None): if dtype==None (default), the same dtype as categorical.categories.dtype """ - ret = take_1d(self.categories.values, self._codes) + values = np.asarray(self.categories.values) + ret = take_1d(values, self._codes) if dtype and not is_dtype_equal(dtype, self.categories.dtype): return np.asarray(ret, dtype) if is_extension_array_dtype(ret): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index cbea53c6ac4db..b4d51fe3131dc 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -4,6 +4,8 @@ import numpy as np +from pandas import compat +from pandas.compat.numpy import function as nv from pandas._libs import Timedelta from pandas._libs import lib from pandas._libs.tslib import NaT, iNaT @@ -14,10 +16,7 @@ from pandas._libs.tslibs import period as libperiod from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas._libs.tslibs.fields import isleapyear_arr - -from pandas import compat from pandas.util._decorators import cache_readonly - from pandas.core.dtypes.common import ( is_integer_dtype, is_float_dtype, is_period_dtype, is_float, is_integer, pandas_dtype, is_scalar, @@ -112,7 +111,7 @@ class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): All elements in the PeriodArray have the same `freq`. 
""" _attributes = ["freq"] - _typ = "period" # ABCPeriodArray + _typ = "periodarray" # ABCPeriodArray # Names others delegate to us on _other_ops = [] @@ -253,15 +252,16 @@ def isna(self): def take(self, indices, allow_fill=False, fill_value=None): from pandas.core.algorithms import take + from pandas import isna - if fill_value is None: - fill_value = iNaT - elif isinstance(fill_value, Period): - fill_value = fill_value.ordinal - elif fill_value is NaT: - fill_value = iNaT - elif fill_value != self.dtype.na_value: - raise ValueError("Expected a Period.") + if allow_fill: + if isna(fill_value): + fill_value = iNaT + elif isinstance(fill_value, Period): + fill_value = fill_value.ordinal + else: + msg = "'fill_value' should be a Period. Got '{}'." + raise ValueError(msg.format(fill_value)) new_values = take(self._data, indices, @@ -628,6 +628,26 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None): values = np.array([formatter(dt) for dt in values]) return values + def view(self, dtype=None, type=None): + # This is to support PeriodIndex.view('i8') + # I don't like adding this, + return self._data.view(dtype=dtype) + + def repeat(self, repeats, *args, **kwargs): + """ + Repeat elements of a Categorical. + + See also + -------- + numpy.ndarray.repeat + """ + # TODO: Share with Categorical.repeat? + # need to use ndarray_values in Categorical + # and some kind of _constructor (from_ordinals, from_codes). + nv.validate_repeat(args, kwargs) + values = self._ndarray_values.repeat(repeats) + return self._from_ordinals(values, self.freq) + # Delegation... 
def strftime(self, date_format): return self._format_native_types(date_format=date_format) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 00c66b4136fa9..29a3552d2e47a 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -4,7 +4,7 @@ import numpy as np from pandas import compat from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex -from pandas._libs.tslibs import Period, iNaT +from pandas._libs.tslibs import Period, NaT from .base import ExtensionDtype, _DtypeOpsMixin @@ -668,7 +668,7 @@ def name(self): @property def na_value(self): - return iNaT + return NaT def __hash__(self): # make myself hashable diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 8d1dfbb89535f..f6926a192a724 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -54,13 +54,16 @@ def _check(cls, inst): ABCCategorical = create_pandas_abc_type("ABCCategorical", "_typ", ("categorical")) ABCPeriodArray = create_pandas_abc_type("ABCPeriodArray", "_typ", - ("period", )) + ("periodarray", )) ABCPeriod = create_pandas_abc_type("ABCPeriod", "_typ", ("period", )) ABCDateOffset = create_pandas_abc_type("ABCDateOffset", "_typ", ("dateoffset",)) ABCInterval = create_pandas_abc_type("ABCInterval", "_typ", ("interval", )) ABCExtensionArray = create_pandas_abc_type("ABCExtensionArray", "_typ", - ("extension", "categorical",)) + ("extension", + "categorical", + "periodarray", + )) class _ABCGeneric(type): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 4de550e81e3ae..6961a45609d6a 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -216,6 +216,7 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): result = object.__new__(cls) result._data = values result.name = name + result._reset_identity() return result @classmethod @@ -226,7 +227,6 @@ def _from_ordinals(cls, values, name=None, freq=None, **kwargs): 
""" data = PeriodArray._from_ordinals(values, freq=freq) result = cls._simple_new(data, name=name) - result._reset_identity() return result def _shallow_copy(self, values=None, **kwargs): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 405edba83dc7a..1ea7b519bb179 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -8,6 +8,7 @@ from pandas import (PeriodIndex, period_range, DatetimeIndex, NaT, Index, Period, Series, DataFrame, date_range, offsets) +from pandas.core.arrays import PeriodArray from ..datetimelike import DatetimeLike @@ -139,18 +140,18 @@ def test_view_asi8(self): def test_values(self): idx = pd.PeriodIndex([], freq='M') + exp = PeriodArray([], freq='M') + tm.assert_period_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.get_values(), exp.values) - exp = np.array([], dtype=np.object) - tm.assert_numpy_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(idx._ndarray_values, exp) idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') - exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) - tm.assert_numpy_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp) + exp = PeriodArray([pd.Period('2011-01', freq='M'), pd.NaT]) + tm.assert_period_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.get_values(), exp.values) exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx._ndarray_values, exp) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 3db251e89842d..f60152f36b81c 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -19,7 +19,11 @@ import numpy as np import pandas as pd -from pandas.core.arrays import ExtensionArray, IntervalArray +from pandas.core.arrays import ( + ExtensionArray, + IntervalArray, + PeriodArray, 
+) from pandas.core.dtypes.missing import array_equivalent from pandas.core.dtypes.common import ( is_datetimelike_v_numeric, @@ -1047,6 +1051,14 @@ def assert_interval_array_equal(left, right, exact='equiv', assert_attr_equal('closed', left, right, obj=obj) +def assert_period_array_equal(left, right, obj='PeriodArray'): + _check_isinstance(left, right, PeriodArray) + + assert_numpy_array_equal(left.values, right.values, + obj='{obj}.values'.format(obj=obj)) + assert_attr_equal('freq', left, right, obj=obj) + + def raise_assert_detail(obj, message, left, right, diff=None): if isinstance(left, np.ndarray): left = pprint_thing(left) From 70cd3b8d8c47947281b1ef53407224ef2fe25821 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Sep 2018 15:34:53 -0500 Subject: [PATCH 019/132] lint --- pandas/core/arrays/period.py | 5 +---- pandas/core/indexes/period.py | 20 +++++++------------- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index b4d51fe3131dc..b1d8afe6355c6 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -225,10 +225,6 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, data = libperiod.extract_ordinals(data, freq) return cls._from_ordinals(data, freq=freq) - @property - def asi8(self): - return self._data.view("i8") - @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): return cls(scalars, dtype=dtype, copy=copy) @@ -697,6 +693,7 @@ def end_time(self): # PeriodArray._add_numeric_methods_disabled() # PeriodArray._add_logical_methods_disabled() + # ------------------------------------------------------------------- # Constructor Helpers diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 6961a45609d6a..82fc65ab4040f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -9,28 +9,23 @@ is_integer, is_float, is_integer_dtype, - is_float_dtype, - is_scalar, 
- is_datetime64_dtype, is_datetime64_any_dtype, is_period_dtype, is_bool_dtype, pandas_dtype, - ensure_object) +) -import pandas.tseries.frequencies as frequencies -from pandas.tseries.frequencies import get_freq_code as _gfc - -from pandas.core.accessor import PandasDelegate, delegate_names +from pandas.core.accessor import PandasDelegate from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index -from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op +from pandas.core.indexes.datetimelike import ( + DatelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op +) from pandas.core.tools.datetimes import parse_time_string -from pandas._libs.lib import infer_dtype -from pandas._libs import tslib, index as libindex, Timedelta +from pandas._libs import tslib, index as libindex from pandas._libs.tslibs.period import (Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX) -from pandas._libs.tslibs import resolution, period +from pandas._libs.tslibs import resolution from pandas.core.arrays.period import PeriodArray from pandas.core.base import _shared_docs @@ -852,7 +847,6 @@ def __rsub__(self, other): cls.__rsub__ = __rsub__ - # PeriodIndex._add_comparison_ops() PeriodIndex._add_numeric_methods_disabled() PeriodIndex._add_logical_methods_disabled() From 9b2288919427d3e63e0d249e912c98f4956fabfd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Sep 2018 15:35:34 -0500 Subject: [PATCH 020/132] lint --- pandas/core/arrays/period.py | 96 +++++++++---------- pandas/core/indexes/accessors.py | 3 +- pandas/core/indexes/period.py | 91 ++++++++---------- pandas/tests/extension/test_period.py | 13 +-- .../tests/indexes/period/test_construction.py | 3 +- pandas/tests/indexes/period/test_indexing.py | 4 +- pandas/tests/indexes/period/test_period.py | 1 + 7 files changed, 104 insertions(+), 107 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index b1d8afe6355c6..061cb62399858 100644 
--- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -101,8 +101,8 @@ class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): There are two components to a PeriodArray - - ordinals - - freq + - ordinals : integer ndarray + - freq : pd.tseries.offsets.Tick The values are physically stored as an ndarray of integers. These are called "ordinals" and represent some kind of offset from a base. @@ -126,26 +126,26 @@ class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): # -------------------------------------------------------------------- # Constructors + def __init__(self, values, freq=None): + # type: (np.ndarray[int64], Union[str, Tick]) -> None + values = np.array(values, dtype='int64', copy=False) + self._data = values + if freq is None: + raise ValueError('freq is not specified and cannot be inferred') + self._freq = Period._maybe_convert_freq(freq) - @property - def _foo(self): - return 'foo!' - - @_foo.setter - def _foo(self, value): - print("setting foo to ", value) - - def _bar(self, arg, kwarg=1): - print(arg, kwarg) - - def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, - periods=None, tz=None, dtype=None, copy=False, - **fields): + @classmethod + def _complex_new(cls, data=None, ordinal=None, freq=None, start=None, + end=None, periods=None, tz=None, dtype=None, copy=False, + **fields): from pandas import PeriodIndex, DatetimeIndex, Int64Index # copy-pase from PeriodIndex.__new__ with slight adjustments. # # - removed all uses of name + # - refactored to smaller, more dedicated constructors. 
+ + # TODO: move fields validation to range init valid_field_set = {'year', 'month', 'day', 'quarter', 'hour', 'minute', 'second'} @@ -199,9 +199,11 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, # not array / index if not isinstance(data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)): - if is_scalar(data) or isinstance(data, Period): - # XXX - cls._scalar_data_error(data) + if is_scalar(data): + raise TypeError('{0}(...) must be called with a ' + 'collection of some ' + 'kind, {1} was passed'.format(cls.__name__, + repr(data))) # other iterable of some kind if not isinstance(data, (list, tuple)): @@ -221,17 +223,33 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, # anything else, likely an array of strings or periods data = ensure_object(data) - freq = freq or libperiod.extract_freq(data) - data = libperiod.extract_ordinals(data, freq) - return cls._from_ordinals(data, freq=freq) + return cls._from_periods(data, freq=freq) + + @classmethod + def _from_ordinals(cls, values, freq=None): + # type: (ndarray[int], Optional[Tick]) -> PeriodArray + """ + Values should be int ordinals + `__new__` & `_simple_new` cooerce to ordinals and call this method + """ + return cls(values, freq=freq) @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): - return cls(scalars, dtype=dtype, copy=copy) + # type: (Sequence[Optional[Period]], Dtype, bool) -> PeriodArray + return cls._complex_new(scalars, dtype=dtype, copy=copy) @classmethod def _from_factorized(cls, values, original): - return cls(values, dtype=original.dtype) + # type: (Sequence[Optional[Period]], PeriodArray) -> PeriodArray + return cls._from_periods(values, freq=original.freq) + + @classmethod + def _from_periods(cls, periods, freq=None): + # type: (np.ndarray[Optional[Period]], Optional[Tick]) -> PeriodArray + freq = freq or libperiod.extract_freq(periods) + ordinals = libperiod.extract_ordinals(periods, freq) + return 
cls._from_ordinals(ordinals, freq=freq) def __repr__(self): return '\n{}\nLength: {}, dtype: {}'.format( @@ -268,6 +286,7 @@ def take(self, indices, allow_fill=False, fill_value=None): @property def nbytes(self): + # TODO(DatetimeArray): remove return self._data.nbytes def copy(self, deep=False): @@ -335,7 +354,6 @@ def _simple_new(cls, values, freq=None, **kwargs): Values can be any type that can be coerced to Periods. Ordinals in an ndarray are fastpath-ed to `_from_ordinals` """ - if not is_integer_dtype(values): values = np.array(values, copy=False) if len(values) > 0 and is_float_dtype(values): @@ -343,23 +361,7 @@ def _simple_new(cls, values, freq=None, **kwargs): .format(cls=cls.__name__)) return cls(values, freq=freq) - return cls._from_ordinals(values, freq) - - @classmethod - def _from_ordinals(cls, values, freq=None): - """ - Values should be int ordinals - `__new__` & `_simple_new` cooerce to ordinals and call this method - """ - - values = np.array(values, dtype='int64', copy=False) - - result = object.__new__(cls) - result._data = values - if freq is None: - raise ValueError('freq is not specified and cannot be inferred') - result._freq = Period._maybe_convert_freq(freq) - return result + return cls(values, freq=freq) @classmethod def _generate_range(cls, start, end, periods, freq, fields): @@ -600,12 +602,6 @@ def _maybe_convert_timedelta(self, other): raise IncompatibleFrequency(msg.format(cls=type(self).__name__, freqstr=self.freqstr)) - @classmethod - def _scalar_data_error(cls, data): - raise TypeError('{0}(...) 
must be called with a collection of some ' - 'kind, {1} was passed'.format(cls.__name__, - repr(data))) - def _format_native_types(self, na_rep=u'NaT', date_format=None): values = self.astype(object) @@ -697,6 +693,10 @@ def end_time(self): # ------------------------------------------------------------------- # Constructor Helpers +def to_period_array(data): + return PeriodArray._complex_new(data, freq=None) + + def _get_ordinal_range(start, end, periods, freq, mult=1): if com.count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 4755e6d38b7df..f6b3e7ea71d05 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -46,7 +46,8 @@ def _get_values(self): else: if is_period_arraylike(data): - return PeriodArray(data, copy=False) + # TODO: use to_period_array + return PeriodArray._complex_new(data, copy=False) if is_datetime_arraylike(data): return DatetimeIndex(data, copy=False, name=self.name) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 82fc65ab4040f..6b93a9bf3107d 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -15,7 +15,7 @@ pandas_dtype, ) -from pandas.core.accessor import PandasDelegate +from pandas.core.accessor import PandasDelegate, delegate_names from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index from pandas.core.indexes.datetimelike import ( DatelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op @@ -56,9 +56,11 @@ def f(self): def _new_PeriodIndex(cls, **d): # GH13277 for unpickling - if d['data'].dtype == 'int64': - values = d.pop('data') - return cls._from_ordinals(values=values, **d) + values = d.pop('data') + if values.dtype == 'int64': + return cls._from_ordinals(values=values, **d) + else: + return cls(values, **d) class PeriodDelegateMixin(PandasDelegate): @@ -75,7 +77,23 @@ def 
_delegate_method(self, name, *args, **kwargs): return operator.methodcaller(name, *args, **kwargs)(self._data) -# @delegate_names(PeriodArray, PeriodArray._datetimelike_methods, typ="method") +@delegate_names( + PeriodArray, + PeriodArray._datetimelike_ops + [ + 'size', + 'asi8', + 'shape', + ], + "property" +) +@delegate_names( + PeriodArray, + PeriodArray._datetimelike_methods + [ + '_format_native_types', + '_maybe_convert_timedelta', + ], + "method" +) class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index, PeriodDelegateMixin): """ @@ -170,7 +188,6 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, _is_numeric_dtype = False _infer_as_myclass = True - _freq = None _data = None # type: PeriodArray _engine_type = libindex.PeriodEngine @@ -182,9 +199,10 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, if name is None and hasattr(data, 'name'): name = data.name - data = PeriodArray(data=data, ordinal=ordinal, freq=freq, - start=start, end=end, periods=periods, - tz=tz, dtype=dtype, copy=copy, **fields) + data = PeriodArray._complex_new(data=data, ordinal=ordinal, freq=freq, + start=start, end=end, periods=periods, + tz=tz, dtype=dtype, copy=copy, + **fields) return cls._simple_new(data, name=name) # ------------------------------------------------------------------------ @@ -197,6 +215,17 @@ def _ndarray_values(self): def values(self): return self._data + @property + def freq(self): + # TODO(DatetimeArray): remove + # Can't simply use delegate_names since our base class is defining + # freq + return self._data.freq + + @freq.setter + def freq(self, value): + self._data.freq = value + # ------------------------------------------------------------------------ # Index Constructors @@ -225,7 +254,7 @@ def _from_ordinals(cls, values, name=None, freq=None, **kwargs): return result def _shallow_copy(self, values=None, **kwargs): - # TODO: update take to do this? 
+ # TODO: simplify, figure out type of values if values is None: # Note: this is the Index implementation. # slightly different from AttributesMixin implementation which @@ -238,7 +267,7 @@ def _shallow_copy(self, values=None, **kwargs): values = PeriodArray._from_ordinals(values, freq=self.freq) except TypeError: # TODO: this is probably ambiguous for some oridinals. - values = PeriodArray(values, freq=self.freq) + values = PeriodArray._complex_new(values, freq=self.freq) attributes = self._get_attributes_dict() attributes.update(kwargs) @@ -274,41 +303,6 @@ def _maybe_box_as_values(self, values, **attribs): return PeriodArray._from_ordinals(values, freq=freq) # ------------------------------------------------------------------------ - # Straight Dispatch - - @property - def freq(self): - """Return the frequency object if it is set, otherwise None""" - # TODO(DatetimeArray): remove - return self._data.freq - - @freq.setter - def freq(self, value): - # TODO(DatetimeArray): remove - self._data.freq = value - - @property - def asi8(self): - return self._data.asi8 - - @property - def size(self): - # Avoid materializing self._values - return self._data.size - - @property - def shape(self): - # Avoid materializing self._values - return self._data.shape - - def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): - return self._data._format_native_types(na_rep=na_rep, - date_format=date_format) - - @property - def is_leap_year(self): - return self._data.is_leap_year - # ------------------------------------------------------------------------ # Dispatch and Wrap def asfreq(self, freq=None, how='E'): @@ -326,8 +320,7 @@ def _nat_new(self, box=True): # Indexing @cache_readonly def _engine(self): - # TODO: understand indexing before just changing this. 
- return self._engine_type(lambda: self._ndarray_values, len(self)) + return self._engine_type(lambda: self, len(self)) @Appender(_index_shared_docs['__contains__']) def __contains__(self, key): @@ -497,7 +490,7 @@ def is_full(self): values = self.asi8 return ((values[1:] - values[:-1]) < 2).all() - year = _wrap_field_accessor('year') + # year = _wrap_field_accessor('year') month = _wrap_field_accessor('month') day = _wrap_field_accessor('day') hour = _wrap_field_accessor('hour') diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 0ba929f24c2ce..27cf70b807254 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd +from pandas._libs.tslib import iNaT from pandas.tests.extension import base from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.arrays import PeriodArray @@ -14,31 +15,31 @@ def dtype(): @pytest.fixture def data(dtype): - return PeriodArray(np.arange(1970, 2070), dtype=dtype) + return PeriodArray(np.arange(1970, 2070), freq=dtype.freq) @pytest.fixture def data_for_sorting(dtype): - return PeriodArray([2018, 2019, 2017], dtype=dtype) + return PeriodArray([2018, 2019, 2017], freq=dtype.freq) @pytest.fixture def data_missing(dtype): - return PeriodArray([None, 2017], dtype=dtype) + return PeriodArray([iNaT, 2017], freq=dtype.freq) @pytest.fixture def data_missing_for_sorting(dtype): - return PeriodArray([2018, None, 2017], dtype=dtype) + return PeriodArray([2018, iNaT, 2017], freq=dtype.freq) @pytest.fixture def data_for_grouping(dtype): B = 2018 - NA = None + NA = iNaT A = 2017 C = 2019 - return PeriodArray([B, B, NA, NA, A, A, B, C], dtype=dtype) + return PeriodArray([B, B, NA, NA, A, A, B, C], freq=dtype.freq) @pytest.fixture diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 448aa5e272661..9203797bd9644 100644 --- 
a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -478,6 +478,7 @@ def test_constructor_cant_cast_period(self): dtype=float) def test_constructor_cast_object(self): - s = Series(period_range('1/1/2000', periods=10), dtype=PeriodDtype("D")) + s = Series(period_range('1/1/2000', periods=10), + dtype=PeriodDtype("D")) exp = Series(period_range('1/1/2000', periods=10)) tm.assert_series_equal(s, exp) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 6b8e2203e83fd..85b7e824761b3 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -564,7 +564,7 @@ def test_get_loc2(self): 'unit abbreviation w/o a number'): idx.get_loc('2000-01-10', method='nearest', tolerance='foo') - msg = 'Input has different freq from PeriodIndex\\(freq=D\\)' + msg = 'Input has different freq from PeriodArray\\(freq=D\\)' with tm.assert_raises_regex(ValueError, msg): idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') with pytest.raises(KeyError): @@ -594,7 +594,7 @@ def test_get_indexer2(self): tolerance='1 hour'), np.array([0, -1, 1], dtype=np.intp)) - msg = 'Input has different freq from PeriodIndex\\(freq=H\\)' + msg = 'Input has different freq from PeriodArray\\(freq=H\\)' with tm.assert_raises_regex(ValueError, msg): idx.get_indexer(target, 'nearest', tolerance='1 minute') diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 1ea7b519bb179..b02542af10eb6 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -138,6 +138,7 @@ def test_view_asi8(self): tm.assert_numpy_array_equal(idx.view('i8'), exp) tm.assert_numpy_array_equal(idx.asi8, exp) + @pytest.mark.xfail(reason="XXX: Determine the desired behavior here.") def test_values(self): idx = pd.PeriodIndex([], freq='M') exp = PeriodArray([], freq='M') 
From 6369c7f3dc32f1825606913ad3f6643e76ac9388 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 1 Oct 2018 09:45:52 -0500 Subject: [PATCH 021/132] wip --- pandas/core/indexes/datetimelike.py | 3 ++- pandas/core/indexes/period.py | 27 +++++++++++++++++++++------ 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index e2b31889a40db..d722029b85631 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -34,6 +34,7 @@ import pandas.io.formats.printing as printing +from pandas.core.arrays import PeriodArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly @@ -725,7 +726,7 @@ def _ensure_datetimelike_to_i8(other, to_utc=False): """ if is_scalar(other) and isna(other): return iNaT - elif isinstance(other, PeriodArray, ABCIndexClass): + elif isinstance(other, (PeriodArray, ABCIndexClass)): # convert tz if needed # TODO: Ensure PeriodArray.tz_localize if getattr(other, 'tz', None) is not None: diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 6b93a9bf3107d..da432331e05e7 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -68,7 +68,10 @@ class PeriodDelegateMixin(PandasDelegate): Delegate from PeriodIndex to PeriodArray. 
""" def _delegate_property_get(self, name, *args, **kwargs): - return getattr(self._data, name) + result = getattr(self._data, name) + if name in PeriodArray._datetimelike_ops: + result = Index(result, name=self.name) + return result def _delegate_property_set(self, name, value, *args, **kwargs): setattr(self._data, name, value) @@ -92,7 +95,8 @@ def _delegate_method(self, name, *args, **kwargs): '_format_native_types', '_maybe_convert_timedelta', ], - "method" + "method", + overwrite=True, ) class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index, PeriodDelegateMixin): @@ -450,14 +454,25 @@ def asof_locs(self, where, mask): @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True, how='start'): dtype = pandas_dtype(dtype) - if is_integer_dtype(dtype): - return self._int64index.copy() if copy else self._int64index - elif is_datetime64_any_dtype(dtype): + + # We have a few special-cases for `dtype`. + # Failing those, we fall back to astyping the values + + if is_datetime64_any_dtype(dtype): + # 'how' is index-speicifc, isn't part of the EA interface. 
tz = getattr(dtype, 'tz', None) return self.to_timestamp(how=how).tz_localize(tz) + + elif is_integer_dtype(dtype): + # astype(int) -> Index, so don't dispatch + return self._int64index.copy() if copy else self._int64index + elif is_period_dtype(dtype): return self.asfreq(freq=dtype.freq) - return super(PeriodIndex, self).astype(dtype, copy=copy) + + return Index(self._data.astype(dtype, copy=copy), name=self.name, + dtype=dtype, # disable Index inference + copy=False) @Substitution(klass='PeriodIndex') @Appender(_shared_docs['searchsorted']) From 01551f08f3740603ccbffa8975a57162074b0a56 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 1 Oct 2018 10:25:57 -0500 Subject: [PATCH 022/132] more wip --- pandas/core/arrays/datetimelike.py | 10 ++++++++-- pandas/core/arrays/period.py | 21 ++++++++++++++++++--- pandas/core/indexes/period.py | 22 +++++++++++++--------- pandas/tests/indexes/period/test_ops.py | 9 +++++++++ 4 files changed, 48 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3125cf0bd7238..b879c96726387 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -63,8 +63,15 @@ def cmp_method(self, other): # comparisons, this will raise in the future with warnings.catch_warnings(record=True): warnings.filterwarnings("ignore", "elementwise", FutureWarning) + + # XXX: temporary hack till I figure out what's going on. + # For PeriodIndex.__eq__, we don't want to convert a scalar + # other to a scalar ndarray. 
+ if getattr(self, '_wrap_cmp_method', True): + other = np.asarray(other) + with np.errstate(all='ignore'): - result = op(self.values, np.asarray(other)) + result = op(self.values, other) return result @@ -760,7 +767,6 @@ def _evaluate_compare(self, other, op): result[mask] = filler return result - # TODO: get this from ExtensionOpsMixin @classmethod def _add_comparison_methods(cls): """ add in comparison methods """ diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 061cb62399858..6319ff39e6139 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -21,10 +21,13 @@ is_integer_dtype, is_float_dtype, is_period_dtype, is_float, is_integer, pandas_dtype, is_scalar, is_datetime64_dtype, + is_categorical_dtype, ensure_object ) from pandas.core.dtypes.dtypes import PeriodDtype -from pandas.core.dtypes.generic import ABCSeries, ABCIndex +from pandas.core.dtypes.generic import ( + ABCSeries, ABCIndex, ABCPeriodIndex +) import pandas.core.common as com @@ -65,7 +68,7 @@ def wrapper(self, other): raise IncompatibleFrequency(msg) result = op(other.ordinal) - elif isinstance(other, PeriodArray): + elif isinstance(other, (ABCPeriodIndex, PeriodArray)): if other.freq != self.freq: msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) @@ -111,7 +114,7 @@ class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): All elements in the PeriodArray have the same `freq`. """ _attributes = ["freq"] - _typ = "periodarray" # ABCPeriodArray + _typ = "periodarray" # ABCPeriodAray # Names others delegate to us on _other_ops = [] @@ -683,6 +686,18 @@ def start_time(self): def end_time(self): return self.to_timestamp(how='end') + def astype(self, dtype, copy=True): + # TODO: Figure out something better here... + # We have DatetimeLikeArrayMixin -> + # super(...), which ends up being... DatetimeIndexOpsMixin? + # this is complicated. + # need a pandas_astype(arr, dtype). 
+ from pandas.core.arrays import Categorical + + if is_categorical_dtype(dtype): + return Categorical(self, dtype=dtype) + return super(PeriodArray, self).astype(dtype, copy=copy) + PeriodArray._add_comparison_ops() PeriodArray._add_datetimelike_methods() diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index da432331e05e7..c4777f3c2a654 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -179,14 +179,8 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, """ _typ = 'periodindex' _attributes = ['name', 'freq'] - # _delegated_to = PeriodArray - # # TODO: do this in a decorator - # _other_ops = PeriodArray._other_ops - # _bool_ops = PeriodArray._bool_ops - # _object_ops = PeriodArray._object_ops - # _field_ops = PeriodArray._field_ops - # _datetimelike_ops = PeriodArray._datetimelike_ops - # _datetimelike_methods = PeriodArray._datetimelike_methods + # see hack in arrays/datetimelike.py make_comparison_op + # _wrap_cmp_method = False # define my properties & methods for delegation _is_numeric_dtype = False @@ -854,8 +848,18 @@ def __rsub__(self, other): cls.__rsub__ = __rsub__ + @classmethod + def _create_comparison_method(cls, op): + """ + Create a comparison method that dispatches to ``cls.values``. + """ + # TODO(DatetimeArray): move to base class. 
+ def wrapper(self, other): + return op(self.values, other) + return wrapper + -# PeriodIndex._add_comparison_ops() +PeriodIndex._add_comparison_ops() PeriodIndex._add_numeric_methods_disabled() PeriodIndex._add_logical_methods_disabled() PeriodIndex._add_datetimelike_methods() diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 0b97391da76a1..bdcc7ba0bf364 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -506,3 +506,12 @@ def test_pi_comp_period_nat(self): f = lambda x: tslib.NaT >= x exp = np.array([False, False, False, False], dtype=np.bool) self._check(idx, f, exp) + + +@pytest.mark.parametrize("other", ["2017", 2017]) +def test_eq(other): + idx = pd.PeriodIndex(['2017', '2017', '2018'], freq="D") + expected = np.array([True, True, False]) + result = idx == other + + tm.assert_numpy_array_equal(result, expected) From 0437940486b4b71ba27f52efee982d217ae04b8f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 1 Oct 2018 11:23:12 -0500 Subject: [PATCH 023/132] array-setitem --- pandas/core/arrays/period.py | 30 ++++++++++++++++++++++++ pandas/tests/arrays/test_period.py | 37 ++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 pandas/tests/arrays/test_period.py diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 6319ff39e6139..0f87541f26b68 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -267,6 +267,36 @@ def __len__(self): def isna(self): return self._data == iNaT + def __setitem__(self, key, value): + + if isinstance(value, compat.Sequence): + + if len(key) != len(value): + msg = ("shape mismatch: value array of length '{}' does not " + "match indexing result of length '{}'.") + raise ValueError(msg.format(len(key), len(value))) + + value = type(self)._complex_new(value) + if self.freqstr != value.freqstr: + msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, 
value.freqstr) + raise IncompatibleFrequency(msg) + + value = value.asi8 + elif isinstance(value, Period): + + if self.freqstr != value.freqstr: + msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, value.freqstr) + raise IncompatibleFrequency(msg) + + value = value.ordinal + elif isinstance(value, (type(None), type(NaT))): + value = iNaT + else: + msg = ("'value' should be a 'Period', 'NaT', or array of those. " + "Got '{}' instead.".format(type(value).__name__)) + raise TypeError(msg) + self._data[key] = value + def take(self, indices, allow_fill=False, fill_value=None): from pandas.core.algorithms import take from pandas import isna diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py new file mode 100644 index 0000000000000..b8236ff547074 --- /dev/null +++ b/pandas/tests/arrays/test_period.py @@ -0,0 +1,37 @@ +import pytest +import numpy as np + +import pandas as pd +import pandas.util.testing as tm +from pandas._libs.tslibs import iNaT +from pandas._libs.tslibs.period import IncompatibleFrequency +from pandas.core.arrays import PeriodArray + + +@pytest.mark.parametrize('key, value, expected', [ + ([0], pd.Period("2000", "D"), [10957, 1, 2]), + ([0], None, [iNaT, 1, 2]), + ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3), + ([0, 1, 2], [pd.Period("2000", "D"), + pd.Period("2001", "D"), + pd.Period("2002", "D")], + [10957, 11323, 11688]), +]) +def test_setitem(key, value, expected): + arr = PeriodArray(np.arange(3), freq="D") + expected = PeriodArray(expected, freq="D") + arr[key] = value + tm.assert_period_array_equal(arr, expected) + + +def test_setitem_raises(): + arr = PeriodArray(np.arange(3), freq="D") + with tm.assert_raises_regex(IncompatibleFrequency, "freq"): + arr[0] = pd.Period("2000", freq="A") + + with tm.assert_raises_regex(ValueError, "length"): + arr[[0, 1]] = [pd.Period("2000", freq="D")] + + with tm.assert_raises_regex(TypeError, "int"): + arr[0] = 1 + From 42ab1378c751ff2ec7044fb3918e0ba587f35333 Mon Sep 17 
00:00:00 2001 From: Tom Augspurger Date: Mon, 1 Oct 2018 13:16:05 -0500 Subject: [PATCH 024/132] wip --- pandas/core/accessor.py | 1 - pandas/core/arrays/categorical.py | 2 + pandas/core/arrays/datetimelike.py | 21 +++---- pandas/core/arrays/period.py | 41 +++++++++++-- pandas/core/indexes/base.py | 4 +- pandas/core/indexes/period.py | 71 +++++++++++----------- pandas/tests/indexes/period/test_astype.py | 4 +- 7 files changed, 86 insertions(+), 58 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 6b6e56cc98254..9722d8340f6d0 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -105,7 +105,6 @@ def f(self, *args, **kwargs): def delegate_names(delegate, accessors, typ, overwrite=False): - # type (type, list, str, bool) -> Callable """ Add delegated names to a class using a class decorator. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 2413b762cf690..c724f8484b787 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1259,6 +1259,8 @@ def __array__(self, dtype=None): if dtype==None (default), the same dtype as categorical.categories.dtype """ + # Need asarray, in case self.categories.values is an ExtensionArray + # e.g. in a PeriodIndex. More generally, any Index backed by an EA. 
values = np.asarray(self.categories.values) ret = take_1d(values, self._codes) if dtype and not is_dtype_equal(dtype, self.categories.dtype): diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index b879c96726387..cc7418266b537 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -11,9 +11,7 @@ from pandas._libs.tslibs.period import ( Period, DIFFERENT_FREQ_INDEX, IncompatibleFrequency) -from pandas.errors import ( - NullFrequencyError, PerformanceWarning, AbstractMethodError -) +from pandas.errors import NullFrequencyError, PerformanceWarning from pandas import compat from pandas.tseries import frequencies @@ -63,15 +61,8 @@ def cmp_method(self, other): # comparisons, this will raise in the future with warnings.catch_warnings(record=True): warnings.filterwarnings("ignore", "elementwise", FutureWarning) - - # XXX: temporary hack till I figure out what's going on. - # For PeriodIndex.__eq__, we don't want to convert a scalar - # other to a scalar ndarray. 
- if getattr(self, '_wrap_cmp_method', True): - other = np.asarray(other) - with np.errstate(all='ignore'): - result = op(self.values, other) + result = op(self.values, np.asarray(other)) return result @@ -85,10 +76,12 @@ class AttributesMixin(object): @property def _attributes(self): # Inheriting subclass should implement _attributes as a list of strings + from pandas.errors import AbstractMethodError raise AbstractMethodError(self) @classmethod def _simple_new(cls, values, **kwargs): + from pandas.errors import AbstractMethodError raise AbstractMethodError(cls) def _get_attributes_dict(self): @@ -125,7 +118,7 @@ def _box_func(self): """ box function to get object from internal representation """ - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _box_values(self, values): """ @@ -358,13 +351,13 @@ def _add_datelike(self, other): typ=type(other).__name__)) def _sub_datelike(self, other): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _sub_period(self, other): return NotImplemented def _add_offset(self, offset): - raise AbstractMethodError(self) + raise com.AbstractMethodError(self) def _add_delta(self, other): return NotImplemented diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 0f87541f26b68..bf8bf7c2274ce 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -22,6 +22,10 @@ is_float, is_integer, pandas_dtype, is_scalar, is_datetime64_dtype, is_categorical_dtype, + is_object_dtype, + is_string_dtype, + is_datetime_or_timedelta_dtype, + is_dtype_equal, ensure_object ) from pandas.core.dtypes.dtypes import PeriodDtype @@ -68,7 +72,7 @@ def wrapper(self, other): raise IncompatibleFrequency(msg) result = op(other.ordinal) - elif isinstance(other, (ABCPeriodIndex, PeriodArray)): + elif isinstance(other, (ABCPeriodIndex, cls)): if other.freq != self.freq: msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) @@ -186,7 
+190,7 @@ def _complex_new(cls, data=None, ordinal=None, freq=None, start=None, freq, fields) return cls._from_ordinals(data, freq=freq) - if isinstance(data, (PeriodArray, PeriodIndex)): + if isinstance(data, (cls, PeriodIndex)): if freq is None or freq == data.freq: # no freq change freq = data.freq data = data._ndarray_values @@ -722,11 +726,36 @@ def astype(self, dtype, copy=True): # super(...), which ends up being... DatetimeIndexOpsMixin? # this is complicated. # need a pandas_astype(arr, dtype). - from pandas.core.arrays import Categorical - - if is_categorical_dtype(dtype): + from pandas import Categorical + + dtype = pandas_dtype(dtype) + + if is_object_dtype(dtype): + return np.asarray(self, dtype=object) + elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): + return self._format_native_types() + elif is_integer_dtype(dtype): + return self.values.astype("i8", copy=copy) + elif (is_datetime_or_timedelta_dtype(dtype) and + not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype): + # disallow conversion between datetime/timedelta, + # and conversions for any datetimelike to float + msg = 'Cannot cast {name} to dtype {dtype}' + raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) + elif is_categorical_dtype(dtype): return Categorical(self, dtype=dtype) - return super(PeriodArray, self).astype(dtype, copy=copy) + elif is_period_dtype(dtype): + return self.asfreq(dtype.freq) + else: + return np.asarray(self, dtype=dtype) + + def _box_values_as_index(self): + """ + return object Index which contains boxed values + """ + # This is implemented just for astype + from pandas.core.index import Index + return Index(self._box_values(self.asi8), dtype=object) PeriodArray._add_comparison_ops() diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 44a945da8aaec..b292ea079a83c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -306,7 +306,9 @@ def __new__(cls, data=None, dtype=None, 
copy=False, name=None, (dtype is not None and is_period_dtype(dtype))): from pandas import PeriodIndex result = PeriodIndex(data, copy=copy, name=name, **kwargs) - if dtype is not None and _o_dtype == dtype: + if (dtype is not None and + not is_period_dtype(dtype) and + _o_dtype == dtype): return Index(result.to_pytimedelta(), dtype=_o_dtype) else: return result diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index c4777f3c2a654..f5cf5fad27674 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -10,7 +10,6 @@ is_float, is_integer_dtype, is_datetime64_any_dtype, - is_period_dtype, is_bool_dtype, pandas_dtype, ) @@ -91,11 +90,13 @@ def _delegate_method(self, name, *args, **kwargs): ) @delegate_names( PeriodArray, - PeriodArray._datetimelike_methods + [ + [x for x in PeriodArray._datetimelike_methods + if x not in {"asfreq", "to_timestamp"}] + [ '_format_native_types', '_maybe_convert_timedelta', ], "method", + # overwrite size, asi8, etc. but not asfreq, to_timestamp overwrite=True, ) class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, @@ -445,6 +446,14 @@ def asof_locs(self, where, mask): return result + def _box_values_as_index(self): + """ + return object Index which contains boxed values + """ + # TODO(DatetimeArray): remove + # Have to add our name. 
+ return Index(self._data._box_values_as_index(), name=self.name) + @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True, how='start'): dtype = pandas_dtype(dtype) @@ -457,16 +466,8 @@ def astype(self, dtype, copy=True, how='start'): tz = getattr(dtype, 'tz', None) return self.to_timestamp(how=how).tz_localize(tz) - elif is_integer_dtype(dtype): - # astype(int) -> Index, so don't dispatch - return self._int64index.copy() if copy else self._int64index - - elif is_period_dtype(dtype): - return self.asfreq(freq=dtype.freq) - - return Index(self._data.astype(dtype, copy=copy), name=self.name, - dtype=dtype, # disable Index inference - copy=False) + result = self._data.astype(dtype, copy=copy) + return Index(result, name=self.name, dtype=dtype, copy=False) @Substitution(klass='PeriodIndex') @Appender(_shared_docs['searchsorted']) @@ -499,29 +500,29 @@ def is_full(self): values = self.asi8 return ((values[1:] - values[:-1]) < 2).all() - # year = _wrap_field_accessor('year') - month = _wrap_field_accessor('month') - day = _wrap_field_accessor('day') - hour = _wrap_field_accessor('hour') - minute = _wrap_field_accessor('minute') - second = _wrap_field_accessor('second') - weekofyear = _wrap_field_accessor('week') - week = weekofyear - dayofweek = _wrap_field_accessor('dayofweek') - weekday = dayofweek - dayofyear = day_of_year = _wrap_field_accessor('dayofyear') - quarter = _wrap_field_accessor('quarter') - qyear = _wrap_field_accessor('qyear') - days_in_month = _wrap_field_accessor('days_in_month') - daysinmonth = days_in_month - - @property - def start_time(self): - return self.to_timestamp(how='start') - - @property - def end_time(self): - return self.to_timestamp(how='end') + # # year = _wrap_field_accessor('year') + # month = _wrap_field_accessor('month') + # day = _wrap_field_accessor('day') + # hour = _wrap_field_accessor('hour') + # minute = _wrap_field_accessor('minute') + # second = _wrap_field_accessor('second') + # weekofyear = 
_wrap_field_accessor('week') + # week = weekofyear + # dayofweek = _wrap_field_accessor('dayofweek') + # weekday = dayofweek + # dayofyear = day_of_year = _wrap_field_accessor('dayofyear') + # quarter = _wrap_field_accessor('quarter') + # qyear = _wrap_field_accessor('qyear') + # days_in_month = _wrap_field_accessor('days_in_month') + # daysinmonth = days_in_month + # + # @property + # def start_time(self): + # return self.to_timestamp(how='start') + # + # @property + # def end_time(self): + # return self.to_timestamp(how='end') def _mpl_repr(self): # how to represent ourselves to matplotlib diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index f2126487496c4..c51d728a5ba9f 100644 --- a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -14,7 +14,9 @@ class TestPeriodIndexAsType(object): def test_astype_raises(self, dtype): # GH#13149, GH#13209 idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - msg = 'Cannot cast PeriodIndex to dtype' + # XXX: do we care about the name PeriodArray vs. PeriodIndex in the + # exception message? 
+ msg = 'Cannot cast PeriodArray to dtype' with tm.assert_raises_regex(TypeError, msg): idx.astype(dtype) From 298390f7bde1ea2bc7ca3e6fa7c8b5b59228d055 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 1 Oct 2018 14:37:12 -0500 Subject: [PATCH 025/132] wip --- pandas/core/arrays/datetimelike.py | 9 ++--- pandas/core/arrays/period.py | 51 +++++++++++++++++++------- pandas/core/indexes/period.py | 12 +++--- pandas/core/ops.py | 10 ++++- pandas/tests/arithmetic/test_period.py | 18 +++++---- pandas/tests/test_base.py | 13 +++---- pandas/tests/test_multilevel.py | 8 ++-- 7 files changed, 78 insertions(+), 43 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index cc7418266b537..3d28956904749 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -518,10 +518,9 @@ def _addsub_offset_array(self, other, op): left = lib.values_from_object(self.astype('O')) res_values = op(left, np.array(other)) - kwargs = {} if not is_period_dtype(self): - kwargs['freq'] = 'infer' - return type(self)(res_values, **kwargs) + return type(self)(res_values, freq='infer') + return self._from_sequence(res_values) @deprecate_kwarg(old_arg_name='n', new_arg_name='periods') def shift(self, periods, freq=None): @@ -603,8 +602,6 @@ def __add__(self, other): elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these - # TODO: make a _shift method that's consistent between - # Index and EA result = self._tshift(other) # array-like others @@ -657,7 +654,7 @@ def __sub__(self, other): elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these - result = self.shift(-other) + result = self._tshift(-other) elif isinstance(other, Period): result = self._sub_period(other) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index bf8bf7c2274ce..76fdfa2911559 100644 --- 
a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -30,7 +30,7 @@ ) from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ( - ABCSeries, ABCIndex, ABCPeriodIndex + ABCSeries, ABCPeriodIndex, ABCIndexClass, ) import pandas.core.common as com @@ -63,7 +63,7 @@ def _period_array_cmp(cls, op): def wrapper(self, other): op = getattr(self._ndarray_values, opname) - if isinstance(other, (ABCSeries, ABCIndex)): + if isinstance(other, (ABCSeries, ABCIndexClass)): other = other.values if isinstance(other, Period): @@ -72,7 +72,7 @@ def wrapper(self, other): raise IncompatibleFrequency(msg) result = op(other.ordinal) - elif isinstance(other, (ABCPeriodIndex, cls)): + elif isinstance(other, cls): if other.freq != self.freq: msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) @@ -88,7 +88,8 @@ def wrapper(self, other): result = np.empty(len(self._ndarray_values), dtype=bool) result.fill(nat_result) elif isinstance(other, (list, np.ndarray)): - # XXX: is this correct? + # XXX: is this correct? Why not convert the + # sequence to a PeriodArray? return NotImplemented else: other = Period(other, freq=self.freq) @@ -111,16 +112,16 @@ class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): - ordinals : integer ndarray - freq : pd.tseries.offsets.Tick - The values are physically stored as an ndarray of integers. These are + The values are physically stored as a 1-D ndarray of integers. These are called "ordinals" and represent some kind of offset from a base. The `freq` indicates the span covered by each element of the array. All elements in the PeriodArray have the same `freq`. 
""" _attributes = ["freq"] - _typ = "periodarray" # ABCPeriodAray + _typ = "periodarray" # ABCPeriodArray - # Names others delegate to us on + # Names others delegate to us _other_ops = [] _bool_ops = ['is_leap_year'] _object_ops = ['start_time', 'end_time', 'freq'] @@ -134,7 +135,7 @@ class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): # -------------------------------------------------------------------- # Constructors def __init__(self, values, freq=None): - # type: (np.ndarray[int64], Union[str, Tick]) -> None + # type: (np.ndarray[np.int64], Union[str, Tick]) -> None values = np.array(values, dtype='int64', copy=False) self._data = values if freq is None: @@ -237,7 +238,7 @@ def _from_ordinals(cls, values, freq=None): # type: (ndarray[int], Optional[Tick]) -> PeriodArray """ Values should be int ordinals - `__new__` & `_simple_new` cooerce to ordinals and call this method + `__new__` & `_simple_new` coerce to ordinals and call this method """ return cls(values, freq=freq) @@ -536,7 +537,7 @@ def _add_offset(self, other): if base != self.freq.rule_code: msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - return self.shift(other.n) + return self._tshift(other.n) def _add_delta_td(self, other): assert isinstance(other, (timedelta, np.timedelta64, Tick)) @@ -546,7 +547,7 @@ def _add_delta_td(self, other): if isinstance(own_offset, Tick): offset_nanos = delta_to_nanoseconds(own_offset) if np.all(nanos % offset_nanos == 0): - return self.shift(nanos // offset_nanos) + return self._tshift(nanos // offset_nanos) # raise when input doesn't have freq raise IncompatibleFrequency("Input has different freq from " @@ -556,7 +557,7 @@ def _add_delta_td(self, other): def _add_delta(self, other): ordinal_delta = self._maybe_convert_timedelta(other) - return self.shift(ordinal_delta) + return self._tshift(ordinal_delta) def shift(self, periods=1): """ @@ -640,6 +641,7 @@ def _maybe_convert_timedelta(self, other): 
freqstr=self.freqstr)) def _format_native_types(self, na_rep=u'NaT', date_format=None): + # TODO(DatetimeArray): remove values = self.astype(object) if date_format: @@ -658,7 +660,8 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None): return values def view(self, dtype=None, type=None): - # This is to support PeriodIndex.view('i8') + # This is to support things like `.asi8` + # PeriodIndex's parent does .values.view('i8'). # I don't like adding this, return self._data.view(dtype=dtype) @@ -757,6 +760,28 @@ def _box_values_as_index(self): from pandas.core.index import Index return Index(self._box_values(self.asi8), dtype=object) + @property + def flags(self): + """Deprecated""" + # Just here to support Index.flags deprecation. + # could also override PeriodIndex.flags if we don't want a + # version with PeriodArray.flags + return self.values.flags + + @property + def base(self): + return self.values.base + + @property + def data(self): + return self.astype(object).data + + def item(self): + if len(self) == 1: + return Period._from_ordinal(self.values[0], self.freq) + else: + raise ValueError('can only convert an array of size 1 to a ' + 'Python scalar') PeriodArray._add_comparison_ops() PeriodArray._add_datetimelike_methods() diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f5cf5fad27674..3dfe33255930c 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -262,11 +262,13 @@ def _shallow_copy(self, values=None, **kwargs): else: # this differs too if not isinstance(values, PeriodArray): - try: - values = PeriodArray._from_ordinals(values, freq=self.freq) - except TypeError: - # TODO: this is probably ambiguous for some oridinals. - values = PeriodArray._complex_new(values, freq=self.freq) + values = PeriodArray._complex_new(values, freq=self.freq) + + # I don't like overloading shallow_copy with freq changes. 
+ # See if it's used anywhere outside of test_resample_empty_dataframe + freq = kwargs.pop("freq", None) + if freq: + values = values.asfreq(freq) attributes = self._get_attributes_dict() attributes.update(kwargs) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 70fe7de0a973e..b4848a0abeeb5 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1173,6 +1173,11 @@ def dispatch_to_extension_op(op, left, right): if is_extension_array_dtype(left): new_left = left.values + if (is_extension_array_dtype(right) + and isinstance(right, (ABCIndexClass, ABCSeries))): + # unbox + right = right._values + if isinstance(right, np.ndarray): # handle numpy scalars, this is a PITA @@ -1181,8 +1186,11 @@ def dispatch_to_extension_op(op, left, right): if is_scalar(new_right): new_right = [new_right] new_right = list(new_right) + elif (is_extension_array_dtype(right) and + type(new_left) == type(right)): + new_right = right elif is_extension_array_dtype(right) and type(left) != type(right): - new_right = list(new_right) + new_right = list(right) else: new_right = right diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 3210290b9c5c8..3ab78e194ad1d 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -623,7 +623,7 @@ def test_pi_sub_isub_timedeltalike_daily(self, three_days): def test_pi_add_iadd_timedeltalike_freq_mismatch_daily(self, not_daily): other = not_daily rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') - msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)' + msg = 'Input has different freq(=.+)? 
from Period.*?\\(freq=D\\)' with tm.assert_raises_regex(period.IncompatibleFrequency, msg): rng + other with tm.assert_raises_regex(period.IncompatibleFrequency, msg): @@ -632,7 +632,7 @@ def test_pi_add_iadd_timedeltalike_freq_mismatch_daily(self, not_daily): def test_pi_sub_timedeltalike_freq_mismatch_daily(self, not_daily): other = not_daily rng = pd.period_range('2014-05-01', '2014-05-15', freq='D') - msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=D\\)' + msg = 'Input has different freq(=.+)? from Period.*?\\(freq=D\\)' with tm.assert_raises_regex(period.IncompatibleFrequency, msg): rng - other @@ -651,7 +651,7 @@ def test_pi_add_iadd_timedeltalike_hourly(self, two_hours): def test_pi_add_timedeltalike_mismatched_freq_hourly(self, not_hourly): other = not_hourly rng = pd.period_range('2014-01-01 10:00', '2014-01-05 10:00', freq='H') - msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=H\\)' + msg = 'Input has different freq(=.+)? from Period.*?\\(freq=H\\)' with tm.assert_raises_regex(period.IncompatibleFrequency, msg): rng + other @@ -686,7 +686,7 @@ def test_pi_add_iadd_timedeltalike_freq_mismatch_annual(self, other = mismatched_freq rng = pd.period_range('2014', '2024', freq='A') msg = ('Input has different freq(=.+)? ' - 'from PeriodIndex\\(freq=A-DEC\\)') + 'from Period.*?\\(freq=A-DEC\\)') with tm.assert_raises_regex(period.IncompatibleFrequency, msg): rng + other with tm.assert_raises_regex(period.IncompatibleFrequency, msg): @@ -697,7 +697,7 @@ def test_pi_sub_isub_timedeltalike_freq_mismatch_annual(self, other = mismatched_freq rng = pd.period_range('2014', '2024', freq='A') msg = ('Input has different freq(=.+)? 
' - 'from PeriodIndex\\(freq=A-DEC\\)') + 'from Period.*?\\(freq=A-DEC\\)') with tm.assert_raises_regex(period.IncompatibleFrequency, msg): rng - other with tm.assert_raises_regex(period.IncompatibleFrequency, msg): @@ -717,7 +717,7 @@ def test_pi_add_iadd_timedeltalike_freq_mismatch_monthly(self, mismatched_freq): other = mismatched_freq rng = pd.period_range('2014-01', '2016-12', freq='M') - msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)' + msg = 'Input has different freq(=.+)? from Period.*?\\(freq=M\\)' with tm.assert_raises_regex(period.IncompatibleFrequency, msg): rng + other with tm.assert_raises_regex(period.IncompatibleFrequency, msg): @@ -727,7 +727,7 @@ def test_pi_sub_isub_timedeltalike_freq_mismatch_monthly(self, mismatched_freq): other = mismatched_freq rng = pd.period_range('2014-01', '2016-12', freq='M') - msg = 'Input has different freq(=.+)? from PeriodIndex\\(freq=M\\)' + msg = 'Input has different freq(=.+)? from Period.*?\\(freq=M\\)' with tm.assert_raises_regex(period.IncompatibleFrequency, msg): rng - other with tm.assert_raises_regex(period.IncompatibleFrequency, msg): @@ -852,6 +852,7 @@ def test_pi_ops_errors(self, ng): with pytest.raises(TypeError): np.subtract(ng, obj) + @pytest.mark.xfail(reason="GH-22798", strict=True) def test_pi_ops_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M', name='idx') @@ -876,6 +877,7 @@ def test_pi_ops_nat(self): self._check(idx + 3, lambda x: x - 3, idx) self._check(idx + 3, lambda x: np.subtract(x, 3), idx) + @pytest.mark.xfail(reason="TODO", strict=True) def test_pi_ops_array_int(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M', name='idx') @@ -924,7 +926,7 @@ def test_pi_offset_errors(self): # Series op is applied per Period instance, thus error is raised # from Period - msg_idx = r"Input has different freq from PeriodIndex\(freq=D\)" + msg_idx = r"Input has different freq from Period.*?\(freq=D\)" msg_s = r"Input cannot be 
converted to Period\(freq=D\)" for obj, msg in [(idx, msg_idx), (ser, msg_s)]: with tm.assert_raises_regex(period.IncompatibleFrequency, msg): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index bbc5bd96bad55..c9dbdd358ad87 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1198,7 +1198,8 @@ def test_iter_box(self): (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex, 'datetime64[ns, US/Central]'), (pd.TimedeltaIndex([10**10]), np.ndarray, 'm8[ns]'), - (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray, 'object'), + (pd.PeriodIndex([2018, 2019], freq='A'), pd.core.arrays.PeriodArray, + pd.core.dtypes.dtypes.PeriodDtype("A-DEC")), (pd.IntervalIndex.from_breaks([0, 1, 2]), pd.core.arrays.IntervalArray, 'interval'), ]) @@ -1214,6 +1215,8 @@ def test_values_consistent(array, expected_type, dtype): tm.assert_index_equal(l_values, r_values) elif pd.api.types.is_categorical(l_values): tm.assert_categorical_equal(l_values, r_values) + elif pd.api.types.is_period_dtype(l_values): + tm.assert_period_array_equal(l_values, r_values) elif pd.api.types.is_interval_dtype(l_values): tm.assert_interval_array_equal(l_values, r_values) else: @@ -1232,12 +1235,8 @@ def test_values_consistent(array, expected_type, dtype): (pd.DatetimeIndex(['2017-01-01T00:00:00'], tz="US/Eastern"), np.array(['2017-01-01T05:00:00'], dtype='M8[ns]')), (pd.TimedeltaIndex([10**10]), np.array([10**10], dtype='m8[ns]')), - pytest.param( - pd.PeriodIndex(['2017', '2018'], freq='D'), - np.array([17167, 17532]), - marks=pytest.mark.xfail(reason="PeriodArray Not implemented", - strict=True) - ), + (pd.PeriodIndex(['2017', '2018'], freq='D'), + np.array([17167, 17532])), ]) def test_ndarray_values(array, expected): l_values = pd.Series(array)._ndarray_values diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 1718c6beaef55..801656a3dee5e 100644 --- a/pandas/tests/test_multilevel.py +++ 
b/pandas/tests/test_multilevel.py @@ -16,6 +16,7 @@ from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas.core.common as com import pandas.util.testing as tm +from pandas.core.arrays import PeriodArray from pandas.compat import (range, lrange, StringIO, lzip, u, product as cart_product, zip) import pandas as pd @@ -2319,9 +2320,10 @@ def test_reset_index_period(self): df = DataFrame(np.arange(9, dtype='int64').reshape(-1, 1), index=idx, columns=['a']) expected = DataFrame({ - 'month': ([pd.Period('2013-01', freq='M')] * 3 + - [pd.Period('2013-02', freq='M')] * 3 + - [pd.Period('2013-03', freq='M')] * 3), + 'month': PeriodArray._from_periods(np.array( + [pd.Period('2013-01', freq='M')] * 3 + + [pd.Period('2013-02', freq='M')] * 3 + + [pd.Period('2013-03', freq='M')] * 3, dtype=object)), 'feature': ['a', 'b', 'c'] * 3, 'a': np.arange(9, dtype='int64') }, columns=['month', 'feature', 'a']) From 23e5cfca21fae0a74d867073096b7fb76ab332fe Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Oct 2018 13:10:41 -0500 Subject: [PATCH 026/132] Use ._tshift internally for datetimelike ops In preparation for PeriodArray / DatetimeArray / TimedeltaArray. Index.shift has a different meaning from ExtensionArray.shift. - Index.shift pointwise shifts each element by some amount - ExtensionArray.shift shifts the *position* of each value in the array padding the end with NA This is going to get confusing. This PR tries to avoid some of that by internally using a new `_tshift` method (time-shift) when we want to do pointwise shifting of each value. Places that know they want that behavior (like in the datetimelike ops) should use that.
--- pandas/core/arrays/datetimelike.py | 23 ++++++++++++++++++++--- pandas/core/arrays/period.py | 9 ++++++--- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 91c119808db52..1a8cffaca0be1 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -455,7 +455,7 @@ def _sub_period_array(self, other): def _addsub_int_array(self, other, op): """ Add or subtract array-like of integers equivalent to applying - `shift` pointwise. + `_tshift` pointwise. Parameters ---------- @@ -553,6 +553,23 @@ def shift(self, periods, freq=None): -------- Index.shift : Shift values of Index. """ + return self._tshift(periods=periods, freq=freq) + + def _tshift(self, periods, freq=None): + """ + Shift each value by `periods`. + + Note this is different from ExtensionArray.shift, which + shifts the *position* of each element, padding the end with + missing values. + + Parameters + ---------- + periods : int + Number of periods to shift by. + freq : pandas.DateOffset, pandas.Timedelta, or string + Frequency increment to shift by. 
+ """ if freq is not None and freq != self.freq: if isinstance(freq, compat.string_types): freq = frequencies.to_offset(freq) @@ -600,7 +617,7 @@ def __add__(self, other): elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these - result = self.shift(other) + result = self._tshift(other) # array-like others elif is_timedelta64_dtype(other): @@ -652,7 +669,7 @@ def __sub__(self, other): elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these - result = self.shift(-other) + result = self._tshift(-other) elif isinstance(other, Period): result = self._sub_period(other) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 481d5313f0e25..91806a805398a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -297,7 +297,7 @@ def _add_offset(self, other): if base != self.freq.rule_code: msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - return self.shift(other.n) + return self._tshift(other.n) def _add_delta_td(self, other): assert isinstance(other, (timedelta, np.timedelta64, Tick)) @@ -307,7 +307,7 @@ def _add_delta_td(self, other): if isinstance(own_offset, Tick): offset_nanos = delta_to_nanoseconds(own_offset) if np.all(nanos % offset_nanos == 0): - return self.shift(nanos // offset_nanos) + return self._tshift(nanos // offset_nanos) # raise when input doesn't have freq raise IncompatibleFrequency("Input has different freq from " @@ -317,7 +317,7 @@ def _add_delta_td(self, other): def _add_delta(self, other): ordinal_delta = self._maybe_convert_timedelta(other) - return self.shift(ordinal_delta) + return self._tshift(ordinal_delta) def shift(self, n): """ @@ -332,6 +332,9 @@ def shift(self, n): ------- shifted : Period Array/Index """ + return self._tshift(n) + + def _tshift(self, n): values = self._ndarray_values + n * self.freq.n if 
self.hasnans: values[self._isnan] = iNaT From 9d17fd2e58e3855b38890bc07de8f9d1525050f9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Oct 2018 14:13:46 -0500 Subject: [PATCH 027/132] deep --- pandas/core/arrays/period.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 76fdfa2911559..57e1a651c4ce3 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -245,7 +245,7 @@ def _from_ordinals(cls, values, freq=None): @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): # type: (Sequence[Optional[Period]], Dtype, bool) -> PeriodArray - return cls._complex_new(scalars, dtype=dtype, copy=copy) + return cls._from_periods(scalars, dtype=dtype, copy=copy) @classmethod def _from_factorized(cls, values, original): @@ -328,7 +328,7 @@ def nbytes(self): return self._data.nbytes def copy(self, deep=False): - return self._from_ordinals(self._data.copy(), freq=self.freq) + return self._from_ordinals(self._data.copy(deep=deep), freq=self.freq) @classmethod def _concat_same_type(cls, to_concat): From 959cd721b1006ed6af720d5701ce75a943f98396 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Oct 2018 14:17:04 -0500 Subject: [PATCH 028/132] Squashed commit of the following: commit 23e5cfca21fae0a74d867073096b7fb76ab332fe Author: Tom Augspurger Date: Tue Oct 2 13:10:41 2018 -0500 Use ._tshift internally for datetimelike ops In preparation for PeriodArray / DatetimeArray / TimedeltaArray. Index.shift has a different meaning from ExtensionArray.shift. - Index.shift pointwise shifts each element by some amount - ExtensionArray.shift shifts the *position* of each value in the array padding the end with NA This is going to get confusing. This PR tries to avoid some of that by internally using a new `_tshift` method (time-shift) when we want to do pointwise shifting of each value.
Places that know they want that behavior (like in the datetimelike ops) should use that. commit 1d9f76c5055d1ef31ce76134e88b5568a119f498 Author: Joris Van den Bossche Date: Tue Oct 2 17:11:11 2018 +0200 CLN: remove Index._to_embed (#22879) * CLN: remove Index._to_embed * pep8 commit 6247da0db4835ff723126640145b4fad3ce17343 Author: Tom Augspurger Date: Tue Oct 2 08:50:41 2018 -0500 Provide default implementation for `data_repated` (#22935) commit 5ce06b5bdb8c44043c6463bf8ce3da758800a189 Author: Matthew Roeschke Date: Mon Oct 1 14:22:20 2018 -0700 BUG: to_datetime preserves name of Index argument in the result (#22918) * BUG: to_datetime preserves name of Index argument in the result * correct test --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/arrays/datetimelike.py | 16 +++++++++++- pandas/core/arrays/period.py | 26 +++++++++++-------- pandas/core/indexes/base.py | 14 +--------- pandas/core/indexes/datetimes.py | 18 +++---------- pandas/core/indexes/period.py | 10 ------- pandas/core/tools/datetimes.py | 13 ++++++---- pandas/tests/extension/conftest.py | 20 ++++++++++++++ .../tests/extension/decimal/test_decimal.py | 8 ------ pandas/tests/extension/test_categorical.py | 9 ------- pandas/tests/extension/test_integer.py | 8 ------ pandas/tests/extension/test_interval.py | 9 ------- pandas/tests/indexes/datetimes/test_tools.py | 17 ++++++++++++ 13 files changed, 81 insertions(+), 88 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b71edcf1f6f51..851c1a3fbd6e9 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -655,6 +655,7 @@ Datetimelike - Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`) - Bug in :class:`DatetimeIndex` where frequency was being set if original frequency was ``None`` (:issue:`22150`) - Bug in rounding methods of :class:`DatetimeIndex` (:meth:`~DatetimeIndex.round`, :meth:`~DatetimeIndex.ceil`, 
:meth:`~DatetimeIndex.floor`) and :class:`Timestamp` (:meth:`~Timestamp.round`, :meth:`~Timestamp.ceil`, :meth:`~Timestamp.floor`) could give rise to loss of precision (:issue:`22591`) +- Bug in :func:`to_datetime` with an :class:`Index` argument that would drop the ``name`` from the result (:issue:`21697`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3d28956904749..2385d50f58341 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -455,7 +455,7 @@ def _sub_period_array(self, other): def _addsub_int_array(self, other, op): """ Add or subtract array-like of integers equivalent to applying - `shift` pointwise. + `_tshift` pointwise. Parameters ---------- @@ -555,6 +555,20 @@ def shift(self, periods, freq=None): return self._tshift(periods, freq=freq) def _tshift(self, periods, freq=None): + """ + Shift each value by `periods`. + + Note this is different from ExtensionArray.shift, which + shifts the *position* of each element, padding the end with + missing values. + + Parameters + ---------- + periods : int + Number of periods to shift by. + freq : pandas.DateOffset, pandas.Timedelta, or string + Frequency increment to shift by. + """ if freq is not None and freq != self.freq: if isinstance(freq, compat.string_types): freq = frequencies.to_offset(freq) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 57e1a651c4ce3..4d346b7be1e75 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -572,22 +572,26 @@ def shift(self, periods=1): ------- shifted : Period Array/Index """ - # We have two kinds of shift. - # 1. ExtensionArray.shift: move positions of each value, - # fill NA on the end - # 2. Datelike.tshift: move each value through time - # Each Datelike array will implement both. It's up to the - # caller to call the correct one. 
- return self._ea_shift(periods=periods) - - def _ea_shift(self, periods=1): - # TODO: remove from DatetimeLikeArrayMixin + # TODO(DatetimeArray): remove from DatetimeLikeArrayMixin # The semantics for Index.shift differ from EA.shift # then just call super. return ExtensionArray.shift(self, periods) def _tshift(self, n, freq=None): - # TODO: docs + """ + Shift each value by `periods`. + + Note this is different from ExtensionArray.shift, which + shifts the *position* of each element, padding the end with + missing values. + + Parameters + ---------- + periods : int + Number of periods to shift by. + freq : pandas.DateOffset, pandas.Timedelta, or string + Frequency increment to shift by. + """ values = self.values + n * self.freq.n if self.hasnans: values[self._isnan] = iNaT diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b292ea079a83c..19f9209ef39ca 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1124,7 +1124,7 @@ def to_series(self, index=None, name=None): if name is None: name = self.name - return Series(self._to_embed(), index=index, name=name) + return Series(self.values.copy(), index=index, name=name) def to_frame(self, index=True, name=None): """ @@ -1187,18 +1187,6 @@ def to_frame(self, index=True, name=None): result.index = self return result - def _to_embed(self, keep_tz=False, dtype=None): - """ - *this is an internal non-public method* - - return an array repr of this object, potentially casting to object - - """ - if dtype is not None: - return self.astype(dtype)._to_embed(keep_tz=keep_tz) - - return self.values.copy() - _index_shared_docs['astype'] = """ Create an Index with values cast to dtypes. The class of a new Index is determined by dtype. 
When conversion is impossible, a ValueError diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9b00f21668bf5..a6cdaa0c2163a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -665,23 +665,13 @@ def to_series(self, keep_tz=False, index=None, name=None): if name is None: name = self.name - return Series(self._to_embed(keep_tz), index=index, name=name) - - def _to_embed(self, keep_tz=False, dtype=None): - """ - return an array repr of this object, potentially casting to object - - This is for internal compat - """ - if dtype is not None: - return self.astype(dtype)._to_embed(keep_tz=keep_tz) - if keep_tz and self.tz is not None: - # preserve the tz & copy - return self.copy(deep=True) + values = self.copy(deep=True) + else: + values = self.values.copy() - return self.values.copy() + return Series(values, index=index, name=name) def to_period(self, freq=None): """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 3dfe33255930c..a8c3c962077f3 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -412,16 +412,6 @@ def __array_wrap__(self, result, context=None): # cannot pass _simple_new as it is return self._shallow_copy(result, freq=self.freq, name=self.name) - def _to_embed(self, keep_tz=False, dtype=None): - """ - return an array repr of this object, potentially casting to object - """ - - if dtype is not None: - return self.astype(dtype)._to_embed(keep_tz=keep_tz) - - return self.astype(object).values - @property def _formatter_func(self): return lambda x: "'%s'" % x diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 57387b9ea870a..4a5290a90313d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -99,13 +99,13 @@ def _convert_and_box_cache(arg, cache_array, box, errors, name=None): result = Series(arg).map(cache_array) if box: if errors == 'ignore': - return Index(result) + 
return Index(result, name=name) else: return DatetimeIndex(result, name=name) return result.values -def _return_parsed_timezone_results(result, timezones, box, tz): +def _return_parsed_timezone_results(result, timezones, box, tz, name): """ Return results from array_strptime if a %z or %Z directive was passed. @@ -119,6 +119,9 @@ def _return_parsed_timezone_results(result, timezones, box, tz): True boxes result as an Index-like, False returns an ndarray tz : object None or pytz timezone object + name : string, default None + Name for a DatetimeIndex + Returns ------- tz_result : ndarray of parsed dates with timezone @@ -136,7 +139,7 @@ def _return_parsed_timezone_results(result, timezones, box, tz): in zip(result, timezones)]) if box: from pandas import Index - return Index(tz_results) + return Index(tz_results, name=name) return tz_results @@ -209,7 +212,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, if box: if errors == 'ignore': from pandas import Index - return Index(result) + return Index(result, name=name) return DatetimeIndex(result, tz=tz, name=name) return result @@ -252,7 +255,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, arg, format, exact=exact, errors=errors) if '%Z' in format or '%z' in format: return _return_parsed_timezone_results( - result, timezones, box, tz) + result, timezones, box, tz, name) except tslibs.OutOfBoundsDatetime: if errors == 'raise': raise diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index ecd904c170750..3106b0a6f9fe5 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -32,11 +32,31 @@ def all_data(request, data, data_missing): @pytest.fixture def data_repeated(data): +<<<<<<< HEAD """Return different versions of data for count times""" def gen(count): for _ in range(count): yield data yield gen +======= + """ + Generate many datasets. 
+ + Parameters + ---------- + data : fixture implementing `data` + + Returns + ------- + Callable[[int], Generator]: + A callable that takes a `count` argument and + returns a generator yielding `count` datasets. + """ + def gen(count): + for _ in range(count): + yield data + return gen +>>>>>>> datetimelike-tshift @pytest.fixture diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 03fdd25826b79..93b8ea786ef5b 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -30,14 +30,6 @@ def data_missing(): return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)]) -@pytest.fixture -def data_repeated(): - def gen(count): - for _ in range(count): - yield DecimalArray(make_data()) - yield gen - - @pytest.fixture def data_for_sorting(): return DecimalArray([decimal.Decimal('1'), diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 6c6cf80c16da6..ff66f53eab6f6 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -45,15 +45,6 @@ def data_missing(): return Categorical([np.nan, 'A']) -@pytest.fixture -def data_repeated(): - """Return different versions of data for count times""" - def gen(count): - for _ in range(count): - yield Categorical(make_data()) - yield gen - - @pytest.fixture def data_for_sorting(): return Categorical(['A', 'B', 'C'], categories=['C', 'A', 'B'], diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 57e0922a0b7d9..7aa33006dadda 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -47,14 +47,6 @@ def data_missing(dtype): return integer_array([np.nan, 1], dtype=dtype) -@pytest.fixture -def data_repeated(data): - def gen(count): - for _ in range(count): - yield data - yield gen - - @pytest.fixture def data_for_sorting(dtype): return 
integer_array([1, 2, 0], dtype=dtype) diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 34b98f590df0d..7302c5757d144 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -47,15 +47,6 @@ def data_missing(): return IntervalArray.from_tuples([None, (0, 1)]) -@pytest.fixture -def data_repeated(): - """Return different versions of data for count times""" - def gen(count): - for _ in range(count): - yield IntervalArray(make_data()) - yield gen - - @pytest.fixture def data_for_sorting(): return IntervalArray.from_tuples([(1, 2), (2, 3), (0, 1)]) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index cc6db8f5854c8..3b7d6a709230b 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -233,6 +233,15 @@ def test_to_datetime_parse_timezone_malformed(self, offset): with pytest.raises(ValueError): pd.to_datetime([date], format=fmt) + def test_to_datetime_parse_timezone_keeps_name(self): + # GH 21697 + fmt = '%Y-%m-%d %H:%M:%S %z' + arg = pd.Index(['2010-01-01 12:00:00 Z'], name='foo') + result = pd.to_datetime(arg, format=fmt) + expected = pd.DatetimeIndex(['2010-01-01 12:00:00'], tz='UTC', + name='foo') + tm.assert_index_equal(result, expected) + class TestToDatetime(object): def test_to_datetime_pydatetime(self): @@ -765,6 +774,14 @@ def test_unit_rounding(self, cache): expected = pd.Timestamp('2015-06-19 19:55:31.877000093') assert result == expected + @pytest.mark.parametrize('cache', [True, False]) + def test_unit_ignore_keeps_name(self, cache): + # GH 21697 + expected = pd.Index([15e9] * 2, name='name') + result = pd.to_datetime(expected, errors='ignore', box=True, unit='s', + cache=cache) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('cache', [True, False]) def test_dataframe(self, cache): From b66f617dd1816d08de28ab7d9407d95d93f2c7b6 Mon 
Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Oct 2018 14:19:49 -0500 Subject: [PATCH 029/132] Squashed commit of the following: commit bccfc3f8d821d739892ea0bc6eca88e9ffb04434 Merge: d65980ec6 9caf04836 Author: Tom Augspurger Date: Tue Oct 2 13:47:48 2018 -0500 Merge remote-tracking branch 'upstream/master' into period-dtype-type commit 9caf04836ad34ca17da7b86ba7120cca58ce142a Author: Tom Augspurger Date: Tue Oct 2 13:25:22 2018 -0500 CI: change windows vm image (#22948) commit d65980ec629558bfc205316ce6be01daecb54a9d Author: Tom Augspurger Date: Tue Oct 2 11:46:38 2018 -0500 typo commit e5c61fcab619dde608add7a2b33bb51d100ebfaf Merge: d7a8e1b86 1d9f76c50 Author: Tom Augspurger Date: Tue Oct 2 10:57:59 2018 -0500 Merge remote-tracking branch 'upstream/master' into period-dtype-type commit d7a8e1b8686024b3c891d01ce267ccbfd1beabb9 Author: Tom Augspurger Date: Tue Oct 2 10:57:56 2018 -0500 Fixed commit 598cc622d957ecba6cbb76c72ef3aeea98f55526 Author: Tom Augspurger Date: Tue Oct 2 10:32:22 2018 -0500 doc note commit 83db05c7ddec7a87ff77430ab686873b8a329a0f Author: Tom Augspurger Date: Tue Oct 2 10:28:52 2018 -0500 updates commit 1d9f76c5055d1ef31ce76134e88b5568a119f498 Author: Joris Van den Bossche Date: Tue Oct 2 17:11:11 2018 +0200 CLN: remove Index._to_embed (#22879) * CLN: remove Index._to_embed * pep8 commit 6247da0db4835ff723126640145b4fad3ce17343 Author: Tom Augspurger Date: Tue Oct 2 08:50:41 2018 -0500 Provide default implementation for `data_repated` (#22935) commit f07ab807e5eb393121197307de5718db429b71a8 Author: Tom Augspurger Date: Tue Oct 2 06:22:27 2018 -0500 str, bytes commit 8a8bdb05008ed52fe0eb8d4168347bc2af116b4a Author: Tom Augspurger Date: Mon Oct 1 21:40:59 2018 -0500 import at top commit 99bafdd5513c7091e88416c725690605d9e808f4 Author: Tom Augspurger Date: Mon Oct 1 21:38:12 2018 -0500 Update type for PeriodDtype Removed unused IntervalDtypeType commit 5ce06b5bdb8c44043c6463bf8ce3da758800a189 Author: Matthew Roeschke Date: Mon Oct 1 
14:22:20 2018 -0700 BUG: to_datetime preserves name of Index argument in the result (#22918) * BUG: to_datetime preserves name of Index argument in the result * correct test --- azure-pipelines.yml | 4 ++-- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/dtypes/base.py | 4 +++- pandas/core/dtypes/common.py | 17 +++++++++-------- pandas/core/dtypes/dtypes.py | 24 +++++------------------- pandas/tests/dtypes/test_common.py | 8 ++++---- pandas/tests/extension/conftest.py | 8 -------- 7 files changed, 24 insertions(+), 42 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c82dafa224961..5d473bfc5a38c 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -18,8 +18,8 @@ jobs: - template: ci/azure/windows.yml parameters: name: Windows - vmImage: vs2017-win2017 + vmImage: vs2017-win2016 - template: ci/azure/windows-py27.yml parameters: name: WindowsPy27 - vmImage: vs2017-win2017 + vmImage: vs2017-win2016 diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 851c1a3fbd6e9..9808f5d735535 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -505,6 +505,7 @@ ExtensionType Changes - :meth:`Series.astype` and :meth:`DataFrame.astype` now dispatch to :meth:`ExtensionArray.astype` (:issue:`21185:`). - Slicing a single row of a ``DataFrame`` with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`) - Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`) +- Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`) .. 
_whatsnew_0240.api.incompatibilities: diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index a552251ebbafa..5c9ba921226c0 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -175,7 +175,9 @@ def type(self): """The scalar type for the array, e.g. ``int`` It's expected ``ExtensionArray[item]`` returns an instance - of ``ExtensionDtype.type`` for scalar ``item``. + of ``ExtensionDtype.type`` for scalar ``item``, assuming + that value is valid (not NA). NA values do not need to be + instances of `type`. """ raise AbstractMethodError(self) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index fc7bf2613a302..4860c2d766425 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -4,12 +4,13 @@ from pandas.compat import (string_types, text_type, binary_type, PY3, PY36) from pandas._libs import algos, lib -from pandas._libs.tslibs import conversion, Period +from pandas._libs.tslibs import conversion, Period, Timestamp +from pandas._libs.interval import Interval from pandas.core.dtypes.dtypes import ( registry, CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, - DatetimeTZDtypeType, PeriodDtype, IntervalDtype, - IntervalDtypeType, PandasExtensionDtype, ExtensionDtype, + PeriodDtype, IntervalDtype, + PandasExtensionDtype, ExtensionDtype, _pandas_registry) from pandas.core.dtypes.generic import ( ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, @@ -1903,20 +1904,20 @@ def _get_dtype_type(arr_or_dtype): elif isinstance(arr_or_dtype, CategoricalDtype): return CategoricalDtypeType elif isinstance(arr_or_dtype, DatetimeTZDtype): - return DatetimeTZDtypeType + return Timestamp elif isinstance(arr_or_dtype, IntervalDtype): - return IntervalDtypeType + return Interval elif isinstance(arr_or_dtype, PeriodDtype): - return arr_or_dtype.type + return Period elif isinstance(arr_or_dtype, string_types): if is_categorical_dtype(arr_or_dtype): return CategoricalDtypeType elif 
is_datetime64tz_dtype(arr_or_dtype): - return DatetimeTZDtypeType + return Timestamp elif is_period_dtype(arr_or_dtype): return Period elif is_interval_dtype(arr_or_dtype): - return IntervalDtypeType + return Interval return _get_dtype_type(np.dtype(arr_or_dtype)) try: return arr_or_dtype.dtype.type diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 29a3552d2e47a..492f65cff254c 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -4,7 +4,8 @@ import numpy as np from pandas import compat from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex -from pandas._libs.tslibs import Period, NaT +from pandas._libs.tslibs import Period, NaT, Timestamp +from pandas._libs.interval import Interval from .base import ExtensionDtype, _DtypeOpsMixin @@ -470,13 +471,6 @@ def _is_boolean(self): return is_bool_dtype(self.categories) -class DatetimeTZDtypeType(type): - """ - the type of DatetimeTZDtype, this metaclass determines subclass ability - """ - pass - - class DatetimeTZDtype(PandasExtensionDtype): """ @@ -486,7 +480,7 @@ class DatetimeTZDtype(PandasExtensionDtype): THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.datetime64[ns] """ - type = DatetimeTZDtypeType + type = Timestamp kind = 'M' str = '|M8[ns]' num = 101 @@ -660,11 +654,11 @@ def construct_from_string(cls, string): raise TypeError("could not construct PeriodDtype") def __unicode__(self): - return self.name + return compat.text_type(self.name) @property def name(self): - return u"period[{freq}]".format(freq=self.freq.freqstr) + return str("period[{freq}]".format(freq=self.freq.freqstr)) @property def na_value(self): @@ -709,13 +703,6 @@ def construct_array_type(cls): return PeriodArray -class IntervalDtypeType(type): - """ - the type of IntervalDtype, this metaclass determines subclass ability - """ - pass - - @register_extension_dtype class IntervalDtype(PandasExtensionDtype, ExtensionDtype): """ @@ -804,7 +791,6 @@ def 
construct_from_string(cls, string): @property def type(self): - from pandas import Interval return Interval def __unicode__(self): diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index b745a1e5d454a..f87c51a4ee16b 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -605,15 +605,15 @@ def test__get_dtype_fails(input_param): (pd.DatetimeIndex([1, 2]), np.datetime64), (pd.DatetimeIndex([1, 2]).dtype, np.datetime64), ('>>>>>> datetimelike-tshift @pytest.fixture From 5669675d6b40c5d8aac4f59a4eb9d89c13cd8304 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Oct 2018 14:47:53 -0500 Subject: [PATCH 030/132] fixup --- pandas/core/arrays/period.py | 7 +++++-- pandas/core/indexes/period.py | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 4d346b7be1e75..1e487304888c0 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -245,7 +245,10 @@ def _from_ordinals(cls, values, freq=None): @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): # type: (Sequence[Optional[Period]], Dtype, bool) -> PeriodArray - return cls._from_periods(scalars, dtype=dtype, copy=copy) + if dtype: + dtype = dtype.freq + scalars = np.asarray(scalars, dtype=object) + return cls._from_periods(scalars, freq=dtype) @classmethod def _from_factorized(cls, values, original): @@ -328,7 +331,7 @@ def nbytes(self): return self._data.nbytes def copy(self, deep=False): - return self._from_ordinals(self._data.copy(deep=deep), freq=self.freq) + return self._from_ordinals(self._data.copy(), freq=self.freq) @classmethod def _concat_same_type(cls, to_concat): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a8c3c962077f3..cc61c1baa7bf6 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -230,7 +230,7 @@ def freq(self, value): @classmethod def 
_simple_new(cls, values, name=None, freq=None, **kwargs): - # TODO: clean up signature. + # type: (PeriodArray, Any, Any) -> PeriodIndex """ Values can be any type that can be coerced to Periods. Ordinals in an ndarray are fastpath-ed to `_from_ordinals` @@ -262,7 +262,7 @@ def _shallow_copy(self, values=None, **kwargs): else: # this differs too if not isinstance(values, PeriodArray): - values = PeriodArray._complex_new(values, freq=self.freq) + values = PeriodArray._from_ordinals(values, freq=self.freq) # I don't like overloading shallow_copy with freq changes. # See if it's used anywhere outside of test_resample_empty_dataframe From 2c0311c5ba819181fd1269eb94e8df8bfb8d3eb7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Oct 2018 15:52:10 -0500 Subject: [PATCH 031/132] The rest of the EA tests --- pandas/core/arrays/period.py | 40 ++++++++++++++++++++++++--- pandas/tests/extension/test_period.py | 36 +++++++++++------------- 2 files changed, 52 insertions(+), 24 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 1e487304888c0..a27627023d67c 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -275,14 +275,46 @@ def __len__(self): def isna(self): return self._data == iNaT - def __setitem__(self, key, value): - - if isinstance(value, compat.Sequence): + def fillna(self, value=None, method=None, limit=None): + from pandas.api.types import is_array_like + from pandas.util._validators import validate_fillna_kwargs + from pandas.core.missing import pad_1d, backfill_1d + + if isinstance(value, ABCSeries): + value = value.values + + value, method = validate_fillna_kwargs(value, method) + + mask = self.isna() + + if is_array_like(value): + if len(value) != len(self): + raise ValueError("Length of 'value' does not match. 
Got ({}) " + " expected {}".format(len(value), len(self))) + value = value[mask] + + if mask.any(): + if method is not None: + func = pad_1d if method == 'pad' else backfill_1d + new_values = func(self._ndarray_values, limit=limit, + mask=mask) + new_values = self._from_ordinals(new_values, freq=self.freq) + else: + # fill with value + new_values = self.copy() + new_values[mask] = value + else: + new_values = self.copy() + return new_values - if len(key) != len(value): + def __setitem__(self, key, value): + if isinstance(value, (compat.Sequence, type(self))): + if len(key) != len(value) and not com.is_bool_indexer(key): msg = ("shape mismatch: value array of length '{}' does not " "match indexing result of length '{}'.") raise ValueError(msg.format(len(key), len(value))) + if len(key) == 0: + return value = type(self)._complex_new(value) if self.freqstr != value.freqstr: diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 27cf70b807254..fbca480fd8dc2 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -66,25 +66,27 @@ class TestGetitem(BasePeriodTests, base.BaseGetitemTests): class TestMethods(BasePeriodTests, base.BaseMethodsTests): def test_combine_add(self, data_repeated): + # Period + Period is not defined. pass - def test_container_shift(self): - raise pytest.xfail('todo') - class TestInterface(BasePeriodTests, base.BaseInterfaceTests): + def test_no_values_attribute(self, data): + # We have a values attribute. pass class TestArithmeticOps(BasePeriodTests, base.BaseArithmeticOpsTests): def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + # we implement substitution... op_name = all_arithmetic_operators if op_name in ('__sub__', '__rsub__'): s = pd.Series(data) self.check_opname(s, op_name, s.iloc[0], exc=None) else: + # ... but not the rest. 
super().test_arith_series_with_scalar(data, all_arithmetic_operators) @@ -97,32 +99,26 @@ class TestCasting(BasePeriodTests, base.BaseCastingTests): class TestComparisonOps(BasePeriodTests, base.BaseComparisonOpsTests): - def test_compare_scalar(self): + + def _compare_other(self): + # the base test is not appropriate for us. We raise on comparison + # with (some) integers, depending on the value. pass class TestMissing(BasePeriodTests, base.BaseMissingTests): + pass - @pytest.mark.xfail(reason="__setitem__") - def test_fillna_scalar(self, data_missing): - super().test_fillna_scalar(data_missing) - @pytest.mark.xfail(reason="__setitem__") - def test_fillna_series(self, data_missing): - super().test_fillna_series(data_missing) - @pytest.mark.xfail(reason="__setitem__") - def test_fillna_frame(self, data_missing): - super().test_fillna_frame(data_missing) +class TestReshaping(BasePeriodTests, base.BaseReshapingTests): + pass -class TestReshaping(BasePeriodTests, base.BaseReshapingTests): +class TestSetitem(BasePeriodTests, base.BaseSetitemTests): pass -# class TestGroupby(BasePeriodTests, base.BaseGroupbyTests): -# pass -# -# -# class TestSetitem(BasePeriodTests, base.BaseSetitemTests): -# pass +class TestGroupby(BasePeriodTests, base.BaseGroupbyTests): + pass + From 012be1c0a62b330cac59a56594d1aefb3e9b2215 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 2 Oct 2018 16:20:25 -0500 Subject: [PATCH 032/132] docs --- doc/source/whatsnew/v0.24.0.txt | 68 +++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 9808f5d735535..291f67a4f67ca 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -138,11 +138,11 @@ Current Behavior: .. 
_whatsnew_0240.enhancements.interval: -Storing Interval Data in Series and DataFrame -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Storing Interval and Period Data in Series and DataFrame +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Interval data may now be stored in a ``Series`` or ``DataFrame``, in addition to an -:class:`IntervalIndex` like previously (:issue:`19453`). +Interval and Period data may now be stored in a ``Series`` or ``DataFrame``, in addition to an +:class:`IntervalIndex` and :class:`PeriodIndex` like previously (:issue:`19453`, :issue:`22862`). .. ipython:: python @@ -150,20 +150,28 @@ Interval data may now be stored in a ``Series`` or ``DataFrame``, in addition to ser ser.dtype -Previously, these would be cast to a NumPy array of ``Interval`` objects. In general, -this should result in better performance when storing an array of intervals in -a :class:`Series`. +And for periods: + +.. ipython:: python + + pser = pd.Series(pd.date_range("2000", freq="D", periods=5)) + pser + pser.dtype -Note that the ``.values`` of a ``Series`` containing intervals is no longer a NumPy +Previously, these would be cast to a NumPy array with object dtype. In general, +this should result in better performance when storing an array of intervals or periods +in a :class:`Series`. + +Note that the ``.values`` of a ``Series`` containing one of these types is no longer a NumPy array, but rather an ``ExtensionArray``: .. ipython:: python ser.values + pser.values This is the same behavior as ``Series.values`` for categorical data. See -:ref:`whatsnew_0240.api_breaking.interval_values` for more. - +:ref:`whatsnew_0240.api_breaking.interval_values` and :ref:`whatsnew_0240.api_breaking.period_values` for more. .. _whatsnew_0240.enhancements.other: @@ -231,13 +239,51 @@ New Behavior: This mirrors ``CategoricalIndex.values``, which returns a ``Categorical``. 
For situations where you need an ``ndarray`` of ``Interval`` objects, use -:meth:`numpy.asarray` or ``idx.astype(object)``. +:meth:`numpy.asarray` or ``idx.values.astype(object)``. .. ipython:: python np.asarray(idx) idx.values.astype(object) + +.. _whatsnew_0240.api_breaking.period_values: + +``PeriodIndex.values`` is now a ``PeriodArray`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``values`` attribute of a :class:`PeriodIndex` now returns a ``PeriodArray`` +rather than a NumPy array of :class:`Period` objects (:issue:`22862`). + +Previous Behavior: + +.. code-block:: ipython + + In [1]: idx = pd.period_range("2000", freq="D", periods=4) + + In [2]: idx.values + Out [2]: + array([Period('2000-01-01', 'D'), Period('2000-01-02', 'D'), + Period('2000-01-03', 'D'), Period('2000-01-04', 'D')], dtype=object) + +New Behavior: + +.. ipython:: python + + idx = pd.period_range("2000", freq="D", periods=4) + idx.values + +This mirrors ``CategoricalIndex.values``, which returns a ``Categorical``. + +For situations where you need an ``ndarray`` of ``Period`` objects, use +:meth:`numpy.asarray` or ``idx.values.astype(object)``. + +.. ipython:: python + + np.asarray(idx) + idx.values.astype(object) + + .. 
_whatsnew_0240.api.timezone_offset_parsing: Parsing Datetime Strings with Timezone Offsets From 67faabcd95ed8bf3a43a02623f18b4ce9d8e4a3f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 3 Oct 2018 14:20:15 -0500 Subject: [PATCH 033/132] rename to time_shift --- pandas/core/arrays/datetimelike.py | 10 +++++----- pandas/core/arrays/period.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1a8cffaca0be1..1ce60510c6a69 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -455,7 +455,7 @@ def _sub_period_array(self, other): def _addsub_int_array(self, other, op): """ Add or subtract array-like of integers equivalent to applying - `_tshift` pointwise. + `_time_shift` pointwise. Parameters ---------- @@ -553,9 +553,9 @@ def shift(self, periods, freq=None): -------- Index.shift : Shift values of Index. """ - return self._tshift(periods=periods, freq=freq) + return self._time_shift(periods=periods, freq=freq) - def _tshift(self, periods, freq=None): + def _time_shift(self, periods, freq=None): """ Shift each value by `periods`. 
@@ -617,7 +617,7 @@ def __add__(self, other): elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these - result = self._tshift(other) + result = self._time_shift(other) # array-like others elif is_timedelta64_dtype(other): @@ -669,7 +669,7 @@ def __sub__(self, other): elif lib.is_integer(other): # This check must come after the check for np.timedelta64 # as is_integer returns True for these - result = self._tshift(-other) + result = self._time_shift(-other) elif isinstance(other, Period): result = self._sub_period(other) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index f42f87cac0634..92803ab5f52e0 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -297,7 +297,7 @@ def _add_offset(self, other): if base != self.freq.rule_code: msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - return self._tshift(other.n) + return self._time_shift(other.n) def _add_delta_td(self, other): assert isinstance(other, (timedelta, np.timedelta64, Tick)) @@ -307,7 +307,7 @@ def _add_delta_td(self, other): if isinstance(own_offset, Tick): offset_nanos = delta_to_nanoseconds(own_offset) if np.all(nanos % offset_nanos == 0): - return self._tshift(nanos // offset_nanos) + return self._time_shift(nanos // offset_nanos) # raise when input doesn't have freq raise IncompatibleFrequency("Input has different freq from " @@ -317,7 +317,7 @@ def _add_delta_td(self, other): def _add_delta(self, other): ordinal_delta = self._maybe_convert_timedelta(other) - return self._tshift(ordinal_delta) + return self._time_shift(ordinal_delta) def shift(self, n): """ @@ -332,9 +332,9 @@ def shift(self, n): ------- shifted : Period Array/Index """ - return self._tshift(n) + return self._time_shift(n) - def _tshift(self, n): + def _time_shift(self, n): values = self._ndarray_values + n * self.freq.n if self.hasnans: values[self._isnan] = iNaT From 
ff7c06c3effaed0a90ac5dd6577d7c302547aa88 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 3 Oct 2018 14:34:42 -0500 Subject: [PATCH 034/132] Squashed commit of the following: commit 11a0d938cdaf7482546691519577b5dd28f69aac Author: Tom Augspurger Date: Wed Oct 3 14:26:34 2018 -0500 typerror commit a0cd5e79eb06ac71cf2f510b1a2122bc2b21fcf0 Author: Tom Augspurger Date: Wed Oct 3 14:25:38 2018 -0500 TypeError for Series commit 2247461ec0b1017db320cb8581337cba0b5c6679 Author: Tom Augspurger Date: Wed Oct 3 13:29:29 2018 -0500 Test op(Series[EA], EA]) commit c9fe5d318d7077f99413532cdaf392ae3ea9cd2c Author: Tom Augspurger Date: Wed Oct 3 13:21:33 2018 -0500 make strict commit 7ef697cffdcd2f8d701de3cdfd2e6897358effbf Author: Tom Augspurger Date: Wed Oct 3 13:14:52 2018 -0500 Use super commit 35d42133acbcb3c25308b1c10e0e2dc3fa1052b6 Merge: 0671e7d67 ee808033b Author: Tom Augspurger Date: Wed Oct 3 13:11:05 2018 -0500 Merge remote-tracking branch 'upstream/master' into ea-divmod commit ee808033bd5c546e7439a06d2ed37b57c9e66844 Author: Matthew Roeschke Date: Wed Oct 3 08:25:44 2018 -0700 BUG: Correctly weekly resample over DST (#22941) * test resample fix * move the localization until needed * BUG: Correctly weekly resample over DST * Move whatsnew to new section commit fea27f0736a4b8f6626da60a6abc2f6e26b8a365 Author: Tom Augspurger Date: Wed Oct 3 08:49:44 2018 -0500 CI: pin moto to 1.3.4 (#22959) commit 15d32bbad832908c9d06a9019e613bb6b35d6878 Author: jbrockmendel Date: Wed Oct 3 04:32:35 2018 -0700 [CLN] Dispatch (some) Frame ops to Series, avoiding _data.eval (#22019) * avoid casting to object dtype in mixed-type frames * Dispatch to Series ops in _combine_match_columns * comment * docstring * flake8 fixup * dont bother with try_cast_result * revert non-central change * simplify * revert try_cast_results * revert non-central changes * Fixup typo syntaxerror * simplify assertion * use dispatch_to_series in combine_match_columns * Pass unwrapped op where appropriate * 
catch correct error * whatsnew note * comment * whatsnew section * remove unnecessary tester * doc fixup commit 3e3256bb6038111812b4b28f6b3b049214d83d2d Author: alimcmaster1 Date: Wed Oct 3 12:23:22 2018 +0100 Allow passing a mask to NanOps (#22865) commit e756e991d57c2656906d0a3e8fc76950844e3f3e Author: jbrockmendel Date: Wed Oct 3 02:19:27 2018 -0700 CLN: Use is_period_dtype instead of ABCPeriodIndex checks (#22958) commit 03181f0569c8b1f93f620a2986b4f174f9b6179b Author: Wenhuan Date: Wed Oct 3 15:28:07 2018 +0800 BUG: fix Series(extension array) + extension array values addition (#22479) commit 04ea51ddf7623b897aaaf2e504952d3c11e88205 Author: Joris Van den Bossche Date: Wed Oct 3 09:24:36 2018 +0200 CLN: small clean-up of IntervalIndex (#22956) commit b0f9a104f323d687a56ea878ff78ff005f37b42d Author: Tony Tao <34781056+tonytao2012@users.noreply.github.com> Date: Tue Oct 2 19:01:08 2018 -0500 DOC GH22893 Fix docstring of groupby in pandas/core/generic.py (#22920) commit 08ecba8dab4a35ad3cad89fe02c7240674938b97 Author: jbrockmendel Date: Tue Oct 2 14:22:53 2018 -0700 BUG: fix DataFrame+DataFrame op with timedelta64 dtype (#22696) commit c44bad24996f9e747f2119fa0c6a90d893f6e2aa Author: Pamela Wu Date: Tue Oct 2 17:16:25 2018 -0400 CLN GH22873 Replace base excepts in pandas/core (#22901) commit 8e749a33b5f814bded42044a4182449d5d6c8213 Author: Pamela Wu Date: Tue Oct 2 17:14:48 2018 -0400 CLN GH22874 replace bare excepts in pandas/io/pytables.py (#22919) commit 1102a33d9776ed316cade079e22be6daa76c9e42 Author: Joris Van den Bossche Date: Tue Oct 2 22:31:36 2018 +0200 DOC/CLN: clean-up shared_docs in generic.py (#20074) commit 9caf04836ad34ca17da7b86ba7120cca58ce142a Author: Tom Augspurger Date: Tue Oct 2 13:25:22 2018 -0500 CI: change windows vm image (#22948) commit 0671e7d67df8b0aa258fd864ef5f3169fe0ffc55 Author: Tom Augspurger Date: Tue Oct 2 11:10:42 2018 -0500 Fixup commit 1b4261f41c70379fa868866bc77e7a31c43baa5d Merge: c92a4a899 1d9f76c50 Author: Tom Augspurger 
Date: Tue Oct 2 10:58:43 2018 -0500 Merge remote-tracking branch 'upstream/master' into ea-divmod commit 1d9f76c5055d1ef31ce76134e88b5568a119f498 Author: Joris Van den Bossche Date: Tue Oct 2 17:11:11 2018 +0200 CLN: remove Index._to_embed (#22879) * CLN: remove Index._to_embed * pep8 commit 6247da0db4835ff723126640145b4fad3ce17343 Author: Tom Augspurger Date: Tue Oct 2 08:50:41 2018 -0500 Provide default implementation for `data_repated` (#22935) commit c92a4a899b8d5e5e6a0479f390a604dc9f624f89 Author: Tom Augspurger Date: Mon Oct 1 16:56:15 2018 -0500 Update old test commit 52538fa03a8c9722ab5c86c88419105b6ebfe5a1 Author: Tom Augspurger Date: Mon Oct 1 16:51:48 2018 -0500 BUG: divmod return type commit 5ce06b5bdb8c44043c6463bf8ce3da758800a189 Author: Matthew Roeschke Date: Mon Oct 1 14:22:20 2018 -0700 BUG: to_datetime preserves name of Index argument in the result (#22918) * BUG: to_datetime preserves name of Index argument in the result * correct test --- ci/travis-27.yaml | 2 +- doc/source/extending.rst | 15 +- doc/source/whatsnew/v0.24.0.txt | 33 +- pandas/core/arrays/base.py | 16 +- pandas/core/arrays/interval.py | 7 +- pandas/core/arrays/period.py | 2 +- pandas/core/computation/pytables.py | 2 +- pandas/core/dtypes/common.py | 2 +- pandas/core/dtypes/dtypes.py | 8 +- pandas/core/frame.py | 22 +- pandas/core/generic.py | 166 +++---- pandas/core/indexes/datetimelike.py | 18 +- pandas/core/indexes/frozen.py | 2 +- pandas/core/indexes/interval.py | 49 +-- pandas/core/indexes/multi.py | 14 +- pandas/core/indexes/range.py | 18 +- pandas/core/indexing.py | 6 +- pandas/core/internals/blocks.py | 10 +- pandas/core/nanops.py | 409 ++++++++++++++++-- pandas/core/ops.py | 62 ++- pandas/core/panel.py | 16 +- pandas/core/resample.py | 16 +- pandas/core/series.py | 5 +- pandas/core/sparse/array.py | 2 +- pandas/core/sparse/series.py | 7 +- pandas/core/tools/datetimes.py | 10 +- pandas/core/window.py | 2 +- pandas/io/pytables.py | 49 ++- 
pandas/tests/arithmetic/test_timedelta64.py | 34 +- pandas/tests/extension/base/ops.py | 13 +- pandas/tests/extension/decimal/array.py | 4 + .../tests/extension/decimal/test_decimal.py | 27 +- pandas/tests/extension/json/test_json.py | 10 + pandas/tests/extension/test_categorical.py | 11 + pandas/tests/extension/test_integer.py | 6 + pandas/tests/frame/test_arithmetic.py | 15 + .../tests/frame/test_axis_select_reindex.py | 2 +- pandas/tests/reshape/test_pivot.py | 8 +- pandas/tests/series/test_operators.py | 10 +- pandas/tests/test_nanops.py | 36 +- pandas/tests/test_resample.py | 22 + 41 files changed, 849 insertions(+), 319 deletions(-) diff --git a/ci/travis-27.yaml b/ci/travis-27.yaml index a921bcb46dba4..6955db363ca1f 100644 --- a/ci/travis-27.yaml +++ b/ci/travis-27.yaml @@ -44,7 +44,7 @@ dependencies: # universal - pytest - pytest-xdist - - moto + - moto==1.3.4 - hypothesis>=3.58.0 - pip: - backports.lzma diff --git a/doc/source/extending.rst b/doc/source/extending.rst index 9422434a1d998..da249cb3592f4 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -160,9 +160,18 @@ your ``MyExtensionArray`` class, as follows: MyExtensionArray._add_arithmetic_ops() MyExtensionArray._add_comparison_ops() -Note that since ``pandas`` automatically calls the underlying operator on each -element one-by-one, this might not be as performant as implementing your own -version of the associated operators directly on the ``ExtensionArray``. + +.. note:: + + Since ``pandas`` automatically calls the underlying operator on each + element one-by-one, this might not be as performant as implementing your own + version of the associated operators directly on the ``ExtensionArray``. + +This implementation will try to reconstruct a new ``ExtensionArray`` with the +result of the element-wise operation. Whether or not that succeeds depends on +whether the operation returns a result that's valid for the ``ExtensionArray``. 
+If an ``ExtensionArray`` cannot be reconstructed, a list containing the scalars +is returned instead. .. _extending.extension.testing: diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 291f67a4f67ca..cfc49bb694325 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -579,6 +579,35 @@ Current Behavior: ... OverflowError: Trying to coerce negative values to unsigned integers +.. _whatsnew_0240.api.crosstab_dtypes: + +Crosstab Preserves Dtypes +^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`crosstab` will now preserve dtypes in some cases that previously would +cast from integer dtype to floating dtype (:issue:`22019`) + +Previous Behavior: + +.. code-block:: ipython + + In [3]: df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4], + ...: 'c': [1, 1, np.nan, 1, 1]}) + In [4]: pd.crosstab(df.a, df.b, normalize='columns') + Out[4]: + b 3 4 + a + 1 0.5 0.0 + 2 0.5 1.0 + +Current Behavior: + +.. code-block:: ipython + + In [3]: df = pd.DataFrame({'a': [1, 2, 2, 2, 2], 'b': [3, 3, 4, 4, 4], + ...: 'c': [1, 1, np.nan, 1, 1]}) + In [4]: pd.crosstab(df.a, df.b, normalize='columns') + Datetimelike API Changes ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -713,7 +742,7 @@ Timedelta - Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`) - Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`) - Fixed bug where subtracting :class:`Timedelta` from an object-dtyped array would raise ``TypeError`` (:issue:`21980`) -- +- Fixed bug in adding a :class:`DataFrame` with all-``timedelta64[ns]`` dtypes to a :class:`DataFrame` with all-integer dtypes returning incorrect results instead of raising ``TypeError`` (:issue:`22696`) - Timezones @@ -841,6 +870,7 @@ Groupby/Resample/Rolling - Bug in :meth:`Resampler.asfreq` when frequency of ``TimedeltaIndex`` is a subperiod of a new frequency (:issue:`13022`). 
- Bug in :meth:`SeriesGroupBy.mean` when values were integral but could not fit inside of int64, overflowing instead. (:issue:`22487`) - :func:`RollingGroupby.agg` and :func:`ExpandingGroupby.agg` now support multiple aggregation functions as parameters (:issue:`15072`) +- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` when resampling by a weekly offset (``'W'``) across a DST transition (:issue:`9119`, :issue:`21459`) Sparse ^^^^^^ @@ -881,4 +911,3 @@ Other - :meth:`DataFrame.nlargest` and :meth:`DataFrame.nsmallest` now returns the correct n values when keep != 'all' also when tied on the first columns (:issue:`22752`) - :meth:`~pandas.io.formats.style.Styler.bar` now also supports tablewise application (in addition to rowwise and columnwise) with ``axis=None`` and setting clipping range with ``vmin`` and ``vmax`` (:issue:`21548` and :issue:`21526`). ``NaN`` values are also handled properly. - Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`) -- diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 00f2753547ac4..45743a3ece0e5 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -775,10 +775,18 @@ def convert_values(param): res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] if coerce_to_dtype: - try: - res = self._from_sequence(res) - except TypeError: - pass + if op.__name__ in {'divmod', 'rdivmod'}: + try: + a, b = zip(*res) + res = (self._from_sequence(a), + self._from_sequence(b)) + except TypeError: + pass + else: + try: + res = self._from_sequence(res) + except TypeError: + pass return res diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 90df596b98296..134999f05364f 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -108,12 +108,7 @@ class IntervalArray(IntervalMixin, ExtensionArray): _na_value = _fill_value = np.nan def __new__(cls, data, closed=None, 
dtype=None, copy=False, - fastpath=False, verify_integrity=True): - - if fastpath: - return cls._simple_new(data.left, data.right, closed, - copy=copy, dtype=dtype, - verify_integrity=False) + verify_integrity=True): if isinstance(data, ABCSeries) and is_interval_dtype(data): data = data.values diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index a27627023d67c..6f6d1670da845 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -539,7 +539,7 @@ def asfreq(self, freq=None, how='E'): if self.hasnans: new_data[self._isnan] = iNaT - return self._simple_new(new_data, freq=freq) + return self._shallow_copy(new_data, freq=freq) # ------------------------------------------------------------------ # Arithmetic Methods diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 2bd1b0c5b3507..e08df3e340138 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -411,7 +411,7 @@ def visit_Subscript(self, node, **kwargs): slobj = self.visit(node.slice) try: value = value.value - except: + except AttributeError: pass try: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 4860c2d766425..82a0aa87976ef 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -468,7 +468,7 @@ def is_timedelta64_dtype(arr_or_dtype): return False try: tipo = _get_dtype_type(arr_or_dtype) - except: + except (TypeError, ValueError, SyntaxError): return False return issubclass(tipo, np.timedelta64) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 492f65cff254c..838821a3706d5 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -360,11 +360,11 @@ def construct_from_string(cls, string): try: if string == 'category': return cls() - except: + else: + raise TypeError("cannot construct a CategoricalDtype") + except AttributeError: pass - raise TypeError("cannot construct a 
CategoricalDtype") - @staticmethod def validate_ordered(ordered): """ @@ -514,7 +514,7 @@ def __new__(cls, unit=None, tz=None): if m is not None: unit = m.groupdict()['unit'] tz = m.groupdict()['tz'] - except: + except TypeError: raise ValueError("could not construct DatetimeTZDtype") elif isinstance(unit, compat.string_types): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b4e8b4e3a6bec..ff7590f6d5358 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3260,7 +3260,7 @@ def _ensure_valid_index(self, value): if not len(self.index) and is_list_like(value): try: value = Series(value) - except: + except (ValueError, NotImplementedError, TypeError): raise ValueError('Cannot set a frame with no defined index ' 'and a value that cannot be converted to a ' 'Series') @@ -3629,7 +3629,8 @@ def align(self, other, join='outer', axis=None, level=None, copy=True, fill_axis=fill_axis, broadcast_axis=broadcast_axis) - @Appender(_shared_docs['reindex'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.reindex.__doc__) @rewrite_axis_style_signature('labels', [('method', None), ('copy', True), ('level', None), @@ -4479,7 +4480,8 @@ def f(vals): # ---------------------------------------------------------------------- # Sorting - @Appender(_shared_docs['sort_values'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.sort_values.__doc__) def sort_values(self, by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last'): inplace = validate_bool_kwarg(inplace, 'inplace') @@ -4521,7 +4523,8 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, else: return self._constructor(new_data).__finalize__(self) - @Appender(_shared_docs['sort_index'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.sort_index.__doc__) def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, 
by=None): @@ -4886,7 +4889,7 @@ def _arith_op(left, right): left, right = ops.fill_binop(left, right, fill_value) return func(left, right) - if this._is_mixed_type or other._is_mixed_type: + if ops.should_series_dispatch(this, other, func): # iterate over columns return ops.dispatch_to_series(this, other, _arith_op) else: @@ -4896,7 +4899,6 @@ def _arith_op(left, right): copy=False) def _combine_match_index(self, other, func, level=None): - assert isinstance(other, Series) left, right = self.align(other, join='outer', axis=0, level=level, copy=False) assert left.index.equals(right.index) @@ -4916,11 +4918,7 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True): left, right = self.align(other, join='outer', axis=1, level=level, copy=False) assert left.columns.equals(right.index) - - new_data = left._data.eval(func=func, other=right, - axes=[left.columns, self.index], - try_cast=try_cast) - return self._constructor(new_data) + return ops.dispatch_to_series(left, right, func, axis="columns") def _combine_const(self, other, func, errors='raise', try_cast=True): if lib.is_scalar(other) or np.ndim(other) == 0: @@ -7747,7 +7745,7 @@ def convert(v): values = np.array([convert(v) for v in values]) else: values = convert(values) - except: + except (ValueError, TypeError): values = convert(values) else: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 393e7caae5fab..cc157cc7228a8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -643,7 +643,8 @@ def _set_axis(self, axis, labels): self._data.set_axis(axis, labels) self._clear_item_cache() - _shared_docs['transpose'] = """ + def transpose(self, *args, **kwargs): + """ Permute the dimensions of the %(klass)s Parameters @@ -663,9 +664,6 @@ def _set_axis(self, axis, labels): y : same as input """ - @Appender(_shared_docs['transpose'] % _shared_doc_kwargs) - def transpose(self, *args, **kwargs): - # construct the args axes, kwargs = self._construct_axes_from_arguments(args, 
kwargs, require_all=True) @@ -965,9 +963,8 @@ def swaplevel(self, i=-2, j=-1, axis=0): # ---------------------------------------------------------------------- # Rename - # TODO: define separate funcs for DataFrame, Series and Panel so you can - # get completion on keyword arguments. - _shared_docs['rename'] = """ + def rename(self, *args, **kwargs): + """ Alter axes input function or functions. Function / dict values must be unique (1-to-1). Labels not contained in a dict / Series will be left as-is. Extra labels listed don't throw an error. Alternatively, change @@ -975,13 +972,11 @@ def swaplevel(self, i=-2, j=-1, axis=0): Parameters ---------- - %(optional_mapper)s %(axes)s : scalar, list-like, dict-like or function, optional Scalar or list-like will alter the ``Series.name`` attribute, and raise on DataFrame or Panel. dict-like or functions are transformations to apply to that axis' values - %(optional_axis)s copy : boolean, default True Also copy underlying data inplace : boolean, default False @@ -1069,12 +1064,6 @@ def swaplevel(self, i=-2, j=-1, axis=0): See the :ref:`user guide ` for more. """ - - @Appender(_shared_docs['rename'] % dict(axes='axes keywords for this' - ' object', klass='NDFrame', - optional_mapper='', - optional_axis='')) - def rename(self, *args, **kwargs): axes, kwargs = self._construct_axes_from_arguments(args, kwargs) copy = kwargs.pop('copy', True) inplace = kwargs.pop('inplace', False) @@ -1127,8 +1116,6 @@ def f(x): else: return result.__finalize__(self) - rename.__doc__ = _shared_docs['rename'] - def rename_axis(self, mapper, axis=0, copy=True, inplace=False): """ Alter the name of the index or columns. @@ -3024,7 +3011,8 @@ def __delitem__(self, key): except KeyError: pass - _shared_docs['_take'] = """ + def _take(self, indices, axis=0, is_copy=True): + """ Return the elements in the given *positional* indices along an axis. 
This means that we are not indexing according to actual values in @@ -3055,9 +3043,6 @@ def __delitem__(self, key): numpy.ndarray.take numpy.take """ - - @Appender(_shared_docs['_take']) - def _take(self, indices, axis=0, is_copy=True): self._consolidate_inplace() new_data = self._data.take(indices, @@ -3072,7 +3057,8 @@ def _take(self, indices, axis=0, is_copy=True): return result - _shared_docs['take'] = """ + def take(self, indices, axis=0, convert=None, is_copy=True, **kwargs): + """ Return the elements in the given *positional* indices along an axis. This means that we are not indexing according to actual values in @@ -3155,9 +3141,6 @@ class max_speed 1 monkey mammal NaN 3 lion mammal 80.5 """ - - @Appender(_shared_docs['take']) - def take(self, indices, axis=0, convert=None, is_copy=True, **kwargs): if convert is not None: msg = ("The 'convert' parameter is deprecated " "and will be removed in a future version.") @@ -3580,7 +3563,9 @@ def add_suffix(self, suffix): mapper = {self._info_axis_name: f} return self.rename(**mapper) - _shared_docs['sort_values'] = """ + def sort_values(self, by=None, axis=0, ascending=True, inplace=False, + kind='quicksort', na_position='last'): + """ Sort by the values along either axis Parameters @@ -3665,17 +3650,12 @@ def add_suffix(self, suffix): 0 A 2 0 1 A 1 1 """ - - def sort_values(self, by=None, axis=0, ascending=True, inplace=False, - kind='quicksort', na_position='last'): - """ - NOT IMPLEMENTED: do not call this method, as sorting values is not - supported for Panel objects and will raise an error. 
- """ raise NotImplementedError("sort_values has not been implemented " "on Panel or Panel4D objects.") - _shared_docs['sort_index'] = """ + def sort_index(self, axis=0, level=None, ascending=True, inplace=False, + kind='quicksort', na_position='last', sort_remaining=True): + """ Sort object by labels (along an axis) Parameters @@ -3703,10 +3683,6 @@ def sort_values(self, by=None, axis=0, ascending=True, inplace=False, ------- sorted_obj : %(klass)s """ - - @Appender(_shared_docs['sort_index'] % dict(axes="axes", klass="NDFrame")) - def sort_index(self, axis=0, level=None, ascending=True, inplace=False, - kind='quicksort', na_position='last', sort_remaining=True): inplace = validate_bool_kwarg(inplace, 'inplace') axis = self._get_axis_number(axis) axis_name = self._get_axis_name(axis) @@ -3724,7 +3700,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, new_axis = labels.take(sort_index) return self.reindex(**{axis_name: new_axis}) - _shared_docs['reindex'] = """ + def reindex(self, *args, **kwargs): + """ Conform %(klass)s to new index with optional filling logic, placing NA/NaN in locations having no value in the previous index. 
A new object is produced unless the new index is equivalent to the current one and @@ -3920,14 +3897,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, ------- reindexed : %(klass)s """ - - # TODO: Decide if we care about having different examples for different - # kinds - - @Appender(_shared_docs['reindex'] % dict(axes="axes", klass="NDFrame", - optional_labels="", - optional_axis="")) - def reindex(self, *args, **kwargs): + # TODO: Decide if we care about having different examples for different + # kinds # construct the args axes, kwargs = self._construct_axes_from_arguments(args, kwargs) @@ -7063,8 +7034,12 @@ def clip_lower(self, threshold, axis=None, inplace=False): def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, **kwargs): """ - Group series using mapper (dict or key function, apply given function - to group, return result as series) or by a series of columns. + Group DataFrame or Series using a mapper or by a Series of columns. + + A groupby operation involves some combination of splitting the + object, applying a function, and combining the results. This can be + used to group large amounts of data and compute operations on these + groups. Parameters ---------- @@ -7077,54 +7052,95 @@ def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, values are used as-is determine the groups. A label or list of labels may be passed to group by the columns in ``self``. Notice that a tuple is interpreted a (single) key. - axis : int, default 0 + axis : {0 or 'index', 1 or 'columns'}, default 0 + Split along rows (0) or columns (1). level : int, level name, or sequence of such, default None If the axis is a MultiIndex (hierarchical), group by a particular - level or levels - as_index : boolean, default True + level or levels. + as_index : bool, default True For aggregated output, return object with group labels as the index. 
Only relevant for DataFrame input. as_index=False is - effectively "SQL-style" grouped output - sort : boolean, default True + effectively "SQL-style" grouped output. + sort : bool, default True Sort group keys. Get better performance by turning this off. Note this does not influence the order of observations within each - group. groupby preserves the order of rows within each group. - group_keys : boolean, default True - When calling apply, add group keys to index to identify pieces - squeeze : boolean, default False - reduce the dimensionality of the return type if possible, - otherwise return a consistent type - observed : boolean, default False - This only applies if any of the groupers are Categoricals + group. Groupby preserves the order of rows within each group. + group_keys : bool, default True + When calling apply, add group keys to index to identify pieces. + squeeze : bool, default False + Reduce the dimensionality of the return type if possible, + otherwise return a consistent type. + observed : bool, default False + This only applies if any of the groupers are Categoricals. If True: only show observed values for categorical groupers. If False: show all values for categorical groupers. .. versionadded:: 0.23.0 + **kwargs + Optional, only accepts keyword argument 'mutated' and is passed + to groupby. + Returns ------- - GroupBy object + DataFrameGroupBy or SeriesGroupBy + Depends on the calling object and returns groupby object that + contains information about the groups. - Examples + See Also -------- - DataFrame results - - >>> data.groupby(func, axis=0).mean() - >>> data.groupby(['col1', 'col2'])['col3'].mean() - - DataFrame with hierarchical index - - >>> data.groupby(['col1', 'col2']).mean() + resample : Convenience method for frequency conversion and resampling + of time series. Notes ----- See the `user guide `_ for more. - See also + Examples -------- - resample : Convenience method for frequency conversion and resampling - of time series. 
+ >>> df = pd.DataFrame({'Animal' : ['Falcon', 'Falcon', + ... 'Parrot', 'Parrot'], + ... 'Max Speed' : [380., 370., 24., 26.]}) + >>> df + Animal Max Speed + 0 Falcon 380.0 + 1 Falcon 370.0 + 2 Parrot 24.0 + 3 Parrot 26.0 + >>> df.groupby(['Animal']).mean() + Max Speed + Animal + Falcon 375.0 + Parrot 25.0 + + **Hierarchical Indexes** + + We can group by different levels of a hierarchical index + using the `level` parameter: + + >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], + ... ['Captive', 'Wild', 'Captive', 'Wild']] + >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) + >>> df = pd.DataFrame({'Max Speed' : [390., 350., 30., 20.]}, + ... index=index) + >>> df + Max Speed + Animal Type + Falcon Captive 390.0 + Wild 350.0 + Parrot Captive 30.0 + Wild 20.0 + >>> df.groupby(level=0).mean() + Max Speed + Animal + Falcon 370.0 + Parrot 25.0 + >>> df.groupby(level=1).mean() + Max Speed + Type + Captive 210.0 + Wild 185.0 """ from pandas.core.groupby.groupby import groupby diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index d722029b85631..e26d1fe11d0fb 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -21,6 +21,7 @@ is_list_like, is_scalar, is_bool_dtype, + is_period_dtype, is_categorical_dtype, is_datetime_or_timedelta_dtype, is_float_dtype, @@ -28,7 +29,7 @@ is_object_dtype, is_string_dtype) from pandas.core.dtypes.generic import ( - ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass) + ABCIndex, ABCSeries, ABCIndexClass) from pandas.core.dtypes.missing import isna from pandas.core import common as com, algorithms, ops @@ -240,9 +241,8 @@ def equals(self, other): # have different timezone return False - # ToDo: Remove this when PeriodDtype is added - elif isinstance(self, ABCPeriodIndex): - if not isinstance(other, ABCPeriodIndex): + elif is_period_dtype(self): + if not is_period_dtype(other): return False if self.freq != other.freq: return False @@ 
-360,7 +360,7 @@ def sort_values(self, return_indexer=False, ascending=True): attribs = self._get_attributes_dict() freq = attribs['freq'] - if freq is not None and not isinstance(self, ABCPeriodIndex): + if freq is not None and not is_period_dtype(self): if freq.n > 0 and not ascending: freq = freq * -1 elif freq.n < 0 and ascending: @@ -390,8 +390,8 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=fill_value, na_value=iNaT) - # keep freq in PeriodIndex, reset otherwise - freq = self.freq if isinstance(self, ABCPeriodIndex) else None + # keep freq in PeriodArray/Index, reset otherwise + freq = self.freq if is_period_dtype(self) else None return self._shallow_copy(taken, freq=freq) _can_hold_na = True @@ -622,7 +622,7 @@ def repeat(self, repeats, *args, **kwargs): Analogous to ndarray.repeat """ nv.validate_repeat(args, kwargs) - if isinstance(self, ABCPeriodIndex): + if is_period_dtype(self): freq = self.freq else: freq = None @@ -677,7 +677,7 @@ def _concat_same_dtype(self, to_concat, name): attribs = self._get_attributes_dict() attribs['name'] = name - if not isinstance(self, ABCPeriodIndex): + if not is_period_dtype(self): # reset freq attribs['freq'] = None diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 5a37e03b700f9..289970aaf3a82 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -139,7 +139,7 @@ def searchsorted(self, value, side="left", sorter=None): # xref: https://github.com/numpy/numpy/issues/5370 try: value = self.dtype.type(value) - except: + except ValueError: pass return super(FrozenNDArray, self).searchsorted( diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 4b125580bd7e0..f72f87aeb2af6 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -146,17 +146,13 @@ class IntervalIndex(IntervalMixin, Index): _mask = None def __new__(cls, data, closed=None, dtype=None, copy=False, - name=None, fastpath=False, 
verify_integrity=True): - - if fastpath: - return cls._simple_new(data, name) + name=None, verify_integrity=True): if name is None and hasattr(data, 'name'): name = data.name with rewrite_exception("IntervalArray", cls.__name__): array = IntervalArray(data, closed=closed, copy=copy, dtype=dtype, - fastpath=fastpath, verify_integrity=verify_integrity) return cls._simple_new(array, name) @@ -187,14 +183,6 @@ def _shallow_copy(self, left=None, right=None, **kwargs): attributes.update(kwargs) return self._simple_new(result, **attributes) - @cache_readonly - def hasnans(self): - """ - Return if the IntervalIndex has any nans; enables various performance - speedups - """ - return self._isnan.any() - @cache_readonly def _isnan(self): """Return a mask indicating if each value is NA""" @@ -206,10 +194,6 @@ def _isnan(self): def _engine(self): return IntervalTree(self.left, self.right, closed=self.closed) - @property - def _constructor(self): - return type(self) - def __contains__(self, key): """ return a boolean if this key is IN the index @@ -394,18 +378,7 @@ def _values(self): @cache_readonly def _ndarray_values(self): - left = self.left - right = self.right - mask = self._isnan - closed = self.closed - - result = np.empty(len(left), dtype=object) - for i in range(len(left)): - if mask[i]: - result[i] = np.nan - else: - result[i] = Interval(left[i], right[i], closed) - return result + return np.array(self._data) def __array__(self, result=None): """ the array interface, return my values """ @@ -892,18 +865,12 @@ def take(self, indices, axis=0, allow_fill=True, return self._simple_new(result, **attributes) def __getitem__(self, value): - mask = self._isnan[value] - if is_scalar(mask) and mask: - return self._na_value - - left = self.left[value] - right = self.right[value] - - # scalar - if not isinstance(left, Index): - return Interval(left, right, self.closed) - - return self._shallow_copy(left, right) + result = self._data[value] + if isinstance(result, IntervalArray): + 
return self._shallow_copy(result) + else: + # scalar + return result # __repr__ associated methods are based on MultiIndex diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 3e6b934e1e863..6091df776a01b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -6,6 +6,7 @@ import numpy as np from pandas._libs import algos as libalgos, index as libindex, lib, Timestamp +from pandas._libs import tslibs from pandas.compat import range, zip, lrange, lzip, map from pandas.compat.numpy import function as nv @@ -1000,14 +1001,15 @@ def _try_mi(k): (compat.PY3 and isinstance(key, compat.string_types))): try: return _try_mi(key) - except (KeyError): + except KeyError: raise - except: + except (IndexError, ValueError, TypeError): pass try: return _try_mi(Timestamp(key)) - except: + except (KeyError, TypeError, + IndexError, ValueError, tslibs.OutOfBoundsDatetime): pass raise InvalidIndexError(key) @@ -1686,7 +1688,7 @@ def append(self, other): # if all(isinstance(x, MultiIndex) for x in other): try: return MultiIndex.from_tuples(new_tuples, names=self.names) - except: + except (TypeError, IndexError): return Index(new_tuples) def argsort(self, *args, **kwargs): @@ -2315,7 +2317,7 @@ def maybe_droplevels(indexer, levels, drop_level): for i in sorted(levels, reverse=True): try: new_index = new_index.droplevel(i) - except: + except ValueError: # no dropping here return orig_index @@ -2818,7 +2820,7 @@ def _convert_can_do_setop(self, other): msg = 'other must be a MultiIndex or a list of tuples' try: other = MultiIndex.from_tuples(other) - except: + except TypeError: raise TypeError(msg) else: result_names = self.names if self.names == other.names else None diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 981bfddeadac1..fd8e17c369f5a 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -512,33 +512,33 @@ def __getitem__(self, key): # This is basically PySlice_GetIndicesEx, 
but delegation to our # super routines if we don't have integers - l = len(self) + length = len(self) # complete missing slice information step = 1 if key.step is None else key.step if key.start is None: - start = l - 1 if step < 0 else 0 + start = length - 1 if step < 0 else 0 else: start = key.start if start < 0: - start += l + start += length if start < 0: start = -1 if step < 0 else 0 - if start >= l: - start = l - 1 if step < 0 else l + if start >= length: + start = length - 1 if step < 0 else length if key.stop is None: - stop = -1 if step < 0 else l + stop = -1 if step < 0 else length else: stop = key.stop if stop < 0: - stop += l + stop += length if stop < 0: stop = -1 - if stop > l: - stop = l + if stop > length: + stop = length # delegate non-integer slices if (start != int(start) or diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b63f874abff85..150518aadcfd9 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2146,7 +2146,7 @@ def _getitem_tuple(self, tup): self._has_valid_tuple(tup) try: return self._getitem_lowerdim(tup) - except: + except IndexingError: pass retval = self.obj @@ -2705,13 +2705,13 @@ def maybe_droplevels(index, key): for _ in key: try: index = index.droplevel(0) - except: + except ValueError: # we have dropped too much, so back out return original_index else: try: index = index.droplevel(0) - except: + except ValueError: pass return index diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6576db9f642a6..0e57dd33b1c4e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -666,7 +666,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, newb = make_block(values, placement=self.mgr_locs, klass=klass, ndim=self.ndim) - except: + except Exception: # noqa: E722 if errors == 'raise': raise newb = self.copy() if copy else self @@ -1142,7 +1142,7 @@ def check_int_bool(self, inplace): # a fill na type method try: m = 
missing.clean_fill_method(method) - except: + except ValueError: m = None if m is not None: @@ -1157,7 +1157,7 @@ def check_int_bool(self, inplace): # try an interp method try: m = missing.clean_interp_method(method, **kwargs) - except: + except ValueError: m = None if m is not None: @@ -2438,7 +2438,7 @@ def set(self, locs, values, check=False): try: if (self.values[locs] == values).all(): return - except: + except (IndexError, ValueError): pass try: self.values[locs] = values @@ -3172,7 +3172,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, def __len__(self): try: return self.sp_index.length - except: + except AttributeError: return 0 def copy(self, deep=True, mgr=None): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 7619d47cbc8f9..2884bc1a19491 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,12 +1,16 @@ -import itertools import functools +import itertools import operator import warnings from distutils.version import LooseVersion import numpy as np + +import pandas.core.common as com from pandas import compat from pandas._libs import tslibs, lib +from pandas.core.config import get_option +from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask from pandas.core.dtypes.common import ( _get_dtype, is_float, is_scalar, @@ -17,10 +21,7 @@ is_datetime64_dtype, is_timedelta64_dtype, is_datetime_or_timedelta_dtype, is_int_or_datetime_dtype, is_any_int_dtype) -from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype -from pandas.core.config import get_option -import pandas.core.common as com _BOTTLENECK_INSTALLED = False _MIN_BOTTLENECK_VERSION = '1.0.0' @@ -200,16 +201,18 @@ def _get_fill_value(dtype, fill_value=None, fill_value_typ=None): def _get_values(values, skipna, fill_value=None, fill_value_typ=None, - isfinite=False, copy=True): + isfinite=False, copy=True, mask=None): """ utility to get the values view, 
mask, dtype if necessary copy and mask using the specified fill_value copy = True will force the copy """ values = com.values_from_object(values) - if isfinite: - mask = _isfinite(values) - else: - mask = isna(values) + + if mask is None: + if isfinite: + mask = _isfinite(values) + else: + mask = isna(values) dtype = values.dtype dtype_ok = _na_ok_dtype(dtype) @@ -315,19 +318,98 @@ def _na_for_min_count(values, axis): return result -def nanany(values, axis=None, skipna=True): - values, mask, dtype, _ = _get_values(values, skipna, False, copy=skipna) +def nanany(values, axis=None, skipna=True, mask=None): + """ + Check if any elements along an axis evaluate to True. + + Parameters + ---------- + values : ndarray + axis : int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : bool + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2]) + >>> nanops.nanany(s) + True + + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([np.nan]) + >>> nanops.nanany(s) + False + """ + values, mask, dtype, _ = _get_values(values, skipna, False, copy=skipna, + mask=mask) return values.any(axis) -def nanall(values, axis=None, skipna=True): - values, mask, dtype, _ = _get_values(values, skipna, True, copy=skipna) +def nanall(values, axis=None, skipna=True, mask=None): + """ + Check if all elements along an axis evaluate to True. 
+ + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : bool + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nanall(s) + True + + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 0]) + >>> nanops.nanall(s) + False + """ + values, mask, dtype, _ = _get_values(values, skipna, True, copy=skipna, + mask=mask) return values.all(axis) @disallow('M8') -def nansum(values, axis=None, skipna=True, min_count=0): - values, mask, dtype, dtype_max = _get_values(values, skipna, 0) +def nansum(values, axis=None, skipna=True, min_count=0, mask=None): + """ + Sum the elements along an axis ignoring NaNs + + Parameters + ---------- + values : ndarray[dtype] + axis: int, optional + skipna : bool, default True + min_count: int, default 0 + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : dtype + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nansum(s) + 3.0 + """ + values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask) dtype_sum = dtype_max if is_float_dtype(dtype): dtype_sum = dtype @@ -341,9 +423,32 @@ def nansum(values, axis=None, skipna=True, min_count=0): @disallow('M8') @bottleneck_switch() -def nanmean(values, axis=None, skipna=True): - values, mask, dtype, dtype_max = _get_values(values, skipna, 0) +def nanmean(values, axis=None, skipna=True, mask=None): + """ + Compute the mean of the element along an axis ignoring NaNs + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, np.nan]) + >>> nanops.nanmean(s) + 1.5 + """ + values, mask, dtype, dtype_max = _get_values(values, skipna, 0, mask=mask) dtype_sum = dtype_max dtype_count = np.float64 if is_integer_dtype(dtype) or is_timedelta64_dtype(dtype): @@ -367,15 +472,36 @@ def nanmean(values, axis=None, skipna=True): @disallow('M8') @bottleneck_switch() -def nanmedian(values, axis=None, skipna=True): +def nanmedian(values, axis=None, skipna=True, mask=None): + """ + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 2]) + >>> nanops.nanmedian(s) + 2.0 + """ def get_median(x): mask = notna(x) if not skipna and not mask.all(): return np.nan return np.nanmedian(x[mask]) - values, mask, dtype, dtype_max = _get_values(values, skipna) + values, mask, dtype, dtype_max = _get_values(values, skipna, mask=mask) if not is_float_dtype(values): values = values.astype('f8') values[mask] = np.nan @@ -431,18 +557,73 @@ def _get_counts_nanvar(mask, axis, ddof, dtype=float): @disallow('M8') @bottleneck_switch(ddof=1) -def nanstd(values, axis=None, skipna=True, ddof=1): - result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof)) +def nanstd(values, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the standard deviation along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. 
+ mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nanstd(s) + 1.0 + """ + result = np.sqrt(nanvar(values, axis=axis, skipna=skipna, ddof=ddof, + mask=mask)) return _wrap_results(result, values.dtype) @disallow('M8') @bottleneck_switch(ddof=1) -def nanvar(values, axis=None, skipna=True, ddof=1): +def nanvar(values, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the variance along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + Returns + ------- + result : float + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nanvar(s) + 1.0 + """ values = com.values_from_object(values) dtype = values.dtype - mask = isna(values) + if mask is None: + mask = isna(values) if is_any_int_dtype(values): values = values.astype('f8') values[mask] = np.nan @@ -465,7 +646,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1): avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count if axis is not None: avg = np.expand_dims(avg, axis) - sqr = _ensure_numeric((avg - values)**2) + sqr = _ensure_numeric((avg - values) ** 2) np.putmask(sqr, mask, 0) result = sqr.sum(axis=axis, dtype=np.float64) / d @@ -478,12 +659,41 @@ def nanvar(values, axis=None, skipna=True, ddof=1): @disallow('M8', 'm8') -def nansem(values, axis=None, skipna=True, ddof=1): +def nansem(values, axis=None, skipna=True, ddof=1, mask=None): + """ + Compute the standard error in the mean along given axis while ignoring NaNs + + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + ddof : int, default 1 + Delta Degrees of Freedom. The divisor used in calculations is N - ddof, + where N represents the number of elements. + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, np.nan, 2, 3]) + >>> nanops.nansem(s) + 0.5773502691896258 + """ + # This checks if non-numeric-like data is passed with numeric_only=False # and raises a TypeError otherwise - nanvar(values, axis, skipna, ddof=ddof) + nanvar(values, axis, skipna, ddof=ddof, mask=mask) - mask = isna(values) + if mask is None: + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count, _ = _get_counts_nanvar(mask, axis, ddof, values.dtype) @@ -494,16 +704,17 @@ def nansem(values, axis=None, skipna=True, ddof=1): def _nanminmax(meth, fill_value_typ): @bottleneck_switch() - def reduction(values, axis=None, skipna=True): + def reduction(values, axis=None, skipna=True, mask=None): values, mask, dtype, dtype_max = _get_values( - values, skipna, fill_value_typ=fill_value_typ, ) + values, skipna, fill_value_typ=fill_value_typ, mask=mask) if ((axis is not None and values.shape[axis] == 0) or values.size == 0): try: result = getattr(values, meth)(axis, dtype=dtype_max) result.fill(np.nan) - except: + except (AttributeError, TypeError, + ValueError, np.core._internal.AxisError): result = np.nan else: result = getattr(values, meth)(axis) @@ -520,39 +731,97 @@ def reduction(values, axis=None, skipna=True): @disallow('O') -def nanargmax(values, axis=None, skipna=True): +def nanargmax(values, axis=None, skipna=True, mask=None): """ - Returns -1 in the NA case + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + -------- + result : int + The index of max value in specified axis or -1 in the NA case + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmax(s) + 4 """ - values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='-inf') + values, mask, dtype, _ = _get_values(values, 
skipna, fill_value_typ='-inf', + mask=mask) result = values.argmax(axis) result = _maybe_arg_null_out(result, axis, mask, skipna) return result @disallow('O') -def nanargmin(values, axis=None, skipna=True): +def nanargmin(values, axis=None, skipna=True, mask=None): """ - Returns -1 in the NA case + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + -------- + result : int + The index of min value in specified axis or -1 in the NA case + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmin(s) + 0 """ - values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf') + values, mask, dtype, _ = _get_values(values, skipna, fill_value_typ='+inf', + mask=mask) result = values.argmin(axis) result = _maybe_arg_null_out(result, axis, mask, skipna) return result @disallow('M8', 'm8') -def nanskew(values, axis=None, skipna=True): +def nanskew(values, axis=None, skipna=True, mask=None): """ Compute the sample skewness. The statistic computed here is the adjusted Fisher-Pearson standardized moment coefficient G1. The algorithm computes this coefficient directly from the second and third central moment. - """ + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1,np.nan, 1, 2]) + >>> nanops.nanskew(s) + 1.7320508075688787 + """ values = com.values_from_object(values) - mask = isna(values) + if mask is None: + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count = _get_counts(mask, axis) @@ -601,16 +870,38 @@ def nanskew(values, axis=None, skipna=True): @disallow('M8', 'm8') -def nankurt(values, axis=None, skipna=True): - """ Compute the sample excess kurtosis. +def nankurt(values, axis=None, skipna=True, mask=None): + """ + Compute the sample excess kurtosis The statistic computed here is the adjusted Fisher-Pearson standardized moment coefficient G2, computed directly from the second and fourth central moment. + Parameters + ---------- + values : ndarray + axis: int, optional + skipna : bool, default True + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : float64 + Unless input is a float array, in which case use the same + precision as the input array. 
+ + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1,np.nan, 1, 3, 2]) + >>> nanops.nankurt(s) + -1.2892561983471076 """ values = com.values_from_object(values) - mask = isna(values) + if mask is None: + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count = _get_counts(mask, axis) @@ -636,7 +927,7 @@ def nankurt(values, axis=None, skipna=True): with np.errstate(invalid='ignore', divide='ignore'): adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3)) numer = count * (count + 1) * (count - 1) * m4 - denom = (count - 2) * (count - 3) * m2**2 + denom = (count - 2) * (count - 3) * m2 ** 2 # floating point error # @@ -668,8 +959,34 @@ def nankurt(values, axis=None, skipna=True): @disallow('M8', 'm8') -def nanprod(values, axis=None, skipna=True, min_count=0): - mask = isna(values) +def nanprod(values, axis=None, skipna=True, min_count=0, mask=None): + """ + Parameters + ---------- + values : ndarray[dtype] + axis: int, optional + skipna : bool, default True + min_count: int, default 0 + mask : ndarray[bool], optional + nan-mask if known + + Returns + ------- + result : dtype + + Examples + -------- + >>> import pandas.core.nanops as nanops + >>> s = pd.Series([1, 2, 3, np.nan]) + >>> nanops.nanprod(s) + 6.0 + + Returns + -------- + The product of all elements on a given axis. 
( NaNs are treated as 1) + """ + if mask is None: + mask = isna(values) if skipna and not is_any_int_dtype(values): values = values.copy() values[mask] = 1 @@ -815,7 +1132,7 @@ def _ensure_numeric(x): elif is_object_dtype(x): try: x = x.astype(np.complex128) - except: + except (TypeError, ValueError): x = x.astype(np.float64) else: if not np.any(x.imag): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index b4848a0abeeb5..982b08a5562fb 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -900,6 +900,42 @@ def invalid_comparison(left, right, op): return res_values +# ----------------------------------------------------------------------------- +# Dispatch logic + +def should_series_dispatch(left, right, op): + """ + Identify cases where a DataFrame operation should dispatch to its + Series counterpart. + + Parameters + ---------- + left : DataFrame + right : DataFrame + op : binary operator + + Returns + ------- + override : bool + """ + if left._is_mixed_type or right._is_mixed_type: + return True + + if not len(left.columns) or not len(right.columns): + # ensure obj.dtypes[0] exists for each obj + return False + + ldtype = left.dtypes.iloc[0] + rdtype = right.dtypes.iloc[0] + + if ((is_timedelta64_dtype(ldtype) and is_integer_dtype(rdtype)) or + (is_timedelta64_dtype(rdtype) and is_integer_dtype(ldtype))): + # numpy integer dtypes as timedelta64 dtypes in this scenario + return True + + return False + + # ----------------------------------------------------------------------------- # Functions that add arithmetic methods to objects, given arithmetic factory # methods @@ -1553,7 +1589,8 @@ def na_op(x, y): y = bool(y) try: result = libops.scalar_binop(x, y, op) - except: + except (TypeError, ValueError, AttributeError, + OverflowError, NotImplementedError): raise TypeError("cannot compare a dtyped [{dtype}] array " "with a scalar of type [{typ}]" .format(dtype=x.dtype, @@ -1637,7 +1674,7 @@ def flex_wrapper(self, other, level=None, fill_value=None, 
axis=0): # ----------------------------------------------------------------------------- # DataFrame -def dispatch_to_series(left, right, func, str_rep=None): +def dispatch_to_series(left, right, func, str_rep=None, axis=None): """ Evaluate the frame operation func(left, right) by evaluating column-by-column, dispatching to the Series implementation. @@ -1648,6 +1685,7 @@ def dispatch_to_series(left, right, func, str_rep=None): right : scalar or DataFrame func : arithmetic or comparison operator str_rep : str or None, default None + axis : {None, 0, 1, "index", "columns"} Returns ------- @@ -1671,6 +1709,15 @@ def column_op(a, b): return {i: func(a.iloc[:, i], b.iloc[:, i]) for i in range(len(a.columns))} + elif isinstance(right, ABCSeries) and axis == "columns": + # We only get here if called via left._combine_match_columns, + # in which case we specifically want to operate row-by-row + assert right.index.equals(left.columns) + + def column_op(a, b): + return {i: func(a.iloc[:, i], b.iloc[i]) + for i in range(len(a.columns))} + elif isinstance(right, ABCSeries): assert right.index.equals(left.index) # Handle other cases later @@ -1810,10 +1857,15 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): other = _align_method_FRAME(self, other, axis) - if isinstance(other, ABCDataFrame): # Another DataFrame - return self._combine_frame(other, na_op, fill_value, level) + if isinstance(other, ABCDataFrame): + # Another DataFrame + pass_op = op if should_series_dispatch(self, other, op) else na_op + return self._combine_frame(other, pass_op, fill_value, level) elif isinstance(other, ABCSeries): - return _combine_series_frame(self, other, na_op, + # For these values of `axis`, we end up dispatching to Series op, + # so do not want the masked op. 
+ pass_op = op if axis in [0, "columns", None] else na_op + return _combine_series_frame(self, other, pass_op, fill_value=fill_value, axis=axis, level=level, try_cast=True) else: diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 81d1e83ee6870..1e2d4000413bb 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -1215,7 +1215,8 @@ def _wrap_result(self, result, axis): return self._construct_return_type(result, axes) - @Appender(_shared_docs['reindex'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.reindex.__doc__) def reindex(self, *args, **kwargs): major = kwargs.pop("major", None) minor = kwargs.pop('minor', None) @@ -1236,7 +1237,8 @@ def reindex(self, *args, **kwargs): kwargs.pop('labels', None) return super(Panel, self).reindex(**kwargs) - @Appender(_shared_docs['rename'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.rename.__doc__) def rename(self, items=None, major_axis=None, minor_axis=None, **kwargs): major_axis = (major_axis if major_axis is not None else kwargs.pop('major', None)) @@ -1253,7 +1255,8 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, copy=copy, limit=limit, fill_value=fill_value) - @Appender(_shared_docs['transpose'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(NDFrame.transpose.__doc__) def transpose(self, *args, **kwargs): # check if a list of axes was passed in instead as a # single *args element @@ -1536,6 +1539,13 @@ def _extract_axis(self, data, axis=0, intersect=False): return ensure_index(index) + def sort_values(self, *args, **kwargs): + """ + NOT IMPLEMENTED: do not call this method, as sorting values is not + supported for Panel objects and will raise an error. 
+ """ + super(Panel, self).sort_values(*args, **kwargs) + Panel._setup_axes(axes=['items', 'major_axis', 'minor_axis'], info_axis=0, stat_axis=1, aliases={'major': 'major_axis', diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 878ac957a8557..70a8deb33b7f2 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -16,7 +16,8 @@ from pandas.tseries.frequencies import to_offset, is_subperiod, is_superperiod from pandas.core.indexes.datetimes import DatetimeIndex, date_range from pandas.core.indexes.timedeltas import TimedeltaIndex -from pandas.tseries.offsets import DateOffset, Tick, Day, delta_to_nanoseconds +from pandas.tseries.offsets import (DateOffset, Tick, Day, + delta_to_nanoseconds, Nano) from pandas.core.indexes.period import PeriodIndex from pandas.errors import AbstractMethodError import pandas.core.algorithms as algos @@ -1395,18 +1396,21 @@ def _get_time_bins(self, ax): def _adjust_bin_edges(self, binner, ax_values): # Some hacks for > daily data, see #1471, #1458, #1483 - bin_edges = binner.asi8 - if self.freq != 'D' and is_superperiod(self.freq, 'D'): - day_nanos = delta_to_nanoseconds(timedelta(1)) if self.closed == 'right': - bin_edges = bin_edges + day_nanos - 1 + # GH 21459, GH 9119: Adjust the bins relative to the wall time + bin_edges = binner.tz_localize(None) + bin_edges = bin_edges + timedelta(1) - Nano(1) + bin_edges = bin_edges.tz_localize(binner.tz).asi8 + else: + bin_edges = binner.asi8 # intraday values on last day if bin_edges[-2] > ax_values.max(): bin_edges = bin_edges[:-1] binner = binner[:-1] - + else: + bin_edges = binner.asi8 return binner, bin_edges def _get_time_delta_bins(self, ax): diff --git a/pandas/core/series.py b/pandas/core/series.py index 5c91a91209d14..0fbe5e5ed7b8f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3496,7 +3496,8 @@ def rename(self, index=None, **kwargs): return self._set_name(index, inplace=kwargs.get('inplace')) return super(Series, 
self).rename(index=index, **kwargs) - @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(generic.NDFrame.reindex.__doc__) def reindex(self, index=None, **kwargs): return super(Series, self).reindex(index=index, **kwargs) @@ -3680,7 +3681,7 @@ def memory_usage(self, index=True, deep=False): v += self.index.memory_usage(deep=deep) return v - @Appender(generic._shared_docs['_take']) + @Appender(generic.NDFrame._take.__doc__) def _take(self, indices, axis=0, is_copy=False): indices = ensure_platform_int(indices) diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index eb07e5ef6c85f..186a2490a5f2e 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -306,7 +306,7 @@ def __setstate__(self, state): def __len__(self): try: return self.sp_index.length - except: + except AttributeError: return 0 def __unicode__(self): diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 8ac5d81f23bb2..97cd3a0a1fb6a 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -19,7 +19,7 @@ import pandas.core.indexes.base as ibase import pandas.core.ops as ops import pandas._libs.index as libindex -from pandas.util._decorators import Appender +from pandas.util._decorators import Appender, Substitution from pandas.core.sparse.array import ( make_sparse, SparseArray, @@ -563,7 +563,8 @@ def copy(self, deep=True): return self._constructor(new_data, sparse_index=self.sp_index, fill_value=self.fill_value).__finalize__(self) - @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs) + @Substitution(**_shared_doc_kwargs) + @Appender(generic.NDFrame.reindex.__doc__) def reindex(self, index=None, method=None, copy=True, limit=None, **kwargs): @@ -592,7 +593,7 @@ def sparse_reindex(self, new_index): sparse_index=new_index, fill_value=self.fill_value).__finalize__(self) - @Appender(generic._shared_docs['take']) + 
@Appender(generic.NDFrame.take.__doc__) def take(self, indices, axis=0, convert=None, *args, **kwargs): if convert is not None: msg = ("The 'convert' parameter is deprecated " diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 4a5290a90313d..eb8d2b0b6c809 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -244,7 +244,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, if format == '%Y%m%d': try: result = _attempt_YYYYMMDD(arg, errors=errors) - except: + except (ValueError, TypeError, tslibs.OutOfBoundsDatetime): raise ValueError("cannot convert the input to " "'%Y%m%d' date format") @@ -334,7 +334,7 @@ def _adjust_to_origin(arg, origin, unit): raise ValueError("unit must be 'D' for origin='julian'") try: arg = arg - j0 - except: + except TypeError: raise ValueError("incompatible 'arg' type for given " "'origin'='julian'") @@ -731,21 +731,21 @@ def calc_with_mask(carg, mask): # try intlike / strings that are ints try: return calc(arg.astype(np.int64)) - except: + except ValueError: pass # a float with actual np.nan try: carg = arg.astype(np.float64) return calc_with_mask(carg, notna(carg)) - except: + except ValueError: pass # string with NaN-like try: mask = ~algorithms.isin(arg, list(tslib.nat_strings)) return calc_with_mask(arg, mask) - except: + except ValueError: pass return None diff --git a/pandas/core/window.py b/pandas/core/window.py index 5cdf62d5a5537..4281d66a640e3 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -2504,7 +2504,7 @@ def _offset(window, center): offset = (window - 1) / 2. 
if center else 0 try: return int(offset) - except: + except TypeError: return offset.astype(int) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index c57b1c3e211f6..fc9e415ed38f7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -258,7 +258,7 @@ def _tables(): try: _table_file_open_policy_is_strict = ( tables.file._FILE_OPEN_POLICY == 'strict') - except: + except AttributeError: pass return _table_mod @@ -395,11 +395,11 @@ def read_hdf(path_or_buf, key=None, mode='r', **kwargs): 'contains multiple datasets.') key = candidate_only_group._v_pathname return store.select(key, auto_close=auto_close, **kwargs) - except: + except (ValueError, TypeError): # if there is an error, close the store try: store.close() - except: + except AttributeError: pass raise @@ -517,7 +517,7 @@ def __getattr__(self, name): """ allow attribute access to get stores """ try: return self.get(name) - except: + except (KeyError, ClosedFileError): pass raise AttributeError("'%s' object has no attribute '%s'" % (type(self).__name__, name)) @@ -675,7 +675,7 @@ def flush(self, fsync=False): if fsync: try: os.fsync(self._handle.fileno()) - except: + except OSError: pass def get(self, key): @@ -1161,7 +1161,7 @@ def get_node(self, key): if not key.startswith('/'): key = '/' + key return self._handle.get_node(self.root, key) - except: + except _table_mod.exceptions.NoSuchNodeError: return None def get_storer(self, key): @@ -1270,7 +1270,7 @@ def _validate_format(self, format, kwargs): # validate try: kwargs['format'] = _FORMAT_MAP[format.lower()] - except: + except KeyError: raise TypeError("invalid HDFStore format specified [{0}]" .format(format)) @@ -1307,7 +1307,7 @@ def error(t): try: pt = _TYPE_MAP[type(value)] - except: + except KeyError: error('_TYPE_MAP') # we are actually a table @@ -1318,7 +1318,7 @@ def error(t): if u('table') not in pt: try: return globals()[_STORER_MAP[pt]](self, group, **kwargs) - except: + except KeyError: error('_STORER_MAP') # existing node 
(and must be a table) @@ -1354,12 +1354,12 @@ def error(t): fields = group.table._v_attrs.fields if len(fields) == 1 and fields[0] == u('value'): tt = u('legacy_frame') - except: + except IndexError: pass try: return globals()[_TABLE_MAP[tt]](self, group, **kwargs) - except: + except KeyError: error('_TABLE_MAP') def _write_to_group(self, key, value, format, index=True, append=False, @@ -1624,7 +1624,7 @@ def is_indexed(self): """ return whether I am an indexed column """ try: return getattr(self.table.cols, self.cname).is_indexed - except: + except AttributeError: False def copy(self): @@ -1654,9 +1654,10 @@ def convert(self, values, nan_rep, encoding, errors): kwargs['freq'] = _ensure_decoded(self.freq) if self.index_name is not None: kwargs['name'] = _ensure_decoded(self.index_name) + # making an Index instance could throw a number of different errors try: self.values = Index(values, **kwargs) - except: + except Exception: # noqa: E722 # if the output freq is different that what we recorded, # it should be None (see also 'doc example part 2') @@ -1869,7 +1870,7 @@ def create_for_block( m = re.search(r"values_block_(\d+)", name) if m: name = "values_%s" % m.groups()[0] - except: + except IndexError: pass return cls(name=name, cname=cname, **kwargs) @@ -2232,7 +2233,7 @@ def convert(self, values, nan_rep, encoding, errors): try: self.data = self.data.astype(dtype, copy=False) - except: + except TypeError: self.data = self.data.astype('O', copy=False) # convert nans / decode @@ -2325,7 +2326,7 @@ def set_version(self): self.version = tuple(int(x) for x in version.split('.')) if len(self.version) == 2: self.version = self.version + (0,) - except: + except AttributeError: self.version = (0, 0, 0) @property @@ -2769,7 +2770,7 @@ def write_array(self, key, value, items=None): else: try: items = list(items) - except: + except TypeError: pass ws = performance_doc % (inferred_type, key, items) warnings.warn(ws, PerformanceWarning, stacklevel=7) @@ -2843,7 +2844,7 @@ class 
SeriesFixed(GenericFixed): def shape(self): try: return len(getattr(self.group, 'values')), - except: + except (TypeError, AttributeError): return None def read(self, **kwargs): @@ -2961,7 +2962,7 @@ def shape(self): shape = shape[::-1] return shape - except: + except AttributeError: return None def read(self, start=None, stop=None, **kwargs): @@ -3495,7 +3496,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, if axes is None: try: axes = _AXES_MAP[type(obj)] - except: + except KeyError: raise TypeError("cannot properly create the storer for: " "[group->%s,value->%s]" % (self.group._v_name, type(obj))) @@ -3614,7 +3615,7 @@ def get_blk_items(mgr, blocks): b, b_items = by_items.pop(items) new_blocks.append(b) new_blk_items.append(b_items) - except: + except (IndexError, KeyError): raise ValueError( "cannot match existing table structure for [%s] on " "appending data" % ','.join(pprint_thing(item) for @@ -3642,7 +3643,7 @@ def get_blk_items(mgr, blocks): if existing_table is not None and validate: try: existing_col = existing_table.values_axes[i] - except: + except (IndexError, KeyError): raise ValueError("Incompatible appended table [%s] with " "existing table [%s]" % (blocks, existing_table.values_axes)) @@ -4460,7 +4461,7 @@ def _get_info(info, name): """ get/create the info for this name """ try: idx = info[name] - except: + except KeyError: idx = info[name] = dict() return idx @@ -4782,7 +4783,7 @@ def __init__(self, table, where=None, start=None, stop=None, **kwargs): ) self.coordinates = where - except: + except ValueError: pass if self.coordinates is None: diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 5050922173564..a09efe6d4761c 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -505,33 +505,25 @@ def test_tdi_add_dt64_array(self, box_df_broadcast_failure): # 
------------------------------------------------------------------ # Operations with int-like others - def test_td64arr_add_int_series_invalid(self, box_df_broadcast_failure, - tdser): - box = box_df_broadcast_failure + def test_td64arr_add_int_series_invalid(self, box, tdser): tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): tdser + Series([2, 3, 4]) - def test_td64arr_radd_int_series_invalid(self, box_df_broadcast_failure, - tdser): - box = box_df_broadcast_failure + def test_td64arr_radd_int_series_invalid(self, box, tdser): tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): Series([2, 3, 4]) + tdser - def test_td64arr_sub_int_series_invalid(self, box_df_broadcast_failure, - tdser): - box = box_df_broadcast_failure + def test_td64arr_sub_int_series_invalid(self, box, tdser): tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): tdser - Series([2, 3, 4]) - def test_td64arr_rsub_int_series_invalid(self, box_df_broadcast_failure, - tdser): - box = box_df_broadcast_failure + def test_td64arr_rsub_int_series_invalid(self, box, tdser): tdser = tm.box_expected(tdser, box) err = TypeError if box is not pd.Index else NullFrequencyError with pytest.raises(err): @@ -605,9 +597,10 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box, scalar, tdser): Series([1, 2, 3]) # TODO: Add DataFrame in here? 
], ids=lambda x: type(x).__name__) - def test_td64arr_add_sub_numeric_arr_invalid( - self, box_df_broadcast_failure, vec, dtype, tdser): - box = box_df_broadcast_failure + def test_td64arr_add_sub_numeric_arr_invalid(self, box, vec, dtype, tdser): + if box is pd.DataFrame and not isinstance(vec, Series): + raise pytest.xfail(reason="Tries to broadcast incorrectly") + tdser = tm.box_expected(tdser, box) err = TypeError if box is pd.Index and not dtype.startswith('float'): @@ -930,9 +923,9 @@ def test_td64arr_sub_offset_array(self, box_df_broadcast_failure): @pytest.mark.parametrize('names', [(None, None, None), ('foo', 'bar', None), ('foo', 'foo', 'foo')]) - def test_td64arr_with_offset_series(self, names, box_df_broadcast_failure): + def test_td64arr_with_offset_series(self, names, box_df_fail): # GH#18849 - box = box_df_broadcast_failure + box = box_df_fail box2 = Series if box is pd.Index else box tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00'], @@ -963,10 +956,11 @@ def test_td64arr_with_offset_series(self, names, box_df_broadcast_failure): tm.assert_equal(res3, expected_sub) @pytest.mark.parametrize('obox', [np.array, pd.Index, pd.Series]) - def test_td64arr_addsub_anchored_offset_arraylike( - self, obox, box_df_broadcast_failure): + def test_td64arr_addsub_anchored_offset_arraylike(self, obox, box): # GH#18824 - box = box_df_broadcast_failure + if box is pd.DataFrame and obox is not pd.Series: + raise pytest.xfail(reason="Attempts to broadcast incorrectly") + tdi = TimedeltaIndex(['1 days 00:00:00', '3 days 04:00:00']) tdi = tm.box_expected(tdi, box) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 05351c56862b8..36696bc292162 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -58,7 +58,8 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): s = pd.Series(data) self.check_opname(s, op_name, s.iloc[0], exc=TypeError) - 
@pytest.mark.xfail(run=False, reason="_reduce needs implementation") + @pytest.mark.xfail(run=False, reason="_reduce needs implementation", + strict=True) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar op_name = all_arithmetic_operators @@ -77,6 +78,16 @@ def test_divmod(self, data): self._check_divmod_op(s, divmod, 1, exc=TypeError) self._check_divmod_op(1, ops.rdivmod, s, exc=TypeError) + def test_divmod_series_array(self, data): + s = pd.Series(data) + self._check_divmod_op(s, divmod, data) + + def test_add_series_with_extension_array(self, data): + s = pd.Series(data) + result = s + data + expected = pd.Series(data + data) + self.assert_series_equal(result, expected) + def test_error(self, data, all_arithmetic_operators): # invalid ops op_name = all_arithmetic_operators diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 387942234e6fd..f324cc2e0f345 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -138,5 +138,9 @@ def _concat_same_type(cls, to_concat): return cls(np.concatenate([x._data for x in to_concat])) +def to_decimal(values, context=None): + return DecimalArray([decimal.Decimal(x) for x in values], context=context) + + DecimalArray._add_arithmetic_ops() DecimalArray._add_comparison_ops() diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 93b8ea786ef5b..928ee83df8dc9 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -8,7 +8,7 @@ from pandas.tests.extension import base -from .array import DecimalDtype, DecimalArray +from .array import DecimalDtype, DecimalArray, to_decimal def make_data(): @@ -244,9 +244,28 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): context.traps[decimal.DivisionByZero] = divbyzerotrap context.traps[decimal.InvalidOperation] = 
invalidoptrap - @pytest.mark.skip(reason="divmod not appropriate for decimal") - def test_divmod(self, data): - pass + @pytest.mark.parametrize("reverse, expected_div, expected_mod", [ + (False, [0, 1, 1, 2], [1, 0, 1, 0]), + (True, [2, 1, 0, 0], [0, 0, 2, 2]), + ]) + def test_divmod_array(self, reverse, expected_div, expected_mod): + # https://github.com/pandas-dev/pandas/issues/22930 + arr = to_decimal([1, 2, 3, 4]) + if reverse: + div, mod = divmod(2, arr) + else: + div, mod = divmod(arr, 2) + expected_div = to_decimal(expected_div) + expected_mod = to_decimal(expected_mod) + + tm.assert_extension_array_equal(div, expected_div) + tm.assert_extension_array_equal(mod, expected_mod) + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + # We implement divmod + super(TestArithmeticOps, self)._check_divmod_op( + s, op, other, exc=None + ) def test_error(self): pass diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 0126d771caf7f..e6dcbe30a949c 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -261,6 +261,16 @@ class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests): def test_error(self, data, all_arithmetic_operators): pass + def test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + with tm.assert_raises_regex(TypeError, "unsupported"): + ser + data + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + return super(TestArithmeticOps, self)._check_divmod_op( + s, op, other, exc=TypeError + ) + class TestComparisonOps(BaseJSON, base.BaseComparisonOpsTests): pass diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index ff66f53eab6f6..924f01077abd1 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -22,6 +22,7 @@ from pandas.api.types import CategoricalDtype from pandas import Categorical from 
pandas.tests.extension import base +import pandas.util.testing as tm def make_data(): @@ -202,6 +203,16 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): else: pytest.skip('rmod never called when string is first argument') + def test_add_series_with_extension_array(self, data): + ser = pd.Series(data) + with tm.assert_raises_regex(TypeError, "cannot perform"): + ser + data + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + return super(TestArithmeticOps, self)._check_divmod_op( + s, op, other, exc=TypeError + ) + class TestComparisonOps(base.BaseComparisonOpsTests): diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 7aa33006dadda..fa5c89d85e548 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -143,6 +143,12 @@ def test_error(self, data, all_arithmetic_operators): # other specific errors tested in the integer array specific tests pass + @pytest.mark.xfail(reason="EA is listified. 
GH-22922", strict=True) + def test_add_series_with_extension_array(self, data): + super(TestArithmeticOps, self).test_add_series_with_extension_array( + data + ) + class TestComparisonOps(base.BaseComparisonOpsTests): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 2b08897864db0..2eb11c3a2e2f7 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -266,3 +266,18 @@ def test_df_bool_mul_int(self): result = 1 * df kinds = result.dtypes.apply(lambda x: x.kind) assert (kinds == 'i').all() + + def test_td64_df_add_int_frame(self): + # GH#22696 Check that we don't dispatch to numpy implementation, + # which treats int64 as m8[ns] + tdi = pd.timedelta_range('1', periods=3) + df = tdi.to_frame() + other = pd.DataFrame([1, 2, 3], index=tdi) # indexed like `df` + with pytest.raises(TypeError): + df + other + with pytest.raises(TypeError): + other + df + with pytest.raises(TypeError): + df - other + with pytest.raises(TypeError): + other - df diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 0bc74c6890ee9..6186ce4d45ef2 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -721,7 +721,7 @@ def test_align_int_fill_bug(self): result = df1 - df1.mean() expected = df2 - df2.mean() - assert_frame_equal(result, expected) + assert_frame_equal(result.astype('f8'), expected) def test_align_multiindex(self): # GH 10665 diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 1ee48d0120c7d..1cb036dccf23c 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1566,8 +1566,9 @@ def test_crosstab_normalize(self): full_normal) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index'), row_normal) - tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='columns'), - col_normal) + 
tm.assert_frame_equal( + pd.crosstab(df.a, df.b, normalize='columns').astype('f8'), + col_normal) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize=1), pd.crosstab(df.a, df.b, normalize='columns')) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize=0), @@ -1600,7 +1601,8 @@ def test_crosstab_normalize(self): tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='index', margins=True), row_normal_margins) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize='columns', - margins=True), col_normal_margins) + margins=True).astype('f8'), + col_normal_margins) tm.assert_frame_equal(pd.crosstab(df.a, df.b, normalize=True, margins=True), all_normal_margins) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 601e251d45b4b..f3ab197771d53 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -758,9 +758,6 @@ def test_operators_bitwise(self): def test_scalar_na_cmp_corners(self): s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) - def tester(a, b): - return a & b - with pytest.raises(TypeError): s & datetime(2005, 1, 1) @@ -780,8 +777,11 @@ def tester(a, b): # this is an alignment issue; these are equivalent # https://github.com/pandas-dev/pandas/issues/5284 - pytest.raises(ValueError, lambda: d.__and__(s, axis='columns')) - pytest.raises(ValueError, tester, s, d) + with pytest.raises(TypeError): + d.__and__(s, axis='columns') + + with pytest.raises(TypeError): + s & d # this is wrong as its not a boolean result # result = d.__and__(s,axis='index') diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index b6c2c65fb6dce..b06463d3c07aa 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -1,19 +1,19 @@ # -*- coding: utf-8 -*- from __future__ import division, print_function +import warnings from functools import partial -import pytest -import warnings import numpy as np +import pytest import pandas as pd -from pandas import Series, isna 
-from pandas.core.dtypes.common import is_integer_dtype import pandas.core.nanops as nanops -import pandas.util.testing as tm import pandas.util._test_decorators as td +import pandas.util.testing as tm +from pandas import Series, isna from pandas.compat.numpy import _np_version_under1p13 +from pandas.core.dtypes.common import is_integer_dtype use_bn = nanops._USE_BOTTLENECK @@ -1041,3 +1041,29 @@ def test_numpy_ops_np_version_under1p13(numpy_op, expected): assert result == expected else: assert result == expected + + +@pytest.mark.parametrize("operation", [ + nanops.nanany, + nanops.nanall, + nanops.nansum, + nanops.nanmean, + nanops.nanmedian, + nanops.nanstd, + nanops.nanvar, + nanops.nansem, + nanops.nanargmax, + nanops.nanargmin, + nanops.nanmax, + nanops.nanmin, + nanops.nanskew, + nanops.nankurt, + nanops.nanprod, +]) +def test_nanops_independent_of_mask_param(operation): + # GH22764 + s = pd.Series([1, 2, np.nan, 3, np.nan, 4]) + mask = s.isna() + median_expected = operation(s) + median_result = operation(s, mask=mask) + assert median_expected == median_result diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index ccd2461d1512e..5cd31e08e0a9b 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2114,6 +2114,28 @@ def test_downsample_across_dst(self): freq='H')) tm.assert_series_equal(result, expected) + def test_downsample_across_dst_weekly(self): + # GH 9119, GH 21459 + df = DataFrame(index=DatetimeIndex([ + '2017-03-25', '2017-03-26', '2017-03-27', + '2017-03-28', '2017-03-29' + ], tz='Europe/Amsterdam'), + data=[11, 12, 13, 14, 15]) + result = df.resample('1W').sum() + expected = DataFrame([23, 42], index=pd.DatetimeIndex([ + '2017-03-26', '2017-04-02' + ], tz='Europe/Amsterdam')) + tm.assert_frame_equal(result, expected) + + idx = pd.date_range("2013-04-01", "2013-05-01", tz='Europe/London', + freq='H') + s = Series(index=idx) + result = s.resample('W').mean() + expected = 
Series(index=pd.date_range( + '2013-04-07', freq='W', periods=5, tz='Europe/London' + )) + tm.assert_series_equal(result, expected) + def test_resample_with_nat(self): # GH 13020 index = DatetimeIndex([pd.NaT, From c2d57bdbb2ed7842d9bb626087f30c70b31d6bcc Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 3 Oct 2018 14:36:26 -0500 Subject: [PATCH 035/132] Squashed commit of the following: commit 7714e79a512f5d13f52c286458fcc9bac8b40bd0 Author: Tom Augspurger Date: Wed Oct 3 10:13:06 2018 -0500 Always return ndarray commit 1921c6f5d00e0a9a5da2ce8e628b2c6328ee215e Merge: 01f736696 fea27f073 Author: Tom Augspurger Date: Wed Oct 3 09:50:30 2018 -0500 Merge remote-tracking branch 'upstream/master' into combine-exception commit fea27f0736a4b8f6626da60a6abc2f6e26b8a365 Author: Tom Augspurger Date: Wed Oct 3 08:49:44 2018 -0500 CI: pin moto to 1.3.4 (#22959) commit 15d32bbad832908c9d06a9019e613bb6b35d6878 Author: jbrockmendel Date: Wed Oct 3 04:32:35 2018 -0700 [CLN] Dispatch (some) Frame ops to Series, avoiding _data.eval (#22019) * avoid casting to object dtype in mixed-type frames * Dispatch to Series ops in _combine_match_columns * comment * docstring * flake8 fixup * dont bother with try_cast_result * revert non-central change * simplify * revert try_cast_results * revert non-central changes * Fixup typo syntaxerror * simplify assertion * use dispatch_to_series in combine_match_columns * Pass unwrapped op where appropriate * catch correct error * whatsnew note * comment * whatsnew section * remove unnecessary tester * doc fixup commit 3e3256bb6038111812b4b28f6b3b049214d83d2d Author: alimcmaster1 Date: Wed Oct 3 12:23:22 2018 +0100 Allow passing a mask to NanOps (#22865) commit e756e991d57c2656906d0a3e8fc76950844e3f3e Author: jbrockmendel Date: Wed Oct 3 02:19:27 2018 -0700 CLN: Use is_period_dtype instead of ABCPeriodIndex checks (#22958) commit 03181f0569c8b1f93f620a2986b4f174f9b6179b Author: Wenhuan Date: Wed Oct 3 15:28:07 2018 +0800 BUG: fix Series(extension 
array) + extension array values addition (#22479) commit 04ea51ddf7623b897aaaf2e504952d3c11e88205 Author: Joris Van den Bossche Date: Wed Oct 3 09:24:36 2018 +0200 CLN: small clean-up of IntervalIndex (#22956) commit b0f9a104f323d687a56ea878ff78ff005f37b42d Author: Tony Tao <34781056+tonytao2012@users.noreply.github.com> Date: Tue Oct 2 19:01:08 2018 -0500 DOC GH22893 Fix docstring of groupby in pandas/core/generic.py (#22920) commit 08ecba8dab4a35ad3cad89fe02c7240674938b97 Author: jbrockmendel Date: Tue Oct 2 14:22:53 2018 -0700 BUG: fix DataFrame+DataFrame op with timedelta64 dtype (#22696) commit c44bad24996f9e747f2119fa0c6a90d893f6e2aa Author: Pamela Wu Date: Tue Oct 2 17:16:25 2018 -0400 CLN GH22873 Replace base excepts in pandas/core (#22901) commit 8e749a33b5f814bded42044a4182449d5d6c8213 Author: Pamela Wu Date: Tue Oct 2 17:14:48 2018 -0400 CLN GH22874 replace bare excepts in pandas/io/pytables.py (#22919) commit 1102a33d9776ed316cade079e22be6daa76c9e42 Author: Joris Van den Bossche Date: Tue Oct 2 22:31:36 2018 +0200 DOC/CLN: clean-up shared_docs in generic.py (#20074) commit 01f73669666f577fe141f5066646c4253408d3b8 Merge: 5372134ea 9caf04836 Author: Tom Augspurger Date: Tue Oct 2 13:50:28 2018 -0500 Merge remote-tracking branch 'upstream/master' into combine-exception commit 9caf04836ad34ca17da7b86ba7120cca58ce142a Author: Tom Augspurger Date: Tue Oct 2 13:25:22 2018 -0500 CI: change windows vm image (#22948) commit 5372134ea2d22c90fff4b5830464a8f2c9932407 Author: Tom Augspurger Date: Tue Oct 2 11:35:07 2018 -0500 fixed move commit ce1a3c6b112a6228847f9c622ef8246c671f7170 Author: Tom Augspurger Date: Tue Oct 2 11:32:11 2018 -0500 fixed move commit b9c7e4b2c0577fed6601e4f6e27974b943f280da Author: Tom Augspurger Date: Tue Oct 2 11:28:57 2018 -0500 remove old note commit a4a2933117c493394a5656c887a22ea02e94093f Author: Tom Augspurger Date: Tue Oct 2 11:24:48 2018 -0500 handle test commit be63feb818b1e39d0f711d55c1eac809a03ee061 Author: Tom Augspurger Date: 
Tue Oct 2 11:19:17 2018 -0500 move test commit 0eef0cfcdb17caed9cdddce03cb5d07924225375 Author: Tom Augspurger Date: Tue Oct 2 11:18:18 2018 -0500 move back commit 2183f7bf71a90661877c87fe04dcd52efa481184 Author: Tom Augspurger Date: Tue Oct 2 11:17:28 2018 -0500 api commit 85fc5d83a4754fb46e900d51491f92024a3ff4b8 Merge: 9059c0d23 1d9f76c50 Author: Tom Augspurger Date: Tue Oct 2 11:15:52 2018 -0500 Merge remote-tracking branch 'upstream/master' into combine-exception commit 1d9f76c5055d1ef31ce76134e88b5568a119f498 Author: Joris Van den Bossche Date: Tue Oct 2 17:11:11 2018 +0200 CLN: remove Index._to_embed (#22879) * CLN: remove Index._to_embed * pep8 commit 6247da0db4835ff723126640145b4fad3ce17343 Author: Tom Augspurger Date: Tue Oct 2 08:50:41 2018 -0500 Provide default implementation for `data_repated` (#22935) commit 9059c0d23357a717f5f7ba8f5e165000b70efcc2 Author: Tom Augspurger Date: Tue Oct 2 06:33:15 2018 -0500 Note commit 0c53f080b419151286b6991acff540150f13fccc Author: Tom Augspurger Date: Tue Oct 2 06:30:54 2018 -0500 Imports commit ce94bf9b9c6799bd4220976022895edd4e60abd5 Author: Tom Augspurger Date: Tue Oct 2 06:28:16 2018 -0500 Moves commit fdd43c4de98992a6f97a835fdbb525f829ef1d69 Author: Tom Augspurger Date: Mon Oct 1 21:26:09 2018 -0500 Closes https://github.com/pandas-dev/pandas/issues/22850 commit 5ce06b5bdb8c44043c6463bf8ce3da758800a189 Author: Matthew Roeschke Date: Mon Oct 1 14:22:20 2018 -0700 BUG: to_datetime preserves name of Index argument in the result (#22918) * BUG: to_datetime preserves name of Index argument in the result * correct test --- pandas/core/arrays/base.py | 21 +++++--- pandas/core/series.py | 6 ++- pandas/tests/extension/base/ops.py | 4 -- pandas/tests/extension/decimal/__init__.py | 4 ++ pandas/tests/extension/decimal/array.py | 5 ++ .../tests/extension/decimal/test_decimal.py | 52 ++++++++++++++++--- pandas/tests/extension/json/__init__.py | 3 ++ pandas/tests/extension/json/array.py | 9 ++++ 
pandas/tests/extension/json/test_json.py | 16 +----- pandas/tests/extension/test_categorical.py | 5 -- 10 files changed, 87 insertions(+), 38 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 45743a3ece0e5..b16f550b3641d 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -739,14 +739,22 @@ def _create_method(cls, op, coerce_to_dtype=True): ---------- op : function An operator that takes arguments op(a, b) - coerce_to_dtype : bool + coerce_to_dtype : bool, default True boolean indicating whether to attempt to convert - the result to the underlying ExtensionArray dtype - (default True) + the result to the underlying ExtensionArray dtype. + If it's not possible to create a new ExtensionArray with the + values, an ndarray is returned instead. Returns ------- - A method that can be bound to a method of a class + Callable[[Any, Any], Union[ndarray, ExtensionArray]] + A method that can be bound to a class. When used, the method + receives the two arguments, one of which is the instance of + this class, and should return an ExtensionArray or an ndarray. + + Returning an ndarray may be necessary when the result of the + `op` cannot be stored in the ExtensionArray. The dtype of the + ndarray uses NumPy's normal inference rules. 
Example ------- @@ -757,7 +765,6 @@ def _create_method(cls, op, coerce_to_dtype=True): in the class definition of MyExtensionArray to create the operator for addition, that will be based on the operator implementation of the underlying elements of the ExtensionArray - """ def _binop(self, other): @@ -780,12 +787,12 @@ def convert_values(param): a, b = zip(*res) res = (self._from_sequence(a), self._from_sequence(b)) - except TypeError: + except Exception: pass else: try: res = self._from_sequence(res) - except TypeError: + except Exception: pass return res diff --git a/pandas/core/series.py b/pandas/core/series.py index 0fbe5e5ed7b8f..2e22e4e6e1bfc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2323,10 +2323,14 @@ def combine(self, other, func, fill_value=None): pass elif is_extension_array_dtype(self.values): # The function can return something of any type, so check - # if the type is compatible with the calling EA + # if the type is compatible with the calling EA. try: new_values = self._values._from_sequence(new_values) except Exception: + # https://github.com/pandas-dev/pandas/issues/22850 + # pandas has no control over what 3rd-party ExtensionArrays + # do in _values_from_sequence. We still want ops to work + # though, so we catch any regular Exception. 
pass return self._constructor(new_values, index=new_index, name=new_name) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 36696bc292162..78b8ea0566dd0 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -78,10 +78,6 @@ def test_divmod(self, data): self._check_divmod_op(s, divmod, 1, exc=TypeError) self._check_divmod_op(1, ops.rdivmod, s, exc=TypeError) - def test_divmod_series_array(self, data): - s = pd.Series(data) - self._check_divmod_op(s, divmod, data) - def test_add_series_with_extension_array(self, data): s = pd.Series(data) result = s + data diff --git a/pandas/tests/extension/decimal/__init__.py b/pandas/tests/extension/decimal/__init__.py index e69de29bb2d1d..c37aad0af8407 100644 --- a/pandas/tests/extension/decimal/__init__.py +++ b/pandas/tests/extension/decimal/__init__.py @@ -0,0 +1,4 @@ +from .array import DecimalArray, DecimalDtype, to_decimal, make_data + + +__all__ = ['DecimalArray', 'DecimalDtype', 'to_decimal', 'make_data'] diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index f324cc2e0f345..79e1a692f744a 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -1,5 +1,6 @@ import decimal import numbers +import random import sys import numpy as np @@ -142,5 +143,9 @@ def to_decimal(values, context=None): return DecimalArray([decimal.Decimal(x) for x in values], context=context) +def make_data(): + return [decimal.Decimal(random.random()) for _ in range(100)] + + DecimalArray._add_arithmetic_ops() DecimalArray._add_comparison_ops() diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 928ee83df8dc9..d07ac61b00ae9 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -1,6 +1,6 @@ +import operator import decimal -import random import numpy as np import 
pandas as pd import pandas.util.testing as tm @@ -8,11 +8,7 @@ from pandas.tests.extension import base -from .array import DecimalDtype, DecimalArray, to_decimal - - -def make_data(): - return [decimal.Decimal(random.random()) for _ in range(100)] +from .array import DecimalDtype, DecimalArray, make_data @pytest.fixture @@ -294,3 +290,47 @@ def test_compare_array(self, data, all_compare_operators): other = pd.Series(data) * [decimal.Decimal(pow(2.0, i)) for i in alter] self._compare_other(s, data, op_name, other) + + +class DecimalArrayWithoutFromSequence(DecimalArray): + """Helper class for testing error handling in _from_sequence.""" + def _from_sequence(cls, scalars, dtype=None, copy=False): + raise KeyError("For the test") + + +class DecimalArrayWithoutCoercion(DecimalArrayWithoutFromSequence): + @classmethod + def _create_arithmetic_method(cls, op): + return cls._create_method(op, coerce_to_dtype=False) + + +DecimalArrayWithoutCoercion._add_arithmetic_ops() + + +def test_combine_from_sequence_raises(): + # https://github.com/pandas-dev/pandas/issues/22850 + ser = pd.Series(DecimalArrayWithoutFromSequence([ + decimal.Decimal("1.0"), + decimal.Decimal("2.0") + ])) + result = ser.combine(ser, operator.add) + + # note: object dtype + expected = pd.Series([decimal.Decimal("2.0"), + decimal.Decimal("4.0")], dtype="object") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("class_", [DecimalArrayWithoutFromSequence, + DecimalArrayWithoutCoercion]) +def test_scalar_ops_from_sequence_raises(class_): + # op(EA, EA) should return an EA, or an ndarray if it's not possible + # to return an EA with the return values. 
+ arr = class_([ + decimal.Decimal("1.0"), + decimal.Decimal("2.0") + ]) + result = arr + arr + expected = np.array([decimal.Decimal("2.0"), decimal.Decimal("4.0")], + dtype="object") + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/extension/json/__init__.py b/pandas/tests/extension/json/__init__.py index e69de29bb2d1d..f2679d087c841 100644 --- a/pandas/tests/extension/json/__init__.py +++ b/pandas/tests/extension/json/__init__.py @@ -0,0 +1,3 @@ +from .array import JSONArray, JSONDtype, make_data + +__all__ = ['JSONArray', 'JSONDtype', 'make_data'] diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 6ce0d63eb63ec..87876d84bef99 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -13,6 +13,8 @@ import collections import itertools import numbers +import random +import string import sys import numpy as np @@ -179,3 +181,10 @@ def _values_for_argsort(self): # cast them to an (N, P) array, instead of an (N,) array of tuples. frozen = [()] + [tuple(x.items()) for x in self] return np.array(frozen, dtype=object)[1:] + + +def make_data(): + # TODO: Use a regular dict. 
See _NDFrameIndexer._setitem_with_indexer + return [collections.UserDict([ + (random.choice(string.ascii_letters), random.randint(0, 100)) + for _ in range(random.randint(0, 10))]) for _ in range(100)] diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index e6dcbe30a949c..bcbc3e9109182 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -1,7 +1,5 @@ import operator import collections -import random -import string import pytest @@ -10,18 +8,11 @@ from pandas.compat import PY2, PY36 from pandas.tests.extension import base -from .array import JSONArray, JSONDtype +from .array import JSONArray, JSONDtype, make_data pytestmark = pytest.mark.skipif(PY2, reason="Py2 doesn't have a UserDict") -def make_data(): - # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer - return [collections.UserDict([ - (random.choice(string.ascii_letters), random.randint(0, 100)) - for _ in range(random.randint(0, 10))]) for _ in range(100)] - - @pytest.fixture def dtype(): return JSONDtype() @@ -266,11 +257,6 @@ def test_add_series_with_extension_array(self, data): with tm.assert_raises_regex(TypeError, "unsupported"): ser + data - def _check_divmod_op(self, s, op, other, exc=NotImplementedError): - return super(TestArithmeticOps, self)._check_divmod_op( - s, op, other, exc=TypeError - ) - class TestComparisonOps(BaseJSON, base.BaseComparisonOpsTests): pass diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 924f01077abd1..c588552572aed 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -208,11 +208,6 @@ def test_add_series_with_extension_array(self, data): with tm.assert_raises_regex(TypeError, "cannot perform"): ser + data - def _check_divmod_op(self, s, op, other, exc=NotImplementedError): - return super(TestArithmeticOps, self)._check_divmod_op( - s, op, other, 
exc=TypeError - ) - class TestComparisonOps(base.BaseComparisonOpsTests): From 1c4bbe7eb70141fbcd05749b55c4bd1a09150861 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 3 Oct 2018 14:40:29 -0500 Subject: [PATCH 036/132] Squashed commit of the following: commit 11a0d938cdaf7482546691519577b5dd28f69aac Author: Tom Augspurger Date: Wed Oct 3 14:26:34 2018 -0500 typerror commit a0cd5e79eb06ac71cf2f510b1a2122bc2b21fcf0 Author: Tom Augspurger Date: Wed Oct 3 14:25:38 2018 -0500 TypeError for Series commit 2247461ec0b1017db320cb8581337cba0b5c6679 Author: Tom Augspurger Date: Wed Oct 3 13:29:29 2018 -0500 Test op(Series[EA], EA]) commit c9fe5d318d7077f99413532cdaf392ae3ea9cd2c Author: Tom Augspurger Date: Wed Oct 3 13:21:33 2018 -0500 make strict commit 7ef697cffdcd2f8d701de3cdfd2e6897358effbf Author: Tom Augspurger Date: Wed Oct 3 13:14:52 2018 -0500 Use super commit 35d42133acbcb3c25308b1c10e0e2dc3fa1052b6 Merge: 0671e7d67 ee808033b Author: Tom Augspurger Date: Wed Oct 3 13:11:05 2018 -0500 Merge remote-tracking branch 'upstream/master' into ea-divmod commit 0671e7d67df8b0aa258fd864ef5f3169fe0ffc55 Author: Tom Augspurger Date: Tue Oct 2 11:10:42 2018 -0500 Fixup commit 1b4261f41c70379fa868866bc77e7a31c43baa5d Merge: c92a4a899 1d9f76c50 Author: Tom Augspurger Date: Tue Oct 2 10:58:43 2018 -0500 Merge remote-tracking branch 'upstream/master' into ea-divmod commit c92a4a899b8d5e5e6a0479f390a604dc9f624f89 Author: Tom Augspurger Date: Mon Oct 1 16:56:15 2018 -0500 Update old test commit 52538fa03a8c9722ab5c86c88419105b6ebfe5a1 Author: Tom Augspurger Date: Mon Oct 1 16:51:48 2018 -0500 BUG: divmod return type --- pandas/tests/extension/base/ops.py | 4 ++++ pandas/tests/extension/json/test_json.py | 5 +++++ pandas/tests/extension/test_categorical.py | 5 +++++ 3 files changed, 14 insertions(+) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 78b8ea0566dd0..36696bc292162 100644 --- a/pandas/tests/extension/base/ops.py +++ 
b/pandas/tests/extension/base/ops.py @@ -78,6 +78,10 @@ def test_divmod(self, data): self._check_divmod_op(s, divmod, 1, exc=TypeError) self._check_divmod_op(1, ops.rdivmod, s, exc=TypeError) + def test_divmod_series_array(self, data): + s = pd.Series(data) + self._check_divmod_op(s, divmod, data) + def test_add_series_with_extension_array(self, data): s = pd.Series(data) result = s + data diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index bcbc3e9109182..e503e54db64c5 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -257,6 +257,11 @@ def test_add_series_with_extension_array(self, data): with tm.assert_raises_regex(TypeError, "unsupported"): ser + data + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + return super(TestArithmeticOps, self)._check_divmod_op( + s, op, other, exc=TypeError + ) + class TestComparisonOps(BaseJSON, base.BaseComparisonOpsTests): pass diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index c588552572aed..924f01077abd1 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -208,6 +208,11 @@ def test_add_series_with_extension_array(self, data): with tm.assert_raises_regex(TypeError, "cannot perform"): ser + data + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + return super(TestArithmeticOps, self)._check_divmod_op( + s, op, other, exc=TypeError + ) + class TestComparisonOps(base.BaseComparisonOpsTests): From b395c907461e321edba4804bf0771855b18a4a56 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 3 Oct 2018 14:41:04 -0500 Subject: [PATCH 037/132] fixed merge conflict --- pandas/tests/extension/decimal/test_decimal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 
d07ac61b00ae9..a33cc6c4ab6cb 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -8,7 +8,7 @@ from pandas.tests.extension import base -from .array import DecimalDtype, DecimalArray, make_data +from .array import DecimalDtype, DecimalArray, make_data, to_decimal @pytest.fixture From d68a5c5ecebbb8b70fea2b9427c8cad5217f863a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 3 Oct 2018 14:42:28 -0500 Subject: [PATCH 038/132] Handle divmod test --- pandas/tests/extension/test_period.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index fbca480fd8dc2..9fe5061fda599 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -90,6 +90,11 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): super().test_arith_series_with_scalar(data, all_arithmetic_operators) + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + super(TestArithmeticOps, self)._check_divmod_op( + s, op, other, exc=TypeError + ) + def test_error(self): pass From 0c7b704d712e9d26a62de38a50b9ad16bd7ac8fb Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 3 Oct 2018 14:46:29 -0500 Subject: [PATCH 039/132] extension tests passing --- pandas/tests/extension/test_period.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 9fe5061fda599..2ed7159b02513 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd +import pandas.util.testing as tm from pandas._libs.tslib import iNaT from pandas.tests.extension import base from pandas.core.dtypes.dtypes import PeriodDtype @@ -95,6 +96,12 @@ def _check_divmod_op(self, s, op, other, exc=NotImplementedError): s, op, other, exc=TypeError ) + 
def test_add_series_with_extension_array(self, data): + # we don't implement + for Period + s = pd.Series(data) + with tm.assert_raises_regex(TypeError, "cannot add period\[D\]"): + s + data + def test_error(self): pass @@ -105,7 +112,7 @@ class TestCasting(BasePeriodTests, base.BaseCastingTests): class TestComparisonOps(BasePeriodTests, base.BaseComparisonOpsTests): - def _compare_other(self): + def _compare_other(self, s, data, op_name, other): # the base test is not appropriate for us. We raise on comparison # with (some) integers, depending on the value. pass From d26d3d23af984bf1154c152e66dd294a55302a59 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Oct 2018 11:29:22 -0500 Subject: [PATCH 040/132] Squashed commit of the following: commit c9d6e89a1f401e4f47b384b72030873cc4cc2f2b Author: Tom Augspurger Date: Thu Oct 4 08:34:22 2018 -0500 xpass -> skip commit 95d5cbfe4eaf53ed60e84a938723062a14d2d625 Author: Tom Augspurger Date: Thu Oct 4 08:22:17 2018 -0500 typo, import commit 4e9b7f0a6ceec0275e22f5f1edac1daeb41f5033 Author: Tom Augspurger Date: Thu Oct 4 08:18:40 2018 -0500 doc update commit cc2bfc8b991f4d8cf46a993bf4205cc80656384e Merge: 11a0d938c fe67b94e7 Author: Tom Augspurger Date: Thu Oct 4 08:15:46 2018 -0500 Merge remote-tracking branch 'upstream/master' into ea-divmod commit fe67b94e7681c1f21fc2be212514ca0d67a6603c Author: Tom Augspurger Date: Thu Oct 4 06:55:09 2018 -0500 Update type for PeriodDtype / DatetimeTZDtype / IntervalDtype (#22938) commit b12e5ba55c3691733dab36373e80d1b16134c8c2 Author: Tom Augspurger Date: Thu Oct 4 06:30:29 2018 -0500 Safer is dtype (#22975) commit c19c8052f384206c3b2cd87f277344d21d0ae2c7 Author: Tom Augspurger Date: Thu Oct 4 06:27:54 2018 -0500 Catch Exception in combine (#22936) commit d553ab3e5650d105de8e02ae6fd57d03af57b214 Author: Anjali2019 Date: Thu Oct 4 13:24:06 2018 +0200 TST: Fixturize series/test_combine_concat.py (#22964) commit 4c78b9738e01ae147106301cca76c6b36ee68d06 Author: Anjali2019 Date: Thu 
Oct 4 13:23:39 2018 +0200 TST: Fixturize series/test_constructors.py (#22965) commit 45d3bb761dd44edd0853b06fd81f05af915fd695 Author: Anjali2019 Date: Thu Oct 4 13:23:20 2018 +0200 TST: Fixturize series/test_datetime_values.py (#22966) commit f1a22ff56f895ed340ed7db6dc46841b81d331a1 Author: Anjali2019 Date: Thu Oct 4 13:22:21 2018 +0200 TST: Fixturize series/test_dtypes.py (#22967) commit abf68fd1d5694403e506416c68f6abec6d780c39 Author: Anjali2019 Date: Thu Oct 4 13:21:45 2018 +0200 TST: Fixturize series/test_io.py (#22972) commit e6b0c2915f6433d7c29af908f91a6d511177eec1 Author: Anjali2019 Date: Thu Oct 4 13:20:46 2018 +0200 TST: Fixturize series/test_missing.py (#22973) commit 9b405b829bf5e3fd142cccbcca46df4cc3df4ccb Author: Joris Van den Bossche Date: Thu Oct 4 13:16:28 2018 +0200 CLN: values is required argument in _shallow_copy_with_infer (#22983) commit c282e310809921a0dadd4446f23c9273c15da443 Author: h-vetinari <33685575+h-vetinari@users.noreply.github.com> Date: Thu Oct 4 03:34:35 2018 +0200 Fix ASV import error (#22978) commit 11a0d938cdaf7482546691519577b5dd28f69aac Author: Tom Augspurger Date: Wed Oct 3 14:26:34 2018 -0500 typerror commit a0cd5e79eb06ac71cf2f510b1a2122bc2b21fcf0 Author: Tom Augspurger Date: Wed Oct 3 14:25:38 2018 -0500 TypeError for Series commit 2247461ec0b1017db320cb8581337cba0b5c6679 Author: Tom Augspurger Date: Wed Oct 3 13:29:29 2018 -0500 Test op(Series[EA], EA]) commit c9fe5d318d7077f99413532cdaf392ae3ea9cd2c Author: Tom Augspurger Date: Wed Oct 3 13:21:33 2018 -0500 make strict commit 7ef697cffdcd2f8d701de3cdfd2e6897358effbf Author: Tom Augspurger Date: Wed Oct 3 13:14:52 2018 -0500 Use super commit 35d42133acbcb3c25308b1c10e0e2dc3fa1052b6 Merge: 0671e7d67 ee808033b Author: Tom Augspurger Date: Wed Oct 3 13:11:05 2018 -0500 Merge remote-tracking branch 'upstream/master' into ea-divmod commit 0671e7d67df8b0aa258fd864ef5f3169fe0ffc55 Author: Tom Augspurger Date: Tue Oct 2 11:10:42 2018 -0500 Fixup commit 
1b4261f41c70379fa868866bc77e7a31c43baa5d Merge: c92a4a899 1d9f76c50 Author: Tom Augspurger Date: Tue Oct 2 10:58:43 2018 -0500 Merge remote-tracking branch 'upstream/master' into ea-divmod commit c92a4a899b8d5e5e6a0479f390a604dc9f624f89 Author: Tom Augspurger Date: Mon Oct 1 16:56:15 2018 -0500 Update old test commit 52538fa03a8c9722ab5c86c88419105b6ebfe5a1 Author: Tom Augspurger Date: Mon Oct 1 16:51:48 2018 -0500 BUG: divmod return type --- .travis.yml | 19 ++--- asv_bench/benchmarks/indexing.py | 8 +-- asv_bench/benchmarks/join_merge.py | 7 +- asv_bench/benchmarks/panel_ctor.py | 4 +- asv_bench/benchmarks/panel_methods.py | 3 +- doc/source/extending.rst | 10 +-- pandas/core/arrays/base.py | 24 ++++--- pandas/core/dtypes/base.py | 8 ++- pandas/core/frame.py | 3 +- pandas/core/indexes/base.py | 4 +- pandas/core/indexes/multi.py | 2 +- pandas/core/indexes/period.py | 13 +--- pandas/core/series.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 20 ++++++ .../tests/extension/decimal/test_decimal.py | 2 +- pandas/tests/extension/json/test_json.py | 12 ++-- pandas/tests/extension/test_categorical.py | 4 +- pandas/tests/frame/test_operators.py | 6 ++ pandas/tests/series/test_combine_concat.py | 25 ++++--- pandas/tests/series/test_constructors.py | 36 +++++----- pandas/tests/series/test_datetime_values.py | 4 +- pandas/tests/series/test_dtypes.py | 18 +++-- pandas/tests/series/test_io.py | 70 +++++++++---------- pandas/tests/series/test_missing.py | 46 ++++++------ 24 files changed, 182 insertions(+), 168 deletions(-) diff --git a/.travis.yml b/.travis.yml index 40baee2c03ea0..c9bdb91283d42 100644 --- a/.travis.yml +++ b/.travis.yml @@ -53,11 +53,7 @@ matrix: - dist: trusty env: - JOB="3.6, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true DOCTEST=true - # In allow_failures - - dist: trusty - env: - - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true - # In allow_failures + - dist: trusty 
env: - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate" @@ -65,6 +61,12 @@ matrix: apt: packages: - xsel + + # In allow_failures + - dist: trusty + env: + - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true + # In allow_failures - dist: trusty env: @@ -73,13 +75,6 @@ matrix: - dist: trusty env: - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true - - dist: trusty - env: - - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate" - addons: - apt: - packages: - - xsel - dist: trusty env: - JOB="3.6, doc" ENV_FILE="ci/travis-36-doc.yaml" DOC=true diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index c5b147b152aa6..2850fa249725c 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -2,10 +2,10 @@ import numpy as np import pandas.util.testing as tm -from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index, - IntervalIndex, CategoricalIndex, - IndexSlice, concat, date_range) -from .pandas_vb_common import setup, Panel # noqa +from pandas import (Series, DataFrame, MultiIndex, Panel, + Int64Index, Float64Index, IntervalIndex, + CategoricalIndex, IndexSlice, concat, date_range) +from .pandas_vb_common import setup # noqa class NumericSeriesIndexing(object): diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 7487a0d8489b7..6624c3d0aaf49 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -3,14 +3,15 @@ import numpy as np import pandas.util.testing as tm -from pandas import (DataFrame, Series, MultiIndex, date_range, concat, merge, - merge_asof) +from pandas import (DataFrame, Series, Panel, MultiIndex, + date_range, concat, merge, merge_asof) + try: from pandas import merge_ordered except ImportError: from pandas import 
ordered_merge as merge_ordered -from .pandas_vb_common import Panel, setup # noqa +from .pandas_vb_common import setup # noqa class Append(object): diff --git a/asv_bench/benchmarks/panel_ctor.py b/asv_bench/benchmarks/panel_ctor.py index ce946c76ed199..4614bbd198afa 100644 --- a/asv_bench/benchmarks/panel_ctor.py +++ b/asv_bench/benchmarks/panel_ctor.py @@ -1,9 +1,9 @@ import warnings from datetime import datetime, timedelta -from pandas import DataFrame, DatetimeIndex, date_range +from pandas import DataFrame, Panel, DatetimeIndex, date_range -from .pandas_vb_common import Panel, setup # noqa +from .pandas_vb_common import setup # noqa class DifferentIndexes(object): diff --git a/asv_bench/benchmarks/panel_methods.py b/asv_bench/benchmarks/panel_methods.py index a5b1a92e9cf67..4d19e9a87c507 100644 --- a/asv_bench/benchmarks/panel_methods.py +++ b/asv_bench/benchmarks/panel_methods.py @@ -1,8 +1,9 @@ import warnings import numpy as np +from pandas import Panel -from .pandas_vb_common import Panel, setup # noqa +from .pandas_vb_common import setup # noqa class PanelMethods(object): diff --git a/doc/source/extending.rst b/doc/source/extending.rst index da249cb3592f4..ab940384594bc 100644 --- a/doc/source/extending.rst +++ b/doc/source/extending.rst @@ -167,11 +167,11 @@ your ``MyExtensionArray`` class, as follows: element one-by-one, this might not be as performant as implementing your own version of the associated operators directly on the ``ExtensionArray``. -This implementation will try to reconstruct a new ``ExtensionArray`` with the -result of the element-wise operation. Whether or not that succeeds depends on -whether the operation returns a result that's valid for the ``ExtensionArray``. -If an ``ExtensionArray`` cannot be reconstructed, a list containing the scalars -returned instead. +For arithmetic operations, this implementation will try to reconstruct a new +``ExtensionArray`` with the result of the element-wise operation. 
Whether +or not that succeeds depends on whether the operation returns a result +that's valid for the ``ExtensionArray``. If an ``ExtensionArray`` cannot +be reconstructed, an ndarray containing the scalars is returned instead. .. _extending.extension.testing: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index b16f550b3641d..23eea5f4c04f2 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -781,20 +781,24 @@ def convert_values(param): # a TypeError should be raised res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] - if coerce_to_dtype: - if op.__name__ in {'divmod', 'rdivmod'}: + def _maybe_convert(arr): + if coerce_to_dtype: + # https://github.com/pandas-dev/pandas/issues/22850 + # We catch all regular exceptions here, and fall back + # to an ndarray. try: - a, b = zip(*res) - res = (self._from_sequence(a), - self._from_sequence(b)) + res = self._from_sequence(arr) except Exception: - pass + res = np.asarray(arr) else: - try: - res = self._from_sequence(res) - except Exception: - pass + res = np.asarray(arr) + return res + if op.__name__ in {'divmod', 'rdivmod'}: + a, b = zip(*res) + res = _maybe_convert(a), _maybe_convert(b) + else: + res = _maybe_convert(res) return res op_name = ops._get_op_name(op, True) diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 5c9ba921226c0..b0fa55e346613 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -2,6 +2,7 @@ import numpy as np from pandas import compat +from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCDataFrame from pandas.errors import AbstractMethodError @@ -83,7 +84,12 @@ def is_dtype(cls, dtype): """ dtype = getattr(dtype, 'dtype', dtype) - if isinstance(dtype, (ABCSeries, ABCIndexClass, + ABCDataFrame, np.dtype)): + # https://github.com/pandas-dev/pandas/issues/22960 + # avoid passing data to `construct_from_string`.
This could + # cause a FutureWarning from numpy about failing elementwise + # comparison from, e.g., comparing DataFrame == 'category'. return False elif dtype is None: return False diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ff7590f6d5358..f4b7ccb0fdf5b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4908,7 +4908,8 @@ def _combine_match_index(self, other, func, level=None): return ops.dispatch_to_series(left, right, func) else: # fastpath --> operate directly on values - new_data = func(left.values.T, right.values).T + with np.errstate(all="ignore"): + new_data = func(left.values.T, right.values).T return self._constructor(new_data, index=left.index, columns=self.columns, copy=False) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 19f9209ef39ca..d0dbe76547e75 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -540,7 +540,7 @@ def _shallow_copy(self, values=None, **kwargs): return self._simple_new(values, **attributes) - def _shallow_copy_with_infer(self, values=None, **kwargs): + def _shallow_copy_with_infer(self, values, **kwargs): """ create a new Index inferring the class with passed value, don't copy the data, use the same object attributes with passed in attributes @@ -553,8 +553,6 @@ def _shallow_copy_with_infer(self, values=None, **kwargs): values : the values to create the new Index, optional kwargs : updates the default attributes for this Index """ - if values is None: - values = self.values attributes = self._get_attributes_dict() attributes.update(kwargs) attributes['copy'] = False diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6091df776a01b..3cccb65503378 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -556,7 +556,7 @@ def view(self, cls=None): result._id = self._id return result - def _shallow_copy_with_infer(self, values=None, **kwargs): + def _shallow_copy_with_infer(self, values, **kwargs): # 
On equal MultiIndexes the difference is empty. # Therefore, an empty MultiIndex is returned GH13490 if len(values) == 0: diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index cc61c1baa7bf6..f3f680f085118 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -252,17 +252,10 @@ def _from_ordinals(cls, values, name=None, freq=None, **kwargs): result = cls._simple_new(data, name=name) return result - def _shallow_copy(self, values=None, **kwargs): + def _shallow_copy(self, values, **kwargs): # TODO: simplify, figure out type of values - if values is None: - # Note: this is the Index implementation. - # slightly different from AttributesMixin implementation which - # defaults to self._ndarray_values - values = self.values - else: - # this differs too - if not isinstance(values, PeriodArray): - values = PeriodArray._from_ordinals(values, freq=self.freq) + if not isinstance(values, PeriodArray): + values = PeriodArray._from_ordinals(values, freq=self.freq) # I don't like overloading shallow_copy with freq changes. # See if it's used anywhere outside of test_resample_empty_dataframe diff --git a/pandas/core/series.py b/pandas/core/series.py index 2e22e4e6e1bfc..a613b22ea9046 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4228,7 +4228,7 @@ def _try_cast(arr, take_fast_path): try: # gh-15832: Check if we are requesting a numeric dype and # that we can convert the data to the requested dtype. 
- if is_float_dtype(dtype) or is_integer_dtype(dtype): + if is_integer_dtype(dtype): subarr = maybe_cast_to_integer_array(arr, dtype) subarr = maybe_cast_to_datetime(arr, dtype) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index e3d14497a38f9..7e95b076a8a66 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -815,3 +815,23 @@ def test_registry_find(dtype, expected): ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern'))]) def test_pandas_registry_find(dtype, expected): assert _pandas_registry.find(dtype) == expected + + +@pytest.mark.parametrize("check", [ + is_categorical_dtype, + is_datetime64tz_dtype, + is_period_dtype, + is_datetime64_ns_dtype, + is_datetime64_dtype, + is_interval_dtype, + is_datetime64_any_dtype, + is_string_dtype, + is_bool_dtype, +]) +def test_is_dtype_no_warning(check): + data = pd.DataFrame({"A": [1, 2]}) + with tm.assert_produces_warning(None): + check(data) + + with tm.assert_produces_warning(None): + check(data["A"]) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index a33cc6c4ab6cb..317170e8db1e1 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -102,7 +102,7 @@ class TestInterface(BaseDecimal, base.BaseInterfaceTests): class TestConstructors(BaseDecimal, base.BaseConstructorsTests): - @pytest.mark.xfail(reason="not implemented constructor from dtype") + @pytest.mark.skip(reason="not implemented constructor from dtype") def test_from_dtype(self, data): # construct from our dtype & string dtype pass diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index e503e54db64c5..115afdcc99f2b 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -131,8 +131,7 @@ def test_custom_asserts(self): class TestConstructors(BaseJSON, 
base.BaseConstructorsTests): - # TODO: Should this be pytest.mark.skip? - @pytest.mark.xfail(reason="not implemented constructor from dtype") + @pytest.mark.skip(reason="not implemented constructor from dtype") def test_from_dtype(self, data): # construct from our dtype & string dtype pass @@ -147,13 +146,11 @@ class TestGetitem(BaseJSON, base.BaseGetitemTests): class TestMissing(BaseJSON, base.BaseMissingTests): - # TODO: Should this be pytest.mark.skip? - @pytest.mark.xfail(reason="Setting a dict as a scalar") + @pytest.mark.skip(reason="Setting a dict as a scalar") def test_fillna_series(self): """We treat dictionaries as a mapping in fillna, not a scalar.""" - # TODO: Should this be pytest.mark.skip? - @pytest.mark.xfail(reason="Setting a dict as a scalar") + @pytest.mark.skip(reason="Setting a dict as a scalar") def test_fillna_frame(self): """We treat dictionaries as a mapping in fillna, not a scalar.""" @@ -204,8 +201,7 @@ def test_combine_add(self, data_repeated): class TestCasting(BaseJSON, base.BaseCastingTests): - # TODO: Should this be pytest.mark.skip? 
- @pytest.mark.xfail(reason="failing on np.array(self, dtype=str)") + @pytest.mark.skip(reason="failing on np.array(self, dtype=str)") def test_astype_str(self): """This currently fails in NumPy on np.array(self, dtype=str) with diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 924f01077abd1..f118279c4b915 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -140,11 +140,11 @@ def test_take_series(self): def test_reindex_non_na_fill_value(self): pass - @pytest.mark.xfail(reason="Categorical.take buggy") + @pytest.mark.skip(reason="Categorical.take buggy") def test_take_empty(self): pass - @pytest.mark.xfail(reason="test not written correctly for categorical") + @pytest.mark.skip(reason="test not written correctly for categorical") def test_reindex(self): pass diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 97c94e1134cc8..6ed289614b96a 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -1030,3 +1030,9 @@ def test_alignment_non_pandas(self): align(df, val, 'index') with pytest.raises(ValueError): align(df, val, 'columns') + + def test_no_warning(self, all_arithmetic_operators): + df = pd.DataFrame({"A": [0., 0.], "B": [0., None]}) + b = df['B'] + with tm.assert_produces_warning(None): + getattr(df, all_arithmetic_operators)(b, 0) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 35ba4fbf0ce25..8b021ab81ff81 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -15,29 +15,28 @@ from pandas.util.testing import assert_series_equal import pandas.util.testing as tm -from .common import TestData +class TestSeriesCombine(): -class TestSeriesCombine(TestData): - - def test_append(self): - appendedSeries = self.series.append(self.objSeries) + def test_append(self, 
datetime_series, string_series, object_series): + appendedSeries = string_series.append(object_series) for idx, value in compat.iteritems(appendedSeries): - if idx in self.series.index: - assert value == self.series[idx] - elif idx in self.objSeries.index: - assert value == self.objSeries[idx] + if idx in string_series.index: + assert value == string_series[idx] + elif idx in object_series.index: + assert value == object_series[idx] else: raise AssertionError("orphaned index!") - pytest.raises(ValueError, self.ts.append, self.ts, + pytest.raises(ValueError, datetime_series.append, datetime_series, verify_integrity=True) - def test_append_many(self): - pieces = [self.ts[:5], self.ts[5:10], self.ts[10:]] + def test_append_many(self, datetime_series): + pieces = [datetime_series[:5], datetime_series[5:10], + datetime_series[10:]] result = pieces[0].append(pieces[1:]) - assert_series_equal(result, self.ts) + assert_series_equal(result, datetime_series) def test_append_duplicates(self): # GH 13677 diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 4817f5bdccc29..57a3f54fadbcc 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -26,10 +26,8 @@ from pandas.util.testing import assert_series_equal import pandas.util.testing as tm -from .common import TestData - -class TestSeriesConstructors(TestData): +class TestSeriesConstructors(): def test_invalid_dtype(self): # GH15520 @@ -50,23 +48,23 @@ def test_scalar_conversion(self): assert int(Series([1.])) == 1 assert long(Series([1.])) == 1 - def test_constructor(self): - assert self.ts.index.is_all_dates + def test_constructor(self, datetime_series, empty_series): + assert datetime_series.index.is_all_dates # Pass in Series - derived = Series(self.ts) + derived = Series(datetime_series) assert derived.index.is_all_dates - assert tm.equalContents(derived.index, self.ts.index) + assert tm.equalContents(derived.index, 
datetime_series.index) # Ensure new index is not created - assert id(self.ts.index) == id(derived.index) + assert id(datetime_series.index) == id(derived.index) # Mixed type Series mixed = Series(['hello', np.NaN], index=[0, 1]) assert mixed.dtype == np.object_ assert mixed[1] is np.NaN - assert not self.empty.index.is_all_dates + assert not empty_series.index.is_all_dates assert not Series({}).index.is_all_dates pytest.raises(Exception, Series, np.random.randn(3, 3), index=np.arange(3)) @@ -977,27 +975,27 @@ def test_fromDict(self): series = Series(data, dtype=float) assert series.dtype == np.float64 - def test_fromValue(self): + def test_fromValue(self, datetime_series): - nans = Series(np.NaN, index=self.ts.index) + nans = Series(np.NaN, index=datetime_series.index) assert nans.dtype == np.float_ - assert len(nans) == len(self.ts) + assert len(nans) == len(datetime_series) - strings = Series('foo', index=self.ts.index) + strings = Series('foo', index=datetime_series.index) assert strings.dtype == np.object_ - assert len(strings) == len(self.ts) + assert len(strings) == len(datetime_series) d = datetime.now() - dates = Series(d, index=self.ts.index) + dates = Series(d, index=datetime_series.index) assert dates.dtype == 'M8[ns]' - assert len(dates) == len(self.ts) + assert len(dates) == len(datetime_series) # GH12336 # Test construction of categorical series from value - categorical = Series(0, index=self.ts.index, dtype="category") - expected = Series(0, index=self.ts.index).astype("category") + categorical = Series(0, index=datetime_series.index, dtype="category") + expected = Series(0, index=datetime_series.index).astype("category") assert categorical.dtype == 'category' - assert len(categorical) == len(self.ts) + assert len(categorical) == len(datetime_series) tm.assert_series_equal(categorical, expected) def test_constructor_dtype_timedelta64(self): diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 
fee2323310b9c..e06d3a67db662 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -23,10 +23,8 @@ from pandas.util.testing import assert_series_equal import pandas.util.testing as tm -from .common import TestData - -class TestSeriesDatetimeValues(TestData): +class TestSeriesDatetimeValues(): def test_dt_namespace_accessor(self): diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 125dff9ecfa7c..63ead2dc7d245 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -24,10 +24,8 @@ from pandas import compat import pandas.util.testing as tm -from .common import TestData - -class TestSeriesDtypes(TestData): +class TestSeriesDtypes(): def test_dt64_series_astype_object(self): dt64ser = Series(date_range('20130101', periods=3)) @@ -56,17 +54,17 @@ def test_asobject_deprecated(self): o = s.asobject assert isinstance(o, np.ndarray) - def test_dtype(self): + def test_dtype(self, datetime_series): - assert self.ts.dtype == np.dtype('float64') - assert self.ts.dtypes == np.dtype('float64') - assert self.ts.ftype == 'float64:dense' - assert self.ts.ftypes == 'float64:dense' - tm.assert_series_equal(self.ts.get_dtype_counts(), + assert datetime_series.dtype == np.dtype('float64') + assert datetime_series.dtypes == np.dtype('float64') + assert datetime_series.ftype == 'float64:dense' + assert datetime_series.ftypes == 'float64:dense' + tm.assert_series_equal(datetime_series.get_dtype_counts(), Series(1, ['float64'])) # GH18243 - Assert .get_ftype_counts is deprecated with tm.assert_produces_warning(FutureWarning): - tm.assert_series_equal(self.ts.get_ftype_counts(), + tm.assert_series_equal(datetime_series.get_ftype_counts(), Series(1, ['float64:dense'])) @pytest.mark.parametrize("value", [np.nan, np.inf]) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index cbf9bff06ad34..50f548b855247 100644 --- 
a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -16,10 +16,8 @@ assert_frame_equal, ensure_clean) import pandas.util.testing as tm -from .common import TestData - -class TestSeriesToCSV(TestData): +class TestSeriesToCSV(): def read_csv(self, path, **kwargs): params = dict(squeeze=True, index_col=0, @@ -34,10 +32,10 @@ def read_csv(self, path, **kwargs): return out - def test_from_csv_deprecation(self): + def test_from_csv_deprecation(self, datetime_series): # see gh-17812 with ensure_clean() as path: - self.ts.to_csv(path, header=False) + datetime_series.to_csv(path, header=False) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): @@ -46,7 +44,7 @@ def test_from_csv_deprecation(self): assert_series_equal(depr_ts, ts) @pytest.mark.parametrize("arg", ["path", "header", "both"]) - def test_to_csv_deprecation(self, arg): + def test_to_csv_deprecation(self, arg, datetime_series): # see gh-19715 with ensure_clean() as path: if arg == "path": @@ -57,18 +55,18 @@ def test_to_csv_deprecation(self, arg): kwargs = dict(path=path) with tm.assert_produces_warning(FutureWarning): - self.ts.to_csv(**kwargs) + datetime_series.to_csv(**kwargs) # Make sure roundtrip still works. 
ts = self.read_csv(path) - assert_series_equal(self.ts, ts, check_names=False) + assert_series_equal(datetime_series, ts, check_names=False) - def test_from_csv(self): + def test_from_csv(self, datetime_series, string_series): with ensure_clean() as path: - self.ts.to_csv(path, header=False) + datetime_series.to_csv(path, header=False) ts = self.read_csv(path) - assert_series_equal(self.ts, ts, check_names=False) + assert_series_equal(datetime_series, ts, check_names=False) assert ts.name is None assert ts.index.name is None @@ -79,18 +77,18 @@ def test_from_csv(self): assert_series_equal(depr_ts, ts) # see gh-10483 - self.ts.to_csv(path, header=True) + datetime_series.to_csv(path, header=True) ts_h = self.read_csv(path, header=0) assert ts_h.name == "ts" - self.series.to_csv(path, header=False) + string_series.to_csv(path, header=False) series = self.read_csv(path) - assert_series_equal(self.series, series, check_names=False) + assert_series_equal(string_series, series, check_names=False) assert series.name is None assert series.index.name is None - self.series.to_csv(path, header=True) + string_series.to_csv(path, header=True) series_h = self.read_csv(path, header=0) assert series_h.name == "series" @@ -106,19 +104,19 @@ def test_from_csv(self): check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0}) assert_series_equal(check_series, series) - def test_to_csv(self): + def test_to_csv(self, datetime_series): import io with ensure_clean() as path: - self.ts.to_csv(path, header=False) + datetime_series.to_csv(path, header=False) with io.open(path, newline=None) as f: lines = f.readlines() assert (lines[1] != '\n') - self.ts.to_csv(path, index=False, header=False) + datetime_series.to_csv(path, index=False, header=False) arr = np.loadtxt(path) - assert_almost_equal(arr, self.ts.values) + assert_almost_equal(arr, datetime_series.values) def test_to_csv_unicode_index(self): buf = StringIO() @@ -196,22 +194,23 @@ def test_to_csv_compression(self, s, encoding, 
compression): encoding=encoding)) -class TestSeriesIO(TestData): +class TestSeriesIO(): - def test_to_frame(self): - self.ts.name = None - rs = self.ts.to_frame() - xp = pd.DataFrame(self.ts.values, index=self.ts.index) + def test_to_frame(self, datetime_series): + datetime_series.name = None + rs = datetime_series.to_frame() + xp = pd.DataFrame(datetime_series.values, index=datetime_series.index) assert_frame_equal(rs, xp) - self.ts.name = 'testname' - rs = self.ts.to_frame() - xp = pd.DataFrame(dict(testname=self.ts.values), index=self.ts.index) + datetime_series.name = 'testname' + rs = datetime_series.to_frame() + xp = pd.DataFrame(dict(testname=datetime_series.values), + index=datetime_series.index) assert_frame_equal(rs, xp) - rs = self.ts.to_frame(name='testdifferent') - xp = pd.DataFrame( - dict(testdifferent=self.ts.values), index=self.ts.index) + rs = datetime_series.to_frame(name='testdifferent') + xp = pd.DataFrame(dict(testdifferent=datetime_series.values), + index=datetime_series.index) assert_frame_equal(rs, xp) def test_timeseries_periodindex(self): @@ -256,11 +255,12 @@ class SubclassedFrame(DataFrame): dict, collections.defaultdict(list), collections.OrderedDict)) - def test_to_dict(self, mapping): + def test_to_dict(self, mapping, datetime_series): # GH16122 - ts = TestData().ts tm.assert_series_equal( - Series(ts.to_dict(mapping), name='ts'), ts) - from_method = Series(ts.to_dict(collections.Counter)) - from_constructor = Series(collections.Counter(ts.iteritems())) + Series(datetime_series.to_dict(mapping), name='ts'), + datetime_series) + from_method = Series(datetime_series.to_dict(collections.Counter)) + from_constructor = Series(collections + .Counter(datetime_series.iteritems())) tm.assert_series_equal(from_method, from_constructor) diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index ab3fdd8cbf84f..b3f105ee5cb67 100644 --- a/pandas/tests/series/test_missing.py +++ 
b/pandas/tests/series/test_missing.py @@ -21,8 +21,6 @@ import pandas.util.testing as tm import pandas.util._test_decorators as td -from .common import TestData - try: import scipy _is_scipy_ge_0190 = (LooseVersion(scipy.__version__) >= @@ -52,7 +50,7 @@ def _simple_ts(start, end, freq='D'): return Series(np.random.randn(len(rng)), index=rng) -class TestSeriesMissingData(TestData): +class TestSeriesMissingData(): def test_remove_na_deprecation(self): # see gh-16971 @@ -489,7 +487,7 @@ def test_isnull_for_inf_deprecated(self): tm.assert_series_equal(r, e) tm.assert_series_equal(dr, de) - def test_fillna(self): + def test_fillna(self, datetime_series): ts = Series([0., 1., 2., 3., 4.], index=tm.makeDateIndex(5)) tm.assert_series_equal(ts, ts.fillna(method='ffill')) @@ -506,7 +504,8 @@ def test_fillna(self): tm.assert_series_equal(ts.fillna(value=5), exp) pytest.raises(ValueError, ts.fillna) - pytest.raises(ValueError, self.ts.fillna, value=0, method='ffill') + pytest.raises(ValueError, datetime_series.fillna, value=0, + method='ffill') # GH 5703 s1 = Series([np.nan]) @@ -576,9 +575,9 @@ def test_fillna_inplace(self): expected = x.fillna(value=0) assert_series_equal(y, expected) - def test_fillna_invalid_method(self): + def test_fillna_invalid_method(self, datetime_series): try: - self.ts.fillna(method='ffil') + datetime_series.fillna(method='ffil') except ValueError as inst: assert 'ffil' in str(inst) @@ -632,8 +631,8 @@ def test_timedelta64_nan(self): # def test_logical_range_select(self): # np.random.seed(12345) - # selector = -0.5 <= self.ts <= 0.5 - # expected = (self.ts >= -0.5) & (self.ts <= 0.5) + # selector = -0.5 <= datetime_series <= 0.5 + # expected = (datetime_series >= -0.5) & (datetime_series <= 0.5) # assert_series_equal(selector, expected) def test_dropna_empty(self): @@ -688,8 +687,8 @@ def test_dropna_intervals(self): expected = s.iloc[1:] assert_series_equal(result, expected) - def test_valid(self): - ts = self.ts.copy() + def test_valid(self, 
datetime_series): + ts = datetime_series.copy() ts[::2] = np.NaN result = ts.dropna() @@ -734,12 +733,12 @@ def test_pad_require_monotonicity(self): pytest.raises(ValueError, rng2.get_indexer, rng, method='pad') - def test_dropna_preserve_name(self): - self.ts[:5] = np.nan - result = self.ts.dropna() - assert result.name == self.ts.name - name = self.ts.name - ts = self.ts.copy() + def test_dropna_preserve_name(self, datetime_series): + datetime_series[:5] = np.nan + result = datetime_series.dropna() + assert result.name == datetime_series.name + name = datetime_series.name + ts = datetime_series.copy() ts.dropna(inplace=True) assert ts.name == name @@ -825,10 +824,11 @@ def test_series_pad_backfill_limit(self): assert_series_equal(result, expected) -class TestSeriesInterpolateData(TestData): +class TestSeriesInterpolateData(): - def test_interpolate(self): - ts = Series(np.arange(len(self.ts), dtype=float), self.ts.index) + def test_interpolate(self, datetime_series, string_series): + ts = Series(np.arange(len(datetime_series), dtype=float), + datetime_series.index) ts_copy = ts.copy() ts_copy[5:10] = np.NaN @@ -836,8 +836,8 @@ def test_interpolate(self): linear_interp = ts_copy.interpolate(method='linear') tm.assert_series_equal(linear_interp, ts) - ord_ts = Series([d.toordinal() for d in self.ts.index], - index=self.ts.index).astype(float) + ord_ts = Series([d.toordinal() for d in datetime_series.index], + index=datetime_series.index).astype(float) ord_ts_copy = ord_ts.copy() ord_ts_copy[5:10] = np.NaN @@ -847,7 +847,7 @@ def test_interpolate(self): # try time interpolation on a non-TimeSeries # Only raises ValueError if there are NaNs. 
- non_ts = self.series.copy() + non_ts = string_series.copy() non_ts[0] = np.NaN pytest.raises(ValueError, non_ts.interpolate, method='time') From 7f6c144b0719910216f51a078903d164955fbd02 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Oct 2018 11:33:03 -0500 Subject: [PATCH 041/132] merge conflict --- pandas/core/indexes/period.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f3f680f085118..1794b59a2f9c1 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -252,8 +252,10 @@ def _from_ordinals(cls, values, name=None, freq=None, **kwargs): result = cls._simple_new(data, name=name) return result - def _shallow_copy(self, values, **kwargs): + def _shallow_copy(self, values=None, **kwargs): # TODO: simplify, figure out type of values + if values is None: + values = self._ndarray_values if not isinstance(values, PeriodArray): values = PeriodArray._from_ordinals(values, freq=self.freq) @@ -352,7 +354,7 @@ def shift(self, n): ------- shifted : PeriodIndex """ - i8values = self._data._tshift(n) + i8values = self._data._time_shift(n) return self._simple_new(i8values, name=self.name, freq=self.freq) def _coerce_scalar_to_index(self, item): From b4aa4caaec9a3732e9307db59e837546be0bff01 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Oct 2018 11:48:32 -0500 Subject: [PATCH 042/132] wip --- pandas/core/indexes/period.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 1794b59a2f9c1..74c1df2871264 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -257,7 +257,11 @@ def _shallow_copy(self, values=None, **kwargs): if values is None: values = self._ndarray_values if not isinstance(values, PeriodArray): - values = PeriodArray._from_ordinals(values, freq=self.freq) + # in particular, I would like to avoid complex_new 
here. + # Some people seem to be calling use with unexpected types + # Index.difference -> ndarray[Period] + # DatetimelikeIndexOpsMixin.repeat -> ndarray[ordinal] + values = PeriodArray._complex_new(values, freq=self.freq) # I don't like overloading shallow_copy with freq changes. # See if it's used anywhere outside of test_resample_empty_dataframe @@ -846,6 +850,10 @@ def wrapper(self, other): return op(self.values, other) return wrapper + def repeat(self, repeats, *args, **kwargs): + # TODO(DatetimeArray): Just use Index.repeat + return Index.repeat(self, repeats, *args, **kwargs) + PeriodIndex._add_comparison_ops() PeriodIndex._add_numeric_methods_disabled() From 6a7013193e4a0d3209ed860d07cb43ed3f68458c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Oct 2018 13:10:51 -0500 Subject: [PATCH 043/132] indexes passing --- pandas/core/indexes/period.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 74c1df2871264..a5979e424bcad 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -255,13 +255,19 @@ def _from_ordinals(cls, values, name=None, freq=None, **kwargs): def _shallow_copy(self, values=None, **kwargs): # TODO: simplify, figure out type of values if values is None: - values = self._ndarray_values + values = self._values if not isinstance(values, PeriodArray): - # in particular, I would like to avoid complex_new here. - # Some people seem to be calling use with unexpected types - # Index.difference -> ndarray[Period] - # DatetimelikeIndexOpsMixin.repeat -> ndarray[ordinal] - values = PeriodArray._complex_new(values, freq=self.freq) + if (isinstance(values, np.ndarray) and + is_integer_dtype(values.dtype)): + values = PeriodArray._from_ordinals(values, freq=self.freq) + else: + # in particular, I would like to avoid complex_new here. 
+ # Some people seem to be calling use with unexpected types + # Index.difference -> ndarray[Period] + # DatetimelikeIndexOpsMixin.repeat -> ndarray[ordinal] + # I think that once all of Datetime* are EAs, we can simplify + # this quite a bit. + values = PeriodArray._complex_new(values, freq=self.freq) # I don't like overloading shallow_copy with freq changes. # See if it's used anywhere outside of test_resample_empty_dataframe From 9aa077c912523d3b8dcd1043274864d9b89522ca Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Oct 2018 13:20:17 -0500 Subject: [PATCH 044/132] op names --- pandas/core/indexes/period.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a5979e424bcad..3a19f97cef1e1 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -854,6 +854,9 @@ def _create_comparison_method(cls, op): # TODO(DatetimeArray): move to base class. def wrapper(self, other): return op(self.values, other) + + wrapper.__doc__ = op.__doc__ + wrapper.__name__ = '__{}__'.format(op.__name__) return wrapper def repeat(self, repeats, *args, **kwargs): From 411738c1277f775ce5c9d6aa480d00501ced2f45 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 4 Oct 2018 13:30:31 -0500 Subject: [PATCH 045/132] extension, arrays passing --- pandas/core/arrays/period.py | 7 +++++++ pandas/io/pytables.py | 2 +- pandas/tests/extension/test_common.py | 1 - pandas/tests/extension/test_integer.py | 6 ------ pandas/tests/reshape/test_concat.py | 6 ++++-- 5 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index e0d0061cec681..1f4bada5edff3 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -136,6 +136,13 @@ class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): # Constructors def __init__(self, values, freq=None): # type: (np.ndarray[np.int64], Union[str, Tick]) -> None + # TODO: 
constructor discussion. The type above doesn't match what + # we handle right now (values can be PeriodArray or PeriodIndex + if isinstance(values, type(self)): + values, freq = values._data, values.freq + elif isinstance(values, ABCPeriodIndex): + values, freq = values._ndarray_values, values.freq + values = np.array(values, dtype='int64', copy=False) self._data = values if freq is None: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fc9e415ed38f7..aec610fea012c 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2484,7 +2484,7 @@ def f(values, freq=None, tz=None): return f elif klass == PeriodIndex: def f(values, freq=None, tz=None): - return PeriodIndex._simple_new(values, None, freq=freq) + return PeriodIndex._from_ordinals(values, freq=freq) return f return klass diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index b6223ea96d7dd..a0a8f86a5d7b5 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -78,7 +78,6 @@ def test_astype_no_copy(): @pytest.mark.parametrize('dtype', [ dtypes.DatetimeTZDtype('ns', 'US/Central'), - dtypes.PeriodDtype("D"), ]) def test_is_not_extension_array_dtype(dtype): assert not isinstance(dtype, dtypes.ExtensionDtype) diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index fa5c89d85e548..7aa33006dadda 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -143,12 +143,6 @@ def test_error(self, data, all_arithmetic_operators): # other specific errors tested in the integer array specific tests pass - @pytest.mark.xfail(reason="EA is listified. 
GH-22922", strict=True) - def test_add_series_with_extension_array(self, data): - super(TestArithmeticOps, self).test_add_series_with_extension_array( - data - ) - class TestComparisonOps(base.BaseComparisonOpsTests): diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 2aaa04d571e69..f4bf24d69916f 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1994,11 +1994,12 @@ def test_concat_NaT_dataframes(self, tz): def test_concat_period_series(self): x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='D')) - expected = Series([x[0], x[1], y[0], y[1]], dtype='object') + expected = Series([x[0], x[1], y[0], y[1]], dtype='Period[D]') result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - assert result.dtype == 'object' + def test_concat_period_multiple_freq_series(self): + # XXX: Support this somehow. Will require some work. 
# different freq x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='M')) @@ -2007,6 +2008,7 @@ def test_concat_period_series(self): tm.assert_series_equal(result, expected) assert result.dtype == 'object' + def test_concat_period_other_series(self): x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M')) expected = Series([x[0], x[1], y[0], y[1]], dtype='object') From 6d98e8574383ea3cf22efd1a3a493944c27b341f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 9 Oct 2018 10:58:26 -0500 Subject: [PATCH 046/132] fixup --- pandas/core/arrays/datetimes.py | 4 +-- pandas/core/arrays/period.py | 45 ++++++++++++++++++++------------- pandas/core/indexes/period.py | 32 ++++++++++++++++++----- 3 files changed, 55 insertions(+), 26 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 7daaa8de1734f..908048b462833 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -812,7 +812,7 @@ def to_period(self, freq=None): pandas.PeriodIndex: Immutable ndarray holding ordinal values pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object """ - from pandas.core.arrays.period import PeriodArrayMixin + from pandas.core.arrays.period import PeriodArray if self.tz is not None: warnings.warn("Converting to PeriodArray/Index representation " @@ -827,7 +827,7 @@ def to_period(self, freq=None): freq = get_period_alias(freq) - return PeriodArrayMixin(self.values, freq=freq) + return PeriodArray._from_datetime64(self.values, freq, tz=self.tz) # ----------------------------------------------------------------- # Properties - Vectorized Timestamp Properties/Methods diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 06e6437b89dd7..eec4323f6b254 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -16,7 
+16,7 @@ from pandas._libs.tslibs import period as libperiod from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta from pandas._libs.tslibs.fields import isleapyear_arr -from pandas.util._decorators import cache_readonly, deprecate_kwarg +from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( is_integer_dtype, is_float_dtype, is_period_dtype, is_float, is_integer, pandas_dtype, is_scalar, @@ -276,6 +276,23 @@ def _from_periods(cls, periods, freq=None): ordinals = libperiod.extract_ordinals(periods, freq) return cls._from_ordinals(ordinals, freq=freq) + @classmethod + def _from_datetime64(cls, data, freq, tz=None): + """Construct a PeriodArray from a datetime64 array + + Parameters + ---------- + data : ndarray[datetime64[ns], datetime64[ns, tz]] + freq : str or Tick + tz : tzinfo, option + + Returns + ------- + + """ + data = dt64arr_to_periodarr(data, freq, tz) + return cls._simple_new(data, freq=freq) + def __repr__(self): return '\n{}\nLength: {}, dtype: {}'.format( [str(s) for s in self], @@ -658,30 +675,24 @@ def _add_delta(self, other): ordinal_delta = self._maybe_convert_timedelta(other) return self._time_shift(ordinal_delta) - @deprecate_kwarg(old_arg_name='n', new_arg_name='periods') - def shift(self, periods): + def shift(self, periods=1): """ - Shift index by desired number of increments. + Shift values by desired number. - This method is for shifting the values of period indexes - by a specified time increment. + Newly introduced missing values are filled with + ``self.dtype.na_value``. + + .. versionadded:: 0.24.0 Parameters ---------- - periods : int - Number of periods (or increments) to shift by, - can be positive or negative. - - .. versionchanged:: 0.24.0 + periods : int, default 1 + The number of periods to shift. Negative values are allowed + for shifting backwards. Returns ------- - pandas.PeriodIndex - Shifted index. 
- - See Also - -------- - DatetimeIndex.shift : Shift values of DatetimeIndex. + shifted : PeriodArray """ # TODO(DatetimeArray): remove from DatetimeLikeArrayMixin # The semantics for Index.shift differ from EA.shift diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index df6a8b0579519..159b8a8f94e3d 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -37,7 +37,9 @@ from pandas.core.indexes.base import _index_shared_docs, ensure_index from pandas import compat -from pandas.util._decorators import Appender, Substitution, cache_readonly +from pandas.util._decorators import ( + Appender, Substitution, cache_readonly, deprecate_kwarg +) import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -328,6 +330,10 @@ def _nat_new(self, box=True): result = self._simple_new(result, name=self.name) return result + def to_timestamp(self, freq=None, how='start'): + result = self._data.to_timestamp(freq=freq, how=how) + return result._simple_new(result, name=self.name) + # ------------------------------------------------------------------------ # Indexing @cache_readonly @@ -357,20 +363,32 @@ def _int64index(self): # ------------------------------------------------------------------------ # Index Methods - def shift(self, n): + @deprecate_kwarg(old_arg_name='n', new_arg_name='periods') + def shift(self, periods): """ - Specialized shift which produces a PeriodIndex + Shift index by desired number of increments. + + This method is for shifting the values of period indexes + by a specified time increment. Parameters ---------- - n : int - Periods to shift by + periods : int, default 1 + Number of periods (or increments) to shift by, + can be positive or negative. + + .. versionchanged:: 0.24.0 Returns ------- - shifted : PeriodIndex + pandas.PeriodIndex + Shifted index. + + See Also + -------- + DatetimeIndex.shift : Shift values of DatetimeIndex. 
""" - i8values = self._data._time_shift(n) + i8values = self._data._time_shift(periods) return self._simple_new(i8values, name=self.name, freq=self.freq) def _coerce_scalar_to_index(self, item): From 6d9e15070e045e630ed66f6731f01e3680282429 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 9 Oct 2018 11:02:57 -0500 Subject: [PATCH 047/132] lint --- pandas/core/arrays/datetimelike.py | 17 --------- pandas/core/arrays/period.py | 55 +-------------------------- pandas/tests/arrays/test_period.py | 1 - pandas/tests/extension/test_period.py | 2 - 4 files changed, 2 insertions(+), 73 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3aef61fb55427..d3608f69a5df0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -555,23 +555,6 @@ def shift(self, periods, freq=None): """ return self._time_shift(periods=periods, freq=freq) - def _time_shift(self, periods, freq=None): - """ - Shift each value by `periods`. - - Note this is different from ExtensionArray.shift, which - shifts the *position* of each element, padding the end with - missing values. - - Parameters - ---------- - periods : int - Number of periods to shift by. - freq : pandas.DateOffset, pandas.Timedelta, or string - Frequency increment to shift by. - """ - return self._time_shift(periods=periods, freq=freq) - def _time_shift(self, periods, freq=None): """ Shift each value by `periods`. 
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index eec4323f6b254..b74bc736c6bb0 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -6,13 +6,12 @@ from pandas import compat from pandas.compat.numpy import function as nv -from pandas._libs import Timedelta from pandas._libs import lib from pandas._libs.tslib import NaT, iNaT from pandas._libs.tslibs.period import ( Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX, get_period_field_arr, period_asfreq_arr, - _validate_end_alias) +) from pandas._libs.tslibs import period as libperiod from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta from pandas._libs.tslibs.fields import isleapyear_arr @@ -29,9 +28,6 @@ ensure_object ) -from pandas.core.dtypes.common import ( - is_integer_dtype, is_float_dtype, is_period_dtype, - is_datetime64_dtype) from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ( ABCSeries, ABCPeriodIndex, ABCIndexClass, @@ -810,45 +806,6 @@ def repeat(self, repeats, *args, **kwargs): def strftime(self, date_format): return self._format_native_types(date_format=date_format) - def to_timestamp(self, freq=None, how='start'): - from pandas import DatetimeIndex - - how = _validate_end_alias(how) - - end = how == 'E' - if end: - if freq == 'B': - # roll forward to ensure we land on B date - adjust = Timedelta(1, 'D') - Timedelta(1, 'ns') - return self.to_timestamp(how='start') + adjust - else: - adjust = Timedelta(1, 'ns') - return (self + 1).to_timestamp(how='start') - adjust - - if freq is None: - base, mult = _gfc(self.freq) - freq = frequencies.get_to_timestamp_base(base) - else: - freq = Period._maybe_convert_freq(freq) - - base, mult = _gfc(freq) - new_data = self.asfreq(freq, how) - - new_data = libperiod.periodarr_to_dt64arr(new_data._ndarray_values, - base) - # TODO: what should the return type of this be? - # Eventually a DatetimeArray makes sense. 
- # But for now let's do a DatetimeIndex? - return DatetimeIndex(new_data) - - @property - def start_time(self): - return self.to_timestamp(how='start') - - @property - def end_time(self): - return self.to_timestamp(how='end') - def astype(self, dtype, copy=True): # TODO: Figure out something better here... # We have DatetimeLikeArrayMixin -> @@ -909,6 +866,7 @@ def item(self): raise ValueError('can only convert an array of size 1 to a ' 'Python scalar') + PeriodArray._add_comparison_ops() PeriodArray._add_datetimelike_methods() # PeriodArray._add_numeric_methods_disabled() @@ -1025,12 +983,3 @@ def _make_field_arrays(*fields): else np.repeat(x, length) for x in fields] return arrays - - -def dt64arr_to_periodarr(data, freq, tz): - if data.dtype != np.dtype('M8[ns]'): - raise ValueError('Wrong dtype: %s' % data.dtype) - - freq = Period._maybe_convert_freq(freq) - base, mult = _gfc(freq) - return libperiod.dt64arr_to_periodarr(data.view('i8'), base, tz) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index b8236ff547074..9157f1654f724 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -34,4 +34,3 @@ def test_setitem_raises(): with tm.assert_raises_regex(TypeError, "int"): arr[0] = 1 - diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 2ed7159b02513..bf625c13a894c 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -122,7 +122,6 @@ class TestMissing(BasePeriodTests, base.BaseMissingTests): pass - class TestReshaping(BasePeriodTests, base.BaseReshapingTests): pass @@ -133,4 +132,3 @@ class TestSetitem(BasePeriodTests, base.BaseSetitemTests): class TestGroupby(BasePeriodTests, base.BaseGroupbyTests): pass - From 48994798120a35ecaae79507e6b0b91ff9780bf1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 9 Oct 2018 11:12:47 -0500 Subject: [PATCH 048/132] Fixed to_timestamp --- 
pandas/core/arrays/period.py | 24 +++++++++++------------- pandas/core/indexes/period.py | 3 ++- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index b74bc736c6bb0..c1f1f9ff1dd19 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -243,15 +243,6 @@ def _complex_new(cls, data=None, ordinal=None, freq=None, start=None, data = ensure_object(data) return cls._from_periods(data, freq=freq) - @classmethod - def _from_ordinals(cls, values, freq=None): - # type: (ndarray[int], Optional[Tick]) -> PeriodArray - """ - Values should be int ordinals - `__new__` & `_simple_new` coerce to ordinals and call this method - """ - return cls(values, freq=freq) - @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): # type: (Sequence[Optional[Period]], Dtype, bool) -> PeriodArray @@ -265,6 +256,15 @@ def _from_factorized(cls, values, original): # type: (Sequence[Optional[Period]], PeriodArray) -> PeriodArray return cls._from_periods(values, freq=original.freq) + @classmethod + def _from_ordinals(cls, values, freq=None): + # type: (ndarray[int], Optional[Tick]) -> PeriodArray + """ + Values should be int ordinals + `__new__` & `_simple_new` coerce to ordinals and call this method + """ + return cls(values, freq=freq) + @classmethod def _from_periods(cls, periods, freq=None): # type: (np.ndarray[Optional[Period]], Optional[Tick]) -> PeriodArray @@ -280,11 +280,11 @@ def _from_datetime64(cls, data, freq, tz=None): ---------- data : ndarray[datetime64[ns], datetime64[ns, tz]] freq : str or Tick - tz : tzinfo, option + tz : tzinfo, optional Returns ------- - + PeriodArray[freq] """ data = dt64arr_to_periodarr(data, freq, tz) return cls._simple_new(data, freq=freq) @@ -869,8 +869,6 @@ def item(self): PeriodArray._add_comparison_ops() PeriodArray._add_datetimelike_methods() -# PeriodArray._add_numeric_methods_disabled() -# PeriodArray._add_logical_methods_disabled() # 
------------------------------------------------------------------- diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 159b8a8f94e3d..674b5b6317dfc 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -331,8 +331,9 @@ def _nat_new(self, box=True): return result def to_timestamp(self, freq=None, how='start'): + from pandas import DatetimeIndex result = self._data.to_timestamp(freq=freq, how=how) - return result._simple_new(result, name=self.name) + return DatetimeIndex._simple_new(result, name=self.name) # ------------------------------------------------------------------------ # Indexing From 634def1a16d712c1535367ab9aafbeb1ad98c92a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 9 Oct 2018 11:32:54 -0500 Subject: [PATCH 049/132] Same error message for index, series --- pandas/tests/arithmetic/test_period.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 3ab78e194ad1d..727e24db8230a 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -926,9 +926,8 @@ def test_pi_offset_errors(self): # Series op is applied per Period instance, thus error is raised # from Period - msg_idx = r"Input has different freq from Period.*?\(freq=D\)" - msg_s = r"Input cannot be converted to Period\(freq=D\)" - for obj, msg in [(idx, msg_idx), (ser, msg_s)]: + msg = r"Input has different freq from Period.*?\(freq=D\)" + for obj in [idx, ser]: with tm.assert_raises_regex(period.IncompatibleFrequency, msg): obj + pd.offsets.Hour(2) From 1f1845245c99e029ee3a347ac4cc4bb5bc5af12f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 9 Oct 2018 13:09:08 -0500 Subject: [PATCH 050/132] Fix freq handling in to_timestamp --- pandas/core/indexes/period.py | 4 +++- pandas/tests/indexes/period/test_tools.py | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git 
a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 674b5b6317dfc..c318de05ef086 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -333,7 +333,9 @@ def _nat_new(self, box=True): def to_timestamp(self, freq=None, how='start'): from pandas import DatetimeIndex result = self._data.to_timestamp(freq=freq, how=how) - return DatetimeIndex._simple_new(result, name=self.name) + return DatetimeIndex._simple_new(result, + name=self.name, + freq=result.freq) # ------------------------------------------------------------------------ # Indexing diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index a5c58eb40cc0d..a7bd2f370996b 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -101,6 +101,12 @@ def _get_with_delta(delta, freq='A-DEC'): tm.assert_index_equal(result.index, exp_index) assert result.name == 'foo' + def test_to_timestamp_freq(self): + idx = pd.period_range('2017', periods=12, freq="A-DEC") + result = idx.to_timestamp() + expected = pd.date_range("2017", periods=12, freq="AS-JAN") + tm.assert_index_equal(result, expected) + def test_to_timestamp_repr_is_code(self): zs = [Timestamp('99-04-17 00:00:00', tz='UTC'), Timestamp('2001-04-17 00:00:00', tz='UTC'), From 2f92b2297207fc0edb56d3ba44f0fbd095acce91 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 9 Oct 2018 13:15:47 -0500 Subject: [PATCH 051/132] dtype update --- pandas/core/dtypes/dtypes.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index e3eddb3121544..0a249dfb2c0c4 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -577,7 +577,7 @@ def __eq__(self, other): str(self.tz) == str(other.tz)) -class PeriodDtype(ExtensionDtype): +class PeriodDtype(ExtensionDtype, PandasExtensionDtype): """ A Period 
duck-typed class, suitable for holding a period with freq dtype. diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 7e95b076a8a66..887cc7eec15ae 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -378,11 +378,9 @@ def test_basic(self): assert is_period(pidx) s = Series(pidx, name='A') - # dtypes - # series results in object dtype currently, - # is_period checks period_arraylike - assert not is_period_dtype(s.dtype) - assert not is_period_dtype(s) + + assert is_period_dtype(s.dtype) + assert is_period_dtype(s) assert is_period(s) assert not is_period_dtype(np.dtype('float64')) From 23f232cdda9df3febb4c520b9634ee473e618461 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 9 Oct 2018 13:20:59 -0500 Subject: [PATCH 052/132] accept kwargs --- pandas/core/arrays/period.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index c1f1f9ff1dd19..bbfe7defc570e 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -762,7 +762,8 @@ def _maybe_convert_timedelta(self, other): raise IncompatibleFrequency(msg.format(cls=type(self).__name__, freqstr=self.freqstr)) - def _format_native_types(self, na_rep=u'NaT', date_format=None): + def _format_native_types(self, na_rep=u'NaT', date_format=None, + **kwargs): # TODO(DatetimeArray): remove values = self.astype(object) From dd3b8cdc44c16b2c825886f2c967cd3da1924b82 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 9 Oct 2018 15:27:32 -0500 Subject: [PATCH 053/132] fixups --- pandas/core/arrays/period.py | 6 ++++- pandas/core/frame.py | 10 ++++++- pandas/core/indexes/period.py | 12 ++++++--- pandas/tests/frame/test_combine_concat.py | 2 +- pandas/tests/frame/test_constructors.py | 5 ++-- pandas/tests/io/formats/test_format.py | 30 ++++++++++++++------- pandas/tests/reshape/test_concat.py | 7 +++-- pandas/tests/scalar/test_nat.py | 5 ++-- 
pandas/tests/series/test_api.py | 5 ++-- pandas/tests/series/test_constructors.py | 12 ++++++--- pandas/tests/series/test_datetime_values.py | 16 ++++++----- pandas/tests/series/test_period.py | 19 +++++++------ 12 files changed, 87 insertions(+), 42 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index bbfe7defc570e..1593f54e2f347 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -335,6 +335,8 @@ def fillna(self, value=None, method=None, limit=None): return new_values def __setitem__(self, key, value): + from pandas.core.dtypes.missing import isna + if isinstance(value, (compat.Sequence, type(self))): if len(key) != len(value) and not com.is_bool_indexer(key): msg = ("shape mismatch: value array of length '{}' does not " @@ -356,7 +358,9 @@ def __setitem__(self, key, value): raise IncompatibleFrequency(msg) value = value.ordinal - elif isinstance(value, (type(None), type(NaT))): + elif isna(value): + # Previously we allowed setting np.nan on a Series[object] + # do we still want to allow that, or should we require None / NaT? value = iNaT else: msg = ("'value' should be a 'Period', 'NaT', or array of those. " diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fba27e8e46553..91c6752ea0a00 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5115,6 +5115,8 @@ def combine(self, other, func, fill_value=None, overwrite=True): # see if we need to be represented as i8 (datetimelike) # try to keep us at this dtype needs_i8_conversion_i = needs_i8_conversion(new_dtype) + # Is this third argument documented? The docs say func is binary + # don't mention needs_i8_conversion... 
if needs_i8_conversion_i: arr = func(series, otherSeries, True) else: @@ -5189,7 +5191,13 @@ def combiner(x, y, needs_i8_conversion=False): if y.name not in self.columns: return y_values - return expressions.where(mask, y_values, x_values) + result = expressions.where(mask, y_values, x_values) + # if needs_i8_conversion: + # TODO: handle all these + from pandas.core.dtypes.common import is_period_dtype + if is_period_dtype(x): + result = x.values._simple_new(result, freq=x.values.freq) + return result return self.combine(other, combiner, overwrite=False) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index c318de05ef086..54143f9bfa301 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -76,7 +76,10 @@ class PeriodDelegateMixin(PandasDelegate): """ def _delegate_property_get(self, name, *args, **kwargs): result = getattr(self._data, name) - if name in PeriodArray._datetimelike_ops: + box_ops = ( + set(PeriodArray._datetimelike_ops) - set(PeriodArray._bool_ops) + ) + if name in box_ops: result = Index(result, name=self.name) return result @@ -84,7 +87,8 @@ def _delegate_property_set(self, name, value, *args, **kwargs): setattr(self._data, name, value) def _delegate_method(self, name, *args, **kwargs): - return operator.methodcaller(name, *args, **kwargs)(self._data) + result = operator.methodcaller(name, *args, **kwargs)(self._data) + return Index(result, name=self.name) @delegate_names( @@ -809,12 +813,14 @@ def __setstate__(self, state): np.ndarray.__setstate__(data, nd_state) # backcompat - self._freq = Period._maybe_convert_freq(own_state[1]) + freq = Period._maybe_convert_freq(own_state[1]) else: # pragma: no cover data = np.empty(state) np.ndarray.__setstate__(self, state) + freq = None # ? 
+ data = PeriodArray._from_ordinals(data, freq=freq) self._data = data else: diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index d1f921bc5e894..0395ad2cbc0c5 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -723,7 +723,7 @@ def test_combine_first_period(self): freq='M') exp = pd.DataFrame({'P': exp_dts}, index=[1, 2, 3, 4, 5, 7]) tm.assert_frame_equal(res, exp) - assert res['P'].dtype == 'object' + assert res['P'].dtype == data1.dtype # different freq dts2 = pd.PeriodIndex(['2012-01-01', '2012-01-02', diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index e2be410d51b88..c1dce74073800 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -582,12 +582,13 @@ def test_constructor_period(self): a = pd.PeriodIndex(['2012-01', 'NaT', '2012-04'], freq='M') b = pd.PeriodIndex(['2012-02-01', '2012-03-01', 'NaT'], freq='D') df = pd.DataFrame({'a': a, 'b': b}) - assert df['a'].dtype == 'object' - assert df['b'].dtype == 'object' + assert df['a'].dtype == a.dtype + assert df['b'].dtype == b.dtype # list of periods df = pd.DataFrame({'a': a.astype(object).tolist(), 'b': b.astype(object).tolist()}) + # TODO: should we infer these? 
assert df['a'].dtype == 'object' assert df['b'].dtype == 'object' diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 03e830fb09ad6..28aa8a92cc410 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1720,9 +1720,11 @@ def test_period(self): pd.Period('2011-03-01 09:00', freq='H'), pd.Period('2011-04', freq='M')], 'C': list('abcd')}) - exp = (" A B C\n0 2013-01 2011-01 a\n" - "1 2013-02 2011-02-01 b\n2 2013-03 2011-03-01 09:00 c\n" - "3 2013-04 2011-04 d") + exp = (" A B C\n" + "0 2013-01 2011-01 a\n" + "1 2013-02 2011-02-01 b\n" + "2 2013-03 2011-03-01 09:00 c\n" + "3 2013-04 2011-04 d") assert str(df) == exp @@ -2110,21 +2112,31 @@ def test_period(self): # GH 12615 index = pd.period_range('2013-01', periods=6, freq='M') s = Series(np.arange(6, dtype='int64'), index=index) - exp = ("2013-01 0\n2013-02 1\n2013-03 2\n2013-04 3\n" - "2013-05 4\n2013-06 5\nFreq: M, dtype: int64") + exp = ("2013-01 0\n" + "2013-02 1\n" + "2013-03 2\n" + "2013-04 3\n" + "2013-05 4\n" + "2013-06 5\n" + "Freq: M, dtype: int64") assert str(s) == exp s = Series(index) - exp = ("0 2013-01\n1 2013-02\n2 2013-03\n3 2013-04\n" - "4 2013-05\n5 2013-06\ndtype: object") + exp = ("0 2013-01\n" + "1 2013-02\n" + "2 2013-03\n" + "3 2013-04\n" + "4 2013-05\n" + "5 2013-06\n" + "dtype: period[M]") assert str(s) == exp # periods with mixed freq s = Series([pd.Period('2011-01', freq='M'), pd.Period('2011-02-01', freq='D'), pd.Period('2011-03-01 09:00', freq='H')]) - exp = ("0 2011-01\n1 2011-02-01\n" - "2 2011-03-01 09:00\ndtype: object") + exp = ("0 2011-01\n1 2011-02-01\n" + "2 2011-03-01 09:00\ndtype: object") assert str(s) == exp def test_max_multi_index_display(self): diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index f4bf24d69916f..8beb1a695538e 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -415,6 +415,7 @@ def 
test_concatlike_common_period(self): res = pd.concat([ps1, ps2]) tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) + @pytest.mark.xfail(reason="multiple freq", strict=True) def test_concatlike_common_period_diff_freq_to_object(self): # GH 13221 pi1 = pd.PeriodIndex(['2011-01', '2011-02'], freq='M') @@ -1998,9 +1999,9 @@ def test_concat_period_series(self): result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(reason="multiple freq", strict=True) def test_concat_period_multiple_freq_series(self): - # XXX: Support this somehow. Will require some work. - # different freq + # Blocked by https://github.com/pandas-dev/pandas/pull/22997 x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='M')) expected = Series([x[0], x[1], y[0], y[1]], dtype='object') @@ -2008,7 +2009,9 @@ def test_concat_period_multiple_freq_series(self): tm.assert_series_equal(result, expected) assert result.dtype == 'object' + @pytest.mark.xfail(reason="multiple freq", strict=True) def test_concat_period_other_series(self): + # Blocked by https://github.com/pandas-dev/pandas/pull/22997 x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M')) expected = Series([x[0], x[1], y[0], y[1]], dtype='object') diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index bc8582d9b7d29..b978ccf4a2f6a 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -5,8 +5,9 @@ import numpy as np from pandas import (NaT, Index, Timestamp, Timedelta, Period, - DatetimeIndex, PeriodIndex, + DatetimeIndex, TimedeltaIndex, Series, isna) +from pandas.core.arrays import PeriodArray from pandas.util import testing as tm from pandas._libs.tslib import iNaT @@ -15,7 +16,7 @@ @pytest.mark.parametrize('nat, idx', [(Timestamp('NaT'), DatetimeIndex), (Timedelta('NaT'), 
TimedeltaIndex), - (Period('NaT', freq='M'), PeriodIndex)]) + (Period('NaT', freq='M'), PeriodArray)]) def test_nat_fields(nat, idx): for field in idx._field_ops: diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 3b82242626c20..3e68d4fc03f1f 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -14,7 +14,8 @@ from pandas.compat import range, lzip, isidentifier, string_types from pandas import (compat, Categorical, period_range, timedelta_range, - DatetimeIndex, PeriodIndex, TimedeltaIndex) + DatetimeIndex, TimedeltaIndex) +from pandas.core.arrays import PeriodArray import pandas.io.formats.printing as printing from pandas.util.testing import (assert_series_equal, ensure_clean) @@ -698,7 +699,7 @@ def test_dt_accessor_api_for_categorical(self): test_data = [ ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr), - ("Period", get_ops(PeriodIndex), s_pr, c_pr), + ("Period", get_ops(PeriodArray), s_pr, c_pr), ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr)] assert isinstance(c_dr.dt, Properties) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 57a3f54fadbcc..09f7235e3bf81 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -862,10 +862,9 @@ def test_constructor_periodindex(self): pi = period_range('20130101', periods=5, freq='D') s = Series(pi) + assert s.dtype == 'Period[D]' expected = Series(pi.astype(object)) - assert_series_equal(s, expected) - - assert s.dtype == 'object' + assert_series_equal(s.astype(object), expected) def test_constructor_dict(self): d = {'a': 0., 'b': 1., 'c': 2.} @@ -1141,7 +1140,12 @@ def test_convert_non_ns(self): def test_constructor_cant_cast_datetimelike(self, index): # floats are not ok - msg = "Cannot cast {} to ".format(type(index).__name__) + msg = "Cannot cast {}.*? 
to ".format( + # strip Index to convert PeriodIndex -> Period + # We don't care whether the error message says + # PeriodIndex or PeriodArray + type(index).__name__.rstrip("Index") + ) with tm.assert_raises_regex(TypeError, msg): Series(index, dtype=float) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index e06d3a67db662..15a6e029d71ad 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -18,6 +18,7 @@ PeriodIndex, DatetimeIndex, TimedeltaIndex, compat) import pandas.core.common as com +from pandas.core.arrays import PeriodArray from pandas._libs.tslibs.timezones import maybe_get_tz from pandas.util.testing import assert_series_equal @@ -31,7 +32,7 @@ def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor - ok_for_period = PeriodIndex._datetimelike_ops + ok_for_period = PeriodArray._datetimelike_ops ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq'] ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = ['to_period', 'to_pydatetime', 'tz_localize', @@ -195,12 +196,6 @@ def get_dir(s): tm.assert_almost_equal( results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) - s = Series(period_range('20130101', periods=5, - freq='D', name='xxx').astype(object)) - results = get_dir(s) - tm.assert_almost_equal( - results, list(sorted(set(ok_for_period + ok_for_period_methods)))) - # 11295 # ambiguous time error on the conversions s = Series(pd.date_range('2015-01-01', '2016-01-01', @@ -224,6 +219,13 @@ def get_dir(s): with pytest.raises(com.SettingWithCopyError): s.dt.hour[0] = 5 + # XXX: Series.dt no longer works for Series[object[Period]]. + # I think that's OK, but want a +1. 
+ s = Series(period_range('20130101', periods=5, + freq='D', name='xxx').astype(object)) + with tm.assert_raises_regex(AttributeError, "Can only use .dt"): + results = get_dir(s) + @pytest.mark.parametrize('method, dates', [ ['round', ['2012-01-02', '2012-01-02', '2012-01-01']], ['floor', ['2012-01-01', '2012-01-01', '2012-01-01']], diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index 24c2f30bef569..addaa6456d6dc 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -4,6 +4,7 @@ import pandas as pd import pandas.util.testing as tm import pandas.core.indexes.period as period +from pandas.core.arrays import PeriodArray from pandas import Series, period_range, DataFrame, Period @@ -30,9 +31,9 @@ def test_getitem(self): result = self.series[[2, 4]] exp = pd.Series([pd.Period('2000-01-03', freq='D'), pd.Period('2000-01-05', freq='D')], - index=[2, 4]) + index=[2, 4], dtype='Period[D]') tm.assert_series_equal(result, exp) - assert result.dtype == 'object' + assert result.dtype == 'Period[D]' def test_isna(self): # GH 13737 @@ -91,19 +92,20 @@ def test_NaT_cast(self): expected = Series([pd.NaT]) tm.assert_series_equal(result, expected) - def test_set_none_nan(self): - # currently Period is stored as object dtype, not as NaT + def test_set_none(self): self.series[3] = None - assert self.series[3] is None + assert self.series[3] is pd.NaT self.series[3:5] = None - assert self.series[4] is None + assert self.series[4] is pd.NaT + def test_set_nan(self): + # Do we want to allow this? 
self.series[5] = np.nan - assert np.isnan(self.series[5]) + assert self.series[5] is pd.NaT self.series[5:7] = np.nan - assert np.isnan(self.series[6]) + assert self.series[6] is pd.NaT def test_intercept_astype_object(self): expected = self.series.astype('object') @@ -184,6 +186,7 @@ def test_end_time_timevalues(self, input_vals): # GH 17157 # Check that the time part of the Period is adjusted by end_time # when using the dt accessor on a Series + input_vals = PeriodArray._from_periods(np.asarray(input_vals)) s = Series(input_vals) result = s.dt.end_time From 87ecb6488472f52af4dfca1bb0e7af87a19dd2cc Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 9 Oct 2018 16:04:41 -0500 Subject: [PATCH 054/132] updates --- pandas/core/accessor.py | 3 ++- pandas/core/indexes/period.py | 18 +++++++----------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 9722d8340f6d0..f2f8ae58dec56 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -121,7 +121,8 @@ def delegate_names(delegate, accessors, typ, overwrite=False): Returns ------- - decorator + callable + A class decorator Examples -------- diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 54143f9bfa301..0119c63aa589a 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -88,18 +88,15 @@ def _delegate_property_set(self, name, value, *args, **kwargs): def _delegate_method(self, name, *args, **kwargs): result = operator.methodcaller(name, *args, **kwargs)(self._data) + raw = {'_format_native_types'} + if name in raw: + return result return Index(result, name=self.name) -@delegate_names( - PeriodArray, - PeriodArray._datetimelike_ops + [ - 'size', - 'asi8', - 'shape', - ], - "property" -) +@delegate_names(PeriodArray, + PeriodArray._datetimelike_ops + ['size', 'asi8', 'shape'], + typ='property') @delegate_names( PeriodArray, [x for x in PeriodArray._datetimelike_methods @@ -107,8 
+104,7 @@ def _delegate_method(self, name, *args, **kwargs): '_format_native_types', '_maybe_convert_timedelta', ], - "method", - # overwrite size, asi8, etc. but not asfreq, to_timestamp + typ="method", overwrite=True, ) class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, From 0bde3292562131bddcd1677f6cc5caf6baf14922 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 9 Oct 2018 16:44:22 -0500 Subject: [PATCH 055/132] explicit --- pandas/core/indexes/period.py | 30 ++++++++++++--------- pandas/tests/series/test_datetime_values.py | 2 +- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0119c63aa589a..b9343144e87fe 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -88,24 +88,17 @@ def _delegate_property_set(self, name, value, *args, **kwargs): def _delegate_method(self, name, *args, **kwargs): result = operator.methodcaller(name, *args, **kwargs)(self._data) - raw = {'_format_native_types'} - if name in raw: - return result return Index(result, name=self.name) @delegate_names(PeriodArray, PeriodArray._datetimelike_ops + ['size', 'asi8', 'shape'], typ='property') -@delegate_names( - PeriodArray, - [x for x in PeriodArray._datetimelike_methods - if x not in {"asfreq", "to_timestamp"}] + [ - '_format_native_types', - '_maybe_convert_timedelta', - ], - typ="method", - overwrite=True, +@delegate_names(PeriodArray, + [x for x in PeriodArray._datetimelike_methods + if x not in {"asfreq", "to_timestamp"}], + typ="method", + overwrite=True, ) class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index, PeriodDelegateMixin): @@ -317,7 +310,8 @@ def _maybe_box_as_values(self, values, **attribs): return PeriodArray._from_ordinals(values, freq=freq) # ------------------------------------------------------------------------ - # Dispatch and Wrap + # Dispatch and maybe box. 
Not done in delegate_names because we box + # different from those (which use Index). def asfreq(self, freq=None, how='E'): result = self._data.asfreq(freq=freq, how=how) @@ -337,6 +331,16 @@ def to_timestamp(self, freq=None, how='start'): name=self.name, freq=result.freq) + def _format_native_types(self, na_rep='', quoting=None, **kwargs): + # just dispatch, return ndarray + return self._data._format_native_types(na_rep=na_rep, + quoting=quoting, + **kwargs) + + def _maybe_convert_timedelta(self, other): + # just dispatch, return ndarray + return self._data._maybe_convert_timedelta(other) + # ------------------------------------------------------------------------ # Indexing @cache_readonly diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 15a6e029d71ad..3a93ceddd536d 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -224,7 +224,7 @@ def get_dir(s): s = Series(period_range('20130101', periods=5, freq='D', name='xxx').astype(object)) with tm.assert_raises_regex(AttributeError, "Can only use .dt"): - results = get_dir(s) + get_dir(s) @pytest.mark.parametrize('method, dates', [ ['round', ['2012-01-02', '2012-01-02', '2012-01-01']], From 2d85a8208f91f41a19e723e36fb624bb2ba7c7f4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 9 Oct 2018 17:09:39 -0500 Subject: [PATCH 056/132] add to assert --- pandas/util/testing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 3bfad42d0aaf7..27e172e2a0619 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1544,6 +1544,10 @@ def assert_equal(left, right, **kwargs): assert_series_equal(left, right, **kwargs) elif isinstance(left, pd.DataFrame): assert_frame_equal(left, right, **kwargs) + elif isinstance(left, IntervalArray): + assert_interval_array_equal(left, right, **kwargs) + elif isinstance(left, PeriodArray): + 
assert_period_array_equal(left, right, **kwargs) elif isinstance(left, ExtensionArray): assert_extension_array_equal(left, right, **kwargs) elif isinstance(left, np.ndarray): From 438e6b5d7d9bee00cd152d855bdbce2d3041c54e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Oct 2018 08:36:18 -0500 Subject: [PATCH 057/132] wip period_array --- pandas/core/arrays/period.py | 43 ++++++++++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 1593f54e2f347..ffbd577cfa81c 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -268,6 +268,7 @@ def _from_ordinals(cls, values, freq=None): @classmethod def _from_periods(cls, periods, freq=None): # type: (np.ndarray[Optional[Period]], Optional[Tick]) -> PeriodArray + periods = np.asarray(periods, dtype=object) freq = freq or libperiod.extract_freq(periods) ordinals = libperiod.extract_ordinals(periods, freq) return cls._from_ordinals(ordinals, freq=freq) @@ -879,8 +880,46 @@ def item(self): # ------------------------------------------------------------------- # Constructor Helpers -def to_period_array(data): - return PeriodArray._complex_new(data, freq=None) +def period_array(data, freq=None): + # type: (Sequence[Optional[Period]], Optional[Tick]) -> PeriodArray + """ + Construct a new PeriodArray from a sequence of Period scalars. + + Parameters + ---------- + data : Sequence of Period objects + A sequence of Period objects. These are required to all have + the same ``freq.`` Missing values can be indicated by ``None`` + or ``pandas.NaT``. + freq : str, Tick, or Offset + The frequency of every element of the array. This can be specified + to avoid inferring the `freq` from `data`. + + Returns + ------- + PeriodArray + + See Also + -------- + PeriodArray + pandas.PeriodIndex + + Examples + -------- + >>> period_array([pd.Period('2017', freq='A'), + ... 
pd.Period('2018', freq='A')]) + + ['2017', '2018'] + Length: 2, dtype: period[A-DEC] + + >>> period_array([pd.Period('2017', freq='A'), + ... pd.Period('2018', freq='A'), + ... pd.NaT]) + + ['2017', '2018', 'NaT'] + Length: 3, dtype: period[A-DEC] + """ + return PeriodArray._from_periods(data, freq=freq) def dt64arr_to_periodarr(data, freq, tz=None): From ac0536584dcbea0c4a23b44134ba2c935a433486 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Oct 2018 09:32:44 -0500 Subject: [PATCH 058/132] wip period_array --- pandas/core/arrays/__init__.py | 2 +- pandas/core/arrays/period.py | 25 +++++++++++++++++++++---- pandas/tests/arrays/test_period.py | 30 +++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index dcf018ce6610d..678a55bc7ac55 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -4,7 +4,7 @@ from .categorical import Categorical # noqa from .datetimes import DatetimeArrayMixin # noqa from .interval import IntervalArray # noqa -from .period import PeriodArray # noqa +from .period import PeriodArray, period_array # noqa from .timedeltas import TimedeltaArrayMixin # noqa from .integer import ( # noqa IntegerArray, integer_array) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index ffbd577cfa81c..c8c8c8d6be9b5 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -106,8 +106,13 @@ def wrapper(self, other): class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): """ - Pandas ExtensionArray for Period data. + Pandas ExtensionArray for storing Period data. + Users should use the :func:`period_array` function to create + new instances of PeriodArray. 
+ + Notes + ----- There are two components to a PeriodArray - ordinals : integer ndarray @@ -118,6 +123,11 @@ class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): The `freq` indicates the span covered by each element of the array. All elements in the PeriodArray have the same `freq`. + + See Also + -------- + period_array : Create a new PeriodArray + pandas.PeriodIndex : Immutable Index for period data """ _attributes = ["freq"] _typ = "periodarray" # ABCPeriodArray @@ -291,7 +301,8 @@ def _from_datetime64(cls, data, freq, tz=None): return cls._simple_new(data, freq=freq) def __repr__(self): - return '\n{}\nLength: {}, dtype: {}'.format( + return '<{}>\n{}\nLength: {}, dtype: {}'.format( + self.__class__.__name__, [str(s) for s in self], len(self), self.dtype @@ -304,6 +315,12 @@ def isna(self): return self._data == iNaT def fillna(self, value=None, method=None, limit=None): + # TODO(#20300) + # To avoid converting to object, we re-implement here with the changes + # 1. Passing `_ndarray_values` to func instead of self.astype(object) + # 2. Re-boxing with `_from_ordinals` + # #20300 should let us do this kind of logic on ExtensionArray.fillna + # and we can use it. from pandas.api.types import is_array_like from pandas.util._validators import validate_fillna_kwargs from pandas.core.missing import pad_1d, backfill_1d @@ -908,14 +925,14 @@ def period_array(data, freq=None): -------- >>> period_array([pd.Period('2017', freq='A'), ... pd.Period('2018', freq='A')]) - + ['2017', '2018'] Length: 2, dtype: period[A-DEC] >>> period_array([pd.Period('2017', freq='A'), ... pd.Period('2018', freq='A'), ... 
pd.NaT]) - + ['2017', '2018', 'NaT'] Length: 3, dtype: period[A-DEC] """ diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 9157f1654f724..48053e071b53b 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -5,7 +5,7 @@ import pandas.util.testing as tm from pandas._libs.tslibs import iNaT from pandas._libs.tslibs.period import IncompatibleFrequency -from pandas.core.arrays import PeriodArray +from pandas.core.arrays import PeriodArray, period_array @pytest.mark.parametrize('key, value, expected', [ @@ -34,3 +34,31 @@ def test_setitem_raises(): with tm.assert_raises_regex(TypeError, "int"): arr[0] = 1 + + +@pytest.mark.parametrize("data, freq, expected", [ + ([pd.Period("2017", "D")], None, [17167]), + ([pd.Period("2017", "D")], "D", [17167]), + ([2017], "D", [17167]), + (["2017"], "D", [17167]), + ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]), + ([pd.Period("2017", "D"), None], None, [17167, iNaT]), +]) +def test_to_period_ok(data, freq, expected): + result = period_array(data, freq=freq).values + expected = np.asarray(expected) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("data, freq, msg", [ + ([pd.Period('2017', 'D'), + pd.Period('2017', 'A')], + None, + "Input has different freq"), + ([pd.Period('2017', 'D')], + "A", + "Input has different freq"), +]) +def test_to_period_raises(data, freq, msg): + with tm.assert_raises_regex(IncompatibleFrequency, msg): + period_array(data, freq) From 36ed547019f6903123d062d7f9fa3b2069b48e7d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Oct 2018 10:27:26 -0500 Subject: [PATCH 059/132] order --- pandas/core/arrays/period.py | 409 ++++++++++++++++++----------------- 1 file changed, 206 insertions(+), 203 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index c8c8c8d6be9b5..2b19953bfde08 100644 --- a/pandas/core/arrays/period.py +++ 
b/pandas/core/arrays/period.py @@ -253,6 +253,21 @@ def _complex_new(cls, data=None, ordinal=None, freq=None, start=None, data = ensure_object(data) return cls._from_periods(data, freq=freq) + @classmethod + def _simple_new(cls, values, freq=None, **kwargs): + """ + Values can be any type that can be coerced to Periods. + Ordinals in an ndarray are fastpath-ed to `_from_ordinals` + """ + if not is_integer_dtype(values): + values = np.array(values, copy=False) + if len(values) > 0 and is_float_dtype(values): + raise TypeError("{cls} can't take floats" + .format(cls=cls.__name__)) + return cls(values, freq=freq) + + return cls(values, freq=freq) + @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): # type: (Sequence[Optional[Period]], Dtype, bool) -> PeriodArray @@ -300,6 +315,118 @@ def _from_datetime64(cls, data, freq, tz=None): data = dt64arr_to_periodarr(data, freq, tz) return cls._simple_new(data, freq=freq) + @classmethod + def _generate_range(cls, start, end, periods, freq, fields): + if freq is not None: + freq = Period._maybe_convert_freq(freq) + + field_count = len(fields) + if com.count_not_none(start, end) > 0: + if field_count > 0: + raise ValueError('Can either instantiate from fields ' + 'or endpoints, but not both') + subarr, freq = _get_ordinal_range(start, end, periods, freq) + elif field_count > 0: + subarr, freq = _range_from_fields(freq=freq, **fields) + else: + raise ValueError('Not enough parameters to construct ' + 'Period range') + + return subarr, freq + + @classmethod + def _concat_same_type(cls, to_concat): + freq = {x.freq for x in to_concat} + assert len(freq) == 1 + freq = list(freq)[0] + values = np.concatenate([x._data for x in to_concat]) + return cls._from_ordinals(values, freq=freq) + + @property + def asi8(self): + return self._ndarray_values.view('i8') + + # -------------------------------------------------------------------- + # Data / Attributes + @property + def nbytes(self): + # TODO(DatetimeArray): remove 
+ return self._data.nbytes + + @cache_readonly + def dtype(self): + return PeriodDtype.construct_from_string(self.freq) + + @property + def _ndarray_values(self): + # Ordinals + return self._data + + @property + def freq(self): + """Return the frequency object if it is set, otherwise None""" + return self._freq + + @freq.setter + def freq(self, value): + msg = ('Setting {cls}.freq has been deprecated and will be ' + 'removed in a future version; use {cls}.asfreq instead. ' + 'The {cls}.freq setter is not guaranteed to work.') + warnings.warn(msg.format(cls='PeriodIndex'), + FutureWarning, stacklevel=3) + self._freq = value + + @property + def flags(self): + """Deprecated""" + # Just here to support Index.flags deprecation. + # could also override PeriodIndex.flags if we don't want a + # version with PeriodArray.flags + return self.values.flags + + @property + def base(self): + return self.values.base + + @property + def data(self): + return self.astype(object).data + + # -------------------------------------------------------------------- + # Vectorized analogues of Period properties + + year = _field_accessor('year', 0, "The year of the period") + month = _field_accessor('month', 3, "The month as January=1, December=12") + day = _field_accessor('day', 4, "The days of the period") + hour = _field_accessor('hour', 5, "The hour of the period") + minute = _field_accessor('minute', 6, "The minute of the period") + second = _field_accessor('second', 7, "The second of the period") + weekofyear = _field_accessor('week', 8, "The week ordinal of the year") + week = weekofyear + dayofweek = _field_accessor('dayofweek', 10, + "The day of the week with Monday=0, Sunday=6") + weekday = dayofweek + dayofyear = day_of_year = _field_accessor('dayofyear', 9, + "The ordinal day of the year") + quarter = _field_accessor('quarter', 2, "The quarter of the date") + qyear = _field_accessor('qyear', 1) + days_in_month = _field_accessor('days_in_month', 11, + "The number of days in the 
month") + daysinmonth = days_in_month + + @property + def is_leap_year(self): + """ Logical indicating if the date belongs to a leap year """ + return isleapyear_arr(np.asarray(self.year)) + + @property + def start_time(self): + return self.to_timestamp(how='start') + + @property + def end_time(self): + return self.to_timestamp(how='end') + def __repr__(self): return '<{}>\n{}\nLength: {}, dtype: {}'.format( self.__class__.__name__, @@ -311,47 +438,6 @@ def __repr__(self): def __len__(self): return len(self._data) - def isna(self): - return self._data == iNaT - - def fillna(self, value=None, method=None, limit=None): - # TODO(#20300) - # To avoid converting to object, we re-implement here with the changes - # 1. Passing `_ndarray_values` to func instead of self.astype(object) - # 2. Re-boxing with `_from_ordinals` - # #20300 should let us do this kind of logic on ExtensionArray.fillna - # and we can use it. - from pandas.api.types import is_array_like - from pandas.util._validators import validate_fillna_kwargs - from pandas.core.missing import pad_1d, backfill_1d - - if isinstance(value, ABCSeries): - value = value.values - - value, method = validate_fillna_kwargs(value, method) - - mask = self.isna() - - if is_array_like(value): - if len(value) != len(self): - raise ValueError("Length of 'value' does not match. 
Got ({}) " - " expected {}".format(len(value), len(self))) - value = value[mask] - - if mask.any(): - if method is not None: - func = pad_1d if method == 'pad' else backfill_1d - new_values = func(self._ndarray_values, limit=limit, - mask=mask) - new_values = self._from_ordinals(new_values, freq=self.freq) - else: - # fill with value - new_values = self.copy() - new_values[mask] = value - else: - new_values = self.copy() - return new_values - def __setitem__(self, key, value): from pandas.core.dtypes.missing import isna @@ -406,22 +492,50 @@ def take(self, indices, allow_fill=False, fill_value=None): return self._from_ordinals(new_values, self.freq) - @property - def nbytes(self): - # TODO(DatetimeArray): remove - return self._data.nbytes + def isna(self): + return self._data == iNaT + + def fillna(self, value=None, method=None, limit=None): + # TODO(#20300) + # To avoid converting to object, we re-implement here with the changes + # 1. Passing `_ndarray_values` to func instead of self.astype(object) + # 2. Re-boxing with `_from_ordinals` + # #20300 should let us do this kind of logic on ExtensionArray.fillna + # and we can use it. + from pandas.api.types import is_array_like + from pandas.util._validators import validate_fillna_kwargs + from pandas.core.missing import pad_1d, backfill_1d + + if isinstance(value, ABCSeries): + value = value.values + + value, method = validate_fillna_kwargs(value, method) + + mask = self.isna() + + if is_array_like(value): + if len(value) != len(self): + raise ValueError("Length of 'value' does not match. 
Got ({}) " + " expected {}".format(len(value), len(self))) + value = value[mask] + + if mask.any(): + if method is not None: + func = pad_1d if method == 'pad' else backfill_1d + new_values = func(self._ndarray_values, limit=limit, + mask=mask) + new_values = self._from_ordinals(new_values, freq=self.freq) + else: + # fill with value + new_values = self.copy() + new_values[mask] = value + else: + new_values = self.copy() + return new_values def copy(self, deep=False): return self._from_ordinals(self._data.copy(), freq=self.freq) - @classmethod - def _concat_same_type(cls, to_concat): - freq = {x.freq for x in to_concat} - assert len(freq) == 1 - freq = list(freq)[0] - values = np.concatenate([x._data for x in to_concat]) - return cls._from_ordinals(values, freq=freq) - def value_counts(self, dropna=False): from pandas.core.algorithms import value_counts from pandas.core.indexes.period import PeriodIndex @@ -439,105 +553,53 @@ def value_counts(self, dropna=False): index=index, name=result.name) - @property - def _box_func(self): - return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq) - - @cache_readonly - def dtype(self): - return PeriodDtype.construct_from_string(self.freq) - - @property - def _ndarray_values(self): - # Ordinals - return self._data - - @property - def asi8(self): - return self._ndarray_values.view('i8') - - @property - def freq(self): - """Return the frequency object if it is set, otherwise None""" - return self._freq - - @freq.setter - def freq(self, value): - msg = ('Setting {cls}.freq has been deprecated and will be ' - 'removed in a future version; use {cls}.asfreq instead. ' - 'The {cls}.freq setter is not guaranteed to work.') - warnings.warn(msg.format(cls='PeriodIndex'), - FutureWarning, stacklevel=3) - self._freq = value - - @classmethod - def _simple_new(cls, values, freq=None, **kwargs): - """ - Values can be any type that can be coerced to Periods. 
- Ordinals in an ndarray are fastpath-ed to `_from_ordinals` + def shift(self, periods=1): """ - if not is_integer_dtype(values): - values = np.array(values, copy=False) - if len(values) > 0 and is_float_dtype(values): - raise TypeError("{cls} can't take floats" - .format(cls=cls.__name__)) - return cls(values, freq=freq) - - return cls(values, freq=freq) + Shift values by desired number. - @classmethod - def _generate_range(cls, start, end, periods, freq, fields): - if freq is not None: - freq = Period._maybe_convert_freq(freq) + Newly introduced missing values are filled with + ``self.dtype.na_value``. - field_count = len(fields) - if com.count_not_none(start, end) > 0: - if field_count > 0: - raise ValueError('Can either instantiate from fields ' - 'or endpoints, but not both') - subarr, freq = _get_ordinal_range(start, end, periods, freq) - elif field_count > 0: - subarr, freq = _range_from_fields(freq=freq, **fields) - else: - raise ValueError('Not enough parameters to construct ' - 'Period range') + .. versionadded:: 0.24.0 - return subarr, freq + Parameters + ---------- + periods : int, default 1 + The number of periods to shift. Negative values are allowed + for shifting backwards. - # -------------------------------------------------------------------- - # Vectorized analogues of Period properties + Returns + ------- + shifted : PeriodArray + """ + # TODO(DatetimeArray): remove from DatetimeLikeArrayMixin + # The semantics for Index.shift differ from EA.shift + # then just call super. 
+ return ExtensionArray.shift(self, periods) - year = _field_accessor('year', 0, "The year of the period") - month = _field_accessor('month', 3, "The month as January=1, December=12") - day = _field_accessor('day', 4, "The days of the period") - hour = _field_accessor('hour', 5, "The hour of the period") - minute = _field_accessor('minute', 6, "The minute of the period") - second = _field_accessor('second', 7, "The second of the period") - weekofyear = _field_accessor('week', 8, "The week ordinal of the year") - week = weekofyear - dayofweek = _field_accessor('dayofweek', 10, - "The day of the week with Monday=0, Sunday=6") - weekday = dayofweek - dayofyear = day_of_year = _field_accessor('dayofyear', 9, - "The ordinal day of the year") - quarter = _field_accessor('quarter', 2, "The quarter of the date") - qyear = _field_accessor('qyear', 1) - days_in_month = _field_accessor('days_in_month', 11, - "The number of days in the month") - daysinmonth = days_in_month + def _time_shift(self, n, freq=None): + """ + Shift each value by `periods`. - @property - def is_leap_year(self): - """ Logical indicating if the date belongs to a leap year """ - return isleapyear_arr(np.asarray(self.year)) + Note this is different from ExtensionArray.shift, which + shifts the *position* of each element, padding the end with + missing values. - @property - def start_time(self): - return self.to_timestamp(how='start') + Parameters + ---------- + periods : int + Number of periods to shift by. + freq : pandas.DateOffset, pandas.Timedelta, or string + Frequency increment to shift by. 
+ """ + values = self.values + n * self.freq.n + if self.hasnans: + values[self._isnan] = iNaT + return self._simple_new(values, freq=self.freq) @property - def end_time(self): - return self.to_timestamp(how='end') + def _box_func(self): + return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq) def asfreq(self, freq=None, how='E'): """ @@ -642,7 +704,6 @@ def to_timestamp(self, freq=None, how='start'): # ------------------------------------------------------------------ # Arithmetic Methods - _create_comparison_method = classmethod(_period_array_cmp) def _sub_datelike(self, other): @@ -693,50 +754,6 @@ def _add_delta(self, other): ordinal_delta = self._maybe_convert_timedelta(other) return self._time_shift(ordinal_delta) - def shift(self, periods=1): - """ - Shift values by desired number. - - Newly introduced missing values are filled with - ``self.dtype.na_value``. - - .. versionadded:: 0.24.0 - - Parameters - ---------- - periods : int, default 1 - The number of periods to shift. Negative values are allowed - for shifting backwards. - - Returns - ------- - shifted : PeriodArray - """ - # TODO(DatetimeArray): remove from DatetimeLikeArrayMixin - # The semantics for Index.shift differ from EA.shift - # then just call super. - return ExtensionArray.shift(self, periods) - - def _time_shift(self, n, freq=None): - """ - Shift each value by `periods`. - - Note this is different from ExtensionArray.shift, which - shifts the *position* of each element, padding the end with - missing values. - - Parameters - ---------- - periods : int - Number of periods to shift by. - freq : pandas.DateOffset, pandas.Timedelta, or string - Frequency increment to shift by. 
- """ - values = self.values + n * self.freq.n - if self.hasnans: - values[self._isnan] = iNaT - return self._simple_new(values, freq=self.freq) - def _maybe_convert_timedelta(self, other): """ Convert timedelta-like input to an integer multiple of self.freq @@ -784,6 +801,8 @@ def _maybe_convert_timedelta(self, other): raise IncompatibleFrequency(msg.format(cls=type(self).__name__, freqstr=self.freqstr)) + # ------------------------------------------------------------------ + # Formatting def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): # TODO(DatetimeArray): remove @@ -866,22 +885,6 @@ def _box_values_as_index(self): from pandas.core.index import Index return Index(self._box_values(self.asi8), dtype=object) - @property - def flags(self): - """Deprecated""" - # Just here to support Index.flags deprecation. - # could also override PeriodIndex.flags if we don't want a - # version with PeriodArray.flags - return self.values.flags - - @property - def base(self): - return self.values.base - - @property - def data(self): - return self.astype(object).data - def item(self): if len(self) == 1: return Period._from_ordinal(self.values[0], self.freq) From 4652ca74a352996c92d2f0c7100114c5810e4f4d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Oct 2018 10:52:18 -0500 Subject: [PATCH 060/132] sort order --- pandas/core/arrays/period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 2b19953bfde08..f3d2eb087a717 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -545,7 +545,7 @@ def value_counts(self, dropna=False): else: values = self._data - result = value_counts(values) + result = value_counts(values, sort=False) index = PeriodIndex._from_ordinals(result.index, name=result.index.name, freq=self.freq) From a047a1be1478c9d4ea7db700951109f12dca2cf3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Oct 2018 11:30:54 -0500 
Subject: [PATCH 061/132] test for hashing --- pandas/core/util/hashing.py | 6 +++++- pandas/tests/extension/base/methods.py | 4 ++++ pandas/tests/extension/json/test_json.py | 3 +++ pandas/tests/series/test_operators.py | 6 ++++-- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index e62d70847437c..dc0dabaaa3dcf 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -12,6 +12,7 @@ ABCDataFrame) from pandas.core.dtypes.common import ( is_categorical_dtype, is_list_like) +from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.dtypes.missing import isna from pandas.core.dtypes.cast import infer_dtype_from_scalar @@ -265,10 +266,13 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True): # numpy if categorical is a subdtype of complex, as it will choke). if is_categorical_dtype(dtype): return _hash_categorical(vals, encoding, hash_key) + elif is_extension_array_dtype(dtype): + vals = vals.astype(object) + dtype = vals.dtype # we'll be working with everything as 64-bit values, so handle this # 128-bit value early - elif np.issubdtype(dtype, np.complex128): + if np.issubdtype(dtype, np.complex128): return hash_array(vals.real) + 23 * hash_array(vals.imag) # First, turn whatever array this is into unsigned 64-bit ints, if we can diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 4e7886dd2e943..371428d102539 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -164,3 +164,7 @@ def test_container_shift(self, data, frame, periods, indices): compare = self.assert_series_equal compare(result, expected) + + def test_hashing_works(self, data): + pd.util.hash_pandas_object(pd.Series(data)) + pd.util.hash_pandas_object(pd.DataFrame({"A": data})) diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 
115afdcc99f2b..f679697b71768 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -199,6 +199,9 @@ def test_combine_le(self, data_repeated): def test_combine_add(self, data_repeated): pass + @unhashable + def test_hashing_works(self, data): + pass class TestCasting(BaseJSON, base.BaseCastingTests): @pytest.mark.skip(reason="failing on np.array(self, dtype=str)") diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 55e3dfde3ceb7..77213d2b516c8 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -15,6 +15,7 @@ from pandas.core.indexes.datetimes import Timestamp import pandas.core.nanops as nanops from pandas.core import ops +from pandas.core.arrays import period_array from pandas.compat import range from pandas import compat @@ -553,8 +554,9 @@ def test_unequal_categorical_comparison_raises_type_error(self): ([pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')], [NaT, NaT, pd.Timedelta('3 days')]), - ([pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')], - [NaT, NaT, pd.Period('2011-03', freq='M')])]) + (period_array([pd.Period('2011-01', freq='M'), NaT, + pd.Period('2011-03', freq='M')]), + period_array([NaT, NaT, pd.Period('2011-03', freq='M')]))]) @pytest.mark.parametrize('reverse', [True, False]) @pytest.mark.parametrize('box', [Series, Index]) @pytest.mark.parametrize('dtype', [None, object]) From a4a30d790a503a4b2e54e95f23d8cc482ee1a416 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Oct 2018 13:53:02 -0500 Subject: [PATCH 062/132] update --- pandas/core/arrays/period.py | 17 ----------------- pandas/core/indexes/period.py | 12 ++++++++++-- pandas/tests/reshape/merge/test_merge.py | 13 ++++++++----- 3 files changed, 18 insertions(+), 24 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index f3d2eb087a717..923dda3c3d546 100644 --- 
a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -367,15 +367,6 @@ def freq(self): """Return the frequency object if it is set, otherwise None""" return self._freq - @freq.setter - def freq(self, value): - msg = ('Setting {cls}.freq has been deprecated and will be ' - 'removed in a future version; use {cls}.asfreq instead. ' - 'The {cls}.freq setter is not guaranteed to work.') - warnings.warn(msg.format(cls='PeriodIndex'), - FutureWarning, stacklevel=3) - self._freq = value - @property def flags(self): """Deprecated""" @@ -877,14 +868,6 @@ def astype(self, dtype, copy=True): else: return np.asarray(self, dtype=dtype) - def _box_values_as_index(self): - """ - return object Index which contains boxed values - """ - # This is implemented just for astype - from pandas.core.index import Index - return Index(self._box_values(self.asi8), dtype=object) - def item(self): if len(self) == 1: return Period._from_ordinal(self.values[0], self.freq) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b9343144e87fe..843219a55e210 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -224,7 +224,15 @@ def freq(self): @freq.setter def freq(self, value): - self._data.freq = value + value = Period._maybe_convert_freq(value) + msg = ('Setting {cls}.freq has been deprecated and will be ' + 'removed in a future version; use {cls}.asfreq instead. ' + 'The {cls}.freq setter is not guaranteed to work.') + warnings.warn(msg.format(cls=type(self).__name__), + FutureWarning, stacklevel=2) + # PeriodArray._freq isn't actually mutable. We set the private _freq + # here, but people shouldn't be doing this anyway. 
+ self._data._freq = value # ------------------------------------------------------------------------ # Index Constructors @@ -331,7 +339,7 @@ def to_timestamp(self, freq=None, how='start'): name=self.name, freq=result.freq) - def _format_native_types(self, na_rep='', quoting=None, **kwargs): + def _format_native_types(self, na_rep=u'NaT', quoting=None, **kwargs): # just dispatch, return ndarray return self._data._format_native_types(na_rep=na_rep, quoting=quoting, diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 50ef622a4147f..a171b47ccbe2f 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -17,6 +17,7 @@ Series, UInt64Index) from pandas.api.types import CategoricalDtype as CDT from pandas.compat import lrange, lzip +from pandas.core.arrays import period_array from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.reshape.concat import concat @@ -661,13 +662,15 @@ def test_merge_on_periods(self): exp_x = pd.period_range('20151010', periods=2, freq='D') exp_y = pd.period_range('20151011', periods=2, freq='D') - expected = DataFrame({'key': [1, 2, 3], - 'value_x': list(exp_x) + [pd.NaT], - 'value_y': [pd.NaT] + list(exp_y)}) + expected = DataFrame({ + 'key': [1, 2, 3], + 'value_x': period_array(list(exp_x) + [pd.NaT], freq="D"), + 'value_y': period_array([pd.NaT] + list(exp_y), freq="D"), + }) result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) - assert result['value_x'].dtype == 'object' - assert result['value_y'].dtype == 'object' + assert result['value_x'].dtype == 'Period[D]' + assert result['value_y'].dtype == 'Period[D]' def test_indicator(self): # PR #10054. xref #7412 and closes #8790. 
From 1441ae6e88e33aff8d9a0f67421f1e11a13dfba9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Oct 2018 13:59:54 -0500 Subject: [PATCH 063/132] lint --- pandas/core/arrays/period.py | 1 - pandas/core/indexes/accessors.py | 2 +- pandas/core/indexes/period.py | 10 ++-------- pandas/tests/extension/json/test_json.py | 1 + 4 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 923dda3c3d546..212712917a297 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- from datetime import timedelta -import warnings import numpy as np diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index f6b3e7ea71d05..48d8d74c3ca23 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -15,7 +15,7 @@ from pandas.core.accessor import PandasDelegate, delegate_names from pandas.core.base import NoNewAttributesMixin, PandasObject from pandas.core.indexes.datetimes import DatetimeIndex -from pandas.core.indexes.period import PeriodIndex, PeriodArray +from pandas.core.indexes.period import PeriodArray from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.algorithms import take_1d diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 843219a55e210..ccf44ed1ce4c2 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -12,11 +12,8 @@ is_datetime64_any_dtype, is_bool_dtype, pandas_dtype, - ensure_object ) -from pandas.tseries.frequencies import get_freq_code as _gfc - from pandas.core.accessor import PandasDelegate, delegate_names from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index from pandas.core.indexes.datetimelike import ( @@ -27,12 +24,10 @@ from pandas._libs import tslib, index as libindex from pandas._libs.tslibs.period import (Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX) -from 
pandas._libs.tslibs import resolution from pandas._libs.tslibs import resolution, period -from pandas.core.arrays import datetimelike as dtl -from pandas.core.arrays.period import PeriodArray, dt64arr_to_periodarr +from pandas.core.arrays.period import PeriodArray from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, ensure_index @@ -98,8 +93,7 @@ def _delegate_method(self, name, *args, **kwargs): [x for x in PeriodArray._datetimelike_methods if x not in {"asfreq", "to_timestamp"}], typ="method", - overwrite=True, -) + overwrite=True) class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index, PeriodDelegateMixin): """ diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index f679697b71768..d4b4385c8312a 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -203,6 +203,7 @@ def test_combine_add(self, data_repeated): def test_hashing_works(self, data): pass + class TestCasting(BaseJSON, base.BaseCastingTests): @pytest.mark.skip(reason="failing on np.array(self, dtype=str)") def test_astype_str(self): From 8003808aa8540efb96a78e0d6f66a989b0bd3155 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Oct 2018 14:18:57 -0500 Subject: [PATCH 064/132] boxing --- pandas/core/arrays/period.py | 3 ++- pandas/tests/scalar/period/test_period.py | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 212712917a297..38484ec9a8dd3 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -562,7 +562,7 @@ def shift(self, periods=1): ------- shifted : PeriodArray """ - # TODO(DatetimeArray): remove from DatetimeLikeArrayMixin + # TODO(DatetimeArray): remove # The semantics for Index.shift differ from EA.shift # then just call super. 
return ExtensionArray.shift(self, periods) @@ -589,6 +589,7 @@ def _time_shift(self, n, freq=None): @property def _box_func(self): + # Used in DatelikeArray.__iter__ return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq) def asfreq(self, freq=None, how='E'): diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index c4c9a5f8452de..33d6c2db5d895 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -9,6 +9,7 @@ import pandas.util.testing as tm import pandas.core.indexes.period as period from pandas.compat import text_type, iteritems +from pandas.core.arrays import period_array from pandas.compat.numpy import np_datetime64_compat from pandas._libs import tslib @@ -1040,7 +1041,7 @@ def test_add_raises(self): with tm.assert_raises_regex(TypeError, msg): dt1 + dt2 - boxes = [lambda x: x, lambda x: pd.Series([x]), lambda x: pd.Index([x])] + boxes = [lambda x: x[0], pd.Series, pd.Index] ids = ['identity', 'Series', 'Index'] @pytest.mark.parametrize('lbox', boxes, ids=ids) @@ -1056,13 +1057,13 @@ def test_add_timestamp_raises(self, rbox, lbox): r"can only operate on a|incompatible type|" r"ufunc add cannot use operands") with tm.assert_raises_regex(TypeError, msg): - lbox(ts) + rbox(per) + lbox([ts]) + rbox(period_array([per])) with tm.assert_raises_regex(TypeError, msg): - lbox(per) + rbox(ts) + lbox(period_array([per])) + rbox([ts]) with tm.assert_raises_regex(TypeError, msg): - lbox(per) + rbox(per) + lbox([per]) + rbox([period_array(per)]) def test_sub(self): dt1 = Period('2011-01-01', freq='D') From 5f4375309cec10bce27d89fbbcdb47f885cd2fc5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Oct 2018 14:32:16 -0500 Subject: [PATCH 065/132] fix fixtures --- pandas/tests/arrays/test_datetimelike.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 
a2e77da07ca5d..d30bacea4e8a8 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -156,7 +156,7 @@ def test_to_timestamp(self, how, period_index): # an EA-specific tm.assert_ function tm.assert_index_equal(pd.Index(result), pd.Index(expected)) - @pytest.mark.parametrize('propname', pd.PeriodIndex._bool_ops) + @pytest.mark.parametrize('propname', pd.core.arrays.PeriodArray._bool_ops) def test_bool_properties(self, period_index, propname): # in this case _bool_ops is just `is_leap_year` pi = period_index @@ -167,7 +167,8 @@ def test_bool_properties(self, period_index, propname): tm.assert_numpy_array_equal(result, expected) - @pytest.mark.parametrize('propname', pd.PeriodIndex._field_ops) + @pytest.mark.parametrize('propname', + pd.core.arrays.PeriodArray._field_ops) def test_int_properties(self, period_index, propname): pi = period_index arr = PeriodArray(pi.values) From 1c13d0f33d24b812b7d47eb0ebe6aba892843cdf Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Oct 2018 15:12:46 -0500 Subject: [PATCH 066/132] infer --- pandas/core/reshape/reshape.py | 13 +++++++++++-- pandas/core/series.py | 15 ++++++++++++--- pandas/tests/frame/test_constructors.py | 5 ++--- pandas/tests/series/test_apply.py | 8 ++++---- pandas/tests/series/test_constructors.py | 21 +++++++++++++++++++-- pandas/tests/series/test_datetime_values.py | 15 ++++++++------- pandas/tests/series/test_period.py | 15 ++++++++------- 7 files changed, 64 insertions(+), 28 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 50f6e310705d7..bd984ab303aef 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -10,7 +10,9 @@ from pandas.core.dtypes.common import ( ensure_platform_int, is_list_like, is_bool_dtype, - needs_i8_conversion, is_sparse, is_object_dtype) + needs_i8_conversion, is_sparse, is_object_dtype, + is_period_dtype +) from pandas.core.dtypes.cast import maybe_promote 
from pandas.core.dtypes.missing import notna @@ -21,7 +23,7 @@ from pandas.core.sparse.array import SparseArray from pandas._libs.sparse import IntIndex -from pandas.core.arrays import Categorical +from pandas.core.arrays import Categorical, period_array from pandas.core.arrays.categorical import _factorize_from_iterable from pandas.core.sorting import (get_group_index, get_compressed_ids, compress_group_index, decons_obs_group_ids) @@ -88,6 +90,7 @@ def __init__(self, values, index, level=-1, value_columns=None, self.is_categorical = None self.is_sparse = is_sparse(values) + self.is_period = is_period_dtype(values) if values.ndim == 1: if isinstance(values, Categorical): self.is_categorical = values @@ -96,6 +99,9 @@ def __init__(self, values, index, level=-1, value_columns=None, # XXX: Makes SparseArray *dense*, but it's supposedly # a single column at a time, so it's "doable" values = values.values + elif self.is_period: + # XXX: let's solve this in general. + values = values.astype(object) values = values[:, np.newaxis] self.values = values self.value_columns = value_columns @@ -185,6 +191,9 @@ def get_result(self): values = [Categorical(values[:, i], categories=categories, ordered=ordered) for i in range(values.shape[-1])] + elif self.is_period: + # XXX: solve this in general. + values = [period_array(v) for v in values] return self.constructor(values, index=index, columns=columns) diff --git a/pandas/core/series.py b/pandas/core/series.py index a613b22ea9046..30b11bf7e645d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -13,7 +13,7 @@ import numpy.ma as ma from pandas.core.accessor import CachedAccessor -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays import ExtensionArray, period_array from pandas.core.dtypes.common import ( is_categorical_dtype, is_string_like, @@ -154,8 +154,9 @@ class Series(base.IndexOpsMixin, generic.NDFrame): RangeIndex (0, 1, 2, ..., n) if not provided. 
If both a dict and index sequence are used, the index will override the keys found in the dict. - dtype : numpy.dtype or None - If None, dtype will be inferred + dtype : str, numpy.dtype, or ExtensionDtype, optional + dtype for the output Series. If not specified, this will be + inferred from `data`. copy : boolean, default False Copy input data """ @@ -4362,4 +4363,12 @@ def _try_cast(arr, take_fast_path): data = np.array(data, dtype=dtype, copy=False) subarr = np.array(data, dtype=object, copy=copy) + if subarr.dtype == 'object' and dtype != 'object': + inferred = lib.infer_dtype(subarr) + if inferred == 'period': + try: + subarr = period_array(subarr) + except tslibs.period.IncompatibleFrequency: + pass + return subarr diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c1dce74073800..e2a49b93c51a3 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -588,9 +588,8 @@ def test_constructor_period(self): # list of periods df = pd.DataFrame({'a': a.astype(object).tolist(), 'b': b.astype(object).tolist()}) - # TODO: should we infer these? 
- assert df['a'].dtype == 'object' - assert df['b'].dtype == 'object' + assert df['a'].dtype == a.dtype + assert df['b'].dtype == b.dtype def test_nested_dict_frame_constructor(self): rng = pd.period_range('1/1/2000', periods=5) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 20215279cf031..509cd8d0f3241 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -119,11 +119,11 @@ def test_apply_box(self): exp = pd.Series(['Timedelta_1', 'Timedelta_2']) tm.assert_series_equal(res, exp) - # period (object dtype, not boxed) + # period vals = [pd.Period('2011-01-01', freq='M'), pd.Period('2011-01-02', freq='M')] s = pd.Series(vals) - assert s.dtype == 'object' + assert s.dtype == 'Period[M]' res = s.apply(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.freqstr)) exp = pd.Series(['Period_M', 'Period_M']) @@ -599,11 +599,11 @@ def test_map_box(self): exp = pd.Series(['Timedelta_1', 'Timedelta_2']) tm.assert_series_equal(res, exp) - # period (object dtype, not boxed) + # period vals = [pd.Period('2011-01-01', freq='M'), pd.Period('2011-01-02', freq='M')] s = pd.Series(vals) - assert s.dtype == 'object' + assert s.dtype == 'Period[M]' res = s.map(lambda x: '{0}_{1}'.format(x.__class__.__name__, x.freqstr)) exp = pd.Series(['Period_M', 'Period_M']) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 09f7235e3bf81..83990bddcee5d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -18,7 +18,7 @@ from pandas import (Index, Series, isna, date_range, Timestamp, NaT, period_range, timedelta_range, MultiIndex, IntervalIndex, Categorical, DataFrame) - +from pandas.core.arrays import period_array from pandas._libs import lib from pandas._libs.tslib import iNaT @@ -856,6 +856,23 @@ def test_construction_consistency(self): result = Series(s.values, dtype=s.dtype) tm.assert_series_equal(result, s) + def 
test_constructor_infer_period(self): + data = [pd.Period('2000', 'D'), pd.Period('2001', 'D'), None] + result = pd.Series(data) + expected = pd.Series(period_array(data)) + tm.assert_series_equal(result, expected) + assert result.dtype == 'Period[D]' + + data = np.asarray(data, dtype=object) + tm.assert_series_equal(result, expected) + assert result.dtype == 'Period[D]' + + def test_constructor_period_incompatible_frequency(self): + data = [pd.Period('2000', 'D'), pd.Period('2001', 'A')] + result = pd.Series(data) + assert result.dtype == object + assert result.tolist() == data + def test_constructor_periodindex(self): # GH7932 # converting a PeriodIndex when put in a Series @@ -864,7 +881,7 @@ def test_constructor_periodindex(self): s = Series(pi) assert s.dtype == 'Period[D]' expected = Series(pi.astype(object)) - assert_series_equal(s.astype(object), expected) + assert_series_equal(s, expected) def test_constructor_dict(self): d = {'a': 0., 'b': 1., 'c': 2.} diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 3a93ceddd536d..736af3531596a 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -196,6 +196,14 @@ def get_dir(s): tm.assert_almost_equal( results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) + s = Series(period_range('20130101', periods=5, + freq='D', name='xxx').astype(object)) + results = get_dir(s) + tm.assert_almost_equal( + results, list(sorted(set(ok_for_period + ok_for_period_methods)))) + + + # 11295 # ambiguous time error on the conversions s = Series(pd.date_range('2015-01-01', '2016-01-01', @@ -219,13 +227,6 @@ def get_dir(s): with pytest.raises(com.SettingWithCopyError): s.dt.hour[0] = 5 - # XXX: Series.dt no longer works for Series[object[Period]]. - # I think that's OK, but want a +1. 
- s = Series(period_range('20130101', periods=5, - freq='D', name='xxx').astype(object)) - with tm.assert_raises_regex(AttributeError, "Can only use .dt"): - get_dir(s) - @pytest.mark.parametrize('method, dates', [ ['round', ['2012-01-02', '2012-01-02', '2012-01-01']], ['floor', ['2012-01-01', '2012-01-01', '2012-01-01']], diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index addaa6456d6dc..d0bd1ee0413f0 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -19,11 +19,11 @@ def setup_method(self, method): def test_auto_conversion(self): series = Series(list(period_range('2000-01-01', periods=10, freq='D'))) - assert series.dtype == 'object' + assert series.dtype == 'Period[D]' series = pd.Series([pd.Period('2011-01-01', freq='D'), pd.Period('2011-02-01', freq='D')]) - assert series.dtype == 'object' + assert series.dtype == 'Period[D]' def test_getitem(self): assert self.series[1] == pd.Period('2000-01-02', freq='D') @@ -51,12 +51,13 @@ def test_fillna(self): exp = Series([pd.Period('2011-01', freq='M'), pd.Period('2012-01', freq='M')]) tm.assert_series_equal(res, exp) - assert res.dtype == 'object' + assert res.dtype == 'Period[M]' - res = s.fillna('XXX') - exp = Series([pd.Period('2011-01', freq='M'), 'XXX']) - tm.assert_series_equal(res, exp) - assert res.dtype == 'object' + # We don't support upcasting to object on fillna. 
+ # res = s.fillna('XXX') + # exp = Series([pd.Period('2011-01', freq='M'), 'XXX']) + # tm.assert_series_equal(res, exp) + # assert res.dtype == 'object' def test_dropna(self): # GH 13737 From bae6b3d7ef4a54fe419bb97bfb9688ee46b9e1c1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Oct 2018 16:37:42 -0500 Subject: [PATCH 067/132] Remove seemingly unreachable code --- pandas/core/indexes/base.py | 7 +------ pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/period.py | 27 +-------------------------- 3 files changed, 3 insertions(+), 33 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d0dbe76547e75..6eb06edb8daa2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -306,12 +306,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, (dtype is not None and is_period_dtype(dtype))): from pandas import PeriodIndex result = PeriodIndex(data, copy=copy, name=name, **kwargs) - if (dtype is not None and - not is_period_dtype(dtype) and - _o_dtype == dtype): - return Index(result.to_pytimedelta(), dtype=_o_dtype) - else: - return result + return result # extension dtype elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index e26d1fe11d0fb..9aaed0d5d2569 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -690,6 +690,7 @@ def _concat_same_dtype(self, to_concat, name): return self._simple_new(new_data, **attribs) def _maybe_box_as_values(self, values, **attribs): + # TODO(DatetimeArray): remove return values def astype(self, dtype, copy=True): @@ -728,7 +729,6 @@ def _ensure_datetimelike_to_i8(other, to_utc=False): return iNaT elif isinstance(other, (PeriodArray, ABCIndexClass)): # convert tz if needed - # TODO: Ensure PeriodArray.tz_localize if getattr(other, 'tz', None) is not None: if to_utc: other = other.tz_convert('UTC') 
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index ccf44ed1ce4c2..7bdd5795d4e0c 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -175,8 +175,6 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, """ _typ = 'periodindex' _attributes = ['name', 'freq'] - # see hack in arrays/datetimelike.py make_comparison_op - # _wrap_cmp_method = False # define my properties & methods for delegation _is_numeric_dtype = False @@ -308,6 +306,7 @@ def _maybe_box_as_values(self, values, **attribs): and Datetime/TimedeltaIndex. Once these are all backed by an ExtensionArray, this can be removed """ + # TODO(DatetimeArray): remove freq = attribs['freq'] return PeriodArray._from_ordinals(values, freq=freq) @@ -530,30 +529,6 @@ def is_full(self): values = self.asi8 return ((values[1:] - values[:-1]) < 2).all() - # # year = _wrap_field_accessor('year') - # month = _wrap_field_accessor('month') - # day = _wrap_field_accessor('day') - # hour = _wrap_field_accessor('hour') - # minute = _wrap_field_accessor('minute') - # second = _wrap_field_accessor('second') - # weekofyear = _wrap_field_accessor('week') - # week = weekofyear - # dayofweek = _wrap_field_accessor('dayofweek') - # weekday = dayofweek - # dayofyear = day_of_year = _wrap_field_accessor('dayofyear') - # quarter = _wrap_field_accessor('quarter') - # qyear = _wrap_field_accessor('qyear') - # days_in_month = _wrap_field_accessor('days_in_month') - # daysinmonth = days_in_month - # - # @property - # def start_time(self): - # return self.to_timestamp(how='start') - # - # @property - # def end_time(self): - # return self.to_timestamp(how='end') - def _mpl_repr(self): # how to represent ourselves to matplotlib return self.astype(object).values From f422cf0551ca8f6d4421f3aff7a51cfe489bfc56 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 10 Oct 2018 16:53:44 -0500 Subject: [PATCH 068/132] lint --- pandas/tests/series/test_datetime_values.py | 2 -- 1 file changed, 2 
deletions(-) diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 736af3531596a..9bbc191a4adf5 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -202,8 +202,6 @@ def get_dir(s): tm.assert_almost_equal( results, list(sorted(set(ok_for_period + ok_for_period_methods)))) - - # 11295 # ambiguous time error on the conversions s = Series(pd.date_range('2015-01-01', '2016-01-01', From 0229d74fe4161035eda0f01e9b82ea94295a9be8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 12 Oct 2018 12:29:35 -0500 Subject: [PATCH 069/132] wip --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/arrays/period.py | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index dd2ce3d00b7a2..88dbe6a0670ec 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -160,7 +160,7 @@ And for periods: Previously, these would be cast to a NumPy array with object dtype. In general, this should result in better performance when storing an array of intervals or periods -in a :class:`Series`. +in a :class:`Series` or column of a :class:`DataFrame`. Note that the ``.values`` of a ``Series`` containing one of these types is no longer a NumPy array, but rather an ``ExtensionArray``: diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 38484ec9a8dd3..b3ecabb703621 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -145,19 +145,16 @@ class PeriodArray(DatetimeLikeArrayMixin, ExtensionArray): # -------------------------------------------------------------------- # Constructors def __init__(self, values, freq=None): - # type: (np.ndarray[np.int64], Union[str, Tick]) -> None - # TODO: constructor discussion. 
The type above doesn't match what - # we handle right now (values can be PeriodArray or PeriodIndex + # type: (Union[PeriodArray, np.ndarray], Union[str, Tick]) -> None if isinstance(values, type(self)): values, freq = values._data, values.freq - elif isinstance(values, ABCPeriodIndex): - values, freq = values._ndarray_values, values.freq values = np.array(values, dtype='int64', copy=False) self._data = values if freq is None: raise ValueError('freq is not specified and cannot be inferred') - self._freq = Period._maybe_convert_freq(freq) + freq = Period._maybe_convert_freq(freq) + self._dtype = PeriodDtype(freq) @classmethod def _complex_new(cls, data=None, ordinal=None, freq=None, start=None, @@ -271,14 +268,19 @@ def _simple_new(cls, values, freq=None, **kwargs): def _from_sequence(cls, scalars, dtype=None, copy=False): # type: (Sequence[Optional[Period]], Dtype, bool) -> PeriodArray if dtype: - dtype = dtype.freq + freq = dtype.freq + else: + freq = None scalars = np.asarray(scalars, dtype=object) - return cls._from_periods(scalars, freq=dtype) + return cls._from_periods(scalars, freq=freq) + + def _values_for_factorize(self): + return self.values, iNaT @classmethod def _from_factorized(cls, values, original): # type: (Sequence[Optional[Period]], PeriodArray) -> PeriodArray - return cls._from_periods(values, freq=original.freq) + return cls._simple_new(values, freq=original.freq) @classmethod def _from_ordinals(cls, values, freq=None): @@ -354,7 +356,7 @@ def nbytes(self): @cache_readonly def dtype(self): - return PeriodDtype.construct_from_string(self.freq) + return self._dtype @property def _ndarray_values(self): @@ -363,8 +365,8 @@ def _ndarray_values(self): @property def freq(self): - """Return the frequency object if it is set, otherwise None""" - return self._freq + """Return the frequency object for this PeriodArray.""" + return self.dtype.freq @property def flags(self): From 29085e1731fbd6821a7e8e9fb3092994ce4c1fb7 Mon Sep 17 00:00:00 2001 From: Tom 
Augspurger Date: Fri, 12 Oct 2018 12:52:12 -0500 Subject: [PATCH 070/132] Updates for master --- pandas/tests/extension/test_period.py | 18 +++++++++++++++--- pandas/tests/series/test_datetime_values.py | 2 +- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index bf625c13a894c..7842937dc7e9a 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -79,18 +79,30 @@ def test_no_values_attribute(self, data): class TestArithmeticOps(BasePeriodTests, base.BaseArithmeticOpsTests): + implements = {'__sub__', '__rsub__'} def test_arith_series_with_scalar(self, data, all_arithmetic_operators): # we implement substitution... - op_name = all_arithmetic_operators - if op_name in ('__sub__', '__rsub__'): + if all_arithmetic_operators in self.implements: s = pd.Series(data) - self.check_opname(s, op_name, s.iloc[0], exc=None) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], + exc=None) else: # ... but not the rest. super().test_arith_series_with_scalar(data, all_arithmetic_operators) + def test_arith_series_with_array(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], + exc=None) + else: + # ... but not the rest. 
+ super().test_arith_series_with_scalar(data, + all_arithmetic_operators) + + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): super(TestArithmeticOps, self)._check_divmod_op( s, op, other, exc=TypeError diff --git a/pandas/tests/series/test_datetime_values.py b/pandas/tests/series/test_datetime_values.py index 9bbc191a4adf5..7f8bd375cb1a4 100644 --- a/pandas/tests/series/test_datetime_values.py +++ b/pandas/tests/series/test_datetime_values.py @@ -197,7 +197,7 @@ def get_dir(s): results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) s = Series(period_range('20130101', periods=5, - freq='D', name='xxx').astype(object)) + freq='D', name='xxx').astype(object)) results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_period + ok_for_period_methods)))) From 00ffddf7989ef4e53bf421352f84b51a460e6f15 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 12 Oct 2018 13:28:21 -0500 Subject: [PATCH 071/132] simplify --- pandas/tests/test_multilevel.py | 7 ++++--- pandas/tseries/frequencies.py | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 801656a3dee5e..35fdce2e48b81 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -16,7 +16,7 @@ from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas.core.common as com import pandas.util.testing as tm -from pandas.core.arrays import PeriodArray +from pandas.core.arrays import period_array from pandas.compat import (range, lrange, StringIO, lzip, u, product as cart_product, zip) import pandas as pd @@ -2320,10 +2320,11 @@ def test_reset_index_period(self): df = DataFrame(np.arange(9, dtype='int64').reshape(-1, 1), index=idx, columns=['a']) expected = DataFrame({ - 'month': PeriodArray._from_periods(np.array( + 'month': period_array( [pd.Period('2013-01', freq='M')] * 3 + [pd.Period('2013-02', freq='M')] * 3 + - [pd.Period('2013-03', 
freq='M')] * 3, dtype=object)), + [pd.Period('2013-03', freq='M')] * 3 + ), 'feature': ['a', 'b', 'c'] * 3, 'a': np.arange(9, dtype='int64') }, columns=['month', 'feature', 'a']) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 22c7447768fff..d6e4824575468 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -233,7 +233,6 @@ def get_offset(name): def infer_freq(index, warn=True): - # type: (Union[Series, Index, PeriodArray]) -> Freq """ Infer the most likely frequency given the input index. If the frequency is uncertain, a warning will be printed. From e81fa9c3b86884dffb42f2f4dbd4b98a29e711f0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 12 Oct 2018 14:04:35 -0500 Subject: [PATCH 072/132] wip --- pandas/tests/scalar/period/test_period.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 33d6c2db5d895..202d2b8032984 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -1041,7 +1041,7 @@ def test_add_raises(self): with tm.assert_raises_regex(TypeError, msg): dt1 + dt2 - boxes = [lambda x: x[0], pd.Series, pd.Index] + boxes = [lambda x: x, lambda x: pd.Series([x]), lambda x: pd.Index([x])] ids = ['identity', 'Series', 'Index'] @pytest.mark.parametrize('lbox', boxes, ids=ids) @@ -1057,13 +1057,13 @@ def test_add_timestamp_raises(self, rbox, lbox): r"can only operate on a|incompatible type|" r"ufunc add cannot use operands") with tm.assert_raises_regex(TypeError, msg): - lbox([ts]) + rbox(period_array([per])) + lbox(ts) + rbox(per) with tm.assert_raises_regex(TypeError, msg): - lbox(period_array([per])) + rbox([ts]) + lbox(per) + rbox(ts) with tm.assert_raises_regex(TypeError, msg): - lbox([per]) + rbox([period_array(per)]) + lbox(per) + rbox(per) def test_sub(self): dt1 = Period('2011-01-01', freq='D') From 
96204a10ce5ebc857268831c54552fd24f2ce828 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 15 Oct 2018 12:07:20 -0500 Subject: [PATCH 073/132] remove view --- pandas/core/arrays/period.py | 6 ------ pandas/core/indexes/period.py | 19 +++++++++++++++++++ .../tests/indexes/period/test_construction.py | 15 --------------- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index b57679ce5a6c9..24d4b6e558600 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -895,12 +895,6 @@ def _check_timedeltalike_freq_compat(self, other): .format(cls=type(self).__name__, freqstr=self.freqstr)) - def view(self, dtype=None, type=None): - # This is to support things like `.asi8` - # PeriodIndex's parent does .values.view('i8'). - # I don't like adding this, - return self._data.view(dtype=dtype) - def repeat(self, repeats, *args, **kwargs): """ Repeat elements of a Categorical. diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 3fc0b66b95ed2..69c7dc3792541 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -28,6 +28,7 @@ from pandas._libs.tslibs import resolution, period from pandas.core.algorithms import unique1d +from pandas.core.dtypes.generic import ABCIndexClass from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays.period import PeriodArray, dt64arr_to_periodarr from pandas.core.base import _shared_docs @@ -238,6 +239,13 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): Values can be any type that can be coerced to Periods. Ordinals in an ndarray are fastpath-ed to `_from_ordinals` """ + if isinstance(values, cls): + # TODO: don't do this + values = values.values + elif isinstance(values, (ABCIndexClass, np.ndarray)) and is_integer_dtype(values): + # TODO: don't do this. 
+ values = PeriodArray._simple_new(values, freq) + assert isinstance(values, PeriodArray) result = object.__new__(cls) result._data = values @@ -869,6 +877,17 @@ def repeat(self, repeats, *args, **kwargs): # TODO(DatetimeArray): Just use Index.repeat return Index.repeat(self, repeats, *args, **kwargs) + def view(self, dtype=None, type=None): + # TODO(DatetimeArray): remove + if dtype is None or dtype is __builtins__['type'](self): + return self + return self._ndarray_values.view(dtype=dtype) + + @property + def asi8(self): + # TODO(DatetimeArray): remove + return self.view('i8') + PeriodIndex._add_comparison_ops() PeriodIndex._add_numeric_methods_disabled() diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index e9089e9ec305c..bf87e75e746f1 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -271,21 +271,6 @@ def test_constructor_simple_new(self): result = idx._simple_new(idx.astype('i8'), name='p', freq=idx.freq) tm.assert_index_equal(result, idx) - result = idx._simple_new([pd.Period('2007-01', freq='M'), - pd.Period('2007-02', freq='M')], - name='p', freq=idx.freq) - tm.assert_index_equal(result, idx) - - result = idx._simple_new(np.array([pd.Period('2007-01', freq='M'), - pd.Period('2007-02', freq='M')]), - name='p', freq=idx.freq) - tm.assert_index_equal(result, idx) - # _simple_new has type: (PeriodArray, name, Optional[Freq]) - # TODO: Add tests to PeriodArray._simple_new for - # - [ ] int - # - [ ] List[Period] - # - [ ] ndarray[Period] - def test_constructor_simple_new_empty(self): # GH13079 idx = PeriodIndex([], freq='M', name='p') From 8d245827b0f7dad6bb078470b766fc91613bd90b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 09:53:15 -0500 Subject: [PATCH 074/132] simplify --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/accessor.py | 11 +++++------ pandas/core/arrays/base.py | 2 +- 
pandas/core/arrays/categorical.py | 5 +---- pandas/core/arrays/datetimelike.py | 4 +++- pandas/core/arrays/period.py | 16 +++++++++++++--- pandas/core/dtypes/common.py | 2 ++ pandas/core/dtypes/concat.py | 2 -- pandas/core/frame.py | 10 +--------- pandas/core/indexes/accessors.py | 2 +- pandas/core/indexes/base.py | 6 +++--- pandas/core/indexes/datetimelike.py | 5 +++++ pandas/core/indexes/period.py | 3 ++- pandas/core/ops.py | 8 -------- pandas/core/reshape/reshape.py | 2 +- pandas/tests/arrays/test_datetimelike.py | 15 +++++++-------- pandas/tests/frame/test_combine_concat.py | 1 + pandas/tests/frame/test_reshape.py | 2 ++ pandas/tests/indexes/period/test_astype.py | 2 -- .../tests/indexes/period/test_construction.py | 17 ++++++++--------- pandas/tests/reshape/merge/test_merge.py | 9 +++------ pandas/tests/reshape/test_concat.py | 10 ++++------ pandas/tests/scalar/period/test_period.py | 2 +- pandas/tests/series/test_operators.py | 6 ++---- pandas/tests/series/test_period.py | 6 ------ pandas/tests/test_multilevel.py | 9 +++------ 26 files changed, 70 insertions(+), 89 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 2b79de8ff22fd..834670f8386e5 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -271,7 +271,7 @@ New Behavior: This mirrors ``CategoricalIndex.values``, which returns a ``Categorical``. For situations where you need an ``ndarray`` of ``Interval`` objects, use -:meth:`numpy.asarray` or ``idx.values.astype(object)``. +:meth:`numpy.asarray`. .. 
ipython:: python diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index f2f8ae58dec56..eab529584d1fb 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -68,6 +68,7 @@ def _add_delegate_accessors(cls, delegate, accessors, typ, overwrite : boolean, default False overwrite the method/property in the target class if it exists """ + def _create_delegator_property(name): def _getter(self): @@ -106,10 +107,9 @@ def f(self, *args, **kwargs): def delegate_names(delegate, accessors, typ, overwrite=False): """ - Add delegated names to a class using a class decorator. - - This provides an alternative usage to directly calling - `_add_delegate_accessors` below a class definition. + Add delegated names to a class using a class decorator. This provides + an alternative usage to directly calling `_add_delegate_accessors` + below a class definition. Parameters ---------- @@ -121,8 +121,7 @@ def delegate_names(delegate, accessors, typ, overwrite=False): Returns ------- - callable - A class decorator + decorator Examples -------- diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c18af21078ecf..b745569d5bd76 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -122,7 +122,7 @@ def _from_factorized(cls, values, original): Parameters ---------- values : ndarray - An ndarray with the unique factorized values. + An integer ndarray with the factorized values. original : ExtensionArray The original ExtensionArray that factorize was called on. diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index d50e390e472d5..79070bbbfd11a 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1259,10 +1259,7 @@ def __array__(self, dtype=None): if dtype==None (default), the same dtype as categorical.categories.dtype """ - # Need asarray, in case self.categories.values is an ExtensionArray - # e.g. in a PeriodIndex. More generally, any Index backed by an EA. 
- values = np.asarray(self.categories.values) - ret = take_1d(values, self._codes) + ret = take_1d(self.categories.values, self._codes) if dtype and not is_dtype_equal(dtype, self.categories.dtype): return np.asarray(ret, dtype) if is_extension_array_dtype(ret): diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8d7087fa75c86..3a23162ff2023 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -524,8 +524,10 @@ def _addsub_offset_array(self, other, op): left = lib.values_from_object(self.astype('O')) res_values = op(left, np.array(other)) + kwargs = {} if not is_period_dtype(self): - return type(self)(res_values, freq='infer') + kwargs['freq'] = 'infer' + return type(self)(res_values, **kwargs) return self._from_sequence(res_values) @deprecate_kwarg(old_arg_name='n', new_arg_name='periods') diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d8f8d7c5e21a3..f14639ab343a3 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -32,7 +32,7 @@ from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ( - ABCSeries, ABCIndexClass, + ABCSeries, ABCIndexClass, ABCPeriodIndex ) import pandas.core.common as com @@ -146,12 +146,22 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): # -------------------------------------------------------------------- # Constructors - def __init__(self, values, freq=None): + def __init__(self, values, freq=None, copy=False): # type: (Union[PeriodArray, np.ndarray], Union[str, Tick]) -> None + if isinstance(values, ABCSeries): + values = values.values + if not isinstance(values, type(self)): + raise TypeError("Incorect dtype") + + elif isinstance(values, ABCPeriodIndex): + values = values.values + if isinstance(values, type(self)): + if freq is not None: + raise TypeError("Cannot pass 'freq' and a 'PeriodArray'.") values, freq = values._data, values.freq - values = 
np.array(values, dtype='int64', copy=False) + values = np.array(values, dtype='int64', copy=copy) self._data = values if freq is None: raise ValueError('freq is not specified and cannot be inferred') diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 50a20bd281d90..af2dac300b7eb 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -499,6 +499,7 @@ def is_period_dtype(arr_or_dtype): >>> is_period_dtype(pd.PeriodIndex([], freq="A")) True """ + # TODO: Consider making Period an instance of PeriodDtype if arr_or_dtype is None: return False @@ -636,6 +637,7 @@ def is_period_arraylike(arr): >>> is_period_arraylike(pd.PeriodIndex(["2017-01-01"], freq="D")) True """ + if isinstance(arr, (ABCPeriodIndex, ABCPeriodArray)): return True elif isinstance(arr, (np.ndarray, ABCSeries)): diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index a545440f482b9..3530048a04b5f 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -471,8 +471,6 @@ def _concat_datetime(to_concat, axis=0, typs=None): elif any(typ.startswith('period') for typ in typs): assert len(typs) == 1 - # TODO: Need a generic way to say "concatenate these by - # concatenating the underlying EA and wrapping. cls = to_concat[0] new_values = cls._concat_same_type(to_concat) return new_values diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 898053d6804bb..8f3873b4299a5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5120,8 +5120,6 @@ def combine(self, other, func, fill_value=None, overwrite=True): # see if we need to be represented as i8 (datetimelike) # try to keep us at this dtype needs_i8_conversion_i = needs_i8_conversion(new_dtype) - # Is this third argument documented? The docs say func is binary - # don't mention needs_i8_conversion... 
if needs_i8_conversion_i: arr = func(series, otherSeries, True) else: @@ -5196,13 +5194,7 @@ def combiner(x, y, needs_i8_conversion=False): if y.name not in self.columns: return y_values - result = expressions.where(mask, y_values, x_values) - # if needs_i8_conversion: - # TODO: handle all these - from pandas.core.dtypes.common import is_period_dtype - if is_period_dtype(x): - result = x.values._simple_new(result, freq=x.values.freq) - return result + return expressions.where(mask, y_values, x_values) return self.combine(other, combiner, overwrite=False) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 48d8d74c3ca23..35b9799579628 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -47,7 +47,7 @@ def _get_values(self): else: if is_period_arraylike(data): # TODO: use to_period_array - return PeriodArray._complex_new(data, copy=False) + return PeriodArray(data, copy=False) if is_datetime_arraylike(data): return DatetimeIndex(data, copy=False, name=self.name) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 2c6993bf612f9..7e5bdbb557eea 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -239,7 +239,7 @@ def _outer_indexer(self, left, right): return libjoin.outer_join_indexer(left, right) _typ = 'index' - _data = None # type: Union[np.ndarray, ExtensionArray] + _data = None _id = None name = None asi8 = None @@ -271,6 +271,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, return cls._simple_new(data, name) from .range import RangeIndex + # range if isinstance(data, RangeIndex): return RangeIndex(start=data, copy=copy, dtype=dtype, name=name) @@ -312,8 +313,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, else: return result - elif ((is_period_dtype(data) and not is_object_dtype(dtype)) or - (dtype is not None and is_period_dtype(dtype))): + elif is_period_dtype(data) and not is_object_dtype(dtype): from 
pandas import PeriodIndex result = PeriodIndex(data, copy=copy, name=name, **kwargs) return result diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 571e6281f05a9..d7d1f5d8af569 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -691,6 +691,11 @@ def _concat_same_dtype(self, to_concat, name): def _maybe_box_as_values(self, values, **attribs): # TODO(DatetimeArray): remove + # This is a temporary shim while PeriodArray is an ExtensoinArray, + # but others are not. When everyone is an ExtensionArray, this can + # be removed. Currently used in + # - sort_values + # - _concat_same_dtype return values def astype(self, dtype, copy=True): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 69c7dc3792541..08147ad075c00 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -246,7 +246,8 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): # TODO: don't do this. 
values = PeriodArray._simple_new(values, freq) - assert isinstance(values, PeriodArray) + if not isinstance(values, PeriodArray): + raise TypeError("PeriodIndex._simple_new only accepts PeriodArray") result = object.__new__(cls) result._data = values result.name = name diff --git a/pandas/core/ops.py b/pandas/core/ops.py index cac722e9f7de9..9791354de7ffa 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1209,11 +1209,6 @@ def dispatch_to_extension_op(op, left, right): if is_extension_array_dtype(left): new_left = left.values - if (is_extension_array_dtype(right) - and isinstance(right, (ABCIndexClass, ABCSeries))): - # unbox - right = right._values - if isinstance(right, np.ndarray): # handle numpy scalars, this is a PITA @@ -1222,9 +1217,6 @@ def dispatch_to_extension_op(op, left, right): if is_scalar(new_right): new_right = [new_right] new_right = list(new_right) - elif (is_extension_array_dtype(right) and - type(new_left) == type(right)): - new_right = right elif is_extension_array_dtype(right) and type(left) != type(right): new_right = list(right) else: diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 09e26092f3ab3..03b77f0e787f0 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -22,7 +22,7 @@ from pandas.core.arrays import SparseArray from pandas._libs.sparse import IntIndex -from pandas.core.arrays import Categorical, period_array +from pandas.core.arrays import Categorical from pandas.core.arrays.categorical import _factorize_from_iterable from pandas.core.sorting import (get_group_index, get_compressed_ids, compress_group_index, decons_obs_group_ids) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index a80d81319c4d8..2ebfc9a3e113e 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -127,7 +127,7 @@ class TestPeriodArray(object): def test_from_pi(self, period_index): pi = 
period_index - arr = PeriodArray(pi.values) + arr = PeriodArray(pi) assert list(arr) == list(pi) # Check that Index.__new__ knows what to do with PeriodArray @@ -137,7 +137,7 @@ def test_from_pi(self, period_index): def test_astype_object(self, period_index): pi = period_index - arr = PeriodArray(pi.values) + arr = PeriodArray(pi) asobj = arr.astype('O') assert isinstance(asobj, np.ndarray) assert asobj.dtype == 'O' @@ -146,7 +146,7 @@ def test_astype_object(self, period_index): @pytest.mark.parametrize('how', ['S', 'E']) def test_to_timestamp(self, how, period_index): pi = period_index - arr = PeriodArray(pi.values) + arr = PeriodArray(pi) expected = DatetimeArrayMixin(pi.to_timestamp(how=how)) result = arr.to_timestamp(how=how) @@ -156,22 +156,21 @@ def test_to_timestamp(self, how, period_index): # an EA-specific tm.assert_ function tm.assert_index_equal(pd.Index(result), pd.Index(expected)) - @pytest.mark.parametrize('propname', pd.core.arrays.PeriodArray._bool_ops) + @pytest.mark.parametrize('propname', PeriodArray._bool_ops) def test_bool_properties(self, period_index, propname): # in this case _bool_ops is just `is_leap_year` pi = period_index - arr = PeriodArray(pi.values) + arr = PeriodArray(pi) result = getattr(arr, propname) expected = np.array(getattr(pi, propname)) tm.assert_numpy_array_equal(result, expected) - @pytest.mark.parametrize('propname', - pd.core.arrays.PeriodArray._field_ops) + @pytest.mark.parametrize('propname', PeriodArray._field_ops) def test_int_properties(self, period_index, propname): pi = period_index - arr = PeriodArray(pi.values) + arr = PeriodArray(pi) result = getattr(arr, propname) expected = np.array(getattr(pi, propname)) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index 5cf8df01caea3..a43c5c7257daa 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -741,6 +741,7 @@ def test_combine_first_timedelta(self): 
tm.assert_frame_equal(res, exp) assert res['TD'].dtype == 'timedelta64[ns]' + @pytest.mark.xfail(reason="GH-23079", strict=True) def test_combine_first_period(self): data1 = pd.PeriodIndex(['2011-01', 'NaT', '2011-03', '2011-04'], freq='M') diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 9f6735c7ba2bf..e8dff56d703e3 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -277,6 +277,8 @@ def test_unstack_fill_frame_timedelta(self): index=['x', 'y', 'z']) assert_frame_equal(result, expected) + @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/23077", + strict=True) def test_unstack_fill_frame_period(self): # Test unstacking with period diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index c51d728a5ba9f..504e89bd77774 100644 --- a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -14,8 +14,6 @@ class TestPeriodIndexAsType(object): def test_astype_raises(self, dtype): # GH#13149, GH#13209 idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D') - # XXX: do we care about the name PeriodArray vs. PeriodIndex in the - # exception message? 
msg = 'Cannot cast PeriodArray to dtype' with tm.assert_raises_regex(TypeError, msg): idx.astype(dtype) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index bf87e75e746f1..e4c73aa69a015 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -274,17 +274,16 @@ def test_constructor_simple_new(self): def test_constructor_simple_new_empty(self): # GH13079 idx = PeriodIndex([], freq='M', name='p') - result = idx._simple_new(idx.values, name='p', freq='M') + result = idx._simple_new(idx, name='p', freq='M') tm.assert_index_equal(result, idx) - # @pytest.mark.parametrize('floats', [[1.1, 2.1], np.array([1.1, 2.1])]) - # def test_constructor_floats(self, floats): - # # GH#13079 - # with pytest.raises(TypeError): - # pd.PeriodIndex._simple_new(floats, freq='M') - # - # with pytest.raises(TypeError): - # pd.PeriodIndex(floats, freq='M') + @pytest.mark.parametrize('floats', [[1.1, 2.1], np.array([1.1, 2.1])]) + def test_constructor_floats(self, floats): + with pytest.raises(TypeError): + pd.PeriodIndex._simple_new(floats, freq='M') + + with pytest.raises(TypeError): + pd.PeriodIndex(floats, freq='M') def test_constructor_nat(self): pytest.raises(ValueError, period_range, start='NaT', diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index a171b47ccbe2f..2b4a7952ae738 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -17,7 +17,6 @@ Series, UInt64Index) from pandas.api.types import CategoricalDtype as CDT from pandas.compat import lrange, lzip -from pandas.core.arrays import period_array from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.reshape.concat import concat @@ -662,11 +661,9 @@ def test_merge_on_periods(self): exp_x = pd.period_range('20151010', 
periods=2, freq='D') exp_y = pd.period_range('20151011', periods=2, freq='D') - expected = DataFrame({ - 'key': [1, 2, 3], - 'value_x': period_array(list(exp_x) + [pd.NaT], freq="D"), - 'value_y': period_array([pd.NaT] + list(exp_y), freq="D"), - }) + expected = DataFrame({'key': [1, 2, 3], + 'value_x': list(exp_x) + [pd.NaT], + 'value_y': [pd.NaT] + list(exp_y)}) result = pd.merge(left, right, on='key', how='outer') assert_frame_equal(result, expected) assert result['value_x'].dtype == 'Period[D]' diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 8beb1a695538e..64b1b9615bd9c 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -92,7 +92,7 @@ def _check_expected_dtype(self, obj, label): assert obj.dtype == label elif isinstance(obj, pd.Series): if label.startswith('period'): - assert obj.dtype == 'object' + assert obj.dtype == 'Period[M]' else: assert obj.dtype == label else: @@ -415,7 +415,7 @@ def test_concatlike_common_period(self): res = pd.concat([ps1, ps2]) tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) - @pytest.mark.xfail(reason="multiple freq", strict=True) + @pytest.mark.xfail(reason="GH-22994", strict=True) def test_concatlike_common_period_diff_freq_to_object(self): # GH 13221 pi1 = pd.PeriodIndex(['2011-01', '2011-02'], freq='M') @@ -1999,9 +1999,8 @@ def test_concat_period_series(self): result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - @pytest.mark.xfail(reason="multiple freq", strict=True) + @pytest.mark.xfail(reason="GH-22994", strict=True) def test_concat_period_multiple_freq_series(self): - # Blocked by https://github.com/pandas-dev/pandas/pull/22997 x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='M')) expected = Series([x[0], x[1], y[0], y[1]], dtype='object') @@ -2009,9 +2008,8 @@ def test_concat_period_multiple_freq_series(self): 
tm.assert_series_equal(result, expected) assert result.dtype == 'object' - @pytest.mark.xfail(reason="multiple freq", strict=True) + @pytest.mark.xfail(reason="GH-22994", strict=True) def test_concat_period_other_series(self): - # Blocked by https://github.com/pandas-dev/pandas/pull/22997 x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M')) expected = Series([x[0], x[1], y[0], y[1]], dtype='object') diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 202d2b8032984..0d6289c6888ad 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -9,7 +9,6 @@ import pandas.util.testing as tm import pandas.core.indexes.period as period from pandas.compat import text_type, iteritems -from pandas.core.arrays import period_array from pandas.compat.numpy import np_datetime64_compat from pandas._libs import tslib @@ -1046,6 +1045,7 @@ def test_add_raises(self): @pytest.mark.parametrize('lbox', boxes, ids=ids) @pytest.mark.parametrize('rbox', boxes, ids=ids) + @pytest.mark.xfail(reason="Gh-23155", strict=False) def test_add_timestamp_raises(self, rbox, lbox): # GH # 17983 ts = pd.Timestamp('2017') diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 77213d2b516c8..55e3dfde3ceb7 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -15,7 +15,6 @@ from pandas.core.indexes.datetimes import Timestamp import pandas.core.nanops as nanops from pandas.core import ops -from pandas.core.arrays import period_array from pandas.compat import range from pandas import compat @@ -554,9 +553,8 @@ def test_unequal_categorical_comparison_raises_type_error(self): ([pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')], [NaT, NaT, pd.Timedelta('3 days')]), - (period_array([pd.Period('2011-01', freq='M'), NaT, - pd.Period('2011-03', 
freq='M')]), - period_array([NaT, NaT, pd.Period('2011-03', freq='M')]))]) + ([pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')], + [NaT, NaT, pd.Period('2011-03', freq='M')])]) @pytest.mark.parametrize('reverse', [True, False]) @pytest.mark.parametrize('box', [Series, Index]) @pytest.mark.parametrize('dtype', [None, object]) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index d0bd1ee0413f0..827bdea84377b 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -53,12 +53,6 @@ def test_fillna(self): tm.assert_series_equal(res, exp) assert res.dtype == 'Period[M]' - # We don't support upcasting to object on fillna. - # res = s.fillna('XXX') - # exp = Series([pd.Period('2011-01', freq='M'), 'XXX']) - # tm.assert_series_equal(res, exp) - # assert res.dtype == 'object' - def test_dropna(self): # GH 13737 s = Series([pd.Period('2011-01', freq='M'), diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 36727443b83dc..0dbbe60283cac 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -16,7 +16,6 @@ from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas.core.common as com import pandas.util.testing as tm -from pandas.core.arrays import period_array from pandas.compat import (range, lrange, StringIO, lzip, u, product as cart_product, zip) import pandas as pd @@ -2320,11 +2319,9 @@ def test_reset_index_period(self): df = DataFrame(np.arange(9, dtype='int64').reshape(-1, 1), index=idx, columns=['a']) expected = DataFrame({ - 'month': period_array( - [pd.Period('2013-01', freq='M')] * 3 + - [pd.Period('2013-02', freq='M')] * 3 + - [pd.Period('2013-03', freq='M')] * 3 - ), + 'month': ([pd.Period('2013-01', freq='M')] * 3 + + [pd.Period('2013-02', freq='M')] * 3 + + [pd.Period('2013-03', freq='M')] * 3), 'feature': ['a', 'b', 'c'] * 3, 'a': np.arange(9, dtype='int64') }, columns=['month', 
'feature', 'a']) From 1fc77446bb2d50d16ff72ba5bcfe7189e6aef48b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 13:30:43 -0500 Subject: [PATCH 075/132] lint --- pandas/core/indexes/period.py | 10 +++++----- pandas/tests/extension/test_period.py | 1 - pandas/tests/frame/test_reshape.py | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 08147ad075c00..8255edfe13c7d 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -25,12 +25,11 @@ from pandas._libs.tslibs.period import (Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX) -from pandas._libs.tslibs import resolution, period +from pandas._libs.tslibs import resolution from pandas.core.algorithms import unique1d from pandas.core.dtypes.generic import ABCIndexClass -from pandas.core.arrays import datetimelike as dtl -from pandas.core.arrays.period import PeriodArray, dt64arr_to_periodarr +from pandas.core.arrays.period import PeriodArray from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, ensure_index @@ -242,7 +241,8 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): if isinstance(values, cls): # TODO: don't do this values = values.values - elif isinstance(values, (ABCIndexClass, np.ndarray)) and is_integer_dtype(values): + elif (isinstance(values, (ABCIndexClass, np.ndarray)) and + is_integer_dtype(values)): # TODO: don't do this. 
values = PeriodArray._simple_new(values, freq) @@ -880,7 +880,7 @@ def repeat(self, repeats, *args, **kwargs): def view(self, dtype=None, type=None): # TODO(DatetimeArray): remove - if dtype is None or dtype is __builtins__['type'](self): + if dtype is None or dtype is __builtins__['type'](self): return self return self._ndarray_values.view(dtype=dtype) diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index efb0343df5fc9..bfeeb421ed65d 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -102,7 +102,6 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): super().test_arith_series_with_scalar(data, all_arithmetic_operators) - def _check_divmod_op(self, s, op, other, exc=NotImplementedError): super(TestArithmeticOps, self)._check_divmod_op( s, op, other, exc=TypeError diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index e8dff56d703e3..ed3cc39052183 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -277,7 +277,7 @@ def test_unstack_fill_frame_timedelta(self): index=['x', 'y', 'z']) assert_frame_equal(result, expected) - @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/23077", + @pytest.mark.xfail(reason="GH-23077", strict=True) def test_unstack_fill_frame_period(self): From 6cd428c1a5043fd2c094fae806e0313f5c31f689 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 13:34:11 -0500 Subject: [PATCH 076/132] Removed add_comparison_methods --- pandas/core/arrays/datetimelike.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3a23162ff2023..01fb427ae3bd8 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -789,20 +789,7 @@ def _evaluate_compare(self, other, op): result[mask] = filler return result - 
@classmethod - def _add_comparison_methods(cls): - """ add in comparison methods """ - # DatetimeArray and TimedeltaArray comparison methods will - # call these as their super(...) methods - cls.__eq__ = _make_comparison_op(operator.eq, cls) - cls.__ne__ = _make_comparison_op(operator.ne, cls) - cls.__lt__ = _make_comparison_op(operator.lt, cls) - cls.__gt__ = _make_comparison_op(operator.gt, cls) - cls.__le__ = _make_comparison_op(operator.le, cls) - cls.__ge__ = _make_comparison_op(operator.ge, cls) - - -DatetimeLikeArrayMixin._add_comparison_methods() + DatetimeLikeArrayMixin._add_comparison_ops() From 21693e07bfc2bce37be012ad7ef976871f51c267 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 13:35:22 -0500 Subject: [PATCH 077/132] xfail op --- pandas/tests/extension/test_period.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index bfeeb421ed65d..516967e47e911 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -107,6 +107,7 @@ def _check_divmod_op(self, s, op, other, exc=NotImplementedError): s, op, other, exc=TypeError ) + @pytest.mark.xfail(reason="GH-23155", strict=True) def test_add_series_with_extension_array(self, data): # we don't implement + for Period s = pd.Series(data) From b65ffad5e2a39877178e6adc03c7d6dcf9cc811e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 13:43:04 -0500 Subject: [PATCH 078/132] remove some --- pandas/core/arrays/period.py | 29 +++-------------------------- pandas/core/indexes/period.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 26 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index f14639ab343a3..7ad245cac8450 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -89,10 +89,6 @@ def wrapper(self, other): elif other is NaT: result = np.empty(len(self._ndarray_values), dtype=bool) 
result.fill(nat_result) - elif isinstance(other, (list, np.ndarray)): - # XXX: is this correct? Why not convert the - # sequence to a PeriodArray? - return NotImplemented else: other = Period(other, freq=self.freq) result = op(other.ordinal) @@ -355,10 +351,6 @@ def _concat_same_type(cls, to_concat): values = np.concatenate([x._data for x in to_concat]) return cls._from_ordinals(values, freq=freq) - @property - def asi8(self): - return self._ndarray_values.view('i8') - # -------------------------------------------------------------------- # Data / Attributes @property @@ -380,22 +372,6 @@ def freq(self): """Return the frequency object for this PeriodArray.""" return self.dtype.freq - @property - def flags(self): - """Deprecated""" - # Just here to support Index.flags deprecation. - # could also override PeriodIndex.flags if we don't want a - # version with PeriodArray.flags - return self.values.flags - - @property - def base(self): - return self.values.base - - @property - def data(self): - return self.astype(object).data - # -------------------------------------------------------------------- # Vectorized analogues of Period properties @@ -650,7 +626,7 @@ def asfreq(self, freq=None, how='E'): base1, mult1 = frequencies.get_freq_code(self.freq) base2, mult2 = frequencies.get_freq_code(freq) - asi8 = self.asi8 + asi8 = self._ndarray_values.view('i8') # mult1 can't be negative or 0 end = how == 'E' if end: @@ -722,7 +698,7 @@ def _sub_period(self, other): msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - asi8 = self.asi8 + asi8 = self._ndarray_values.view('i8') new_data = asi8 - other.ordinal new_data = np.array([self.freq * x for x in new_data]) @@ -955,6 +931,7 @@ def astype(self, dtype, copy=True): def item(self): if len(self) == 1: + # IndexOpsMixin will catch and re-raise IndexErrors return Period._from_ordinal(self.values[0], self.freq) else: raise ValueError('can only convert an array of size 1 to a ' diff --git 
a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 8255edfe13c7d..32ac750961ac1 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -884,11 +884,30 @@ def view(self, dtype=None, type=None): return self return self._ndarray_values.view(dtype=dtype) + @property + def flags(self): + """ return the ndarray.flags for the underlying data """ + warnings.warn("{obj}.flags is deprecated and will be removed " + "in a future version".format(obj=type(self).__name__), + FutureWarning, stacklevel=2) + return self._ndarray_values.flags + @property def asi8(self): # TODO(DatetimeArray): remove return self.view('i8') + def item(self): + """ return the first element of the underlying data as a python + scalar + """ + try: + return self.values._item() + except IndexError: + # copy numpy's message here because Py26 raises an IndexError + raise ValueError('can only convert an array of size 1 to a ' + 'Python scalar') + PeriodIndex._add_comparison_ops() PeriodIndex._add_numeric_methods_disabled() From 1f438e300a11374442e5c80b9db561c1c053bf64 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 14:48:08 -0500 Subject: [PATCH 079/132] constructors --- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/arrays/period.py | 79 ++++++++++++------------------ pandas/core/indexes/period.py | 8 +-- pandas/tests/series/test_period.py | 2 +- 4 files changed, 37 insertions(+), 54 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 01fb427ae3bd8..ee7425e8cfd8f 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -482,7 +482,7 @@ def _addsub_int_array(self, other, op): arr_mask=self._isnan) res_values = res_values.view('i8') res_values[self._isnan] = iNaT - return self._from_ordinals(res_values, freq=self.freq) + return type(self)(res_values, freq=self.freq) elif self.freq is None: # GH#19123 diff --git a/pandas/core/arrays/period.py 
b/pandas/core/arrays/period.py index 7ad245cac8450..9d3d9bb16d21f 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -66,6 +66,7 @@ def _period_array_cmp(cls, op): def wrapper(self, other): op = getattr(self._ndarray_values, opname) if isinstance(other, (ABCSeries, ABCIndexClass)): + # TODO: return NotImplemented? other = other.values if isinstance(other, Period): @@ -105,8 +106,7 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): """ Pandas ExtensionArray for storing Period data. - Users should use the :func:`period_array` function to create - new instances of PeriodArray. + Users should use :func:`period_array` to create new instances. Notes ----- @@ -173,7 +173,6 @@ def _complex_new(cls, data=None, ordinal=None, freq=None, start=None, # copy-pase from PeriodIndex.__new__ with slight adjustments. # # - removed all uses of name - # - refactored to smaller, more dedicated constructors. # TODO: move fields validation to range init valid_field_set = {'year', 'month', 'day', 'quarter', @@ -213,7 +212,7 @@ def _complex_new(cls, data=None, ordinal=None, freq=None, start=None, else: data, freq = cls._generate_range(start, end, periods, freq, fields) - return cls._from_ordinals(data, freq=freq) + return cls(data, freq=freq) if isinstance(data, (cls, PeriodIndex)): if freq is None or freq == data.freq: # no freq change @@ -226,7 +225,7 @@ def _complex_new(cls, data=None, ordinal=None, freq=None, start=None, base1, base2, 1) if copy: data = data.copy() - return cls._simple_new(data, freq=freq) + return cls(data, freq=freq) # not array / index if not isinstance(data, (np.ndarray, PeriodIndex, @@ -246,7 +245,7 @@ def _complex_new(cls, data=None, ordinal=None, freq=None, start=None, # datetime other than period if is_datetime64_dtype(data.dtype): data = dt64arr_to_periodarr(data, freq, tz) - return cls._from_ordinals(data, freq=freq) + return cls(data, freq=freq) # check not floats if lib.infer_dtype(data) == 'floating' and len(data) 
> 0: @@ -255,32 +254,29 @@ def _complex_new(cls, data=None, ordinal=None, freq=None, start=None, # anything else, likely an array of strings or periods data = ensure_object(data) - return cls._from_periods(data, freq=freq) + if dtype is None and freq: + dtype = PeriodDtype(freq) + return cls._from_sequence(data, dtype=dtype) @classmethod def _simple_new(cls, values, freq=None, **kwargs): - """ - Values can be any type that can be coerced to Periods. - Ordinals in an ndarray are fastpath-ed to `_from_ordinals` - """ - if not is_integer_dtype(values): - values = np.array(values, copy=False) - if len(values) > 0 and is_float_dtype(values): - raise TypeError("{cls} can't take floats" - .format(cls=cls.__name__)) - return cls(values, freq=freq, **kwargs) - - return cls(values, freq=freq) + # alias from PeriodArray.__init__ + return cls(values, freq=freq, **kwargs) @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): - # type: (Sequence[Optional[Period]], Dtype, bool) -> PeriodArray + # type: (Sequence[Optional[Period]], PeriodDtype, bool) -> PeriodArray if dtype: freq = dtype.freq else: freq = None - scalars = np.asarray(scalars, dtype=object) - return cls._from_periods(scalars, freq=freq) + periods = np.asarray(scalars, dtype=object) + if copy: + periods = periods.copy() + + freq = freq or libperiod.extract_freq(periods) + ordinals = libperiod.extract_ordinals(periods, freq) + return cls(ordinals, freq=freq) def _values_for_factorize(self): return self.values, iNaT @@ -288,24 +284,7 @@ def _values_for_factorize(self): @classmethod def _from_factorized(cls, values, original): # type: (Sequence[Optional[Period]], PeriodArray) -> PeriodArray - return cls._simple_new(values, freq=original.freq) - - @classmethod - def _from_ordinals(cls, values, freq=None): - # type: (ndarray[int], Optional[Tick]) -> PeriodArray - """ - Values should be int ordinals - `__new__` & `_simple_new` coerce to ordinals and call this method - """ - return cls(values, freq=freq) - 
- @classmethod - def _from_periods(cls, periods, freq=None): - # type: (np.ndarray[Optional[Period]], Optional[Tick]) -> PeriodArray - periods = np.asarray(periods, dtype=object) - freq = freq or libperiod.extract_freq(periods) - ordinals = libperiod.extract_ordinals(periods, freq) - return cls._from_ordinals(ordinals, freq=freq) + return cls(values, freq=original.freq) @classmethod def _from_datetime64(cls, data, freq, tz=None): @@ -322,7 +301,7 @@ def _from_datetime64(cls, data, freq, tz=None): PeriodArray[freq] """ data = dt64arr_to_periodarr(data, freq, tz) - return cls._simple_new(data, freq=freq) + return cls(data, freq=freq) @classmethod def _generate_range(cls, start, end, periods, freq, fields): @@ -349,7 +328,7 @@ def _concat_same_type(cls, to_concat): assert len(freq) == 1 freq = list(freq)[0] values = np.concatenate([x._data for x in to_concat]) - return cls._from_ordinals(values, freq=freq) + return cls(values, freq=freq) # -------------------------------------------------------------------- # Data / Attributes @@ -470,7 +449,7 @@ def take(self, indices, allow_fill=False, fill_value=None): allow_fill=allow_fill, fill_value=fill_value) - return self._from_ordinals(new_values, self.freq) + return type(self)(new_values, self.freq) def isna(self): return self._data == iNaT @@ -504,7 +483,7 @@ def fillna(self, value=None, method=None, limit=None): func = pad_1d if method == 'pad' else backfill_1d new_values = func(self._ndarray_values, limit=limit, mask=mask) - new_values = self._from_ordinals(new_values, freq=self.freq) + new_values = type(self)(new_values, freq=self.freq) else: # fill with value new_values = self.copy() @@ -514,7 +493,7 @@ def fillna(self, value=None, method=None, limit=None): return new_values def copy(self, deep=False): - return self._from_ordinals(self._data.copy(), freq=self.freq) + return type(self)(self._data.copy(), freq=self.freq) def value_counts(self, dropna=False): from pandas.core.algorithms import value_counts @@ -575,7 
+554,7 @@ def _time_shift(self, n, freq=None): values = self.values + n * self.freq.n if self.hasnans: values[self._isnan] = iNaT - return self._simple_new(values, freq=self.freq) + return type(self)(values, freq=self.freq) @property def _box_func(self): @@ -894,7 +873,7 @@ def repeat(self, repeats, *args, **kwargs): # and some kind of _constructor (from_ordinals, from_codes). nv.validate_repeat(args, kwargs) values = self._ndarray_values.repeat(repeats) - return self._from_ordinals(values, self.freq) + return type(self)(values, self.freq) # Delegation... def strftime(self, date_format): @@ -984,7 +963,11 @@ def period_array(data, freq=None): ['2017', '2018', 'NaT'] Length: 3, dtype: period[A-DEC] """ - return PeriodArray._from_periods(data, freq=freq) + if freq: + dtype = PeriodDtype(freq) + else: + dtype = None + return PeriodArray._from_sequence(data, dtype=dtype) def dt64arr_to_periodarr(data, freq, tz=None): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 32ac750961ac1..2a71dc355f493 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -260,7 +260,7 @@ def _from_ordinals(cls, values, name=None, freq=None, **kwargs): Values should be int ordinals `__new__` & `_simple_new` cooerce to ordinals and call this method """ - data = PeriodArray._from_ordinals(values, freq=freq) + data = PeriodArray(values, freq=freq) result = cls._simple_new(data, name=name) return result @@ -271,7 +271,7 @@ def _shallow_copy(self, values=None, **kwargs): if not isinstance(values, PeriodArray): if (isinstance(values, np.ndarray) and is_integer_dtype(values.dtype)): - values = PeriodArray._from_ordinals(values, freq=self.freq) + values = PeriodArray(values, freq=self.freq) else: # in particular, I would like to avoid complex_new here. 
# Some people seem to be calling use with unexpected types @@ -319,7 +319,7 @@ def _maybe_box_as_values(self, values, **attribs): """ # TODO(DatetimeArray): remove freq = attribs['freq'] - return PeriodArray._from_ordinals(values, freq=freq) + return PeriodArray(values, freq=freq) # ------------------------------------------------------------------------ # Dispatch and maybe box. Not done in delegate_names because we box @@ -820,7 +820,7 @@ def __setstate__(self, state): np.ndarray.__setstate__(self, state) freq = None # ? - data = PeriodArray._from_ordinals(data, freq=freq) + data = PeriodArray(data, freq=freq) self._data = data else: diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index 827bdea84377b..7a095b6dc6663 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -181,7 +181,7 @@ def test_end_time_timevalues(self, input_vals): # GH 17157 # Check that the time part of the Period is adjusted by end_time # when using the dt accessor on a Series - input_vals = PeriodArray._from_periods(np.asarray(input_vals)) + input_vals = PeriodArray._from_sequence(np.asarray(input_vals)) s = Series(input_vals) result = s.dt.end_time From f3928fbccc88f0462d5fb0b4f8c025dbe96929ba Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 17:20:47 -0500 Subject: [PATCH 080/132] Constructor cleanup --- pandas/core/arrays/period.py | 142 ++++++++++------------------------ pandas/core/indexes/period.py | 60 ++++++++++++-- 2 files changed, 93 insertions(+), 109 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 9d3d9bb16d21f..d23ab572c6cb8 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -153,8 +153,8 @@ def __init__(self, values, freq=None, copy=False): values = values.values if isinstance(values, type(self)): - if freq is not None: - raise TypeError("Cannot pass 'freq' and a 'PeriodArray'.") + if freq is not None and freq != 
values.freq: + raise TypeError("freq does not match") values, freq = values._data, values.freq values = np.array(values, dtype='int64', copy=copy) @@ -164,100 +164,6 @@ def __init__(self, values, freq=None, copy=False): freq = Period._maybe_convert_freq(freq) self._dtype = PeriodDtype(freq) - @classmethod - def _complex_new(cls, data=None, ordinal=None, freq=None, start=None, - end=None, periods=None, tz=None, dtype=None, copy=False, - **fields): - from pandas import PeriodIndex, DatetimeIndex, Int64Index - - # copy-pase from PeriodIndex.__new__ with slight adjustments. - # - # - removed all uses of name - - # TODO: move fields validation to range init - valid_field_set = {'year', 'month', 'day', 'quarter', - 'hour', 'minute', 'second'} - - if not set(fields).issubset(valid_field_set): - raise TypeError('__new__() got an unexpected keyword argument {}'. - format(list(set(fields) - valid_field_set)[0])) - - if periods is not None: - if is_float(periods): - periods = int(periods) - elif not is_integer(periods): - msg = 'periods must be a number, got {periods}' - raise TypeError(msg.format(periods=periods)) - - periods = dtl.validate_periods(periods) - - if dtype is not None: - dtype = pandas_dtype(dtype) - if not is_period_dtype(dtype): - raise ValueError('dtype must be PeriodDtype') - if freq is None: - freq = dtype.freq - elif freq != dtype.freq: - msg = 'specified freq and dtype are different' - raise IncompatibleFrequency(msg) - - # coerce freq to freq object, otherwise it can be coerced elementwise - # which is slow - if freq: - freq = Period._maybe_convert_freq(freq) - - if data is None: - if ordinal is not None: - data = np.asarray(ordinal, dtype=np.int64) - else: - data, freq = cls._generate_range(start, end, periods, - freq, fields) - return cls(data, freq=freq) - - if isinstance(data, (cls, PeriodIndex)): - if freq is None or freq == data.freq: # no freq change - freq = data.freq - data = data._ndarray_values - else: - base1, _ = _gfc(data.freq) - base2, _ 
= _gfc(freq) - data = libperiod.period_asfreq_arr(data._ndarray_values, - base1, base2, 1) - if copy: - data = data.copy() - return cls(data, freq=freq) - - # not array / index - if not isinstance(data, (np.ndarray, PeriodIndex, - DatetimeIndex, Int64Index)): - if is_scalar(data): - raise TypeError('{0}(...) must be called with a ' - 'collection of some ' - 'kind, {1} was passed'.format(cls.__name__, - repr(data))) - - # other iterable of some kind - if not isinstance(data, (list, tuple)): - data = list(data) - - data = np.asarray(data) - - # datetime other than period - if is_datetime64_dtype(data.dtype): - data = dt64arr_to_periodarr(data, freq, tz) - return cls(data, freq=freq) - - # check not floats - if lib.infer_dtype(data) == 'floating' and len(data) > 0: - raise TypeError("PeriodIndex does not allow " - "floating point in construction") - - # anything else, likely an array of strings or periods - data = ensure_object(data) - if dtype is None and freq: - dtype = PeriodDtype(freq) - return cls._from_sequence(data, dtype=dtype) - @classmethod def _simple_new(cls, values, freq=None, **kwargs): # alias from PeriodArray.__init__ @@ -408,7 +314,8 @@ def __setitem__(self, key, value): if len(key) == 0: return - value = type(self)._complex_new(value) + value = period_array(value) + if self.freqstr != value.freqstr: msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, value.freqstr) raise IncompatibleFrequency(msg) @@ -618,7 +525,7 @@ def asfreq(self, freq=None, how='E'): if self.hasnans: new_data[self._isnan] = iNaT - return self._shallow_copy(new_data, freq=freq) + return type(self)(new_data, freq=freq) def to_timestamp(self, freq=None, how='start'): """ @@ -703,7 +610,9 @@ def _add_delta_td(self, other): # Note: when calling parent class's _add_delta_td, it will call # delta_to_nanoseconds(delta). Because delta here is an integer, # delta_to_nanoseconds will return it unchanged. 
- return type(self)._add_delta_td(self, delta) + ordinals = super(PeriodArray, self)._add_delta_td(delta) + return type(self)(ordinals, self.freq) + def _add_delta_tdi(self, other): assert isinstance(self.freq, Tick) # checked by calling function @@ -736,11 +645,11 @@ def _add_delta(self, other): # i8 view or _shallow_copy if isinstance(other, (Tick, timedelta, np.timedelta64)): new_values = self._add_delta_td(other) - return self._shallow_copy(new_values) + return type(self)(new_values) elif is_timedelta64_dtype(other): # ndarray[timedelta64] or TimedeltaArray/index new_values = self._add_delta_tdi(other) - return self._shallow_copy(new_values) + return type(self)(new_values) else: # pragma: no cover raise TypeError(type(other).__name__) @@ -924,7 +833,7 @@ def item(self): # ------------------------------------------------------------------- # Constructor Helpers -def period_array(data, freq=None): +def period_array(data, freq=None, ordinal=None, copy=False): # type: (Sequence[Optional[Period]], Optional[Tick]) -> PeriodArray """ Construct a new PeriodArray from a sequence of Period scalars. @@ -938,6 +847,8 @@ def period_array(data, freq=None): freq : str, Tick, or Offset The frequency of every element of the array. This can be specified to avoid inferring the `freq` from `data`. + copy : bool, default False + Whether to ensure a copy of the data is made. 
Returns ------- @@ -963,10 +874,37 @@ def period_array(data, freq=None): ['2017', '2018', 'NaT'] Length: 3, dtype: period[A-DEC] """ + + if data is None and ordinal is None: + raise ValueError("range!") + elif data is None: + data = np.asarray(ordinal, dtype=np.int64) + if copy: + data = data.copy() + return PeriodArray(data, freq=freq) + else: + if isinstance(data, (ABCPeriodIndex, ABCSeries, PeriodArray)): + return PeriodArray(data, freq) + elif is_datetime64_dtype(data): + return PeriodArray._from_datetime64(data, freq) + + # other iterable of some kind + if not isinstance(data, (np.ndarray, list, tuple)): + data = list(data) + if freq: dtype = PeriodDtype(freq) else: dtype = None + + if lib.infer_dtype(data) == 'floating' and len(data) > 0: + # Can we avoid infer_dtype? Why pay that tax every time? + raise TypeError("PeriodIndex does not allow " + "floating point in construction") + + data = ensure_object(data) + + return PeriodArray._from_sequence(data, dtype=dtype) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 2a71dc355f493..8927b7cfe6c82 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -28,8 +28,9 @@ from pandas._libs.tslibs import resolution from pandas.core.algorithms import unique1d +from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ABCIndexClass -from pandas.core.arrays.period import PeriodArray +from pandas.core.arrays.period import PeriodArray, period_array from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, ensure_index @@ -190,13 +191,54 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, periods=None, tz=None, dtype=None, copy=False, name=None, **fields): + valid_field_set = {'year', 'month', 'day', 'quarter', + 'hour', 'minute', 'second'} + + if not set(fields).issubset(valid_field_set): + raise TypeError('__new__() got an unexpected keyword argument {}'. 
+ format(list(set(fields) - valid_field_set)[0])) + if name is None and hasattr(data, 'name'): name = data.name - data = PeriodArray._complex_new(data=data, ordinal=ordinal, freq=freq, - start=start, end=end, periods=periods, - tz=tz, dtype=dtype, copy=copy, - **fields) + if data is None and ordinal is None: + # range-based. + if periods is not None: + if is_float(periods): + periods = int(periods) + + elif not is_integer(periods): + msg = 'periods must be a number, got {periods}' + raise TypeError(msg.format(periods=periods)) + + data, freq = PeriodArray._generate_range(start, end, periods, + freq, fields) + data = PeriodArray(data, freq=freq) + else: + if freq is None and dtype is not None: + freq = PeriodDtype(dtype).freq + elif freq and dtype: + freq = PeriodDtype(freq).freq + dtype = PeriodDtype(dtype).freq + + if freq != dtype: + msg = "specified freq and dtype are different" + raise IncompatibleFrequency(msg) + + # PeriodIndex allow PeriodIndex(period_index, freq=different) + # Let's not encourage that kind of behavior in PeriodArray. + + if freq and isinstance(data, cls) and data.freq != freq: + # TODO: We can do some of these with no-copy / coercion? + # e.g. D -> 2D seems to be OK + data = data.asfreq(freq) + + data = period_array(data=data, ordinal=ordinal, freq=freq, + copy=copy) + + if copy: + data = data.copy() + return cls._simple_new(data, name=name) # ------------------------------------------------------------------------ @@ -268,18 +310,22 @@ def _shallow_copy(self, values=None, **kwargs): # TODO: simplify, figure out type of values if values is None: values = self._values + + if isinstance(values, type(self)): + values = values.values + if not isinstance(values, PeriodArray): if (isinstance(values, np.ndarray) and is_integer_dtype(values.dtype)): values = PeriodArray(values, freq=self.freq) else: - # in particular, I would like to avoid complex_new here. + # in particular, I would like to avoid period_array here. 
# Some people seem to be calling use with unexpected types # Index.difference -> ndarray[Period] # DatetimelikeIndexOpsMixin.repeat -> ndarray[ordinal] # I think that once all of Datetime* are EAs, we can simplify # this quite a bit. - values = PeriodArray._complex_new(values, freq=self.freq) + values = period_array(values, freq=self.freq) # I don't like overloading shallow_copy with freq changes. # See if it's used anywhere outside of test_resample_empty_dataframe From 089f8ab4da429ea25068ad85cf2bb5bb9dbdf336 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 17:21:00 -0500 Subject: [PATCH 081/132] misc fixups --- pandas/core/arrays/period.py | 11 ++++++++++- pandas/core/indexes/period.py | 20 ++++++++++++++++++++ pandas/tests/arithmetic/test_period.py | 8 +++++--- pandas/tests/test_base.py | 4 ++-- 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d23ab572c6cb8..b54eac45d6029 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -817,7 +817,7 @@ def astype(self, dtype, copy=True): else: return np.asarray(self, dtype=dtype) - def item(self): + def _item(self): if len(self) == 1: # IndexOpsMixin will catch and re-raise IndexErrors return Period._from_ordinal(self.values[0], self.freq) @@ -825,6 +825,15 @@ def item(self): raise ValueError('can only convert an array of size 1 to a ' 'Python scalar') + @property + def flags(self): + # TODO: remove + # We need this since reduction.SeriesBinGrouper uses values.flags + # Ideally, we wouldn't be passing objects down there in the first + # place. 
+ return self._ndarray_values.flags + + PeriodArray._add_comparison_ops() PeriodArray._add_datetimelike_methods() diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 8927b7cfe6c82..1a4d437650ba3 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -947,6 +947,8 @@ def item(self): """ return the first element of the underlying data as a python scalar """ + # TODO(DatetimeArray): remove + # override to use _item try: return self.values._item() except IndexError: @@ -954,6 +956,24 @@ def item(self): raise ValueError('can only convert an array of size 1 to a ' 'Python scalar') + @property + def data(self): + """ return the data pointer of the underlying data """ + warnings.warn("{obj}.data is deprecated and will be removed " + "in a future version".format(obj=type(self).__name__), + FutureWarning, stacklevel=2) + return np.asarray(self.values).data + + @property + def base(self): + """ return the base object if the memory of the underlying data is + shared + """ + warnings.warn("{obj}.base is deprecated and will be removed " + "in a future version".format(obj=type(self).__name__), + FutureWarning, stacklevel=2) + return np.asarray(self.values) + PeriodIndex._add_comparison_ops() PeriodIndex._add_numeric_methods_disabled() diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 7aa0ada51d352..00408005d107b 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -388,6 +388,7 @@ def test_pi_sub_pi_with_nat(self): expected = pd.Index([pd.NaT, 0 * off, 0 * off, 0 * off, 0 * off]) tm.assert_index_equal(result, expected) + @pytest.mark.xfail(reason="GH-23155", strict=False) def test_parr_sub_pi_mismatched_freq(self, box_df_broadcast_failure): box = box_df_broadcast_failure @@ -797,7 +798,7 @@ def test_ops_series_timedelta(self): # GH 13043 ser = pd.Series([pd.Period('2015-01-01', freq='D'), pd.Period('2015-01-02', freq='D')], 
name='xxx') - assert ser.dtype == object + assert ser.dtype == 'Period[D]' expected = pd.Series([pd.Period('2015-01-02', freq='D'), pd.Period('2015-01-03', freq='D')], name='xxx') @@ -814,11 +815,12 @@ def test_ops_series_timedelta(self): result = pd.tseries.offsets.Day() + ser tm.assert_series_equal(result, expected) + @pytest.mark.xfail(reason="GH-23155", strict=True) def test_ops_series_period(self): # GH 13043 ser = pd.Series([pd.Period('2015-01-01', freq='D'), pd.Period('2015-01-02', freq='D')], name='xxx') - assert ser.dtype == object + assert ser.dtype == "Period[D]" per = pd.Period('2015-01-10', freq='D') off = per.freq @@ -829,7 +831,7 @@ def test_ops_series_period(self): s2 = pd.Series([pd.Period('2015-01-05', freq='D'), pd.Period('2015-01-04', freq='D')], name='xxx') - assert s2.dtype == object + assert s2.dtype == "Period[D]" expected = pd.Series([4 * off, 2 * off], name='xxx', dtype=object) tm.assert_series_equal(s2 - ser, expected) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index c9dbdd358ad87..60df3534a696c 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1179,11 +1179,11 @@ def test_iter_box(self): assert isinstance(res, Timedelta) assert res == exp - # period (object dtype, not boxed) + # period vals = [pd.Period('2011-01-01', freq='M'), pd.Period('2011-01-02', freq='M')] s = Series(vals) - assert s.dtype == 'object' + assert s.dtype == 'Period[M]' for res, exp in zip(s, vals): assert isinstance(res, pd.Period) assert res.freq == 'M' From 700650a42fe101518c41333becb88ad72b1c3b9c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 17:29:44 -0500 Subject: [PATCH 082/132] more xfails --- pandas/tests/dtypes/test_concat.py | 6 ++---- pandas/tests/extension/test_integer.py | 6 ++++++ pandas/tests/frame/test_replace.py | 1 + pandas/tests/series/test_operators.py | 8 ++++++-- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/pandas/tests/dtypes/test_concat.py 
b/pandas/tests/dtypes/test_concat.py index b6c5c119ffb6f..35623415571c0 100644 --- a/pandas/tests/dtypes/test_concat.py +++ b/pandas/tests/dtypes/test_concat.py @@ -38,16 +38,14 @@ def test_get_dtype_kinds(klass, to_concat, expected): @pytest.mark.parametrize('to_concat, expected', [ - # because we don't have Period dtype (yet), - # Series results in object dtype ([PeriodIndex(['2011-01'], freq='M'), PeriodIndex(['2011-01'], freq='M')], ['period[M]']), ([Series([Period('2011-01', freq='M')]), - Series([Period('2011-02', freq='M')])], ['object']), + Series([Period('2011-02', freq='M')])], ['period[M]']), ([PeriodIndex(['2011-01'], freq='M'), PeriodIndex(['2011-01'], freq='D')], ['period[M]', 'period[D]']), ([Series([Period('2011-01', freq='M')]), - Series([Period('2011-02', freq='D')])], ['object'])]) + Series([Period('2011-02', freq='D')])], ['period[M]', 'period[D]'])]) def test_get_dtype_kinds_period(to_concat, expected): result = _concat.get_dtype_kinds(to_concat) assert result == set(expected) diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py index 26b09d545378b..1c1dc4c629024 100644 --- a/pandas/tests/extension/test_integer.py +++ b/pandas/tests/extension/test_integer.py @@ -143,6 +143,12 @@ def test_error(self, data, all_arithmetic_operators): # other specific errors tested in the integer array specific tests pass + @pytest.mark.xfail(reason="GH-22922", strict=True) + def test_add_series_with_extension_array(self, data): + super(TestArithmeticOps, self).test_add_series_with_extension_array( + data + ) + class TestComparisonOps(base.BaseComparisonOpsTests): diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 49dba1c769572..cc0d3644df12f 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -959,6 +959,7 @@ def test_replace_swapping_bug(self): assert_frame_equal(res, expect) def test_replace_period(self): + # TODO: Implement this? 
d = { 'fname': { 'out_augmented_AUG_2011.json': diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 55e3dfde3ceb7..5f6974e791696 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -553,8 +553,12 @@ def test_unequal_categorical_comparison_raises_type_error(self): ([pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')], [NaT, NaT, pd.Timedelta('3 days')]), - ([pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')], - [NaT, NaT, pd.Period('2011-03', freq='M')])]) + pytest.param( + ([pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')], + [NaT, NaT, pd.Period('2011-03', freq='M')]), + marks=pytest.mark.xfail(reason="GH-23155", strict=False) + ), + ]) @pytest.mark.parametrize('reverse', [True, False]) @pytest.mark.parametrize('box', [Series, Index]) @pytest.mark.parametrize('dtype', [None, object]) From 452c22965fb14be2e8a5d4b80d8cca89f07ee198 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 17:41:28 -0500 Subject: [PATCH 083/132] typo --- pandas/core/arrays/period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index b54eac45d6029..5bf90ab9f50a2 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -147,7 +147,7 @@ def __init__(self, values, freq=None, copy=False): if isinstance(values, ABCSeries): values = values.values if not isinstance(values, type(self)): - raise TypeError("Incorect dtype") + raise TypeError("Incorrect dtype") elif isinstance(values, ABCPeriodIndex): values = values.values From 78751c23093839ed1518736a58c4cbde95e19c4a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 09:55:10 -0500 Subject: [PATCH 084/132] Added asi8 --- pandas/core/arrays/period.py | 20 +++++++++++++------- pandas/tests/arrays/test_period.py | 7 +++++++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git 
a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 5bf90ab9f50a2..e84b959da12d8 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -166,6 +166,7 @@ def __init__(self, values, freq=None, copy=False): @classmethod def _simple_new(cls, values, freq=None, **kwargs): + # TODO(DatetimeArray): remove once all constructors are aligned. # alias from PeriodArray.__init__ return cls(values, freq=freq, **kwargs) @@ -257,6 +258,10 @@ def freq(self): """Return the frequency object for this PeriodArray.""" return self.dtype.freq + @property + def asi8(self): + return self._ndarray_values + # -------------------------------------------------------------------- # Vectorized analogues of Period properties @@ -512,7 +517,7 @@ def asfreq(self, freq=None, how='E'): base1, mult1 = frequencies.get_freq_code(self.freq) base2, mult2 = frequencies.get_freq_code(freq) - asi8 = self._ndarray_values.view('i8') + asi8 = self.asi8 # mult1 can't be negative or 0 end = how == 'E' if end: @@ -584,7 +589,7 @@ def _sub_period(self, other): msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - asi8 = self._ndarray_values.view('i8') + asi8 = self.asi8 new_data = asi8 - other.ordinal new_data = np.array([self.freq * x for x in new_data]) @@ -644,12 +649,10 @@ def _add_delta(self, other): # TODO: standardize across datetimelike subclasses whether to return # i8 view or _shallow_copy if isinstance(other, (Tick, timedelta, np.timedelta64)): - new_values = self._add_delta_td(other) - return type(self)(new_values) + return self._add_delta_td(other) elif is_timedelta64_dtype(other): # ndarray[timedelta64] or TimedeltaArray/index - new_values = self._add_delta_tdi(other) - return type(self)(new_values) + return self._add_delta_tdi(other) else: # pragma: no cover raise TypeError(type(other).__name__) @@ -803,7 +806,10 @@ def astype(self, dtype, copy=True): elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): 
return self._format_native_types() elif is_integer_dtype(dtype): - return self.values.astype("i8", copy=copy) + values = self._ndarray_values + if copy: + values = values.copy() + return values elif (is_datetime_or_timedelta_dtype(dtype) and not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype): # disallow conversion between datetime/timedelta, diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 48053e071b53b..071eee7f7d03e 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -62,3 +62,10 @@ def test_to_period_ok(data, freq, expected): def test_to_period_raises(data, freq, msg): with tm.assert_raises_regex(IncompatibleFrequency, msg): period_array(data, freq) + + +def test_asi8(): + result = period_array(['2000', '2001', None], freq='D').asi8 + expected = np.array([10957, 11323, iNaT]) + tm.assert_numpy_array_equal(result, expected) + From 203d561bb67332081576a58eae905cca4d33d273 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 09:55:17 -0500 Subject: [PATCH 085/132] Allow setting nan --- pandas/core/arrays/period.py | 2 -- pandas/tests/arrays/test_period.py | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index e84b959da12d8..f9301b946f412 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -334,8 +334,6 @@ def __setitem__(self, key, value): value = value.ordinal elif isna(value): - # Previously we allowed setting np.nan on a Series[object] - # do we still want to allow that, or should we require None / NaT? value = iNaT else: msg = ("'value' should be a 'Period', 'NaT', or array of those. 
" diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 071eee7f7d03e..b61a64c8110c8 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -11,6 +11,7 @@ @pytest.mark.parametrize('key, value, expected', [ ([0], pd.Period("2000", "D"), [10957, 1, 2]), ([0], None, [iNaT, 1, 2]), + ([0], np.nan, [iNaT, 1, 2]), ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3), ([0, 1, 2], [pd.Period("2000", "D"), pd.Period("2001", "D"), From eb1c67db5a1a72848d9740e1c86944b9fd21247a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 09:57:51 -0500 Subject: [PATCH 086/132] revert breaking docs --- doc/source/whatsnew/v0.24.0.txt | 40 +-------------------------------- 1 file changed, 1 insertion(+), 39 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index ba71068faac37..3ab19aadd7f02 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -178,7 +178,7 @@ array, but rather an ``ExtensionArray``: pser.values This is the same behavior as ``Series.values`` for categorical data. See -:ref:`whatsnew_0240.api_breaking.interval_values` and :ref:`whatsnew_0240.api_breaking.period_values` for more. +:ref:`whatsnew_0240.api_breaking.interval_values` for more. .. _whatsnew_0240.enhancements.other: @@ -278,44 +278,6 @@ For situations where you need an ``ndarray`` of ``Interval`` objects, use np.asarray(idx) idx.values.astype(object) - -.. _whatsnew_0240.api_breaking.period_values: - -``PeriodIndex.values`` is now a ``PeriodArray`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The ``values`` attribute of a :class:`PeriodIndex` now returns a ``PeriodArray`` -rather than a NumPy array of :class:`Period` objects (:issue:`22862`). - -Previous Behavior: - -.. 
code-block:: ipython - - In [1]: idx = pd.period_range("2000", freq="D", periods=4) - - In [2]: idx.values - Out [2]: - array([Period('2000-01-01', 'D'), Period('2000-01-02', 'D'), - Period('2000-01-03', 'D'), Period('2000-01-04', 'D')], dtype=object) - -New Behavior: - -.. ipython:: python - - idx = pd.period_range("2000", freq="D", periods=4) - idx.values - -This mirrors ``CategoricalIndex.values``, which returns a ``Categorical``. - -For situations where you need an ``ndarray`` of ``Period`` objects, use -:meth:`numpy.asarray` or ``idx.values.astype(object)``. - -.. ipython:: python - - np.asarray(idx) - idx.values.astype(object) - - .. _whatsnew_0240.api.timezone_offset_parsing: Parsing Datetime Strings with Timezone Offsets From e08aa79cf69e2e54c384c7b5b317953b147fb975 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 10:06:24 -0500 Subject: [PATCH 087/132] Override _add_sub_int_array --- pandas/core/arrays/datetimelike.py | 13 ++----------- pandas/core/arrays/period.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ee7425e8cfd8f..aa4512a44857e 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -474,17 +474,8 @@ def _addsub_int_array(self, other, op): result : same class as self """ assert op in [operator.add, operator.sub] - if is_period_dtype(self): - # easy case for PeriodIndex - if op is operator.sub: - other = -other - res_values = checked_add_with_arr(self.asi8, other, - arr_mask=self._isnan) - res_values = res_values.view('i8') - res_values[self._isnan] = iNaT - return type(self)(res_values, freq=self.freq) - - elif self.freq is None: + + if self.freq is None: # GH#19123 raise NullFrequencyError("Cannot shift with no freq") diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index f9301b946f412..a6931a01ff1b4 100644 --- a/pandas/core/arrays/period.py +++ 
b/pandas/core/arrays/period.py @@ -16,6 +16,7 @@ from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta from pandas._libs.tslibs.fields import isleapyear_arr from pandas.util._decorators import cache_readonly +from pandas.core.algorithms import checked_add_with_arr from pandas.core.dtypes.common import ( is_integer_dtype, is_float_dtype, is_period_dtype, is_float, is_integer, pandas_dtype, is_scalar, @@ -837,6 +838,20 @@ def flags(self): # place. return self._ndarray_values.flags + def _addsub_int_array(self, other, op): + assert op in [operator.add, operator.sub] + # easy case for PeriodIndex + if op is operator.sub: + other = -other + res_values = checked_add_with_arr(self.asi8, other, + arr_mask=self._isnan) + res_values = res_values.view('i8') + res_values[self._isnan] = iNaT + return type(self)(res_values, freq=self.freq) + + # ------------------------------------------------------------------------ + # Ops + PeriodArray._add_comparison_ops() From c1ee04b88feab72f9044145be2c223e0f6bb2857 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 10:08:21 -0500 Subject: [PATCH 088/132] lint --- pandas/core/arrays/period.py | 12 ++++-------- pandas/tests/arrays/test_period.py | 1 - pandas/tests/frame/test_operators.py | 3 ++- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index a6931a01ff1b4..64fdaee8dc16a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -19,7 +19,7 @@ from pandas.core.algorithms import checked_add_with_arr from pandas.core.dtypes.common import ( is_integer_dtype, is_float_dtype, is_period_dtype, - is_float, is_integer, pandas_dtype, is_scalar, + pandas_dtype, is_datetime64_dtype, is_categorical_dtype, is_timedelta64_dtype, @@ -39,7 +39,6 @@ import pandas.core.common as com from pandas.tseries import frequencies -from pandas.tseries.frequencies import get_freq_code as _gfc from pandas.tseries.offsets import 
Tick, DateOffset from pandas.core.arrays import ExtensionArray @@ -617,7 +616,6 @@ def _add_delta_td(self, other): ordinals = super(PeriodArray, self)._add_delta_td(delta) return type(self)(ordinals, self.freq) - def _add_delta_tdi(self, other): assert isinstance(self.freq, Tick) # checked by calling function @@ -838,6 +836,9 @@ def flags(self): # place. return self._ndarray_values.flags + # ------------------------------------------------------------------------ + # Ops + def _addsub_int_array(self, other, op): assert op in [operator.add, operator.sub] # easy case for PeriodIndex @@ -849,10 +850,6 @@ def _addsub_int_array(self, other, op): res_values[self._isnan] = iNaT return type(self)(res_values, freq=self.freq) - # ------------------------------------------------------------------------ - # Ops - - PeriodArray._add_comparison_ops() PeriodArray._add_datetimelike_methods() @@ -932,7 +929,6 @@ def period_array(data, freq=None, ordinal=None, copy=False): data = ensure_object(data) - return PeriodArray._from_sequence(data, dtype=dtype) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index b61a64c8110c8..d2bf048160e43 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -69,4 +69,3 @@ def test_asi8(): result = period_array(['2000', '2001', None], freq='D').asi8 expected = np.array([10957, 11323, iNaT]) tm.assert_numpy_array_equal(result, expected) - diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index dff6e1c34ea50..c223d05ab4645 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -552,7 +552,8 @@ def test_arith_non_pandas_object(self): df = self.simple val1 = df.xs('a').values - added = DataFrame(df.values + val1, index=df.index, columns=df.columns) + added = DataFrame(df.values + val1, index=df.index, + columns=df.columns) assert_frame_equal(df + val1, added) added = DataFrame((df.values.T + val1).T, From 
827e563bcd729713d25806ab56b0c2c72e58a67d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 10:42:20 -0500 Subject: [PATCH 089/132] Update PeriodIndex._simple_new --- pandas/core/arrays/period.py | 38 ++++++++---------- pandas/core/indexes/period.py | 74 +++++++++++++++++------------------ pandas/io/packers.py | 6 ++- 3 files changed, 57 insertions(+), 61 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 64fdaee8dc16a..8eea243107b53 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -16,7 +16,7 @@ from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta from pandas._libs.tslibs.fields import isleapyear_arr from pandas.util._decorators import cache_readonly -from pandas.core.algorithms import checked_add_with_arr +import pandas.core.algorithms as algos from pandas.core.dtypes.common import ( is_integer_dtype, is_float_dtype, is_period_dtype, pandas_dtype, @@ -35,6 +35,7 @@ from pandas.core.dtypes.generic import ( ABCSeries, ABCIndexClass, ABCPeriodIndex ) +from pandas.core.dtypes.missing import isna import pandas.core.common as com @@ -342,9 +343,6 @@ def __setitem__(self, key, value): self._data[key] = value def take(self, indices, allow_fill=False, fill_value=None): - from pandas.core.algorithms import take - from pandas import isna - if allow_fill: if isna(fill_value): fill_value = iNaT @@ -354,10 +352,10 @@ def take(self, indices, allow_fill=False, fill_value=None): msg = "'fill_value' should be a Period. Got '{}'." raise ValueError(msg.format(fill_value)) - new_values = take(self._data, - indices, - allow_fill=allow_fill, - fill_value=fill_value) + new_values = algos.take(self._data, + indices, + allow_fill=allow_fill, + fill_value=fill_value) return type(self)(new_values, self.freq) @@ -368,7 +366,7 @@ def fillna(self, value=None, method=None, limit=None): # TODO(#20300) # To avoid converting to object, we re-implement here with the changes # 1. 
Passing `_ndarray_values` to func instead of self.astype(object) - # 2. Re-boxing with `_from_ordinals` + # 2. Re-boxing output of 1. # #20300 should let us do this kind of logic on ExtensionArray.fillna # and we can use it. from pandas.api.types import is_array_like @@ -376,7 +374,7 @@ def fillna(self, value=None, method=None, limit=None): from pandas.core.missing import pad_1d, backfill_1d if isinstance(value, ABCSeries): - value = value.values + value = value._values value, method = validate_fillna_kwargs(value, method) @@ -406,21 +404,19 @@ def copy(self, deep=False): return type(self)(self._data.copy(), freq=self.freq) def value_counts(self, dropna=False): - from pandas.core.algorithms import value_counts - from pandas.core.indexes.period import PeriodIndex + from pandas import Series, PeriodIndex if dropna: values = self[~self.isna()]._data else: values = self._data - result = value_counts(values, sort=False) - index = PeriodIndex._from_ordinals(result.index, - name=result.index.name, - freq=self.freq) - return type(result)(result.values, - index=index, - name=result.name) + cls = type(self) + + result = algos.value_counts(values, sort=False) + index = PeriodIndex(cls(result.index, freq=self.freq), + name=result.index.name) + return Series(result.values, index=index, name=result.name) def shift(self, periods=1): """ @@ -844,8 +840,8 @@ def _addsub_int_array(self, other, op): # easy case for PeriodIndex if op is operator.sub: other = -other - res_values = checked_add_with_arr(self.asi8, other, - arr_mask=self._isnan) + res_values = algos.checked_add_with_arr(self.asi8, other, + arr_mask=self._isnan) res_values = res_values.view('i8') res_values[self._isnan] = iNaT return type(self)(res_values, freq=self.freq) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 1a4d437650ba3..d3da1b9d382c9 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -8,6 +8,7 @@ from pandas.core.dtypes.common import ( 
is_integer, is_float, + is_float_dtype, is_integer_dtype, is_datetime64_any_dtype, is_bool_dtype, @@ -29,7 +30,6 @@ from pandas.core.algorithms import unique1d from pandas.core.dtypes.dtypes import PeriodDtype -from pandas.core.dtypes.generic import ABCIndexClass from pandas.core.arrays.period import PeriodArray, period_array from pandas.core.base import _shared_docs from pandas.core.indexes.base import _index_shared_docs, ensure_index @@ -63,7 +63,9 @@ def _new_PeriodIndex(cls, **d): # GH13277 for unpickling values = d.pop('data') if values.dtype == 'int64': - return cls._from_ordinals(values=values, **d) + freq = d.pop('freq', None) + data = PeriodArray(values, freq=freq) + return cls._simple_new(data, **d) else: return cls(values, **d) @@ -187,6 +189,9 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, _engine_type = libindex.PeriodEngine + # ------------------------------------------------------------------------ + # Index Constructors + def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, periods=None, tz=None, dtype=None, copy=False, name=None, **fields): @@ -241,6 +246,35 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, return cls._simple_new(data, name=name) + @classmethod + def _simple_new(cls, values, name=None, freq=None, **kwargs): + """ + Create a new PeriodIndex. + + Parameters + ---------- + values : PeriodArray, PeriodIndex, Index[int64], ndarray[int64] + Values that can be converted to a PeriodArray without inference + or coercion. + + """ + # TODO: raising on floats is tested, but maybe not useful. + # Should the callers know not to pass floats? + # At the very least, I think we can ensure that lists aren't passed. 
+ if isinstance(values, list): + values = np.asarray(values) + if is_float_dtype(values): + raise TypeError("PeriodIndex._simple_new does not accept floats.") + values = PeriodArray(values, freq=freq) + + if not isinstance(values, PeriodArray): + raise TypeError("PeriodIndex._simple_new only accepts PeriodArray") + result = object.__new__(cls) + result._data = values + result.name = name + result._reset_identity() + return result + # ------------------------------------------------------------------------ # Data @property @@ -270,42 +304,6 @@ def freq(self, value): # here, but people shouldn't be doing this anyway. self._data._freq = value - # ------------------------------------------------------------------------ - # Index Constructors - - @classmethod - def _simple_new(cls, values, name=None, freq=None, **kwargs): - # type: (PeriodArray, Any, Any) -> PeriodIndex - """ - Values can be any type that can be coerced to Periods. - Ordinals in an ndarray are fastpath-ed to `_from_ordinals` - """ - if isinstance(values, cls): - # TODO: don't do this - values = values.values - elif (isinstance(values, (ABCIndexClass, np.ndarray)) and - is_integer_dtype(values)): - # TODO: don't do this. 
- values = PeriodArray._simple_new(values, freq) - - if not isinstance(values, PeriodArray): - raise TypeError("PeriodIndex._simple_new only accepts PeriodArray") - result = object.__new__(cls) - result._data = values - result.name = name - result._reset_identity() - return result - - @classmethod - def _from_ordinals(cls, values, name=None, freq=None, **kwargs): - """ - Values should be int ordinals - `__new__` & `_simple_new` cooerce to ordinals and call this method - """ - data = PeriodArray(values, freq=freq) - result = cls._simple_new(data, name=name) - return result - def _shallow_copy(self, values=None, **kwargs): # TODO: simplify, figure out type of values if values is None: diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 73b9e1dfc24e7..764e27a60abb5 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -53,7 +53,7 @@ ) from pandas.compat import u, u_safe from pandas.core import internals -from pandas.core.arrays import IntervalArray +from pandas.core.arrays import IntervalArray, PeriodArray from pandas.core.arrays.sparse import BlockIndex, IntIndex from pandas.core.dtypes.common import ( is_categorical_dtype, is_object_dtype, needs_i8_conversion, pandas_dtype @@ -599,7 +599,9 @@ def decode(obj): elif typ == u'period_index': data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) d = dict(name=obj[u'name'], freq=obj[u'freq']) - return globals()[obj[u'klass']]._from_ordinals(data, **d) + freq = d.pop('freq', None) + return globals()[obj[u'klass']](PeriodArray(data, freq), **d) + elif typ == u'datetime_index': data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False) From ca4a7fd378925eef621c6fb2bdadcd86036a5f03 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 11:04:18 -0500 Subject: [PATCH 090/132] Clean up uses of .values, ._values, ._ndarray_values, ._data --- pandas/core/arrays/period.py | 6 +++--- pandas/core/indexes/period.py | 
17 +++++++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 8eea243107b53..114fc236c248e 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -187,7 +187,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): return cls(ordinals, freq=freq) def _values_for_factorize(self): - return self.values, iNaT + return self._ndarray_values, iNaT @classmethod def _from_factorized(cls, values, original): @@ -457,7 +457,7 @@ def _time_shift(self, n, freq=None): freq : pandas.DateOffset, pandas.Timedelta, or string Frequency increment to shift by. """ - values = self.values + n * self.freq.n + values = self._ndarray_values + n * self.freq.n if self.hasnans: values[self._isnan] = iNaT return type(self)(values, freq=self.freq) @@ -819,7 +819,7 @@ def astype(self, dtype, copy=True): def _item(self): if len(self) == 1: # IndexOpsMixin will catch and re-raise IndexErrors - return Period._from_ordinal(self.values[0], self.freq) + return Period._from_ordinal(self._ndarray_values[0], self.freq) else: raise ValueError('can only convert an array of size 1 to a ' 'Python scalar') diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index d3da1b9d382c9..e957ef69de539 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -279,10 +279,15 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): # Data @property def _ndarray_values(self): - return self.values.values + return self._data._ndarray_values @property def values(self): + # TODO: Discussion on what this should be. 
+ return self._data + + @property + def _values(self): return self._data @property @@ -307,7 +312,7 @@ def freq(self, value): def _shallow_copy(self, values=None, **kwargs): # TODO: simplify, figure out type of values if values is None: - values = self._values + values = self._data if isinstance(values, type(self)): values = values.values @@ -912,7 +917,7 @@ def _create_comparison_method(cls, op): """ # TODO(DatetimeArray): move to base class. def wrapper(self, other): - return op(self.values, other) + return op(self._data, other) wrapper.__doc__ = op.__doc__ wrapper.__name__ = '__{}__'.format(op.__name__) @@ -948,7 +953,7 @@ def item(self): # TODO(DatetimeArray): remove # override to use _item try: - return self.values._item() + return self._data._item() except IndexError: # copy numpy's message here because Py26 raises an IndexError raise ValueError('can only convert an array of size 1 to a ' @@ -960,7 +965,7 @@ def data(self): warnings.warn("{obj}.data is deprecated and will be removed " "in a future version".format(obj=type(self).__name__), FutureWarning, stacklevel=2) - return np.asarray(self.values).data + return np.asarray(self._data).data @property def base(self): @@ -970,7 +975,7 @@ def base(self): warnings.warn("{obj}.base is deprecated and will be removed " "in a future version".format(obj=type(self).__name__), FutureWarning, stacklevel=2) - return np.asarray(self.values) + return np.asarray(self._data) PeriodIndex._add_comparison_ops() From ed185c0d0db993cee9aa9c57f0a22b55cfde29fd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 11:06:28 -0500 Subject: [PATCH 091/132] one more values --- pandas/core/indexes/period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index e957ef69de539..e817b5b08d18f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -315,7 +315,7 @@ def _shallow_copy(self, values=None, **kwargs): values = 
self._data if isinstance(values, type(self)): - values = values.values + values = values._values if not isinstance(values, PeriodArray): if (isinstance(values, np.ndarray) and From a4011eb56b89cd18271d24505b4556382fc38f73 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 12:35:48 -0500 Subject: [PATCH 092/132] remove xfails --- pandas/tests/reshape/test_concat.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 7d2700297bcc6..e65a2e9f9d4fa 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -416,7 +416,6 @@ def test_concatlike_common_period(self): res = pd.concat([ps1, ps2]) tm.assert_series_equal(res, pd.Series(exp, index=[0, 1, 0, 1])) - @pytest.mark.xfail(reason="GH-22994", strict=True) def test_concatlike_common_period_diff_freq_to_object(self): # GH 13221 pi1 = pd.PeriodIndex(['2011-01', '2011-02'], freq='M') @@ -2000,7 +1999,6 @@ def test_concat_period_series(self): result = concat([x, y], ignore_index=True) tm.assert_series_equal(result, expected) - @pytest.mark.xfail(reason="GH-22994", strict=True) def test_concat_period_multiple_freq_series(self): x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(pd.PeriodIndex(['2015-10-01', '2016-01-01'], freq='M')) @@ -2009,7 +2007,6 @@ def test_concat_period_multiple_freq_series(self): tm.assert_series_equal(result, expected) assert result.dtype == 'object' - @pytest.mark.xfail(reason="GH-22994", strict=True) def test_concat_period_other_series(self): x = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='D')) y = Series(pd.PeriodIndex(['2015-11-01', '2015-12-01'], freq='M')) From fc1ca3c4f7834d2f65da2609b7f861beb78a9e3b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 15:00:16 -0500 Subject: [PATCH 093/132] Fixed freq handling in _shallow_copy with a freq --- pandas/core/indexes/period.py | 3 ++- 1 file changed, 2 
insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index e817b5b08d18f..738f7cd266cf8 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -332,11 +332,12 @@ def _shallow_copy(self, values=None, **kwargs): # I don't like overloading shallow_copy with freq changes. # See if it's used anywhere outside of test_resample_empty_dataframe + attributes = self._get_attributes_dict() freq = kwargs.pop("freq", None) if freq: values = values.asfreq(freq) + attributes.pop("freq", None) - attributes = self._get_attributes_dict() attributes.update(kwargs) if not len(values) and 'dtype' not in kwargs: attributes['dtype'] = self.dtype From 1b1841f9ffb728680aa750591cd95f3c916d8a09 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 15:18:40 -0500 Subject: [PATCH 094/132] test updates --- pandas/tests/arithmetic/test_period.py | 9 +++++---- pandas/tests/arrays/test_period.py | 2 +- pandas/tests/io/test_parquet.py | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 00408005d107b..867b8a944912d 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -307,14 +307,15 @@ def test_cmp_series_period_series_mixed_freq(self): class TestPeriodFrameArithmetic(object): + @pytest.mark.xfail(reason="GH-22922") def test_ops_frame_period(self): # GH#13043 df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'), pd.Period('2015-02', freq='M')], 'B': [pd.Period('2014-01', freq='M'), pd.Period('2014-02', freq='M')]}) - assert df['A'].dtype == object - assert df['B'].dtype == object + assert df['A'].dtype == 'Period[M]' + assert df['B'].dtype == 'Period[M]' p = pd.Period('2015-03', freq='M') off = p.freq @@ -328,8 +329,8 @@ def test_ops_frame_period(self): pd.Period('2015-06', freq='M')], 'B': [pd.Period('2015-05', freq='M'), pd.Period('2015-06', freq='M')]}) 
- assert df2['A'].dtype == object - assert df2['B'].dtype == object + assert df2['A'].dtype == 'Period[M]' + assert df2['B'].dtype == 'Period[M]' exp = pd.DataFrame({'A': np.array([4 * off, 4 * off], dtype=object), 'B': np.array([16 * off, 16 * off], dtype=object)}) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index d2bf048160e43..46d20ed6185db 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -47,7 +47,7 @@ def test_setitem_raises(): ]) def test_to_period_ok(data, freq, expected): result = period_array(data, freq=freq).values - expected = np.asarray(expected) + expected = np.asarray(expected, dtype=np.int64) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index ab7f04ad86ffc..3e17e3c8c9774 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -440,7 +440,7 @@ def test_duplicate_columns(self, pa): def test_unsupported(self, pa): # period df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) - self.check_error_on_write(df, pa, ValueError) + self.check_error_on_write(df, pa, TypeError) # timedelta df = pd.DataFrame({'a': pd.timedelta_range('1 day', @@ -449,7 +449,7 @@ def test_unsupported(self, pa): # mixed python objects df = pd.DataFrame({'a': ['a', 1, 2.0]}) - self.check_error_on_write(df, pa, ValueError) + self.check_error_on_write(df, pa, TypeError) def test_categorical(self, pa_ge_070): pa = pa_ge_070 From b3b315a38a923d7cd081ee69da012ffe5cd9c186 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 16:06:35 -0500 Subject: [PATCH 095/132] API: Keep PeriodIndex.values an ndarray --- pandas/core/arrays/categorical.py | 22 +++++++++++++++++++--- pandas/core/arrays/period.py | 14 ++++++++++---- pandas/core/indexes/datetimelike.py | 3 +++ pandas/core/indexes/period.py | 12 +++--------- pandas/tests/indexes/period/test_ops.py | 4 ++-- 5 files 
changed, 37 insertions(+), 18 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 8735284617f31..b4e191c09ef5b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -29,6 +29,7 @@ is_categorical_dtype, is_float_dtype, is_integer_dtype, + is_object_dtype, is_list_like, is_sequence, is_scalar, is_iterator, is_dict_like) @@ -2423,11 +2424,26 @@ def _get_codes_for_values(values, categories): utility routine to turn values into codes given the specified categories """ from pandas.core.algorithms import _get_data_algo, _hashtables - if is_dtype_equal(values.dtype, categories.dtype): + dtype_equal = is_dtype_equal(values.dtype, categories.dtype) + + if is_extension_array_dtype(categories.dtype) and is_object_dtype(values): + # Support inferring the correct extension dtype from an array of + # scalar objects. e.g. + # Categorical(array[Period, Period], categories=PeriodIndex(...)) + try: + values = ( + categories.dtype.construct_array_type()._from_sequence(values) + ) + except Exception: + # but that may fail for any reason, so fall back to object + values = ensure_object(values) + categories = ensure_object(categories) + + elif dtype_equal: # To prevent erroneous dtype coercion in _get_data_algo, retrieve # the underlying numpy array. gh-22702 - values = getattr(values, 'values', values) - categories = getattr(categories, 'values', categories) + values = getattr(values, '_ndarray_values', values) + categories = getattr(categories, '_ndarray_values', categories) else: values = ensure_object(values) categories = ensure_object(categories) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 114fc236c248e..c66aa90a3c11d 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -68,7 +68,7 @@ def wrapper(self, other): op = getattr(self._ndarray_values, opname) if isinstance(other, (ABCSeries, ABCIndexClass)): # TODO: return NotImplemented? 
- other = other.values + other = other._values if isinstance(other, Period): if other.freq != self.freq: @@ -146,12 +146,12 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): def __init__(self, values, freq=None, copy=False): # type: (Union[PeriodArray, np.ndarray], Union[str, Tick]) -> None if isinstance(values, ABCSeries): - values = values.values + values = values._values if not isinstance(values, type(self)): raise TypeError("Incorrect dtype") elif isinstance(values, ABCPeriodIndex): - values = values.values + values = values._values if isinstance(values, type(self)): if freq is not None and freq != values.freq: @@ -465,7 +465,13 @@ def _time_shift(self, n, freq=None): @property def _box_func(self): # Used in DatelikeArray.__iter__ - return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq) + # TODO: implement this in cython? + def func(x): + if isinstance(x, Period) or x is NaT: + return x + else: + return Period._from_ordinal(ordinal=x, freq=self.freq) + return func def asfreq(self, freq=None, how='E'): """ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index bd0e1d53ae2b2..88dbb85ec021d 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -708,6 +708,9 @@ def astype(self, dtype, copy=True): elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): return Index(self.format(), name=self.name, dtype=object) elif is_integer_dtype(dtype): + # TODO(DatetimeArray): use self._values here. + # Can't use ._values currently, because that returns a + # DatetimeIndex, which throws us in an infinite loop. 
return Index(self.values.astype('i8', copy=copy), name=self.name, dtype='i8') elif (is_datetime_or_timedelta_dtype(dtype) and diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 738f7cd266cf8..a3da93f87f396 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -283,8 +283,7 @@ def _ndarray_values(self): @property def values(self): - # TODO: Discussion on what this should be. - return self._data + return np.asarray(self) @property def _values(self): @@ -353,12 +352,7 @@ def _shallow_copy_with_infer(self, values=None, **kwargs): @property def _box_func(self): - def func(x): - if isinstance(x, Period) or x is tslib.NaT: - return x - else: - return Period._from_ordinal(ordinal=x, freq=self.freq) - return func + return self._data._box_func def _maybe_box_as_values(self, values, **attribs): """Box an array of ordinals to a PeriodArray @@ -542,7 +536,7 @@ def _box_values_as_index(self): """ # TODO(DatetimeArray): remove # Have to add our name. 
- return Index(self._data._box_values_as_index(), name=self.name) + return Index(self.values, name=self.name) @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True, how='start'): diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 444aea466807d..33858a28ec81b 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -93,7 +93,7 @@ def test_value_counts_unique(self): # GH 7735 idx = pd.period_range('2011-01-01 09:00', freq='H', periods=10) # create repeated values, 'n'th element is repeated by n+1 times - idx = PeriodIndex(np.repeat(idx.values, range(1, len(idx) + 1)), + idx = PeriodIndex(np.repeat(idx._values, range(1, len(idx) + 1)), freq='H') exp_idx = PeriodIndex(['2011-01-01 18:00', '2011-01-01 17:00', @@ -392,7 +392,7 @@ def test_equals(self, freq): # same internal, different tz idx3 = pd.PeriodIndex._simple_new( - idx.values._simple_new(idx.values.asi8, freq="H") + idx._values._simple_new(idx._values.asi8, freq="H") ) tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) assert not idx.equals(idx3) From 8102475f715606448c1a673f3d3760dbd21f422a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 16:38:44 -0500 Subject: [PATCH 096/132] BUG: Raise for non-equal freq in take --- pandas/core/arrays/period.py | 7 +++++++ pandas/tests/arrays/test_period.py | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index c66aa90a3c11d..1ed8d5bb3945b 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -347,6 +347,13 @@ def take(self, indices, allow_fill=False, fill_value=None): if isna(fill_value): fill_value = iNaT elif isinstance(fill_value, Period): + if self.freq != fill_value.freq: + msg = DIFFERENT_FREQ_INDEX.format( + self.freq.freqstr, + fill_value.freqstr + ) + raise IncompatibleFrequency(msg) + fill_value = fill_value.ordinal else: msg = 
"'fill_value' should be a Period. Got '{}'." diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 46d20ed6185db..b99e66fe7545e 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -69,3 +69,10 @@ def test_asi8(): result = period_array(['2000', '2001', None], freq='D').asi8 expected = np.array([10957, 11323, iNaT]) tm.assert_numpy_array_equal(result, expected) + + +def test_take_raises(): + arr = period_array(['2000', '2001'], freq='D') + with tm.assert_raises_regex(ValueError, 'freq'): + arr.take([0, -1], allow_fill=True, + fill_value=pd.Period('2000', freq='W')) From 8c329ebaec92cf224442985bd3dbe03a45f50a1d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 16:44:39 -0500 Subject: [PATCH 097/132] Punt on DataFrame.replace specializing --- pandas/tests/frame/test_replace.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index cc0d3644df12f..bf755b1dac4b8 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -959,7 +959,6 @@ def test_replace_swapping_bug(self): assert_frame_equal(res, expect) def test_replace_period(self): - # TODO: Implement this? d = { 'fname': { 'out_augmented_AUG_2011.json': @@ -985,8 +984,11 @@ def test_replace_period(self): 'out_augmented_AUG_2011.json', 'out_augmented_JAN_2011.json'], columns=['fname']) assert set(df.fname.values) == set(d['fname'].keys()) + # We don't support converting object -> specialized EA in + # replace yet. 
expected = DataFrame({'fname': [d['fname'][k] - for k in df.fname.values]}) + for k in df.fname.values]}, + dtype=object) result = df.replace(d) assert_frame_equal(result, expected) From 78d4960b3a8d0dd0dd0c7c69b6eafa6744effbf6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 16:47:12 -0500 Subject: [PATCH 098/132] lint --- pandas/tests/series/test_operators.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 5f6974e791696..78e71e35aeeff 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -554,7 +554,8 @@ def test_unequal_categorical_comparison_raises_type_error(self): [NaT, NaT, pd.Timedelta('3 days')]), pytest.param( - ([pd.Period('2011-01', freq='M'), NaT, pd.Period('2011-03', freq='M')], + ([pd.Period('2011-01', freq='M'), NaT, + pd.Period('2011-03', freq='M')], [NaT, NaT, pd.Period('2011-03', freq='M')]), marks=pytest.mark.xfail(reason="GH-23155", strict=False) ), From 4e3d914fbe713820f465039bb0d289fa34454282 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 17:05:35 -0500 Subject: [PATCH 099/132] fixed xfail message --- pandas/tests/arithmetic/test_period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 867b8a944912d..dce192a948707 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -932,7 +932,7 @@ def test_pi_ops_nat(self): self._check(idx + 3, lambda x: x - 3, idx) self._check(idx + 3, lambda x: np.subtract(x, 3), idx) - @pytest.mark.xfail(reason="TODO", strict=True) + @pytest.mark.xfail(reason="GH-22798", strict=True) def test_pi_ops_array_int(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M', name='idx') From 5e4aaa7c79d77579a9a33d0e5eb985a0a6cfe451 Mon Sep 17 00:00:00 2001 From: Tom 
Augspurger Date: Thu, 18 Oct 2018 20:56:10 -0500 Subject: [PATCH 100/132] TST: _from_datetime64 --- pandas/core/arrays/datetimes.py | 2 +- pandas/core/arrays/period.py | 63 ++++++++++++++++++++++++------ pandas/tests/arrays/test_period.py | 22 +++++++++-- 3 files changed, 71 insertions(+), 16 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 65ff0fc538f00..768d608d8867a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -803,7 +803,7 @@ def to_period(self, freq=None): pandas.PeriodIndex: Immutable ndarray holding ordinal values pandas.DatetimeIndex.to_pydatetime: Return DatetimeIndex as object """ - from pandas.core.arrays.period import PeriodArray + from pandas.core.arrays import PeriodArray if self.tz is not None: warnings.warn("Converting to PeriodArray/Index representation " diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 1ed8d5bb3945b..11d6b1ed51910 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -16,6 +16,7 @@ from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds, Timedelta from pandas._libs.tslibs.fields import isleapyear_arr from pandas.util._decorators import cache_readonly +from pandas.util._validators import validate_fillna_kwargs import pandas.core.algorithms as algos from pandas.core.dtypes.common import ( is_integer_dtype, is_float_dtype, is_period_dtype, @@ -23,6 +24,8 @@ is_datetime64_dtype, is_categorical_dtype, is_timedelta64_dtype, + is_list_like, + is_array_like, is_object_dtype, is_string_dtype, is_datetime_or_timedelta_dtype, @@ -31,11 +34,13 @@ _TD_DTYPE, ) + from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ( ABCSeries, ABCIndexClass, ABCPeriodIndex ) from pandas.core.dtypes.missing import isna +from pandas.core.missing import pad_1d, backfill_1d import pandas.core.common as com @@ -208,7 +213,7 @@ def _from_datetime64(cls, data, freq, tz=None): 
------- PeriodArray[freq] """ - data = dt64arr_to_periodarr(data, freq, tz) + data, freq = dt64arr_to_periodarr(data, freq, tz) return cls(data, freq=freq) @classmethod @@ -310,9 +315,7 @@ def __len__(self): return len(self._data) def __setitem__(self, key, value): - from pandas.core.dtypes.missing import isna - - if isinstance(value, (compat.Sequence, type(self))): + if is_list_like(value): if len(key) != len(value) and not com.is_bool_indexer(key): msg = ("shape mismatch: value array of length '{}' does not " "match indexing result of length '{}'.") @@ -376,9 +379,6 @@ def fillna(self, value=None, method=None, limit=None): # 2. Re-boxing output of 1. # #20300 should let us do this kind of logic on ExtensionArray.fillna # and we can use it. - from pandas.api.types import is_array_like - from pandas.util._validators import validate_fillna_kwargs - from pandas.core.missing import pad_1d, backfill_1d if isinstance(value, ABCSeries): value = value._values @@ -708,6 +708,7 @@ def _maybe_convert_timedelta(self, other): # Formatting def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): + """ actually format my specific types """ # TODO(DatetimeArray): remove values = self.astype(object) @@ -910,17 +911,17 @@ def period_array(data, freq=None, ordinal=None, copy=False): """ if data is None and ordinal is None: - raise ValueError("range!") + raise ValueError("Must pass one of 'data' or 'ordinal'.") elif data is None: data = np.asarray(ordinal, dtype=np.int64) if copy: data = data.copy() return PeriodArray(data, freq=freq) else: + if is_datetime64_dtype(data): + return PeriodArray._from_datetime64(data, freq) if isinstance(data, (ABCPeriodIndex, ABCSeries, PeriodArray)): return PeriodArray(data, freq) - elif is_datetime64_dtype(data): - return PeriodArray._from_datetime64(data, freq) # other iterable of some kind if not isinstance(data, (np.ndarray, list, tuple)): @@ -942,12 +943,50 @@ def period_array(data, freq=None, ordinal=None, copy=False): def 
dt64arr_to_periodarr(data, freq, tz=None): + """ + Convert an datetime-like array to values Period ordinals. + + Parameters + ---------- + data : Union[Series[datetime64[ns]], DatetimeIndex, ndarray[datetime64ns]] + freq : Optional[Union[str, Tick]] + Must match the `freq` on the `data` if `data` is a DatetimeIndex + or Series. + tz : Optional[tzinfo] + + Returns + ------- + ordinals : ndarray[int] + freq : Tick + The frequency extracted from the Series or DatetimeIndex if that's + used. + + """ if data.dtype != np.dtype('M8[ns]'): raise ValueError('Wrong dtype: %s' % data.dtype) - freq = Period._maybe_convert_freq(freq) + if freq is not None: + freq = Period._maybe_convert_freq(freq) + + if isinstance(data, ABCIndexClass): + if freq is None: + freq = data.freq + elif freq != data.freq: + msg = DIFFERENT_FREQ_INDEX.format(freq.freqstr, data.freq.freqstr) + raise IncompatibleFrequency(msg) + data = data._values + + elif isinstance(data, ABCSeries): + if freq is None: + freq = data.dt.freq + elif freq != data.dt.freq: + msg = DIFFERENT_FREQ_INDEX.format(freq.freqstr, + data.dt.freq.freqstr) + raise IncompatibleFrequency(msg) + data = data._values + base, mult = frequencies.get_freq_code(freq) - return libperiod.dt64arr_to_periodarr(data.view('i8'), base, tz) + return libperiod.dt64arr_to_periodarr(data.view('i8'), base, tz), freq def _get_ordinal_range(start, end, periods, freq, mult=1): diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index b99e66fe7545e..8e736d19a3042 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -37,6 +37,8 @@ def test_setitem_raises(): arr[0] = 1 +# period_array + @pytest.mark.parametrize("data, freq, expected", [ ([pd.Period("2017", "D")], None, [17167]), ([pd.Period("2017", "D")], "D", [17167]), @@ -44,13 +46,22 @@ def test_setitem_raises(): (["2017"], "D", [17167]), ([pd.Period("2017", "D")], pd.tseries.offsets.Day(), [17167]), ([pd.Period("2017", "D"), None],
None, [17167, iNaT]), + (pd.Series(pd.date_range("2017", periods=3)), None, + [17167, 17168, 17169]), + (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]), ]) -def test_to_period_ok(data, freq, expected): +def test_period_array_ok(data, freq, expected): result = period_array(data, freq=freq).values expected = np.asarray(expected, dtype=np.int64) tm.assert_numpy_array_equal(result, expected) +def test_from_datetime64_raises(): + arr = pd.date_range("2017", periods=3, freq="D") + with tm.assert_raises_regex(IncompatibleFrequency, "freq"): + PeriodArray._from_datetime64(arr, freq="M") + + @pytest.mark.parametrize("data, freq, msg", [ ([pd.Period('2017', 'D'), pd.Period('2017', 'A')], @@ -60,11 +71,16 @@ def test_to_period_ok(data, freq, expected): "A", "Input has different freq"), ]) -def test_to_period_raises(data, freq, msg): +def test_period_array_raises(data, freq, msg): with tm.assert_raises_regex(IncompatibleFrequency, msg): period_array(data, freq) +def test_period_array_no_data(): + with tm.assert_raises_regex(ValueError, "one of"): + period_array(None) + + def test_asi8(): result = period_array(['2000', '2001', None], freq='D').asi8 expected = np.array([10957, 11323, iNaT]) @@ -73,6 +89,6 @@ def test_asi8(): def test_take_raises(): arr = period_array(['2000', '2001'], freq='D') - with tm.assert_raises_regex(ValueError, 'freq'): + with tm.assert_raises_regex(IncompatibleFrequency, 'freq'): arr.take([0, -1], allow_fill=True, fill_value=pd.Period('2000', freq='W')) From 7f77563c8200e2e070aeb5ae96df9ef44e67aa81 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 21:14:31 -0500 Subject: [PATCH 101/132] Fixups - Perf in period_array - pyarrow error - py2 compat --- pandas/core/arrays/period.py | 5 +++-- pandas/tests/extension/test_period.py | 10 ++++++---- pandas/tests/io/test_parquet.py | 4 +++- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 
11d6b1ed51910..627f0e3cddcad 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -927,13 +927,14 @@ def period_array(data, freq=None, ordinal=None, copy=False): if not isinstance(data, (np.ndarray, list, tuple)): data = list(data) + data = np.asarray(data) + if freq: dtype = PeriodDtype(freq) else: dtype = None - if lib.infer_dtype(data) == 'floating' and len(data) > 0: - # Can we avoid infer_dtype? Why pay that tax every time? + if is_float_dtype(data) and len(data) > 0: raise TypeError("PeriodIndex does not allow " "floating point in construction") diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 516967e47e911..e731227ab5f8e 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -89,8 +89,9 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): exc=None) else: # ... but not the rest. - super().test_arith_series_with_scalar(data, - all_arithmetic_operators) + super(TestArithmeticOps, self).test_arith_series_with_scalar( + data, all_arithmetic_operators + ) def test_arith_series_with_array(self, data, all_arithmetic_operators): if all_arithmetic_operators in self.implements: @@ -99,8 +100,9 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): exc=None) else: # ... but not the rest. 
- super().test_arith_series_with_scalar(data, - all_arithmetic_operators) + super(TestArithmeticOps, self).test_arith_series_with_scalar( + data, all_arithmetic_operators + ) def _check_divmod_op(self, s, op, other, exc=NotImplementedError): super(TestArithmeticOps, self)._check_divmod_op( diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 3e17e3c8c9774..bc94ed5f944d3 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -440,7 +440,9 @@ def test_duplicate_columns(self, pa): def test_unsupported(self, pa): # period df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) - self.check_error_on_write(df, pa, TypeError) + # pyarrow 0.11 raises ArrowTypeError + # older pyarrows raise ArrowInvalid + self.check_error_on_write(df, pa, (ValueError, TypeError)) # timedelta df = pd.DataFrame({'a': pd.timedelta_range('1 day', From f88d6f70cda0c82302fcdc131378ef4d85ec04ee Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 21:21:17 -0500 Subject: [PATCH 102/132] escape --- pandas/tests/extension/test_period.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index e731227ab5f8e..b3f02d4f9a773 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -113,8 +113,8 @@ def _check_divmod_op(self, s, op, other, exc=NotImplementedError): def test_add_series_with_extension_array(self, data): # we don't implement + for Period s = pd.Series(data) - msg = ("unsupported operand type\(s\) for \+: " - "\'PeriodArray\' and \'PeriodArray\'") + msg = (r"unsupported operand type\(s\) for \+: " + r"\'PeriodArray\' and \'PeriodArray\'") with tm.assert_raises_regex(TypeError, msg): s + data From 7aa78bab37970e5958c017cc6249a207b4bf834c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 21:22:25 -0500 Subject: [PATCH 103/132] dtype --- 
pandas/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 60df3534a696c..fe2956adc35af 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1236,7 +1236,7 @@ def test_values_consistent(array, expected_type, dtype): np.array(['2017-01-01T05:00:00'], dtype='M8[ns]')), (pd.TimedeltaIndex([10**10]), np.array([10**10], dtype='m8[ns]')), (pd.PeriodIndex(['2017', '2018'], freq='D'), - np.array([17167, 17532])), + np.array([17167, 17532], dtype=np.int64)), ]) def test_ndarray_values(array, expected): l_values = pd.Series(array)._ndarray_values From 2d737f8ed8c29fd2bd6839d5f87ce24459323c97 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 18 Oct 2018 21:31:19 -0500 Subject: [PATCH 104/132] revert and unxfail values --- pandas/tests/indexes/period/test_period.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index b02542af10eb6..405edba83dc7a 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -8,7 +8,6 @@ from pandas import (PeriodIndex, period_range, DatetimeIndex, NaT, Index, Period, Series, DataFrame, date_range, offsets) -from pandas.core.arrays import PeriodArray from ..datetimelike import DatetimeLike @@ -138,21 +137,20 @@ def test_view_asi8(self): tm.assert_numpy_array_equal(idx.view('i8'), exp) tm.assert_numpy_array_equal(idx.asi8, exp) - @pytest.mark.xfail(reason="XXX: Determine the desired behavior here.") def test_values(self): idx = pd.PeriodIndex([], freq='M') - exp = PeriodArray([], freq='M') - tm.assert_period_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp.values) + exp = np.array([], dtype=np.object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([], 
dtype=np.int64) tm.assert_numpy_array_equal(idx._ndarray_values, exp) idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') - exp = PeriodArray([pd.Period('2011-01', freq='M'), pd.NaT]) - tm.assert_period_array_equal(idx.values, exp) - tm.assert_numpy_array_equal(idx.get_values(), exp.values) + exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) + tm.assert_numpy_array_equal(idx.values, exp) + tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx._ndarray_values, exp) From 833899a732ccfccd883c4ec27c4ab6a44bfe9319 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 06:24:18 -0500 Subject: [PATCH 105/132] error catching --- pandas/tests/io/test_feather.py | 3 ++- pandas/tests/io/test_parquet.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 9d04111d64125..85fcaf430064f 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -99,7 +99,8 @@ def test_unsupported_other(self): # period df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) - self.check_error_on_write(df, ValueError) + # Some versions raise ValueError, others raise ArrowInvalid. + self.check_error_on_write(df, Exception) @pytest.mark.skipif(fv < LooseVersion('0.4.0'), reason='new in 0.4.0') def test_rw_nthreads(self): diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index bc94ed5f944d3..7dec50effb058 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -442,7 +442,8 @@ def test_unsupported(self, pa): df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) # pyarrow 0.11 raises ArrowTypeError # older pyarrows raise ArrowInvalid - self.check_error_on_write(df, pa, (ValueError, TypeError)) + # But on Py2 catching both those doesn't work? 
+ self.check_error_on_write(df, pa, Exception) # timedelta df = pd.DataFrame({'a': pd.timedelta_range('1 day', From 236b49c4b289aa9cb4567ef9fafc8d4ba847b3d3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 06:24:44 -0500 Subject: [PATCH 106/132] isort --- pandas/tests/arrays/test_period.py | 2 +- pandas/tests/extension/test_period.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 8e736d19a3042..010023336032a 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -1,5 +1,5 @@ -import pytest import numpy as np +import pytest import pandas as pd import pandas.util.testing as tm diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index b3f02d4f9a773..45479ad16d5ff 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -1,12 +1,12 @@ -import pytest import numpy as np +import pytest import pandas as pd import pandas.util.testing as tm from pandas._libs.tslib import iNaT -from pandas.tests.extension import base -from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.arrays import PeriodArray +from pandas.core.dtypes.dtypes import PeriodDtype +from pandas.tests.extension import base @pytest.fixture From 82303475c2382852e1ce7afd6f8c255221c468e5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 06:57:26 -0500 Subject: [PATCH 107/132] Avoid PeriodArray.values --- pandas/tests/arrays/test_period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 010023336032a..e119750f60b28 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -51,7 +51,7 @@ def test_setitem_raises(): (pd.date_range("2017", periods=3), None, [17167, 17168, 17169]), ]) def test_period_array_ok(data, freq, 
expected): - result = period_array(data, freq=freq).values + result = period_array(data, freq=freq).asi8 expected = np.asarray(expected, dtype=np.int64) tm.assert_numpy_array_equal(result, expected) From bf33a57269c74864cf51fe7800649e1ff5540373 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 06:57:38 -0500 Subject: [PATCH 108/132] clarify _box_func usage --- pandas/core/arrays/period.py | 9 +-------- pandas/core/indexes/period.py | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 627f0e3cddcad..ef2ae60cdf3ad 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -471,14 +471,7 @@ def _time_shift(self, n, freq=None): @property def _box_func(self): - # Used in DatelikeArray.__iter__ - # TODO: implement this in cython? - def func(x): - if isinstance(x, Period) or x is NaT: - return x - else: - return Period._from_ordinal(ordinal=x, freq=self.freq) - return func + return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq) def asfreq(self, freq=None, how='E'): """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 816f435b36701..e39d253ca5ada 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -346,13 +346,20 @@ def _shallow_copy_with_infer(self, values=None, **kwargs): """ we always want to return a PeriodIndex """ return self._shallow_copy(values=values, **kwargs) - # ------------------------------------------------------------------------ - # Boxing - # err. maybe not... - @property def _box_func(self): - return self._data._box_func + """Maybe box an ordinal or Period""" + # TODO(DatetimeArray): Avoid double-boxing + # PeriodArray takes care of boxing already, so we need to check + # whether we're given an ordinal or a Period. It seems like some + # places outside of indexes/period.py are calling this _box_func, + # but passing data that's already boxed. 
+ def func(x): + if isinstance(x, Period) or x is tslib.NaT: + return x + else: + return Period._from_ordinal(ordinal=x, freq=self.freq) + return func def _maybe_box_as_values(self, values, **attribs): """Box an array of ordinals to a PeriodArray From 032ec02a2a4c4cc033e1d3e9e1128ca53d5b617c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 07:34:11 -0500 Subject: [PATCH 109/132] TST: unxfail ops tests --- pandas/tests/arithmetic/test_period.py | 3 --- pandas/tests/extension/test_period.py | 10 +++++++++- pandas/tests/scalar/period/test_period.py | 1 - pandas/tests/series/test_operators.py | 10 ++++------ 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index dce192a948707..5e89e365ff3d8 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -307,7 +307,6 @@ def test_cmp_series_period_series_mixed_freq(self): class TestPeriodFrameArithmetic(object): - @pytest.mark.xfail(reason="GH-22922") def test_ops_frame_period(self): # GH#13043 df = pd.DataFrame({'A': [pd.Period('2015-01', freq='M'), @@ -389,7 +388,6 @@ def test_pi_sub_pi_with_nat(self): expected = pd.Index([pd.NaT, 0 * off, 0 * off, 0 * off, 0 * off]) tm.assert_index_equal(result, expected) - @pytest.mark.xfail(reason="GH-23155", strict=False) def test_parr_sub_pi_mismatched_freq(self, box_df_broadcast_failure): box = box_df_broadcast_failure @@ -816,7 +814,6 @@ def test_ops_series_timedelta(self): result = pd.tseries.offsets.Day() + ser tm.assert_series_equal(result, expected) - @pytest.mark.xfail(reason="GH-23155", strict=True) def test_ops_series_period(self): # GH 13043 ser = pd.Series([pd.Period('2015-01-01', freq='D'), diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 45479ad16d5ff..13d4b80badb8a 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -109,7 
+109,6 @@ def _check_divmod_op(self, s, op, other, exc=NotImplementedError): s, op, other, exc=TypeError ) - @pytest.mark.xfail(reason="GH-23155", strict=True) def test_add_series_with_extension_array(self, data): # we don't implement + for Period s = pd.Series(data) @@ -133,6 +132,15 @@ def _compare_other(self, s, data, op_name, other): # with (some) integers, depending on the value. pass + @pytest.mark.xfail(reason="DatetimeArray", strict=True) + def test_direct_arith_with_series_returns_not_implemented(self, data): + # Investigate returning NotImplemented here once all Datetimelike + # are EAs + return ( + super(TestComparisonOps, self) + .test_direct_arith_with_series_returns_not_implemented(data) + ) + class TestMissing(BasePeriodTests, base.BaseMissingTests): pass diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 0d6289c6888ad..c4c9a5f8452de 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -1045,7 +1045,6 @@ def test_add_raises(self): @pytest.mark.parametrize('lbox', boxes, ids=ids) @pytest.mark.parametrize('rbox', boxes, ids=ids) - @pytest.mark.xfail(reason="Gh-23155", strict=False) def test_add_timestamp_raises(self, rbox, lbox): # GH # 17983 ts = pd.Timestamp('2017') diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 78e71e35aeeff..32a687be77b95 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -553,12 +553,10 @@ def test_unequal_categorical_comparison_raises_type_error(self): ([pd.Timedelta('1 days'), NaT, pd.Timedelta('3 days')], [NaT, NaT, pd.Timedelta('3 days')]), - pytest.param( - ([pd.Period('2011-01', freq='M'), NaT, - pd.Period('2011-03', freq='M')], - [NaT, NaT, pd.Period('2011-03', freq='M')]), - marks=pytest.mark.xfail(reason="GH-23155", strict=False) - ), + ([pd.Period('2011-01', freq='M'), NaT, + pd.Period('2011-03', freq='M')], + [NaT, NaT, 
pd.Period('2011-03', freq='M')]), + ]) @pytest.mark.parametrize('reverse', [True, False]) @pytest.mark.parametrize('box', [Series, Index]) From 77e389a127419d530a478841a4c044d76d89ee05 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 19 Oct 2018 09:02:55 -0500 Subject: [PATCH 110/132] Avoid use of .values --- pandas/core/dtypes/missing.py | 12 ++++++++++-- pandas/util/testing.py | 2 +- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index e48d09ae9a96a..1800c32add9b1 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -187,10 +187,18 @@ def _use_inf_as_na(key): def _isna_ndarraylike(obj): - values = getattr(obj, 'values', obj) + is_extension = is_extension_array_dtype(obj) + + if not is_extension: + # Avoid accessing `.values` on things like + # PeriodIndex, which may be expensive. + values = getattr(obj, 'values', obj) + else: + values = obj + dtype = values.dtype - if is_extension_array_dtype(obj): + if is_extension: if isinstance(obj, (ABCIndexClass, ABCSeries)): values = obj._values else: diff --git a/pandas/util/testing.py b/pandas/util/testing.py index ddebe180a0aca..a6e5ad9a14d9b 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1055,7 +1055,7 @@ def assert_interval_array_equal(left, right, exact='equiv', def assert_period_array_equal(left, right, obj='PeriodArray'): _check_isinstance(left, right, PeriodArray) - assert_numpy_array_equal(left.values, right.values, + assert_numpy_array_equal(left._data, right._data, obj='{obj}.values'.format(obj=obj)) assert_attr_equal('freq', left, right, obj=obj) From 61031d73d2dcabf5a4088cac04f637b76b337824 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 09:17:25 -0500 Subject: [PATCH 111/132] __setitem__ type --- pandas/core/arrays/period.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/period.py 
b/pandas/core/arrays/period.py index ef2ae60cdf3ad..5c8604d2c1850 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -311,10 +311,12 @@ def __repr__(self): self.dtype ) - def __len__(self): - return len(self._data) - - def __setitem__(self, key, value): + def __setitem__( + self, + key, # type: Union[int, Sequence[int], Sequence[bool]] + value # type: Union[NaTType, Period, Sequence[Period]] + ): + # type: (...) -> None if is_list_like(value): if len(key) != len(value) and not com.is_bool_indexer(key): msg = ("shape mismatch: value array of length '{}' does not " From a094b3df8307855c8ecefae2f12822a41c5919e0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 10:15:02 -0500 Subject: [PATCH 112/132] Misc cleanups * docstring on PeriodArray * examples for period_array * remove _box_values_as_index * names * object_dtype * use __sub__ --- pandas/core/arrays/period.py | 36 +++++++++++++++++++++++++-- pandas/core/indexes/period.py | 12 ++------- pandas/core/series.py | 2 +- pandas/tests/extension/test_period.py | 6 +++++ 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 5c8604d2c1850..e1ecf5c64d1b1 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -114,12 +114,27 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): Users should use :func:`period_array` to create new instances. + Parameters + ---------- + values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex] + The data to store. These should be arrays that can be directly + converted to ordinals without inference or copy (PeriodArray, + ndarray[int64]), or a box around such an array (Series[period], + PeriodIndex). + freq : str or DateOffset + The `freq` to use for the array. Mostly applicable when `values` + is an ndarray of integers, when `freq` is required.
When `values` + is a PeriodArray (or box around), it's checked that ``values.freq`` + matches `freq`. + copy : bool, default False + Whether to copy the ordinals before storing. + Notes ----- There are two components to a PeriodArray - ordinals : integer ndarray - - freq : pd.tseries.offsets.Tick + - freq : pd.tseries.offsets.Offset The values are physically stored as a 1-D ndarray of integers. These are called "ordinals" and represent some kind of offset from a base. @@ -149,7 +164,6 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): # -------------------------------------------------------------------- # Constructors def __init__(self, values, freq=None, copy=False): - # type: (Union[PeriodArray, np.ndarray], Union[str, Tick]) -> None if isinstance(values, ABCSeries): values = values._values if not isinstance(values, type(self)): @@ -317,6 +331,11 @@ def __setitem__( value # type: Union[NaTType, Period, Sequence[Period]] ): # type: (...) -> None + # n.b. the type on `value` is a bit too restrictive. + # we also accept a sequence of stuff coercible to a PeriodArray + # by period_array, which includes things like ndarray[object], + # ndarray[datetime64ns]. I think ndarray[int] / ndarray[str] won't + # work, since the freq can't be inferred. 
if is_list_like(value): if len(key) != len(value) and not com.is_bool_indexer(key): msg = ("shape mismatch: value array of length '{}' does not " @@ -903,6 +922,19 @@ def period_array(data, freq=None, ordinal=None, copy=False): ['2017', '2018', 'NaT'] Length: 3, dtype: period[A-DEC] + + Integers that look like years are handled + + >>> period_array([2000, 2001, 2002], freq='D') + ['2000-01-01', '2001-01-01', '2002-01-01'] + Length: 3, dtype: period[D] + + Datetime-like strings may also be passed + + >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q') + + ['2000Q1', '2000Q2', '2000Q3', '2000Q4'] + Length: 4, dtype: period[Q-DEC] """ if data is None and ordinal is None: diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index e39d253ca5ada..f3e58a23405d9 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -64,8 +64,8 @@ def _new_PeriodIndex(cls, **d): values = d.pop('data') if values.dtype == 'int64': freq = d.pop('freq', None) - data = PeriodArray(values, freq=freq) - return cls._simple_new(data, **d) + values = PeriodArray(values, freq=freq) + return cls._simple_new(values, **d) else: return cls(values, **d) @@ -537,14 +537,6 @@ def asof_locs(self, where, mask): return result - def _box_values_as_index(self): - """ - return object Index which contains boxed values - """ - # TODO(DatetimeArray): remove - # Have to add our name. 
- return Index(self.values, name=self.name) - @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True, how='start'): dtype = pandas_dtype(dtype) diff --git a/pandas/core/series.py b/pandas/core/series.py index ae50e4d7b9e4e..d1cb10337f52f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4358,7 +4358,7 @@ def _try_cast(arr, take_fast_path): data = np.array(data, dtype=dtype, copy=False) subarr = np.array(data, dtype=object, copy=copy) - if subarr.dtype == 'object' and dtype != 'object': + if is_object_dtype(subarr.dtype) and dtype != 'object': inferred = lib.infer_dtype(subarr) if inferred == 'period': try: diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 13d4b80badb8a..4bec67c0988ce 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -120,6 +120,12 @@ def test_add_series_with_extension_array(self, data): def test_error(self): pass + def test_direct_arith_with_series_returns_not_implemented(self, data): + # Override to use __sub__ instead of __add__ + other = pd.Series(data) + result = data.__sub__(other) + assert result is NotImplemented + class TestCasting(BasePeriodTests, base.BaseCastingTests): pass From ace4856916f52ef2a05e26dfa0ca498b47fef1fa Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 10:37:58 -0500 Subject: [PATCH 113/132] lint --- pandas/core/arrays/period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index e1ecf5c64d1b1..26ef33250eb66 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -329,7 +329,7 @@ def __setitem__( self, key, # type: Union[int, Sequence[int], Sequence[bool]] value # type: Union[NaTType, Period, Sequence[Period]] - ): + ): # type: (...) -> None # n.b. the type on `value` is a bit too restrictive. 
# we also accept a sequence of stuff coercible to a PeriodArray From fc6a1c7e95ebe7c4bc005c0b34d1d0439d1149c3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 10:38:11 -0500 Subject: [PATCH 114/132] API: remove ordinal from period_array --- pandas/core/arrays/period.py | 34 ++++++++++++++---------------- pandas/core/indexes/period.py | 9 ++++++-- pandas/tests/arrays/test_period.py | 5 ----- 3 files changed, 23 insertions(+), 25 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 26ef33250eb66..940c0f2492470 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -882,7 +882,7 @@ def _addsub_int_array(self, other, op): # ------------------------------------------------------------------- # Constructor Helpers -def period_array(data, freq=None, ordinal=None, copy=False): +def period_array(data, freq=None, copy=False): # type: (Sequence[Optional[Period]], Optional[Tick]) -> PeriodArray """ Construct a new PeriodArray from a sequence of Period scalars. 
@@ -936,25 +936,16 @@ def period_array(data, freq=None, ordinal=None, copy=False): ['2000Q1', '2000Q2', '2000Q3', '2000Q4'] Length: 4, dtype: period[Q-DEC] """ + if is_datetime64_dtype(data): + return PeriodArray._from_datetime64(data, freq) + if isinstance(data, (ABCPeriodIndex, ABCSeries, PeriodArray)): + return PeriodArray(data, freq) - if data is None and ordinal is None: - raise ValueError("Must pass one of 'data' or 'ordinal'.") - elif data is None: - data = np.asarray(ordinal, dtype=np.int64) - if copy: - data = data.copy() - return PeriodArray(data, freq=freq) - else: - if is_datetime64_dtype(data): - return PeriodArray._from_datetime64(data, freq) - if isinstance(data, (ABCPeriodIndex, ABCSeries, PeriodArray)): - return PeriodArray(data, freq) - - # other iterable of some kind - if not isinstance(data, (np.ndarray, list, tuple)): - data = list(data) + # other iterable of some kind + if not isinstance(data, (np.ndarray, list, tuple)): + data = list(data) - data = np.asarray(data) + data = np.asarray(data) if freq: dtype = PeriodDtype(freq) @@ -970,6 +961,13 @@ def period_array(data, freq=None, ordinal=None, copy=False): return PeriodArray._from_sequence(data, dtype=dtype) +def _ordinal_to_periodarr(ordinal, freq, copy=False): + data = np.asarray(ordinal, dtype=np.int64) + if copy: + data = data.copy() + return PeriodArray(data, freq=freq) + + def dt64arr_to_periodarr(data, freq, tz=None): """ Convert an datetime-like array to values Period ordinals. diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f3e58a23405d9..4c7d9f9c96cd0 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -238,8 +238,13 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, # e.g. D -> 2D seems to be OK data = data.asfreq(freq) - data = period_array(data=data, ordinal=ordinal, freq=freq, - copy=copy) + if data is None and ordinal is not None: + # we strangely ignore `ordinal` if data is passed. 
+ ordinal = np.asarray(ordinal, dtype=np.int64) + data = PeriodArray(ordinal, freq) + else: + # don't pass copy here, since we copy later. + data = period_array(data=data, freq=freq) if copy: data = data.copy() diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index e119750f60b28..19e12f45b673c 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -76,11 +76,6 @@ def test_period_array_raises(data, freq, msg): period_array(data, freq) -def test_period_array_no_data(): - with tm.assert_raises_regex(ValueError, "one of"): - period_array(None) - - def test_asi8(): result = period_array(['2000', '2001', None], freq='D').asi8 expected = np.array([10957, 11323, iNaT]) From 900afcf0e874392a86211f69a3ee2d219fe63663 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 11:09:45 -0500 Subject: [PATCH 115/132] catch exception --- pandas/tests/io/test_parquet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 7dec50effb058..47728ad6dd4bc 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -442,7 +442,6 @@ def test_unsupported(self, pa): df = pd.DataFrame({'a': pd.period_range('2013', freq='M', periods=3)}) # pyarrow 0.11 raises ArrowTypeError # older pyarrows raise ArrowInvalid - # But on Py2 catching both those doesn't work? 
self.check_error_on_write(df, pa, Exception) # timedelta @@ -452,7 +451,9 @@ def test_unsupported(self, pa): # mixed python objects df = pd.DataFrame({'a': ['a', 1, 2.0]}) - self.check_error_on_write(df, pa, TypeError) + # pyarrow 0.11 raises ArrowTypeError + # older pyarrows raise ArrowInvalid + self.check_error_on_write(df, pa, Exception) def test_categorical(self, pa_ge_070): pa = pa_ge_070 From 0baa3e9971d7d3ef01513c9ce3d4a1d9c047444e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 11:51:09 -0500 Subject: [PATCH 116/132] misc cleanup --- pandas/core/arrays/categorical.py | 14 ++++++-------- pandas/core/arrays/datetimelike.py | 4 +--- pandas/core/indexes/base.py | 3 +-- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b4e191c09ef5b..64f64d929f0a0 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -343,7 +343,6 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, # a.) use categories, ordered # b.) use values.dtype # c.) infer from values - if dtype is not None: # The dtype argument takes precedence over values.dtype (if any) if isinstance(dtype, compat.string_types): @@ -2426,7 +2425,12 @@ def _get_codes_for_values(values, categories): from pandas.core.algorithms import _get_data_algo, _hashtables dtype_equal = is_dtype_equal(values.dtype, categories.dtype) - if is_extension_array_dtype(categories.dtype) and is_object_dtype(values): + if dtype_equal: + # To prevent erroneous dtype coercion in _get_data_algo, retrieve + # the underlying numpy array. gh-22702 + values = getattr(values, '_ndarray_values', values) + categories = getattr(categories, '_ndarray_values', categories) + elif is_extension_array_dtype(categories.dtype) and is_object_dtype(values): # Support inferring the correct extension dtype from an array of # scalar objects. e.g. 
# Categorical(array[Period, Period], categories=PeriodIndex(...)) @@ -2438,12 +2442,6 @@ def _get_codes_for_values(values, categories): # but that may fail for any reason, so fall back to object values = ensure_object(values) categories = ensure_object(categories) - - elif dtype_equal: - # To prevent erroneous dtype coercion in _get_data_algo, retrieve - # the underlying numpy array. gh-22702 - values = getattr(values, '_ndarray_values', values) - categories = getattr(categories, '_ndarray_values', categories) else: values = ensure_object(values) categories = ensure_object(categories) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index aa4512a44857e..905ab41287023 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -515,10 +515,8 @@ def _addsub_offset_array(self, other, op): left = lib.values_from_object(self.astype('O')) res_values = op(left, np.array(other)) - kwargs = {} if not is_period_dtype(self): - kwargs['freq'] = 'infer' - return type(self)(res_values, **kwargs) + return type(self)(res_values, freq='infer') return self._from_sequence(res_values) @deprecate_kwarg(old_arg_name='n', new_arg_name='periods') diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8aacd22e86bb3..e9b0b087179c9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -394,8 +394,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, # maybe coerce to a sub-class from pandas.core.indexes.period import ( PeriodIndex, IncompatibleFrequency) - if isinstance(data, PeriodIndex): - return PeriodIndex(data, copy=copy, name=name, **kwargs) + if is_signed_integer_dtype(data.dtype): from .numeric import Int64Index return Int64Index(data, copy=copy, dtype=dtype, name=name) From f95106e3097d010a8ded174f9a3d4cbba19ce916 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 12:04:24 -0500 Subject: [PATCH 117/132] Handle astype integer size --- 
pandas/core/arrays/categorical.py | 3 ++- pandas/core/arrays/period.py | 8 +++++++- pandas/tests/arrays/test_period.py | 12 ++++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 64f64d929f0a0..bdd141d4e1a84 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2430,7 +2430,8 @@ def _get_codes_for_values(values, categories): # the underlying numpy array. gh-22702 values = getattr(values, '_ndarray_values', values) categories = getattr(categories, '_ndarray_values', categories) - elif is_extension_array_dtype(categories.dtype) and is_object_dtype(values): + elif (is_extension_array_dtype(categories.dtype) and + is_object_dtype(values)): # Support inferring the correct extension dtype from an array of # scalar objects. e.g. # Categorical(array[Period, Period], categories=PeriodIndex(...)) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 940c0f2492470..95c8a9a401d23 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -828,8 +828,14 @@ def astype(self, dtype, copy=True): return self._format_native_types() elif is_integer_dtype(dtype): values = self._ndarray_values - if copy: + + if values.dtype != dtype: + # int32 vs. 
int64 + values = values.astype(dtype) + + elif copy: values = values.copy() + return values elif (is_datetime_or_timedelta_dtype(dtype) and not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype): diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 19e12f45b673c..4e64db422a630 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -6,6 +6,7 @@ from pandas._libs.tslibs import iNaT from pandas._libs.tslibs.period import IncompatibleFrequency from pandas.core.arrays import PeriodArray, period_array +from pandas.core.dtypes.common import pandas_dtype @pytest.mark.parametrize('key, value, expected', [ @@ -87,3 +88,14 @@ def test_take_raises(): with tm.assert_raises_regex(IncompatibleFrequency, 'freq'): arr.take([0, -1], allow_fill=True, fill_value=pd.Period('2000', freq='W')) + + +@pytest.mark.parametrize('dtype', [int, np.int32, np.int64]) +def test_astype(dtype): + # Need to ensure ordinals are astyped correctly for both + # int32 and 64 + arr = period_array(['2000', '2001', None], freq='D') + result = arr.astype(dtype) + # need pandas_dtype to handle int32 vs. 
int64 correctly + expected = pandas_dtype(dtype) + assert result.dtype == expected From e57e24a326252751fb10a120013f30f40c3714f0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 12:33:04 -0500 Subject: [PATCH 118/132] Bump test coverage --- pandas/core/arrays/period.py | 8 +- pandas/tests/arrays/test_period.py | 164 ++++++++++++++++++++++++----- 2 files changed, 142 insertions(+), 30 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 95c8a9a401d23..29e5c98f8a751 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -164,6 +164,9 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): # -------------------------------------------------------------------- # Constructors def __init__(self, values, freq=None, copy=False): + if freq is not None: + freq = Period._maybe_convert_freq(freq) + if isinstance(values, ABCSeries): values = values._values if not isinstance(values, type(self)): @@ -174,14 +177,15 @@ def __init__(self, values, freq=None, copy=False): if isinstance(values, type(self)): if freq is not None and freq != values.freq: - raise TypeError("freq does not match") + msg = DIFFERENT_FREQ_INDEX.format(values.freq.freqstr, + freq.freqstr) + raise IncompatibleFrequency(msg) values, freq = values._data, values.freq values = np.array(values, dtype='int64', copy=copy) self._data = values if freq is None: raise ValueError('freq is not specified and cannot be inferred') - freq = Period._maybe_convert_freq(freq) self._dtype = PeriodDtype(freq) @classmethod diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 4e64db422a630..2f8a6d4fd8566 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -7,36 +7,11 @@ from pandas._libs.tslibs.period import IncompatibleFrequency from pandas.core.arrays import PeriodArray, period_array from pandas.core.dtypes.common import pandas_dtype +from 
pandas.core.dtypes.dtypes import PeriodDtype -@pytest.mark.parametrize('key, value, expected', [ - ([0], pd.Period("2000", "D"), [10957, 1, 2]), - ([0], None, [iNaT, 1, 2]), - ([0], np.nan, [iNaT, 1, 2]), - ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3), - ([0, 1, 2], [pd.Period("2000", "D"), - pd.Period("2001", "D"), - pd.Period("2002", "D")], - [10957, 11323, 11688]), -]) -def test_setitem(key, value, expected): - arr = PeriodArray(np.arange(3), freq="D") - expected = PeriodArray(expected, freq="D") - arr[key] = value - tm.assert_period_array_equal(arr, expected) - - -def test_setitem_raises(): - arr = PeriodArray(np.arange(3), freq="D") - with tm.assert_raises_regex(IncompatibleFrequency, "freq"): - arr[0] = pd.Period("2000", freq="A") - - with tm.assert_raises_regex(ValueError, "length"): - arr[[0, 1]] = [pd.Period("2000", freq="D")] - - with tm.assert_raises_regex(TypeError, "int"): - arr[0] = 1 - +# ---------------------------------------------------------------------------- +# Constructors # period_array @@ -77,6 +52,21 @@ def test_period_array_raises(data, freq, msg): period_array(data, freq) +def test_period_array_non_period_series_raies(): + ser = pd.Series([1, 2, 3]) + with tm.assert_raises_regex(TypeError, 'dtype'): + PeriodArray(ser, freq='D') + + +def test_period_array_freq_mismatch(): + arr = period_array(['2000', '2001'], freq='D') + with tm.assert_raises_regex(IncompatibleFrequency, 'freq'): + PeriodArray(arr, freq='M') + + with tm.assert_raises_regex(IncompatibleFrequency, 'freq'): + PeriodArray(arr, freq=pd.tseries.offsets.MonthEnd()) + + def test_asi8(): result = period_array(['2000', '2001', None], freq='D').asi8 expected = np.array([10957, 11323, iNaT]) @@ -89,6 +79,9 @@ def test_take_raises(): arr.take([0, -1], allow_fill=True, fill_value=pd.Period('2000', freq='W')) + with tm.assert_raises_regex(ValueError, 'foo'): + arr.take([0, -1], allow_fill=True, fill_value='foo') + @pytest.mark.parametrize('dtype', [int, np.int32, np.int64]) def 
test_astype(dtype): @@ -99,3 +92,118 @@ def test_astype(dtype): # need pandas_dtype to handle int32 vs. int64 correctly expected = pandas_dtype(dtype) assert result.dtype == expected + + +def test_astype_copies(): + arr = period_array(['2000', '2001', None], freq='D') + result = arr.astype(np.int64, copy=False) + assert result is arr._ndarray_values + + result = arr.astype(np.int64, copy=True) + assert result is not arr._ndarray_values + + +def test_astype_categorical(): + arr = period_array(['2000', '2001', '2001', None], freq='D') + result = arr.astype('category') + categories = pd.PeriodIndex(['2000', '2001'], freq='D') + expected = pd.Categorical.from_codes([0, 1, 1, -1], categories=categories) + tm.assert_categorical_equal(result, expected) + + +def test_astype_period(): + arr = period_array(['2000', '2001', None], freq='D') + result = arr.astype(PeriodDtype("M")) + expected = period_array(['2000', '2001', None], freq='M') + tm.assert_period_array_equal(result, expected) + + +def test_astype_bool(): + arr = period_array([]) + +@pytest.mark.parametrize('other', [ + 'datetime64[ns]', 'timedelta64[ns]', +]) +def test_astype_datetime(other): + arr = period_array(['2000', '2001', None], freq='D') + # slice off the [ns] so that the regex matches. 
+ with tm.assert_raises_regex(TypeError, other[:-4]): + arr.astype(other) + + +def test_fillna_raises(): + arr = period_array(['2000', '2001', '2002'], freq='D') + with tm.assert_raises_regex(ValueError, 'Length'): + arr.fillna(arr[:2]) + + +def test_fillna_copies(): + arr = period_array(['2000', '2001', '2002'], freq='D') + result = arr.fillna(pd.Period("2000", "D")) + assert result is not arr + + +# ---------------------------------------------------------------------------- +# setitem + +@pytest.mark.parametrize('key, value, expected', [ + ([0], pd.Period("2000", "D"), [10957, 1, 2]), + ([0], None, [iNaT, 1, 2]), + ([0], np.nan, [iNaT, 1, 2]), + ([0, 1, 2], pd.Period("2000", "D"), [10957] * 3), + ([0, 1, 2], [pd.Period("2000", "D"), + pd.Period("2001", "D"), + pd.Period("2002", "D")], + [10957, 11323, 11688]), +]) +def test_setitem(key, value, expected): + arr = PeriodArray(np.arange(3), freq="D") + expected = PeriodArray(expected, freq="D") + arr[key] = value + tm.assert_period_array_equal(arr, expected) + + +def test_setitem_raises_incompatible_freq(): + arr = PeriodArray(np.arange(3), freq="D") + with tm.assert_raises_regex(IncompatibleFrequency, "freq"): + arr[0] = pd.Period("2000", freq="A") + + other = period_array(['2000', '2001'], freq='A') + with tm.assert_raises_regex(IncompatibleFrequency, "freq"): + arr[[0, 1]] = other + + +def test_setitem_raises_length(): + arr = PeriodArray(np.arange(3), freq="D") + with tm.assert_raises_regex(ValueError, "length"): + arr[[0, 1]] = [pd.Period("2000", freq="D")] + + +def test_setitem_raises_type(): + arr = PeriodArray(np.arange(3), freq="D") + with tm.assert_raises_regex(TypeError, "int"): + arr[0] = 1 + + +# ---------------------------------------------------------------------------- +# Ops + +def tet_sub_period(): + arr = period_array(['2000', '2001'], freq='D') + other = pd.Period("2000", freq="M") + with tm.assert_raises_regex(IncompatibleFrequency, "freq"): + arr - other + + +# 
---------------------------------------------------------------------------- +# other + +def test_maybe_convert_timedelta(): + arr = period_array(['2000', '2001'], freq='D') + offset = pd.tseries.offsets.Day(2) + assert arr._maybe_convert_timedelta(offset) == 2 + assert arr._maybe_convert_timedelta(2) == 2 + + offset = pd.tseries.offsets.BusinessDay() + with tm.assert_raises_regex(ValueError, 'freq'): + arr._maybe_convert_timedelta(offset) From ce1c970d317ab067b5736d9a2c1e96f0c8370687 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 13:16:03 -0500 Subject: [PATCH 119/132] remove partial test --- pandas/tests/arrays/test_period.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 2f8a6d4fd8566..38d612831e6eb 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -118,9 +118,6 @@ def test_astype_period(): tm.assert_period_array_equal(result, expected) -def test_astype_bool(): - arr = period_array([]) - @pytest.mark.parametrize('other', [ 'datetime64[ns]', 'timedelta64[ns]', ]) From a7e12169e04a61b88484f0a324e4a09f2a71e2f7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 13:21:32 -0500 Subject: [PATCH 120/132] close bracket --- pandas/core/arrays/period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 29e5c98f8a751..bb479aff2f668 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -116,7 +116,7 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin, ExtensionArray): Parameters ---------- - values : Union[PeriodArray, Series[period], ndarary[int], PeriodIndex + values : Union[PeriodArray, Series[period], ndarary[int], PeriodIndex] The data to store. 
These should be arrays that can be directly converted to ordinals without inference or copy (PeriodArray, ndarray[int64]), or a box around such an array (Series[period], From 2548d6a84f2964736bc69cbe7d42a8979a6b7a15 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 13:54:36 -0500 Subject: [PATCH 121/132] change the test --- pandas/tests/indexes/period/test_astype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/period/test_astype.py b/pandas/tests/indexes/period/test_astype.py index 504e89bd77774..a5042b8c714c8 100644 --- a/pandas/tests/indexes/period/test_astype.py +++ b/pandas/tests/indexes/period/test_astype.py @@ -27,7 +27,7 @@ def test_astype_conversion(self): [Period(NaT, freq='D')] * 3, dtype='object') tm.assert_index_equal(result, expected) - result = idx.astype(int) + result = idx.astype(np.int64) expected = Int64Index([16937] + [-9223372036854775808] * 3, dtype=np.int64) tm.assert_index_equal(result, expected) From 02e3863787855610f047727c5397a0940c0243b5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 13:57:01 -0500 Subject: [PATCH 122/132] isort --- pandas/tests/arrays/test_period.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 38d612831e6eb..696eb8137fab3 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -9,7 +9,6 @@ from pandas.core.dtypes.common import pandas_dtype from pandas.core.dtypes.dtypes import PeriodDtype - # ---------------------------------------------------------------------------- # Constructors From 1997cffee329dab7f3820ec93b5cbb0e2fe7a811 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 14:01:55 -0500 Subject: [PATCH 123/132] consistent _data --- pandas/core/arrays/period.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/period.py 
b/pandas/core/arrays/period.py index bb479aff2f668..76d254fcf2fec 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -284,7 +284,7 @@ def freq(self): @property def asi8(self): - return self._ndarray_values + return self._data # -------------------------------------------------------------------- # Vectorized analogues of Period properties @@ -400,7 +400,7 @@ def isna(self): def fillna(self, value=None, method=None, limit=None): # TODO(#20300) # To avoid converting to object, we re-implement here with the changes - # 1. Passing `_ndarray_values` to func instead of self.astype(object) + # 1. Passing `_data` to func instead of self.astype(object) # 2. Re-boxing output of 1. # #20300 should let us do this kind of logic on ExtensionArray.fillna # and we can use it. @@ -421,7 +421,7 @@ def fillna(self, value=None, method=None, limit=None): if mask.any(): if method is not None: func = pad_1d if method == 'pad' else backfill_1d - new_values = func(self._ndarray_values, limit=limit, + new_values = func(self._data, limit=limit, mask=mask) new_values = type(self)(new_values, freq=self.freq) else: @@ -489,7 +489,7 @@ def _time_shift(self, n, freq=None): freq : pandas.DateOffset, pandas.Timedelta, or string Frequency increment to shift by. 
""" - values = self._ndarray_values + n * self.freq.n + values = self._data + n * self.freq.n if self.hasnans: values[self._isnan] = iNaT return type(self)(values, freq=self.freq) @@ -595,8 +595,7 @@ def to_timestamp(self, freq=None, how='start'): base, mult = frequencies.get_freq_code(freq) new_data = self.asfreq(freq, how=how) - new_data = libperiod.periodarr_to_dt64arr(new_data._ndarray_values, - base) + new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base) return DatetimeArrayMixin(new_data, freq='infer') # ------------------------------------------------------------------ @@ -805,11 +804,9 @@ def repeat(self, repeats, *args, **kwargs): -------- numpy.ndarray.repeat """ - # TODO: Share with Categorical.repeat? - # need to use ndarray_values in Categorical - # and some kind of _constructor (from_ordinals, from_codes). + # TODO(DatetimeArray): remove nv.validate_repeat(args, kwargs) - values = self._ndarray_values.repeat(repeats) + values = self._data.repeat(repeats) return type(self)(values, self.freq) # Delegation... @@ -831,7 +828,7 @@ def astype(self, dtype, copy=True): elif is_string_dtype(dtype) and not is_categorical_dtype(dtype): return self._format_native_types() elif is_integer_dtype(dtype): - values = self._ndarray_values + values = self._data if values.dtype != dtype: # int32 vs. int64 @@ -857,7 +854,7 @@ def astype(self, dtype, copy=True): def _item(self): if len(self) == 1: # IndexOpsMixin will catch and re-raise IndexErrors - return Period._from_ordinal(self._ndarray_values[0], self.freq) + return Period._from_ordinal(self._data[0], self.freq) else: raise ValueError('can only convert an array of size 1 to a ' 'Python scalar') @@ -868,7 +865,7 @@ def flags(self): # We need this since reduction.SeriesBinGrouper uses values.flags # Ideally, we wouldn't be passing objects down there in the first # place. 
- return self._ndarray_values.flags + return self._data.flags # ------------------------------------------------------------------------ # Ops From af2d1de513f4d78878f17b2622d9183a53ad692a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 15:03:43 -0500 Subject: [PATCH 124/132] lint --- pandas/tests/arrays/test_period.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index 696eb8137fab3..e1b493c17587a 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -14,6 +14,7 @@ # period_array + @pytest.mark.parametrize("data, freq, expected", [ ([pd.Period("2017", "D")], None, [17167]), ([pd.Period("2017", "D")], "D", [17167]), From 415151039f2ce96572c11acd430b63a749412ef1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 20:43:08 -0500 Subject: [PATCH 125/132] ndarray_values -> asi8 --- pandas/core/arrays/period.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 76d254fcf2fec..9f98a2afd9ea9 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -54,7 +54,7 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = frequencies.get_freq_code(self.freq) - result = get_period_field_arr(alias, self._ndarray_values, base) + result = get_period_field_arr(alias, self.asi8, base) return result f.__name__ = name @@ -70,7 +70,7 @@ def _period_array_cmp(cls, op): nat_result = True if opname == '__ne__' else False def wrapper(self, other): - op = getattr(self._ndarray_values, opname) + op = getattr(self.asi8, opname) if isinstance(other, (ABCSeries, ABCIndexClass)): # TODO: return NotImplemented? 
other = other._values @@ -86,7 +86,7 @@ def wrapper(self, other): msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - result = op(other._ndarray_values) + result = op(other.asi8) mask = self._isnan | other._isnan if mask.any(): @@ -94,7 +94,7 @@ def wrapper(self, other): return result elif other is NaT: - result = np.empty(len(self._ndarray_values), dtype=bool) + result = np.empty(len(self.asi8), dtype=bool) result.fill(nat_result) else: other = Period(other, freq=self.freq) @@ -210,7 +210,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): return cls(ordinals, freq=freq) def _values_for_factorize(self): - return self._ndarray_values, iNaT + return self.asi8, iNaT @classmethod def _from_factorized(cls, values, original): @@ -277,15 +277,15 @@ def _ndarray_values(self): # Ordinals return self._data + @property + def asi8(self): + return self._data + @property def freq(self): """Return the frequency object for this PeriodArray.""" return self.dtype.freq - @property - def asi8(self): - return self._data - # -------------------------------------------------------------------- # Vectorized analogues of Period properties From ac9bd41bc9a2375bdcfec94877c0aee25c0fa046 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 20:56:22 -0500 Subject: [PATCH 126/132] colocate ops --- pandas/core/arrays/period.py | 163 +++++++++++++++++------------------ 1 file changed, 80 insertions(+), 83 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 9f98a2afd9ea9..506d8b9f2d405 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -598,87 +598,6 @@ def to_timestamp(self, freq=None, how='start'): new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base) return DatetimeArrayMixin(new_data, freq='infer') - # ------------------------------------------------------------------ - # Arithmetic Methods - _create_comparison_method = 
classmethod(_period_array_cmp) - - def _sub_datelike(self, other): - assert other is not NaT - return NotImplemented - - def _sub_period(self, other): - # If the operation is well-defined, we return an object-Index - # of DateOffsets. Null entries are filled with pd.NaT - if self.freq != other.freq: - msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) - raise IncompatibleFrequency(msg) - - asi8 = self.asi8 - new_data = asi8 - other.ordinal - new_data = np.array([self.freq * x for x in new_data]) - - if self.hasnans: - new_data[self._isnan] = NaT - - return new_data - - def _add_offset(self, other): - assert not isinstance(other, Tick) - base = frequencies.get_base_alias(other.rule_code) - if base != self.freq.rule_code: - msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) - raise IncompatibleFrequency(msg) - return self._time_shift(other.n) - - def _add_delta_td(self, other): - assert isinstance(self.freq, Tick) # checked by calling function - assert isinstance(other, (timedelta, np.timedelta64, Tick)) - - delta = self._check_timedeltalike_freq_compat(other) - - # Note: when calling parent class's _add_delta_td, it will call - # delta_to_nanoseconds(delta). Because delta here is an integer, - # delta_to_nanoseconds will return it unchanged. - ordinals = super(PeriodArray, self)._add_delta_td(delta) - return type(self)(ordinals, self.freq) - - def _add_delta_tdi(self, other): - assert isinstance(self.freq, Tick) # checked by calling function - - delta = self._check_timedeltalike_freq_compat(other) - return self._addsub_int_array(delta, operator.add) - - def _add_delta(self, other): - """ - Add a timedelta-like, Tick, or TimedeltaIndex-like object - to self. 
- - Parameters - ---------- - other : {timedelta, np.timedelta64, Tick, - TimedeltaIndex, ndarray[timedelta64]} - - Returns - ------- - result : same type as self - """ - if not isinstance(self.freq, Tick): - # We cannot add timedelta-like to non-tick PeriodArray - raise IncompatibleFrequency("Input has different freq from " - "{cls}(freq={freqstr})" - .format(cls=type(self).__name__, - freqstr=self.freqstr)) - - # TODO: standardize across datetimelike subclasses whether to return - # i8 view or _shallow_copy - if isinstance(other, (Tick, timedelta, np.timedelta64)): - return self._add_delta_td(other) - elif is_timedelta64_dtype(other): - # ndarray[timedelta64] or TimedeltaArray/index - return self._add_delta_tdi(other) - else: # pragma: no cover - raise TypeError(type(other).__name__) - def _maybe_convert_timedelta(self, other): """ Convert timedelta-like input to an integer multiple of self.freq @@ -867,8 +786,29 @@ def flags(self): # place. return self._data.flags - # ------------------------------------------------------------------------ - # Ops + # ------------------------------------------------------------------ + # Arithmetic Methods + _create_comparison_method = classmethod(_period_array_cmp) + + def _sub_datelike(self, other): + assert other is not NaT + return NotImplemented + + def _sub_period(self, other): + # If the operation is well-defined, we return an object-Index + # of DateOffsets. 
Null entries are filled with pd.NaT + if self.freq != other.freq: + msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) + raise IncompatibleFrequency(msg) + + asi8 = self.asi8 + new_data = asi8 - other.ordinal + new_data = np.array([self.freq * x for x in new_data]) + + if self.hasnans: + new_data[self._isnan] = NaT + + return new_data def _addsub_int_array(self, other, op): assert op in [operator.add, operator.sub] @@ -881,6 +821,63 @@ def _addsub_int_array(self, other, op): res_values[self._isnan] = iNaT return type(self)(res_values, freq=self.freq) + def _add_offset(self, other): + assert not isinstance(other, Tick) + base = frequencies.get_base_alias(other.rule_code) + if base != self.freq.rule_code: + msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) + raise IncompatibleFrequency(msg) + return self._time_shift(other.n) + + def _add_delta_td(self, other): + assert isinstance(self.freq, Tick) # checked by calling function + assert isinstance(other, (timedelta, np.timedelta64, Tick)) + + delta = self._check_timedeltalike_freq_compat(other) + + # Note: when calling parent class's _add_delta_td, it will call + # delta_to_nanoseconds(delta). Because delta here is an integer, + # delta_to_nanoseconds will return it unchanged. + ordinals = super(PeriodArray, self)._add_delta_td(delta) + return type(self)(ordinals, self.freq) + + def _add_delta_tdi(self, other): + assert isinstance(self.freq, Tick) # checked by calling function + + delta = self._check_timedeltalike_freq_compat(other) + return self._addsub_int_array(delta, operator.add) + + def _add_delta(self, other): + """ + Add a timedelta-like, Tick, or TimedeltaIndex-like object + to self. 
+ + Parameters + ---------- + other : {timedelta, np.timedelta64, Tick, + TimedeltaIndex, ndarray[timedelta64]} + + Returns + ------- + result : same type as self + """ + if not isinstance(self.freq, Tick): + # We cannot add timedelta-like to non-tick PeriodArray + raise IncompatibleFrequency("Input has different freq from " + "{cls}(freq={freqstr})" + .format(cls=type(self).__name__, + freqstr=self.freqstr)) + + # TODO: standardize across datetimelike subclasses whether to return + # i8 view or _shallow_copy + if isinstance(other, (Tick, timedelta, np.timedelta64)): + return self._add_delta_td(other) + elif is_timedelta64_dtype(other): + # ndarray[timedelta64] or TimedeltaArray/index + return self._add_delta_tdi(other) + else: # pragma: no cover + raise TypeError(type(other).__name__) + PeriodArray._add_comparison_ops() PeriodArray._add_datetimelike_methods() From 5462bd7c46b93e79b768cdedef38ea165d608a2f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 20:56:35 -0500 Subject: [PATCH 127/132] refactor PeriodIndex.item remove unused method --- pandas/core/arrays/period.py | 15 --------------- pandas/core/indexes/period.py | 7 +++---- pandas/tests/indexes/period/test_construction.py | 1 - 3 files changed, 3 insertions(+), 20 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 506d8b9f2d405..74c9c627ba524 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -770,14 +770,6 @@ def astype(self, dtype, copy=True): else: return np.asarray(self, dtype=dtype) - def _item(self): - if len(self) == 1: - # IndexOpsMixin will catch and re-raise IndexErrors - return Period._from_ordinal(self._data[0], self.freq) - else: - raise ValueError('can only convert an array of size 1 to a ' - 'Python scalar') - @property def flags(self): # TODO: remove @@ -965,13 +957,6 @@ def period_array(data, freq=None, copy=False): return PeriodArray._from_sequence(data, dtype=dtype) -def 
_ordinal_to_periodarr(ordinal, freq, copy=False): - data = np.asarray(ordinal, dtype=np.int64) - if copy: - data = data.copy() - return PeriodArray(data, freq=freq) - - def dt64arr_to_periodarr(data, freq, tz=None): """ Convert an datetime-like array to values Period ordinals. diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 4c7d9f9c96cd0..ddb59c01542e7 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -950,10 +950,9 @@ def item(self): scalar """ # TODO(DatetimeArray): remove - # override to use _item - try: - return self._data._item() - except IndexError: + if len(self) == 1: + return self[0] + else: # copy numpy's message here because Py26 raises an IndexError raise ValueError('can only convert an array of size 1 to a ' 'Python scalar') diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index e4c73aa69a015..e1cefaf5905ad 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -464,7 +464,6 @@ def test_map_with_string_constructor(self): class TestSeriesPeriod(object): - # TODO: many more tests def setup_method(self, method): self.series = Series(period_range('2000-01-01', periods=10, freq='D')) From c1c642828fff70c7b31ca310621859e3fd89a051 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 19 Oct 2018 21:47:53 -0500 Subject: [PATCH 128/132] return NotImplemented for Series / Index --- pandas/core/arrays/period.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 74c9c627ba524..232bf93cae948 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -71,8 +71,12 @@ def _period_array_cmp(cls, op): def wrapper(self, other): op = getattr(self.asi8, opname) - if isinstance(other, (ABCSeries, ABCIndexClass)): - # TODO: return NotImplemented? 
+ # We want to eventually defer to the Series or PeriodIndex (which will + # return here with an unboxed PeriodArray). But before we do that, + # we do a bit of validation on type (Period) and freq, so that our + # error messages are sensible + not_implemented = isinstance(other, (ABCSeries, ABCIndexClass)) + if not_implemented: other = other._values if isinstance(other, Period): @@ -86,6 +90,8 @@ def wrapper(self, other): msg = DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) + if not_implemented: + return NotImplemented result = op(other.asi8) mask = self._isnan | other._isnan From 7ab273655e6842f8af540d0db07f91e5a0b645f0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 20 Oct 2018 06:38:15 -0500 Subject: [PATCH 129/132] remove xpass --- pandas/tests/extension/test_period.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 4bec67c0988ce..6f59cbb66a145 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -138,15 +138,6 @@ def _compare_other(self, s, data, op_name, other): # with (some) integers, depending on the value. 
pass - @pytest.mark.xfail(reason="DatetimeArray", strict=True) - def test_direct_arith_with_series_returns_not_implemented(self, data): - # Investigate returning NotImplemented here once all Datetimelike - # are EAs - return ( - super(TestComparisonOps, self) - .test_direct_arith_with_series_returns_not_implemented(data) - ) - class TestMissing(BasePeriodTests, base.BaseMissingTests): pass From bd6f966607554fdd252688731802a195ff47dd42 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 22 Oct 2018 14:57:33 -0500 Subject: [PATCH 130/132] release note --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 51fff33261a7a..0ea2a27ae390e 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -732,6 +732,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`) - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`). - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`) +- :meth:`Series.unstack` no longer converts extension arrays to object-dtype ndarrays. The output ``DataFrame`` will now have the same dtype as the input. This changes behavior for Categorical and Sparse data (:issue:`23077`). .. 
_whatsnew_0240.api.incompatibilities: From 5691506d898b5fa8a2a98d355588a9ec29cb719e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 23 Oct 2018 10:08:22 -0500 Subject: [PATCH 131/132] types, use data --- pandas/core/arrays/period.py | 7 ++++++- pandas/tests/arrays/test_period.py | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index f885486d4cbf5..085298d8324c5 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -810,7 +810,12 @@ def _sub_period(self, other): return new_data - def _addsub_int_array(self, other, op): + def _addsub_int_array( + self, + other, # type: Union[Index, ExtensionArray, np.ndarray[int]] + op, # type: Callable[Any, Any] + ): + # type: (...) -> PeriodArray assert op in [operator.add, operator.sub] # easy case for PeriodIndex if op is operator.sub: diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index e1b493c17587a..780df579d2778 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -97,10 +97,10 @@ def test_astype(dtype): def test_astype_copies(): arr = period_array(['2000', '2001', None], freq='D') result = arr.astype(np.int64, copy=False) - assert result is arr._ndarray_values + assert result is arr._data result = arr.astype(np.int64, copy=True) - assert result is not arr._ndarray_values + assert result is not arr._data def test_astype_categorical(): From 575d61a85a715a1040cc89c1f6c4b714d7ee20bc Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 24 Oct 2018 07:18:25 -0500 Subject: [PATCH 132/132] remove ufunc xpass --- pandas/core/series.py | 3 ++- pandas/tests/arithmetic/test_period.py | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d1cb10337f52f..d3ea005d3aae7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -644,7 +644,8 @@ def __array_prepare__(self, 
result, context=None): # nice error message for non-ufunc types if (context is not None and - not isinstance(self._values, (np.ndarray, ABCSparseArray))): + (not isinstance(self._values, (np.ndarray, ExtensionArray)) + or isinstance(self._values, Categorical))): obj = context[1][0] raise TypeError("{obj} with dtype {dtype} cannot perform " "the numpy op {op}".format( diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 5e89e365ff3d8..f3676d508504b 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -904,7 +904,6 @@ def test_pi_ops_errors(self, ng): with pytest.raises(TypeError): np.subtract(ng, obj) - @pytest.mark.xfail(reason="GH-22798", strict=True) def test_pi_ops_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M', name='idx') @@ -929,7 +928,6 @@ def test_pi_ops_nat(self): self._check(idx + 3, lambda x: x - 3, idx) self._check(idx + 3, lambda x: np.subtract(x, 3), idx) - @pytest.mark.xfail(reason="GH-22798", strict=True) def test_pi_ops_array_int(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M', name='idx')