From 7469ca94ef719f3b0a6ea5117afce1b7e2519930 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 23 May 2018 06:35:42 -0400 Subject: [PATCH 01/12] BUG: Series.combine() fails with ExtensionArray inside of Series --- doc/source/whatsnew/v0.23.1.txt | 1 + pandas/core/arrays/base.py | 11 +++++ pandas/core/series.py | 48 +++++++++++++++---- .../extension/category/test_categorical.py | 13 +++++ pandas/tests/test_window.py | 7 +-- pandas/util/testing.py | 13 +++-- 6 files changed, 76 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index a071d7f3f5534..b987b249868fc 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -47,6 +47,7 @@ Bug Fixes ~~~~~~~~~ - tab completion on :class:`Index` in IPython no longer outputs deprecation warnings (:issue:`21125`) +- Series.combine() no longer fails with ExtensionArray inside of Series (:issue:`20825`) Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1922801c30719..9d3a767382359 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -610,3 +610,14 @@ def _ndarray_values(self): used for interacting with our indexers. """ return np.array(self) + + # ------------------------------------------------------------------------ + # Utilities for use by subclasses + # ------------------------------------------------------------------------ + def is_sequence_of_dtype(self, seq): + """ + Given a sequence, determine whether all members have the appropriate + type for this instance of an ExtensionArray + """ + thistype = self.dtype.type + return all(isinstance(i, thistype) for i in seq) diff --git a/pandas/core/series.py b/pandas/core/series.py index c9329e8b9e572..ce1992ed0d6e4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2185,10 +2185,26 @@ def _binop(self, other, func, level=None, fill_value=None): this_vals, other_vals = ops.fill_binop(this.values, other.values, fill_value) - - with np.errstate(all='ignore'): - result = func(this_vals, other_vals) name = ops.get_op_result_name(self, other) + + if is_extension_array_dtype(this) or is_extension_array_dtype(other): + try: + result = func(this_vals, other_vals) + except TypeError: + result = NotImplemented + except Exception as e: + raise e + + if result is NotImplemented: + result = [func(a, b) for a, b in zip(this_vals, other_vals)] + if is_extension_array_dtype(this): + excons = type(this_vals)._from_sequence + else: + excons = type(other_vals)._from_sequence + result = excons(result) + else: + with np.errstate(all='ignore'): + result = func(this_vals, other_vals) result = self._constructor(result, index=new_index, name=name) result = result.__finalize__(self) if name is None: @@ -2196,7 +2212,7 @@ def _binop(self, other, func, level=None, fill_value=None): result.name = None return result - def combine(self, other, func, fill_value=np.nan): + def combine(self, other, func, fill_value=None): """ Perform elementwise binary operation on two Series using given function with optional fill value when an index is missing from one Series or @@ -2208,6 +2224,9 @@ def combine(self, other, func, fill_value=np.nan): func : function Function that takes two scalars as inputs and return a scalar fill_value : scalar value + The default specifies to use np.nan unless self is + backed by ExtensionArray, in which case the ExtensionArray + na_value is used. Returns ------- @@ -2227,20 +2246,33 @@ def combine(self, other, func, fill_value=np.nan): Series.combine_first : Combine Series values, choosing the calling Series's values first """ + self_is_ext = is_extension_array_dtype(self) + if fill_value is None: + if self_is_ext: + fill_value = self.dtype.na_value + else: + fill_value = np.nan if isinstance(other, Series): new_index = self.index.union(other.index) new_name = ops.get_op_result_name(self, other) - new_values = np.empty(len(new_index), dtype=self.dtype) + new_values = [] for i, idx in enumerate(new_index): lv = self.get(idx, fill_value) rv = other.get(idx, fill_value) with np.errstate(all='ignore'): - new_values[i] = func(lv, rv) + new_values.append(func(lv, rv)) else: new_index = self.index - with np.errstate(all='ignore'): - new_values = func(self._values, other) + if not self_is_ext: + with np.errstate(all='ignore'): + new_values = func(self._values, other) + else: + new_values = [func(lv, other) for lv in self._values] new_name = self.name + + if (self_is_ext and self.values.is_sequence_of_dtype(new_values)): + new_values = self._values._from_sequence(new_values) + return self._constructor(new_values, index=new_index, name=new_name) def combine_first(self, other): diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 530a4e7a22a7a..3e9a97cfae402 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -2,6 +2,9 @@ import pytest import numpy as np +import pandas as pd + +import pandas.util.testing as tm from pandas.api.types import CategoricalDtype from pandas import Categorical @@ -157,3 +160,13 @@ def test_value_counts(self, all_data, dropna): class TestCasting(base.BaseCastingTests): pass + + +def test_combine(): + orig_data1 = make_data() + orig_data2 = make_data() + s1 = pd.Series(Categorical(orig_data1, ordered=True)) + s2 = pd.Series(Categorical(orig_data2, ordered=True)) + result = s1.combine(s2, lambda x1, x2: x1 <= x2) + expected = pd.Series([a <= b for (a, b) in zip(orig_data1, orig_data2)]) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index d8e90ae0e1b35..74f2c977e0db2 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -41,7 +41,7 @@ def win_types(request): return request.param -@pytest.fixture(params=['kaiser', 'gaussian', 'general_gaussian', 'slepian']) +@pytest.fixture(params=['kaiser', 'gaussian', 'general_gaussian']) def win_types_special(request): return request.param @@ -1079,8 +1079,7 @@ def test_cmov_window_special(self, win_types_special): kwds = { 'kaiser': {'beta': 1.}, 'gaussian': {'std': 1.}, - 'general_gaussian': {'power': 2., 'width': 2.}, - 'slepian': {'width': 0.5}} + 'general_gaussian': {'power': 2., 'width': 2.}} vals = np.array([6.95, 15.21, 4.72, 9.12, 13.81, 13.49, 16.68, 9.48, 10.63, 14.48]) @@ -1090,8 +1089,6 @@ def test_cmov_window_special(self, win_types_special): 13.65671, 12.01002, np.nan, np.nan], 'general_gaussian': [np.nan, np.nan, 9.85011, 10.71589, 11.73161, 13.08516, 12.95111, 12.74577, np.nan, np.nan], - 'slepian': [np.nan, np.nan, 9.81073, 10.89359, 11.70284, 12.88331, - 12.96079, 12.77008, np.nan, np.nan], 'kaiser': [np.nan, np.nan, 9.86851, 11.02969, 11.65161, 12.75129, 12.90702, 12.83757, np.nan, np.nan] } diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 233eba6490937..b7ea52210369a 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -30,7 +30,7 @@ is_categorical_dtype, is_interval_dtype, is_sequence, - is_list_like) + is_list_like, is_extension_array_dtype) from pandas.io.formats.printing import pprint_thing from pandas.core.algorithms import take_1d import pandas.core.common as com @@ -1118,10 +1118,12 @@ def assert_extension_array_equal(left, right): right_na = right.isna() assert_numpy_array_equal(left_na, right_na) - left_valid = left[~left_na].astype(object) - right_valid = right[~right_na].astype(object) + if len(left_na) > 0 and len(right_na) > 0: - assert_numpy_array_equal(left_valid, right_valid) + left_valid = left[~left_na].astype(object) + right_valid = right[~right_na].astype(object) + + assert_numpy_array_equal(left_valid, right_valid) # This could be refactored to use the NDFrame.equals method @@ -1224,6 +1226,9 @@ def assert_series_equal(left, right, check_dtype=True, left = pd.IntervalIndex(left) right = pd.IntervalIndex(right) assert_index_equal(left, right, obj='{obj}.index'.format(obj=obj)) + elif (is_extension_array_dtype(left) and not is_categorical_dtype(left) and + is_extension_array_dtype(right) and not is_categorical_dtype(right)): + return assert_extension_array_equal(left.values, right.values) else: _testing.assert_almost_equal(left.get_values(), right.get_values(), From 339b23a6b8203438fc6e7a36d5ae3bf191f356b5 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Wed, 23 May 2018 17:50:18 -0400 Subject: [PATCH 02/12] Fixes as requested by pandas maintainers --- doc/source/whatsnew/v0.23.1.txt | 2 +- pandas/core/series.py | 4 +--- .../extension/category/test_categorical.py | 18 ++++++++++-------- pandas/tests/extension/decimal/test_decimal.py | 11 +++++++++++ pandas/util/testing.py | 13 ++++--------- 5 files changed, 27 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index b987b249868fc..08faccaadf30a 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -47,7 +47,7 @@ Bug Fixes ~~~~~~~~~ - tab completion on :class:`Index` in IPython no longer outputs deprecation warnings (:issue:`21125`) -- Series.combine() no longer fails with ExtensionArray inside of Series (:issue:`20825`) +- :meth:`Series.combine()` no longer fails with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/series.py b/pandas/core/series.py index ce1992ed0d6e4..da02002b8a487 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2192,8 +2192,6 @@ def _binop(self, other, func, level=None, fill_value=None): result = func(this_vals, other_vals) except TypeError: result = NotImplemented - except Exception as e: - raise e if result is NotImplemented: result = [func(a, b) for a, b in zip(this_vals, other_vals)] @@ -2256,7 +2254,7 @@ def combine(self, other, func, fill_value=None): new_index = self.index.union(other.index) new_name = ops.get_op_result_name(self, other) new_values = [] - for i, idx in enumerate(new_index): + for idx in new_index: lv = self.get(idx, fill_value) rv = other.get(idx, fill_value) with np.errstate(all='ignore'): diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 3e9a97cfae402..248af368fd6d6 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -157,16 +157,18 @@ class TestMethods(base.BaseMethodsTests): def test_value_counts(self, all_data, dropna): pass + def test_combine(self): + # GH 20825 + orig_data1 = make_data() + orig_data2 = make_data() + s1 = pd.Series(Categorical(orig_data1, ordered=True)) + s2 = pd.Series(Categorical(orig_data2, ordered=True)) + result = s1.combine(s2, lambda x1, x2: x1 <= x2) + expected = pd.Series([a <= b for (a, b) in zip(orig_data1, orig_data2)]) + tm.assert_series_equal(result, expected) + class TestCasting(base.BaseCastingTests): pass -def test_combine(): - orig_data1 = make_data() - orig_data2 = make_data() - s1 = pd.Series(Categorical(orig_data1, ordered=True)) - s2 = pd.Series(Categorical(orig_data2, ordered=True)) - result = s1.combine(s2, lambda x1, x2: x1 <= x2) - expected = pd.Series([a <= b for (a, b) in zip(orig_data1, orig_data2)]) - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 1f8cf0264f62f..8463d36cca69c 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -138,6 +138,17 @@ def test_value_counts(self, all_data, dropna): tm.assert_series_equal(result, expected) + def test_combine(self): + # GH 20825 + orig_data1 = make_data() + orig_data2 = make_data() + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 <= x2) + expected = pd.Series([a <= b for (a, b) in zip(orig_data1, orig_data2)]) + tm.assert_series_equal(result, expected) + + class TestCasting(BaseDecimal, base.BaseCastingTests): pass diff --git a/pandas/util/testing.py b/pandas/util/testing.py index b7ea52210369a..233eba6490937 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -30,7 +30,7 @@ is_categorical_dtype, is_interval_dtype, is_sequence, - is_list_like, is_extension_array_dtype) + is_list_like) from pandas.io.formats.printing import pprint_thing from pandas.core.algorithms import take_1d import pandas.core.common as com @@ -1118,12 +1118,10 @@ def assert_extension_array_equal(left, right): right_na = right.isna() assert_numpy_array_equal(left_na, right_na) - if len(left_na) > 0 and len(right_na) > 0: + left_valid = left[~left_na].astype(object) + right_valid = right[~right_na].astype(object) - left_valid = left[~left_na].astype(object) - right_valid = right[~right_na].astype(object) - - assert_numpy_array_equal(left_valid, right_valid) + assert_numpy_array_equal(left_valid, right_valid) # This could be refactored to use the NDFrame.equals method @@ -1226,9 +1224,6 @@ def assert_series_equal(left, right, check_dtype=True, left = pd.IntervalIndex(left) right = pd.IntervalIndex(right) assert_index_equal(left, right, obj='{obj}.index'.format(obj=obj)) - elif (is_extension_array_dtype(left) and not is_categorical_dtype(left) and - is_extension_array_dtype(right) and not is_categorical_dtype(right)): - return assert_extension_array_equal(left.values, right.values) else: _testing.assert_almost_equal(left.get_values(), right.get_values(), From 61a09e719bcf0f51179324d0a96ba3fec9e1981c Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Wed, 23 May 2018 17:56:04 -0400 Subject: [PATCH 03/12] Fix PEP8 issues --- pandas/tests/extension/category/test_categorical.py | 5 ++--- pandas/tests/extension/decimal/test_decimal.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 248af368fd6d6..9a8939fa3e7dc 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -164,11 +164,10 @@ def test_combine(self): s1 = pd.Series(Categorical(orig_data1, ordered=True)) s2 = pd.Series(Categorical(orig_data2, ordered=True)) result = s1.combine(s2, lambda x1, x2: x1 <= x2) - expected = pd.Series([a <= b for (a, b) in zip(orig_data1, orig_data2)]) + expected = pd.Series([a <= b for (a, b) in + zip(orig_data1, orig_data2)]) tm.assert_series_equal(result, expected) class TestCasting(base.BaseCastingTests): pass - - diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 8463d36cca69c..0995ed3459796 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -145,11 +145,11 @@ def test_combine(self): s1 = pd.Series(orig_data1) s2 = pd.Series(orig_data2) result = s1.combine(s2, lambda x1, x2: x1 <= x2) - expected = pd.Series([a <= b for (a, b) in zip(orig_data1, orig_data2)]) + expected = pd.Series([a <= b for (a, b) in + zip(orig_data1, orig_data2)]) tm.assert_series_equal(result, expected) - class TestCasting(BaseDecimal, base.BaseCastingTests): pass From 4c925fc9470a016b1c95e89e81943c3991222fb8 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Thu, 24 May 2018 14:31:54 -0400 Subject: [PATCH 04/12] get rid of is_sequence_of_dtype --- pandas/core/arrays/base.py | 11 ----------- pandas/core/series.py | 14 +++++++++----- pandas/tests/extension/decimal/array.py | 4 +++- pandas/tests/extension/decimal/test_decimal.py | 9 +++++++-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 9d3a767382359..1922801c30719 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -610,14 +610,3 @@ def _ndarray_values(self): used for interacting with our indexers. """ return np.array(self) - - # ------------------------------------------------------------------------ - # Utilities for use by subclasses - # ------------------------------------------------------------------------ - def is_sequence_of_dtype(self, seq): - """ - Given a sequence, determine whether all members have the appropriate - type for this instance of an ExtensionArray - """ - thistype = self.dtype.type - return all(isinstance(i, thistype) for i in seq) diff --git a/pandas/core/series.py b/pandas/core/series.py index da02002b8a487..46e86db06bd61 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2187,7 +2187,8 @@ def _binop(self, other, func, level=None, fill_value=None): fill_value) name = ops.get_op_result_name(self, other) - if is_extension_array_dtype(this) or is_extension_array_dtype(other): + if (is_extension_array_dtype(this_vals) or + is_extension_array_dtype(other_vals)): try: result = func(this_vals, other_vals) except TypeError: @@ -2195,7 +2196,7 @@ def _binop(self, other, func, level=None, fill_value=None): if result is NotImplemented: result = [func(a, b) for a, b in zip(this_vals, other_vals)] - if is_extension_array_dtype(this): + if is_extension_array_dtype(this_vals): excons = type(this_vals)._from_sequence else: excons = type(other_vals)._from_sequence @@ -2244,7 +2245,7 @@ def combine(self, other, func, fill_value=None): Series.combine_first : Combine Series values, choosing the calling Series's values first """ - self_is_ext = is_extension_array_dtype(self) + self_is_ext = is_extension_array_dtype(self.values) if fill_value is None: if self_is_ext: fill_value = self.dtype.na_value @@ -2268,8 +2269,11 @@ def combine(self, other, func, fill_value=None): new_values = [func(lv, other) for lv in self._values] new_name = self.name - if (self_is_ext and self.values.is_sequence_of_dtype(new_values)): - new_values = self._values._from_sequence(new_values) + if self_is_ext and not is_categorical_dtype(self.values): + try: + new_values = self._values._from_sequence(new_values) + except TypeError: + pass return self._constructor(new_values, index=new_index, name=new_name) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 90f0181beab0d..cc6fadc483d5e 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -28,7 +28,9 @@ class DecimalArray(ExtensionArray): dtype = DecimalDtype() def __init__(self, values): - assert all(isinstance(v, decimal.Decimal) for v in values) + for val in values: + if not isinstance(val, self.dtype.type): + raise TypeError values = np.asarray(values, dtype=object) self._data = values diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 0995ed3459796..4fcd031f56cf3 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -142,13 +142,18 @@ def test_combine(self): # GH 20825 orig_data1 = make_data() orig_data2 = make_data() - s1 = pd.Series(orig_data1) - s2 = pd.Series(orig_data2) + s1 = pd.Series(DecimalArray(orig_data1)) + s2 = pd.Series(DecimalArray(orig_data2)) result = s1.combine(s2, lambda x1, x2: x1 <= x2) expected = pd.Series([a <= b for (a, b) in zip(orig_data1, orig_data2)]) tm.assert_series_equal(result, expected) + result = s1.combine(s2, lambda x1, x2: x1 + x2) + expected = pd.Series(DecimalArray([a + b for (a, b) in + zip(orig_data1, orig_data2)])) + tm.assert_series_equal(result, expected) + class TestCasting(BaseDecimal, base.BaseCastingTests): pass From 27480ac48df566fb553f44fe16fc8facff27e25e Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Thu, 24 May 2018 14:34:20 -0400 Subject: [PATCH 05/12] move whatsnew from 0.23.1 to 0.24 --- doc/source/whatsnew/v0.23.1.txt | 1 - doc/source/whatsnew/v0.24.0.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 08faccaadf30a..a071d7f3f5534 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -47,7 +47,6 @@ Bug Fixes ~~~~~~~~~ - tab completion on :class:`Index` in IPython no longer outputs deprecation warnings (:issue:`21125`) -- :meth:`Series.combine()` no longer fails with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b94377af770f4..d5dfd3fbc81bc 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -174,6 +174,7 @@ Reshaping Other ^^^^^ +- :meth:`Series.combine()` no longer fails with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) - - - From 677fe18012dcb30583edcade39db2e31720ae9de Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Tue, 29 May 2018 12:09:47 -0400 Subject: [PATCH 06/12] remove binop changes and make it work right with scalar --- doc/source/whatsnew/v0.24.0.txt | 4 +-- pandas/core/series.py | 37 +++++----------------- pandas/tests/series/test_combine_concat.py | 13 ++++++++ 3 files changed, 23 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 2b7ca11536296..52225b0690e01 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -181,8 +181,8 @@ Reshaping Other ^^^^^ -- :meth:`Series.combine()` no longer fails with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) -- +- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) +- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) - - diff --git a/pandas/core/series.py b/pandas/core/series.py index 46e86db06bd61..9e18fb510f3b5 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2185,25 +2185,10 @@ def _binop(self, other, func, level=None, fill_value=None): this_vals, other_vals = ops.fill_binop(this.values, other.values, fill_value) - name = ops.get_op_result_name(self, other) - - if (is_extension_array_dtype(this_vals) or - is_extension_array_dtype(other_vals)): - try: - result = func(this_vals, other_vals) - except TypeError: - result = NotImplemented - if result is NotImplemented: - result = [func(a, b) for a, b in zip(this_vals, other_vals)] - if is_extension_array_dtype(this_vals): - excons = type(this_vals)._from_sequence - else: - excons = type(other_vals)._from_sequence - result = excons(result) - else: - with np.errstate(all='ignore'): - result = func(this_vals, other_vals) + with np.errstate(all='ignore'): + result = func(this_vals, other_vals) + name = ops.get_op_result_name(self, other) result = self._constructor(result, index=new_index, name=name) result = result.__finalize__(self) if name is None: @@ -2223,9 +2208,8 @@ def combine(self, other, func, fill_value=None): func : function Function that takes two scalars as inputs and return a scalar fill_value : scalar value - The default specifies to use np.nan unless self is - backed by ExtensionArray, in which case the ExtensionArray - na_value is used. + The default specifies to use the appropriate NaN value for + the underlying dtype of the Series Returns ------- @@ -2247,10 +2231,8 @@ def combine(self, other, func, fill_value=None): """ self_is_ext = is_extension_array_dtype(self.values) if fill_value is None: - if self_is_ext: - fill_value = self.dtype.na_value - else: - fill_value = np.nan + fill_value = na_value_for_dtype(self.dtype, False) + if isinstance(other, Series): new_index = self.index.union(other.index) new_name = ops.get_op_result_name(self, other) @@ -2262,10 +2244,7 @@ def combine(self, other, func, fill_value=None): new_values.append(func(lv, rv)) else: new_index = self.index - if not self_is_ext: - with np.errstate(all='ignore'): - new_values = func(self._values, other) - else: + with np.errstate(all='ignore'): new_values = [func(lv, other) for lv in self._values] new_name = self.name diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 6cf60e818c845..f35cce6ac9d71 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -60,6 +60,19 @@ def test_append_duplicates(self): with tm.assert_raises_regex(ValueError, msg): pd.concat([s1, s2], verify_integrity=True) + def test_combine_scalar(self): + # GH 21248 + # Note - combine() with another Series is tested elsewhere because + # it is used when testing operators + s = pd.Series([i * 10 for i in range(5)]) + result = s.combine(3, lambda x, y: x + y) + expected = pd.Series([i * 10 + 3 for i in range(5)]) + tm.assert_series_equal(result, expected) + + result = s.combine(22, lambda x, y: min(x, y)) + expected = pd.Series([min(i * 10, 22) for i in range(5)]) + tm.assert_series_equal(result, expected) + def test_combine_first(self): values = tm.makeIntIndex(20).values.astype(float) series = Series(values, index=tm.makeIntIndex(20)) From 9fceee7f736edb8967d750535590d85ffd720b12 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Wed, 30 May 2018 17:58:37 -0400 Subject: [PATCH 07/12] Move test_combine tests to base class --- pandas/tests/extension/base/methods.py | 33 +++++++++++++++++++ .../extension/category/test_categorical.py | 25 +++++++------- pandas/tests/extension/conftest.py | 9 +++++ .../tests/extension/decimal/test_decimal.py | 24 +++++--------- pandas/tests/extension/json/test_json.py | 8 +++++ 5 files changed, 70 insertions(+), 29 deletions(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index c5436aa731d50..384d360d5751d 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -103,3 +103,36 @@ def test_factorize_equivalence(self, data_for_grouping, na_sentinel): tm.assert_numpy_array_equal(l1, l2) self.assert_extension_array_equal(u1, u2) + + def test_combine_le(self, data_repeated): + # GH 20825 + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 <= x2) + expected = pd.Series([a <= b for (a, b) in + zip(list(orig_data1), list(orig_data2))]) + tm.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 <= x2) + expected = pd.Series([a <= val for a in list(orig_data1)]) + tm.assert_series_equal(result, expected) + + def test_combine_add(self, data_repeated): + # GH 20825 + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 + x2) + expected = pd.Series( + orig_data1._from_sequence([a + b for (a, b) in + zip(list(orig_data1), + list(orig_data2))])) + tm.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 + x2) + expected = pd.Series( + orig_data1._from_sequence([a + val for a in list(orig_data1)])) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 9a8939fa3e7dc..db7d8d2473970 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -2,9 +2,6 @@ import pytest import numpy as np -import pandas as pd - -import pandas.util.testing as tm from pandas.api.types import CategoricalDtype from pandas import Categorical @@ -32,6 +29,15 @@ def data_missing(): return Categorical([np.nan, 'A']) +@pytest.fixture +def data_repeated(): + """Return different versions of data for count times""" + def gen(count): + for _ in range(count): + yield Categorical(make_data()) + yield gen + + @pytest.fixture def data_for_sorting(): return Categorical(['A', 'B', 'C'], categories=['C', 'A', 'B'], @@ -157,16 +163,9 @@ class TestMethods(base.BaseMethodsTests): def test_value_counts(self, all_data, dropna): pass - def test_combine(self): - # GH 20825 - orig_data1 = make_data() - orig_data2 = make_data() - s1 = pd.Series(Categorical(orig_data1, ordered=True)) - s2 = pd.Series(Categorical(orig_data2, ordered=True)) - result = s1.combine(s2, lambda x1, x2: x1 <= x2) - expected = pd.Series([a <= b for (a, b) in - zip(orig_data1, orig_data2)]) - tm.assert_series_equal(result, expected) + @pytest.mark.skip(reason="combine add for categorical not supported") + def test_combine_add(self, data_repeated): + pass class TestCasting(base.BaseCastingTests): diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index bbd31c4071b91..cf70f7734bf52 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -30,6 +30,15 @@ def all_data(request, data, data_missing): return data_missing +@pytest.fixture +def data_repeated(): + """Return different versions of data for count times""" + def gen(count): + for _ in range(count): + yield NotImplemented + yield gen + + @pytest.fixture def data_for_sorting(): """Length-3 array with a known sort order. diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 4fcd031f56cf3..f74b4d7e94f11 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -25,6 +25,14 @@ def data_missing(): return DecimalArray([decimal.Decimal('NaN'), decimal.Decimal(1)]) +@pytest.fixture +def data_repeated(): + def gen(count): + for _ in range(count): + yield DecimalArray(make_data()) + yield gen + + @pytest.fixture def data_for_sorting(): return DecimalArray([decimal.Decimal('1'), @@ -138,22 +146,6 @@ def test_value_counts(self, all_data, dropna): tm.assert_series_equal(result, expected) - def test_combine(self): - # GH 20825 - orig_data1 = make_data() - orig_data2 = make_data() - s1 = pd.Series(DecimalArray(orig_data1)) - s2 = pd.Series(DecimalArray(orig_data2)) - result = s1.combine(s2, lambda x1, x2: x1 <= x2) - expected = pd.Series([a <= b for (a, b) in - zip(orig_data1, orig_data2)]) - tm.assert_series_equal(result, expected) - - result = s1.combine(s2, lambda x1, x2: x1 + x2) - expected = pd.Series(DecimalArray([a + b for (a, b) in - zip(orig_data1, orig_data2)])) - tm.assert_series_equal(result, expected) - class TestCasting(BaseDecimal, base.BaseCastingTests): pass diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index b7ac8033f3f6d..85a282ae4007f 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -187,6 +187,14 @@ def test_sort_values_missing(self, data_missing_for_sorting, ascending): super(TestMethods, self).test_sort_values_missing( data_missing_for_sorting, ascending) + @pytest.mark.skip(reason="combine for JSONArray not supported") + def test_combine_le(self, data_repeated): + pass + + @pytest.mark.skip(reason="combine for JSONArray not supported") + def test_combine_add(self, data_repeated): + pass + class TestCasting(BaseJSON, base.BaseCastingTests): @pytest.mark.xfail From 1010cb51e2e45d807ea133982ab7687ae7df6b10 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Thu, 31 May 2018 10:04:23 -0400 Subject: [PATCH 08/12] Fix lint issue --- pandas/tests/extension/base/methods.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 384d360d5751d..5cc793897579e 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -112,12 +112,12 @@ def test_combine_le(self, data_repeated): result = s1.combine(s2, lambda x1, x2: x1 <= x2) expected = pd.Series([a <= b for (a, b) in zip(list(orig_data1), list(orig_data2))]) - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) val = s1.iloc[0] result = s1.combine(val, lambda x1, x2: x1 <= x2) expected = pd.Series([a <= val for a in list(orig_data1)]) - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) def test_combine_add(self, data_repeated): # GH 20825 @@ -129,10 +129,10 @@ def test_combine_add(self, data_repeated): orig_data1._from_sequence([a + b for (a, b) in zip(list(orig_data1), list(orig_data2))])) - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) val = s1.iloc[0] result = s1.combine(val, lambda x1, x2: x1 + x2) expected = pd.Series( orig_data1._from_sequence([a + val for a in list(orig_data1)])) - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) From aceea9fcdb1ad1ee1011066b1cdb4f5cafa0bc88 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Thu, 31 May 2018 15:03:45 -0400 Subject: [PATCH 09/12] Change NotImplemented to NotImplementedError --- pandas/tests/extension/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index cf70f7734bf52..4bbbb7df2f399 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -35,7 +35,7 @@ def data_repeated(): """Return different versions of data for count times""" def gen(count): for _ in range(count): - yield NotImplemented + yield NotImplementedError yield gen From 0e4720b454b3386e7cdca01b60ac33a2d16643f2 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Mon, 4 Jun 2018 09:34:53 -0400 Subject: [PATCH 10/12] put in keyword arg for na_value_for_dtype --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 03cb7c56d2920..4b57d43dfd1f9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2231,7 +2231,7 @@ def combine(self, other, func, fill_value=None): """ self_is_ext = is_extension_array_dtype(self.values) if fill_value is None: - fill_value = na_value_for_dtype(self.dtype, False) + fill_value = na_value_for_dtype(self.dtype, compat=False) if isinstance(other, Series): new_index = self.index.union(other.index) From d3ed2c73ed43bf9ae8aa7b1294f2843a01bcbe4d Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Wed, 6 Jun 2018 11:22:23 -0400 Subject: [PATCH 11/12] cosmetic changes. fix test_add for category --- pandas/core/series.py | 9 +++++++-- pandas/tests/extension/base/methods.py | 1 + .../extension/category/test_categorical.py | 17 +++++++++++++++-- 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4ce4e581248ed..b5fb7ff888f61 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2237,11 +2237,12 @@ def combine(self, other, func, fill_value=None): Series.combine_first : Combine Series values, choosing the calling Series's values first """ - self_is_ext = is_extension_array_dtype(self.values) if fill_value is None: fill_value = na_value_for_dtype(self.dtype, compat=False) if isinstance(other, Series): + # If other is a Series, result is based on union of Series, + # so do this element by element new_index = self.index.union(other.index) new_name = ops.get_op_result_name(self, other) new_values = [] @@ -2251,12 +2252,16 @@ def combine(self, other, func, fill_value=None): with np.errstate(all='ignore'): new_values.append(func(lv, rv)) else: + # Assume that other is a scalar, so apply the function for + # each element in the Series new_index = self.index with np.errstate(all='ignore'): new_values = [func(lv, other) for lv in self._values] new_name = self.name - if self_is_ext and not is_categorical_dtype(self.values): + if is_categorical_dtype(self.values): + pass + elif is_extension_array_dtype(self.values): try: new_values = self._values._from_sequence(new_values) except TypeError: diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 5cc793897579e..23227867ee4d7 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -106,6 +106,7 @@ def test_factorize_equivalence(self, data_for_grouping, na_sentinel): def test_combine_le(self, data_repeated): # GH 20825 + # Test that combine works when doing a <= (le) comparison orig_data1, orig_data2 = data_repeated(2) s1 = pd.Series(orig_data1) s2 = pd.Series(orig_data2) diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index db7d8d2473970..61fdb8454b542 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -1,6 +1,7 @@ import string import pytest +import pandas as pd import numpy as np from pandas.api.types import CategoricalDtype @@ -163,9 +164,21 @@ class TestMethods(base.BaseMethodsTests): def test_value_counts(self, all_data, dropna): pass - @pytest.mark.skip(reason="combine add for categorical not supported") def test_combine_add(self, data_repeated): - pass + # GH 20825 + # When adding categoricals in combine, result is a string + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + result = s1.combine(s2, lambda x1, x2: x1 + x2) + expected = pd.Series(([a + b for (a, b) in + zip(list(orig_data1), list(orig_data2))])) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 + x2) + expected = pd.Series([a + val for a in list(orig_data1)]) + self.assert_series_equal(result, expected) class TestCasting(base.BaseCastingTests): From 4ca28b2c672a7a0401474ba26e6719d63fb55de1 Mon Sep 17 00:00:00 2001 From: Dr-Irv Date: Thu, 7 Jun 2018 11:45:21 -0400 Subject: [PATCH 12/12] Doc changes --- doc/source/whatsnew/v0.24.0.txt | 12 ++++++++++-- pandas/core/series.py | 2 ++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index a83c2e4d0d3ec..8d26c51ae6527 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -178,10 +178,18 @@ Reshaping - - -Other -^^^^^ +ExtensionArray +^^^^^^^^^^^^^^ - :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) - :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) - - + +Other +^^^^^ + +- +- +- +- diff --git a/pandas/core/series.py b/pandas/core/series.py index b5fb7ff888f61..0564cdbbb2014 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2262,6 +2262,8 @@ def combine(self, other, func, fill_value=None): if is_categorical_dtype(self.values): pass elif is_extension_array_dtype(self.values): + # The function can return something of any type, so check + # if the type is compatible with the calling EA try: new_values = self._values._from_sequence(new_values) except TypeError: