From 8303674941b5579f57ce07b726b2a1d32d0f884f Mon Sep 17 00:00:00 2001 From: makbigc Date: Wed, 7 Aug 2019 22:31:00 +0800 Subject: [PATCH 01/20] Add argmax, max, argmin, min to EA --- doc/source/whatsnew/v0.25.1.rst | 1 + pandas/core/arrays/base.py | 29 ++++++++++++++++++++++ pandas/tests/extension/base/methods.py | 24 ++++++++++++++++++ pandas/tests/extension/test_categorical.py | 8 ++++++ pandas/tests/extension/test_numpy.py | 14 +++++++++++ 5 files changed, 76 insertions(+) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index c80195af413f7..8c5596206560f 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -6,6 +6,7 @@ What's new in 0.25.1 (July XX, 2019) Enhancements ~~~~~~~~~~~~ +- Add :meth:`ExtensionArray.argmax`, :meth:`ExtensionArray.max`, :meth:`ExtensionArray.argmin` and :meth:`ExtensionArray.min` (:issue:`24382`) .. _whatsnew_0251.enhancements.other: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e517be4f03a16..8f8c0f9f46bf1 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -461,6 +461,35 @@ def argsort(self, ascending=True, kind="quicksort", *args, **kwargs): result = nargsort(self, kind=kind, ascending=ascending, na_position="last") return result + def argmin(self): + """ + Return the minimun argument indexer. + + Returns + ------- + scalar + Minimun argument indexer. + + See Also + -------- + Index.max : Return the maximum value of the object. + Series.min : Return the minimum value in a Series. + DataFrame.min : Return the minimum values in a DataFrame. + """ + return self.argsort()[0] + + def min(self): + min_idx = self.argmin() + return self[min_idx] + + def argmax(self): + no_nan = self.isna().sum() + return self.argsort()[-1 - no_nan] + + def max(self): + max_idx = self.argmax() + return self[max_idx] + def fillna(self, value=None, method=None, limit=None): """ Fill NA/NaN values using the specified method. diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 6d47b0c1d1f77..9d31ae539a64a 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -59,6 +59,30 @@ def test_argsort_missing(self, data_missing_for_sorting): expected = pd.Series(np.array([1, -1, 0], dtype=np.int64)) self.assert_series_equal(result, expected) + def test_argmax(self, data_missing_for_sorting): + # GH 24382 + result = data_missing_for_sorting.argmax() + expected = 0 + assert result == expected + + def test_max(self, data_missing_for_sorting): + # GH 24382 + result = data_missing_for_sorting.max() + expected = data_missing_for_sorting[0] + assert result == expected + + def test_argmin(self, data_missing_for_sorting): + # GH 24382 + result = data_missing_for_sorting.argmin() + expected = 2 + assert result == expected + + def test_min(self, data_missing_for_sorting): + # GH 24382 + result = data_missing_for_sorting.min() + expected = data_missing_for_sorting[2] + assert result == expected + @pytest.mark.parametrize( "na_position, expected", [ diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index f7456d24ad6d3..452242b01b75c 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -195,6 +195,14 @@ def test_searchsorted(self, data_for_sorting): if not data_for_sorting.ordered: raise pytest.skip(reason="searchsorted requires ordered data.") + def test_max(self): + # GH 24382 + pass + + def test_min(self): + # GH 24382 + pass + class TestCasting(base.BaseCastingTests): pass diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 221cf0787d839..7c1261d87160e 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -246,6 +246,20 @@ def test_repeat(self, data, repeats, as_series, use_numpy): # Fails creating expected super().test_repeat(data, repeats, as_series, use_numpy) + def test_max(self): + # GH 24382 + data = PandasArray(np.array([1, np.nan, 0])) + result = data.max() + expected = data[0] + assert result == expected + + def test_min(self): + # GH 24382 + data = PandasArray(np.array([1, np.nan, 0])) + result = data.min() + expected = data[2] + assert result == expected + @skip_nested class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): From 44838f77ebe6e8ced5c7bf0700bb8ce19f5f130e Mon Sep 17 00:00:00 2001 From: makbigc Date: Fri, 9 Aug 2019 20:11:21 +0800 Subject: [PATCH 02/20] Remove argmax, argmin, max, min from ArrowEA --- pandas/tests/extension/arrow/arrays.py | 12 ++++++++++++ pandas/tests/extension/arrow/test_bool.py | 10 ++++++++++ 2 files changed, 22 insertions(+) diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index 6a28f76e474cc..1c95fe5b94113 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -155,6 +155,18 @@ def any(self, axis=0, out=None): def all(self, axis=0, out=None): return self._data.to_pandas().all() + def argmin(self): + raise NotImplementedError + + def min(self): + raise NotImplementedError + + def argmax(self): + raise NotImplementedError + + def max(self): + raise NotImplementedError + class ArrowBoolArray(ArrowExtensionArray): def __init__(self, values): diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index cc0deca765b41..1686dc8a09451 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -53,6 +53,16 @@ def test_from_sequence_from_cls(self, data): class TestReduce(base.BaseNoReduceTests): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + if op_name in ('max', 'min'): + pass + else: + ser = pd.Series(data) + with pytest.raises(TypeError): + getattr(ser, op_name)(skipna=skipna) + def test_reduce_series_boolean(self): pass From 8bc95735c8682a362ff716d1217c97ca4368b6a0 Mon Sep 17 00:00:00 2001 From: makbigc Date: Fri, 9 Aug 2019 22:27:27 +0800 Subject: [PATCH 03/20] Fix black error --- pandas/tests/extension/arrow/test_bool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 1686dc8a09451..391788b500d1a 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -56,7 +56,7 @@ class TestReduce(base.BaseNoReduceTests): @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): op_name = all_numeric_reductions - if op_name in ('max', 'min'): + if op_name in ("max", "min"): pass else: ser = pd.Series(data) From bc69a4ab62e5d357ac33bc335004146a4f94e1c0 Mon Sep 17 00:00:00 2001 From: makbigc Date: Sat, 10 Aug 2019 23:13:02 +0800 Subject: [PATCH 04/20] Add issue number to the test --- pandas/tests/extension/arrow/test_bool.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 391788b500d1a..e40d64e2ba97b 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -55,6 +55,7 @@ def test_from_sequence_from_cls(self, data): class TestReduce(base.BaseNoReduceTests): @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + # GH 24382 op_name = all_numeric_reductions if op_name in ("max", "min"): pass From e84268b6b6679bb184f9e3254984eb2322afc8e3 Mon Sep 17 00:00:00 2001 From: makbigc Date: Mon, 2 Dec 2019 21:30:35 +0800 Subject: [PATCH 05/20] Move the whatsnew entry to v1 --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index b45bec37e84eb..4b4ce54eda941 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -194,6 +194,7 @@ Other enhancements - Roundtripping DataFrames with nullable integer or string data types to parquet (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`). +- Add :meth:`ExtensionArray.argmax`, :meth:`ExtensionArray.max`, :meth:`ExtensionArray.argmin` and :meth:`ExtensionArray.min` (:issue:`24382`) Build Changes ^^^^^^^^^^^^^ From f9c9fea0c1d9bc83ea78c3bbd010a1375ebc3785 Mon Sep 17 00:00:00 2001 From: makbigc Date: Tue, 3 Dec 2019 11:54:23 +0800 Subject: [PATCH 06/20] Add test for categorical --- pandas/tests/extension/test_categorical.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index f29e287b7ab1e..dff1e58641ade 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -192,14 +192,6 @@ def test_searchsorted(self, data_for_sorting): if not data_for_sorting.ordered: raise pytest.skip(reason="searchsorted requires ordered data.") - def test_max(self): - # GH 24382 - pass - - def test_min(self): - # GH 24382 - pass - class TestCasting(base.BaseCastingTests): @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) From 8b7585fa78c5a8437717a41941b30c4271463e52 Mon Sep 17 00:00:00 2001 From: makbigc Date: Wed, 4 Dec 2019 23:46:08 +0800 Subject: [PATCH 07/20] Add func doc and pre-check for zero length --- pandas/core/arrays/base.py | 69 +++++++++++++++++++++++++++++++++++--- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1f9985539b222..67c1f92539bdb 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -486,30 +486,89 @@ def argsort(self, ascending=True, kind="quicksort", *args, **kwargs): def argmin(self): """ - Return the minimun argument indexer. + Return the minimum argument indexer. Returns ------- scalar - Minimun argument indexer. + Minimum argument indexer. See Also -------- - Index.max : Return the maximum value of the object. - Series.min : Return the minimum value in a Series. - DataFrame.min : Return the minimum values in a DataFrame. + ExtensionArray.argmax """ + if len(self) == 0: + raise ValueError( + "zero-size array does not support argmin" + ) + return self.argsort()[0] def min(self): + """ + Return the minimum value. + + Returns + ------- + scalar + Minimum value. + + See Also + -------- + ExtensionArray.max : Return the maximum value of the object. + Series.min : Return the minimum value in a Series. + DataFrame.min : Return the minimum values in a DataFrame. + """ + if len(self) == 0: + raise ValueError( + "zero-size array does not support min" + ) + min_idx = self.argmin() return self[min_idx] def argmax(self): + """ + Return the maximum argument indexer. + + Returns + ------- + scalar + Maximum argument indexer. + + See Also + -------- + ExtensionArray.argmin + """ + + if len(self) == 0: + raise ValueError( + "zero-size array does not support argmax" + ) + no_nan = self.isna().sum() return self.argsort()[-1 - no_nan] def max(self): + """ + Return the maximum value. + + Returns + ------- + scalar + Maximum value. + + See Also + -------- + ExtensionArray.min : Return the minimum value of the object. + Series.max : Return the maximum value in a Series. + DataFrame.max : Return the maximum values in a DataFrame. + """ + if len(self) == 0: + raise ValueError( + "zero-size array does not support max" + ) + max_idx = self.argmax() return self[max_idx] From 41e8ce4443c3c0cbe40d5930ec597d72cd564ea7 Mon Sep 17 00:00:00 2001 From: makbigc Date: Thu, 5 Dec 2019 00:38:57 +0800 Subject: [PATCH 08/20] Add min and max to StringArray --- pandas/core/arrays/string_.py | 46 +++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index f6af05ab4d9e7..ac36513eae501 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -252,6 +252,52 @@ def value_counts(self, dropna=False): return value_counts(self._ndarray, dropna=dropna) + def min(self): + """ + Return the minimum value. + + Returns + ------- + scalar + Minimum value. + + See Also + -------- + ExtensionArray.max : Return the maximum value of the object. + Series.min : Return the minimum value in a Series. + DataFrame.min : Return the minimum values in a DataFrame. + """ + if len(self) == 0: + raise ValueError( + "zero-size array does not support min" + ) + + min_idx = self.argmin() + return self[min_idx] + + def max(self): + """ + Return the maximum value. + + Returns + ------- + scalar + Maximum value. + + See Also + -------- + ExtensionArray.min : Return the minimum value of the object. + Series.max : Return the maximum value in a Series. + DataFrame.max : Return the maximum values in a DataFrame. + """ + if len(self) == 0: + raise ValueError( + "zero-size array does not support max" + ) + + max_idx = self.argmax() + return self[max_idx] + # Overrride parent because we have different return types. @classmethod def _create_arithmetic_method(cls, op): From 20ca0a2afb5a04360989146fcd6fbb47f859887b Mon Sep 17 00:00:00 2001 From: makbigc Date: Thu, 5 Dec 2019 00:39:31 +0800 Subject: [PATCH 09/20] Add test for empty array --- pandas/tests/extension/base/methods.py | 12 ++++++++++++ pandas/tests/extension/test_datetime.py | 15 +++++++++++++++ pandas/tests/extension/test_numpy.py | 14 ++++++++++++++ pandas/tests/extension/test_period.py | 14 ++++++++++++++ 4 files changed, 55 insertions(+) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 969372a113441..294bf23ae0639 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -83,6 +83,18 @@ def test_min(self, data_missing_for_sorting): expected = data_missing_for_sorting[2] assert result == expected + @pytest.mark.parametrize( + "method", + ["argmax", "max", "argmin", "min"],) + def test_extremize_empty_array(self, method, data_missing_for_sorting): + # GH 24382 + err_msg = ("zero-size array") + #empty = data_missing_for_sorting[:0] + #pytest.set_trace() + #empty.max() + with pytest.raises(ValueError, match=err_msg): + getattr(data_missing_for_sorting[:0], method)() + @pytest.mark.parametrize( "na_position, expected", [ diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index a60607d586ada..b5eb00e952dc7 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -98,6 +98,21 @@ def test_combine_add(self, data_repeated): # Timestamp.__add__(Timestamp) not defined pass + @pytest.mark.parametrize( + "method", + ["argmax", "max", "argmin", "min"],) + def test_extremize_empty_array(self, method, data): + # GH 24382 + err_msg = ("zero-size array does not support") + empty_arr = data[:0] + if method in ("max", "min"): + result = getattr(empty_arr, method)() + assert result is pd.NaT + else: + with pytest.raises(ValueError, match=err_msg): + getattr(empty_arr, method)() + + class TestInterface(BaseDatetimeTests, base.BaseInterfaceTests): def test_array_interface(self, data): diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 7c1261d87160e..13769f00c41d8 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -260,6 +260,20 @@ def test_min(self): expected = data[2] assert result == expected + @pytest.mark.parametrize( + "method", + ["argmax", "max", "argmin", "min"],) + def test_extremize_empty_array(self, method, data): + # GH 24382 + err_msg = ("zero-size array does not support") + empty_arr = data[:0] + if method in ("max", "min"): + result = getattr(empty_arr, method)() + assert np.isnan(result) + else: + with pytest.raises(ValueError): + getattr(empty_arr, method)() + @skip_nested class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index c439b8b5ed319..706764957c754 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -75,6 +75,20 @@ def test_combine_add(self, data_repeated): # Period + Period is not defined. pass + @pytest.mark.parametrize( + "method", + ["argmax", "max", "argmin", "min"],) + def test_extremize_empty_array(self, method, data): + # GH 24382 + err_msg = ("zero-size array does not support") + empty_arr = data[:0] + if method in ("max", "min"): + result = getattr(empty_arr, method)() + assert result is pd.NaT + else: + with pytest.raises(ValueError, match=err_msg): + getattr(empty_arr, method)() + class TestInterface(BasePeriodTests, base.BaseInterfaceTests): From 1aa742278ca239503b398fd4d239d135c38f8934 Mon Sep 17 00:00:00 2001 From: makbigc Date: Thu, 5 Dec 2019 09:23:39 +0800 Subject: [PATCH 10/20] Resolve black format --- pandas/core/arrays/base.py | 16 ++++------------ pandas/core/arrays/string_.py | 8 ++------ pandas/tests/extension/base/methods.py | 12 ++++++------ pandas/tests/extension/test_datetime.py | 7 +++---- pandas/tests/extension/test_numpy.py | 6 +++--- pandas/tests/extension/test_period.py | 6 +++--- 6 files changed, 21 insertions(+), 34 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 67c1f92539bdb..a1bc54593c2ea 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -498,9 +498,7 @@ def argmin(self): ExtensionArray.argmax """ if len(self) == 0: - raise ValueError( - "zero-size array does not support argmin" - ) + raise ValueError("zero-size array does not support argmin") return self.argsort()[0] @@ -520,9 +518,7 @@ def min(self): DataFrame.min : Return the minimum values in a DataFrame. """ if len(self) == 0: - raise ValueError( - "zero-size array does not support min" - ) + raise ValueError("zero-size array does not support min") min_idx = self.argmin() return self[min_idx] @@ -542,9 +538,7 @@ def argmax(self): """ if len(self) == 0: - raise ValueError( - "zero-size array does not support argmax" - ) + raise ValueError("zero-size array does not support argmax") no_nan = self.isna().sum() return self.argsort()[-1 - no_nan] @@ -565,9 +559,7 @@ def max(self): DataFrame.max : Return the maximum values in a DataFrame. """ if len(self) == 0: - raise ValueError( - "zero-size array does not support max" - ) + raise ValueError("zero-size array does not support max") max_idx = self.argmax() return self[max_idx] diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index ac36513eae501..b047f35efd69e 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -268,9 +268,7 @@ def min(self): DataFrame.min : Return the minimum values in a DataFrame. """ if len(self) == 0: - raise ValueError( - "zero-size array does not support min" - ) + raise ValueError("zero-size array does not support min") min_idx = self.argmin() return self[min_idx] @@ -291,9 +289,7 @@ def max(self): DataFrame.max : Return the maximum values in a DataFrame. """ if len(self) == 0: - raise ValueError( - "zero-size array does not support max" - ) + raise ValueError("zero-size array does not support max") max_idx = self.argmax() return self[max_idx] diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 294bf23ae0639..55433248a9a8e 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -84,14 +84,14 @@ def test_min(self, data_missing_for_sorting): assert result == expected @pytest.mark.parametrize( - "method", - ["argmax", "max", "argmin", "min"],) + "method", ["argmax", "max", "argmin", "min"], + ) def test_extremize_empty_array(self, method, data_missing_for_sorting): # GH 24382 - err_msg = ("zero-size array") - #empty = data_missing_for_sorting[:0] - #pytest.set_trace() - #empty.max() + err_msg = "zero-size array" + # empty = data_missing_for_sorting[:0] + # pytest.set_trace() + # empty.max() with pytest.raises(ValueError, match=err_msg): getattr(data_missing_for_sorting[:0], method)() diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index b5eb00e952dc7..2fb234380dd2a 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -99,11 +99,11 @@ def test_combine_add(self, data_repeated): pass @pytest.mark.parametrize( - "method", - ["argmax", "max", "argmin", "min"],) + "method", ["argmax", "max", "argmin", "min"], + ) def test_extremize_empty_array(self, method, data): # GH 24382 - err_msg = ("zero-size array does not support") + err_msg = "zero-size array does not support" empty_arr = data[:0] if method in ("max", "min"): result = getattr(empty_arr, method)() @@ -113,7 +113,6 @@ def test_extremize_empty_array(self, method, data): getattr(empty_arr, method)() - class TestInterface(BaseDatetimeTests, base.BaseInterfaceTests): def test_array_interface(self, data): if data.tz: diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 13769f00c41d8..63752e89244f1 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -261,11 +261,11 @@ def test_min(self): assert result == expected @pytest.mark.parametrize( - "method", - ["argmax", "max", "argmin", "min"],) + "method", ["argmax", "max", "argmin", "min"], + ) def test_extremize_empty_array(self, method, data): # GH 24382 - err_msg = ("zero-size array does not support") + err_msg = "zero-size array does not support" empty_arr = data[:0] if method in ("max", "min"): result = getattr(empty_arr, method)() diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 706764957c754..02269d706de0a 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -76,11 +76,11 @@ def test_combine_add(self, data_repeated): pass @pytest.mark.parametrize( - "method", - ["argmax", "max", "argmin", "min"],) + "method", ["argmax", "max", "argmin", "min"], + ) def test_extremize_empty_array(self, method, data): # GH 24382 - err_msg = ("zero-size array does not support") + err_msg = "zero-size array does not support" empty_arr = data[:0] if method in ("max", "min"): result = getattr(empty_arr, method)() From 7d81cc5b0591a767f79cf22629d86db6775846fb Mon Sep 17 00:00:00 2001 From: makbigc Date: Sat, 18 Jan 2020 12:32:11 +0800 Subject: [PATCH 11/20] Fix test --- pandas/tests/extension/base/methods.py | 3 --- pandas/tests/extension/test_categorical.py | 9 +++++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index e238e87972890..02fc59ba3edc3 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -89,9 +89,6 @@ def test_min(self, data_missing_for_sorting): def test_extremize_empty_array(self, method, data_missing_for_sorting): # GH 24382 err_msg = "zero-size array" - # empty = data_missing_for_sorting[:0] - # pytest.set_trace() - # empty.max() with pytest.raises(ValueError, match=err_msg): getattr(data_missing_for_sorting[:0], method)() diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 336b23e54d74c..b893d75418f96 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -192,6 +192,15 @@ def test_searchsorted(self, data_for_sorting): if not data_for_sorting.ordered: raise pytest.skip(reason="searchsorted requires ordered data.") + @pytest.mark.parametrize( + "method", ["argmax", "argmin"], + ) + def test_extremize_empty_array(self, method, data_missing_for_sorting): + # GH 24382 + err_msg = "zero-size array" + with pytest.raises(ValueError, match=err_msg): + getattr(data_missing_for_sorting[:0], method)() + class TestCasting(base.BaseCastingTests): @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) From d18c8bbafea1c812341d776dde9a97ee9bab1fba Mon Sep 17 00:00:00 2001 From: makbigc Date: Sat, 18 Jan 2020 12:32:23 +0800 Subject: [PATCH 12/20] Fix lint error --- pandas/tests/extension/test_numpy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 8739e80818545..e67e302132d2c 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -267,7 +267,6 @@ def test_min(self): ) def test_extremize_empty_array(self, method, data): # GH 24382 - err_msg = "zero-size array does not support" empty_arr = data[:0] if method in ("max", "min"): result = getattr(empty_arr, method)() From 3530a6a0b18561a038f03394e857c068a8439125 Mon Sep 17 00:00:00 2001 From: makbigc Date: Sun, 19 Jan 2020 13:57:54 +0800 Subject: [PATCH 13/20] Change the error message --- pandas/core/arrays/base.py | 8 ++++---- pandas/core/arrays/string_.py | 4 ++-- pandas/tests/extension/base/methods.py | 2 +- pandas/tests/extension/test_categorical.py | 2 +- pandas/tests/extension/test_datetime.py | 2 +- pandas/tests/extension/test_period.py | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 4c9a68afd52d7..79203b8716575 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -530,7 +530,7 @@ def argmin(self): ExtensionArray.argmax """ if len(self) == 0: - raise ValueError("zero-size array does not support argmin") + raise ValueError("attempt to get argmin of an empty sequence") return self.argsort()[0] @@ -550,7 +550,7 @@ def min(self): DataFrame.min : Return the minimum values in a DataFrame. """ if len(self) == 0: - raise ValueError("zero-size array does not support min") + raise ValueError("attempt to get min of an empty sequence") min_idx = self.argmin() return self[min_idx] @@ -570,7 +570,7 @@ def argmax(self): """ if len(self) == 0: - raise ValueError("zero-size array does not support argmax") + raise ValueError("attempt to get argmax of an empty sequence") no_nan = self.isna().sum() return self.argsort()[-1 - no_nan] @@ -591,7 +591,7 @@ def max(self): DataFrame.max : Return the maximum values in a DataFrame. """ if len(self) == 0: - raise ValueError("zero-size array does not support max") + raise ValueError("attempt to get max of an empty sequence") max_idx = self.argmax() return self[max_idx] diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 2f0a3330c737f..ff9184de0c415 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -281,7 +281,7 @@ def min(self): DataFrame.min : Return the minimum values in a DataFrame. """ if len(self) == 0: - raise ValueError("zero-size array does not support min") + raise ValueError("attempt to get min of an empty sequence") min_idx = self.argmin() return self[min_idx] @@ -302,7 +302,7 @@ def max(self): DataFrame.max : Return the maximum values in a DataFrame. """ if len(self) == 0: - raise ValueError("zero-size array does not support max") + raise ValueError("attempt to get max of an empty sequence") max_idx = self.argmax() return self[max_idx] diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 02fc59ba3edc3..4ad7927c5b641 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -88,7 +88,7 @@ def test_min(self, data_missing_for_sorting): ) def test_extremize_empty_array(self, method, data_missing_for_sorting): # GH 24382 - err_msg = "zero-size array" + err_msg = "attempt to get" with pytest.raises(ValueError, match=err_msg): getattr(data_missing_for_sorting[:0], method)() diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index b893d75418f96..9aadc72c79d51 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -197,7 +197,7 @@ def test_searchsorted(self, data_for_sorting): ) def test_extremize_empty_array(self, method, data_missing_for_sorting): # GH 24382 - err_msg = "zero-size array" + err_msg = "attempt to get" with pytest.raises(ValueError, match=err_msg): getattr(data_missing_for_sorting[:0], method)() diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index 2fb234380dd2a..edb031db8c69e 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -103,7 +103,7 @@ def test_combine_add(self, data_repeated): ) def test_extremize_empty_array(self, method, data): # GH 24382 - err_msg = "zero-size array does not support" + err_msg = "attempt to get" empty_arr = data[:0] if method in ("max", "min"): result = getattr(empty_arr, method)() diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index 02269d706de0a..ff8f48d5d0b1b 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -80,7 +80,7 @@ def test_combine_add(self, data_repeated): ) def test_extremize_empty_array(self, method, data): # GH 24382 - err_msg = "zero-size array does not support" + err_msg = "attempt to get" empty_arr = data[:0] if method in ("max", "min"): result = getattr(empty_arr, method)() From 8d8506adb1c9bac6a99c666e92e82550f3fd3a7e Mon Sep 17 00:00:00 2001 From: makbigc Date: Sun, 19 Jan 2020 14:01:24 +0800 Subject: [PATCH 14/20] Move the whatsnew entry from v1 to v1.1 --- doc/source/whatsnew/v1.0.0.rst | 1 - doc/source/whatsnew/v1.1.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index ec62d5e5ca637..fa562838c8f7c 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -228,7 +228,6 @@ Other enhancements - Added an experimental :attr:`~DataFrame.attrs` for storing global metadata about a dataset (:issue:`29062`) - :meth:`Timestamp.fromisocalendar` is now compatible with python 3.8 and above (:issue:`28115`) - :meth:`DataFrame.to_pickle` and :func:`read_pickle` now accept URL (:issue:`30163`) -- Add :meth:`ExtensionArray.argmax`, :meth:`ExtensionArray.max`, :meth:`ExtensionArray.argmin` and :meth:`ExtensionArray.min` (:issue:`24382`) Build Changes diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 8133e54c934ad..be28d76483c44 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -18,7 +18,7 @@ Enhancements Other enhancements ^^^^^^^^^^^^^^^^^^ -- +- Add :meth:`ExtensionArray.argmax`, :meth:`ExtensionArray.max`, :meth:`ExtensionArray.argmin` and :meth:`ExtensionArray.min` (:issue:`24382`) - From 2036b25c71d899e32cb975f394277abeef3791f8 Mon Sep 17 00:00:00 2001 From: makbigc Date: Fri, 14 Feb 2020 18:19:40 +0800 Subject: [PATCH 15/20] Refactor max and min --- pandas/core/arrays/base.py | 6 ++++++ pandas/core/arrays/string_.py | 40 ++--------------------------------- 2 files changed, 8 insertions(+), 38 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 8b64c05b38d21..88aede830d0e1 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -552,6 +552,9 @@ def min(self): Series.min : Return the minimum value in a Series. DataFrame.min : Return the minimum values in a DataFrame. """ + return self._min() + + def _min(self): if len(self) == 0: raise ValueError("attempt to get min of an empty sequence") @@ -593,6 +596,9 @@ def max(self): Series.max : Return the maximum value in a Series. DataFrame.max : Return the maximum values in a DataFrame. """ + return self._max() + + def _max(self): if len(self) == 0: raise ValueError("attempt to get max of an empty sequence") diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index c9d19b5b09463..4212cd5e594b2 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -282,46 +282,10 @@ def value_counts(self, dropna=False): return value_counts(self._ndarray, dropna=dropna).astype("Int64") def min(self): - """ - Return the minimum value. - - Returns - ------- - scalar - Minimum value. - - See Also - -------- - ExtensionArray.max : Return the maximum value of the object. - Series.min : Return the minimum value in a Series. - DataFrame.min : Return the minimum values in a DataFrame. - """ - if len(self) == 0: - raise ValueError("attempt to get min of an empty sequence") - - min_idx = self.argmin() - return self[min_idx] + return self._min() def max(self): - """ - Return the maximum value. - - Returns - ------- - scalar - Maximum value. - - See Also - -------- - ExtensionArray.min : Return the minimum value of the object. - Series.max : Return the maximum value in a Series. - DataFrame.max : Return the maximum values in a DataFrame. - """ - if len(self) == 0: - raise ValueError("attempt to get max of an empty sequence") - - max_idx = self.argmax() - return self[max_idx] + return self._max() # Overrride parent because we have different return types. @classmethod From 8cd7169fb1c86aff9f9b9b8a6e3ba36ed359072d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 9 May 2020 09:58:28 +0200 Subject: [PATCH 16/20] fixup merge --- doc/source/whatsnew/v1.1.0.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 61b4ba2bae15c..d21060c7a6f47 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -130,10 +130,7 @@ Other enhancements - :meth:`Styler.highlight_null` now accepts ``subset`` argument (:issue:`31345`) - When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`) - `OptionError` is now exposed in `pandas.errors` (:issue:`27553`) -<<<<<<< HEAD - Add :meth:`ExtensionArray.argmax`, :meth:`ExtensionArray.max`, :meth:`ExtensionArray.argmin` and :meth:`ExtensionArray.min` (:issue:`24382`) -- -======= - :func:`timedelta_range` will now infer a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`) - Positional slicing on a :class:`IntervalIndex` now supports slices with ``step > 1`` (:issue:`31658`) - :class:`Series.str` now has a `fullmatch` method that matches a regular expression against the entire string in each row of the series, similar to `re.fullmatch` (:issue:`32806`). @@ -154,7 +151,6 @@ Other enhancements such as ``dict`` and ``list``, mirroring the behavior of :meth:`DataFrame.update` (:issue:`33215`) - :meth:`~pandas.core.groupby.GroupBy.transform` and :meth:`~pandas.core.groupby.GroupBy.aggregate` has gained ``engine`` and ``engine_kwargs`` arguments that supports executing functions with ``Numba`` (:issue:`32854`, :issue:`33388`) - :meth:`~pandas.core.resample.Resampler.interpolate` now supports SciPy interpolation method :class:`scipy.interpolate.CubicSpline` as method ``cubicspline`` (:issue:`33670`) ->>>>>>> upstream/master - .. --------------------------------------------------------------------------- From 810ac5653f2d9a32ede8d55b48e508dfbc302797 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 9 May 2020 10:13:07 +0200 Subject: [PATCH 17/20] Remove min/max for now --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/core/arrays/base.py | 48 ---------------------- pandas/core/arrays/string_.py | 6 --- pandas/tests/extension/arrow/arrays.py | 6 --- pandas/tests/extension/base/methods.py | 14 +------ pandas/tests/extension/test_categorical.py | 9 ---- pandas/tests/extension/test_datetime.py | 14 ------- pandas/tests/extension/test_numpy.py | 27 ------------ pandas/tests/extension/test_period.py | 14 ------- 9 files changed, 2 insertions(+), 138 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index d21060c7a6f47..b7fdc81a8a468 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -130,7 +130,7 @@ Other enhancements - :meth:`Styler.highlight_null` now accepts ``subset`` argument (:issue:`31345`) - When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`) - `OptionError` is now exposed in `pandas.errors` (:issue:`27553`) -- Add :meth:`ExtensionArray.argmax`, :meth:`ExtensionArray.max`, :meth:`ExtensionArray.argmin` and :meth:`ExtensionArray.min` (:issue:`24382`) +- Add :meth:`ExtensionArray.argmax` and :meth:`ExtensionArray.argmin` (:issue:`24382`) - :func:`timedelta_range` will now infer a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`) - Positional slicing on a :class:`IntervalIndex` now supports slices with ``step > 1`` (:issue:`31658`) - :class:`Series.str` now has a `fullmatch` method that matches a regular expression against the entire string in each row of the series, similar to `re.fullmatch` (:issue:`32806`). diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 4ab044a0acd6b..cd43b67eff9d2 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -525,30 +525,6 @@ def argmin(self): return self.argsort()[0] - def min(self): - """ - Return the minimum value. - - Returns - ------- - scalar - Minimum value. - - See Also - -------- - ExtensionArray.max : Return the maximum value of the object. - Series.min : Return the minimum value in a Series. - DataFrame.min : Return the minimum values in a DataFrame. - """ - return self._min() - - def _min(self): - if len(self) == 0: - raise ValueError("attempt to get min of an empty sequence") - - min_idx = self.argmin() - return self[min_idx] - def argmax(self): """ Return the maximum argument indexer. @@ -569,30 +545,6 @@ def argmax(self): no_nan = self.isna().sum() return self.argsort()[-1 - no_nan] - def max(self): - """ - Return the maximum value. - - Returns - ------- - scalar - Maximum value. - - See Also - -------- - ExtensionArray.min : Return the minimum value of the object. - Series.max : Return the maximum value in a Series. - DataFrame.max : Return the maximum values in a DataFrame. - """ - return self._max() - - def _max(self): - if len(self) == 0: - raise ValueError("attempt to get max of an empty sequence") - - max_idx = self.argmax() - return self[max_idx] - def fillna(self, value=None, method=None, limit=None): """ Fill NA/NaN values using the specified method. diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 81a5ea8839007..537b1cf3dd439 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -292,12 +292,6 @@ def value_counts(self, dropna=False): return value_counts(self._ndarray, dropna=dropna).astype("Int64") - def min(self): - return self._min() - - def max(self): - return self._max() - def memory_usage(self, deep=False): result = self._ndarray.nbytes if deep: diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index 5e9f3fa111ad9..e37c61761f859 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -161,15 +161,9 @@ def all(self, axis=0, out=None): def argmin(self): raise NotImplementedError - def min(self): - raise NotImplementedError - def argmax(self): raise NotImplementedError - def max(self): - raise NotImplementedError - class ArrowBoolArray(ArrowExtensionArray): def __init__(self, values): diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index a06196e593888..221b0debc8907 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -81,26 +81,14 @@ def test_argmax(self, data_missing_for_sorting): expected = 0 assert result == expected - def test_max(self, data_missing_for_sorting): - # GH 24382 - result = data_missing_for_sorting.max() - expected = data_missing_for_sorting[0] - assert result == expected - def test_argmin(self, data_missing_for_sorting): # GH 24382 result = data_missing_for_sorting.argmin() expected = 2 assert result == expected - def test_min(self, data_missing_for_sorting): - # GH 24382 - result = data_missing_for_sorting.min() - expected = data_missing_for_sorting[2] - assert result == expected - @pytest.mark.parametrize( - "method", ["argmax", "max", "argmin", "min"], + "method", ["argmax", "argmin"], ) def test_extremize_empty_array(self, method, data_missing_for_sorting): # GH 24382 diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index c2600f6fe9654..d1211e477fe3e 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -153,15 +153,6 @@ def test_searchsorted(self, data_for_sorting): if not data_for_sorting.ordered: raise pytest.skip(reason="searchsorted requires ordered data.") - @pytest.mark.parametrize( - "method", ["argmax", "argmin"], - ) - def test_extremize_empty_array(self, method, data_missing_for_sorting): - # GH 24382 - err_msg = "attempt to get" - with pytest.raises(ValueError, match=err_msg): - getattr(data_missing_for_sorting[:0], method)() - class TestCasting(base.BaseCastingTests): @pytest.mark.parametrize("cls", [Categorical, CategoricalIndex]) diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index d7e6dd099b931..e026809f7e611 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -98,20 +98,6 @@ def test_combine_add(self, data_repeated): # Timestamp.__add__(Timestamp) not defined pass - @pytest.mark.parametrize( - "method", ["argmax", "max", "argmin", "min"], - ) - def test_extremize_empty_array(self, method, data): - # GH 24382 - err_msg = "attempt to get" - empty_arr = data[:0] - if method in ("max", "min"): - result = getattr(empty_arr, method)() - assert result is pd.NaT - else: - with pytest.raises(ValueError, match=err_msg): - getattr(empty_arr, method)() - class TestInterface(BaseDatetimeTests, base.BaseInterfaceTests): def test_array_interface(self, data): diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index d6d9aaaeadf8c..e48065b47f17c 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -272,33 +272,6 @@ def test_repeat(self, data, repeats, as_series, use_numpy): # Fails creating expected super().test_repeat(data, repeats, as_series, use_numpy) - def test_max(self): - # GH 24382 - data = PandasArray(np.array([1, np.nan, 0])) - result = data.max() - expected = data[0] - assert result == expected - - def test_min(self): - # GH 24382 - data = PandasArray(np.array([1, np.nan, 0])) - result = data.min() - expected = data[2] - assert result == expected - - @pytest.mark.parametrize( - "method", ["argmax", "max", "argmin", "min"], - ) - def test_extremize_empty_array(self, method, data): - # GH 24382 - empty_arr = data[:0] - if method in ("max", "min"): - result = getattr(empty_arr, method)() - assert np.isnan(result) - else: - with pytest.raises(ValueError): - getattr(empty_arr, method)() - @pytest.mark.xfail(reason="PandasArray.diff may fail on dtype") def test_diff(self, data, periods): return super().test_diff(data, periods) diff --git a/pandas/tests/extension/test_period.py b/pandas/tests/extension/test_period.py index fb4b2ab312145..b1eb276bfc227 100644 --- a/pandas/tests/extension/test_period.py +++ b/pandas/tests/extension/test_period.py @@ -75,20 +75,6 @@ def test_combine_add(self, data_repeated): # Period + Period is not defined. pass - @pytest.mark.parametrize( - "method", ["argmax", "max", "argmin", "min"], - ) - def test_extremize_empty_array(self, method, data): - # GH 24382 - err_msg = "attempt to get" - empty_arr = data[:0] - if method in ("max", "min"): - result = getattr(empty_arr, method)() - assert result is pd.NaT - else: - with pytest.raises(ValueError, match=err_msg): - getattr(empty_arr, method)() - class TestInterface(BasePeriodTests, base.BaseInterfaceTests): From 6b030aad3f67049fc251b91ecf34922dfaeb79c0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 9 May 2020 11:32:54 +0200 Subject: [PATCH 18/20] argmin/argmax implementation based on _values_for_argsort --- pandas/core/arrays/base.py | 14 +++------- pandas/core/sorting.py | 27 +++++++++++++++++++ pandas/tests/extension/base/methods.py | 37 ++++++++++++++++++-------- pandas/tests/extension/test_boolean.py | 17 ++++++++++++ pandas/tests/extension/test_sparse.py | 13 +++++++++ 5 files changed, 86 insertions(+), 22 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index cd43b67eff9d2..90c7f53178389 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -28,7 +28,7 @@ from pandas.core import ops from pandas.core.algorithms import _factorize_array, unique from pandas.core.missing import backfill_1d, pad_1d -from pandas.core.sorting import nargsort +from pandas.core.sorting import nargminmax, nargsort _extension_array_shared_docs: Dict[str, str] = dict() @@ -520,10 +520,7 @@ def argmin(self): -------- ExtensionArray.argmax """ - if len(self) == 0: - raise ValueError("attempt to get argmin of an empty sequence") - - return self.argsort()[0] + return nargminmax(self, "argmin") def argmax(self): """ @@ -538,12 +535,7 @@ def argmax(self): -------- ExtensionArray.argmin """ - - if len(self) == 0: - raise ValueError("attempt to get argmax of an empty sequence") - - no_nan = self.isna().sum() - return self.argsort()[-1 - no_nan] + return nargminmax(self, "argmax") def fillna(self, value=None, method=None, limit=None): """ diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 69d55978724af..994bdd312a9e6 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -319,6 +319,33 @@ def nargsort( return indexer +def nargminmax(values, method: str): + """ + Implementation of np.argmin/argmax but for ExtensionArray and which + handles missing values. + + Parameters + ---------- + values : ExtensionArray + method : {"argmax", "argmin"} + + Returns + ------- + int + """ + assert method in {"argmax", "argmin"} + func = np.argmax if method == "argmax" else np.argmin + + mask = np.asarray(isna(values)) + values = values._values_for_argsort() + + idx = np.arange(len(values)) + non_nans = values[~mask] + non_nan_idx = idx[~mask] + + return non_nan_idx[func(non_nans)] + + def ensure_key_mapped_multiindex(index, key: Callable, level=None): """ Returns a new MultiIndex in which key has been applied diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 221b0debc8907..4b91b30ecbd15 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -75,26 +75,41 @@ def test_argsort_missing(self, data_missing_for_sorting): expected = pd.Series(np.array([1, -1, 0], dtype=np.int64)) self.assert_series_equal(result, expected) - def test_argmax(self, data_missing_for_sorting): + def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value): # GH 24382 - result = data_missing_for_sorting.argmax() - expected = 0 - assert result == expected - def test_argmin(self, data_missing_for_sorting): - # GH 24382 - result = data_missing_for_sorting.argmin() - expected = 2 - assert result == expected + # data_for_sorting -> [B, C, A] with A < B < C + assert data_for_sorting.argmax() == 1 + assert data_for_sorting.argmin() == 2 + + # with repeated values -> first occurence + data = data_for_sorting.take([2, 0, 0, 1, 1, 2]) + assert data.argmax() == 3 + assert data.argmin() == 0 + + # with missing values + # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. + assert data_missing_for_sorting.argmax() == 0 + assert data_missing_for_sorting.argmin() == 2 @pytest.mark.parametrize( "method", ["argmax", "argmin"], ) - def test_extremize_empty_array(self, method, data_missing_for_sorting): + def test_argmin_argmax_empty_array(self, method, data): # GH 24382 err_msg = "attempt to get" with pytest.raises(ValueError, match=err_msg): - getattr(data_missing_for_sorting[:0], method)() + getattr(data[:0], method)() + + @pytest.mark.parametrize( + "method", ["argmax", "argmin"], + ) + def test_argmin_argmax_all_na(self, method, data, na_value): + # all missing with skipna=True is the same as emtpy + err_msg = "attempt to get" + data_na = type(data)._from_sequence([na_value, na_value], dtype=data.dtype) + with pytest.raises(ValueError, match=err_msg): + getattr(data_na, method)() @pytest.mark.parametrize( "na_position, expected", diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 0b0bbd3a6dc48..95c810eec76fe 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -235,6 +235,23 @@ def test_searchsorted(self, data_for_sorting, as_series): def test_value_counts(self, all_data, dropna): return super().test_value_counts(all_data, dropna) + def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting): + # override because there are only 2 unique values + + # data_for_sorting -> [B, C, A] with A < B < C -> here True, True, False + assert data_for_sorting.argmax() == 0 + assert data_for_sorting.argmin() == 2 + + # with repeated values -> first occurence + data = data_for_sorting.take([2, 0, 0, 1, 1, 2]) + assert data.argmax() == 1 + assert data.argmin() == 0 + + # with missing values + # data_missing_for_sorting -> [B, NA, A] with A < B and NA missing. + assert data_missing_for_sorting.argmax() == 0 + assert data_missing_for_sorting.argmin() == 2 + class TestCasting(base.BaseCastingTests): pass diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 19ac25eb0ccf7..816270a8f082e 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -316,6 +316,19 @@ def test_shift_0_periods(self, data): data._sparse_values[0] = data._sparse_values[1] assert result._sparse_values[0] != result._sparse_values[1] + @pytest.mark.parametrize( + "method", ["argmax", "argmin"], + ) + def test_argmin_argmax_all_na(self, method, data, na_value): + # overriding because Sparse[int64, 0] cannot handle na_value + if data.dtype.fill_value == 0: + pytest.skip("missing values not supported with Sparse[int64, 0]") + + err_msg = "attempt to get" + data_na = type(data)._from_sequence([na_value, na_value], dtype=data.dtype) + with pytest.raises(ValueError, match=err_msg): + getattr(data_na, method)() + class TestCasting(BaseSparseTests, base.BaseCastingTests): def test_astype_object_series(self, all_data): From e5a6d8cd4872351d1bc703906bd1d25de63624b5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 9 May 2020 11:44:52 +0200 Subject: [PATCH 19/20] test clean-up + update docstring --- pandas/core/arrays/base.py | 16 ++++++++++------ pandas/tests/extension/arrow/arrays.py | 6 ------ pandas/tests/extension/arrow/test_bool.py | 11 ----------- 3 files changed, 10 insertions(+), 23 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 90c7f53178389..4c223df78cd4c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -509,12 +509,14 @@ def argsort( def argmin(self): """ - Return the minimum argument indexer. + Return the index of minimum value. + + In case of multiple occurrences of the minimum value, the index + corresponding to the first occurrence is returned. Returns ------- - scalar - Minimum argument indexer. + int See Also -------- @@ -524,12 +526,14 @@ def argmin(self): def argmax(self): """ - Return the maximum argument indexer. + Return the index of maximum value. + + In case of multiple occurrences of the maximum value, the index + corresponding to the first occurrence is returned. Returns ------- - scalar - Maximum argument indexer. + int See Also -------- diff --git a/pandas/tests/extension/arrow/arrays.py b/pandas/tests/extension/arrow/arrays.py index e37c61761f859..ffebc9f8b3359 100644 --- a/pandas/tests/extension/arrow/arrays.py +++ b/pandas/tests/extension/arrow/arrays.py @@ -158,12 +158,6 @@ def any(self, axis=0, out=None): def all(self, axis=0, out=None): return self._data.to_pandas().all() - def argmin(self): - raise NotImplementedError - - def argmax(self): - raise NotImplementedError - class ArrowBoolArray(ArrowExtensionArray): def __init__(self, values): diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index 78c2a0f0ff1b3..681c6f9a19dc5 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -71,17 +71,6 @@ def test_series_constructor_scalar_na_with_index(self, dtype, na_value): class TestReduce(base.BaseNoReduceTests): - @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): - # GH 24382 - op_name = all_numeric_reductions - if op_name in ("max", "min"): - pass - else: - ser = pd.Series(data) - with pytest.raises(TypeError): - getattr(ser, op_name)(skipna=skipna) - def test_reduce_series_boolean(self): pass From 65e1e4c0ac27f31d0310b452083d6483bb447d5e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 9 May 2020 11:48:10 +0200 Subject: [PATCH 20/20] simplify test_sparse override --- pandas/tests/extension/test_sparse.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index aa603a893f740..8a3bc77e0ca24 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -321,13 +321,8 @@ def test_shift_0_periods(self, data): ) def test_argmin_argmax_all_na(self, method, data, na_value): # overriding because Sparse[int64, 0] cannot handle na_value - if data.dtype.fill_value == 0: - pytest.skip("missing values not supported with Sparse[int64, 0]") - - err_msg = "attempt to get" - data_na = type(data)._from_sequence([na_value, na_value], dtype=data.dtype) - with pytest.raises(ValueError, match=err_msg): - getattr(data_na, method)() + self._check_unsupported(data) + super().test_argmin_argmax_all_na(method, data, na_value) @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) def test_equals(self, data, na_value, as_series, box):