From a3095e3dfb94624eb27dc20ff8a06b8ce444c2c1 Mon Sep 17 00:00:00 2001 From: Mak Sze Chun Date: Mon, 2 Dec 2019 18:37:11 +0800 Subject: [PATCH] API/DEPR: Change default skipna behaviour + deprecate numeric_only in Categorical.min and max (#27929) --- doc/source/whatsnew/v1.0.0.rst | 22 +++++++++ pandas/core/arrays/categorical.py | 38 +++++++-------- pandas/core/series.py | 4 +- .../arrays/categorical/test_analytics.py | 46 ++++++++++++------- pandas/tests/reductions/test_reductions.py | 32 ++++++------- 5 files changed, 85 insertions(+), 57 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 98473241476181..b45bec37e84ebd 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -304,6 +304,26 @@ The following methods now also correctly output values for unobserved categories df.groupby(["cat_1", "cat_2"], observed=False)["value"].count() +By default :meth:`Categorical.min` now returns the minimum instead of np.nan +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When :class:`Categorical` contains ``np.nan``, +:meth:`Categorical.min` no longer returns ``np.nan`` by default (``skipna=True``) (:issue:`25303`) + +*pandas 0.25.x* + +.. code-block:: ipython + + In [1]: pd.Categorical([1, 2, np.nan], ordered=True).min() + Out[1]: nan + + +*pandas 1.0.0* + +.. ipython:: python + + pd.Categorical([1, 2, np.nan], ordered=True).min() + ..
_whatsnew_1000.api_breaking.deps: Increased minimum versions for dependencies @@ -410,6 +430,8 @@ Deprecations - :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`) - :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`) - :meth:`Categorical.take_nd` is deprecated, use :meth:`Categorical.take` instead (:issue:`27745`) +- The parameter ``numeric_only`` of :meth:`Categorical.min` and :meth:`Categorical.max` is deprecated and replaced with ``skipna`` (:issue:`25303`) +- .. _whatsnew_1000.prior_deprecations: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 83f6051b8423fd..f4a20b808292a2 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2123,7 +2123,8 @@ def _reduce(self, name, axis=0, **kwargs): raise TypeError(f"Categorical cannot perform the operation {name}") return func(**kwargs) - def min(self, numeric_only=None, **kwargs): + @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") + def min(self, skipna=True): """ The minimum value of the object. @@ -2139,17 +2140,18 @@ def min(self, numeric_only=None, **kwargs): min : the minimum of this `Categorical` """ self.check_for_ordered("min") - if numeric_only: - good = self._codes != -1 - pointer = self._codes[good].min(**kwargs) - else: - pointer = self._codes.min(**kwargs) - if pointer == -1: - return np.nan + good = self._codes != -1 + if not good.all(): + if skipna: + pointer = self._codes[good].min() + else: + return np.nan else: - return self.categories[pointer] + pointer = self._codes.min() + return self.categories[pointer] - def max(self, numeric_only=None, **kwargs): + @deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna") + def max(self, skipna=True): """ The maximum value of the object. 
@@ -2165,15 +2167,15 @@ def max(self, numeric_only=None, **kwargs): max : the maximum of this `Categorical` """ self.check_for_ordered("max") - if numeric_only: - good = self._codes != -1 - pointer = self._codes[good].max(**kwargs) - else: - pointer = self._codes.max(**kwargs) - if pointer == -1: - return np.nan + good = self._codes != -1 + if not good.all(): + if skipna: + pointer = self._codes[good].max() + else: + return np.nan else: - return self.categories[pointer] + pointer = self._codes.max() + return self.categories[pointer] def mode(self, dropna=True): """ diff --git a/pandas/core/series.py b/pandas/core/series.py index a8232f137f3ef5..11e87a4eed27f6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3820,9 +3820,7 @@ def _reduce( self._get_axis_number(axis) if isinstance(delegate, Categorical): - # TODO deprecate numeric_only argument for Categorical and use - # skipna as well, see GH25303 - return delegate._reduce(name, numeric_only=numeric_only, **kwds) + return delegate._reduce(name, skipna=skipna, **kwds) elif isinstance(delegate, ExtensionArray): # dispatch to ExtensionArray interface return delegate._reduce(name, skipna=skipna, **kwds) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index 279f1492d7dadd..637a47eba05975 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -35,31 +35,43 @@ def test_min_max(self): assert _min == "d" assert _max == "a" + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_with_nan(self, skipna): + # GH 25303 cat = Categorical( [np.nan, "b", "c", np.nan], categories=["d", "c", "b", "a"], ordered=True ) - _min = cat.min() - _max = cat.max() - assert np.isnan(_min) - assert _max == "b" + _min = cat.min(skipna=skipna) + _max = cat.max(skipna=skipna) - _min = cat.min(numeric_only=True) - assert _min == "c" - _max = cat.max(numeric_only=True) - assert _max == 
"b" + if skipna is False: + assert np.isnan(_min) + assert np.isnan(_max) + else: + assert _min == "c" + assert _max == "b" cat = Categorical( [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True ) - _min = cat.min() - _max = cat.max() - assert np.isnan(_min) - assert _max == 1 - - _min = cat.min(numeric_only=True) - assert _min == 2 - _max = cat.max(numeric_only=True) - assert _max == 1 + _min = cat.min(skipna=skipna) + _max = cat.max(skipna=skipna) + + if skipna is False: + assert np.isnan(_min) + assert np.isnan(_max) + else: + assert _min == 2 + assert _max == 1 + + @pytest.mark.parametrize("method", ["min", "max"]) + def test_deprecate_numeric_only_min_max(self, method): + # GH 25303 + cat = Categorical( + [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True + ) + with tm.assert_produces_warning(expected_warning=FutureWarning): + getattr(cat, method)(numeric_only=True) @pytest.mark.parametrize( "values,categories,exp_mode", diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index b0ef0c58ca65a4..80d148c919ab28 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -1043,7 +1043,7 @@ def test_min_max(self): ) _min = cat.min() _max = cat.max() - assert np.isnan(_min) + assert _min == "c" assert _max == "b" cat = Series( @@ -1053,30 +1053,24 @@ def test_min_max(self): ) _min = cat.min() _max = cat.max() - assert np.isnan(_min) + assert _min == 2 assert _max == 1 - def test_min_max_numeric_only(self): - # TODO deprecate numeric_only argument for Categorical and use - # skipna as well, see GH25303 + @pytest.mark.parametrize("skipna", [True, False]) + def test_min_max_skipna(self, skipna): + # GH 25303 cat = Series( Categorical(["a", "b", np.nan, "a"], categories=["b", "a"], ordered=True) ) + _min = cat.min(skipna=skipna) + _max = cat.max(skipna=skipna) - _min = cat.min() - _max = cat.max() - assert np.isnan(_min) - assert _max == "a" - - 
_min = cat.min(numeric_only=True) - _max = cat.max(numeric_only=True) - assert _min == "b" - assert _max == "a" - - _min = cat.min(numeric_only=False) - _max = cat.max(numeric_only=False) - assert np.isnan(_min) - assert _max == "a" + if skipna is True: + assert _min == "b" + assert _max == "a" + else: + assert np.isnan(_min) + assert np.isnan(_max) class TestSeriesMode: