DEPR: Enforce deprecation of silent dropping of nuisance columns in agg_list_like (#49401)

* DEPR: Enforce deprecation of silent dropping of nuisance columns in agg_list_like
* Remove type-ignore
* Fixups
* Remove outdated comment
pandas-dev · Nov 2, 2022 · fb9b345 · fb9b345
1 parent cb43a81
commit fb9b345
Show file tree

Hide file tree

Showing 12 changed files with 127 additions and 193 deletions.
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
@@ -310,7 +310,7 @@ def time_different_python_functions_multicol(self, df):
         df.groupby(["key1", "key2"]).agg([sum, min, max])
 
     def time_different_python_functions_singlecol(self, df):
-        df.groupby("key1").agg([sum, min, max])
+        df.groupby("key1")[["value1", "value2", "value3"]].agg([sum, min, max])
 
 
 class GroupStrings:

diff --git a/doc/source/user_guide/basics.rst b/doc/source/user_guide/basics.rst
@@ -1039,34 +1039,6 @@ not noted for a particular column will be ``NaN``:
 
    tsdf.agg({"A": ["mean", "min"], "B": "sum"})
 
-.. _basics.aggregation.mixed_string:
-
-Mixed dtypes
-++++++++++++
-
-.. deprecated:: 1.4.0
-   Attempting to determine which columns cannot be aggregated and silently dropping them from the results is deprecated and will be removed in a future version. If any porition of the columns or operations provided fail, the call to ``.agg`` will raise.
-
-When presented with mixed dtypes that cannot aggregate, ``.agg`` will only take the valid
-aggregations. This is similar to how ``.groupby.agg`` works.
-
-.. ipython:: python
-
-   mdf = pd.DataFrame(
-       {
-           "A": [1, 2, 3],
-           "B": [1.0, 2.0, 3.0],
-           "C": ["foo", "bar", "baz"],
-           "D": pd.date_range("20130101", periods=3),
-       }
-   )
-   mdf.dtypes
-
-.. ipython:: python
-   :okwarning:
-
-   mdf.agg(["min", "sum"])
-
 .. _basics.aggregation.custom_describe:
 
 Custom describe

diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
@@ -1007,7 +1007,7 @@ functions:
 .. ipython:: python
    :okwarning:
 
-   grouped = df.groupby("A")
+   grouped = df.groupby("A")[["C", "D"]]
    grouped.agg(lambda x: x.std())
 
 But, it's rather verbose and can be untidy if you need to pass additional

diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst
@@ -104,10 +104,13 @@ aggregations. This is similar to how groupby ``.agg()`` works. (:issue:`15015`)
                       'D': pd.date_range('20130101', periods=3)})
    df.dtypes
 
-.. ipython:: python
-   :okwarning:
+.. code-block:: python
 
-   df.agg(['min', 'sum'])
+   In [10]: df.agg(['min', 'sum'])
+   Out[10]:
+        A    B          C          D
+   min  1  1.0        bar 2013-01-01
+   sum  6  6.0  foobarbaz        NaT
 
 .. _whatsnew_0200.enhancements.dataio_dtype:
 

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -302,7 +302,7 @@ Removal of prior version deprecations/changes
 - Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`)
 - Changed behavior of comparison of a :class:`Timestamp` with a ``datetime.date`` object; these now compare as un-equal and raise on inequality comparisons, matching the ``datetime.datetime`` behavior (:issue:`36131`)
 - Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`)
--
+- Changed behavior of :meth:`DataFrame.apply` with list-like so that any partial failure will raise an error (:issue:`43740`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_200.performance:

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -4,7 +4,6 @@
 from collections import defaultdict
 from functools import partial
 import inspect
-import re
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -18,7 +17,6 @@
     Sequence,
     cast,
 )
-import warnings
 
 import numpy as np
 
@@ -35,12 +33,8 @@
     NDFrameT,
     npt,
 )
-from pandas.errors import (
-    DataError,
-    SpecificationError,
-)
+from pandas.errors import SpecificationError
 from pandas.util._decorators import cache_readonly
-from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.cast import is_nested_object
 from pandas.core.dtypes.common import (
@@ -317,88 +311,28 @@ def agg_list_like(self) -> DataFrame | Series:
 
         results = []
         keys = []
-        failed_names = []
-
-        depr_nuisance_columns_msg = (
-            "{} did not aggregate successfully. If any error is "
-            "raised this will raise in a future version of pandas. "
-            "Drop these columns/ops to avoid this warning."
-        )
 
         # degenerate case
         if selected_obj.ndim == 1:
             for a in arg:
                 colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
-                try:
-                    new_res = colg.aggregate(a)
-
-                except TypeError:
-                    failed_names.append(com.get_callable_name(a) or a)
-                else:
-                    results.append(new_res)
+                new_res = colg.aggregate(a)
+                results.append(new_res)
 
-                    # make sure we find a good name
-                    name = com.get_callable_name(a) or a
-                    keys.append(name)
+                # make sure we find a good name
+                name = com.get_callable_name(a) or a
+                keys.append(name)
 
         # multiples
         else:
             indices = []
             for index, col in enumerate(selected_obj):
                 colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
-                try:
-                    # Capture and suppress any warnings emitted by us in the call
-                    # to agg below, but pass through any warnings that were
-                    # generated otherwise.
-                    # This is necessary because of https://bugs.python.org/issue29672
-                    # See GH #43741 for more details
-                    with warnings.catch_warnings(record=True) as record:
-                        new_res = colg.aggregate(arg)
-                    if len(record) > 0:
-                        match = re.compile(depr_nuisance_columns_msg.format(".*"))
-                        for warning in record:
-                            if re.match(match, str(warning.message)):
-                                failed_names.append(col)
-                            else:
-                                warnings.warn_explicit(
-                                    message=warning.message,
-                                    category=warning.category,
-                                    filename=warning.filename,
-                                    lineno=warning.lineno,
-                                )
-
-                except (TypeError, DataError):
-                    failed_names.append(col)
-                except ValueError as err:
-                    # cannot aggregate
-                    if "Must produce aggregated value" in str(err):
-                        # raised directly in _aggregate_named
-                        failed_names.append(col)
-                    elif "no results" in str(err):
-                        # reached in test_frame_apply.test_nuiscance_columns
-                        #  where the colg.aggregate(arg) ends up going through
-                        #  the selected_obj.ndim == 1 branch above with arg == ["sum"]
-                        #  on a datetime64[ns] column
-                        failed_names.append(col)
-                    else:
-                        raise
-                else:
-                    results.append(new_res)
-                    indices.append(index)
-
+                new_res = colg.aggregate(arg)
+                results.append(new_res)
+                indices.append(index)
             keys = selected_obj.columns.take(indices)
 
-        # if we are empty
-        if not len(results):
-            raise ValueError("no results")
-
-        if len(failed_names) > 0:
-            warnings.warn(
-                depr_nuisance_columns_msg.format(failed_names),
-                FutureWarning,
-                stacklevel=find_stack_level(),
-            )
-
         try:
             concatenated = concat(results, keys=keys, axis=1, sort=False)
         except TypeError as err:
@@ -479,8 +413,6 @@ def agg_dict_like(self) -> DataFrame | Series:
                 keys_to_use = ktu
 
             axis: AxisInt = 0 if isinstance(obj, ABCSeries) else 1
-            # error: Key expression in dictionary comprehension has incompatible type
-            # "Hashable"; expected type "NDFrame"  [misc]
             result = concat(
                 {k: results[k] for k in keys_to_use},  # type: ignore[misc]
                 axis=axis,

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -1138,8 +1138,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
                     result = gba.agg()
 
                 except ValueError as err:
-                    if "no results" not in str(err):
-                        # raised directly by _aggregate_multiple_funcs
+                    if "No objects to concatenate" not in str(err):
                         raise
                     result = self._aggregate_frame(func)
 

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
@@ -1141,44 +1141,55 @@ def test_agg_with_name_as_column_name():
     tm.assert_series_equal(result, expected)
 
 
-def test_agg_multiple_mixed_no_warning():
+def test_agg_multiple_mixed():
     # GH 20909
     mdf = DataFrame(
         {
             "A": [1, 2, 3],
             "B": [1.0, 2.0, 3.0],
             "C": ["foo", "bar", "baz"],
-            "D": date_range("20130101", periods=3),
         }
     )
     expected = DataFrame(
         {
             "A": [1, 6],
             "B": [1.0, 6.0],
             "C": ["bar", "foobarbaz"],
-            "D": [Timestamp("2013-01-01"), pd.NaT],
         },
         index=["min", "sum"],
     )
     # sorted index
-    with tm.assert_produces_warning(
-        FutureWarning, match=r"\['D'\] did not aggregate successfully"
-    ):
-        result = mdf.agg(["min", "sum"])
-
+    result = mdf.agg(["min", "sum"])
     tm.assert_frame_equal(result, expected)
 
-    with tm.assert_produces_warning(
-        FutureWarning, match=r"\['D'\] did not aggregate successfully"
-    ):
-        result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"])
-
+    result = mdf[["C", "B", "A"]].agg(["sum", "min"])
     # GH40420: the result of .agg should have an index that is sorted
     # according to the arguments provided to agg.
-    expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"])
+    expected = expected[["C", "B", "A"]].reindex(["sum", "min"])
     tm.assert_frame_equal(result, expected)
 
 
+def test_agg_multiple_mixed_raises():
+    # GH 20909
+    mdf = DataFrame(
+        {
+            "A": [1, 2, 3],
+            "B": [1.0, 2.0, 3.0],
+            "C": ["foo", "bar", "baz"],
+            "D": date_range("20130101", periods=3),
+        }
+    )
+
+    # Including datetime column D: agg now raises instead of silently dropping it
+    # TODO: GH#49399 will fix error message
+    msg = "DataFrame constructor called with"
+    with pytest.raises(TypeError, match=msg):
+        mdf.agg(["min", "sum"])
+
+    with pytest.raises(TypeError, match=msg):
+        mdf[["D", "C", "B", "A"]].agg(["sum", "min"])
+
+
 def test_agg_reduce(axis, float_frame):
     other_axis = 1 if axis in {0, "index"} else 0
     name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values()
@@ -1277,14 +1288,10 @@ def test_nuiscance_columns():
     expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"])
     tm.assert_series_equal(result, expected)
 
-    with tm.assert_produces_warning(
-        FutureWarning, match=r"\['D'\] did not aggregate successfully"
-    ):
-        result = df.agg(["sum"])
-    expected = DataFrame(
-        [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"]
-    )
-    tm.assert_frame_equal(result, expected)
+    # TODO: GH#49399 will fix error message
+    msg = "DataFrame constructor called with"
+    with pytest.raises(TypeError, match=msg):
+        df.agg(["sum"])
 
 
 @pytest.mark.parametrize("how", ["agg", "apply"])
@@ -1499,27 +1506,23 @@ def test_aggregation_func_column_order():
     # according to the arguments provided to agg.
     df = DataFrame(
         [
-            ("1", 1, 0, 0),
-            ("2", 2, 0, 0),
-            ("3", 3, 0, 0),
-            ("4", 4, 5, 4),
-            ("5", 5, 6, 6),
-            ("6", 6, 7, 7),
+            (1, 0, 0),
+            (2, 0, 0),
+            (3, 0, 0),
+            (4, 5, 4),
+            (5, 6, 6),
+            (6, 7, 7),
         ],
-        columns=("item", "att1", "att2", "att3"),
+        columns=("att1", "att2", "att3"),
     )
 
     def foo(s):
         return s.sum() / 2
 
     aggs = ["sum", foo, "count", "min"]
-    with tm.assert_produces_warning(
-        FutureWarning, match=r"\['item'\] did not aggregate successfully"
-    ):
-        result = df.agg(aggs)
+    result = df.agg(aggs)
     expected = DataFrame(
         {
-            "item": ["123456", np.nan, 6, "1"],
             "att1": [21.0, 10.5, 6.0, 1.0],
             "att2": [18.0, 9.0, 6.0, 0.0],
             "att3": [17.0, 8.5, 6.0, 0.0],

diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -383,21 +383,18 @@ def test_agg_multiple_functions_same_name_with_ohlc_present():
 
 def test_multiple_functions_tuples_and_non_tuples(df):
     # #1359
+    # Columns B and C would cause partial failure
+    df = df.drop(columns=["B", "C"])
+
     funcs = [("foo", "mean"), "std"]
     ex_funcs = [("foo", "mean"), ("std", "std")]
 
-    result = df.groupby("A")["C"].agg(funcs)
-    expected = df.groupby("A")["C"].agg(ex_funcs)
+    result = df.groupby("A")["D"].agg(funcs)
+    expected = df.groupby("A")["D"].agg(ex_funcs)
     tm.assert_frame_equal(result, expected)
 
-    with tm.assert_produces_warning(
-        FutureWarning, match=r"\['B'\] did not aggregate successfully"
-    ):
-        result = df.groupby("A").agg(funcs)
-    with tm.assert_produces_warning(
-        FutureWarning, match=r"\['B'\] did not aggregate successfully"
-    ):
-        expected = df.groupby("A").agg(ex_funcs)
+    result = df.groupby("A").agg(funcs)
+    expected = df.groupby("A").agg(ex_funcs)
     tm.assert_frame_equal(result, expected)