pandas-dev · mroeschke · Jun 7, 2023 · May 21, 2023 · May 21, 2023 · May 22, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -235,6 +235,8 @@ Deprecations
 - Deprecated ``axis=1`` in :meth:`DataFrame.groupby` and in :class:`Grouper` constructor, do ``frame.T.groupby(...)`` instead (:issue:`51203`)
 - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
 - Deprecated explicit support for subclassing :class:`Index` (:issue:`45289`)
+- Deprecated making functions given to :meth:`Series.agg` attempt to operate on each element in the :class:`Series` and only operate on the whole :class:`Series` if the elementwise operations failed. In the future, functions given to :meth:`Series.agg` will always operate on the whole :class:`Series` only. To keep the current behavior, use :meth:`Series.transform` instead. (:issue:`53325`)
+- Deprecated making the functions in a list of functions given to :meth:`DataFrame.agg` attempt to operate on each element in the :class:`DataFrame` and only operate on the columns of the :class:`DataFrame` if the elementwise operations failed. To keep the current behavior, use :meth:`DataFrame.transform` instead. (:issue:`53325`)
 - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
 - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
 - Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -1121,23 +1121,25 @@ def apply(self) -> DataFrame | Series:
     def agg(self):
         result = super().agg()
         if result is None:
+            obj = self.obj
             func = self.func
-
             # string, list-like, and dict-like are entirely handled in super
             assert callable(func)
 
-            # try a regular apply, this evaluates lambdas
-            # row-by-row; however if the lambda is expected a Series
-            # expression, e.g.: lambda x: x-x.quantile(0.25)
-            # this will fail, so we can try a vectorized evaluation
-
-            # we cannot FIRST try the vectorized evaluation, because
-            # then .agg and .apply would have different semantics if the
-            # operation is actually defined on the Series, e.g. str
+            # GH53325: The setup below is just to keep current behavior while emitting a
+            # deprecation message. In the future this will all be replaced with a simple
+            # `result = f(self.obj, *self.args, **self.kwargs)`.
             try:
-                result = self.obj.apply(func, args=self.args, **self.kwargs)
+                result = obj.apply(func, args=self.args, **self.kwargs)
             except (ValueError, AttributeError, TypeError):
-                result = func(self.obj, *self.args, **self.kwargs)
+                result = func(obj, *self.args, **self.kwargs)
+            else:
+                msg = (
+                    f"using {func} in {type(obj).__name__}.agg cannot aggregate and "
+                    f"has been deprecated. Use {type(obj).__name__}.transform to "
+                    f"keep behavior unchanged."
+                )
+                warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
 
         return result
 

diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py
@@ -1478,8 +1478,8 @@ def test_any_apply_keyword_non_zero_axis_regression():
     tm.assert_series_equal(result, expected)
 
 
-def test_agg_list_like_func_with_args():
-    # GH 50624
+def test_agg_mapping_func_deprecated():
+    # GH 53325
     df = DataFrame({"x": [1, 2, 3]})
 
     def foo1(x, a=1, c=0):
@@ -1488,17 +1488,26 @@ def foo1(x, a=1, c=0):
     def foo2(x, b=2, c=0):
         return x + b + c
 
-    msg = r"foo1\(\) got an unexpected keyword argument 'b'"
-    with pytest.raises(TypeError, match=msg):
-        df.agg([foo1, foo2], 0, 3, b=3, c=4)
+    # single func already takes the vectorized path
+    result = df.agg(foo1, 0, 3, c=4)
+    expected = df + 7
+    tm.assert_frame_equal(result, expected)
+
+    msg = "using .+ in Series.agg cannot aggregate and"
 
-    result = df.agg([foo1, foo2], 0, 3, c=4)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.agg([foo1, foo2], 0, 3, c=4)
     expected = DataFrame(
-        [[8, 8], [9, 9], [10, 10]],
-        columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]),
+        [[8, 8], [9, 9], [10, 10]], columns=[["x", "x"], ["foo1", "foo2"]]
     )
     tm.assert_frame_equal(result, expected)
 
+    # TODO: the result below is wrong, should be fixed (GH53325)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = df.agg({"x": foo1}, 0, 3, c=4)
+    expected = DataFrame([2, 3, 4], columns=["x"])
+    tm.assert_frame_equal(result, expected)
+
 
 def test_agg_std():
     df = DataFrame(np.arange(6).reshape(3, 2), columns=["A", "B"])

diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py
@@ -66,6 +66,28 @@ def test_transform_empty_listlike(float_frame, ops, frame_or_series):
         obj.transform(ops)
 
 
+def test_transform_listlike_func_with_args():
+    # GH 50624
+    df = DataFrame({"x": [1, 2, 3]})
+
+    def foo1(x, a=1, c=0):
+        return x + a + c
+
+    def foo2(x, b=2, c=0):
+        return x + b + c
+
+    msg = r"foo1\(\) got an unexpected keyword argument 'b'"
+    with pytest.raises(TypeError, match=msg):
+        df.transform([foo1, foo2], 0, 3, b=3, c=4)
+
+    result = df.transform([foo1, foo2], 0, 3, c=4)
+    expected = DataFrame(
+        [[8, 8], [9, 9], [10, 10]],
+        columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("box", [dict, Series])
 def test_transform_dictlike(axis, float_frame, box):
     # GH 35964

diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py
@@ -8,6 +8,7 @@
 
 from itertools import chain
 import re
+import warnings
 
 import numpy as np
 import pytest
@@ -307,7 +308,10 @@ def test_transform_and_agg_err_series(string_series, func, msg):
     # we are trying to transform with an aggregator
     with pytest.raises(ValueError, match=msg):
         with np.errstate(all="ignore"):
-            string_series.agg(func)
+            # GH53325
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", FutureWarning)
+                string_series.agg(func)
 
 
 @pytest.mark.parametrize("func", [["max", "min"], ["max", "sqrt"]])

diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py
@@ -108,14 +108,18 @@ def f(x, a=0, b=0, c=0):
         return x + a + 10 * b + 100 * c
 
     s = Series([1, 2])
-    result = s.agg(f, 0, *args, **kwargs)
+    msg = (
+        "in Series.agg cannot aggregate and has been deprecated. "
+        "Use Series.transform to keep behavior unchanged."
+    )
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        result = s.agg(f, 0, *args, **kwargs)
     expected = s + increment
     tm.assert_series_equal(result, expected)
 
 
-def test_agg_list_like_func_with_args():
-    # GH 50624
-
+def test_agg_mapping_func_deprecated():
+    # GH 53325
     s = Series([1, 2, 3])
 
     def foo1(x, a=1, c=0):
@@ -124,13 +128,13 @@ def foo1(x, a=1, c=0):
     def foo2(x, b=2, c=0):
         return x + b + c
 
-    msg = r"foo1\(\) got an unexpected keyword argument 'b'"
-    with pytest.raises(TypeError, match=msg):
-        s.agg([foo1, foo2], 0, 3, b=3, c=4)
-
-    result = s.agg([foo1, foo2], 0, 3, c=4)
-    expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]})
-    tm.assert_frame_equal(result, expected)
+    msg = "using .+ in Series.agg cannot aggregate and"
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        s.agg(foo1, 0, 3, c=4)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        s.agg([foo1, foo2], 0, 3, c=4)
+    with tm.assert_produces_warning(FutureWarning, match=msg):
+        s.agg({"a": foo1, "b": foo2}, 0, 3, c=4)
 
 
 def test_series_apply_map_box_timestamps(by_row):
@@ -391,23 +395,32 @@ def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row):
         assert result == str(string_series)
 
 
-def test_with_nested_series(datetime_series):
+def test_agg_evaluate_lambdas(string_series):
+    # GH53325
+    # in the future, the result will be a Series class.
+
+    with tm.assert_produces_warning(FutureWarning):
+        result = string_series.agg(lambda x: type(x))
+    assert isinstance(result, Series) and len(result) == len(string_series)
+
+    with tm.assert_produces_warning(FutureWarning):
+        result = string_series.agg(type)
+    assert isinstance(result, Series) and len(result) == len(string_series)
+
+
+@pytest.mark.parametrize("op_name", ["agg", "apply"])
+def test_with_nested_series(datetime_series, op_name):
     # GH 2316
     # .agg with a reducer and a transform, what to do
     msg = "Returning a DataFrame from Series.apply when the supplied function"
     with tm.assert_produces_warning(FutureWarning, match=msg):
         # GH52123
-        result = datetime_series.apply(
+        result = getattr(datetime_series, op_name)(
             lambda x: Series([x, x**2], index=["x", "x^2"])
         )
     expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2})
     tm.assert_frame_equal(result, expected)
 
-    with tm.assert_produces_warning(FutureWarning, match=msg):
-        # GH52123
-        result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"]))
-    tm.assert_frame_equal(result, expected)
-
 
 def test_replicate_describe(string_series, by_row):
     # this also tests a result set that is all scalars

diff --git a/pandas/tests/apply/test_series_transform.py b/pandas/tests/apply/test_series_transform.py
@@ -10,6 +10,21 @@
 import pandas._testing as tm
 
 
+@pytest.mark.parametrize(
+    "args, kwargs, increment",
+    [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
+)
+def test_agg_args(args, kwargs, increment):
+    # GH 43357
+    def f(x, a=0, b=0, c=0):
+        return x + a + 10 * b + 100 * c
+
+    s = Series([1, 2])
+    result = s.transform(f, 0, *args, **kwargs)
+    expected = s + increment
+    tm.assert_series_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "ops, names",
     [
@@ -28,6 +43,26 @@ def test_transform_listlike(string_series, ops, names):
         tm.assert_frame_equal(result, expected)
 
 
+def test_transform_listlike_func_with_args():
+    # GH 50624
+
+    s = Series([1, 2, 3])
+
+    def foo1(x, a=1, c=0):
+        return x + a + c
+
+    def foo2(x, b=2, c=0):
+        return x + b + c
+
+    msg = r"foo1\(\) got an unexpected keyword argument 'b'"
+    with pytest.raises(TypeError, match=msg):
+        s.transform([foo1, foo2], 0, 3, b=3, c=4)
+
+    result = s.transform([foo1, foo2], 0, 3, c=4)
+    expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]})
+    tm.assert_frame_equal(result, expected)
+
+
 @pytest.mark.parametrize("box", [dict, Series])
 def test_transform_dictlike(string_series, box):
     # GH 35964