From 72c0f4ee9356b6b9738c41fa0d2e4c27f91f4ed8 Mon Sep 17 00:00:00 2001 From: DriesS Date: Fri, 21 May 2021 03:16:04 +0200 Subject: [PATCH] BUG: agg order for list is not maintained (#41017) --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/apply.py | 14 +++++++-- pandas/tests/apply/test_frame_apply.py | 39 +++++++++++++++++++++++--- 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1eb22436204a8e..6f39dc4917024a 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -973,6 +973,7 @@ Other - Bug in :meth:`DataFrame.equals`, :meth:`Series.equals`, :meth:`Index.equals` with object-dtype containing ``np.datetime64("NaT")`` or ``np.timedelta64("NaT")`` (:issue:`39650`) - Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`) - Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`) +- Bug in :meth:`DataFrame.agg()` not sorting the aggregated axis in the order of the provided aggregation functions when one or more aggregation functions fail to produce results (:issue:`33634`) - Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`) - Bug in :class:`Series` backed by :class:`DatetimeArray` or :class:`TimedeltaArray` sometimes failing to set the array's ``freq`` to ``None`` (:issue:`41425`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index d0c6a1a841edb6..00b49c2f4f9511 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -376,12 +376,10 @@ def agg_list_like(self) -> FrameOrSeriesUnion: raise ValueError("no results") try: - return concat(results, keys=keys, axis=1, sort=False) + concatenated = concat(results, keys=keys, axis=1, sort=False) except TypeError as err: - # we are concatting non-NDFrame objects, # e.g. 
a list of scalars - from pandas import Series result = Series(results, index=keys, name=obj.name) @@ -390,6 +388,16 @@ def agg_list_like(self) -> FrameOrSeriesUnion: "cannot combine transform and aggregation operations" ) from err return result + else: + # Concat uses the first index to determine the final indexing order. + # The union of a shorter first index with the other indices causes + # the index sorting to be different from the order of the aggregating + # functions. Reindex if this is the case. + index_size = concatenated.index.size + full_ordered_index = next( + result.index for result in results if result.index.size == index_size + ) + return concatenated.reindex(full_ordered_index, copy=False) def agg_dict_like(self) -> FrameOrSeriesUnion: """ diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index fcccd0d846d0fd..cc91cdae942fd5 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1110,10 +1110,9 @@ def test_agg_multiple_mixed_no_warning(): with tm.assert_produces_warning(None): result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"]) - # For backwards compatibility, the result's index is - # still sorted by function name, so it's ['min', 'sum'] - # not ['sum', 'min']. - expected = expected[["D", "C", "B", "A"]] + # GH33634: the result of .agg should have an index that is sorted + # according to the arguments provided to agg. + expected = expected[["D", "C", "B", "A"]].reindex(["sum", "min"]) tm.assert_frame_equal(result, expected) @@ -1521,6 +1520,38 @@ def test_apply_np_reducer(float_frame, op, how): tm.assert_series_equal(result, expected) +def test_aggregation_func_column_order(): + # GH33634: the result of .agg should have an index that is sorted + # according to the arguments provided to agg. 
+ df = DataFrame( + [ + ("1", 1, 0, 0), + ("2", 2, 0, 0), + ("3", 3, 0, 0), + ("4", 4, 5, 4), + ("5", 5, 6, 6), + ("6", 6, 7, 7), + ], + columns=("item", "att1", "att2", "att3"), + ) + + def foo(s): + return s.sum() / 2 + + aggs = ["sum", foo, "count", "min"] + result = df.agg(aggs) + expected = DataFrame( + { + "item": ["123456", np.nan, 6, "1"], + "att1": [21.0, 10.5, 6.0, 1.0], + "att2": [18.0, 9.0, 6.0, 0.0], + "att3": [17.0, 8.5, 6.0, 0.0], + }, + index=["sum", "foo", "count", "min"], + ) + tm.assert_frame_equal(result, expected) + + def test_apply_getitem_axis_1(): # GH 13427 df = DataFrame({"a": [0, 1, 2], "b": [1, 2, 3]})