diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst index 043b817bb9026..c5f7905bc8a20 100644 --- a/doc/source/whatsnew/v1.1.4.rst +++ b/doc/source/whatsnew/v1.1.4.rst @@ -21,6 +21,7 @@ Fixed regressions - Fixed regression in :meth:`Series.astype` converting ``None`` to ``"nan"`` when casting to string (:issue:`36904`) - Fixed regression in :class:`RollingGroupby` causing a segmentation fault with Index of dtype object (:issue:`36727`) - Fixed regression in :meth:`DataFrame.resample(...).apply(...)` raised ``AttributeError`` when input was a :class:`DataFrame` and only a :class:`Series` was evaluated (:issue:`36951`) +- Fixed regression in ``DataFrame.groupby(..).std()`` with nullable integer dtype (:issue:`37415`) - Fixed regression in :class:`PeriodDtype` comparing both equal and unequal to its string representation (:issue:`37265`) - Fixed regression in certain offsets (:meth:`pd.offsets.Day() ` and below) no longer being hashable (:issue:`37267`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ab01f99ba11f9..4cf70db96e288 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2573,9 +2573,9 @@ def _get_cythonized_result( except TypeError as e: error_msg = str(e) continue + vals = vals.astype(cython_dtype, copy=False) if needs_2d: vals = vals.reshape((-1, 1)) - vals = vals.astype(cython_dtype, copy=False) func = partial(func, vals) func = partial(func, labels) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index e01855c1b7761..c907391917ca8 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -277,3 +277,38 @@ def test_read_only_buffer_source_agg(agg): expected = df.copy().groupby(["species"]).agg({"sepal_length": agg}) tm.assert_equal(result, expected) + + +@pytest.mark.parametrize( + "op_name", + [ + "count", + "sum", + "std", + "var", + "sem", + "mean", + "median", + "prod", + "min", + "max", + ], +) +def test_cython_agg_nullable_int(op_name): + # ensure that the cython-based aggregations don't fail for nullable dtype + # (eg https://github.com/pandas-dev/pandas/issues/37415) + df = DataFrame( + { + "A": ["A", "B"] * 5, + "B": pd.array([1, 2, 3, 4, 5, 6, 7, 8, 9, pd.NA], dtype="Int64"), + } + ) + result = getattr(df.groupby("A")["B"], op_name)() + df2 = df.assign(B=df["B"].astype("float64")) + expected = getattr(df2.groupby("A")["B"], op_name)() + + if op_name != "count": + # the result is not yet consistently using Int64/Float64 dtype, + # so for now just checking the values by casting to float + result = result.astype("float64") + tm.assert_series_equal(result, expected)