Skip to content

Commit

Permalink
Backport PR #35562: BUG: Ensure rolling groupby doesn't segfault with…
Browse files Browse the repository at this point in the history
… center=True (#35610)

Co-authored-by: Matthew Roeschke <[email protected]>
  • Loading branch information
meeseeksmachine and mroeschke authored Aug 7, 2020
1 parent 3b00652 commit c1676ce
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Fixed regressions
- Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`).
- Fixed regression in :class:`pandas.core.groupby.RollingGroupby` where column selection was ignored (:issue:`35486`)
- Fixed regression in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`)
- Fixed regression in ``.groupby(..).rolling(..)`` where a segfault would occur with ``center=True`` and an odd number of values (:issue:`35552`)

.. ---------------------------------------------------------------------------
Expand Down
6 changes: 6 additions & 0 deletions pandas/core/window/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,4 +319,10 @@ def get_window_bounds(
end_arrays.append(window_indicies.take(end))
start = np.concatenate(start_arrays)
end = np.concatenate(end_arrays)
# GH 35552: Need to adjust start and end based on the nans appended to values
# when center=True
if num_values > len(start):
offset = num_values - len(start)
start = np.concatenate([start, np.array([end[-1]] * offset)])
end = np.concatenate([end, np.array([end[-1]] * offset)])
return start, end
65 changes: 65 additions & 0 deletions pandas/tests/window/test_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,71 @@ def foo(x):
)
tm.assert_series_equal(result, expected)

def test_groupby_rolling_center_center(self):
# GH 35552
series = Series(range(1, 6))
result = series.groupby(series).rolling(center=True, window=3).mean()
expected = Series(
[np.nan] * 5,
index=pd.MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3), (5, 4))),
)
tm.assert_series_equal(result, expected)

series = Series(range(1, 5))
result = series.groupby(series).rolling(center=True, window=3).mean()
expected = Series(
[np.nan] * 4,
index=pd.MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3))),
)
tm.assert_series_equal(result, expected)

df = pd.DataFrame({"a": ["a"] * 5 + ["b"] * 6, "b": range(11)})
result = df.groupby("a").rolling(center=True, window=3).mean()
expected = pd.DataFrame(
[np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, 9, np.nan],
index=pd.MultiIndex.from_tuples(
(
("a", 0),
("a", 1),
("a", 2),
("a", 3),
("a", 4),
("b", 5),
("b", 6),
("b", 7),
("b", 8),
("b", 9),
("b", 10),
),
names=["a", None],
),
columns=["b"],
)
tm.assert_frame_equal(result, expected)

df = pd.DataFrame({"a": ["a"] * 5 + ["b"] * 5, "b": range(10)})
result = df.groupby("a").rolling(center=True, window=3).mean()
expected = pd.DataFrame(
[np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, np.nan],
index=pd.MultiIndex.from_tuples(
(
("a", 0),
("a", 1),
("a", 2),
("a", 3),
("a", 4),
("b", 5),
("b", 6),
("b", 7),
("b", 8),
("b", 9),
),
names=["a", None],
),
columns=["b"],
)
tm.assert_frame_equal(result, expected)

def test_groupby_subselect_rolling(self):
# GH 35486
df = DataFrame(
Expand Down

0 comments on commit c1676ce

Please sign in to comment.