pandas-dev · jreback · Mar 1, 2018 · Feb 26, 2018 · Feb 26, 2018 · Feb 26, 2018
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -890,6 +890,7 @@ Groupby/Resample/Rolling
 - Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`)
 - Bug in :func:`DataFrame.resample().aggregate` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`)
 - Fixed a performance regression for ``GroupBy.nth`` and ``GroupBy.last`` with some object columns (:issue:`19283`)
+- Bug in :func:`DataFrame.groupby.cumsum` and :func:`DataFrame.groupby.cumprod` where the ``skipna`` keyword argument was not accepted
 
 Sparse
 ^^^^^^

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -139,7 +139,8 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
 def group_cumprod_float64(float64_t[:, :] out,
                           float64_t[:, :] values,
                           int64_t[:] labels,
-                          bint is_datetimelike):
+                          bint is_datetimelike,
+                          bint skipna=True):
     """
     Only transforms on axis=0
     """
@@ -163,14 +164,21 @@ def group_cumprod_float64(float64_t[:, :] out,
                 if val == val:
                     accum[lab, j] *= val
                     out[i, j] = accum[lab, j]
+                if val != val:
+                    if skipna:
+                        out[i, j] = NaN
+                    else:
+                        accum[lab, j] = NaN
+                        out[i, j] = accum[lab, j]
 
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_cumsum(numeric[:, :] out,
                  numeric[:, :] values,
                  int64_t[:] labels,
-                 is_datetimelike):
+                 is_datetimelike,
+                 bint skipna=True):
     """
     Only transforms on axis=0
     """
@@ -196,6 +204,12 @@ def group_cumsum(numeric[:, :] out,
                     if val == val:
                         accum[lab, j] += val
                         out[i, j] = accum[lab, j]
+                    if val != val:
+                        if skipna:
+                            out[i, j] = NaN
+                        else:
+                            accum[lab, j] = NaN
+                            out[i, j] = accum[lab, j]
                 else:
                     accum[lab, j] += val
                     out[i, j] = accum[lab, j]

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -1839,7 +1839,8 @@ def rank(self, method='average', ascending=True, na_option='keep',
     @Appender(_doc_template)
     def cumprod(self, axis=0, *args, **kwargs):
         """Cumulative product for each group"""
-        nv.validate_groupby_func('cumprod', args, kwargs, ['numeric_only'])
+        nv.validate_groupby_func('cumprod', args, kwargs,
+                                 ['numeric_only', 'skipna'])
         if axis != 0:
             return self.apply(lambda x: x.cumprod(axis=axis, **kwargs))
 
@@ -1849,7 +1850,8 @@ def cumprod(self, axis=0, *args, **kwargs):
     @Appender(_doc_template)
     def cumsum(self, axis=0, *args, **kwargs):
         """Cumulative sum for each group"""
-        nv.validate_groupby_func('cumsum', args, kwargs, ['numeric_only'])
+        nv.validate_groupby_func('cumsum', args, kwargs,
+                                 ['numeric_only', 'skipna'])
         if axis != 0:
             return self.apply(lambda x: x.cumsum(axis=axis, **kwargs))
 

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -2521,6 +2521,26 @@ def test_groupby_cumprod(self):
         expected.name = 'value'
         tm.assert_series_equal(actual, expected)
 
+        # skipna=False propagates NaN within a group; the default skips it
+        df = pd.concat(
+            [pd.DataFrame({'x': [1.0, 2.0, np.nan, np.nan, 3.0, 2.0],
+                           'gp': 'a'}),
+             pd.DataFrame({'x': [2.0, 5.0, 6.0, 1.0, np.nan, 1.0],
+                           'gp': 'b'})])
+        result = df.groupby('gp')['x'].cumprod(skipna=False)
+        expected = pd.Series([1.0, 2.0, np.nan, np.nan, np.nan, np.nan,
+                              2.0, 10.0, 60.0, 60.0, np.nan, np.nan],
+                             name='x', index=(0, 1, 2, 3, 4, 5,
+                                              0, 1, 2, 3, 4, 5))
+        tm.assert_series_equal(result, expected)
+
+        result = df.groupby('gp')['x'].cumprod()
+        expected = pd.Series([1.0, 2.0, np.nan, np.nan, 6.0, 12.0,
+                              2.0, 10.0, 60.0, 60.0, np.nan, 60.0],
+                             name='x', index=(0, 1, 2, 3, 4, 5,
+                                              0, 1, 2, 3, 4, 5))
+        tm.assert_series_equal(result, expected)
+
     def test_ops_general(self):
         ops = [('mean', np.mean),
                ('median', np.median),