pandas-dev · jreback · Mar 1, 2018 · Feb 26, 2018 · Feb 26, 2018 · Feb 26, 2018
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -890,6 +890,7 @@ Groupby/Resample/Rolling
 - Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`)
 - Bug in :func:`DataFrame.resample().aggregate` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`)
 - Fixed a performance regression for ``GroupBy.nth`` and ``GroupBy.last`` with some object columns (:issue:`19283`)
+- Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` where `skipna` is not an option (:issue:`19806`)
 
 Sparse
 ^^^^^^

diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -139,7 +139,8 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
 def group_cumprod_float64(float64_t[:, :] out,
                           float64_t[:, :] values,
                           int64_t[:] labels,
-                          bint is_datetimelike):
+                          bint is_datetimelike,
+                          bint skipna=True):
     """
     Only transforms on axis=0
     """
@@ -163,14 +164,20 @@ def group_cumprod_float64(float64_t[:, :] out,
                 if val == val:
                     accum[lab, j] *= val
                     out[i, j] = accum[lab, j]
+                else:
+                    out[i, j] = NaN
+                    if not skipna:
+                        accum[lab, j] = NaN
+                        break
 
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_cumsum(numeric[:, :] out,
                  numeric[:, :] values,
                  int64_t[:] labels,
-                 is_datetimelike):
+                 is_datetimelike,
+                 bint skipna=True):
     """
     Only transforms on axis=0
     """
@@ -196,6 +203,11 @@ def group_cumsum(numeric[:, :] out,
                     if val == val:
                         accum[lab, j] += val
                         out[i, j] = accum[lab, j]
+                    else:
+                        out[i, j] = NaN
+                        if not skipna:
+                            accum[lab, j] = NaN
+                            break
                 else:
                     accum[lab, j] += val
                     out[i, j] = accum[lab, j]

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -1839,7 +1839,8 @@ def rank(self, method='average', ascending=True, na_option='keep',
     @Appender(_doc_template)
     def cumprod(self, axis=0, *args, **kwargs):
         """Cumulative product for each group"""
-        nv.validate_groupby_func('cumprod', args, kwargs, ['numeric_only'])
+        nv.validate_groupby_func('cumprod', args, kwargs,
+                                 ['numeric_only', 'skipna'])
         if axis != 0:
             return self.apply(lambda x: x.cumprod(axis=axis, **kwargs))
 
@@ -1849,7 +1850,8 @@ def cumprod(self, axis=0, *args, **kwargs):
     @Appender(_doc_template)
     def cumsum(self, axis=0, *args, **kwargs):
         """Cumulative sum for each group"""
-        nv.validate_groupby_func('cumsum', args, kwargs, ['numeric_only'])
+        nv.validate_groupby_func('cumsum', args, kwargs,
+                                 ['numeric_only', 'skipna'])
         if axis != 0:
             return self.apply(lambda x: x.cumsum(axis=axis, **kwargs))
 

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -2521,6 +2521,34 @@ def test_groupby_cumprod(self):
         expected.name = 'value'
         tm.assert_series_equal(actual, expected)
 
+    def test_groupby_cum_skipna(self):
+        # GH 19806
+        # make sure that skipna works for both cumsum and cumprod
+        df = pd.DataFrame({'key': ['b'] * 10 + ['a'] * 2, 'value': 3})
+        df.at[3] = np.nan
+        df.at[3, 'key'] = 'b'
+
+        result = df.groupby('key')['value'].cumprod(skipna=False)
+        expected = pd.Series([3.0, 9.0, 27.0, np.nan, np.nan, np.nan, np.nan,
+                              np.nan, np.nan, np.nan, 3.0, 9.0],
+                             name='value', index=range(12))
+        tm.assert_series_equal(result, expected)
+
+        result = df.groupby('key')['value'].cumsum(skipna=False)
+        expected = pd.Series([3.0, 6.0, 9.0, np.nan, np.nan, np.nan, np.nan,
+                              np.nan, np.nan, np.nan, 3.0, 6.0],
+                             name='value', index=range(12))
+        tm.assert_series_equal(result, expected)
+
+        df = pd.DataFrame({'key': ['b'] * 10, 'value': np.nan})
+        result = df.groupby('key')['value'].cumprod(skipna=False)
+        expected = pd.Series([np.nan] * 10, name='value', index=range(10))
+        tm.assert_series_equal(result, expected)
+
+        result = df.groupby('key')['value'].cumsum(skipna=False)
+        expected = pd.Series([np.nan] * 10, name='value', index=range(10))
+        tm.assert_series_equal(result, expected)
+
     def test_ops_general(self):
         ops = [('mean', np.mean),
                ('median', np.median),