Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,7 @@ Groupby/Resample/Rolling
- Bug in :func:`DataFrame.groupby` passing the `on=` kwarg, and subsequently using ``.apply()`` (:issue:`17813`)
- Bug in :func:`DataFrame.resample().aggregate` not raising a ``KeyError`` when aggregating a non-existent column (:issue:`16766`, :issue:`19566`)
- Fixed a performance regression for ``GroupBy.nth`` and ``GroupBy.last`` with some object columns (:issue:`19283`)
- Bug in :func:`DataFrameGroupBy.cumsum` and :func:`DataFrameGroupBy.cumprod` where `skipna` is not an option (:issue:`19806`)

Sparse
^^^^^^
Expand Down
16 changes: 14 additions & 2 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
def group_cumprod_float64(float64_t[:, :] out,
float64_t[:, :] values,
int64_t[:] labels,
bint is_datetimelike):
bint is_datetimelike,
bint skipna=True):
"""
Only transforms on axis=0
"""
Expand All @@ -163,14 +164,20 @@ def group_cumprod_float64(float64_t[:, :] out,
if val == val:
accum[lab, j] *= val
out[i, j] = accum[lab, j]
else:
out[i, j] = NaN
if not skipna:
accum[lab, j] = NaN
break


@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumsum(numeric[:, :] out,
numeric[:, :] values,
int64_t[:] labels,
is_datetimelike):
is_datetimelike,
bint skipna=True):
"""
Only transforms on axis=0
"""
Expand All @@ -196,6 +203,11 @@ def group_cumsum(numeric[:, :] out,
if val == val:
accum[lab, j] += val
out[i, j] = accum[lab, j]
else:
out[i, j] = NaN
if not skipna:
accum[lab, j] = NaN
break
else:
accum[lab, j] += val
out[i, j] = accum[lab, j]
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1839,7 +1839,8 @@ def rank(self, method='average', ascending=True, na_option='keep',
@Appender(_doc_template)
def cumprod(self, axis=0, *args, **kwargs):
"""Cumulative product for each group"""
nv.validate_groupby_func('cumprod', args, kwargs, ['numeric_only'])
nv.validate_groupby_func('cumprod', args, kwargs,
['numeric_only', 'skipna'])
if axis != 0:
return self.apply(lambda x: x.cumprod(axis=axis, **kwargs))

Expand All @@ -1849,7 +1850,8 @@ def cumprod(self, axis=0, *args, **kwargs):
@Appender(_doc_template)
def cumsum(self, axis=0, *args, **kwargs):
"""Cumulative sum for each group"""
nv.validate_groupby_func('cumsum', args, kwargs, ['numeric_only'])
nv.validate_groupby_func('cumsum', args, kwargs,
['numeric_only', 'skipna'])
if axis != 0:
return self.apply(lambda x: x.cumsum(axis=axis, **kwargs))

Expand Down
28 changes: 28 additions & 0 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2521,6 +2521,34 @@ def test_groupby_cumprod(self):
expected.name = 'value'
tm.assert_series_equal(actual, expected)

def test_groupby_cum_skipna(self):
# GH 19806
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this needs to go with the other cumsum tests in pandas/tests/groupby/test_transform.py. put near the other cum* tests.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I moved it to test_transform.py

# make sure that skipna works for both cumsum and cumprod
df = pd.DataFrame({'key': ['b'] * 10 + ['a'] * 2, 'value': 3})
df.at[3] = np.nan
df.at[3, 'key'] = 'b'

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ideally you could parameterize this over skipna=True/False

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I parametrized it over cumprod/sum & skipna.

result = df.groupby('key')['value'].cumprod(skipna=False)
expected = pd.Series([3.0, 9.0, 27.0, np.nan, np.nan, np.nan, np.nan,
np.nan, np.nan, np.nan, 3.0, 9.0],
name='value', index=range(12))
tm.assert_series_equal(result, expected)

result = df.groupby('key')['value'].cumsum(skipna=False)
expected = pd.Series([3.0, 6.0, 9.0, np.nan, np.nan, np.nan, np.nan,
np.nan, np.nan, np.nan, 3.0, 6.0],
name='value', index=range(12))
tm.assert_series_equal(result, expected)

df = pd.DataFrame({'key': ['b'] * 10, 'value': np.nan})
result = df.groupby('key')['value'].cumprod(skipna=False)
expected = pd.Series([np.nan] * 10, name='value', index=range(10))
tm.assert_series_equal(result, expected)

result = df.groupby('key')['value'].cumsum(skipna=False)
expected = pd.Series([np.nan] * 10, name='value', index=range(10))
tm.assert_series_equal(result, expected)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a test for cumsum with skipna=False`?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That should be covered by test_groupby_cum_skipna now.

def test_ops_general(self):
ops = [('mean', np.mean),
('median', np.median),
Expand Down