Skip to content

Commit

Permalink
Merge pull request #4796 from shwina/fix-gb-apply
Browse files Browse the repository at this point in the history
[REVIEW] Fix groupby apply for operations that fail on empty groups
  • Loading branch information
kkraus14 authored Apr 3, 2020
2 parents 7141165 + 694ee8b commit e095723
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
- PR #4756 Fix regex error checking for valid quantifier condition
- PR #4777 Fix data pointer for column slices of zero length
- PR #4770 Fix readonly flag in `Column.__cuda_array_interface__`
- PR #4796 Fix groupby apply for operations that fail on empty groups


# cuDF 0.13.0 (31 Mar 2020)
Expand Down
6 changes: 3 additions & 3 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import collections
import functools
import itertools
import pickle

import pandas as pd
Expand Down Expand Up @@ -327,8 +326,9 @@ def mult(df):
if not callable(function):
raise TypeError("type {!r} is not callable", type(function))
_, offsets, grouped_values = self._grouped()
ends = itertools.chain(offsets[1:], [None])
chunks = [grouped_values[s:e] for s, e in zip(offsets, ends)]
chunks = [
grouped_values[s:e] for s, e in zip(offsets[:-1], offsets[1:])
]
result = cudf.concat([function(chk) for chk in chunks])
if self._sort:
result = result.sort_index()
Expand Down
15 changes: 15 additions & 0 deletions python/cudf/cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1078,3 +1078,18 @@ def test_groupby_agg_combinations(agg):
assert_eq(
pdf.groupby("a").agg(agg), gdf.groupby("a").agg(agg), check_dtype=False
)


def test_groupby_apply_noempty_group():
    """Compare cuDF and pandas groupby-apply for a row-selecting function.

    The frame has two groups under key ``"a"``, each with exactly two
    rows, so selecting positions 0 and 1 with ``iloc`` is valid for every
    real group. The applied function would fail on a chunk with fewer
    rows, so the test only passes if ``apply`` hands it the actual groups.
    """
    pdf = pd.DataFrame(
        {"a": [1, 1, 2, 2], "b": [1, 2, 1, 2], "c": [1, 2, 3, 4]}
    )
    gdf = cudf.from_pandas(pdf)

    def first_two_rows(group):
        # Positional selection of rows 0 and 1 within a single group.
        return group.iloc[[0, 1]]

    # The index is dropped on both sides so only the values are compared.
    expected = (
        pdf.groupby("a").apply(first_two_rows).reset_index(drop=True)
    )
    actual = (
        gdf.groupby("a").apply(first_two_rows).reset_index(drop=True)
    )
    assert_eq(expected, actual)

0 comments on commit e095723

Please sign in to comment.