Skip to content

Commit

Permalink
Review
Browse files Browse the repository at this point in the history
  • Loading branch information
shwina committed Feb 7, 2024
1 parent 0f4849d commit e60a054
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 4 deletions.
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/groupby.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ cdef class GroupByRequest:

cdef class GroupBy:
cdef unique_ptr[groupby] c_obj
+ cdef Table _keys
cpdef tuple aggregate(self, list requests)
cpdef tuple scan(self, list requests)
cpdef tuple shift(self, Table values, list offset, list fill_values)
Expand Down
9 changes: 6 additions & 3 deletions python/cudf/cudf/_lib/pylibcudf/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ cdef class GroupBy:
sorted keys_are_sorted=sorted.NO
):
self.c_obj.reset(new groupby(keys.view(), null_handling, keys_are_sorted))
+ # keep a reference to the keys table so it doesn't get
+ # deallocated from under us:
+ self._keys = keys

@staticmethod
cdef tuple _parse_outputs(
Expand Down Expand Up @@ -254,14 +257,14 @@ cdef class GroupBy:
----------
values : Table, optional
The columns to get group labels for. If not specified,
- an empty table is returned for the group values.
+ `None` is returned for the group values.
Returns
-------
Tuple[Table, Table, List[int]]
A tuple of tables containing three items:
- A table of group keys
- - A table of group values
+ - A table of group values or None
- A list of integer offsets into the tables
"""

Expand All @@ -278,6 +281,6 @@ cdef class GroupBy:
c_groups = dereference(self.c_obj).get_groups()
return (
Table.from_libcudf(move(c_groups.keys)),
- Table([]),
+ None,
c_groups.offsets,
)
3 changes: 2 additions & 1 deletion python/cudf/cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3762,4 +3762,5 @@ def test_groupby_internal_groups_empty(gdf):
# test that we don't segfault when calling the internal
# .groups() method with an empty list:
gb = gdf.groupby("y")._groupby
- gb.groups([])
+ _, grouped_vals, _ = gb.groups([])
+ assert grouped_vals is None

0 comments on commit e60a054

Please sign in to comment.