Skip to content

Commit

Permalink
Reorder grouped outputs
Browse files Browse the repository at this point in the history
  • Loading branch information
shwina committed Feb 8, 2024
1 parent e60a054 commit 2088742
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 11 deletions.
10 changes: 5 additions & 5 deletions python/cudf/cudf/_lib/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -120,23 +120,23 @@ cdef class GroupBy:
Returns
-------
offsets: list of integers
Integer offsets such that offsets[i+1] - offsets[i]
represents the size of group `i`.
grouped_keys: list of Columns
The grouped key columns
grouped_values: list of Columns
The grouped value columns
offsets: list of integers
Integer offsets such that offsets[i+1] - offsets[i]
represents the size of group `i`.
"""
grouped_keys, grouped_values, offsets = self._groupby.get_groups(
offsets, grouped_keys, grouped_values = self._groupby.get_groups(
pylibcudf.table.Table([c.to_pylibcudf(mode="read") for c in values])
if values else None
)

return (
offsets,
columns_from_pylibcudf_table(grouped_keys),
columns_from_pylibcudf_table(grouped_values),
offsets,
)

def aggregate(self, values, aggregations):
Expand Down
8 changes: 4 additions & 4 deletions python/cudf/cudf/_lib/pylibcudf/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -261,26 +261,26 @@ cdef class GroupBy:
Returns
-------
Tuple[Table, Table, List[int]]
Tuple[List[int], Table, Table]
A tuple containing three items:
- A list of integer offsets into the group keys/values
- A table of group keys
- A table of group values or None
- A list of integer offsets into the tables
"""

cdef groups c_groups
if values:
c_groups = dereference(self.c_obj).get_groups(values.view())
return (
c_groups.offsets,
Table.from_libcudf(move(c_groups.keys)),
Table.from_libcudf(move(c_groups.values)),
c_groups.offsets,
)
else:
# c_groups.values is nullptr
c_groups = dereference(self.c_obj).get_groups()
return (
c_groups.offsets,
Table.from_libcudf(move(c_groups.keys)),
None,
c_groups.offsets,
)
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1179,7 +1179,7 @@ def deserialize(cls, header, frames):
return cls(obj, grouping, **kwargs)

def _grouped(self):
grouped_key_cols, grouped_value_cols, offsets = self._groupby.groups(
offsets, grouped_key_cols, grouped_value_cols = self._groupby.groups(
[*self.obj._index._columns, *self.obj._columns]
)
grouped_keys = cudf.core.index._index_from_columns(grouped_key_cols)
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3762,5 +3762,5 @@ def test_groupby_internal_groups_empty(gdf):
# test that we don't segfault when calling the internal
# .groups() method with an empty list:
gb = gdf.groupby("y")._groupby
_, grouped_vals, _ = gb.groups([])
_, _, grouped_vals = gb.groups([])
assert grouped_vals is None

0 comments on commit 2088742

Please sign in to comment.