Skip to content

Commit

Permalink
Fix CategoricalIndex ordering issues in groupby
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Aug 27, 2023
1 parent a025db5 commit f83e8b0
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 0 deletions.
9 changes: 9 additions & 0 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,15 @@ def agg(self, func):
how="left",
)
result = result.take(indices)
if isinstance(result._index, cudf.CategoricalIndex):
# Needs re-ordering the categories in the order
# they are after grouping.
result._index = cudf.Index(
result._index._column.reorder_categories(
result._index._column._get_decategorized_column()
),
name=result._index.name,
)

if not self._as_index:
result = result.reset_index()
Expand Down
16 changes: 16 additions & 0 deletions python/cudf/cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -3440,3 +3440,19 @@ def test_groupby_consecutive_operations():
expected = pg.cumsum()

assert_groupby_results_equal(actual, expected, check_dtype=False)


def test_categorical_grouping_pandas_compatibility():
    """Compare an unsorted groupby-sum on a categorical key with pandas.

    A small frame is built whose ``key`` column is a categorical series
    with repeated, out-of-order values. The same ``groupby("key",
    sort=False).sum()`` is evaluated by pandas (on the converted frame)
    and by cudf with the ``mode.pandas_compatible`` option switched on,
    and the two results are compared with ``assert_eq``.
    """
    keys = cudf.Series([2, 1, 3, 1, 1], dtype="category")
    gdf = cudf.DataFrame({"key": keys, "a": [0, 1, 3, 2, 3]})
    pdf = gdf.to_pandas()

    # Reference result from pandas; computed on the pandas frame, so it
    # does not involve the cudf option set below.
    expected = pdf.groupby("key", sort=False).sum()

    # Only the cudf computation runs under pandas-compatible mode.
    with cudf.option_context("mode.pandas_compatible", True):
        actual = gdf.groupby("key", sort=False).sum()

    assert_eq(actual, expected)

0 comments on commit f83e8b0

Please sign in to comment.