From 2ed8505d50e5b93385dd85bd994d1d3e914ddef4 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Sun, 27 Aug 2023 10:51:00 -0700 Subject: [PATCH] Fix CategoricalIndex ordering issues in groupby --- python/cudf/cudf/core/groupby/groupby.py | 9 +++++++++ python/cudf/cudf/tests/test_groupby.py | 16 ++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index cf4c861c28f..38b07eca330 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -613,6 +613,15 @@ def agg(self, func): how="left", ) result = result.take(indices) + if isinstance(result._index, cudf.CategoricalIndex): + # Reorder the categories so they follow the order + # of the group keys in the grouped result. + result._index = cudf.Index( + result._index._column.reorder_categories( + result._index._column._get_decategorized_column() + ), + name=result._index.name, + ) if not self._as_index: result = result.reset_index() diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index b48ce210104..2ab8b29f224 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -3440,3 +3440,19 @@ def test_groupby_consecutive_operations(): expected = pg.cumsum() assert_groupby_results_equal(actual, expected, check_dtype=False) + + +def test_categorical_grouping_pandas_compatibility(): + gdf = cudf.DataFrame( + { + "key": cudf.Series([2, 1, 3, 1, 1], dtype="category"), + "a": [0, 1, 3, 2, 3], + } + ) + pdf = gdf.to_pandas() + + with cudf.option_context("mode.pandas_compatible", True): + actual = gdf.groupby("key", sort=False).sum() + expected = pdf.groupby("key", sort=False).sum() + + assert_eq(actual, expected)