# Patch summary. These lines sit ahead of the first "diff --git" header and are
# not part of any hunk (patch tools are expected to skip leading text -- confirm
# with your tooling before applying).
#
# python/cudf/cudf/core/groupby/groupby.py
#   * indices: no longer unpacks self._grouped() and splits
#     grouped_values.index.values. It now passes a column built from
#     range(len(self.obj)) through self._groupby.groups(...), de-duplicates the
#     returned group keys, wraps them in a cudf.MultiIndex when there is more
#     than one key column (otherwise a cudf.Index), and zips index.to_pandas()
#     with cp.split(indices.values, offsets[1:-1]) into the returned dict.
#   * get_group: returns obj.iloc[self.indices[name]] instead of
#     obj.loc[self.groups[name].drop_duplicates()].
#
# python/cudf/cudf/tests/groupby/test_groupby_obj.py (new file)
#   * test_groupby_14955: builds a frame whose index is [0] * 4, groups by "a",
#     and for every key compares .indices (after .get()) and .get_group()
#     against the pandas groupby of the same frame.
#
# NOTE(review): the added lines use cp, libcudf and size_type_dtype, but no
#   import lines are touched by this patch -- presumably they are already
#   imported in groupby.py; confirm.
# NOTE(review): the dict values change from np.split(...) results to
#   cp.split(...) results (the new test calls .get() on them). The docstring
#   example kept as context still prints "array([0, 1])" -- verify it matches
#   the new return type.
# NOTE(review): presumably motivated by label-based .loc lookup being ambiguous
#   when index labels repeat; see the issue URL inside the test to confirm.
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index e4370be304a..caf5ac5928f 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -363,13 +363,22 @@ def indices(self):
         >>> df.groupby(by=["a"]).indices
         {10: array([0, 1]), 40: array([2])}
         """
-        group_names, offsets, _, grouped_values = self._grouped()
+        offsets, group_keys, (indices,) = self._groupby.groups(
+            [
+                cudf.core.column.as_column(
+                    range(len(self.obj)), dtype=size_type_dtype
+                )
+            ]
+        )
 
+        group_keys = libcudf.stream_compaction.drop_duplicates(group_keys)
+        if len(group_keys) > 1:
+            index = cudf.MultiIndex.from_arrays(group_keys)
+        else:
+            (group_keys,) = group_keys
+            index = cudf.Index(group_keys)
         return dict(
-            zip(
-                group_names.to_pandas(),
-                np.split(grouped_values.index.values, offsets[1:-1]),
-            )
+            zip(index.to_pandas(), cp.split(indices.values, offsets[1:-1]))
         )
 
     @_cudf_nvtx_annotate
@@ -414,8 +423,7 @@ def get_group(self, name, obj=None):
                 "instead of ``gb.get_group(name, obj=df)``.",
                 FutureWarning,
             )
-
-        return obj.loc[self.groups[name].drop_duplicates()]
+        return obj.iloc[self.indices[name]]
 
     @_cudf_nvtx_annotate
     def size(self):
diff --git a/python/cudf/cudf/tests/groupby/test_groupby_obj.py b/python/cudf/cudf/tests/groupby/test_groupby_obj.py
new file mode 100644
index 00000000000..04b483e08dc
--- /dev/null
+++ b/python/cudf/cudf/tests/groupby/test_groupby_obj.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from numpy.testing import assert_array_equal
+
+import cudf
+from cudf.testing._utils import assert_eq
+
+
+def test_groupby_14955():
+    # https://github.com/rapidsai/cudf/issues/14955
+    df = cudf.DataFrame({"a": [1, 2] * 2}, index=[0] * 4)
+    agg = df.groupby("a")
+    pagg = df.to_pandas().groupby("a")
+    for key in agg.groups:
+        assert_array_equal(pagg.indices[key], agg.indices[key].get())
+        assert_eq(pagg.get_group(key), agg.get_group(key))