diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 0ec293ae3f0..5fe4a5eb30f 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -553,7 +553,8 @@ std::unique_ptr groupby(table_view const& keys,
                                rmm::cuda_stream_view stream,
                                rmm::device_async_resource_ref mr)
 {
-  auto const num_keys = keys.num_rows();
+  // convert to int64_t to avoid potential overflow with large `keys`
+  auto const num_keys = static_cast<int64_t>(keys.num_rows());
   auto const null_keys_are_equal = null_equality::EQUAL;
   auto const has_null = nullate::DYNAMIC{cudf::has_nested_nulls(keys)};
 
diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index dc6eb55fc6a..050bcbb268f 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -7838,11 +7838,12 @@ void testSumWithStrings() {
         .build();
          Table result = t.groupBy(0).aggregate(
              GroupByAggregation.sum().onColumn(1));
+         Table sorted = result.orderBy(OrderByArg.asc(0));
          Table expected = new Table.TestBuilder()
              .column("1-URGENT", "3-MEDIUM")
              .column(5289L + 5303L, 5203L + 5206L)
              .build()) {
-      assertTablesAreEqual(expected, result);
+      assertTablesAreEqual(expected, sorted);
     }
   }
 
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index d08268eea3a..77b54a583d3 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -1308,7 +1308,7 @@ def pipe(self, func, *args, **kwargs):
         To get the difference between each groups maximum and minimum value
         in one pass, you can do
 
-        >>> df.groupby('A').pipe(lambda x: x.max() - x.min())
+        >>> df.groupby('A', sort=True).pipe(lambda x: x.max() - x.min())
            B
         A
         a  2