diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 0ec293ae3f0..5fe4a5eb30f 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -553,7 +553,8 @@ std::unique_ptr
groupby(table_view const& keys,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
- auto const num_keys = keys.num_rows();
+ // convert to int64_t to avoid potential overflow with large `keys`
+ auto const num_keys = static_cast(keys.num_rows());
auto const null_keys_are_equal = null_equality::EQUAL;
auto const has_null = nullate::DYNAMIC{cudf::has_nested_nulls(keys)};
diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index dc6eb55fc6a..050bcbb268f 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -7838,11 +7838,12 @@ void testSumWithStrings() {
.build();
Table result = t.groupBy(0).aggregate(
GroupByAggregation.sum().onColumn(1));
+ Table sorted = result.orderBy(OrderByArg.asc(0));
Table expected = new Table.TestBuilder()
.column("1-URGENT", "3-MEDIUM")
.column(5289L + 5303L, 5203L + 5206L)
.build()) {
- assertTablesAreEqual(expected, result);
+ assertTablesAreEqual(expected, sorted);
}
}
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index d08268eea3a..77b54a583d3 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -1308,7 +1308,7 @@ def pipe(self, func, *args, **kwargs):
To get the difference between each groups maximum and minimum value
in one pass, you can do
- >>> df.groupby('A').pipe(lambda x: x.max() - x.min())
+ >>> df.groupby('A', sort=True).pipe(lambda x: x.max() - x.min())
B
A
a 2