Fix Aggregation Type Promotion: Ensure Unsigned Input Types Result in…

… Unsigned Output for Sum and Multiply (#14679) During aggregation, output types are modified to prevent overflow. Presently, summing INT32 yields INT64, but summing UINT32 still results in INT64 instead of UINT64. This pull request resolves Issue #[10149](#10149) to ensure the correct output type is used when summing or multiplying integers. Authors: - Suraj Aralihalli (https://github.com/SurajAralihalli) - Karthikeyan (https://github.com/karthikeyann) - Nghia Truong (https://github.com/ttnghia) Approvers: - Nghia Truong (https://github.com/ttnghia) - Shruti Shivakumar (https://github.com/shrshi) - Karthikeyan (https://github.com/karthikeyann) URL: #14679
rapidsai · Jan 23, 2024 · a39897c · a39897c
1 parent ef3ce4b
commit a39897c
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 8 deletions.
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -1234,12 +1234,12 @@ constexpr bool is_sum_product_agg(aggregation::Kind k)
          (k == aggregation::SUM_OF_SQUARES);
 }
 
-// Summing/Multiplying integers of any type, always use int64_t accumulator
+// Summing/Multiplying integers of any type, always use uint64_t for unsigned and int64_t for signed
 template <typename Source, aggregation::Kind k>
 struct target_type_impl<Source,
                         k,
                         std::enable_if_t<std::is_integral_v<Source> && is_sum_product_agg(k)>> {
-  using type = int64_t;
+  using type = std::conditional_t<std::is_unsigned_v<Source>, uint64_t, int64_t>;
 };
 
 // Summing fixed_point numbers

diff --git a/cpp/tests/groupby/sum_tests.cpp b/cpp/tests/groupby/sum_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,10 +28,10 @@ using namespace cudf::test::iterators;
 template <typename V>
 struct groupby_sum_test : public cudf::test::BaseFixture {};
 
-using K = int32_t;
-using supported_types =
-  cudf::test::Concat<cudf::test::Types<int8_t, int16_t, int32_t, int64_t, float, double>,
-                     cudf::test::DurationTypes>;
+using K               = int32_t;
+using supported_types = cudf::test::Concat<
+  cudf::test::Types<int8_t, int16_t, int32_t, int64_t, float, double, uint16_t, uint64_t>,
+  cudf::test::DurationTypes>;
 
 TYPED_TEST_SUITE(groupby_sum_test, supported_types);
 
@@ -40,6 +40,9 @@ TYPED_TEST(groupby_sum_test, basic)
   using V = TypeParam;
   using R = cudf::detail::target_type_t<V, cudf::aggregation::SUM>;
 
+  static_assert(std::is_signed_v<R> == std::is_signed_v<V>,
+                "Both Result type and Source type must have same signedness");
+
   cudf::test::fixed_width_column_wrapper<K> keys{1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
   cudf::test::fixed_width_column_wrapper<V> vals{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};