From a39897c108d44a4d5e027ca741be5462863eeefc Mon Sep 17 00:00:00 2001
From: Suraj Aralihalli <suraj.ara16@gmail.com>
Date: Tue, 23 Jan 2024 00:14:31 -0800
Subject: [PATCH] Fix Aggregation Type Promotion: Ensure Unsigned Input Types
 Result in Unsigned Output for Sum and Multiply (#14679)

During aggregation, output types are modified to prevent overflow. Presently, summing INT32 yields INT64, but summing UINT32 still results in INT64 instead of UINT64. This pull request resolves Issue #[10149](https://github.com/rapidsai/cudf/issues/10149) to ensure the correct output type is used when summing or multiplying integers.

Authors:
  - Suraj Aralihalli (https://github.com/SurajAralihalli)
  - Karthikeyan (https://github.com/karthikeyann)
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Shruti Shivakumar (https://github.com/shrshi)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/14679
---
 cpp/include/cudf/detail/aggregation/aggregation.hpp |  6 +++---
 cpp/tests/groupby/sum_tests.cpp                     | 13 ++++++++-----
 2 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp
index 784f05a964e..c35d56b4c13 100644
--- a/cpp/include/cudf/detail/aggregation/aggregation.hpp
+++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -1234,12 +1234,12 @@ constexpr bool is_sum_product_agg(aggregation::Kind k)
          (k == aggregation::SUM_OF_SQUARES);
 }
 
-// Summing/Multiplying integers of any type, always use int64_t accumulator
+// Summing/Multiplying integers of any type, always use uint64_t for unsigned and int64_t for signed
 template <typename Source, aggregation::Kind k>
 struct target_type_impl<Source,
                         k,
                         std::enable_if_t<std::is_integral_v<Source> && is_sum_product_agg(k)>> {
-  using type = int64_t;
+  using type = std::conditional_t<std::is_unsigned_v<Source>, uint64_t, int64_t>;
 };
 
 // Summing fixed_point numbers
diff --git a/cpp/tests/groupby/sum_tests.cpp b/cpp/tests/groupby/sum_tests.cpp
index 35e8fd18a4d..abf25eb0aa9 100644
--- a/cpp/tests/groupby/sum_tests.cpp
+++ b/cpp/tests/groupby/sum_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -28,10 +28,10 @@ using namespace cudf::test::iterators;
 template <typename V>
 struct groupby_sum_test : public cudf::test::BaseFixture {};
 
-using K = int32_t;
-using supported_types =
-  cudf::test::Concat<cudf::test::Types<int8_t, int16_t, int32_t, int64_t, float, double>,
-                     cudf::test::DurationTypes>;
+using K               = int32_t;
+using supported_types = cudf::test::Concat<
+  cudf::test::Types<int8_t, int16_t, int32_t, int64_t, float, double, uint16_t, uint64_t>,
+  cudf::test::DurationTypes>;
 
 TYPED_TEST_SUITE(groupby_sum_test, supported_types);
 
@@ -40,6 +40,9 @@ TYPED_TEST(groupby_sum_test, basic)
   using V = TypeParam;
   using R = cudf::detail::target_type_t<V, cudf::aggregation::SUM>;
 
+  static_assert(std::is_signed_v<R> == std::is_signed_v<V>,
+                "Both Result type and Source type must have same signedness");
+
   cudf::test::fixed_width_column_wrapper<K> keys{1, 2, 3, 1, 2, 2, 1, 3, 3, 2};
   cudf::test::fixed_width_column_wrapper<V> vals{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};