From a39897c108d44a4d5e027ca741be5462863eeefc Mon Sep 17 00:00:00 2001 From: Suraj Aralihalli Date: Tue, 23 Jan 2024 00:14:31 -0800 Subject: [PATCH] Fix Aggregation Type Promotion: Ensure Unsigned Input Types Result in Unsigned Output for Sum and Multiply (#14679) During aggregation, output types are modified to prevent overflow. Presently, summing INT32 yields INT64, but summing UINT32 still results in INT64 instead of UINT64. This pull request resolves Issue #[10149](https://github.com/rapidsai/cudf/issues/10149) to ensure the correct output type is used when summing or multiplying integers. Authors: - Suraj Aralihalli (https://github.com/SurajAralihalli) - Karthikeyan (https://github.com/karthikeyann) - Nghia Truong (https://github.com/ttnghia) Approvers: - Nghia Truong (https://github.com/ttnghia) - Shruti Shivakumar (https://github.com/shrshi) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/14679 --- cpp/include/cudf/detail/aggregation/aggregation.hpp | 6 +++--- cpp/tests/groupby/sum_tests.cpp | 13 ++++++++----- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index 784f05a964e..c35d56b4c13 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1234,12 +1234,12 @@ constexpr bool is_sum_product_agg(aggregation::Kind k) (k == aggregation::SUM_OF_SQUARES); } -// Summing/Multiplying integers of any type, always use int64_t accumulator +// Summing/Multiplying integers of any type, always use uint64_t for unsigned and int64_t for signed template struct target_type_impl && is_sum_product_agg(k)>> { - using type = int64_t; + using type = std::conditional_t, uint64_t, int64_t>; }; // Summing fixed_point numbers diff --git a/cpp/tests/groupby/sum_tests.cpp b/cpp/tests/groupby/sum_tests.cpp index 35e8fd18a4d..abf25eb0aa9 100644 --- a/cpp/tests/groupby/sum_tests.cpp +++ b/cpp/tests/groupby/sum_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,10 +28,10 @@ using namespace cudf::test::iterators; template struct groupby_sum_test : public cudf::test::BaseFixture {}; -using K = int32_t; -using supported_types = - cudf::test::Concat, - cudf::test::DurationTypes>; +using K = int32_t; +using supported_types = cudf::test::Concat< + cudf::test::Types, + cudf::test::DurationTypes>; TYPED_TEST_SUITE(groupby_sum_test, supported_types); @@ -40,6 +40,9 @@ TYPED_TEST(groupby_sum_test, basic) using V = TypeParam; using R = cudf::detail::target_type_t; + static_assert(std::is_signed_v == std::is_signed_v, + "Both Result type and Source type must have same signedness"); + cudf::test::fixed_width_column_wrapper keys{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; cudf::test::fixed_width_column_wrapper vals{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};