From 81371341bfa96cd6a2c331dad74f62a23a81cd5d Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Tue, 30 Nov 2021 13:40:46 +0800
Subject: [PATCH 1/5] Rework fixed-point rounding and zero-fill on excessive scale movement

Signed-off-by: sperlingxx <lovedreamf@gmail.com>
---
 cpp/src/round/round.cu          | 74 +++++++++++++++++++++++++++++----
 cpp/tests/round/round_tests.cpp | 12 +++---
 2 files changed, 73 insertions(+), 13 deletions(-)
diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu
index 36dd2dabd72..bca657434e9 100644
--- a/cpp/src/round/round.cu
+++ b/cpp/src/round/round.cu
@@ -32,6 +32,7 @@
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/exec_policy.hpp>
 
+#include <cudf/detail/fill.hpp>
 #include <type_traits>
 
 namespace cudf {
@@ -191,13 +192,57 @@ struct half_even_negative {
 template <typename T>
 struct half_up_fixed_point {
   T n;
-  __device__ T operator()(T e) { return half_up_negative<T>{n}(e) / n; }
+  template <typename U = T, typename std::enable_if_t<cudf::is_floating_point<U>()>* = nullptr>
+  __device__ U operator()(U e)
+  {
+    assert(false);  // Should never get here. Just for compilation
+    return U{};
+  }
+
+  template <typename U = T, typename std::enable_if_t<cuda::std::is_integral<U>::value>* = nullptr>
+  __device__ U operator()(U e)
+  {
+    // Create a container with extra digit for adjustment
+    auto const container = e / n;
+    auto const down      = container / 10;
+    // Use the remainder of 10 to decide whether to round or not
+    return down + (generic_abs(container % 10) >= 5 ? generic_sign(e) : 0);
+  }
 };
 
 template <typename T>
 struct half_even_fixed_point {
   T n;
-  __device__ T operator()(T e) { return half_even_negative<T>{n}(e) / n; }
+  template <typename U = T, typename std::enable_if_t<cudf::is_floating_point<U>()>* = nullptr>
+  __device__ U operator()(U e)
+  {
+    assert(false);  // Should never get here. Just for compilation
+    return U{};
+  }
+
+  template <typename U = T, typename std::enable_if_t<cuda::std::is_integral<U>::value>* = nullptr>
+  __device__ T operator()(T e)
+  {
+    // Create a container with extra digit for adjustment
+    auto const container = e / n;
+    auto const down      = container / 10;
+    auto abs_mod_10      = generic_abs(container % 10);
+    if ((abs_mod_10 > 5) or (abs_mod_10 == 5 and generic_abs(down) % 2 == 1)) {
+      return down + generic_sign(e);
+    }
+    return down;
+  }
+
+  //    template <typename U = T, typename std::enable_if_t<cuda::std::is_integral<U>::value>* = nullptr>
+//  __device__ U operator()(U e)
+//  {
+//    auto const down_over_n = e / n;            // use this to determine HALF_EVEN case
+//    auto const down        = down_over_n * n;  // result from rounding down
+//    auto const diff        = generic_abs(e - down);
+//    auto const adjustment =
+//      (diff > n / 2) or (diff == n / 2 && generic_abs(down_over_n) % 2 == 1) ? n : 0;
+//    return down + generic_sign(e) * adjustment;
+//  }
 };
 
 template <typename T,
@@ -252,13 +297,26 @@ std::unique_ptr<column> round_with(column_view const& input,
     result_type, input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr);
 
   auto out_view = result->mutable_view();
-  Type const n  = std::pow(10, std::abs(decimal_places + input.type().scale()));
 
-  thrust::transform(rmm::exec_policy(stream),
-                    input.begin<Type>(),
-                    input.end<Type>(),
-                    out_view.begin<Type>(),
-                    FixedPointRoundFunctor{n});
+  constexpr int max_precision = []{
+    if constexpr (std::is_same_v<T, numeric::decimal32>) return 9;
+    if constexpr (std::is_same_v<T, numeric::decimal64>) return 18;
+    return 38;
+  }();
+
+  auto const scale_movement = -decimal_places - input.type().scale();
+
+  if (scale_movement > max_precision) {
+    auto zero_scalar = make_fixed_point_scalar<T>(0, scale_type{-decimal_places});
+    detail::fill_in_place(out_view, 0, out_view.size(), *zero_scalar, stream);
+  } else {
+    Type const n = std::pow(10, scale_movement - 1);
+    thrust::transform(rmm::exec_policy(stream),
+                      input.begin<Type>(),
+                      input.end<Type>(),
+                      out_view.begin<Type>(),
+                      FixedPointRoundFunctor{n});
+  }
 
   return result;
 }
diff --git a/cpp/tests/round/round_tests.cpp b/cpp/tests/round/round_tests.cpp
index 6b2febb9b5c..b06620df671 100644
--- a/cpp/tests/round/round_tests.cpp
+++ b/cpp/tests/round/round_tests.cpp
@@ -93,9 +93,11 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfEvenZero)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{1140, 1150, 1160, 1240, 1250, 1260}, scale_type{-2}};
-  auto const expected = fp_wrapper{{11, 12, 12, 12, 12, 13}, scale_type{0}};
-  auto const result   = cudf::round(input, 0, cudf::rounding_method::HALF_EVEN);
+  auto const input = fp_wrapper{
+    {1140, 1150, 1160, 1240, 1250, 1260, -1140, -1150, -1160, -1240, -1250, -1260}, scale_type{-2}};
+  auto const expected =
+    fp_wrapper{{11, 12, 12, 12, 12, 13, -11, -12, -12, -12, -12, -13}, scale_type{0}};
+  auto const result = cudf::round(input, 0, cudf::rounding_method::HALF_EVEN);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
 }
@@ -107,8 +109,8 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUp)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{1140, 1150, 1160}, scale_type{-3}};
-  auto const expected = fp_wrapper{{11, 12, 12}, scale_type{-1}};
+  auto const input    = fp_wrapper{{1140, 1150, 1160, -1140, -1150, -1160}, scale_type{-3}};
+  auto const expected = fp_wrapper{{11, 12, 12, -11, -12, -12}, scale_type{-1}};
   auto const result   = cudf::round(input, 1, cudf::rounding_method::HALF_UP);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());

From 6cc0474f160d6b3a816537507aea95b563bbbb60 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Wed, 1 Dec 2021 17:51:59 +0800
Subject: [PATCH 2/5] Add comments, remove dead code, and test rounding of negative values

---
 cpp/src/round/round.cu          | 28 ++++------
 cpp/tests/round/round_tests.cpp | 99 +++++++++++++++++++++++----------
 2 files changed, 82 insertions(+), 45 deletions(-)

diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu
index bca657434e9..62721c391e8 100644
--- a/cpp/src/round/round.cu
+++ b/cpp/src/round/round.cu
@@ -205,8 +205,9 @@ struct half_up_fixed_point {
     // Create a container with extra digit for adjustment
     auto const container = e / n;
     auto const down      = container / 10;
-    // Use the remainder of 10 to decide whether to round or not
-    return down + (generic_abs(container % 10) >= 5 ? generic_sign(e) : 0);
+    // Use the remainder of 10 to determine whether to round or not
+    auto remainder_of_10 = generic_abs(container % 10);
+    return down + (remainder_of_10 >= 5 ? generic_sign(e) : 0);
   }
 };
 
@@ -226,23 +227,13 @@ struct half_even_fixed_point {
     // Create a container with extra digit for adjustment
     auto const container = e / n;
     auto const down      = container / 10;
-    auto abs_mod_10      = generic_abs(container % 10);
-    if ((abs_mod_10 > 5) or (abs_mod_10 == 5 and generic_abs(down) % 2 == 1)) {
+    // Use the remainder of 10 to determine whether to round or not
+    auto remainder_of_10 = generic_abs(container % 10);
+    if ((remainder_of_10 > 5) or (remainder_of_10 == 5 and generic_abs(down) % 2 == 1)) {
       return down + generic_sign(e);
     }
     return down;
   }
-
-  //    template <typename U = T, typename std::enable_if_t<cuda::std::is_integral<U>::value>* = nullptr>
-//  __device__ U operator()(U e)
-//  {
-//    auto const down_over_n = e / n;            // use this to determine HALF_EVEN case
-//    auto const down        = down_over_n * n;  // result from rounding down
-//    auto const diff        = generic_abs(e - down);
-//    auto const adjustment =
-//      (diff > n / 2) or (diff == n / 2 && generic_abs(down_over_n) % 2 == 1) ? n : 0;
-//    return down + generic_sign(e) * adjustment;
-//  }
 };
 
 template <typename T,
@@ -298,18 +289,21 @@ std::unique_ptr<column> round_with(column_view const& input,
 
   auto out_view = result->mutable_view();
 
-  constexpr int max_precision = []{
+  constexpr int max_precision = [] {
     if constexpr (std::is_same_v<T, numeric::decimal32>) return 9;
     if constexpr (std::is_same_v<T, numeric::decimal64>) return 18;
     return 38;
   }();
 
   auto const scale_movement = -decimal_places - input.type().scale();
-
+  // If scale_movement is larger than max precision of current type, the pow operation will
+  // overflow. Under this circumstance, we can simply output a zero column because no digits can
+  // survive such a large scale movement.
   if (scale_movement > max_precision) {
     auto zero_scalar = make_fixed_point_scalar<T>(0, scale_type{-decimal_places});
     detail::fill_in_place(out_view, 0, out_view.size(), *zero_scalar, stream);
   } else {
+    // Creates n to truncate the input number, keeping one more digit than the result type
     Type const n = std::pow(10, scale_movement - 1);
     thrust::transform(rmm::exec_policy(stream),
                       input.begin<Type>(),
diff --git a/cpp/tests/round/round_tests.cpp b/cpp/tests/round/round_tests.cpp
index b06620df671..c20aab6a5b8 100644
--- a/cpp/tests/round/round_tests.cpp
+++ b/cpp/tests/round/round_tests.cpp
@@ -51,9 +51,11 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUpZero)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{1140, 1150, 1160, 1240, 1250, 1260}, scale_type{-2}};
-  auto const expected = fp_wrapper{{11, 12, 12, 12, 13, 13}, scale_type{0}};
-  auto const result   = cudf::round(input);
+  auto const input = fp_wrapper{
+    {1140, 1150, 1160, 1240, 1250, 1260, -1140, -1150, -1160, -1240, -1250, -1260}, scale_type{-2}};
+  auto const expected =
+    fp_wrapper{{11, 12, 12, 12, 13, 13, -11, -12, -12, -12, -13, -13}, scale_type{0}};
+  auto const result = cudf::round(input);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
 }
@@ -65,7 +67,8 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUpZeroNoOp)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input  = fp_wrapper{{1125, 1150, 1160, 1240, 1250, 1260}, scale_type{0}};
+  auto const input = fp_wrapper{
+    {1125, 1150, 1160, 1240, 1250, 1260, -1125, -1150, -1160, -1240, -1250, -1260}, scale_type{0}};
   auto const result = cudf::round(input);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(input, result->view());
@@ -123,8 +126,8 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUp2)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{114, 115, 116}, scale_type{-2}};
-  auto const expected = fp_wrapper{{11, 12, 12}, scale_type{-1}};
+  auto const input    = fp_wrapper{{114, 115, 116, -114, -115, -116}, scale_type{-2}};
+  auto const expected = fp_wrapper{{11, 12, 12, -11, -12, -12}, scale_type{-1}};
   auto const result   = cudf::round(input, 1, cudf::rounding_method::HALF_UP);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
@@ -137,8 +140,8 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfUp3)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{1, 2, 3}, scale_type{1}};
-  auto const expected = fp_wrapper{{100, 200, 300}, scale_type{-1}};
+  auto const input    = fp_wrapper{{1, 2, 3, -1, -2, -3}, scale_type{1}};
+  auto const expected = fp_wrapper{{100, 200, 300, -100, -200, -300}, scale_type{-1}};
   auto const result   = cudf::round(input, 1, cudf::rounding_method::HALF_UP);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
@@ -151,9 +154,11 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfEven)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{1140, 1150, 1160, 1240, 1250, 1260}, scale_type{-3}};
-  auto const expected = fp_wrapper{{11, 12, 12, 12, 12, 13}, scale_type{-1}};
-  auto const result   = cudf::round(input, 1, cudf::rounding_method::HALF_EVEN);
+  auto const input = fp_wrapper{
+    {1140, 1150, 1160, 1240, 1250, 1260, -1140, -1150, -1160, -1240, -1250, -1260}, scale_type{-3}};
+  auto const expected =
+    fp_wrapper{{11, 12, 12, 12, 12, 13, -11, -12, -12, -12, -12, -13}, scale_type{-1}};
+  auto const result = cudf::round(input, 1, cudf::rounding_method::HALF_EVEN);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
 }
@@ -165,9 +170,11 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfEven2)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{114, 115, 116, 124, 125, 126}, scale_type{-2}};
-  auto const expected = fp_wrapper{{11, 12, 12, 12, 12, 13}, scale_type{-1}};
-  auto const result   = cudf::round(input, 1, cudf::rounding_method::HALF_EVEN);
+  auto const input =
+    fp_wrapper{{114, 115, 116, 124, 125, 126, -114, -115, -116, -124, -125, -126}, scale_type{-2}};
+  auto const expected =
+    fp_wrapper{{11, 12, 12, 12, 12, 13, -11, -12, -12, -12, -12, -13}, scale_type{-1}};
+  auto const result = cudf::round(input, 1, cudf::rounding_method::HALF_EVEN);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
 }
@@ -179,8 +186,8 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfEven3)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{1, 2, 3}, scale_type{1}};
-  auto const expected = fp_wrapper{{100, 200, 300}, scale_type{-1}};
+  auto const input    = fp_wrapper{{1, 2, 3, -1, -2, -3}, scale_type{1}};
+  auto const expected = fp_wrapper{{100, 200, 300, -100, -200, -300}, scale_type{-1}};
   auto const result   = cudf::round(input, 1, cudf::rounding_method::HALF_EVEN);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
@@ -209,8 +216,9 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestNegHalfUp)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{14, 15, 16, 24, 25, 26}, scale_type{2}};
-  auto const expected = fp_wrapper{{1, 2, 2, 2, 3, 3}, scale_type{3}};
+  auto const input =
+    fp_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, scale_type{2}};
+  auto const expected = fp_wrapper{{1, 2, 2, 2, 3, 3, -1, -2, -2, -2, -3, -3}, scale_type{3}};
   auto const result   = cudf::round(input, -3, cudf::rounding_method::HALF_UP);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
@@ -223,8 +231,9 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestNegHalfUp2)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{14, 15, 16, 24, 25, 26}, scale_type{3}};
-  auto const expected = fp_wrapper{{1, 2, 2, 2, 3, 3}, scale_type{4}};
+  auto const input =
+    fp_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, scale_type{3}};
+  auto const expected = fp_wrapper{{1, 2, 2, 2, 3, 3, -1, -2, -2, -2, -3, -3}, scale_type{4}};
   auto const result   = cudf::round(input, -4, cudf::rounding_method::HALF_UP);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
@@ -237,8 +246,8 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfNegUp3)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{1, 2, 3}, scale_type{2}};
-  auto const expected = fp_wrapper{{10, 20, 30}, scale_type{1}};
+  auto const input    = fp_wrapper{{1, 2, 3, -1, -2, -3}, scale_type{2}};
+  auto const expected = fp_wrapper{{10, 20, 30, -10, -20, -30}, scale_type{1}};
   auto const result   = cudf::round(input, -1, cudf::rounding_method::HALF_UP);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
@@ -251,8 +260,9 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestNegHalfEven)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{14, 15, 16, 24, 25, 26}, scale_type{2}};
-  auto const expected = fp_wrapper{{1, 2, 2, 2, 2, 3}, scale_type{3}};
+  auto const input =
+    fp_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, scale_type{2}};
+  auto const expected = fp_wrapper{{1, 2, 2, 2, 2, 3, -1, -2, -2, -2, -2, -3}, scale_type{3}};
   auto const result   = cudf::round(input, -3, cudf::rounding_method::HALF_EVEN);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
@@ -265,8 +275,9 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestNegHalfEven2)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{14, 15, 16, 24, 25, 26}, scale_type{3}};
-  auto const expected = fp_wrapper{{1, 2, 2, 2, 2, 3}, scale_type{4}};
+  auto const input =
+    fp_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, scale_type{3}};
+  auto const expected = fp_wrapper{{1, 2, 2, 2, 2, 3, -1, -2, -2, -2, -2, -3}, scale_type{4}};
   auto const result   = cudf::round(input, -4, cudf::rounding_method::HALF_EVEN);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
@@ -279,8 +290,8 @@ TYPED_TEST(RoundTestsFixedPointTypes, SimpleFixedPointTestHalfNegEven3)
   using RepType    = cudf::device_storage_type_t<decimalXX>;
   using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
-  auto const input    = fp_wrapper{{1, 2, 3}, scale_type{2}};
-  auto const expected = fp_wrapper{{10, 20, 30}, scale_type{1}};
+  auto const input    = fp_wrapper{{1, 2, 3, -1, -2, -3}, scale_type{2}};
+  auto const expected = fp_wrapper{{10, 20, 30, -10, -20, -30}, scale_type{1}};
   auto const result   = cudf::round(input, -1, cudf::rounding_method::HALF_EVEN);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
@@ -300,6 +311,38 @@ TYPED_TEST(RoundTestsFixedPointTypes, TestForBlog)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
 }
 
+TEST_F(RoundTests, TestScaleMovementExceedingMaxPrecision)
+{
+  using namespace numeric;
+  using dec32_wrapper  = cudf::test::fixed_point_column_wrapper<int32_t>;
+  using dec64_wrapper  = cudf::test::fixed_point_column_wrapper<int64_t>;
+  using dec128_wrapper = cudf::test::fixed_point_column_wrapper<__int128_t>;
+
+  // max precision of int32 = 9
+  // scale movement = -(-11) -1 = 10 > 9
+  auto const input_32 =
+    dec32_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, scale_type{1}};
+  auto const expected_32 = dec32_wrapper{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, scale_type{11}};
+  auto const result_32   = cudf::round(input_32, -11, cudf::rounding_method::HALF_UP);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_32, result_32->view());
+
+  // max precision of int64 = 18
+  // scale movement = -(-20) -1 = 19 > 18
+  auto const input_64 =
+    dec64_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, scale_type{1}};
+  auto const expected_64 = dec64_wrapper{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, scale_type{20}};
+  auto const result_64   = cudf::round(input_64, -20, cudf::rounding_method::HALF_EVEN);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_64, result_64->view());
+
+  // max precision of int128 = 38
+  // scale movement = -(-40) -1 = 39 > 38
+  auto const input_128 =
+    dec128_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, scale_type{1}};
+  auto const expected_128 = dec128_wrapper{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, scale_type{40}};
+  auto const result_128   = cudf::round(input_128, -40, cudf::rounding_method::HALF_UP);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_128, result_128->view());
+}
+
 TYPED_TEST(RoundTestsFloatingPointTypes, SimpleFloatingPointTestHalfUp0)
 {
   using fw_wrapper = cudf::test::fixed_width_column_wrapper<TypeParam>;

From 79fee12a98317421db04d4839de937c0254c9ed8 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Thu, 2 Dec 2021 17:56:43 +0800
Subject: [PATCH 3/5] Use numeric_limits digits10 as max precision; take abs before modulo

---
 cpp/src/round/round.cu | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu
index 62721c391e8..74d5fcfbc57 100644
--- a/cpp/src/round/round.cu
+++ b/cpp/src/round/round.cu
@@ -206,7 +206,7 @@ struct half_up_fixed_point {
     auto const container = e / n;
     auto const down      = container / 10;
     // Use the remainder of 10 to determine whether to round or not
-    auto remainder_of_10 = generic_abs(container % 10);
+    auto const remainder_of_10 = generic_abs(container) % 10;
     return down + (remainder_of_10 >= 5 ? generic_sign(e) : 0);
   }
 };
@@ -228,8 +228,8 @@ struct half_even_fixed_point {
     auto const container = e / n;
     auto const down      = container / 10;
     // Use the remainder of 10 to determine whether to round or not
-    auto remainder_of_10 = generic_abs(container % 10);
-    if ((remainder_of_10 > 5) or (remainder_of_10 == 5 and generic_abs(down) % 2 == 1)) {
+    auto const remainder_of_10 = generic_abs(container) % 10;
+    if (remainder_of_10 > 5 || (remainder_of_10 == 5 && generic_abs(down) % 2)) {
       return down + generic_sign(e);
     }
     return down;
@@ -289,11 +289,7 @@ std::unique_ptr<column> round_with(column_view const& input,
 
   auto out_view = result->mutable_view();
 
-  constexpr int max_precision = [] {
-    if constexpr (std::is_same_v<T, numeric::decimal32>) return 9;
-    if constexpr (std::is_same_v<T, numeric::decimal64>) return 18;
-    return 38;
-  }();
+  constexpr int max_precision = cuda::std::numeric_limits<Type>::digits10;
 
   auto const scale_movement = -decimal_places - input.type().scale();
   // If scale_movement is larger than max precision of current type, the pow operation will

From fd098fe48c6634a4ee741a70c3263ff5552d5bc8 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Thu, 2 Dec 2021 18:26:26 +0800
Subject: [PATCH 4/5] Revert fixed-point functors to the half_*_negative implementations

---
 cpp/src/round/round.cu | 42 +++---------------------------------------
 1 file changed, 3 insertions(+), 39 deletions(-)

diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu
index 74d5fcfbc57..771ee84f1cd 100644
--- a/cpp/src/round/round.cu
+++ b/cpp/src/round/round.cu
@@ -192,48 +192,13 @@ struct half_even_negative {
 template <typename T>
 struct half_up_fixed_point {
   T n;
-  template <typename U = T, typename std::enable_if_t<cudf::is_floating_point<U>()>* = nullptr>
-  __device__ U operator()(U e)
-  {
-    assert(false);  // Should never get here. Just for compilation
-    return U{};
-  }
-
-  template <typename U = T, typename std::enable_if_t<cuda::std::is_integral<U>::value>* = nullptr>
-  __device__ U operator()(U e)
-  {
-    // Create a container with extra digit for adjustment
-    auto const container = e / n;
-    auto const down      = container / 10;
-    // Use the remainder of 10 to determine whether to round or not
-    auto const remainder_of_10 = generic_abs(container) % 10;
-    return down + (remainder_of_10 >= 5 ? generic_sign(e) : 0);
-  }
+  __device__ T operator()(T e) { return half_up_negative<T>{n}(e) / n; }
 };
 
 template <typename T>
 struct half_even_fixed_point {
   T n;
-  template <typename U = T, typename std::enable_if_t<cudf::is_floating_point<U>()>* = nullptr>
-  __device__ U operator()(U e)
-  {
-    assert(false);  // Should never get here. Just for compilation
-    return U{};
-  }
-
-  template <typename U = T, typename std::enable_if_t<cuda::std::is_integral<U>::value>* = nullptr>
-  __device__ T operator()(T e)
-  {
-    // Create a container with extra digit for adjustment
-    auto const container = e / n;
-    auto const down      = container / 10;
-    // Use the remainder of 10 to determine whether to round or not
-    auto const remainder_of_10 = generic_abs(container) % 10;
-    if (remainder_of_10 > 5 || (remainder_of_10 == 5 && generic_abs(down) % 2)) {
-      return down + generic_sign(e);
-    }
-    return down;
-  }
+  __device__ T operator()(T e) { return half_even_negative<T>{n}(e) / n; }
 };
 
 template <typename T,
@@ -299,8 +264,7 @@ std::unique_ptr<column> round_with(column_view const& input,
     auto zero_scalar = make_fixed_point_scalar<T>(0, scale_type{-decimal_places});
     detail::fill_in_place(out_view, 0, out_view.size(), *zero_scalar, stream);
   } else {
-    // Creates n to truncate the input number, keeping one more digit than the result type
-    Type const n = std::pow(10, scale_movement - 1);
+    Type const n = std::pow(10, scale_movement);
     thrust::transform(rmm::exec_policy(stream),
                       input.begin<Type>(),
                       input.end<Type>(),

From ac9e0d9f78e48fe6e5de373a7978d4dfb1dffb0b Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Thu, 23 Dec 2021 15:30:27 +0800
Subject: [PATCH 5/5] Inline the digits10 bound and drop the scalar.hpp include

---
 cpp/src/round/round.cu | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu
index 771ee84f1cd..e834c53cfb2 100644
--- a/cpp/src/round/round.cu
+++ b/cpp/src/round/round.cu
@@ -23,7 +23,6 @@
 #include <cudf/fixed_point/fixed_point.hpp>
 #include <cudf/fixed_point/temporary.hpp>
 #include <cudf/round.hpp>
-#include <cudf/scalar/scalar.hpp>
 #include <cudf/scalar/scalar_factories.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/error.hpp>
@@ -254,13 +253,11 @@ std::unique_ptr<column> round_with(column_view const& input,
 
   auto out_view = result->mutable_view();
 
-  constexpr int max_precision = cuda::std::numeric_limits<Type>::digits10;
-
   auto const scale_movement = -decimal_places - input.type().scale();
   // If scale_movement is larger than max precision of current type, the pow operation will
   // overflow. Under this circumstance, we can simply output a zero column because no digits can
   // survive such a large scale movement.
-  if (scale_movement > max_precision) {
+  if (scale_movement > cuda::std::numeric_limits<Type>::digits10) {
     auto zero_scalar = make_fixed_point_scalar<T>(0, scale_type{-decimal_places});
     detail::fill_in_place(out_view, 0, out_view.size(), *zero_scalar, stream);
   } else {