From 4ee73655e1ab28c0b4be6a613cdce935b4688ca4 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Thu, 17 Feb 2022 11:58:33 +0800
Subject: [PATCH 1/4] fixes up the overflowed fixed-point round on nullable
 column

Signed-off-by: sperlingxx <lovedreamf@gmail.com>
---
 cpp/src/round/round.cu          | 20 ++++++++++++--
 cpp/tests/round/round_tests.cpp | 49 ++++++++++++++++++---------------
 2 files changed, 45 insertions(+), 24 deletions(-)
diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu
index 3f9b7ebe0d3..1873c0a6614 100644
--- a/cpp/src/round/round.cu
+++ b/cpp/src/round/round.cu
@@ -16,6 +16,8 @@
 
 #include <cudf/column/column_factories.hpp>
 #include <cudf/copying.hpp>
+#include <cudf/detail/copy_range.cuh>
+#include <cudf/detail/iterator.cuh>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/round.hpp>
@@ -259,8 +261,22 @@ std::unique_ptr<column> round_with(column_view const& input,
   // overflow. Under this circumstance, we can simply output a zero column because no digits can
   // survive such a large scale movement.
   if (scale_movement > cuda::std::numeric_limits<Type>::digits10) {
-    auto zero_scalar = make_fixed_point_scalar<T>(0, scale_type{-decimal_places});
-    detail::fill_in_place(out_view, 0, out_view.size(), *zero_scalar, stream);
+    if (input.nullable()) {
+      auto device_view = column_device_view::create(out_view, stream);
+      detail::copy_range(thrust::make_constant_iterator(static_cast<Type>(0)),
+                         detail::make_validity_iterator(*device_view),
+                         out_view,
+                         out_view.offset(),
+                         out_view.offset() + out_view.size(),
+                         stream);
+    } else {
+      detail::copy_range(thrust::make_constant_iterator(static_cast<Type>(0)),
+                         thrust::make_constant_iterator(false),
+                         out_view,
+                         out_view.offset(),
+                         out_view.offset() + out_view.size(),
+                         stream);
+    }
   } else {
     Type const n = std::pow(10, scale_movement);
     thrust::transform(rmm::exec_policy(stream),
diff --git a/cpp/tests/round/round_tests.cpp b/cpp/tests/round/round_tests.cpp
index c20aab6a5b8..d612bf8db77 100644
--- a/cpp/tests/round/round_tests.cpp
+++ b/cpp/tests/round/round_tests.cpp
@@ -311,36 +311,41 @@ TYPED_TEST(RoundTestsFixedPointTypes, TestForBlog)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
 }
 
-TEST_F(RoundTests, TestScaleMovementExceedingMaxPrecision)
+TYPED_TEST(RoundTestsFixedPointTypes, TestScaleMovementExceedingMaxPrecision)
 {
   using namespace numeric;
-  using dec32_wrapper  = cudf::test::fixed_point_column_wrapper<int32_t>;
-  using dec64_wrapper  = cudf::test::fixed_point_column_wrapper<int64_t>;
-  using dec128_wrapper = cudf::test::fixed_point_column_wrapper<__int128_t>;
+  using decimalXX  = TypeParam;
+  using RepType    = cudf::device_storage_type_t<decimalXX>;
+  using fp_wrapper = cudf::test::fixed_point_column_wrapper<RepType>;
 
   // max precision of int32 = 9
   // scale movement = -(-11) -1 = 10 > 9
-  auto const input_32 =
-    dec32_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, scale_type{1}};
-  auto const expected_32 = dec32_wrapper{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, scale_type{11}};
-  auto const result_32   = cudf::round(input_32, -11, cudf::rounding_method::HALF_UP);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_32, result_32->view());
-
   // max precision of int64 = 18
   // scale movement = -(-20) -1 = 19 > 18
-  auto const input_64 =
-    dec64_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, scale_type{1}};
-  auto const expected_64 = dec64_wrapper{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, scale_type{20}};
-  auto const result_64   = cudf::round(input_64, -20, cudf::rounding_method::HALF_EVEN);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_64, result_64->view());
-
   // max precision of int128 = 38
-  // scale movement = -(-40) -1 = 39 > 18
-  auto const input_128 =
-    dec128_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, scale_type{1}};
-  auto const expected_128 = dec128_wrapper{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, scale_type{40}};
-  auto const result_128   = cudf::round(input_128, -40, cudf::rounding_method::HALF_UP);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_128, result_128->view());
+  // scale movement = -(-40) -1 = 39 > 38
+  auto const target_scale = cuda::std::numeric_limits<RepType>::digits10 + 1 + 1;
+
+  auto const input =
+    fp_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, scale_type{1}};
+  auto const expected = fp_wrapper{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, scale_type{target_scale}};
+  auto const result   = cudf::round(input, -target_scale, cudf::rounding_method::HALF_UP);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
+
+  auto const input_even =
+    fp_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, scale_type{1}};
+  auto const expected_even =
+    fp_wrapper{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, scale_type{target_scale}};
+  auto const result_even = cudf::round(input, -target_scale, cudf::rounding_method::HALF_EVEN);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_even, result_even->view());
+
+  const std::initializer_list<bool> validity = {1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0};
+  auto const input_null =
+    fp_wrapper{{14, 15, 16, 24, 25, 26, -14, -15, -16, -24, -25, -26}, validity, scale_type{1}};
+  auto const expected_null =
+    fp_wrapper{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, validity, scale_type{target_scale}};
+  auto const result_null = cudf::round(input_null, -target_scale, cudf::rounding_method::HALF_UP);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_null, result_null->view());
 }
 
 TYPED_TEST(RoundTestsFloatingPointTypes, SimpleFloatingPointTestHalfUp0)

From 27c5f5002d58e3f00d073ec39122452fcf9e1f6d Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Thu, 17 Feb 2022 12:25:40 +0800
Subject: [PATCH 2/4] update licence

---
 cpp/src/round/round.cu          | 2 +-
 cpp/tests/round/round_tests.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu
index 1873c0a6614..4d532147fc8 100644
--- a/cpp/src/round/round.cu
+++ b/cpp/src/round/round.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
diff --git a/cpp/tests/round/round_tests.cpp b/cpp/tests/round/round_tests.cpp
index d612bf8db77..3b211dc1453 100644
--- a/cpp/tests/round/round_tests.cpp
+++ b/cpp/tests/round/round_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.

From 306cca3cf91f3008d3e0d6007b0726cfa409f2d2 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Mon, 21 Feb 2022 15:59:46 +0800
Subject: [PATCH 3/4] update

---
 cpp/src/round/round.cu | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu
index 4d532147fc8..7c0bb3a6b2a 100644
--- a/cpp/src/round/round.cu
+++ b/cpp/src/round/round.cu
@@ -261,22 +261,10 @@ std::unique_ptr<column> round_with(column_view const& input,
   // overflow. Under this circumstance, we can simply output a zero column because no digits can
   // survive such a large scale movement.
   if (scale_movement > cuda::std::numeric_limits<Type>::digits10) {
-    if (input.nullable()) {
-      auto device_view = column_device_view::create(out_view, stream);
-      detail::copy_range(thrust::make_constant_iterator(static_cast<Type>(0)),
-                         detail::make_validity_iterator(*device_view),
-                         out_view,
-                         out_view.offset(),
-                         out_view.offset() + out_view.size(),
-                         stream);
-    } else {
-      detail::copy_range(thrust::make_constant_iterator(static_cast<Type>(0)),
-                         thrust::make_constant_iterator(false),
-                         out_view,
-                         out_view.offset(),
-                         out_view.offset() + out_view.size(),
-                         stream);
-    }
+    thrust::fill(rmm::exec_policy(stream),
+                 out_view.template begin<Type>(),
+                 out_view.template end<Type>(),
+                 static_cast<Type>(0));
   } else {
     Type const n = std::pow(10, scale_movement);
     thrust::transform(rmm::exec_policy(stream),

From 40a0763378aafcdcfddcd1a9803a746c4509a939 Mon Sep 17 00:00:00 2001
From: sperlingxx <lovedreamf@gmail.com>
Date: Mon, 21 Feb 2022 16:09:58 +0800
Subject: [PATCH 4/4] update

---
 cpp/src/round/round.cu | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu
index 7c0bb3a6b2a..5fc75bdfa1b 100644
--- a/cpp/src/round/round.cu
+++ b/cpp/src/round/round.cu
@@ -261,10 +261,10 @@ std::unique_ptr<column> round_with(column_view const& input,
   // overflow. Under this circumstance, we can simply output a zero column because no digits can
   // survive such a large scale movement.
   if (scale_movement > cuda::std::numeric_limits<Type>::digits10) {
-    thrust::fill(rmm::exec_policy(stream),
-                 out_view.template begin<Type>(),
-                 out_view.template end<Type>(),
-                 static_cast<Type>(0));
+    thrust::uninitialized_fill(rmm::exec_policy(stream),
+                               out_view.template begin<Type>(),
+                               out_view.template end<Type>(),
+                               static_cast<Type>(0));
   } else {
     Type const n = std::pow(10, scale_movement);
     thrust::transform(rmm::exec_policy(stream),