rapidsai · rapids-bot · Aug 25, 2021 · Aug 23, 2021 · Aug 24, 2021
@@ -17,6 +17,8 @@
 #include <thrust/optional.h>
 #include <thrust/pair.h>
 
+#include <type_traits>
+
 namespace cudf {
 namespace strings {
 namespace detail {
@@ -27,17 +29,22 @@ namespace detail {
  * This reads everything up to the exponent 'e' notation.
  * The return includes the integer digits and any exponent offset.
  *
+ * @tparam UnsignedDecimalType The unsigned version of the desired decimal type.
+ *                             Use `std::make_unsigned_t` to create the
+ *                             unsigned type from the storage type.
+ *
  * @param[in,out] iter Start of characters to parse
  * @param[in] iter_end End of characters to parse
  * @return Integer component and exponent offset.
  */
-__device__ inline thrust::pair<uint64_t, int32_t> parse_integer(char const*& iter,
-                                                                char const* iter_end,
-                                                                const char decimal_pt_char = '.')
+template <typename UnsignedDecimalType>
+__device__ inline thrust::pair<UnsignedDecimalType, int32_t> parse_integer(
+  char const*& iter, char const* iter_end, const char decimal_pt_char = '.')
 {
   // highest value to which another decimal digit can still be appended without an overflow;
-  // this preserves the most digits when scaling the final result
-  constexpr uint64_t decimal_max = (std::numeric_limits<uint64_t>::max() - 9L) / 10L;
+  // this preserves the most digits when scaling the final result for this type
+  constexpr UnsignedDecimalType decimal_max =
+    (std::numeric_limits<UnsignedDecimalType>::max() - 9L) / 10L;
 
   uint64_t value     = 0;  // for checking overflow
   int32_t exp_offset = 0;
@@ -56,7 +63,7 @@ __device__ inline thrust::pair<uint64_t, int32_t> parse_integer(char const*& ite
     if (value > decimal_max) {
       exp_offset += static_cast<int32_t>(!decimal_found);
     } else {
-      value = (value * 10) + static_cast<uint64_t>(ch - '0');
+      value = (value * 10) + static_cast<UnsignedDecimalType>(ch - '0');
       exp_offset -= static_cast<int32_t>(decimal_found);
     }
   }
@@ -130,7 +137,8 @@ __device__ DecimalType parse_decimal(char const* iter, char const* iter_end, int
   // if string begins with a sign, continue with next character
   if (sign != 0) ++iter;
 
-  auto [value, exp_offset] = parse_integer(iter, iter_end);
+  using UnsignedDecimalType = std::make_unsigned_t<DecimalType>;
+  auto [value, exp_offset]  = parse_integer<UnsignedDecimalType>(iter, iter_end);
   if (value == 0) { return DecimalType{0}; }
 
   // check for exponent
@@ -143,9 +151,9 @@ __device__ DecimalType parse_decimal(char const* iter, char const* iter_end, int
 
   // shift the output value based on the exp_ten and the scale values
   if (exp_ten < scale) {
-    value = value / static_cast<uint64_t>(exp10(static_cast<double>(scale - exp_ten)));
+    value = value / static_cast<UnsignedDecimalType>(exp10(static_cast<double>(scale - exp_ten)));
   } else {
-    value = value * static_cast<uint64_t>(exp10(static_cast<double>(exp_ten - scale)));
+    value = value * static_cast<UnsignedDecimalType>(exp10(static_cast<double>(exp_ten - scale)));
   }
 
   return static_cast<DecimalType>(value) * (sign == 0 ? 1 : sign);

@@ -97,7 +97,8 @@ struct string_to_decimal_check_fn {
 
     auto const iter_end = d_str.data() + d_str.size_bytes();
 
-    auto [value, exp_offset] = parse_integer(iter, iter_end);
+    using UnsignedDecimalType = std::make_unsigned_t<DecimalType>;
+    auto [value, exp_offset]  = parse_integer<UnsignedDecimalType>(iter, iter_end);
 
     // only exponent notation is expected here
     if ((iter < iter_end) && (*iter != 'e' && *iter != 'E')) { return false; }
@@ -112,11 +113,10 @@ struct string_to_decimal_check_fn {
     exp_ten += exp_offset;
 
     // finally, check for overflow based on the exp_ten and scale values
-    return (exp_ten < scale)
-             ? true
-             : value <= static_cast<uint64_t>(
-                          std::numeric_limits<DecimalType>::max() /
-                          static_cast<DecimalType>(exp10(static_cast<double>(exp_ten - scale))));
+    return (exp_ten < scale) or
+           value <= static_cast<UnsignedDecimalType>(
+                      std::numeric_limits<DecimalType>::max() /
+                      static_cast<DecimalType>(exp10(static_cast<double>(exp_ten - scale))));
   }
 };
 

@@ -189,31 +189,39 @@ TEST_F(StringsConvertTest, IsFixedPoint)
     "9223372036854775807",
     "-9223372036854775807",
     "9223372036854775808",
+    "9223372036854775808000",
     "100E2147483648",
   });
-  results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers),
+  results               = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers),
                                           cudf::data_type{cudf::type_id::DECIMAL32});
-  auto const expected32 =
-    cudf::test::fixed_width_column_wrapper<bool>({true, true, false, false, false, false, false});
+  auto const expected32 = cudf::test::fixed_width_column_wrapper<bool>(
+    {true, true, false, false, false, false, false, false});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected32);
 
-  results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers),
+  results               = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers),
                                           cudf::data_type{cudf::type_id::DECIMAL64});
-  auto const expected64 =
-    cudf::test::fixed_width_column_wrapper<bool>({true, true, true, true, true, false, false});
+  auto const expected64 = cudf::test::fixed_width_column_wrapper<bool>(
+    {true, true, true, true, true, false, false, false});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64);
 
   results = cudf::strings::is_fixed_point(
     cudf::strings_column_view(big_numbers),
     cudf::data_type{cudf::type_id::DECIMAL32, numeric::scale_type{10}});
-  auto const expected32_scaled =
-    cudf::test::fixed_width_column_wrapper<bool>({true, true, true, true, true, true, false});
+  auto const expected32_scaled = cudf::test::fixed_width_column_wrapper<bool>(
+    {true, true, true, true, true, true, false, false});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected32_scaled);
 
+  results = cudf::strings::is_fixed_point(
+    cudf::strings_column_view(big_numbers),
+    cudf::data_type{cudf::type_id::DECIMAL64, numeric::scale_type{10}});
+  auto const expected64_scaled_positive =
+    cudf::test::fixed_width_column_wrapper<bool>({true, true, true, true, true, true, true, false});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64_scaled_positive);
+
   results = cudf::strings::is_fixed_point(
     cudf::strings_column_view(big_numbers),
     cudf::data_type{cudf::type_id::DECIMAL64, numeric::scale_type{-5}});
-  auto const expected64_scaled =
-    cudf::test::fixed_width_column_wrapper<bool>({true, true, true, false, false, false, false});
+  auto const expected64_scaled = cudf::test::fixed_width_column_wrapper<bool>(
+    {true, true, true, false, false, false, false, false});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64_scaled);
 }