From c3497230116f2bc877e60e18ae4cfa1cf18ae5b5 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Mon, 23 Aug 2021 15:56:17 -0400 Subject: [PATCH 1/2] Fix is_fixed_point checking of overflow for decimal32 --- .../strings/detail/convert/fixed_point.cuh | 26 +++++++++++------ .../strings/convert/convert_fixed_point.cu | 12 ++++---- cpp/tests/strings/fixed_point_tests.cpp | 28 ++++++++++++------- 3 files changed, 41 insertions(+), 25 deletions(-) diff --git a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh index 53774ed948d..49d0fa8bf3e 100644 --- a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh +++ b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh @@ -17,6 +17,8 @@ #include #include +#include + namespace cudf { namespace strings { namespace detail { @@ -27,17 +29,22 @@ namespace detail { * This is reads everything up to the exponent 'e' notation. * The return includes the integer digits and any exponent offset. * + * @tparam UnsignedDecimalType The unsigned version of the desired decimal type. + * Use the `std::make_unsigned_t` to create the + * unsigned type from the storage type. + * * @param[in,out] iter Start of characters to parse * @param[in] end End of characters to parse * @return Integer component and exponent offset. */ -__device__ inline thrust::pair parse_integer(char const*& iter, - char const* iter_end, - const char decimal_pt_char = '.') +template +__device__ inline thrust::pair parse_integer( + char const*& iter, char const* iter_end, const char decimal_pt_char = '.') { // highest value where another decimal digit cannot be appended without an overflow; - // this preserves the most digits when scaling the final result - constexpr uint64_t decimal_max = (std::numeric_limits::max() - 9L) / 10L; + // this preserves the most digits when scaling the final result for this type + constexpr UnsignedDecimalType decimal_max = + (std::numeric_limits::max() - 9L) / 10L; uint64_t value = 0; // for checking overflow int32_t exp_offset = 0; @@ -56,7 +63,7 @@ __device__ inline thrust::pair parse_integer(char const*& ite if (value > decimal_max) { exp_offset += static_cast(!decimal_found); } else { - value = (value * 10) + static_cast(ch - '0'); + value = (value * 10) + static_cast(ch - '0'); exp_offset -= static_cast(decimal_found); } } @@ -130,7 +137,8 @@ __device__ DecimalType parse_decimal(char const* iter, char const* iter_end, int // if string begins with a sign, continue with next character if (sign != 0) ++iter; - auto [value, exp_offset] = parse_integer(iter, iter_end); + using UnsignedDecimalType = std::make_unsigned_t; + auto [value, exp_offset] = parse_integer(iter, iter_end); if (value == 0) { return DecimalType{0}; } // check for exponent @@ -143,9 +151,9 @@ __device__ DecimalType parse_decimal(char const* iter, char const* iter_end, int // shift the output value based on the exp_ten and the scale values if (exp_ten < scale) { - value = value / static_cast(exp10(static_cast(scale - exp_ten))); + value = value / static_cast(exp10(static_cast(scale - exp_ten))); } else { - value = value * static_cast(exp10(static_cast(exp_ten - scale))); + value = value * static_cast(exp10(static_cast(exp_ten - scale))); } return static_cast(value) * (sign == 0 ? 1 : sign); diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu index 2f57b38249f..6f7076422c4 100644 --- a/cpp/src/strings/convert/convert_fixed_point.cu +++ b/cpp/src/strings/convert/convert_fixed_point.cu @@ -97,7 +97,8 @@ struct string_to_decimal_check_fn { auto const iter_end = d_str.data() + d_str.size_bytes(); - auto [value, exp_offset] = parse_integer(iter, iter_end); + using UnsignedDecimalType = std::make_unsigned_t; + auto [value, exp_offset] = parse_integer(iter, iter_end); // only exponent notation is expected here if ((iter < iter_end) && (*iter != 'e' && *iter != 'E')) { return false; } @@ -112,11 +113,10 @@ struct string_to_decimal_check_fn { exp_ten += exp_offset; // finally, check for overflow based on the exp_ten and scale values - return (exp_ten < scale) - ? true - : value <= static_cast( - std::numeric_limits::max() / - static_cast(exp10(static_cast(exp_ten - scale)))); + return (exp_ten < scale) or + value <= static_cast( + std::numeric_limits::max() / + static_cast(exp10(static_cast(exp_ten - scale)))); } }; diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp index d8b570cee8b..820bf5ec216 100644 --- a/cpp/tests/strings/fixed_point_tests.cpp +++ b/cpp/tests/strings/fixed_point_tests.cpp @@ -189,31 +189,39 @@ TEST_F(StringsConvertTest, IsFixedPoint) "9223372036854775807", "-9223372036854775807", "9223372036854775808", + "9223372036854775808000", "100E2147483648", }); - results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), + results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), cudf::data_type{cudf::type_id::DECIMAL32}); - auto const expected32 = - cudf::test::fixed_width_column_wrapper({true, true, false, false, false, false, false}); + auto const expected32 = cudf::test::fixed_width_column_wrapper( + {true, true, false, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected32); - results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), + results = cudf::strings::is_fixed_point(cudf::strings_column_view(big_numbers), cudf::data_type{cudf::type_id::DECIMAL64}); - auto const expected64 = - cudf::test::fixed_width_column_wrapper({true, true, true, true, true, false, false}); + auto const expected64 = cudf::test::fixed_width_column_wrapper( + {true, true, true, true, true, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64); results = cudf::strings::is_fixed_point( cudf::strings_column_view(big_numbers), cudf::data_type{cudf::type_id::DECIMAL32, numeric::scale_type{10}}); - auto const expected32_scaled = - cudf::test::fixed_width_column_wrapper({true, true, true, true, true, true, false}); + auto const expected32_scaled = cudf::test::fixed_width_column_wrapper( + {true, true, true, true, true, true, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected32_scaled); + results = cudf::strings::is_fixed_point( + cudf::strings_column_view(big_numbers), + cudf::data_type{cudf::type_id::DECIMAL64, numeric::scale_type{10}}); + auto const expected64_scaled_positive = + cudf::test::fixed_width_column_wrapper({true, true, true, true, true, true, true, false}); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64_scaled_positive); + results = cudf::strings::is_fixed_point( cudf::strings_column_view(big_numbers), cudf::data_type{cudf::type_id::DECIMAL64, numeric::scale_type{-5}}); - auto const expected64_scaled = - cudf::test::fixed_width_column_wrapper({true, true, true, false, false, false, false}); + auto const expected64_scaled = cudf::test::fixed_width_column_wrapper( + {true, true, true, false, false, false, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected64_scaled); } From 1a35ab45ac1d5670aa875f9e643dfc5b0017666c Mon Sep 17 00:00:00 2001 From: davidwendt Date: Tue, 24 Aug 2021 07:34:09 -0400 Subject: [PATCH 2/2] remove extra is from doxygen comment wording --- cpp/include/cudf/strings/detail/convert/fixed_point.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh index 49d0fa8bf3e..56205c161b1 100644 --- a/cpp/include/cudf/strings/detail/convert/fixed_point.cuh +++ b/cpp/include/cudf/strings/detail/convert/fixed_point.cuh @@ -26,7 +26,7 @@ namespace detail { /** * @brief Return the integer component of a decimal string. * - * This is reads everything up to the exponent 'e' notation. + * This reads everything up to the exponent 'e' notation. * The return includes the integer digits and any exponent offset. * * @tparam UnsignedDecimalType The unsigned version of the desired decimal type.