From c14b199d13fab316841900459dac7353eaabcce9 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Mon, 15 Jul 2024 20:05:54 -0700 Subject: [PATCH] Use new conversion code --- src/main/cpp/src/decimal_utils.cu | 121 ++++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 8 deletions(-) diff --git a/src/main/cpp/src/decimal_utils.cu b/src/main/cpp/src/decimal_utils.cu index 504d58233b..a95cdac480 100644 --- a/src/main/cpp/src/decimal_utils.cu +++ b/src/main/cpp/src/decimal_utils.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,8 @@ #include #include +#include + #include #include @@ -1202,11 +1205,110 @@ FloatingType __device__ fix_before_round(FloatingType floating, int const exp10) return std::nextafter(floating, direction); } -template +using namespace numeric; +using namespace numeric::detail; + +/** + * @brief Perform floating-point -> integer decimal conversion, matching Spark + * + * @tparam Rep The type of integer we are converting to, to store the decimal value + * @tparam FloatingType The type of floating-point object we are converting from + * @param floating The floating point value to convert + * @param scale The desired base-10 scale factor: decimal value = returned value * 10^scale + * @return Integer representation of the floating-point value, given the desired scale + */ +template )> +CUDF_HOST_DEVICE Rep convert_floating_to_integral_SPARK_RAPIDS(FloatingType floating, + scale_type const& scale) +{ + // The rounding and precision decisions made here are chosen to match Apache Spark. + // Spark wants to perform the conversion as double to have the most precision. + // However, the behavior is still slightly different if the original type was float. + + // Extract components of the (double-ized) floating point number + using converter = floating_converter; + auto const integer_rep = converter::bit_cast_to_integer(double(floating)); + if (converter::is_zero(integer_rep)) { return 0; } + + // Note that the significand here is an unsigned integer with sizeof(double) + auto const is_negative = converter::get_is_negative(integer_rep); + auto const [significand, floating_pow2] = converter::get_significand_and_pow2(integer_rep); + + // Spark often wants to round the last decimal place, so we'll perform the conversion + // with one lower power of 10 so that we can (optionally) round at the end. + auto const pow10 = static_cast(scale); + auto const shifting_pow10 = pow10 - 1; + + // Sometimes add half a bit to correct for compiler rounding text to nearest floating-point value. + // See comments in add_half_if_truncates(), with differences detailed below. + // Even if we don't add the bit, shift bits to line up with what the shifting algorithm is + // expecting. + bool const is_whole_number = (cuda::std::floor(floating) == floating); + auto const [base2_value, pow2] = [is_whole_number](auto significand, auto floating_pow2) { + if constexpr (cuda::std::is_same_v) { + // Add the 1/2 bit regardless of truncation, but still not for whole numbers + auto const base2_value = + (significand << 1) + static_cast(!is_whole_number); + return std::make_pair(base2_value, floating_pow2 - 1); + } else { + // Input was float: never add 1/2 bit. + // Why? Because we converted to double, and the 1/2 bit beyond float is WAY too large compared + // to double's precision. And the 1/2 bit beyond double is not due to user input. + return std::make_pair(significand << 1, floating_pow2 - 1); + } + }(significand, floating_pow2); + + // Main algorithm: Apply the powers of 2 and 10 (except for the last power-of-10) + auto magnitude = + convert_floating_to_integral_shifting(base2_value, shifting_pow10, pow2); + + // Spark wants to floor the last digits of the output, clearing data that was beyond the + // precision that was available in double. + // How many digits do we need to floor? + // From the decimal digit corresponding to pow2 (just past double precision) to the end (pow10). + // The conversion from pow2 to pow10 is log10(2), which is ~ 90/299 (close enough for ints) + int const floor_pow10 = (90 * pow2) / 299 - pow10; + if (floor_pow10 < 0) { + // Truncated: The scale factor cut off the extra, imprecise bits. + // To round to the final decimal place, add 5 to one past the last decimal place + magnitude += 5U; + magnitude /= 10U; // Apply the last power of 10 + } else { + // We are keeping decimal digits with data beyond the precision of double + // We want to truncate these digits, but sometimes we want to round first + // We will round if and only if we didn't already add a half-bit earlier + if constexpr (cuda::std::is_same_v) { + // For doubles, only round the extra digits of whole numbers + // If it was not a whole number, we already added 1/2 a bit at higher precision than this + // earlier. + if (is_whole_number) { + magnitude += multiply_power10(decltype(magnitude)(5), floor_pow10); + } + } else { + // Input was float: we didn't add a half-bit earlier, so round at the edge of precision here. + magnitude += multiply_power10(decltype(magnitude)(5), floor_pow10); + } + + // +1: Divide the last power-of-10 that we postponed earlier to do rounding. + auto const truncated = divide_power10(magnitude, floor_pow10 + 1); + magnitude = multiply_power10(truncated, floor_pow10); + } + + // Reapply the sign and return + // NOTE: Cast can overflow! + auto const signed_magnitude = static_cast(magnitude); + return is_negative ? -signed_magnitude : signed_magnitude; +} + +template FloatingType __device__ scaled_round(FloatingType floating, int const exp10) { - auto const scale_factor = std::pow(10, exp10); - return std::round(scale_factor * fix_before_round(floating, exp10)); + // auto const scale_factor = std::pow(10, exp10); + // return std::round(scale_factor * fix_before_round(floating, exp10)); + return static_cast(convert_floating_to_integral_SPARK_RAPIDS( + fix_before_round(floating, exp10), numeric::scale_type{-exp10})); } struct float_to_decimal_fn { @@ -1219,10 +1321,10 @@ struct float_to_decimal_fn { int32_t precision, rmm::cuda_stream_view stream) const { - if constexpr ((std::is_same_v || std::is_same_v)&&( - std::is_same_v || - std::is_same_v || - std::is_same_v)) { + if constexpr ((std::is_same_v || std::is_same_v) && + (std::is_same_v || + std::is_same_v || + std::is_same_v)) { using DecimalRepType = cudf::device_storage_type_t; // Exclusive bound @@ -1261,7 +1363,10 @@ struct float_to_decimal_fn { // scaled_rounded, // scale * std::nextafter(static_cast(x), direction)); #endif - auto const scaled_rounded = scaled_round(static_cast(x), decimal_places); + // auto const scaled_rounded = + // scaled_round(static_cast(x), decimal_places); + auto const scaled_rounded = + scaled_round(static_cast(x), decimal_places); auto const is_out_of_bound = (min_ex_bound >= scaled_rounded) || (scaled_rounded >= max_ex_bound);