From 3c83ce451446dfd556bd14ad8537b0189226a0e5 Mon Sep 17 00:00:00 2001 From: Paul Mattione <156858817+pmattione-nvidia@users.noreply.github.com> Date: Wed, 10 Jul 2024 18:58:31 -0600 Subject: [PATCH] New Decimal <--> Floating conversion (#15905) This PR contains the main algorithm for the new decimal <--> floating conversion code. This algorithm was written to address the precision issues described [here](https://github.com/rapidsai/cudf/issues/14169). ### Summary * The new algorithm is more accurate than the previous code, but it is also far more complex. * It can perform conversions that were not even possible in the old code due to overflow (decimal32/64/128 conversions only worked for scale factors up to 10^9/18/38, respectively). Now the entire floating-point range is convertible, including denormals. * This new algorithm is significantly faster in some parts of the conversion phase-space, and in some parts slightly slower. ### Previous PR's These contain the supporting parts of this work: * [Explicit conversion PR](https://github.com/rapidsai/cudf/pull/15438) * [Benchmarking PR](https://github.com/rapidsai/cudf/pull/15334) * [Powers-of-10 PR](https://github.com/rapidsai/cudf/pull/15353) * [Utilities PR](https://github.com/rapidsai/cudf/pull/15359). These utilities are updated here to support denormals. ### Algorithm Outline We convert floating -> (integer) decimal by: * Extract the floating-point mantissa (converted to integer) and power-of-2 * For float we use a uint64 to contain our data during the below shifting/scaling, for double uint128_t * In this shifting integer, we alternately apply the extracted powers-of-2 (bit-shifts, until they're all used) and scale-factor powers-of-10 (multiply/divide) as needed to reach the desired scale factor. Decimal -> floating is just the reverse operation. ### Supplemental Changes * Testing: Add decimal128, add precise-conversion tests. Remove kludges due to inaccurate conversions. Add test for zeroes. * Benchmarking: Enable regions of conversion phase-space for benchmarking that were not possible in the old algorithm. * Unary: Cleanup by using CUDF_ENABLE_IF. Call new conversion code for base-10 fixed-point. ### Performance for various conversions/input-ranges * Note: F32/F64 is float/double New algorithm is **FASTER** by: * F64 --> decimal64: 60% for E8 --> E15 * F64 --> decimal128: 13% for E-8 --> E-15 * F64 --> decimal128: 22% for E8 --> E15 * F64 --> decimal128: 27% for E31 --> E38 * decimal32 --> F64: 18% for E-3 --> E4 * decimal64 --> F64: 27% for E-14 --> E-7 * decimal64 --> F64: 17% for E-3 --> E4 * decimal128 --> F64: 21% for E-14 --> E-7 * decimal128 --> F64: 11% for E-3 --> E4 * decimal128 --> F64: 13% for E31 --> E38 New algorithm is **SLOWER** by: * F32 --> decimal32: 3% for E-3 --> E4 * F32 --> decimal64: 2% for E-14 --> E14 * F64 --> decimal32: 3% for E-3 --> E4 * decimal32 --> F32: 5% for E-3 --> E4 * decimal128 --> F64: 36% for E-37 --> E-30 Other kernels: * The PYMOD binary-op benchmark is 7% slower. ### Performance discussion * Many conversions have identical speed, indicating these algorithms are often fast and we are instead bottlenecked on overheads such as getting the input to the gpu in the first place. * F64 conversions are often much faster than the old algorithm as the new algorithm completely avoids the FP64 pipeline. Other than the cast to double itself, all of the operations are on integers. Thus we don't have threads competing with each other and taking turns for access to the floating-point cores. * The conversions are slightly slower for floats with powers-of-10 near zero. Presumably this is due to code overhead for e.g., handling a large range of inputs, UB-checks for bit shifts, branches for denormals, etc. * The conversion is slower for decimal128 conversions with very small exponents, which requires several large divisions (128bit divided by 64bit). * The PYMOD kernel is slower due to register pressure from the introduction of the new division routines in the earlier PR. Even though this benchmark does not perform decimal <--> floating conversions, it gets hit because of inlined template code in the kernel increasing the code/register pressure. Authors: - Paul Mattione (https://github.com/pmattione-nvidia) Approvers: - Jason Lowe (https://github.com/jlowe) - Bradley Dice (https://github.com/bdice) - Mike Wilson (https://github.com/hyperbolic2346) URL: https://github.com/rapidsai/cudf/pull/15905 --- cpp/benchmarks/decimal/convert_floating.cpp | 17 - .../cudf/fixed_point/floating_conversion.hpp | 964 +++++++++++++++--- cpp/include/cudf/unary.hpp | 35 +- cpp/tests/fixed_point/fixed_point_tests.cpp | 129 ++- .../java/ai/rapids/cudf/ColumnVectorTest.java | 6 +- python/cudf/cudf/tests/test_decimal.py | 2 +- 6 files changed, 958 insertions(+), 195 deletions(-) diff --git a/cpp/benchmarks/decimal/convert_floating.cpp b/cpp/benchmarks/decimal/convert_floating.cpp index a367036c494..ac09c3400cb 100644 --- a/cpp/benchmarks/decimal/convert_floating.cpp +++ b/cpp/benchmarks/decimal/convert_floating.cpp @@ -32,8 +32,6 @@ void bench_cast_decimal(nvbench::state& state, nvbench::type_list || std::is_same_v; - static constexpr bool is_32bit = - std::is_same_v || std::is_same_v; static constexpr bool is_128bit = std::is_same_v || std::is_same_v; @@ -69,21 +67,6 @@ void bench_cast_decimal(nvbench::state& state, nvbench::type_list decimal conversion algorithm is limited - static constexpr bool is_64bit = !is_32bit && !is_128bit; - if (is_32bit && (exp_mode != 3)) { - state.skip("Decimal32 conversion only works up to scale factors of 10^9."); - return; - } - if (is_64bit && ((exp_mode < 2) || (exp_mode > 4))) { - state.skip("Decimal64 conversion only works up to scale factors of 10^18."); - return; - } - if (is_128bit && ((exp_mode == 0) || (exp_mode == 6))) { - state.skip("Decimal128 conversion only works up to scale factors of 10^38."); - return; - } - // Type IDs auto const input_id = cudf::type_to_id(); auto const output_id = cudf::type_to_id(); diff --git a/cpp/include/cudf/fixed_point/floating_conversion.hpp b/cpp/include/cudf/fixed_point/floating_conversion.hpp index 2c3a5c5629d..c64ae8877d4 100644 --- a/cpp/include/cudf/fixed_point/floating_conversion.hpp +++ b/cpp/include/cudf/fixed_point/floating_conversion.hpp @@ -18,6 +18,7 @@ #include +#include #include #include @@ -34,6 +35,49 @@ namespace numeric { namespace detail { +/** + * @brief Determine the number of significant bits in an integer + * + * @tparam T Type of input integer value. Must be either uint32_t, uint64_t, or __uint128_t + * @param value The integer whose bits are being counted + * @return The number of significant bits: the # of bits - # of leading zeroes + */ +template || std::is_same_v || + std::is_same_v)> +CUDF_HOST_DEVICE inline int count_significant_bits(T value) +{ +#ifdef __CUDA_ARCH__ + if constexpr (std::is_same_v) { + return 64 - __clzll(static_cast(value)); + } else if constexpr (std::is_same_v) { + return 32 - __clz(static_cast(value)); + } else if constexpr (std::is_same_v) { + // 128 bit type, must break up into high and low components + auto const high_bits = static_cast(value >> 64); + auto const low_bits = static_cast(value); + return 128 - (__clzll(high_bits) + static_cast(high_bits == 0) * __clzll(low_bits)); + } +#else + // Undefined behavior to call __builtin_clzll() with zero in gcc and clang + if (value == 0) { return 0; } + + if constexpr (std::is_same_v) { + return 64 - __builtin_clzll(value); + } else if constexpr (std::is_same_v) { + return 32 - __builtin_clz(value); + } else if constexpr (std::is_same_v) { + // 128 bit type, must break up into high and low components + auto const high_bits = static_cast(value >> 64); + if (high_bits == 0) { + return 64 - __builtin_clzll(static_cast(value)); + } else { + return 128 - __builtin_clzll(high_bits); + } + } +#endif +} + /** * @brief Helper struct for getting and setting the components of a floating-point value * @@ -62,27 +106,28 @@ struct floating_converter { // The low 23 / 52 bits (for float / double) are the mantissa. // The mantissa is normalized. There is an understood 1 bit to the left of the binary point. // The value of the mantissa is in the range [1, 2). - /// # mantissa bits (-1 for understood bit) - static constexpr int num_mantissa_bits = cuda::std::numeric_limits::digits - 1; + /// # significand bits (includes understood bit) + static constexpr int num_significand_bits = cuda::std::numeric_limits::digits; + /// # stored mantissa bits (-1 for understood bit) + static constexpr int num_stored_mantissa_bits = num_significand_bits - 1; /// The mask for the understood bit - static constexpr IntegralType understood_bit_mask = (IntegralType(1) << num_mantissa_bits); + static constexpr IntegralType understood_bit_mask = (IntegralType(1) << num_stored_mantissa_bits); /// The mask to select the mantissa static constexpr IntegralType mantissa_mask = understood_bit_mask - 1; // And in between are the bits used to store the biased power-of-2 exponent. /// # exponents bits (-1 for sign bit) - static constexpr int num_exponent_bits = num_floating_bits - num_mantissa_bits - 1; + static constexpr int num_exponent_bits = num_floating_bits - num_stored_mantissa_bits - 1; /// The mask for the exponents, unshifted static constexpr IntegralType unshifted_exponent_mask = (IntegralType(1) << num_exponent_bits) - 1; /// The mask to select the exponents - static constexpr IntegralType exponent_mask = unshifted_exponent_mask << num_mantissa_bits; + static constexpr IntegralType exponent_mask = unshifted_exponent_mask << num_stored_mantissa_bits; // To store positive and negative exponents as unsigned values, the stored value for // the power-of-2 is exponent + bias. The bias is 127 for floats and 1023 for doubles. /// 127 / 1023 for float / double - static constexpr IntegralType exponent_bias = - cuda::std::numeric_limits::max_exponent - 1; + static constexpr int exponent_bias = cuda::std::numeric_limits::max_exponent - 1; /** * @brief Reinterpret the bits of a floating-point value as an integer @@ -113,15 +158,15 @@ struct floating_converter { } /** - * @brief Extracts the integral significand of a bit-casted floating-point number + * @brief Checks whether the bit-casted floating-point value is +/-0 * - * @param integer_rep The bit-casted floating value to extract the exponent from - * @return The integral significand, bit-shifted to a (large) whole number + * @param integer_rep The bit-casted floating value to check if is +/-0 + * @return True if is a zero, else false */ - CUDF_HOST_DEVICE inline static IntegralType get_base2_value(IntegralType integer_rep) + CUDF_HOST_DEVICE inline static bool is_zero(IntegralType integer_rep) { - // Extract the significand, setting the high bit for the understood 1/2 - return (integer_rep & mantissa_mask) | understood_bit_mask; + // It's a zero if every non-sign bit is zero + return ((integer_rep & ~sign_mask) == 0); } /** @@ -137,40 +182,59 @@ struct floating_converter { } /** - * @brief Extracts the exponent of a bit-casted floating-point number + * @brief Extracts the significand and exponent of a bit-casted floating-point number, + * shifted for denormals. * - * @note This returns INT_MIN for +/-0, +/-inf, NaN's, and denormals - * For all of these cases, the decimal fixed_point number should be set to zero + * @note Zeros/inf/NaN not handled. * * @param integer_rep The bit-casted floating value to extract the exponent from - * @return The stored base-2 exponent, or INT_MIN for special values + * @return The stored base-2 exponent and significand, shifted for denormals */ - CUDF_HOST_DEVICE inline static int get_exp2(IntegralType integer_rep) + CUDF_HOST_DEVICE inline static std::pair get_significand_and_pow2( + IntegralType integer_rep) { - // First extract the exponent bits and handle its special values. - // To minimize branching, all of these special cases will return INT_MIN. - // For all of these cases, the decimal fixed_point number should be set to zero. + // Extract the significand + auto significand = (integer_rep & mantissa_mask); + + // Extract the exponent bits. auto const exponent_bits = integer_rep & exponent_mask; + + // Notes on special values of exponent_bits: + // bits = exponent_mask is +/-inf or NaN, but those are handled prior to input. + // bits = 0 is either a denormal (handled below) or a zero (handled earlier by caller). + int floating_pow2; if (exponent_bits == 0) { - // Because of the understood set-bit not stored in the mantissa, it is not possible - // to store the value zero directly. Instead both +/-0 and denormals are represented with - // the exponent bits set to zero. - // Thus it's fastest to just floor (generally unwanted) denormals to zero. - return INT_MIN; - } else if (exponent_bits == exponent_mask) { - //+/-inf and NaN values are stored with all of the exponent bits set. - // As none of these are representable by integers, we'll return the same value for all cases. - return INT_MIN; + // Denormal values are 2^(1 - exponent_bias) * Sum_i(B_i * 2^-i) + // Where i is the i-th mantissa bit (counting from the LEFT, starting at 1), + // and B_i is the value of that bit (0 or 1) + // So e.g. for the minimum denormal, only the lowest bit is set: + // FLT_TRUE_MIN = 2^(1 - 127) * 2^-23 = 2^-149 + // DBL_TRUE_MIN = 2^(1 - 1023) * 2^-52 = 2^-1074 + floating_pow2 = 1 - exponent_bias; + + // Line-up denormal to same (understood) bit as normal numbers + // This is so bit-shifting starts at the same bit index + auto const lineup_shift = num_significand_bits - count_significant_bits(significand); + significand <<= lineup_shift; + floating_pow2 -= lineup_shift; + } else { + // Extract the exponent value: shift the bits down and subtract the bias. + auto const shifted_exponent_bits = exponent_bits >> num_stored_mantissa_bits; + floating_pow2 = static_cast(shifted_exponent_bits) - exponent_bias; + + // Set the high bit for the understood 1/2 + significand |= understood_bit_mask; } - // Extract the exponent value: shift the bits down and subtract the bias. - using SignedIntegralType = cuda::std::make_signed_t; - SignedIntegralType const shifted_exponent_bits = exponent_bits >> num_mantissa_bits; - return shifted_exponent_bits - static_cast(exponent_bias); + // To convert the mantissa to an integer, we effectively applied #-mantissa-bits + // powers of 2 to convert the fractional value to an integer, so subtract them off here + int const pow2 = floating_pow2 - num_stored_mantissa_bits; + + return {significand, pow2}; } /** - * @brief Sets the sign bit of a positive floating-point number + * @brief Sets the sign bit of a floating-point number * * @param floating The floating-point value to set the sign of. Must be positive. * @param is_negative The sign bit to set for the floating-point number @@ -192,83 +256,60 @@ struct floating_converter { /** * @brief Adds to the base-2 exponent of a floating-point number * + * @note The caller must guarantee that the input is a positive (> 0) whole number. + * * @param floating The floating value to add to the exponent of. Must be positive. - * @param exp2 The power-of-2 to add to the floating-point number - * @return The input floating-point value * 2^exp2 + * @param pow2 The power-of-2 to add to the floating-point number + * @return The input floating-point value * 2^pow2 */ - CUDF_HOST_DEVICE inline static FloatingType add_exp2(FloatingType floating, int exp2) + CUDF_HOST_DEVICE inline static FloatingType add_pow2(FloatingType floating, int pow2) { + // Note that the input floating-point number is positive (& whole), so we don't have to + // worry about the sign here; the sign will be set later in set_is_negative() + // Convert floating to integer auto integer_rep = bit_cast_to_integer(floating); // Extract the currently stored (biased) exponent + using SignedType = std::make_signed_t; auto exponent_bits = integer_rep & exponent_mask; - auto stored_exp2 = exponent_bits >> num_mantissa_bits; + auto stored_pow2 = static_cast(exponent_bits >> num_stored_mantissa_bits); // Add the additional power-of-2 - stored_exp2 += exp2; + stored_pow2 += pow2; // Check for exponent over/under-flow. - // Note that the input floating-point number is always positive, so we don't have to - // worry about the sign here; the sign will be set later in set_is_negative() - if (stored_exp2 <= 0) { - return 0.0; - } else if (stored_exp2 >= unshifted_exponent_mask) { + if (stored_pow2 <= 0) { + // Denormal (zero handled prior to input) + + // Early out if bit shift will zero it anyway. + // Note: We must handle this explicitly, as too-large a bit-shift is UB + auto const bit_shift = -stored_pow2 + 1; //+1 due to understood bit set below + if (bit_shift > num_stored_mantissa_bits) { return 0.0; } + + // Clear the exponent bits (zero means 2^-126/2^-1022 w/ no understood bit) + integer_rep &= (~exponent_mask); + + // The input floating-point number has an "understood" bit that we need to set + // prior to bit-shifting. Set the understood bit. + integer_rep |= understood_bit_mask; + + // Convert to denormal: bit shift off the low bits + integer_rep >>= bit_shift; + } else if (stored_pow2 >= static_cast(unshifted_exponent_mask)) { + // Overflow: Set infinity return cuda::std::numeric_limits::infinity(); } else { - // Clear existing exponent bits and set new ones - exponent_bits = stored_exp2 << num_mantissa_bits; + // Normal number: Clear existing exponent bits and set new ones + exponent_bits = static_cast(stored_pow2) << num_stored_mantissa_bits; integer_rep &= (~exponent_mask); integer_rep |= exponent_bits; - - // Convert back to float - return bit_cast_to_floating(integer_rep); } - } -}; -/** - * @brief Determine the number of significant bits in an integer - * - * @tparam T Type of input integer value. Must be either uint32_t, uint64_t, or __uint128_t - * @param value The integer whose bits are being counted - * @return The number of significant bits: the # of bits - # of leading zeroes - */ -template || std::is_same_v || - std::is_same_v)> -CUDF_HOST_DEVICE inline int count_significant_bits(T value) -{ -#ifdef __CUDA_ARCH__ - if constexpr (std::is_same_v) { - return 64 - __clzll(static_cast(value)); - } else if constexpr (std::is_same_v) { - return 32 - __clz(static_cast(value)); - } else if constexpr (std::is_same_v) { - // 128 bit type, must break up into high and low components - auto const high_bits = static_cast(value >> 64); - auto const low_bits = static_cast(value); - return 128 - (__clzll(high_bits) + static_cast(high_bits == 0) * __clzll(low_bits)); - } -#else - // Undefined behavior to call __builtin_clzll() with zero in gcc and clang - if (value == 0) { return 0; } - - if constexpr (std::is_same_v) { - return 64 - __builtin_clzll(value); - } else if constexpr (std::is_same_v) { - return 32 - __builtin_clz(value); - } else if constexpr (std::is_same_v) { - // 128 bit type, must break up into high and low components - auto const high_bits = static_cast(value >> 64); - if (high_bits == 0) { - return 64 - __builtin_clzll(static_cast(value)); - } else { - return 128 - __builtin_clzll(high_bits); - } + // Convert back to float + return bit_cast_to_floating(integer_rep); } -#endif -} +}; /** * @brief Recursively calculate a signed large power of 10 (>= 10^19) that can only be stored in an @@ -276,18 +317,18 @@ CUDF_HOST_DEVICE inline int count_significant_bits(T value) * * @note Intended to be run at compile time. * - * @tparam Exp10 The power of 10 to calculate - * @return Returns 10^Exp10 + * @tparam Pow10 The power of 10 to calculate + * @return Returns 10^Pow10 */ -template +template constexpr __uint128_t large_power_of_10() { // Stop at 10^19 to speed up compilation; literals can be used for smaller powers of 10. - static_assert(Exp10 >= 19); - if constexpr (Exp10 == 19) + static_assert(Pow10 >= 19); + if constexpr (Pow10 == 19) return __uint128_t(10000000000000000000ULL); else - return large_power_of_10() * __uint128_t(10); + return large_power_of_10() * __uint128_t(10); } /** @@ -295,11 +336,11 @@ constexpr __uint128_t large_power_of_10() * * @tparam T Type of value to be divided-from. * @param value The number to be divided-from. - * @param exp10 The power-of-10 of the denominator, from 0 to 9 inclusive. - * @return Returns value / 10^exp10 + * @param pow10 The power-of-10 of the denominator, from 0 to 9 inclusive. + * @return Returns value / 10^pow10 */ -template >* = nullptr> -CUDF_HOST_DEVICE inline T divide_power10_32bit(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline T divide_power10_32bit(T value, int pow10) { // Computing division this way is much faster than the alternatives. // Division is not implemented in GPU hardware, and the compiler will often implement it as a @@ -309,7 +350,7 @@ CUDF_HOST_DEVICE inline T divide_power10_32bit(T value, int exp10) // Instead, if the compiler can see exactly what number it is dividing by, it can // produce much more optimal assembly, doing bit shifting, multiplies by a constant, etc. - // For the compiler to see the value though, array lookup (with exp10 as the index) + // For the compiler to see the value though, array lookup (with pow10 as the index) // is not sufficient: We have to use a switch statement. Although this introduces a branch, // it is still much faster than doing the divide any other way. // Perhaps an array can be used in C++23 with the assume attribute? @@ -325,7 +366,7 @@ CUDF_HOST_DEVICE inline T divide_power10_32bit(T value, int exp10) // introduces too much pressure on the kernels that use this code, slowing down their benchmarks. // It also dramatically slows down the compile time. - switch (exp10) { + switch (pow10) { case 0: return value; case 1: return value / 10U; case 2: return value / 100U; @@ -345,14 +386,14 @@ CUDF_HOST_DEVICE inline T divide_power10_32bit(T value, int exp10) * * @tparam T Type of value to be divided-from. * @param value The number to be divided-from. - * @param exp10 The power-of-10 of the denominator, from 0 to 19 inclusive. - * @return Returns value / 10^exp10 + * @param pow10 The power-of-10 of the denominator, from 0 to 19 inclusive. + * @return Returns value / 10^pow10 */ -template >* = nullptr> -CUDF_HOST_DEVICE inline T divide_power10_64bit(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline T divide_power10_64bit(T value, int pow10) { // See comments in divide_power10_32bit() for discussion. - switch (exp10) { + switch (pow10) { case 0: return value; case 1: return value / 10U; case 2: return value / 100U; @@ -382,14 +423,14 @@ CUDF_HOST_DEVICE inline T divide_power10_64bit(T value, int exp10) * * @tparam T Type of value to be divided-from. * @param value The number to be divided-from. - * @param exp10 The power-of-10 of the denominator, from 0 to 38 inclusive. - * @return Returns value / 10^exp10. + * @param pow10 The power-of-10 of the denominator, from 0 to 38 inclusive. + * @return Returns value / 10^pow10. */ -template >* = nullptr> -CUDF_HOST_DEVICE inline constexpr T divide_power10_128bit(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline constexpr T divide_power10_128bit(T value, int pow10) { // See comments in divide_power10_32bit() for an introduction. - switch (exp10) { + switch (pow10) { case 0: return value; case 1: return value / 10U; case 2: return value / 100U; @@ -438,14 +479,14 @@ CUDF_HOST_DEVICE inline constexpr T divide_power10_128bit(T value, int exp10) * * @tparam T Type of value to be multiplied. * @param value The number to be multiplied. - * @param exp10 The power-of-10 of the multiplier, from 0 to 9 inclusive. - * @return Returns value * 10^exp10 + * @param pow10 The power-of-10 of the multiplier, from 0 to 9 inclusive. + * @return Returns value * 10^pow10 */ -template >* = nullptr> -CUDF_HOST_DEVICE inline constexpr T multiply_power10_32bit(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline constexpr T multiply_power10_32bit(T value, int pow10) { // See comments in divide_power10_32bit() for discussion. - switch (exp10) { + switch (pow10) { case 0: return value; case 1: return value * 10U; case 2: return value * 100U; @@ -465,14 +506,14 @@ CUDF_HOST_DEVICE inline constexpr T multiply_power10_32bit(T value, int exp10) * * @tparam T Type of value to be multiplied. * @param value The number to be multiplied. - * @param exp10 The power-of-10 of the multiplier, from 0 to 19 inclusive. - * @return Returns value * 10^exp10 + * @param pow10 The power-of-10 of the multiplier, from 0 to 19 inclusive. + * @return Returns value * 10^pow10 */ -template >* = nullptr> -CUDF_HOST_DEVICE inline constexpr T multiply_power10_64bit(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline constexpr T multiply_power10_64bit(T value, int pow10) { // See comments in divide_power10_32bit() for discussion. - switch (exp10) { + switch (pow10) { case 0: return value; case 1: return value * 10U; case 2: return value * 100U; @@ -502,14 +543,14 @@ CUDF_HOST_DEVICE inline constexpr T multiply_power10_64bit(T value, int exp10) * * @tparam T Type of value to be multiplied. * @param value The number to be multiplied. - * @param exp10 The power-of-10 of the multiplier, from 0 to 38 inclusive. - * @return Returns value * 10^exp10. + * @param pow10 The power-of-10 of the multiplier, from 0 to 38 inclusive. + * @return Returns value * 10^pow10. */ -template >* = nullptr> -CUDF_HOST_DEVICE inline constexpr T multiply_power10_128bit(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline constexpr T multiply_power10_128bit(T value, int pow10) { // See comments in divide_power10_128bit() for discussion. - switch (exp10) { + switch (pow10) { case 0: return value; case 1: return value * 10U; case 2: return value * 100U; @@ -556,59 +597,678 @@ CUDF_HOST_DEVICE inline constexpr T multiply_power10_128bit(T value, int exp10) /** * @brief Multiply an integer by a power of 10. * - * @note Use this function if you have no a-priori knowledge of what exp10 might be. + * @note Use this function if you have no a-priori knowledge of what pow10 might be. * If you do, prefer calling the bit-size-specific versions * * @tparam Rep Representation type needed for integer exponentiation * @tparam T Integral type of value to be multiplied. * @param value The number to be multiplied. - * @param exp10 The power-of-10 of the multiplier. - * @return Returns value * 10^exp10 + * @param pow10 The power-of-10 of the multiplier. + * @return Returns value * 10^pow10 */ -template )>* = nullptr> -CUDF_HOST_DEVICE inline constexpr T multiply_power10(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline constexpr T multiply_power10(T value, int pow10) { - // Use this function if you have no knowledge of what exp10 might be + // Use this function if you have no knowledge of what pow10 might be // If you do, prefer calling the bit-size-specific versions if constexpr (sizeof(Rep) <= 4) { - return multiply_power10_32bit(value, exp10); + return multiply_power10_32bit(value, pow10); } else if constexpr (sizeof(Rep) <= 8) { - return multiply_power10_64bit(value, exp10); + return multiply_power10_64bit(value, pow10); } else { - return multiply_power10_128bit(value, exp10); + return multiply_power10_128bit(value, pow10); } } /** * @brief Divide an integer by a power of 10. * - * @note Use this function if you have no a-priori knowledge of what exp10 might be. + * @note Use this function if you have no a-priori knowledge of what pow10 might be. * If you do, prefer calling the bit-size-specific versions * * @tparam Rep Representation type needed for integer exponentiation * @tparam T Integral type of value to be divided-from. * @param value The number to be divided-from. - * @param exp10 The power-of-10 of the denominator. - * @return Returns value / 10^exp10 + * @param pow10 The power-of-10 of the denominator. + * @return Returns value / 10^pow10 */ -template )>* = nullptr> -CUDF_HOST_DEVICE inline constexpr T divide_power10(T value, int exp10) +template )> +CUDF_HOST_DEVICE inline constexpr T divide_power10(T value, int pow10) { - // Use this function if you have no knowledge of what exp10 might be + // Use this function if you have no knowledge of what pow10 might be // If you do, prefer calling the bit-size-specific versions if constexpr (sizeof(Rep) <= 4) { - return divide_power10_32bit(value, exp10); + return divide_power10_32bit(value, pow10); } else if constexpr (sizeof(Rep) <= 8) { - return divide_power10_64bit(value, exp10); + return divide_power10_64bit(value, pow10); } else { - return divide_power10_128bit(value, exp10); + return divide_power10_128bit(value, pow10); } } +/** + * @brief Perform a bit-shift left, guarding against undefined behavior + * + * @tparam IntegerType Type of input unsigned integer value + * @param value The integer whose bits are being shifted + * @param bit_shift The number of bits to shift left + * @return The bit-shifted integer, except max value if UB would occur + */ +template )> +CUDF_HOST_DEVICE inline IntegerType guarded_left_shift(IntegerType value, int bit_shift) +{ + // Bit shifts larger than this are undefined behavior + constexpr int max_safe_bit_shift = cuda::std::numeric_limits::digits - 1; + return (bit_shift <= max_safe_bit_shift) ? value << bit_shift + : cuda::std::numeric_limits::max(); +} + +/** + * @brief Perform a bit-shift right, guarding against undefined behavior + * + * @tparam IntegerType Type of input unsigned integer value + * @param value The integer whose bits are being shifted + * @param bit_shift The number of bits to shift right + * @return The bit-shifted integer, which is zero on underflow + */ +template )> +CUDF_HOST_DEVICE inline IntegerType guarded_right_shift(IntegerType value, int bit_shift) +{ + // Bit shifts larger than this are undefined behavior + constexpr int max_safe_bit_shift = cuda::std::numeric_limits::digits - 1; + return (bit_shift <= max_safe_bit_shift) ? value >> bit_shift : 0; +} + +/** + * @brief Helper struct with common constants needed by the floating <--> decimal conversions + */ +template +struct shifting_constants { + /// Whether the type is double + static constexpr bool is_double = cuda::std::is_same_v; + + /// Integer type that can hold the value of the significand + using IntegerRep = std::conditional_t; + + /// Num bits needed to hold the significand + static constexpr auto num_significand_bits = cuda::std::numeric_limits::digits; + + /// Shift data back and forth in space of a type with 2x the starting bits, to give us enough room + using ShiftingRep = std::conditional_t; + + // The significand of a float / double is 24 / 53 bits + // However, to uniquely represent each double / float as different #'s in decimal + // you need 17 / 9 digits (from std::numeric_limits::max_digits10) + // To represent 10^17 / 10^9, you need 57 / 30 bits + // So we need to keep track of at least this # of bits during shifting to ensure no info is lost + + // We will be alternately shifting our data back and forth by powers of 2 and 10 to convert + // between floating and decimal (see shifting functions for details). + + // To iteratively shift back and forth, our 2's (bit-) and 10's (divide-/multiply-) shifts must + // be of nearly the same magnitude, or else we'll over-/under-flow our shifting integer + + // 2^10 is approximately 10^3, so the largest shifts will have a 10/3 ratio + // The difference between 2^10 and 10^3 is 1024/1000: 2.4% + // So every time we shift by 10 bits and 3 decimal places, the 2s shift is an extra 2.4% + + // This 2.4% error compounds each time we do an iteration. + // The min (normal) float is 2^-126. + // Min denormal: 2^-126 * 2^-23 (mantissa bits): 2^-149 = ~1.4E-45 + // With our 10/3 shifting ratio, 149 (bit-shifts) * (3 / 10) = 44.7 (10s-shifts) + // 10^(-44.7) = 2E-45, which is off by ~1.4x from 1.4E-45 + + // Similarly, the min (normal) double is 2^-1022. + // Min denormal: 2^-1022 * 2^-52 (mantissa bits): 2^-1074 = 4.94E-324 + // With our 10/3 shifting ratio, 1074 (bit-shifts) * (3 / 10) = 322.2 (10s-shifts) + // 10^(-322.2) = 6.4E-323, which is off by ~13.2x from 4.94E-324 + + // To account for this compounding error, we can either complicate our loop code (slow), + // or use extra bits (in the direction we're shifting the 2s!) to compensate: + // 4 extra bits for doubles (2^4 = 16 > 13.2x error), 1 extra for floats (2 > 1.4x error) + /// # buffer bits to account for shifting error + static constexpr int num_2s_shift_buffer_bits = is_double ? 4 : 1; + + // How much room do we have for shifting? + // Float: 64-bit ShiftingRep - 31 (rep + buffer) = 33 bits. 2^33 = 8.6E9 + // Double: 128-bit ShiftingRep - 61 (rep + buffer) = 67 bits. 2^67 = 1.5E20 + // Thus for double / float we can shift up to 20 / 9 decimal places at once + + // But, we need to stick to our 10-bits / 3-decimals shift ratio to not over/under-flow. + // To simplify our loop code, we'll keep to this ratio by instead shifting a max of + // 18 / 9 decimal places, for double / float (60 / 30 bits) + /// Max at-once decimal place shift + static constexpr int max_digits_shift = is_double ? 18 : 9; + /// Max at-once bit shift + static constexpr int max_bits_shift = max_digits_shift * 10 / 3; + + // Pre-calculate 10^max_digits_shift. Note that 10^18 / 10^9 fits within IntegerRep + /// 10^max_digits_shift + static constexpr auto max_digits_shift_pow = + multiply_power10(IntegerRep(1), max_digits_shift); +}; + +/** + * @brief Add half a bit to integer rep of floating point if conversion causes truncation + * + * @note This fixes problems like 1.2 (value = 1.1999...) at scale -1 -> 11 + * + * @tparam FloatingType Type of integer holding the floating-point significand + * @param floating The floating-point number to convert + * @param integer_rep The integer representation of the floating-point significand + * @param pow2 The power of 2 that needs to be applied to the significand + * @param pow10 The power of 10 that needs to be applied to the significand + * @return integer_rep, shifted 1 and ++'d if the conversion to decimal causes truncation + */ +template )> +CUDF_HOST_DEVICE cuda::std::pair::IntegralType, int> +add_half_if_truncates(FloatingType floating, + typename floating_converter::IntegralType integer_rep, + int pow2, + int pow10) +{ + // The user-supplied scale may truncate information, so we need to talk about rounding. + // We have chosen not to round, so we want 1.23456f with scale -4 to be decimal 12345 + + // But if we don't round at all, 1.2 (double) with scale -1 is 11 instead of 12! + // Why? Because 1.2 (double) is actually stored as 1.1999999... which we truncate to 1.1 + // While correct (given our choice to truncate), this is surprising and undesirable. + // This problem happens because 1.2 is not perfectly representable in floating point, + // and the value 1.199999... happened to be closer to 1.2 than the next value (1.2000...1...) + + // If the scale truncates information (we didn't choose to keep exactly 1.1999...), how + // do we make sure we store 1.2? We'll add half an ulp! (unit in the last place) + // Then 1.1999... becomes 1.2000...1... which truncates to 1.2. + // And if it had been 1.2000...1..., adding half an ulp still truncates to 1.2 + + // Why 1/2 an ulp? Because that's all that is needed. The reason we have this problem in the + // first place is because the compiler rounded (e.g.) 1.2 to the nearest floating point number. + // The distance of this rounding is at most 1/2 ulp, otherwise we'd have rounded the other way. + + // How do we add 1/2 an ulp? Just shift the bits left (updating pow2) and add 1. + // We'll always shift up so every input to the conversion algorithm is aligned the same way. + + // If we add a full ulp we run into issues where we add too much and get the wrong result. + // This is because (e.g.) 2^23 = 8.4E6 which is not quite 7 digits of precision. + // So if we want 7 digits, that may "barely" truncate information; adding a 1 ulp is overkill. + + // So when does the user-supplied scale truncate info? + // For powers > 0: When the 10s (scale) shift is larger than the corresponding bit-shift. + // For powers < 0: When the 10s shift is less than the corresponding bit-shift. + + // Corresponding bit-shift: + // 2^10 is approximately 10^3, but this is off by 1.024% + // 1.024^30 is 2.03704, so this is high by one bit for every 30*3 = 90 powers of 10 + // So 10^N = 2^(10*N/3 - N/90) = 2^(299*N/90) + // Do comparison without dividing, which loses information: + // Note: if shift is "equal," still truncates if pow2 < 0 (shifting UP by 2s, 2^10 > 10^3) + int const pow2_term = 90 * pow2; + int const pow10_term = 299 * pow10; + bool const conversion_truncates = + (pow10_term > pow2_term) || ((pow2_term == pow10_term) && (pow2 < 0)); + + // However, don't add a half-bit if the input is a whole number! + // This is only for errors introduced by rounding decimal fractions! + bool const is_whole_number = (cuda::std::floor(floating) == floating); + bool const add_half_bit = conversion_truncates && !is_whole_number; + + // Add half a bit on truncation (shift to make room and update pow2) + integer_rep <<= 1; + --pow2; + integer_rep += static_cast(add_half_bit); + + return {integer_rep, pow2}; +} + +/** + * @brief Perform base-2 -> base-10 fixed-point conversion for pow10 > 0 + * + * @tparam Rep The type of the storage for the decimal value + * @tparam FloatingType The type of the original floating-point value we are converting from + * @param base2_value The base-2 fixed-point value we are converting from + * @param pow2 The number of powers of 2 to apply to convert from base-2 + * @param pow10 The number of powers of 10 to apply to reach the desired scale factor + * @return Magnitude of the converted-to decimal integer + */ +template )> +CUDF_HOST_DEVICE inline cuda::std::make_unsigned_t shift_to_decimal_pospow( + typename shifting_constants::IntegerRep const base2_value, int pow2, int pow10) +{ + // To convert to decimal, we need to apply the input powers of 2 and 10 + // The result will be (integer) base2_value * (2^pow2) / (10^pow10) + // Output type is ShiftingRep + + // Here pow10 > 0 and pow2 > 0, so we need to shift left by 2s and divide by 10s. + // We'll iterate back and forth between them, shifting up by 2s + // and down by 10s until all of the powers have been applied. + + // However the input base2_value type has virtually no spare room to shift our data + // without over- or under-flowing and losing precision. + // So we'll cast up to ShiftingRep: uint64 for float's, __uint128_t for double's + using Constants = shifting_constants; + using ShiftingRep = typename Constants::ShiftingRep; + auto shifting_rep = static_cast(base2_value); + + // We want to start with our significand bits at the top of the shifting range, + // so that we don't lose information we need on intermediary right-shifts. + // Note that since we're shifting 2s up, we need num_2s_shift_buffer_bits space on the high side, + // For all numbers this bit shift is a fixed distance, due to the understood 2^0 bit. + // Note that shift_from is +1 due to shift in add_half_if_truncates() + static constexpr int shift_up_to = sizeof(ShiftingRep) * 8 - Constants::num_2s_shift_buffer_bits; + static constexpr int shift_from = Constants::num_significand_bits + 1; + static constexpr int max_init_shift = shift_up_to - shift_from; + + // If our total bit shift is less than this, we don't need to iterate + using UnsignedRep = cuda::std::make_unsigned_t; + if (pow2 <= max_init_shift) { + // Shift bits left, divide by 10s to apply the scale factor, and we're done. + shifting_rep = divide_power10(shifting_rep << pow2, pow10); + // NOTE: Cast can overflow! + return static_cast(shifting_rep); + } + + // We need to iterate. Do the combined initial shift + shifting_rep <<= max_init_shift; + pow2 -= max_init_shift; + + // Iterate, dividing by 10s and shifting up by 2s until we're almost done + while (pow10 > Constants::max_digits_shift) { + // More decimal places to shift than we have room: Divide the max number of 10s + shifting_rep /= Constants::max_digits_shift_pow; + pow10 -= Constants::max_digits_shift; + + // If our remaining bit shift is less than the max, we're finished iterating + if (pow2 <= Constants::max_bits_shift) { + // Shift bits left, divide by 10s to apply the scale factor, and we're done. + shifting_rep = divide_power10(shifting_rep << pow2, pow10); + + // NOTE: Cast can overflow! + return static_cast(shifting_rep); + } + + // Shift the max number of bits left again + shifting_rep <<= Constants::max_bits_shift; + pow2 -= Constants::max_bits_shift; + } + + // Last 10s-shift: Divide all remaining decimal places, shift all remaining bits, then bail + // Note: This divide result may not fit in the low half of the bit range + // But the divisor is less than the max-shift, and thus fits within 64 / 32 bits + if constexpr (Constants::is_double) { + shifting_rep = divide_power10_64bit(shifting_rep, pow10); + } else { + shifting_rep = divide_power10_32bit(shifting_rep, pow10); + } + + // Final bit shift: Shift may be large, guard against UB + // NOTE: This can overflow (both cast and shift)! + return guarded_left_shift(static_cast(shifting_rep), pow2); +} + +/** + * @brief Perform base-2 -> base-10 fixed-point conversion for pow10 < 0 + * + * @tparam Rep The type of the storage for the decimal value + * @tparam FloatingType The type of the original floating-point value we are converting from + * @param base2_value The base-2 fixed-point value we are converting from + * @param pow2 The number of powers of 2 to apply to convert from base-2 + * @param pow10 The number of powers of 10 to apply to reach the desired scale factor + * @return Magnitude of the converted-to decimal integer + */ +template )> +CUDF_HOST_DEVICE inline cuda::std::make_unsigned_t shift_to_decimal_negpow( + typename shifting_constants::IntegerRep base2_value, int pow2, int pow10) +{ + // This is similar to shift_to_decimal_pospow(), except pow10 < 0 & pow2 < 0 + // See comments in that function for details. + // Instead here we need to multiply by 10s and shift right by 2s + + // ShiftingRep: uint64 for float's, __uint128_t for double's + using Constants = shifting_constants; + using ShiftingRep = typename Constants::ShiftingRep; + auto shifting_rep = static_cast(base2_value); + + // Convert to using positive values so we don't have keep negating + int pow10_mag = -pow10; + int pow2_mag = -pow2; + + // For performing final 10s-shift + using UnsignedRep = cuda::std::make_unsigned_t; + auto final_shifts_low10s = [&]() { + // Last 10s-shift: multiply all remaining decimal places, shift all remaining bits, then bail + // The multiplier is less than the max-shift, and thus fits within 64 / 32 bits + if constexpr (Constants::is_double) { + shifting_rep = multiply_power10_64bit(shifting_rep, pow10_mag); + } else { + shifting_rep = multiply_power10_32bit(shifting_rep, pow10_mag); + } + + // Final bit shifting: Shift may be large, guard against UB + return static_cast(guarded_right_shift(shifting_rep, pow2_mag)); + }; + + // If our total decimal shift is less than the max, we don't need to iterate + if (pow10_mag <= Constants::max_digits_shift) { return final_shifts_low10s(); } + + // We want to start by lining up our bits to the top of the shifting range, + // except our first operation is a multiply, so not quite that far + // We are bit-shifting down, so we need extra bits on the low-side, which this has. + // Note that shift_from is +1 due to shift in add_half_if_truncates() + static constexpr int shift_up_to = sizeof(ShiftingRep) * 8 - Constants::max_bits_shift; + static constexpr int shift_from = Constants::num_significand_bits + 1; + static constexpr int num_init_bit_shift = shift_up_to - shift_from; + + // Perform initial shift + shifting_rep <<= num_init_bit_shift; + pow2_mag += num_init_bit_shift; + + // Iterate, multiplying by 10s and shifting down by 2s until we're almost done + do { + // More decimal places to shift than we have room: Multiply the max number of 10s + shifting_rep *= Constants::max_digits_shift_pow; + pow10_mag -= Constants::max_digits_shift; + + // If our remaining bit shift is less than the max, we're finished iterating + if (pow2_mag <= Constants::max_bits_shift) { + // Last bit-shift: Shift all remaining bits, apply the remaining scale, then bail + shifting_rep >>= pow2_mag; + + // We need to convert to the output rep for the final scale-factor multiply, because if (e.g.) + // float -> dec128 and some large pow10_mag, it might overflow the 64bit shifting rep. + // It's not needed for pow10 > 0 because we're dividing by 10s there instead of multiplying. + // NOTE: This can overflow! (Both multiply and cast) + return multiply_power10(static_cast(shifting_rep), pow10_mag); + } + + // More bits to shift than we have room: Shift the max number of 2s + shifting_rep >>= Constants::max_bits_shift; + pow2_mag -= Constants::max_bits_shift; + } while (pow10_mag > Constants::max_digits_shift); + + // Do our final shifts + return final_shifts_low10s(); +} + +/** + * @brief Perform base-2 -> base-10 fixed-point conversion + * + * @tparam Rep The type of integer we are converting to, to store the decimal value + * @tparam FloatingType The type of floating-point object we are converting from + * @param base2_value The base-2 fixed-point value we are converting from + * @param pow2 The number of powers of 2 to apply to convert from base-2 + * @param pow10 The number of powers of 10 to apply to reach the desired scale factor + * @return Integer representation of the floating-point value, given the desired scale + */ +template )> +CUDF_HOST_DEVICE inline cuda::std::make_unsigned_t convert_floating_to_integral_shifting( + typename floating_converter::IntegralType base2_value, int pow10, int pow2) +{ + // Apply the powers of 2 and 10 to convert to decimal. + // The result will be base2_value * (2^pow2) / (10^pow10) + + // Note that while this code is branchy, the decimal scale factor is part of the + // column type itself, so every thread will take the same branches on pow10. + // Also data within a column tends to be similar, so they will often take the + // same branches on pow2 as well. + + // NOTE: some returns here can overflow (e.g. ShiftingRep -> UnsignedRep) + using UnsignedRep = cuda::std::make_unsigned_t; + if (pow10 == 0) { + // NOTE: Left Bit-shift can overflow! As can cast! (e.g. double -> decimal32) + // Bit shifts may be large, guard against UB + if (pow2 >= 0) { + return guarded_left_shift(static_cast(base2_value), pow2); + } else { + return static_cast(guarded_right_shift(base2_value, -pow2)); + } + } else if (pow10 > 0) { + if (pow2 <= 0) { + // Power-2/10 shifts both downward: order doesn't matter, apply and bail. + // Guard against shift being undefined behavior + auto const shifted = guarded_right_shift(base2_value, -pow2); + return static_cast(divide_power10(shifted, pow10)); + } + return shift_to_decimal_pospow(base2_value, pow2, pow10); + } else { // pow10 < 0 + if (pow2 >= 0) { + // Power-2/10 shifts both upward: order doesn't matter, apply and bail. + // NOTE: Either shift, multiply, or cast (e.g. double -> decimal32) can overflow! + auto const shifted = guarded_left_shift(static_cast(base2_value), pow2); + return multiply_power10(shifted, -pow10); + } + return shift_to_decimal_negpow(base2_value, pow2, pow10); + } +} + +/** + * @brief Perform floating-point -> integer decimal conversion + * + * @tparam Rep The type of integer we are converting to, to store the decimal value + * @tparam FloatingType The type of floating-point object we are converting from + * @param floating The floating point value to convert + * @param scale The desired base-10 scale factor: decimal value = returned value * 10^scale + * @return Integer representation of the floating-point value, given the desired scale + */ +template )> +CUDF_HOST_DEVICE inline Rep convert_floating_to_integral(FloatingType const& floating, + scale_type const& scale) +{ + // Extract components of the floating point number + using converter = floating_converter; + auto const integer_rep = converter::bit_cast_to_integer(floating); + if (converter::is_zero(integer_rep)) { return 0; } + + // Note that the significand here is an unsigned integer with sizeof(FloatingType) + auto const is_negative = converter::get_is_negative(integer_rep); + auto const [significand, floating_pow2] = converter::get_significand_and_pow2(integer_rep); + + // Add half a bit if truncating to yield expected value, see function for discussion. + auto const pow10 = static_cast(scale); + auto const [base2_value, pow2] = + add_half_if_truncates(floating, significand, floating_pow2, pow10); + + // Apply the powers of 2 and 10 to convert to decimal. + auto const magnitude = + convert_floating_to_integral_shifting(base2_value, pow10, pow2); + + // Reapply the sign and return + // NOTE: Cast can overflow! + auto const signed_magnitude = static_cast(magnitude); + return is_negative ? -signed_magnitude : signed_magnitude; +} + +/** + * @brief Perform base-10 -> base-2 fixed-point conversion for pow10 > 0 + * + * @tparam DecimalRep The decimal integer type we are converting from + * @tparam FloatingType The type of floating point object we are converting to + * @param decimal_rep The decimal integer to convert + * @param pow10 The number of powers of 10 to apply to undo the scale factor + * @return A pair of the base-2 value and the remaining powers of 2 to be applied + */ +template )> +CUDF_HOST_DEVICE inline auto shift_to_binary_pospow(DecimalRep decimal_rep, int pow10) +{ + // This is the reverse of shift_to_decimal_pospow(), see that for more details. + + // ShiftingRep: uint64 for float's, __uint128_t for double's + using Constants = shifting_constants; + using ShiftingRep = typename Constants::ShiftingRep; + + // We want to start by lining up our bits to the top of the shifting range, + // except our first operation is a multiply, so not quite that far + // We are bit-shifting down, so we need extra bits on the low-side, which this has. + static constexpr int shift_up_to = sizeof(ShiftingRep) * 8 - Constants::max_bits_shift; + int const shift_from = count_significant_bits(decimal_rep); + int const num_init_bit_shift = shift_up_to - shift_from; + int pow2 = -num_init_bit_shift; + + // Perform the initial bit shift + ShiftingRep shifting_rep; + if constexpr (sizeof(ShiftingRep) < sizeof(DecimalRep)) { + // Shift within DecimalRep before dropping to the smaller ShiftingRep + decimal_rep = (pow2 >= 0) ? (decimal_rep >> pow2) : (decimal_rep << -pow2); + shifting_rep = static_cast(decimal_rep); + } else { + // Scale up to ShiftingRep before shifting + shifting_rep = static_cast(decimal_rep); + shifting_rep = (pow2 >= 0) ? (shifting_rep >> pow2) : (shifting_rep << -pow2); + } + + // Iterate, multiplying by 10s and shifting down by 2s until we're almost done + while (pow10 > Constants::max_digits_shift) { + // More decimal places to shift than we have room: Multiply the max number of 10s + shifting_rep *= Constants::max_digits_shift_pow; + pow10 -= Constants::max_digits_shift; + + // Then make more room by bit shifting down by the max # of 2s + shifting_rep >>= Constants::max_bits_shift; + pow2 += Constants::max_bits_shift; + } + + // Last 10s-shift: multiply all remaining decimal places + // The multiplier is less than the max-shift, and thus fits within 64 / 32 bits + if constexpr (Constants::is_double) { + shifting_rep = multiply_power10_64bit(shifting_rep, pow10); + } else { + shifting_rep = multiply_power10_32bit(shifting_rep, pow10); + } + + // Our shifting_rep is now the integer mantissa, return it and the powers of 2 + return std::pair{shifting_rep, pow2}; +} + +/** + * @brief Perform base-10 -> base-2 fixed-point conversion for pow10 < 0 + * + * @tparam DecimalRep The decimal integer type we are converting from + * @tparam FloatingType The type of floating point object we are converting to + * @param decimal_rep The decimal integer to convert + * @param pow10 The number of powers of 10 to apply to undo the scale factor + * @return A pair of the base-2 value and the remaining powers of 2 to be applied + */ +template )> +CUDF_HOST_DEVICE inline auto shift_to_binary_negpow(DecimalRep decimal_rep, int const pow10) +{ + // This is the reverse of shift_to_decimal_negpow(), see that for more details. + + // ShiftingRep: uint64 for float's, __uint128_t for double's + using Constants = shifting_constants; + using ShiftingRep = typename Constants::ShiftingRep; + + // We want to start with our significand bits at the top of the shifting range, + // so that we lose minimal information we need on intermediary right-shifts. + // Note that since we're shifting 2s up, we need num_2s_shift_buffer_bits space on the high side + static constexpr int shift_up_to = sizeof(ShiftingRep) * 8 - Constants::num_2s_shift_buffer_bits; + int const shift_from = count_significant_bits(decimal_rep); + int const num_init_bit_shift = shift_up_to - shift_from; + int pow2 = -num_init_bit_shift; + + // Perform the initial bit shift + ShiftingRep shifting_rep; + if constexpr (sizeof(ShiftingRep) < sizeof(DecimalRep)) { + // Shift within DecimalRep before dropping to the smaller ShiftingRep + decimal_rep = (pow2 >= 0) ? (decimal_rep >> pow2) : (decimal_rep << -pow2); + shifting_rep = static_cast(decimal_rep); + } else { + // Scale up to ShiftingRep before shifting + shifting_rep = static_cast(decimal_rep); + shifting_rep = (pow2 >= 0) ? (shifting_rep >> pow2) : (shifting_rep << -pow2); + } + + // Convert to using positive values upfront, simpler than doing later. + int pow10_mag = -pow10; + + // Iterate, dividing by 10s and shifting up by 2s until we're almost done + while (pow10_mag > Constants::max_digits_shift) { + // More decimal places to shift than we have room: Divide the max number of 10s + shifting_rep /= Constants::max_digits_shift_pow; + pow10_mag -= Constants::max_digits_shift; + + // Then make more room by bit shifting up by the max # of 2s + shifting_rep <<= Constants::max_bits_shift; + pow2 -= Constants::max_bits_shift; + } + + // Last 10s-shift: Divdie all remaining decimal places. + // This divide result may not fit in the low half of the bit range + // But the divisor is less than the max-shift, and thus fits within 64 / 32 bits + if constexpr (Constants::is_double) { + shifting_rep = divide_power10_64bit(shifting_rep, pow10_mag); + } else { + shifting_rep = divide_power10_32bit(shifting_rep, pow10_mag); + } + + // Our shifting_rep is now the integer mantissa, return it and the powers of 2 + return std::pair{shifting_rep, pow2}; +} + +/** + * @brief Perform integer decimal -> floating-point conversion + * + * @tparam FloatingType The type of floating-point object we are converting to + * @tparam Rep The decimal integer type we are converting from + * @param value The decimal integer to convert + * @param scale The base-10 scale factor for the input integer + * @return Floating-point representation of the scaled integral value + */ +template )> +CUDF_HOST_DEVICE inline FloatingType convert_integral_to_floating(Rep const& value, + scale_type const& scale) +{ + // Check the sign of the input + bool const is_negative = (value < 0); + + // Convert to unsigned for bit counting/shifting + using UnsignedType = cuda::std::make_unsigned_t; + auto const unsigned_value = [&]() -> UnsignedType { + // Must guard against minimum value, as we can't just negate it: not representable. + if (value == cuda::std::numeric_limits::min()) { return static_cast(value); } + + // No abs function for 128bit types, so have to do it manually. + if constexpr (cuda::std::is_same_v) { + return static_cast(is_negative ? -value : value); + } else { + return cuda::std::abs(value); + } + }(); + + // Shift by powers of 2 and 10 to get our integer mantissa + auto const [mantissa, pow2] = [&]() { + auto const pow10 = static_cast(scale); + if (pow10 >= 0) { + return shift_to_binary_pospow(unsigned_value, pow10); + } else { // pow10 < 0 + return shift_to_binary_negpow(unsigned_value, pow10); + } + }(); + + // Zero has special exponent bits, just handle it here + if (mantissa == 0) { return FloatingType(0.0f); } + + // Cast our integer mantissa to floating point + auto const floating = static_cast(mantissa); // IEEE-754 rounds to even + + // Apply the sign and the remaining powers of 2 + using converter = floating_converter; + auto const magnitude = converter::add_pow2(floating, pow2); + return converter::set_is_negative(magnitude, is_negative); +} + } // namespace detail /** @} */ // end of group diff --git a/cpp/include/cudf/unary.hpp b/cpp/include/cudf/unary.hpp index 74c8bc67d3a..8a515335351 100644 --- a/cpp/include/cudf/unary.hpp +++ b/cpp/include/cudf/unary.hpp @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include #include @@ -50,14 +51,19 @@ namespace cudf { */ template () && - cuda::std::is_floating_point_v>* = nullptr> + CUDF_ENABLE_IF(cuda::std::is_floating_point_v&& is_fixed_point())> CUDF_HOST_DEVICE Fixed convert_floating_to_fixed(Floating floating, numeric::scale_type scale) { - using Rep = typename Fixed::rep; - auto const shifted = numeric::detail::shift(floating, scale); - numeric::scaled_integer scaled{static_cast(shifted), scale}; - return Fixed(scaled); + using Rep = typename Fixed::rep; + auto const value = [&]() { + if constexpr (Fixed::rad == numeric::Radix::BASE_10) { + return numeric::detail::convert_floating_to_integral(floating, scale); + } else { + return static_cast(numeric::detail::shift(floating, scale)); + } + }(); + + return Fixed(numeric::scaled_integer{value, scale}); } /** @@ -75,14 +81,17 @@ CUDF_HOST_DEVICE Fixed convert_floating_to_fixed(Floating floating, numeric::sca */ template && - is_fixed_point()>* = nullptr> + CUDF_ENABLE_IF(cuda::std::is_floating_point_v&& is_fixed_point())> CUDF_HOST_DEVICE Floating convert_fixed_to_floating(Fixed fixed) { - using Rep = typename Fixed::rep; - auto const casted = static_cast(fixed.value()); - auto const scale = numeric::scale_type{-fixed.scale()}; - return numeric::detail::shift(casted, scale); + using Rep = typename Fixed::rep; + if constexpr (Fixed::rad == numeric::Radix::BASE_10) { + return numeric::detail::convert_integral_to_floating(fixed.value(), fixed.scale()); + } else { + auto const casted = static_cast(fixed.value()); + auto const scale = numeric::scale_type{-fixed.scale()}; + return numeric::detail::shift(casted, scale); + } } /** @@ -95,7 +104,7 @@ CUDF_HOST_DEVICE Floating convert_fixed_to_floating(Fixed fixed) */ template >* = nullptr> + CUDF_ENABLE_IF(cuda::std::is_floating_point_v)> CUDF_HOST_DEVICE Floating convert_to_floating(Input input) { if constexpr (is_fixed_point()) { diff --git a/cpp/tests/fixed_point/fixed_point_tests.cpp b/cpp/tests/fixed_point/fixed_point_tests.cpp index ab7984d4b03..a222289216d 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cpp +++ b/cpp/tests/fixed_point/fixed_point_tests.cpp @@ -38,7 +38,7 @@ struct FixedPointTest : public cudf::test::BaseFixture {}; template struct FixedPointTestAllReps : public cudf::test::BaseFixture {}; -using RepresentationTypes = ::testing::Types; +using RepresentationTypes = ::testing::Types; TYPED_TEST_SUITE(FixedPointTestAllReps, RepresentationTypes); @@ -53,6 +53,7 @@ TYPED_TEST(FixedPointTestAllReps, SimpleDecimalXXConstruction) auto num4 = cudf::convert_floating_to_fixed(1.234567, scale_type(-4)); auto num5 = cudf::convert_floating_to_fixed(1.234567, scale_type(-5)); auto num6 = cudf::convert_floating_to_fixed(1.234567, scale_type(-6)); + auto num7 = cudf::convert_floating_to_fixed(0.0, scale_type(-4)); EXPECT_EQ(1, cudf::convert_fixed_to_floating(num0)); EXPECT_EQ(1.2, cudf::convert_fixed_to_floating(num1)); @@ -61,6 +62,7 @@ TYPED_TEST(FixedPointTestAllReps, SimpleDecimalXXConstruction) EXPECT_EQ(1.2345, cudf::convert_fixed_to_floating(num4)); EXPECT_EQ(1.23456, cudf::convert_fixed_to_floating(num5)); EXPECT_EQ(1.234567, cudf::convert_fixed_to_floating(num6)); + EXPECT_EQ(0.0, cudf::convert_fixed_to_floating(num7)); } TYPED_TEST(FixedPointTestAllReps, SimpleNegativeDecimalXXConstruction) @@ -74,6 +76,7 @@ TYPED_TEST(FixedPointTestAllReps, SimpleNegativeDecimalXXConstruction) auto num4 = cudf::convert_floating_to_fixed(-1.234567, scale_type(-4)); auto num5 = cudf::convert_floating_to_fixed(-1.234567, scale_type(-5)); auto num6 = cudf::convert_floating_to_fixed(-1.234567, scale_type(-6)); + auto num7 = cudf::convert_floating_to_fixed(-0.0, scale_type(-4)); EXPECT_EQ(-1, cudf::convert_fixed_to_floating(num0)); EXPECT_EQ(-1.2, cudf::convert_fixed_to_floating(num1)); @@ -82,6 +85,7 @@ TYPED_TEST(FixedPointTestAllReps, SimpleNegativeDecimalXXConstruction) EXPECT_EQ(-1.2345, cudf::convert_fixed_to_floating(num4)); EXPECT_EQ(-1.23456, cudf::convert_fixed_to_floating(num5)); EXPECT_EQ(-1.234567, cudf::convert_fixed_to_floating(num6)); + EXPECT_EQ(-0.0, cudf::convert_fixed_to_floating(num7)); } TYPED_TEST(FixedPointTestAllReps, PaddedDecimalXXConstruction) @@ -99,14 +103,10 @@ TYPED_TEST(FixedPointTestAllReps, PaddedDecimalXXConstruction) EXPECT_EQ(1.1, cudf::convert_fixed_to_floating(a)); EXPECT_EQ(1.01, cudf::convert_fixed_to_floating(b)); - EXPECT_EQ(1, - cudf::convert_fixed_to_floating( - c)); // intentional (inherited problem from floating point) + EXPECT_EQ(1.001, cudf::convert_fixed_to_floating(c)); EXPECT_EQ(1.0001, cudf::convert_fixed_to_floating(d)); EXPECT_EQ(1.00001, cudf::convert_fixed_to_floating(e)); - EXPECT_EQ(1, - cudf::convert_fixed_to_floating( - f)); // intentional (inherited problem from floating point) + EXPECT_EQ(1.000001, cudf::convert_fixed_to_floating(f)); EXPECT_TRUE(1.000123 - cudf::convert_fixed_to_floating(x) < std::numeric_limits::epsilon()); @@ -153,6 +153,119 @@ TYPED_TEST(FixedPointTestAllReps, MoreSimpleBinaryFPConstruction) EXPECT_EQ(2.0625, cudf::convert_fixed_to_floating(num1)); } +TEST_F(FixedPointTest, PreciseFloatDecimal64Construction) +{ + // Need 9 decimal digits to uniquely represent all floats (numeric_limits::max_digits10()). + // Precise conversion: set the scale factor to 9 less than the order-of-magnitude. + // But with -9 scale factor decimal32 can overflow: use decimal64 instead. + + // Positive Exponent + { + auto num0 = cudf::convert_floating_to_fixed(3.141593E7f, scale_type(-2)); + auto num1 = cudf::convert_floating_to_fixed(3.141593E12f, scale_type(3)); + auto num2 = cudf::convert_floating_to_fixed(3.141593E17f, scale_type(8)); + auto num3 = cudf::convert_floating_to_fixed(3.141593E22f, scale_type(13)); + auto num4 = cudf::convert_floating_to_fixed(3.141593E27f, scale_type(18)); + auto num5 = cudf::convert_floating_to_fixed(3.141593E32f, scale_type(23)); + auto num6 = cudf::convert_floating_to_fixed(3.141593E37f, scale_type(28)); + + EXPECT_EQ(3.141593E7f, cudf::convert_fixed_to_floating(num0)); + EXPECT_EQ(3.141593E12f, cudf::convert_fixed_to_floating(num1)); + EXPECT_EQ(3.141593E17f, cudf::convert_fixed_to_floating(num2)); + EXPECT_EQ(3.141593E22f, cudf::convert_fixed_to_floating(num3)); + EXPECT_EQ(3.141593E27f, cudf::convert_fixed_to_floating(num4)); + EXPECT_EQ(3.141593E32f, cudf::convert_fixed_to_floating(num5)); + EXPECT_EQ(3.141593E37f, cudf::convert_fixed_to_floating(num6)); + } + + // Negative Exponent + { + auto num0 = cudf::convert_floating_to_fixed(3.141593E-7f, scale_type(-16)); + auto num1 = cudf::convert_floating_to_fixed(3.141593E-12f, scale_type(-21)); + auto num2 = cudf::convert_floating_to_fixed(3.141593E-17f, scale_type(-26)); + auto num3 = cudf::convert_floating_to_fixed(3.141593E-22f, scale_type(-31)); + auto num4 = cudf::convert_floating_to_fixed(3.141593E-27f, scale_type(-36)); + auto num5 = cudf::convert_floating_to_fixed(3.141593E-32f, scale_type(-41)); + auto num6 = cudf::convert_floating_to_fixed(3.141593E-37f, scale_type(-47)); + + EXPECT_EQ(3.141593E-7f, cudf::convert_fixed_to_floating(num0)); + EXPECT_EQ(3.141593E-12f, cudf::convert_fixed_to_floating(num1)); + EXPECT_EQ(3.141593E-17f, cudf::convert_fixed_to_floating(num2)); + EXPECT_EQ(3.141593E-22f, cudf::convert_fixed_to_floating(num3)); + EXPECT_EQ(3.141593E-27f, cudf::convert_fixed_to_floating(num4)); + EXPECT_EQ(3.141593E-32f, cudf::convert_fixed_to_floating(num5)); + EXPECT_EQ(3.141593E-37f, cudf::convert_fixed_to_floating(num6)); + + // Denormals + auto num7 = cudf::convert_floating_to_fixed(3.141593E-39f, scale_type(-48)); + auto num8 = cudf::convert_floating_to_fixed(3.141593E-41f, scale_type(-50)); + auto num9 = cudf::convert_floating_to_fixed(3.141593E-43f, scale_type(-52)); + auto num10 = cudf::convert_floating_to_fixed(FLT_TRUE_MIN, scale_type(-54)); + + EXPECT_EQ(3.141593E-39f, cudf::convert_fixed_to_floating(num7)); + EXPECT_EQ(3.141593E-41f, cudf::convert_fixed_to_floating(num8)); + EXPECT_EQ(3.141593E-43f, cudf::convert_fixed_to_floating(num9)); + EXPECT_EQ(FLT_TRUE_MIN, cudf::convert_fixed_to_floating(num10)); + } +} + +TEST_F(FixedPointTest, PreciseDoubleDecimal64Construction) +{ + // Need 17 decimal digits to uniquely represent all doubles (numeric_limits::max_digits10()). + // Precise conversion: set the scale factor to 17 less than the order-of-magnitude. + + using decimal64 = fixed_point; + + // Positive Exponent + { + auto num0 = cudf::convert_floating_to_fixed(3.141593E8, scale_type(-9)); + auto num1 = cudf::convert_floating_to_fixed(3.141593E58, scale_type(41)); + auto num2 = cudf::convert_floating_to_fixed(3.141593E108, scale_type(91)); + auto num3 = cudf::convert_floating_to_fixed(3.141593E158, scale_type(141)); + auto num4 = cudf::convert_floating_to_fixed(3.141593E208, scale_type(191)); + auto num5 = cudf::convert_floating_to_fixed(3.141593E258, scale_type(241)); + auto num6 = cudf::convert_floating_to_fixed(3.141593E307, scale_type(290)); + + EXPECT_EQ(3.141593E8, cudf::convert_fixed_to_floating(num0)); + EXPECT_EQ(3.141593E58, cudf::convert_fixed_to_floating(num1)); + EXPECT_EQ(3.141593E108, cudf::convert_fixed_to_floating(num2)); + EXPECT_EQ(3.141593E158, cudf::convert_fixed_to_floating(num3)); + EXPECT_EQ(3.141593E208, cudf::convert_fixed_to_floating(num4)); + EXPECT_EQ(3.141593E258, cudf::convert_fixed_to_floating(num5)); + EXPECT_EQ(3.141593E307, cudf::convert_fixed_to_floating(num6)); + } + + // Negative Exponent + { + auto num0 = cudf::convert_floating_to_fixed(3.141593E-8, scale_type(-25)); + auto num1 = cudf::convert_floating_to_fixed(3.141593E-58, scale_type(-75)); + auto num2 = cudf::convert_floating_to_fixed(3.141593E-108, scale_type(-125)); + auto num3 = cudf::convert_floating_to_fixed(3.141593E-158, scale_type(-175)); + auto num4 = cudf::convert_floating_to_fixed(3.141593E-208, scale_type(-225)); + auto num5 = cudf::convert_floating_to_fixed(3.141593E-258, scale_type(-275)); + auto num6 = cudf::convert_floating_to_fixed(3.141593E-308, scale_type(-325)); + + EXPECT_EQ(3.141593E-8, cudf::convert_fixed_to_floating(num0)); + EXPECT_EQ(3.141593E-58, cudf::convert_fixed_to_floating(num1)); + EXPECT_EQ(3.141593E-108, cudf::convert_fixed_to_floating(num2)); + EXPECT_EQ(3.141593E-158, cudf::convert_fixed_to_floating(num3)); + EXPECT_EQ(3.141593E-208, cudf::convert_fixed_to_floating(num4)); + EXPECT_EQ(3.141593E-258, cudf::convert_fixed_to_floating(num5)); + EXPECT_EQ(3.141593E-308, cudf::convert_fixed_to_floating(num6)); + + // Denormals + auto num7 = cudf::convert_floating_to_fixed(3.141593E-309, scale_type(-326)); + auto num8 = cudf::convert_floating_to_fixed(3.141593E-314, scale_type(-331)); + auto num9 = cudf::convert_floating_to_fixed(3.141593E-319, scale_type(-336)); + auto num10 = cudf::convert_floating_to_fixed(DBL_TRUE_MIN, scale_type(-341)); + + EXPECT_EQ(3.141593E-309, cudf::convert_fixed_to_floating(num7)); + EXPECT_EQ(3.141593E-314, cudf::convert_fixed_to_floating(num8)); + EXPECT_EQ(3.141593E-319, cudf::convert_fixed_to_floating(num9)); + EXPECT_EQ(DBL_TRUE_MIN, cudf::convert_fixed_to_floating(num10)); + } +} + TYPED_TEST(FixedPointTestAllReps, SimpleDecimalXXMath) { using decimalXX = fixed_point; @@ -442,8 +555,6 @@ void float_vector_test(ValueType const initial_value, int32_t const scale, Binop binop) { - using decimal32 = fixed_point; - std::vector vec1(size); std::vector vec2(size); diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 1d6a3b3304a..7136b162c13 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -3509,9 +3509,9 @@ void testCastFloatToDecimal() { @Test void testCastDoubleToDecimal() { testCastNumericToDecimalsAndBack(DType.FLOAT64, false, 0, - () -> ColumnVector.fromBoxedDoubles(1.0, 2.1, -3.23, null, 2.41281, (double) Long.MAX_VALUE), - () -> ColumnVector.fromBoxedDoubles(1.0, 2.0, -3.0, null, 2.0, (double) Long.MAX_VALUE), - new Long[]{1L, 2L, -3L, null, 2L, Long.MAX_VALUE} + () -> ColumnVector.fromBoxedDoubles(1.0, 2.1, -3.23, null, 2.41281, (double) Integer.MAX_VALUE), + () -> ColumnVector.fromBoxedDoubles(1.0, 2.0, -3.0, null, 2.0, (double) Integer.MAX_VALUE), + new Long[]{1L, 2L, -3L, null, 2L, (long) Integer.MAX_VALUE} ); testCastNumericToDecimalsAndBack(DType.FLOAT64, false, -2, () -> ColumnVector.fromBoxedDoubles(1.0, 2.1, -3.23, null, 2.41281, -55.01999), diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py index c41a938f6ea..65f739bc74a 100644 --- a/python/cudf/cudf/tests/test_decimal.py +++ b/python/cudf/cudf/tests/test_decimal.py @@ -97,7 +97,7 @@ def test_typecast_from_float_to_decimal(request, data, from_dtype, to_dtype): pytest.mark.xfail( condition=version.parse(pa.__version__) >= version.parse("13.0.0") and from_dtype == np.dtype("float32") - and to_dtype.precision > 7, + and to_dtype.precision > 12, reason="https://github.com/rapidsai/cudf/issues/14169", ) )