Skip to content

Commit

Permalink
Roll back ipow changes due to register pressure. (#15242)
Browse files Browse the repository at this point in the history
The addition of an array of integers in this function placed too much register pressure on our code base. This function is used by the fixed_point constructor and cast operators, so it potentially affects every kernel.  Too many unrelated kernels were impacted and suffered performance degradations to justify this change.  This reverts the algorithm introduced in #15110 to what it was previously, with some very minor tweaks.

Authors:
  - Paul Mattione (https://github.com/pmattione-nvidia)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Shruti Shivakumar (https://github.com/shrshi)
  - MithunR (https://github.com/mythrocks)

URL: #15242
  • Loading branch information
pmattione-nvidia authored Mar 11, 2024
1 parent dfaa41c commit 63c9ed7
Showing 1 changed file with 16 additions and 44 deletions.
60 changes: 16 additions & 44 deletions cpp/include/cudf/fixed_point/fixed_point.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,41 +85,7 @@ constexpr inline auto is_supported_construction_value_type()
namespace detail {

/**
* @brief Recursively computes integer exponentiation
*
* @note This is intended to be run at compile time
*
* @tparam Rep Representation type for return type
* @tparam Base The base to be exponentiated
* @param exp The exponent to be used for exponentiation
* @return Result of `Base` to the power of `exponent` of type `Rep`
*/
template <typename Rep, int32_t Base>
CUDF_HOST_DEVICE inline constexpr Rep get_power(int32_t exp)
{
// Compute power recursively
return (exp > 0) ? Rep(Base) * get_power<Rep, Base>(exp - 1) : 1;
}

/**
* @brief Implementation of integer exponentiation by array lookup
*
* @tparam Rep Representation type for return type
* @tparam Base The base to be exponentiated
* @tparam Exponents The exponents for the array entries
* @param exponent The exponent to be used for exponentiation
* @return Result of `Base` to the power of `exponent` of type `Rep`
*/
template <typename Rep, int32_t Base, std::size_t... Exponents>
CUDF_HOST_DEVICE inline Rep ipow_impl(int32_t exponent, cuda::std::index_sequence<Exponents...>)
{
// Compute powers at compile time, storing into array
static constexpr Rep powers[] = {get_power<Rep, Base>(Exponents)...};
return powers[exponent];
}

/**
* @brief A function for integer exponentiation by array lookup
* @brief A function for integer exponentiation by squaring.
*
* @tparam Rep Representation type for return type
* @tparam Base The base to be exponentiated
Expand All @@ -134,16 +100,22 @@ template <typename Rep,
CUDF_HOST_DEVICE inline Rep ipow(T exponent)
{
cudf_assert(exponent >= 0 && "integer exponentiation with negative exponent is not possible.");
if constexpr (Base == numeric::Radix::BASE_2) {
return static_cast<Rep>(1) << exponent;
} else { // BASE_10
// Build index sequence for building power array at compile time
static constexpr auto max_exp = cuda::std::numeric_limits<Rep>::digits10;
static constexpr auto exponents = cuda::std::make_index_sequence<max_exp + 1>{};

// Get compile-time result
return ipow_impl<Rep, static_cast<int32_t>(Base)>(exponent, exponents);

if constexpr (Base == numeric::Radix::BASE_2) { return static_cast<Rep>(1) << exponent; }

// Note: Including an array here introduces too much register pressure
// https://simple.wikipedia.org/wiki/Exponentiation_by_squaring
// This is the iterative equivalent of the recursive definition (faster)
// Quick-bench for squaring: http://quick-bench.com/Wg7o7HYQC9FW5M0CO0wQAjSwP_Y
if (exponent == 0) { return static_cast<Rep>(1); }
auto extra = static_cast<Rep>(1);
auto square = static_cast<Rep>(Base);
while (exponent > 1) {
if (exponent & 1) { extra *= square; }
exponent >>= 1;
square *= square;
}
return square * extra;
}

/** @brief Function that performs a `right shift` scale "times" on the `val`
Expand Down

0 comments on commit 63c9ed7

Please sign in to comment.