Skip to content

Commit

Permalink
Remove 64/128bit switches due to register pressure
Browse files Browse the repository at this point in the history
  • Loading branch information
pmattione-nvidia committed Jul 16, 2024
1 parent beda22e commit b07fb96
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 136 deletions.
4 changes: 2 additions & 2 deletions cpp/include/cudf/fixed_point/fixed_point.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ template <typename Rep,
Radix Base,
typename T,
typename cuda::std::enable_if_t<(cuda::std::is_same_v<int32_t, T> &&
is_supported_representation_type<Rep>())>* = nullptr>
CUDF_HOST_DEVICE inline Rep ipow(T exponent)
cuda::std::is_integral_v<Rep>)>* = nullptr>
CUDF_HOST_DEVICE inline constexpr Rep ipow(T exponent)
{
cudf_assert(exponent >= 0 && "integer exponentiation with negative exponent is not possible.");

Expand Down
138 changes: 4 additions & 134 deletions cpp/include/cudf/fixed_point/floating_conversion.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,30 +392,7 @@ CUDF_HOST_DEVICE inline T divide_power10_32bit(T value, int pow10)
template <typename T, CUDF_ENABLE_IF(cuda::std::is_unsigned_v<T>)>
CUDF_HOST_DEVICE inline T divide_power10_64bit(T value, int pow10)
{
// Divides `value` by 10^pow10, picking a literal divisor per exponent in the switch below.
// See comments in divide_power10_32bit() for discussion.
switch (pow10) {
case 0: return value;
case 1: return value / 10U;
case 2: return value / 100U;
case 3: return value / 1000U;
case 4: return value / 10000U;
case 5: return value / 100000U;
case 6: return value / 1000000U;
case 7: return value / 10000000U;
case 8: return value / 100000000U;
case 9: return value / 1000000000U;
// From 10^10 upward the literals no longer fit in 32 bits, hence the ULL suffix.
case 10: return value / 10000000000ULL;
case 11: return value / 100000000000ULL;
case 12: return value / 1000000000000ULL;
case 13: return value / 10000000000000ULL;
case 14: return value / 100000000000000ULL;
case 15: return value / 1000000000000000ULL;
case 16: return value / 10000000000000000ULL;
case 17: return value / 100000000000000000ULL;
case 18: return value / 1000000000000000000ULL;
case 19: return value / 10000000000000000000ULL;
// Any exponent without a case label above (negative, or greater than 19) yields 0.
default: return 0;
}
// NOTE(review): every path through the switch above returns, so as this text stands the
// statement below is never reached. This listing looks like a commit diff rendered without
// +/- markers (commit title: "Remove 64/128bit switches due to register pressure"), in which
// case the ipow() line is presumably the replacement for the whole switch. Confirm against
// the repository before relying on either form; note the switch returns 0 for exponents
// above 19, which the ipow() form does not visibly do.
return value / ipow<uint64_t, Radix::BASE_10>(pow10);
}

/**
Expand All @@ -429,49 +406,7 @@ CUDF_HOST_DEVICE inline T divide_power10_64bit(T value, int pow10)
template <typename T, CUDF_ENABLE_IF(cuda::std::is_unsigned_v<T>)>
CUDF_HOST_DEVICE inline constexpr T divide_power10_128bit(T value, int pow10)
{
// Divides `value` by 10^pow10, picking a divisor per exponent in the switch below.
// See comments in divide_power10_32bit() for an introduction.
switch (pow10) {
case 0: return value;
case 1: return value / 10U;
case 2: return value / 100U;
case 3: return value / 1000U;
case 4: return value / 10000U;
case 5: return value / 100000U;
case 6: return value / 1000000U;
case 7: return value / 10000000U;
case 8: return value / 100000000U;
case 9: return value / 1000000000U;
// From 10^10 upward the literals no longer fit in 32 bits, hence the ULL suffix.
case 10: return value / 10000000000ULL;
case 11: return value / 100000000000ULL;
case 12: return value / 1000000000000ULL;
case 13: return value / 10000000000000ULL;
case 14: return value / 100000000000000ULL;
case 15: return value / 1000000000000000ULL;
case 16: return value / 10000000000000000ULL;
case 17: return value / 100000000000000000ULL;
case 18: return value / 1000000000000000000ULL;
case 19: return value / 10000000000000000000ULL;
// Exponents 20..38 go through large_power_of_10<N>(), which is defined outside this view;
// presumably it yields 10^N in a 128-bit type because no integer literal is that wide --
// TODO confirm against its definition.
case 20: return value / large_power_of_10<20>();
case 21: return value / large_power_of_10<21>();
case 22: return value / large_power_of_10<22>();
case 23: return value / large_power_of_10<23>();
case 24: return value / large_power_of_10<24>();
case 25: return value / large_power_of_10<25>();
case 26: return value / large_power_of_10<26>();
case 27: return value / large_power_of_10<27>();
case 28: return value / large_power_of_10<28>();
case 29: return value / large_power_of_10<29>();
case 30: return value / large_power_of_10<30>();
case 31: return value / large_power_of_10<31>();
case 32: return value / large_power_of_10<32>();
case 33: return value / large_power_of_10<33>();
case 34: return value / large_power_of_10<34>();
case 35: return value / large_power_of_10<35>();
case 36: return value / large_power_of_10<36>();
case 37: return value / large_power_of_10<37>();
case 38: return value / large_power_of_10<38>();
// Any exponent without a case label above (negative, or greater than 38) yields 0.
default: return 0;
}
// NOTE(review): every path through the switch above returns, so as this text stands the
// statement below is never reached. This listing looks like a commit diff rendered without
// +/- markers (commit title: "Remove 64/128bit switches due to register pressure"), in which
// case the ipow() line is presumably the replacement for the whole switch. Confirm against
// the repository before relying on either form; note the switch returns 0 for exponents
// above 38, which the ipow() form does not visibly do.
return value / ipow<__uint128_t, Radix::BASE_10>(pow10);
}

/**
Expand Down Expand Up @@ -512,30 +447,7 @@ CUDF_HOST_DEVICE inline constexpr T multiply_power10_32bit(T value, int pow10)
template <typename T, CUDF_ENABLE_IF(cuda::std::is_unsigned_v<T>)>
CUDF_HOST_DEVICE inline constexpr T multiply_power10_64bit(T value, int pow10)
{
// Multiplies `value` by 10^pow10, picking a literal factor per exponent in the switch below.
// See comments in divide_power10_32bit() for discussion.
switch (pow10) {
case 0: return value;
case 1: return value * 10U;
case 2: return value * 100U;
case 3: return value * 1000U;
case 4: return value * 10000U;
case 5: return value * 100000U;
case 6: return value * 1000000U;
case 7: return value * 10000000U;
case 8: return value * 100000000U;
case 9: return value * 1000000000U;
// From 10^10 upward the literals no longer fit in 32 bits, hence the ULL suffix.
case 10: return value * 10000000000ULL;
case 11: return value * 100000000000ULL;
case 12: return value * 1000000000000ULL;
case 13: return value * 10000000000000ULL;
case 14: return value * 100000000000000ULL;
case 15: return value * 1000000000000000ULL;
case 16: return value * 10000000000000000ULL;
case 17: return value * 100000000000000000ULL;
case 18: return value * 1000000000000000000ULL;
case 19: return value * 10000000000000000000ULL;
// Any exponent without a case label above (negative, or greater than 19) yields 0.
default: return 0;
}
// NOTE(review): every path through the switch above returns, so as this text stands the
// statement below is never reached. This listing looks like a commit diff rendered without
// +/- markers (commit title: "Remove 64/128bit switches due to register pressure"), in which
// case the ipow() line is presumably the replacement for the whole switch. Confirm against
// the repository before relying on either form; note the switch returns 0 for exponents
// above 19, which the ipow() form does not visibly do.
return value * ipow<uint64_t, Radix::BASE_10>(pow10);
}

/**
Expand All @@ -549,49 +461,7 @@ CUDF_HOST_DEVICE inline constexpr T multiply_power10_64bit(T value, int pow10)
template <typename T, CUDF_ENABLE_IF(cuda::std::is_unsigned_v<T>)>
CUDF_HOST_DEVICE inline constexpr T multiply_power10_128bit(T value, int pow10)
{
// Multiplies `value` by 10^pow10, picking a factor per exponent in the switch below.
// See comments in divide_power10_128bit() for discussion.
switch (pow10) {
case 0: return value;
case 1: return value * 10U;
case 2: return value * 100U;
case 3: return value * 1000U;
case 4: return value * 10000U;
case 5: return value * 100000U;
case 6: return value * 1000000U;
case 7: return value * 10000000U;
case 8: return value * 100000000U;
case 9: return value * 1000000000U;
// From 10^10 upward the literals no longer fit in 32 bits, hence the ULL suffix.
case 10: return value * 10000000000ULL;
case 11: return value * 100000000000ULL;
case 12: return value * 1000000000000ULL;
case 13: return value * 10000000000000ULL;
case 14: return value * 100000000000000ULL;
case 15: return value * 1000000000000000ULL;
case 16: return value * 10000000000000000ULL;
case 17: return value * 100000000000000000ULL;
case 18: return value * 1000000000000000000ULL;
case 19: return value * 10000000000000000000ULL;
// Exponents 20..38 go through large_power_of_10<N>(), which is defined outside this view;
// presumably it yields 10^N in a 128-bit type because no integer literal is that wide --
// TODO confirm against its definition.
case 20: return value * large_power_of_10<20>();
case 21: return value * large_power_of_10<21>();
case 22: return value * large_power_of_10<22>();
case 23: return value * large_power_of_10<23>();
case 24: return value * large_power_of_10<24>();
case 25: return value * large_power_of_10<25>();
case 26: return value * large_power_of_10<26>();
case 27: return value * large_power_of_10<27>();
case 28: return value * large_power_of_10<28>();
case 29: return value * large_power_of_10<29>();
case 30: return value * large_power_of_10<30>();
case 31: return value * large_power_of_10<31>();
case 32: return value * large_power_of_10<32>();
case 33: return value * large_power_of_10<33>();
case 34: return value * large_power_of_10<34>();
case 35: return value * large_power_of_10<35>();
case 36: return value * large_power_of_10<36>();
case 37: return value * large_power_of_10<37>();
case 38: return value * large_power_of_10<38>();
// Any exponent without a case label above (negative, or greater than 38) yields 0.
default: return 0;
}
// NOTE(review): every path through the switch above returns, so as this text stands the
// statement below is never reached. This listing looks like a commit diff rendered without
// +/- markers (commit title: "Remove 64/128bit switches due to register pressure"), in which
// case the ipow() line is presumably the replacement for the whole switch. Confirm against
// the repository before relying on either form; note the switch returns 0 for exponents
// above 38, which the ipow() form does not visibly do.
return value * ipow<__uint128_t, Radix::BASE_10>(pow10);
}

/**
Expand Down

0 comments on commit b07fb96

Please sign in to comment.