forked from acts-project/acts
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
perf: Try fast pow for
EigenStepper
step size scaling (acts-project…
…#3153) Currently the step scaling calculation has a big impact on the `EigenStepper` performance. In this PR the `pow(x, 0.25)` is approximated with `fastPow` (similar to fast inverse square root) which relies on the bit representation of the floating point number. The assumption is that we do not really care about the precision of this value but rather that it gives a good approximation for the step size scaling. Edit: After measuring the performance it seems like there is no measurable improvement. I made this approximation compile time optional for now. See this acts-project#3153 (comment) for more details <img width="577" alt="image" src="https://github.com/acts-project/acts/assets/487211/d613caeb-991f-4a89-98fc-bc051be13b7b"> References - https://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp
- Loading branch information
Showing
7 changed files
with
285 additions
and
16 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
// This file is part of the Acts project. | ||
// | ||
// Copyright (C) 2024 CERN for the benefit of the Acts project | ||
// | ||
// This Source Code Form is subject to the terms of the Mozilla Public | ||
// License, v. 2.0. If a copy of the MPL was not distributed with this | ||
// file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
||
#pragma once | ||
|
||
#include <cmath> | ||
#include <cstdint> | ||
#include <limits> | ||
#include <type_traits> | ||
|
||
namespace Acts { | ||
|
||
/// @brief Fast inverse square root function | ||
/// Taken from https://en.wikipedia.org/wiki/Fast_inverse_square_root | ||
/// @param x the number to calculate the inverse square root of | ||
/// @param iterations the number of newton iterations to perform | ||
template <typename T> | ||
constexpr T fastInverseSqrt(T x, int iterations = 1) noexcept { | ||
static_assert(std::numeric_limits<T>::is_iec559 && | ||
"Only IEEE 754 is supported"); | ||
static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>, | ||
"Only float and double are supported"); | ||
using I = std::conditional_t<std::is_same_v<T, float>, std::uint32_t, | ||
std::uint64_t>; | ||
|
||
constexpr I magic = | ||
std::is_same_v<T, float> ? 0x5f3759df : 0x5fe6eb50c7b537a9; | ||
|
||
union { | ||
T f; | ||
I i; | ||
} u = {x}; | ||
|
||
u.i = magic - (u.i >> 1); | ||
|
||
for (int i = 0; i < iterations; ++i) { | ||
u.f *= 1.5f - 0.5f * x * u.f * u.f; | ||
} | ||
|
||
return u.f; | ||
} | ||
|
||
/// @brief Fast power function @see `std::pow` | ||
/// Taken from | ||
/// https://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp | ||
/// @param a the base | ||
/// @param b the exponent | ||
constexpr double fastPow(double a, double b) { | ||
// enable only on IEEE 754 | ||
static_assert(std::numeric_limits<double>::is_iec559); | ||
|
||
constexpr std::int64_t magic = 0x3FEF127F00000000; | ||
|
||
union { | ||
double f; | ||
std::int64_t i; | ||
} u = {a}; | ||
|
||
u.i = static_cast<std::int64_t>(b * (u.i - magic) + magic); | ||
|
||
return u.f; | ||
} | ||
|
||
/// @brief Fast power function more precise than @see `fastPow` | ||
/// Taken from | ||
/// https://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp | ||
/// @param a the base | ||
/// @param b the exponent | ||
constexpr double fastPowMorePrecise(double a, double b) { | ||
// binary exponentiation | ||
double r = 1.0; | ||
int exp = std::abs(static_cast<int>(b)); | ||
double base = b > 0 ? a : 1 / a; | ||
while (exp != 0) { | ||
if ((exp & 1) != 0) { | ||
r *= base; | ||
} | ||
base *= base; | ||
exp >>= 1; | ||
} | ||
|
||
return r * fastPow(a, b - static_cast<int>(b)); | ||
} | ||
|
||
} // namespace Acts |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
// This file is part of the Acts project. | ||
// | ||
// Copyright (C) 2024 CERN for the benefit of the Acts project | ||
// | ||
// This Source Code Form is subject to the terms of the Mozilla Public | ||
// License, v. 2.0. If a copy of the MPL was not distributed with this | ||
// file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
||
#include <boost/test/data/test_case.hpp> | ||
#include <boost/test/unit_test.hpp> | ||
|
||
#include "Acts/Tests/CommonHelpers/BenchmarkTools.hpp" | ||
#include "Acts/Utilities/QuickMath.hpp" | ||
|
||
#include <cmath> | ||
#include <random> | ||
|
||
namespace bdata = boost::unit_test::data; | ||
|
||
namespace Acts::Test { | ||
|
||
/// @brief Another fast power function @see `fastPow` | ||
// Taken from | ||
// https://martin.ankerl.com/2007/02/11/optimized-exponential-functions-for-java | ||
/// @param a the base | ||
/// @param b the exponent | ||
constexpr double fastPowAnother(double a, double b) { | ||
// enable only on IEEE 754 | ||
static_assert(std::numeric_limits<double>::is_iec559); | ||
|
||
union { | ||
double f; | ||
std::int64_t i; | ||
} u = {}; | ||
|
||
u.i = static_cast<std::int64_t>( | ||
9076650 * (a - 1) / (a + 1 + 4 * std::sqrt(a)) * b + 1072632447); | ||
u.i <<= 32; | ||
|
||
// result seems broken? | ||
return u.f; | ||
} | ||
|
||
// Some randomness & number crunching | ||
const unsigned int nTests = 10; | ||
const unsigned int nReps = 10000; | ||
|
||
BOOST_DATA_TEST_CASE( | ||
benchmark_pow_25, | ||
bdata::random( | ||
(bdata::engine = std::mt19937(), bdata::seed = 21, | ||
bdata::distribution = std::uniform_real_distribution<double>(-4, 4))) ^ | ||
bdata::xrange(nTests), | ||
baseExp, index) { | ||
(void)index; | ||
|
||
const double base = std::pow(10, baseExp); | ||
const double exp = 0.25; | ||
|
||
std::cout << std::endl | ||
<< "Benchmarking base=" << base << ", exp=" << exp << "..." | ||
<< std::endl; | ||
std::cout << "- void: " | ||
<< Acts::Test::microBenchmark([&] { return 0; }, nReps) | ||
<< std::endl; | ||
std::cout << "- std::pow: " | ||
<< Acts::Test::microBenchmark([&] { return std::pow(base, exp); }, | ||
nReps) | ||
<< std::endl; | ||
std::cout << "- std::exp: " | ||
<< Acts::Test::microBenchmark( | ||
[&] { return std::exp(std::log(base) * exp); }, nReps) | ||
<< std::endl; | ||
std::cout << "- std::sqrt: " | ||
<< Acts::Test::microBenchmark( | ||
[&] { return std::sqrt(std::sqrt(base)); }, nReps) | ||
<< std::endl; | ||
std::cout << "- fastPow: " | ||
<< Acts::Test::microBenchmark([&] { return fastPow(base, exp); }, | ||
nReps) | ||
<< std::endl; | ||
std::cout << "- fastPowMorePrecise: " | ||
<< Acts::Test::microBenchmark( | ||
[&] { return fastPowMorePrecise(base, exp); }, nReps) | ||
<< std::endl; | ||
std::cout << "- fastPowAnother: " | ||
<< Acts::Test::microBenchmark( | ||
[&] { return fastPowAnother(base, exp); }, nReps) | ||
<< std::endl; | ||
} | ||
|
||
} // namespace Acts::Test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
// This file is part of the Acts project. | ||
// | ||
// Copyright (C) 2024 CERN for the benefit of the Acts project | ||
// | ||
// This Source Code Form is subject to the terms of the Mozilla Public | ||
// License, v. 2.0. If a copy of the MPL was not distributed with this | ||
// file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
|
||
#include <boost/test/data/test_case.hpp> | ||
#include <boost/test/unit_test.hpp> | ||
|
||
#include "Acts/Tests/CommonHelpers/FloatComparisons.hpp" | ||
#include "Acts/Utilities/QuickMath.hpp" | ||
|
||
namespace bdata = boost::unit_test::data; | ||
|
||
const auto expDist = bdata::random( | ||
(bdata::engine = std::mt19937{}, bdata::seed = 0, | ||
bdata::distribution = std::uniform_real_distribution<double>(-4, 4))); | ||
|
||
BOOST_AUTO_TEST_SUITE(Utilities) | ||
|
||
BOOST_DATA_TEST_CASE(fastInverseSqrt, expDist ^ bdata::xrange(100), exp, i) { | ||
(void)i; | ||
|
||
const double x = std::pow(10, exp); | ||
|
||
const float fastFloat = Acts::fastInverseSqrt(static_cast<float>(x)); | ||
const double fastDouble = Acts::fastInverseSqrt(x); | ||
|
||
const double stdFloat = 1.0 / std::sqrt(static_cast<float>(x)); | ||
const double stdDouble = 1.0 / std::sqrt(x); | ||
|
||
CHECK_CLOSE_REL(stdFloat, fastFloat, 0.01); | ||
CHECK_CLOSE_REL(stdDouble, fastDouble, 0.01); | ||
} | ||
|
||
BOOST_DATA_TEST_CASE(fastPow, expDist ^ expDist ^ bdata::xrange(100), baseExp, | ||
exp, i) { | ||
(void)i; | ||
|
||
const double base = std::pow(10, baseExp); | ||
|
||
const double fast = Acts::fastPow(base, exp); | ||
const double fastMorePrecise = Acts::fastPowMorePrecise(base, exp); | ||
|
||
const double std = std::pow(base, exp); | ||
|
||
CHECK_CLOSE_REL(fast, std, 0.15); | ||
CHECK_CLOSE_REL(fastMorePrecise, std, 0.1); | ||
} | ||
|
||
// BOOST_AUTO_TEST_CASE(fastPowChart) { | ||
// std::cout << "a ref obs" << std::endl; | ||
// for (double aExp = -4; aExp <= 4; aExp += 0.01) { | ||
// double a = std::pow(10, aExp); | ||
// double ref = std::pow(a, 0.25); | ||
// double obs = Acts::fastPow(a, 0.25); | ||
|
||
// std::cout << a << " " << ref << " " << obs << std::endl; | ||
// } | ||
// } | ||
|
||
BOOST_AUTO_TEST_SUITE_END() |