Skip to content

Commit

Permalink
perf: Try fast pow for EigenStepper step size scaling (acts-project…
Browse files Browse the repository at this point in the history
…#3153)

Currently the step scaling calculation has a big impact on the `EigenStepper` performance. In this PR the `pow(x, 0.25)` is approximated with `fastPow` (similar to fast inverse square root) which relies on the bit representation of the floating point number.

The assumption is that we do not really care about the precision of this value but rather that it gives a good approximation for the step size scaling.

Edit: After measuring the performance it seems like there is no measurable improvement. I made this approximation compile time optional for now. See this acts-project#3153 (comment) for more details

<img width="577" alt="image" src="https://github.com/acts-project/acts/assets/487211/d613caeb-991f-4a89-98fc-bc051be13b7b">

References
- https://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp
  • Loading branch information
andiwand authored and asalzburger committed May 21, 2024
1 parent 4ca23af commit 07972a4
Show file tree
Hide file tree
Showing 7 changed files with 285 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Core/include/Acts/Propagator/EigenStepper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ class EigenStepper {
std::shared_ptr<const MagneticFieldProvider> m_bField;

/// Overstep limit
double m_overstepLimit;
double m_overstepLimit{};
};

template <typename navigator_t>
Expand Down
51 changes: 36 additions & 15 deletions Core/include/Acts/Propagator/EigenStepper.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
#include "Acts/EventData/TransformationHelpers.hpp"
#include "Acts/Propagator/ConstrainedStep.hpp"
#include "Acts/Propagator/detail/CovarianceEngine.hpp"
#include "Acts/Utilities/QuickMath.hpp"

#include <limits>

template <typename E, typename A>
Acts::EigenStepper<E, A>::EigenStepper(
Expand Down Expand Up @@ -153,7 +156,8 @@ Acts::Result<double> Acts::EigenStepper<E, A>::step(
propagator_state_t& state, const navigator_t& navigator) const {
// Runge-Kutta integrator state
auto& sd = state.stepping.stepData;
double error_estimate = 0.;

double errorEstimate = 0.;
double h2 = 0, half_h = 0;

auto pos = position(state.stepping);
Expand All @@ -172,6 +176,31 @@ Acts::Result<double> Acts::EigenStepper<E, A>::step(
return 0.;
}

const auto calcStepSizeScaling = [&](const double errorEstimate_) -> double {
// For details about these values see ATL-SOFT-PUB-2009-001 for details
constexpr double lower = 0.25;
constexpr double upper = 4.0;
// This is given by the order of the Runge-Kutta method
constexpr double exponent = 0.25;

// Whether to use fast power function if available
constexpr bool tryUseFastPow{false};

double x = state.options.stepTolerance / errorEstimate_;

if constexpr (exponent == 0.25 && !tryUseFastPow) {
// This is 3x faster than std::pow
x = std::sqrt(std::sqrt(x));
} else if constexpr (std::numeric_limits<double>::is_iec559 &&
tryUseFastPow) {
x = fastPow(x, exponent);
} else {
x = std::pow(x, exponent);
}

return std::clamp(x, lower, upper);
};

// The following functor starts to perform a Runge-Kutta step of a certain
// size, going up to the point where it can return an estimate of the local
// integration error. The results are stated in the local variables above,
Expand Down Expand Up @@ -217,12 +246,13 @@ Acts::Result<double> Acts::EigenStepper<E, A>::step(
}

// Compute and check the local integration error estimate
error_estimate =
errorEstimate =
h2 * ((sd.k1 - sd.k2 - sd.k3 + sd.k4).template lpNorm<1>() +
std::abs(sd.kQoP[0] - sd.kQoP[1] - sd.kQoP[2] + sd.kQoP[3]));
error_estimate = std::max(error_estimate, 1e-20);
// Protect against division by zero
errorEstimate = std::max(1e-20, errorEstimate);

return success(error_estimate <= state.options.stepTolerance);
return success(errorEstimate <= state.options.stepTolerance);
};

const double initialH =
Expand All @@ -240,12 +270,7 @@ Acts::Result<double> Acts::EigenStepper<E, A>::step(
break;
}

// double std::sqrt is 3x faster than std::pow
const double stepSizeScaling =
std::clamp(std::sqrt(std::sqrt(state.options.stepTolerance /
std::abs(2. * error_estimate))),
0.25, 4.0);
h *= stepSizeScaling;
h *= calcStepSizeScaling(2 * errorEstimate);

// If step size becomes too small the particle remains at the initial
// place
Expand Down Expand Up @@ -321,11 +346,7 @@ Acts::Result<double> Acts::EigenStepper<E, A>::step(
state.stepping.derivative.template segment<3>(4) = sd.k4;
}
state.stepping.pathAccumulated += h;
// double std::sqrt is 3x faster than std::pow
const double stepSizeScaling =
std::clamp(std::sqrt(std::sqrt(state.options.stepTolerance /
std::abs(error_estimate))),
0.25, 4.0);
const double stepSizeScaling = calcStepSizeScaling(errorEstimate);
const double nextAccuracy = std::abs(h * stepSizeScaling);
const double previousAccuracy = std::abs(state.stepping.stepSize.accuracy());
const double initialStepLength = std::abs(initialH);
Expand Down
90 changes: 90 additions & 0 deletions Core/include/Acts/Utilities/QuickMath.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// This file is part of the Acts project.
//
// Copyright (C) 2024 CERN for the benefit of the Acts project
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

#pragma once

#include <cmath>
#include <cstdint>
#include <limits>
#include <type_traits>

namespace Acts {

/// @brief Fast inverse square root function
/// Taken from https://en.wikipedia.org/wiki/Fast_inverse_square_root
/// @param x the number to calculate the inverse square root of
/// @param iterations the number of newton iterations to perform
template <typename T>
constexpr T fastInverseSqrt(T x, int iterations = 1) noexcept {
static_assert(std::numeric_limits<T>::is_iec559 &&
"Only IEEE 754 is supported");
static_assert(std::is_same_v<T, float> || std::is_same_v<T, double>,
"Only float and double are supported");
using I = std::conditional_t<std::is_same_v<T, float>, std::uint32_t,
std::uint64_t>;

constexpr I magic =
std::is_same_v<T, float> ? 0x5f3759df : 0x5fe6eb50c7b537a9;

union {
T f;
I i;
} u = {x};

u.i = magic - (u.i >> 1);

for (int i = 0; i < iterations; ++i) {
u.f *= 1.5f - 0.5f * x * u.f * u.f;
}

return u.f;
}

/// @brief Fast power function @see `std::pow`
/// Taken from
/// https://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp
/// @param a the base
/// @param b the exponent
constexpr double fastPow(double a, double b) {
// enable only on IEEE 754
static_assert(std::numeric_limits<double>::is_iec559);

constexpr std::int64_t magic = 0x3FEF127F00000000;

union {
double f;
std::int64_t i;
} u = {a};

u.i = static_cast<std::int64_t>(b * (u.i - magic) + magic);

return u.f;
}

/// @brief Fast power function more precise than @see `fastPow`
/// Taken from
/// https://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp
/// @param a the base
/// @param b the exponent
constexpr double fastPowMorePrecise(double a, double b) {
// binary exponentiation
double r = 1.0;
int exp = std::abs(static_cast<int>(b));
double base = b > 0 ? a : 1 / a;
while (exp != 0) {
if ((exp & 1) != 0) {
r *= base;
}
base *= base;
exp >>= 1;
}

return r * fastPow(a, b - static_cast<int>(b));
}

} // namespace Acts
1 change: 1 addition & 0 deletions Tests/Benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ add_benchmark(SurfaceIntersection SurfaceIntersectionBenchmark.cpp)
add_benchmark(RayFrustum RayFrustumBenchmark.cpp)
add_benchmark(AnnulusBounds AnnulusBoundsBenchmark.cpp)
add_benchmark(StraightLineStepper StraightLineStepperBenchmark.cpp)
add_benchmark(QuickMath QuickMathBenchmark.cpp)
92 changes: 92 additions & 0 deletions Tests/Benchmarks/QuickMathBenchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// This file is part of the Acts project.
//
// Copyright (C) 2024 CERN for the benefit of the Acts project
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

#include <boost/test/data/test_case.hpp>
#include <boost/test/unit_test.hpp>

#include "Acts/Tests/CommonHelpers/BenchmarkTools.hpp"
#include "Acts/Utilities/QuickMath.hpp"

#include <cmath>
#include <random>

namespace bdata = boost::unit_test::data;

namespace Acts::Test {

/// @brief Another fast power function @see `fastPow`
// Taken from
// https://martin.ankerl.com/2007/02/11/optimized-exponential-functions-for-java
/// @param a the base
/// @param b the exponent
constexpr double fastPowAnother(double a, double b) {
// enable only on IEEE 754
static_assert(std::numeric_limits<double>::is_iec559);

union {
double f;
std::int64_t i;
} u = {};

u.i = static_cast<std::int64_t>(
9076650 * (a - 1) / (a + 1 + 4 * std::sqrt(a)) * b + 1072632447);
u.i <<= 32;

// result seems broken?
return u.f;
}

// Some randomness & number crunching
const unsigned int nTests = 10;
const unsigned int nReps = 10000;

BOOST_DATA_TEST_CASE(
benchmark_pow_25,
bdata::random(
(bdata::engine = std::mt19937(), bdata::seed = 21,
bdata::distribution = std::uniform_real_distribution<double>(-4, 4))) ^
bdata::xrange(nTests),
baseExp, index) {
(void)index;

const double base = std::pow(10, baseExp);
const double exp = 0.25;

std::cout << std::endl
<< "Benchmarking base=" << base << ", exp=" << exp << "..."
<< std::endl;
std::cout << "- void: "
<< Acts::Test::microBenchmark([&] { return 0; }, nReps)
<< std::endl;
std::cout << "- std::pow: "
<< Acts::Test::microBenchmark([&] { return std::pow(base, exp); },
nReps)
<< std::endl;
std::cout << "- std::exp: "
<< Acts::Test::microBenchmark(
[&] { return std::exp(std::log(base) * exp); }, nReps)
<< std::endl;
std::cout << "- std::sqrt: "
<< Acts::Test::microBenchmark(
[&] { return std::sqrt(std::sqrt(base)); }, nReps)
<< std::endl;
std::cout << "- fastPow: "
<< Acts::Test::microBenchmark([&] { return fastPow(base, exp); },
nReps)
<< std::endl;
std::cout << "- fastPowMorePrecise: "
<< Acts::Test::microBenchmark(
[&] { return fastPowMorePrecise(base, exp); }, nReps)
<< std::endl;
std::cout << "- fastPowAnother: "
<< Acts::Test::microBenchmark(
[&] { return fastPowAnother(base, exp); }, nReps)
<< std::endl;
}

} // namespace Acts::Test
1 change: 1 addition & 0 deletions Tests/UnitTests/Core/Utilities/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,4 @@ target_compile_definitions(ActsUnitTestAnyDebug PRIVATE _ACTS_ANY_ENABLE_VERBOSE
add_unittest(ParticleData ParticleDataTests.cpp)
add_unittest(Zip ZipTests.cpp)
add_unittest(TransformRange TransformRangeTests.cpp)
add_unittest(QuickMath QuickMathTests.cpp)
64 changes: 64 additions & 0 deletions Tests/UnitTests/Core/Utilities/QuickMathTests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// This file is part of the Acts project.
//
// Copyright (C) 2024 CERN for the benefit of the Acts project
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

#include <boost/test/data/test_case.hpp>
#include <boost/test/unit_test.hpp>

#include "Acts/Tests/CommonHelpers/FloatComparisons.hpp"
#include "Acts/Utilities/QuickMath.hpp"

namespace bdata = boost::unit_test::data;

const auto expDist = bdata::random(
(bdata::engine = std::mt19937{}, bdata::seed = 0,
bdata::distribution = std::uniform_real_distribution<double>(-4, 4)));

BOOST_AUTO_TEST_SUITE(Utilities)

BOOST_DATA_TEST_CASE(fastInverseSqrt, expDist ^ bdata::xrange(100), exp, i) {
(void)i;

const double x = std::pow(10, exp);

const float fastFloat = Acts::fastInverseSqrt(static_cast<float>(x));
const double fastDouble = Acts::fastInverseSqrt(x);

const double stdFloat = 1.0 / std::sqrt(static_cast<float>(x));
const double stdDouble = 1.0 / std::sqrt(x);

CHECK_CLOSE_REL(stdFloat, fastFloat, 0.01);
CHECK_CLOSE_REL(stdDouble, fastDouble, 0.01);
}

BOOST_DATA_TEST_CASE(fastPow, expDist ^ expDist ^ bdata::xrange(100), baseExp,
exp, i) {
(void)i;

const double base = std::pow(10, baseExp);

const double fast = Acts::fastPow(base, exp);
const double fastMorePrecise = Acts::fastPowMorePrecise(base, exp);

const double std = std::pow(base, exp);

CHECK_CLOSE_REL(fast, std, 0.15);
CHECK_CLOSE_REL(fastMorePrecise, std, 0.1);
}

// BOOST_AUTO_TEST_CASE(fastPowChart) {
// std::cout << "a ref obs" << std::endl;
// for (double aExp = -4; aExp <= 4; aExp += 0.01) {
// double a = std::pow(10, aExp);
// double ref = std::pow(a, 0.25);
// double obs = Acts::fastPow(a, 0.25);

// std::cout << a << " " << ref << " " << obs << std::endl;
// }
// }

BOOST_AUTO_TEST_SUITE_END()

0 comments on commit 07972a4

Please sign in to comment.