diff --git a/Core/include/Acts/Propagator/EigenStepper.hpp b/Core/include/Acts/Propagator/EigenStepper.hpp index 11f32490c7c..74dadc78165 100644 --- a/Core/include/Acts/Propagator/EigenStepper.hpp +++ b/Core/include/Acts/Propagator/EigenStepper.hpp @@ -431,7 +431,7 @@ class EigenStepper { std::shared_ptr m_bField; /// Overstep limit - double m_overstepLimit; + double m_overstepLimit{}; }; template diff --git a/Core/include/Acts/Propagator/EigenStepper.ipp b/Core/include/Acts/Propagator/EigenStepper.ipp index d0d08e3881f..d54d0010875 100644 --- a/Core/include/Acts/Propagator/EigenStepper.ipp +++ b/Core/include/Acts/Propagator/EigenStepper.ipp @@ -9,6 +9,9 @@ #include "Acts/EventData/TransformationHelpers.hpp" #include "Acts/Propagator/ConstrainedStep.hpp" #include "Acts/Propagator/detail/CovarianceEngine.hpp" +#include "Acts/Utilities/QuickMath.hpp" + +#include template Acts::EigenStepper::EigenStepper( @@ -153,7 +156,8 @@ Acts::Result Acts::EigenStepper::step( propagator_state_t& state, const navigator_t& navigator) const { // Runge-Kutta integrator state auto& sd = state.stepping.stepData; - double error_estimate = 0.; + + double errorEstimate = 0.; double h2 = 0, half_h = 0; auto pos = position(state.stepping); @@ -172,6 +176,31 @@ Acts::Result Acts::EigenStepper::step( return 0.; } + const auto calcStepSizeScaling = [&](const double errorEstimate_) -> double { + // For details about these values see ATL-SOFT-PUB-2009-001 for details + constexpr double lower = 0.25; + constexpr double upper = 4.0; + // This is given by the order of the Runge-Kutta method + constexpr double exponent = 0.25; + + // Whether to use fast power function if available + constexpr bool tryUseFastPow{false}; + + double x = state.options.stepTolerance / errorEstimate_; + + if constexpr (exponent == 0.25 && !tryUseFastPow) { + // This is 3x faster than std::pow + x = std::sqrt(std::sqrt(x)); + } else if constexpr (std::numeric_limits::is_iec559 && + tryUseFastPow) { + x = fastPow(x, exponent); + } else { + x = std::pow(x, exponent); + } + + return std::clamp(x, lower, upper); + }; + // The following functor starts to perform a Runge-Kutta step of a certain // size, going up to the point where it can return an estimate of the local // integration error. The results are stated in the local variables above, @@ -217,12 +246,13 @@ Acts::Result Acts::EigenStepper::step( } // Compute and check the local integration error estimate - error_estimate = + errorEstimate = h2 * ((sd.k1 - sd.k2 - sd.k3 + sd.k4).template lpNorm<1>() + std::abs(sd.kQoP[0] - sd.kQoP[1] - sd.kQoP[2] + sd.kQoP[3])); - error_estimate = std::max(error_estimate, 1e-20); + // Protect against division by zero + errorEstimate = std::max(1e-20, errorEstimate); - return success(error_estimate <= state.options.stepTolerance); + return success(errorEstimate <= state.options.stepTolerance); }; const double initialH = @@ -240,12 +270,7 @@ Acts::Result Acts::EigenStepper::step( break; } - // double std::sqrt is 3x faster than std::pow - const double stepSizeScaling = - std::clamp(std::sqrt(std::sqrt(state.options.stepTolerance / - std::abs(2. * error_estimate))), - 0.25, 4.0); - h *= stepSizeScaling; + h *= calcStepSizeScaling(2 * errorEstimate); // If step size becomes too small the particle remains at the initial // place @@ -321,11 +346,7 @@ Acts::Result Acts::EigenStepper::step( state.stepping.derivative.template segment<3>(4) = sd.k4; } state.stepping.pathAccumulated += h; - // double std::sqrt is 3x faster than std::pow - const double stepSizeScaling = - std::clamp(std::sqrt(std::sqrt(state.options.stepTolerance / - std::abs(error_estimate))), - 0.25, 4.0); + const double stepSizeScaling = calcStepSizeScaling(errorEstimate); const double nextAccuracy = std::abs(h * stepSizeScaling); const double previousAccuracy = std::abs(state.stepping.stepSize.accuracy()); const double initialStepLength = std::abs(initialH); diff --git a/Core/include/Acts/Utilities/QuickMath.hpp b/Core/include/Acts/Utilities/QuickMath.hpp new file mode 100644 index 00000000000..ef09d22485d --- /dev/null +++ b/Core/include/Acts/Utilities/QuickMath.hpp @@ -0,0 +1,90 @@ +// This file is part of the Acts project. +// +// Copyright (C) 2024 CERN for the benefit of the Acts project +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#pragma once + +#include +#include +#include +#include + +namespace Acts { + +/// @brief Fast inverse square root function +/// Taken from https://en.wikipedia.org/wiki/Fast_inverse_square_root +/// @param x the number to calculate the inverse square root of +/// @param iterations the number of newton iterations to perform +template +constexpr T fastInverseSqrt(T x, int iterations = 1) noexcept { + static_assert(std::numeric_limits::is_iec559 && + "Only IEEE 754 is supported"); + static_assert(std::is_same_v || std::is_same_v, + "Only float and double are supported"); + using I = std::conditional_t, std::uint32_t, + std::uint64_t>; + + constexpr I magic = + std::is_same_v ? 0x5f3759df : 0x5fe6eb50c7b537a9; + + union { + T f; + I i; + } u = {x}; + + u.i = magic - (u.i >> 1); + + for (int i = 0; i < iterations; ++i) { + u.f *= 1.5f - 0.5f * x * u.f * u.f; + } + + return u.f; +} + +/// @brief Fast power function @see `std::pow` +/// Taken from +/// https://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp +/// @param a the base +/// @param b the exponent +constexpr double fastPow(double a, double b) { + // enable only on IEEE 754 + static_assert(std::numeric_limits::is_iec559); + + constexpr std::int64_t magic = 0x3FEF127F00000000; + + union { + double f; + std::int64_t i; + } u = {a}; + + u.i = static_cast(b * (u.i - magic) + magic); + + return u.f; +} + +/// @brief Fast power function more precise than @see `fastPow` +/// Taken from +/// https://martin.ankerl.com/2012/01/25/optimized-approximative-pow-in-c-and-cpp +/// @param a the base +/// @param b the exponent +constexpr double fastPowMorePrecise(double a, double b) { + // binary exponentiation + double r = 1.0; + int exp = std::abs(static_cast(b)); + double base = b > 0 ? a : 1 / a; + while (exp != 0) { + if ((exp & 1) != 0) { + r *= base; + } + base *= base; + exp >>= 1; + } + + return r * fastPow(a, b - static_cast(b)); +} + +} // namespace Acts diff --git a/Tests/Benchmarks/CMakeLists.txt b/Tests/Benchmarks/CMakeLists.txt index 3bd4a69f07e..70297192006 100644 --- a/Tests/Benchmarks/CMakeLists.txt +++ b/Tests/Benchmarks/CMakeLists.txt @@ -29,3 +29,4 @@ add_benchmark(SurfaceIntersection SurfaceIntersectionBenchmark.cpp) add_benchmark(RayFrustum RayFrustumBenchmark.cpp) add_benchmark(AnnulusBounds AnnulusBoundsBenchmark.cpp) add_benchmark(StraightLineStepper StraightLineStepperBenchmark.cpp) +add_benchmark(QuickMath QuickMathBenchmark.cpp) diff --git a/Tests/Benchmarks/QuickMathBenchmark.cpp b/Tests/Benchmarks/QuickMathBenchmark.cpp new file mode 100644 index 00000000000..9fe1aa75cfa --- /dev/null +++ b/Tests/Benchmarks/QuickMathBenchmark.cpp @@ -0,0 +1,92 @@ +// This file is part of the Acts project. +// +// Copyright (C) 2024 CERN for the benefit of the Acts project +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include +#include + +#include "Acts/Tests/CommonHelpers/BenchmarkTools.hpp" +#include "Acts/Utilities/QuickMath.hpp" + +#include +#include + +namespace bdata = boost::unit_test::data; + +namespace Acts::Test { + +/// @brief Another fast power function @see `fastPow` +// Taken from +// https://martin.ankerl.com/2007/02/11/optimized-exponential-functions-for-java +/// @param a the base +/// @param b the exponent +constexpr double fastPowAnother(double a, double b) { + // enable only on IEEE 754 + static_assert(std::numeric_limits::is_iec559); + + union { + double f; + std::int64_t i; + } u = {}; + + u.i = static_cast( + 9076650 * (a - 1) / (a + 1 + 4 * std::sqrt(a)) * b + 1072632447); + u.i <<= 32; + + // result seems broken? + return u.f; +} + +// Some randomness & number crunching +const unsigned int nTests = 10; +const unsigned int nReps = 10000; + +BOOST_DATA_TEST_CASE( + benchmark_pow_25, + bdata::random( + (bdata::engine = std::mt19937(), bdata::seed = 21, + bdata::distribution = std::uniform_real_distribution(-4, 4))) ^ + bdata::xrange(nTests), + baseExp, index) { + (void)index; + + const double base = std::pow(10, baseExp); + const double exp = 0.25; + + std::cout << std::endl + << "Benchmarking base=" << base << ", exp=" << exp << "..." + << std::endl; + std::cout << "- void: " + << Acts::Test::microBenchmark([&] { return 0; }, nReps) + << std::endl; + std::cout << "- std::pow: " + << Acts::Test::microBenchmark([&] { return std::pow(base, exp); }, + nReps) + << std::endl; + std::cout << "- std::exp: " + << Acts::Test::microBenchmark( + [&] { return std::exp(std::log(base) * exp); }, nReps) + << std::endl; + std::cout << "- std::sqrt: " + << Acts::Test::microBenchmark( + [&] { return std::sqrt(std::sqrt(base)); }, nReps) + << std::endl; + std::cout << "- fastPow: " + << Acts::Test::microBenchmark([&] { return fastPow(base, exp); }, + nReps) + << std::endl; + std::cout << "- fastPowMorePrecise: " + << Acts::Test::microBenchmark( + [&] { return fastPowMorePrecise(base, exp); }, nReps) + << std::endl; + std::cout << "- fastPowAnother: " + << Acts::Test::microBenchmark( + [&] { return fastPowAnother(base, exp); }, nReps) + << std::endl; +} + +} // namespace Acts::Test diff --git a/Tests/UnitTests/Core/Utilities/CMakeLists.txt b/Tests/UnitTests/Core/Utilities/CMakeLists.txt index 5e607f14a25..daf84311957 100644 --- a/Tests/UnitTests/Core/Utilities/CMakeLists.txt +++ b/Tests/UnitTests/Core/Utilities/CMakeLists.txt @@ -51,3 +51,4 @@ target_compile_definitions(ActsUnitTestAnyDebug PRIVATE _ACTS_ANY_ENABLE_VERBOSE add_unittest(ParticleData ParticleDataTests.cpp) add_unittest(Zip ZipTests.cpp) add_unittest(TransformRange TransformRangeTests.cpp) +add_unittest(QuickMath QuickMathTests.cpp) diff --git a/Tests/UnitTests/Core/Utilities/QuickMathTests.cpp b/Tests/UnitTests/Core/Utilities/QuickMathTests.cpp new file mode 100644 index 00000000000..a463de371da --- /dev/null +++ b/Tests/UnitTests/Core/Utilities/QuickMathTests.cpp @@ -0,0 +1,64 @@ +// This file is part of the Acts project. +// +// Copyright (C) 2024 CERN for the benefit of the Acts project +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include +#include + +#include "Acts/Tests/CommonHelpers/FloatComparisons.hpp" +#include "Acts/Utilities/QuickMath.hpp" + +namespace bdata = boost::unit_test::data; + +const auto expDist = bdata::random( + (bdata::engine = std::mt19937{}, bdata::seed = 0, + bdata::distribution = std::uniform_real_distribution(-4, 4))); + +BOOST_AUTO_TEST_SUITE(Utilities) + +BOOST_DATA_TEST_CASE(fastInverseSqrt, expDist ^ bdata::xrange(100), exp, i) { + (void)i; + + const double x = std::pow(10, exp); + + const float fastFloat = Acts::fastInverseSqrt(static_cast(x)); + const double fastDouble = Acts::fastInverseSqrt(x); + + const double stdFloat = 1.0 / std::sqrt(static_cast(x)); + const double stdDouble = 1.0 / std::sqrt(x); + + CHECK_CLOSE_REL(stdFloat, fastFloat, 0.01); + CHECK_CLOSE_REL(stdDouble, fastDouble, 0.01); +} + +BOOST_DATA_TEST_CASE(fastPow, expDist ^ expDist ^ bdata::xrange(100), baseExp, + exp, i) { + (void)i; + + const double base = std::pow(10, baseExp); + + const double fast = Acts::fastPow(base, exp); + const double fastMorePrecise = Acts::fastPowMorePrecise(base, exp); + + const double std = std::pow(base, exp); + + CHECK_CLOSE_REL(fast, std, 0.15); + CHECK_CLOSE_REL(fastMorePrecise, std, 0.1); +} + +// BOOST_AUTO_TEST_CASE(fastPowChart) { +// std::cout << "a ref obs" << std::endl; +// for (double aExp = -4; aExp <= 4; aExp += 0.01) { +// double a = std::pow(10, aExp); +// double ref = std::pow(a, 0.25); +// double obs = Acts::fastPow(a, 0.25); + +// std::cout << a << " " << ref << " " << obs << std::endl; +// } +// } + +BOOST_AUTO_TEST_SUITE_END()