From 720573ad106a98e7fb1f5250b85e0c2cb7f36491 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Thu, 6 Jan 2022 23:50:12 +0200 Subject: [PATCH 1/3] constexpr fma --- doc/sf/ccmath.qbk | 6 ++ include/boost/math/ccmath/ccmath.hpp | 1 + include/boost/math/ccmath/fma.hpp | 115 +++++++++++++++++++++ test/Jamfile.v2 | 1 + test/ccmath_fma_test.cpp | 73 +++++++++++++ test/compile_test/ccmath_fma_incl_test.cpp | 16 +++ 6 files changed, 212 insertions(+) create mode 100644 include/boost/math/ccmath/fma.hpp create mode 100644 test/ccmath_fma_test.cpp create mode 100644 test/compile_test/ccmath_fma_incl_test.cpp diff --git a/doc/sf/ccmath.qbk b/doc/sf/ccmath.qbk index 0a88ce6310..f39e388ee5 100644 --- a/doc/sf/ccmath.qbk +++ b/doc/sf/ccmath.qbk @@ -182,6 +182,12 @@ All of the following functions require C++17 or greater. template inline constexpr bool isunordered(T x, T y) noexcept + template + inline constexpr Real fma(Real x, Real y, Real z) noexcept + + template + inline constexpr Promoted fma(Arithmetic1 x, Arithmetic2 y, Arithmetic3 z) noexcept + } // Namespaces [endsect] [/section:ccmath Constexpr CMath] diff --git a/include/boost/math/ccmath/ccmath.hpp b/include/boost/math/ccmath/ccmath.hpp index 72c49922f1..2749ec7b28 100644 --- a/include/boost/math/ccmath/ccmath.hpp +++ b/include/boost/math/ccmath/ccmath.hpp @@ -38,5 +38,6 @@ #include #include #include +#include #endif // BOOST_MATH_CCMATH_HPP diff --git a/include/boost/math/ccmath/fma.hpp b/include/boost/math/ccmath/fma.hpp new file mode 100644 index 0000000000..12390953e7 --- /dev/null +++ b/include/boost/math/ccmath/fma.hpp @@ -0,0 +1,115 @@ +// (C) Copyright Matt Borland 2022. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef BOOST_MATH_CCMATH_FMA_HPP +#define BOOST_MATH_CCMATH_FMA_HPP + +#include +#include +#include +#include +#include +#include + +namespace boost::math::ccmath { + +namespace detail { + +template +inline constexpr T fma_imp(const T x, const T y, const T z) noexcept +{ + #if __GNUC__ < 10 + return (x * y) + z; + #else + if constexpr (std::is_same_v) + { + return __builtin_fmaf(x, y, z); + } + else if constexpr (std::is_same_v) + { + return __builtin_fma(x, y, z); + } + else if constexpr (std::is_same_v) + { + return __builtin_fmal(x, y, z); + } + else // e.g. Boost.Multiprecision types where no built-in exists + { + return (x * y) + z; + } + #endif +} + +} // Namespace detail + +template , bool> = true> +inline constexpr Real fma(Real x, Real y, Real z) noexcept +{ + if (BOOST_MATH_IS_CONSTANT_EVALUATED(x)) + { + return x == 0 && boost::math::ccmath::isinf(y) ? std::numeric_limits::quiet_NaN() : + y == 0 && boost::math::ccmath::isinf(x) ? std::numeric_limits::quiet_NaN() : + boost::math::ccmath::isnan(x) ? std::numeric_limits::quiet_NaN() : + boost::math::ccmath::isnan(y) ? std::numeric_limits::quiet_NaN() : + boost::math::ccmath::isnan(z) ? std::numeric_limits::quiet_NaN() : + boost::math::ccmath::detail::fma_imp(x, y, z); + } + else + { + using std::fma; + return fma(x, y, z); + } +} + +template +inline constexpr auto fma(T1 x, T2 y, T3 z) noexcept +{ + if (BOOST_MATH_IS_CONSTANT_EVALUATED(x)) + { + // If the type is an integer (e.g. epsilon == 0) then set the epsilon value to 1 so that type is at a minimum + // cast to double + constexpr auto T1p = std::numeric_limits::epsilon() > 0 ? std::numeric_limits::epsilon() : 1; + constexpr auto T2p = std::numeric_limits::epsilon() > 0 ? std::numeric_limits::epsilon() : 1; + constexpr auto T3p = std::numeric_limits::epsilon() > 0 ? 
std::numeric_limits::epsilon() : 1; + + using promoted_type = + #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS + std::conditional_t>>>>>; + #else + >>>; + #endif + + return boost::math::ccmath::fma(promoted_type(x), promoted_type(y), promoted_type(z)); + } + else + { + using std::fma; + return fma(x, y, z); + } +} + +inline constexpr float fmaf(float x, float y, float z) noexcept +{ + return boost::math::ccmath::fma(x, y, z); +} + +#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS +inline constexpr long double fmal(long double x, long double y, long double z) noexcept +{ + return boost::math::ccmath::fma(x, y, z); +} +#endif + +} // Namespace boost::math::ccmath + +#endif // BOOST_MATH_CCMATH_FMA_HPP diff --git a/test/Jamfile.v2 b/test/Jamfile.v2 index 5deced8cb0..e38665f777 100644 --- a/test/Jamfile.v2 +++ b/test/Jamfile.v2 @@ -154,6 +154,7 @@ test-suite special_fun : [ run ccmath_isless_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] [ run ccmath_islessequal_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] [ run ccmath_isunordered_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] + [ run ccmath_fma_test.cpp ../../test/build//boost_unit_test_framework : : : [ requires cxx17_if_constexpr ] ] [ run log1p_expm1_test.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ] [ run powm1_sqrtp1m1_test.cpp test_instances//test_instances pch_light ../../test/build//boost_unit_test_framework ] [ run git_issue_705.cpp ../../test/build//boost_unit_test_framework ] diff --git a/test/ccmath_fma_test.cpp b/test/ccmath_fma_test.cpp new file mode 100644 index 0000000000..3b7354df2e --- /dev/null +++ b/test/ccmath_fma_test.cpp @@ -0,0 +1,73 @@ +// (C) Copyright Matt Borland 2022. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. 
(See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef BOOST_HAS_FLOAT128 +#include +#endif + +template +constexpr void test() +{ + // Error handling + static_assert(boost::math::ccmath::isnan(boost::math::ccmath::fma(std::numeric_limits::infinity(), T(0), T(1)))); + static_assert(boost::math::ccmath::isnan(boost::math::ccmath::fma(T(0), std::numeric_limits::infinity(), T(1)))); + + static_assert(boost::math::ccmath::isnan(boost::math::ccmath::fma(std::numeric_limits::infinity(), T(0), std::numeric_limits::quiet_NaN()))); + static_assert(boost::math::ccmath::isnan(boost::math::ccmath::fma(T(0), std::numeric_limits::infinity(), std::numeric_limits::quiet_NaN()))); + + static_assert(boost::math::ccmath::isnan(boost::math::ccmath::fma(std::numeric_limits::quiet_NaN(), T(1), T(1)))); + static_assert(boost::math::ccmath::isnan(boost::math::ccmath::fma(T(1), std::numeric_limits::quiet_NaN(), T(1)))); + + static_assert(boost::math::ccmath::isnan(boost::math::ccmath::fma(T(1), T(1), std::numeric_limits::quiet_NaN()))); + + // Functionality + static_assert(boost::math::ccmath::fma(T(1), T(2), T(3)) == T(5)); + static_assert(boost::math::ccmath::fma(T(2), T(3), T(1)) == T(7)); + + // Correct promoted types + if constexpr (!std::is_same_v) + { + constexpr auto test_type = boost::math::ccmath::fma(T(1), 1.0, 1.0f); + static_assert(std::is_same_v>); + } + else + { + constexpr auto test_type = boost::math::ccmath::fma(1.0f, 1, 1.0); + static_assert(std::is_same_v>); + } +} + +#if !defined(BOOST_MATH_NO_CONSTEXPR_DETECTION) && !defined(BOOST_MATH_USING_BUILTIN_CONSTANT_P) +int main() +{ + test(); + test(); + + #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS + test(); + #endif + + #ifdef BOOST_HAS_FLOAT128 + test(); + #endif + + return 0; +} +#else +int main() +{ + return 0; +} +#endif diff --git a/test/compile_test/ccmath_fma_incl_test.cpp 
b/test/compile_test/ccmath_fma_incl_test.cpp new file mode 100644 index 0000000000..ef035bd8b6 --- /dev/null +++ b/test/compile_test/ccmath_fma_incl_test.cpp @@ -0,0 +1,16 @@ +// (C) Copyright Matt Borland 2022. +// Use, modification and distribution are subject to the +// Boost Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include "test_compile_result.hpp" + +void compile_and_link_test() +{ + check_result(boost::math::ccmath::fma(1.0f, 1.0f, 1.0f)); + check_result(boost::math::ccmath::fma(1.0, 1.0, 1.0)); +#ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS + check_result(boost::math::ccmath::fma(1.0l, 1.0l, 1.0l)); +#endif +} From 49881beaf13e9b8a534152236fbbe21efb9dcde1 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 24 May 2022 19:44:19 -0700 Subject: [PATCH 2/3] Improve use of intrinsics for calculation --- doc/sf/ccmath.qbk | 1 + include/boost/math/ccmath/fma.hpp | 25 ++++++++++++++++++++----- test/ccmath_fma_test.cpp | 2 +- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/doc/sf/ccmath.qbk b/doc/sf/ccmath.qbk index f39e388ee5..4a2b0b2fbf 100644 --- a/doc/sf/ccmath.qbk +++ b/doc/sf/ccmath.qbk @@ -184,6 +184,7 @@ All of the following functions require C++17 or greater. 
template inline constexpr Real fma(Real x, Real y, Real z) noexcept + Requires compiling with fma flag template inline constexpr Promoted fma(Arithmetic1 x, Arithmetic2 y, Arithmetic3 z) noexcept diff --git a/include/boost/math/ccmath/fma.hpp b/include/boost/math/ccmath/fma.hpp index 12390953e7..0ed3cc5668 100644 --- a/include/boost/math/ccmath/fma.hpp +++ b/include/boost/math/ccmath/fma.hpp @@ -13,6 +13,11 @@ #include #include +#if __has_include("immintrin.h") && defined(__X86_64__) || defined(__amd64__) +# include "immintrin.h" +# define BOOST_MATH_HAS_IMMINTRIN_H +#endif + namespace boost::math::ccmath { namespace detail { @@ -20,9 +25,7 @@ namespace detail { template inline constexpr T fma_imp(const T x, const T y, const T z) noexcept { - #if __GNUC__ < 10 - return (x * y) + z; - #else + #if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && !defined(__INTEL_LLVM_COMPILER) if constexpr (std::is_same_v) { return __builtin_fmaf(x, y, z); @@ -35,11 +38,23 @@ inline constexpr T fma_imp(const T x, const T y, const T z) noexcept { return __builtin_fmal(x, y, z); } - else // e.g. 
Boost.Multiprecision types where no built-in exists + #elif defined(BOOST_MATH_HAS_IMMINTRIN_H) + if constexpr (std::is_same_v) + { + return static_cast(_mm_fmadd_ps(x, y, z)); + } + else if constexpr (std::is_same_v) + { + return static_cast(_mm_fmadd_pd(x, y, z)); + } + else if constexpr (std::is_same_v) { - return (x * y) + z; + return static_cast(_mm256_fmadd_pd(x, y, z)); } #endif + + // If we can't use compiler intrinsics hope that -fma flag optimizes this call to fma instruction + return (x * y) + z; } } // Namespace detail diff --git a/test/ccmath_fma_test.cpp b/test/ccmath_fma_test.cpp index 3b7354df2e..a5aa74914d 100644 --- a/test/ccmath_fma_test.cpp +++ b/test/ccmath_fma_test.cpp @@ -17,6 +17,7 @@ #include #endif +#if !defined(BOOST_MATH_NO_CONSTEXPR_DETECTION) && !defined(BOOST_MATH_USING_BUILTIN_CONSTANT_P) template constexpr void test() { @@ -49,7 +50,6 @@ constexpr void test() } } -#if !defined(BOOST_MATH_NO_CONSTEXPR_DETECTION) && !defined(BOOST_MATH_USING_BUILTIN_CONSTANT_P) int main() { test(); From 38aaf9d7669a4fde40bba5d590b552ac4a32a890 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Fri, 27 May 2022 11:26:52 -0700 Subject: [PATCH 3/3] Changes to intrinsics and address sonarlint comments --- include/boost/math/ccmath/fma.hpp | 56 +++++++++++++++---------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/include/boost/math/ccmath/fma.hpp b/include/boost/math/ccmath/fma.hpp index 0ed3cc5668..3056f76d47 100644 --- a/include/boost/math/ccmath/fma.hpp +++ b/include/boost/math/ccmath/fma.hpp @@ -13,17 +13,12 @@ #include #include -#if __has_include("immintrin.h") && defined(__X86_64__) || defined(__amd64__) -# include "immintrin.h" -# define BOOST_MATH_HAS_IMMINTRIN_H -#endif - namespace boost::math::ccmath { namespace detail { template -inline constexpr T fma_imp(const T x, const T y, const T z) noexcept +constexpr T fma_imp(const T x, const T y, const T z) noexcept { #if defined(__GNUC__) && !defined(__clang__) && 
!defined(__INTEL_COMPILER) && !defined(__INTEL_LLVM_COMPILER) if constexpr (std::is_same_v) @@ -38,19 +33,6 @@ inline constexpr T fma_imp(const T x, const T y, const T z) noexcept { return __builtin_fmal(x, y, z); } - #elif defined(BOOST_MATH_HAS_IMMINTRIN_H) - if constexpr (std::is_same_v) - { - return static_cast(_mm_fmadd_ps(x, y, z)); - } - else if constexpr (std::is_same_v) - { - return static_cast(_mm_fmadd_pd(x, y, z)); - } - else if constexpr (std::is_same_v) - { - return static_cast(_mm256_fmadd_pd(x, y, z)); - } #endif // If we can't use compiler intrinsics hope that -fma flag optimizes this call to fma instruction @@ -60,16 +42,32 @@ inline constexpr T fma_imp(const T x, const T y, const T z) noexcept } // Namespace detail template , bool> = true> -inline constexpr Real fma(Real x, Real y, Real z) noexcept +constexpr Real fma(Real x, Real y, Real z) noexcept { if (BOOST_MATH_IS_CONSTANT_EVALUATED(x)) { - return x == 0 && boost::math::ccmath::isinf(y) ? std::numeric_limits::quiet_NaN() : - y == 0 && boost::math::ccmath::isinf(x) ? std::numeric_limits::quiet_NaN() : - boost::math::ccmath::isnan(x) ? std::numeric_limits::quiet_NaN() : - boost::math::ccmath::isnan(y) ? std::numeric_limits::quiet_NaN() : - boost::math::ccmath::isnan(z) ? 
std::numeric_limits::quiet_NaN() : - boost::math::ccmath::detail::fma_imp(x, y, z); + if (x == 0 && boost::math::ccmath::isinf(y)) + { + return std::numeric_limits::quiet_NaN(); + } + else if (y == 0 && boost::math::ccmath::isinf(x)) + { + return std::numeric_limits::quiet_NaN(); + } + else if (boost::math::ccmath::isnan(x)) + { + return std::numeric_limits::quiet_NaN(); + } + else if (boost::math::ccmath::isnan(y)) + { + return std::numeric_limits::quiet_NaN(); + } + else if (boost::math::ccmath::isnan(z)) + { + return std::numeric_limits::quiet_NaN(); + } + + return boost::math::ccmath::detail::fma_imp(x, y, z); } else { @@ -79,7 +77,7 @@ inline constexpr Real fma(Real x, Real y, Real z) noexcept } template -inline constexpr auto fma(T1 x, T2 y, T3 z) noexcept +constexpr auto fma(T1 x, T2 y, T3 z) noexcept { if (BOOST_MATH_IS_CONSTANT_EVALUATED(x)) { @@ -113,13 +111,13 @@ inline constexpr auto fma(T1 x, T2 y, T3 z) noexcept } } -inline constexpr float fmaf(float x, float y, float z) noexcept +constexpr float fmaf(float x, float y, float z) noexcept { return boost::math::ccmath::fma(x, y, z); } #ifndef BOOST_MATH_NO_LONG_DOUBLE_MATH_FUNCTIONS -inline constexpr long double fmal(long double x, long double y, long double z) noexcept +constexpr long double fmal(long double x, long double y, long double z) noexcept { return boost::math::ccmath::fma(x, y, z); }