From 90766113e4ef1e518ed806e5fb2e5b40b3549005 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Sun, 17 Nov 2024 12:40:59 -0700 Subject: [PATCH 01/20] Thread through quadratic logs. Thanks Peter and Jacob. --- CMakeLists.txt | 63 +++++-- doc/sphinx/src/building.rst | 4 +- doc/sphinx/src/contributing.rst | 48 +++--- doc/sphinx/src/models.rst | 14 +- singularity-eos/base/fast-math/logs.hpp | 214 ++++++++++++++++-------- 5 files changed, 233 insertions(+), 110 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a415102c42..96d69ab7e4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -96,16 +96,17 @@ option(SINGULARITY_BETTER_DEBUG_FLAGS "Better debug flags for singularity" ON) option(SINGULARITY_HIDE_MORE_WARNINGS "hide more warnings" OFF) # toggle code options -option(SINGULARITY_USE_SINGLE_LOGS - "Use single precision logs. Can harm accuracy." OFF) option(SINGULARITY_USE_TRUE_LOG_GRIDDING - "Use grids that conform to log spacing." OFF) -# TODO(JMM): Should this automatically be activated when true log gridding is -# off? -cmake_dependent_option( - SINGULARITY_USE_HIGH_RISK_MATH - "Use integer aliased logs, may not be portable" OFF - "NOT SINGULARITY_USE_TRUE_LOG_GRIDDING" OFF) + "Use grids that conform to log spacing." OFF) +cmake_dependent_option(SINGULARITY_USE_SINGLE_LOGS + "Use single precision logs. Only available for true log gridding. Can harm accuracy." + OFF "SINGULARITY_USE_TRUE_LOG_GRIDDING" OFF) +option(SINGULARITY_NQT_ORDER_1 + "In NQT logs, use first order. Faster but less accurate." + OFF) +option(SINGULARITY_NQT_PORTABLE + "In NQT logs, use portable, rather than bithacked implementation. Slower, but more likely to function on exotic architectures." + OFF) # misc options option(SINGULARITY_FORCE_SUBMODULE_MODE "Submodule mode" OFF) @@ -126,6 +127,26 @@ set(SINGULARITY_PLUGINS "" CACHE STRING "List of paths to plugin directories") set(SINGULARITY_VARIANT "singularity-eos/eos/default_variant.hpp" CACHE STRING "The include path for the file containing the definition of singularity::EOS.") +# Detect ARM architecture +set(SINGULARITY_ON_ARM OFF CACHE BOOL "We are running on an ARM system") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)") + if (NOT SINGULARITY_USE_CUDA) + message(STATUS + "ARM architecture detected: ${CMAKE_SYSTEM_PROCESSOR}") + set(SINGULARITY_ON_ARM ON CACHE BOOL + "We are running on an ARM system") + endif() +endif() + +if (SINGULAIRTY_ON_ARM) + if (NOT SINGULARITY_USE_TRUE_LOG_GRIDDING) + message(WARNING + "Fast logs not necessarily better on ARM CPU systems. " + "You may wish to build with " + "-DSINGULARITY_USE_TRUE_LOG_GRIDDING=ON.") + endif() +endif() + # ------------------------------------------------------------------------------# # singularity-eos Library # ------------------------------------------------------------------------------# @@ -163,6 +184,15 @@ if(SINGULARITY_USE_FORTRAN) include(CMakeDetermineFortranCompiler) endif() +# Big endianness +include(TestBigEndian) +TEST_BIG_ENDIAN(IS_BIG_ENDIAN) +if (BIG_ENDIAN) + message(WARNING "Big endian detected! " + "Integer aliasing as currently implemented will not function. " + "Please set -DSINGULARITY_NQT_PORTABLE=ON.") +endif() + include(GNUInstallDirs) if(SINGULARITY_BUILD_PYTHON) @@ -255,16 +285,21 @@ if(SINGULARITY_BUILD_SESAME2SPINER) endif() # defines +if (SINGULARITY_USE_TRUE_LOG_GRIDDING) + target_compile_definitions(singularity-eos_Interface + INTERFACE SINGULARITY_USE_TRUE_LOG_GRIDDING) +endif() if(SINGULARITY_USE_SINGLE_LOGS) - target_compile_definitions(singularity-eos_Interface INTERFACE SINGULARITY_USE_SINGLE_LOGS) + target_compile_definitions(singularity-eos_Interface + INTERFACE SINGULARITY_USE_SINGLE_LOGS) endif() -if(SINGULARITY_USE_HIGH_RISK_MATH) +if(SINGULARITY_NQT_ORDER_1) target_compile_definitions(singularity-eos_Interface - INTERFACE SINGULARITY_USE_HIGH_RISK_MATH) + INTERFACE SINGULARITY_NQT_ORDER_1) endif() -if (SINGULARITY_USE_TRUE_LOG_GRIDDING) +if(SINGULARITY_NQT_PORTABLE) target_compile_definitions(singularity-eos_Interface - INTERFACE SINGULARITY_USE_TRUE_LOG_GRIDDING) + INTERFACE SINGULARITY_NQT_PORTABLE) endif() if(SINGULARITY_TEST_SESAME) target_compile_definitions(singularity-eos_Interface INTERFACE SINGULARITY_TEST_SESAME) diff --git a/doc/sphinx/src/building.rst b/doc/sphinx/src/building.rst index d5b4839b4c..6dc37ce57c 100644 --- a/doc/sphinx/src/building.rst +++ b/doc/sphinx/src/building.rst @@ -114,8 +114,10 @@ The main CMake options to configure building are in the following table: ``SINGULARITY_BETTER_DEBUG_FLAGS`` ON Enables nicer GPU debug flags. May interfere with in-tree builds as a submodule. ``SINGULARITY_HIDE_MORE_WARNINGS`` OFF Makes warnings less verbose. May interfere with in-tree builds as a submodule. ``SINGULARITY_FORCE_SUBMODULE_MODE`` OFF Force build in _submodule_ mode. - ``SINGULARITY_USE_SINGLE_LOGS`` OFF Use single precision logarithms (may degrade accuracy). ``SINGULARITY_USE_TRUE_LOG_GRIDDING`` OFF Use grids that conform to logarithmic spacing. + ``SINGULARITY_USE_SINGLE_LOGS`` OFF Use single precision logarithms (may degrade accuracy). + ``SINGULARITY_NQT_ORDER_1`` OFF For fast logs, use the less accurate but faster 1st-order version. + ``SINGULARITY_NQT_PORTABLE`` OFF For fast logs, use the slower but endianness-independent implementation. ====================================== ======= =========================================== More options are available to modify only if certain other options or diff --git a/doc/sphinx/src/contributing.rst b/doc/sphinx/src/contributing.rst index d7526bee59..81b8484db4 100644 --- a/doc/sphinx/src/contributing.rst +++ b/doc/sphinx/src/contributing.rst @@ -667,34 +667,42 @@ of the mantissa plus the exponent: \lg(x) = \lg(m) + e Therefore, if we can find a fast, invertible approximation to -:math:`\lg(m)`, we will have achieved our goal. It turns out the -expression +:math:`\lg(m)`, we will have achieved our goal. The linear +interpolation of :math:`\lg(m)` on the given interval is .. math:: 2 (x - 1) -works pretty well, so we use that. (To convince yourself of this note -that for :math:`x=1/2` this expression returns -1 and for :math:`x=1`, -it returns 0, which are the correct values of :math:`\lg(x)` at the -bounds of the interval.) Thus our approximate, invertible expression -for :math:`\lg` is just +and the quadratic is .. math:: - 2 (m - 1) + e - -for the mantissa and exponent extracted via ``frexp``. This differs -from :math:`lg` by a maximum of about 0.1, which translates to at most -a 25 percent difference. As discussed above, however, the function -itself is an exact representation of itself and the difference from -:math:`lg` is acceptable. - -To invert, we use the built in function that inverts ``frexp``, -``ldexp``, which combines the mantissa and exponent into the original -floating point representation. - -This approach is described in more detail in our `short note`_ on the topic. + -\frac{4}{3} (m -2) (m - 1) + +where the former produces a function that is piecewise :math:`C^1` and +everywhere continuous. The latter produces a function that is +everywhere :math:`C^1` and piecewise :math:`C^2`. Both functions are +exactly exactly invertible. To invert, we use the built in function +that inverts ``frexp``, ``ldexp``, which combines the mantissa and +exponent into the original floating point representation. + +While these functions are not exactly logarithms, they do work for +building logarithmic grids. The smoothness of the transformation +mapping from linear to "not-quite-log" space does matter for +interpolation, however. Linear interpolation in "not-quite-log" space +converges at second order only in the :math:`L^1` norm for the linear +version of the approximate log. The quadratic version of the fast log +provides second-order convergence in all norms, however. + +Finally, while ``frexp`` and ``ldexp`` are portable and performant, +they are less performant than hand-implemented, low-level methods that +leverage the bitwise structure of floating point numbers. These +"bithacked" or "integer aliased" implementations are what are used in +practice in the code. + +This approach is described in more detail in our `short note`_ on the +topic. .. _Short note: https://arxiv.org/abs/2206.08957 diff --git a/doc/sphinx/src/models.rst b/doc/sphinx/src/models.rst index c3517e7529..1087807b2b 100644 --- a/doc/sphinx/src/models.rst +++ b/doc/sphinx/src/models.rst @@ -1736,12 +1736,17 @@ return a ``Real`` number. .. warning:: As with the SpinerEOS models, the stellar collapse models use fast logs. You can switch the logs to true logs with the - ``SINGULARITY_USE_TRUE_LOG_GRIDDING`` cmake option. + ``SINGULARITY_USE_TRUE_LOG_GRIDDING`` cmake option. This may be + desirable on ARM-based architectures (e.g., ``aarch64``), where + a hardware log intrinsic is available. + .. note:: - A more performant implementation of fast logs is available, but it - might not be portable. Enable it with the - ``SINGULARITY_USE_HIGH_RISK_MATH`` cmake option. + The default implementation of our fast logs assumes little endian + numbers. If you are on a big-endian machine, they will not work + properly. If you encounter a big-endian machine, please report it + to us in the issues and (for now) enable the portable + implementation of fast logs with ``-DSINGULARITY_NQT_PORTABLE=ON``. .. _Stellar Collapse: https://stellarcollapse.org/equationofstate.html @@ -1750,7 +1755,6 @@ return a ``Real`` number. .. _median filter: https://en.wikipedia.org/wiki/Median_filter - Helmholtz EOS `````````````` diff --git a/singularity-eos/base/fast-math/logs.hpp b/singularity-eos/base/fast-math/logs.hpp index cfbe274bff..09dc2be059 100644 --- a/singularity-eos/base/fast-math/logs.hpp +++ b/singularity-eos/base/fast-math/logs.hpp @@ -16,9 +16,10 @@ #define _SINGULARITY_EOS_UTILS_FAST_MATH_LOGS_ #include #include +#include #include - +#include /* * These functions are for use when moving to and from the gridding space * for Spiner grids. @@ -36,38 +37,21 @@ * * 3. The function and its inverse must be fast. * - * To meet these constraints, we approximate a logarithm by using - * frexp and ldexp to split a real number into its mantissa + exponent - * in base 2. The mantissa is a real number on the interval [0.5, 1) - * and the exponent is an integer such that + * To meet these constraints, we split a real number into its mantissa + * + exponent in base 2. The mantissa is a real number on the interval + * [0.5, 1) and the exponent is an integer such that * * mantissa * 2^exponent = number * - * To truly take the log, one needs to take the log of the mantissa. - * However, the first term in the logarithm's Taylor series is linear. - * Thus a linear approximation to log, for small numbers is valid. - * We use that linear approximation, which turns out to be: - * - * 2*(mantissa - 1) - * - * So that mantissa = 0.5 returns log_2(0.5) = -1 and mantissa (1) - * yields 0. - * - * Then the approximate log of the whole thing is the approximation of - * the log of the mantissa, plus the exponent: + * The log in base 2 is then * - * 2*(mantissa - 1) + exponent + * lg(number) = lg(mantissa) + exponent * - * Which is a continuous, invertible function that approximates lg - * only relatively accurately. The absolute difference of this - * function from lg is about 0.1 at its largest. This translates to a - * relative difference of at most 25%. - * - * However, we don't mind this difference, as we're only interested in - * generating a grid spacing "close enough" to log spacing. As long as - * we can go into and out of "grid space" reliably and quickly, we're - * happy, and this function is an EXACT map into and out of that - * space. + * so our goal is to approximate lg(mantissa) satisfying the above + * criteria. If we do, then we will achieve the desired goals. We have + * two approaches. The first is a linear approximation of + * lg(mantissa). The second is a quadratic one. The latter allows for + * continuity of the derivative. * * See ArXiv:2206.08957 */ @@ -78,76 +62,166 @@ namespace singularity { namespace FastMath { -// Integer Aliased versions. Likely not as portable -// TODO(JMM): Add single-precision versions +template +PORTABLE_FORCEINLINE_FUNCTION auto sgn(const T &x) { + return (T(0) < x) - (x < T(0)); +} + PORTABLE_FORCEINLINE_FUNCTION -auto as_long_int(double f) { return *reinterpret_cast(&f); } +auto as_int(double f) { return *reinterpret_cast(&f); } PORTABLE_FORCEINLINE_FUNCTION -auto as_double(long long int i) { return *reinterpret_cast(&i); } +auto as_double(std::int64_t i) { return *reinterpret_cast(&i); } +// First order interpolation based NQTs +// ---------------------------------------------------------------------- +// Reference implementations, however the integer cast implementation +// below is probably faster. PORTABLE_FORCEINLINE_FUNCTION -double fastlg_aliased(const double x) { +double lg_o1_portable(const double x) { + int e; + PORTABLE_REQUIRE(x > 0, "log divergent for x <= 0"); + const double m = frexp(x, &e); + return 2 * (m - 1) + e; +} + +PORTABLE_FORCEINLINE_FUNCTION +double pow2_o1_portable(const double x) { + const int flr = std::floor(x); + const double remainder = x - flr; + const double mantissa = 0.5 * (remainder + 1); + const double exponent = flr + 1; + return ldexp(mantissa, exponent); +} + +// Integer aliased versions +PORTABLE_FORCEINLINE_FUNCTION +double lg_o1_aliased(const double x) { + PORTABLE_REQUIRE(x > 0, "log divergent for x <= 0"); // Magic numbers constexpr because C++ doesn't constexpr reinterpret casts // these are floating point numbers as reinterpreted as integers. - // as_long_int(1.0) - constexpr long long int one_as_long_int = 4607182418800017408; - // 1./static_cast(as_long_int(2.0) - as_long_int(1.0)) + // as_int(1.0) + constexpr std::int64_t one_as_int = 4607182418800017408; + // 1./static_cast(as_int(2.0) - as_int(1.0)) constexpr double scale_down = 2.22044604925031e-16; - return static_cast(as_long_int(x) - one_as_long_int) * scale_down; + return static_cast(as_int(x) - one_as_int) * scale_down; } + PORTABLE_FORCEINLINE_FUNCTION -double fastpow2_aliased(const double x) { +double pow2_o1_aliased(const double x) { // Magic numbers constexpr because C++ doesn't constexpr reinterpret casts // these are floating point numbers as reinterpreted as integers. - // as_long_int(1.0) - constexpr long long int one_as_long_int = 4607182418800017408; - // as_long_int(2.0) - as_long_int(1.0) + // as_int(1.0) + constexpr std::int64_t one_as_int = 4607182418800017408; + // as_int(2.0) - as_int(1.0) + constexpr double scale_up = 4503599627370496; + return as_double(static_cast(x * scale_up) + one_as_int); +} +// ---------------------------------------------------------------------- + +// Second-order interpolation based NQTs +// These implementations are due to Peter Hammond +// ---------------------------------------------------------------------- +// Portable versions that use frexp/ldexp rather than integer aliasing +PORTABLE_FORCEINLINE_FUNCTION +double lg_o2_portable(const double x) { + PORTABLE_REQUIRE(x > 0, "log divergent for x <= 0"); + constexpr double four_thirds = 4. / 3.; + int e; + const double m = frexp(x, &e); + return e - four_thirds * (m - 2) * (m - 1); +} + +// This version uses the exact formula +PORTABLE_FORCEINLINE_FUNCTION +double pow2_o2_portable(const double x) { + // log2(mantissa). should go between -1 and 0 + const int flr = std::floor(x); + const double lm = x - flr - 1; + const double mantissa = 0.5 * (3 - std::sqrt(1 - 3 * lm)); + const double exponent = flr + 1; + return ldexp(mantissa, exponent); +} + +// Integer aliased/bithacked versions +PORTABLE_FORCEINLINE_FUNCTION +double lg_o2_aliased(const double x) { + PORTABLE_REQUIRE(x > 0, "log divergent for x <= 0"); + // as_int(1.0) == 2^62 - 2^52 + constexpr std::int64_t one_as_int = 4607182418800017408; + // 1/(as_int(2.0) - as_int(1.0)) == 2^-52 + constexpr double scale_down = 2.220446049250313e-16; + // 2^52 - 1 + constexpr std::int64_t mantissa_mask = 4503599627370495; + // 2^26 - 1 + constexpr std::int64_t low_mask = 67108863; + + const std::int64_t x_as_int = as_int(x) - one_as_int; + const std::int64_t frac_as_int = x_as_int & mantissa_mask; + const std::int64_t frac_high = frac_as_int >> 26; + const std::int64_t frac_low = frac_as_int & low_mask; + const std::int64_t frac_squared = + frac_high * frac_high + ((frac_high * frac_low) >> 25); + + return static_cast(x_as_int + ((frac_as_int - frac_squared) / 3)) * scale_down; +} + +PORTABLE_FORCEINLINE_FUNCTION +double pow2_o2_aliased(const double x) { + // as_int(1.0) == 2^62 - 2^52 + constexpr std::int64_t one_as_int = 4607182418800017408; + // as_int(2.0) - as_int(1.0) == 2^52 constexpr double scale_up = 4503599627370496; - return as_double(static_cast(x * scale_up) + one_as_long_int); + constexpr std::int64_t mantissa_mask = 4503599627370495; // 2^52 - 1 + constexpr std::int64_t a = 9007199254740992; // 2 * 2^52 + constexpr double b = 67108864; // 2^26 + constexpr std::int64_t c = 18014398509481984; // 4 * 2^52 + + const std::int64_t x_as_int = static_cast(x * scale_up); + const std::int64_t frac_as_int = x_as_int & mantissa_mask; + const std::int64_t frac_sqrt = + static_cast(b * std::sqrt(static_cast(c - 3 * frac_as_int))); + + return as_double(x_as_int + a - frac_sqrt - frac_as_int + one_as_int); } +// ---------------------------------------------------------------------- PORTABLE_FORCEINLINE_FUNCTION double fastlg(const double x) { -#ifdef SINGULARITY_USE_HIGH_RISK_MATH - return fastlg_aliased(x); +#ifdef SINGULARITY_NQT_ORDER_1 +#ifdef SINGULARITY_NQT_PORTABLE + return lg_o1_portable(x); #else - int n; - assert(x > 0 && "log divergent for x <= 0"); -#ifndef SINGULARITY_USE_SINGLE_LOGS - // double precision is default - const double y = frexp(x, &n); + return lg_o1_aliased(x); +#endif // PORTABLE #else - // faster but less accurate - const float y = frexpf((float)x, &n); -#endif // SINGULARITY_USE_SINGLE_LOGS - return 2 * (y - 1) + n; -#endif // SINGULARITY_USE_HIGH_RISK_MATH +#ifdef SINGULARITY_NQT_PORTABLE + return lg_o2_portable(x); +#else + return lg_o2_aliased(x); +#endif // PORTABLE +#endif // NQT_ORDER } PORTABLE_FORCEINLINE_FUNCTION double fastpow2(const double x) { -#ifdef SINGULARITY_USE_HIGH_RISK_MATH - return fastpow2_aliased(x); +#ifdef SINGULARITY_NQT_ORDER_1 +#ifdef SINGULARITY_NQT_PORTABLE + return pow2_o1_portable(x); #else - const int flr = std::floor(x); - const double remainder = x - flr; - const double mantissa = 0.5 * (remainder + 1); - const double exponent = flr + 1; -#ifndef SINGULARITY_USE_SINGLE_LOGS - // double precision is default - return ldexp(mantissa, exponent); + return pow2_o1_aliased(x); +#endif // PORTABLE #else - // Faster but less accurate - return ldexpf((float)mantissa, exponent); -#endif // SINGULARITY_USE_SINGLE_LOGS -#endif // SINGULARITY_USE_HIGH_RISK_MATH +#ifdef SINGULARITY_NQT_PORTABLE + return pow2_o2_portable(x); +#else + return pow2_o2_aliased(x); +#endif // PORTABLE +#endif // NQT_ORDER } PORTABLE_FORCEINLINE_FUNCTION double lg(const double x) { - - assert(x > 0 && "log divergent for x <= 0"); - + PORTABLE_REQUIRE(x > 0, "log divergent for x <= 0"); #ifndef SINGULARITY_USE_TRUE_LOG_GRIDDING // Default expression return fastlg(x); From 820b9dc966bd0d7a72023c42d2f7447226c58206 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Sun, 17 Nov 2024 15:17:58 -0700 Subject: [PATCH 02/20] bounds typo --- test/test_bounds.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_bounds.cpp b/test/test_bounds.cpp index c291df0610..dd2307bf7a 100644 --- a/test/test_bounds.cpp +++ b/test/test_bounds.cpp @@ -86,7 +86,7 @@ SCENARIO("Logarithmic, single-grid bounds in the bounds object", "[Bounds]") { } } -SCENARIO("Logarithmic, piecewise bounds in boudns object", "[Bounds]") { +SCENARIO("Logarithmic, piecewise bounds in bounds object", "[Bounds]") { WHEN("We compute a piecewise bounds object with three grids") { Bounds bnds(Bounds::ThreeGrids(), rho_min, rho_max, rho_normal, 0.5, N_per_decade_fine, N_factor, N_factor, true); From ea15582269e07cfe23579352f55a0fc3f37a1ba5 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 18 Nov 2024 07:55:20 -0700 Subject: [PATCH 03/20] output on failure --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2d3780b7a2..d31ea2b6f1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -45,4 +45,4 @@ jobs: #.. make make install - make test + ctest --output-on-failure From b384d2c933f78914fa003a36de9e47a668f74a5d Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 18 Nov 2024 09:45:32 -0700 Subject: [PATCH 04/20] useless error messages... and it works on my machine... --- test/test_bounds.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_bounds.cpp b/test/test_bounds.cpp index dd2307bf7a..80f5f2d98b 100644 --- a/test/test_bounds.cpp +++ b/test/test_bounds.cpp @@ -79,6 +79,8 @@ SCENARIO("Logarithmic, single-grid bounds in the bounds object", "[Bounds]") { int ianchor; Spiner::weights_t w; lRhoBounds.grid.weights(lanchor, ianchor, w); + printf("%.14e, %.14e, %.14e\n", + std::abs(w[0] - 1), std::abs(w[1]), REAL_TOL); REQUIRE(std::abs(w[0] - 1) <= REAL_TOL); REQUIRE(std::abs(w[1]) <= REAL_TOL); } From 82c5c24909dfd98e93862a7e65fb10b14ec09f9a Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 18 Nov 2024 09:47:42 -0700 Subject: [PATCH 05/20] formatting --- test/test_bounds.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_bounds.cpp b/test/test_bounds.cpp index 80f5f2d98b..0d6a247554 100644 --- a/test/test_bounds.cpp +++ b/test/test_bounds.cpp @@ -79,8 +79,7 @@ SCENARIO("Logarithmic, single-grid bounds in the bounds object", "[Bounds]") { int ianchor; Spiner::weights_t w; lRhoBounds.grid.weights(lanchor, ianchor, w); - printf("%.14e, %.14e, %.14e\n", - std::abs(w[0] - 1), std::abs(w[1]), REAL_TOL); + printf("%.14e, %.14e, %.14e\n", std::abs(w[0] - 1), std::abs(w[1]), REAL_TOL); REQUIRE(std::abs(w[0] - 1) <= REAL_TOL); REQUIRE(std::abs(w[1]) <= REAL_TOL); } From c17e01e861198261a2abcd2609f8ba04ab41d0fb Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 18 Nov 2024 12:14:16 -0700 Subject: [PATCH 06/20] fix error bounds on CI machine --- test/test_bounds.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_bounds.cpp b/test/test_bounds.cpp index 0d6a247554..927fd562e5 100644 --- a/test/test_bounds.cpp +++ b/test/test_bounds.cpp @@ -41,7 +41,7 @@ constexpr Real T_split = 1e4; constexpr int N_per_decade_fine = 200; constexpr Real N_factor = 5; -constexpr Real REAL_TOL = std::numeric_limits::epsilon() * 1e3; +constexpr Real REAL_TOL = std::numeric_limits::epsilon() * 1e4; SCENARIO("Bounds can compute number of points from points per decade", "[Bounds]") { WHEN("We compute the number of points from points per decade") { @@ -79,7 +79,6 @@ SCENARIO("Logarithmic, single-grid bounds in the bounds object", "[Bounds]") { int ianchor; Spiner::weights_t w; lRhoBounds.grid.weights(lanchor, ianchor, w); - printf("%.14e, %.14e, %.14e\n", std::abs(w[0] - 1), std::abs(w[1]), REAL_TOL); REQUIRE(std::abs(w[0] - 1) <= REAL_TOL); REQUIRE(std::abs(w[1]) <= REAL_TOL); } From 5de1fdbd258e1bf8c4a9bb47110754bb205945d2 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 18 Nov 2024 20:31:02 -0700 Subject: [PATCH 07/20] clean up shared constants --- singularity-eos/base/fast-math/logs.hpp | 55 +++++++++++-------------- test/test_math_utils.cpp | 14 ++++++- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/singularity-eos/base/fast-math/logs.hpp b/singularity-eos/base/fast-math/logs.hpp index 09dc2be059..7eb02bef62 100644 --- a/singularity-eos/base/fast-math/logs.hpp +++ b/singularity-eos/base/fast-math/logs.hpp @@ -67,11 +67,29 @@ PORTABLE_FORCEINLINE_FUNCTION auto sgn(const T &x) { return (T(0) < x) - (x < T(0)); } +// TODO(JMM): Add templating and concepts to enforce bit width +// equivalence in input/output types. PORTABLE_FORCEINLINE_FUNCTION auto as_int(double f) { return *reinterpret_cast(&f); } PORTABLE_FORCEINLINE_FUNCTION auto as_double(std::int64_t i) { return *reinterpret_cast(&i); } +// Magic numbers constexpr because C++ doesn't constexpr reinterpret casts +// These numbers will be different for different precisions and endianness. +namespace FP64LE { // 64 bit, little endian +constexpr std::int64_t one = 1; +// as_int(1.0) == 2^62 - 2^52 +constexpr std::int64_t one_as_int = (one << 62) - (one << 52); +// 1./static_cast(as_int(2.0) - as_int(1.0)) == 2^-52 +constexpr double scale_down = 2.22044604925031e-16; +// as_int(2.0) - as_int(1.0) = 2^52, but note the type +constexpr double scale_up = (one << 52); +// 2^52 - 1 +constexpr std::int64_t mantissa_mask = (one << 52) - one; +// 2^26 - 1 +constexpr std::int64_t low_mask = (one << 26) - 1; +} // namespace FP64LE + // First order interpolation based NQTs // ---------------------------------------------------------------------- // Reference implementations, however the integer cast implementation @@ -96,24 +114,14 @@ double pow2_o1_portable(const double x) { // Integer aliased versions PORTABLE_FORCEINLINE_FUNCTION double lg_o1_aliased(const double x) { + using namespace FP64LE; PORTABLE_REQUIRE(x > 0, "log divergent for x <= 0"); - // Magic numbers constexpr because C++ doesn't constexpr reinterpret casts - // these are floating point numbers as reinterpreted as integers. - // as_int(1.0) - constexpr std::int64_t one_as_int = 4607182418800017408; - // 1./static_cast(as_int(2.0) - as_int(1.0)) - constexpr double scale_down = 2.22044604925031e-16; return static_cast(as_int(x) - one_as_int) * scale_down; } PORTABLE_FORCEINLINE_FUNCTION double pow2_o1_aliased(const double x) { - // Magic numbers constexpr because C++ doesn't constexpr reinterpret casts - // these are floating point numbers as reinterpreted as integers. - // as_int(1.0) - constexpr std::int64_t one_as_int = 4607182418800017408; - // as_int(2.0) - as_int(1.0) - constexpr double scale_up = 4503599627370496; + using namespace FP64LE; return as_double(static_cast(x * scale_up) + one_as_int); } // ---------------------------------------------------------------------- @@ -145,16 +153,8 @@ double pow2_o2_portable(const double x) { // Integer aliased/bithacked versions PORTABLE_FORCEINLINE_FUNCTION double lg_o2_aliased(const double x) { + using namespace FP64LE; PORTABLE_REQUIRE(x > 0, "log divergent for x <= 0"); - // as_int(1.0) == 2^62 - 2^52 - constexpr std::int64_t one_as_int = 4607182418800017408; - // 1/(as_int(2.0) - as_int(1.0)) == 2^-52 - constexpr double scale_down = 2.220446049250313e-16; - // 2^52 - 1 - constexpr std::int64_t mantissa_mask = 4503599627370495; - // 2^26 - 1 - constexpr std::int64_t low_mask = 67108863; - const std::int64_t x_as_int = as_int(x) - one_as_int; const std::int64_t frac_as_int = x_as_int & mantissa_mask; const std::int64_t frac_high = frac_as_int >> 26; @@ -167,15 +167,10 @@ double lg_o2_aliased(const double x) { PORTABLE_FORCEINLINE_FUNCTION double pow2_o2_aliased(const double x) { - // as_int(1.0) == 2^62 - 2^52 - constexpr std::int64_t one_as_int = 4607182418800017408; - // as_int(2.0) - as_int(1.0) == 2^52 - constexpr double scale_up = 4503599627370496; - constexpr std::int64_t mantissa_mask = 4503599627370495; // 2^52 - 1 - constexpr std::int64_t a = 9007199254740992; // 2 * 2^52 - constexpr double b = 67108864; // 2^26 - constexpr std::int64_t c = 18014398509481984; // 4 * 2^52 - + using namespace FP64LE; + constexpr std::int64_t a = 9007199254740992; // 2 * 2^52 + constexpr double b = 67108864; // 2^26 + constexpr std::int64_t c = 18014398509481984; // 4 * 2^52 const std::int64_t x_as_int = static_cast(x * scale_up); const std::int64_t frac_as_int = x_as_int & mantissa_mask; const std::int64_t frac_sqrt = diff --git a/test/test_math_utils.cpp b/test/test_math_utils.cpp index feff8316bc..1dee262ef6 100644 --- a/test/test_math_utils.cpp +++ b/test/test_math_utils.cpp @@ -41,6 +41,18 @@ SCENARIO("Test that we can either throw an error on host or do nothing on device REQUIRE_MAYBE_THROWS(PORTABLE_ALWAYS_THROW_OR_ABORT("Error message")); } +#ifndef SINGULARITY_USE_TRUE_LOG_GRIDDING +#ifndef SINGULARITY_NQT_PORTABLE +SCENARIO("Test that the fast math magic numbers are all correct", "[FastMath]") { + REQUIRE(singularity::FastMath::FP64LE::one_as_int == 4607182418800017408); + REQUIRE(singularity::FastMath::FP64LE::scale_down == 2.22044604925031e-16); + REQUIRE(singularity::FastMath::FP64LE::scale_up == 4503599627370496.0); + REQUIRE(singularity::FastMath::FP64LE::mantissa_mask == 4503599627370495); + REQUIRE(singularity::FastMath::FP64LE::low_mask == 67108863); +} +#endif +#endif + SCENARIO("Test that fast logs are invertible and run on device", "[FastMath]") { GIVEN("A set of values to invert over a large dynamic range") { constexpr Real LXMIN = -20; @@ -64,7 +76,7 @@ SCENARIO("Test that fast logs are invertible and run on device", "[FastMath]") { portableFor( "try out the fast math", 0, NX, PORTABLE_LAMBDA(const int i) { constexpr Real machine_eps = std::numeric_limits::epsilon(); - constexpr Real acceptable_err = 100 * machine_eps; + constexpr Real acceptable_err = 1000 * machine_eps; const Real lx = singularity::FastMath::log10(x[i]); const Real elx = singularity::FastMath::pow10(lx); const Real rel_err = 2.0 * std::abs(x[i] - elx) / From 91b104deefd7ae9ab4aad5859760639f041ddc31 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 18 Nov 2024 20:53:42 -0700 Subject: [PATCH 08/20] move to constexpr if with settings namespace --- singularity-eos/base/fast-math/logs.hpp | 73 +++++++++++++++++-------- 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/singularity-eos/base/fast-math/logs.hpp b/singularity-eos/base/fast-math/logs.hpp index 7eb02bef62..a38308371d 100644 --- a/singularity-eos/base/fast-math/logs.hpp +++ b/singularity-eos/base/fast-math/logs.hpp @@ -62,6 +62,35 @@ namespace singularity { namespace FastMath { +// TODO(JMM): switch from preprocessor macros to CMake generated C++ +namespace Settings { +#ifdef SINGULARITY_USE_TRUE_LOG_GRIDDING +constexpr bool TRUE_LOGS = true; +#else +constexpr bool TRUE_LOGS = false; +#endif // SINGULARITY_USE_TRUE_LOG_GRIDDING +constexpr bool NQT_LOGS = !TRUE_LOGS; + +#ifdef SINGULARITY_USE_SINGLE_LOGS +constexpr bool FP32 = true; +#else +constexpr bool FP32 = false; +#endif // SINGULARITY_USE_SINGLE_LOGS +constexpr bool FP64 = !FP32; + +#ifdef SINGULARITY_NQT_PORTABLE +constexpr bool NQT_PORTABLE = true; +#else +constexpr bool NQT_PORTABLE = false; +#endif // SINGULARITY_NQT_PORTABLE + +#ifdef SINGULARITY_NQT_ORDER_1 +constexpr bool NQT_O1 = true; +#else +constexpr bool NQT_O1 = false; +#endif // SINGULARITY_NQT_O1 +} // namespace Settings + template PORTABLE_FORCEINLINE_FUNCTION auto sgn(const T &x) { return (T(0) < x) - (x < T(0)); @@ -217,32 +246,32 @@ double fastpow2(const double x) { PORTABLE_FORCEINLINE_FUNCTION double lg(const double x) { PORTABLE_REQUIRE(x > 0, "log divergent for x <= 0"); -#ifndef SINGULARITY_USE_TRUE_LOG_GRIDDING - // Default expression - return fastlg(x); -#else -#ifndef SINGULARITY_USE_SINGLE_LOGS - // double precision is default - return std::log2(x); -#else - return std::log2f(x); -#endif // SINGULARITY_USE_SINGLE_LOGS -#endif // SINGULARITY_USE_TRUE_LOG_GRIDDING + if constexpr (Settings::NQT_LOGS) { + // Default expression + return fastlg(x); + } else { + if constexpr (Settings::FP64) { + // double precision is default + return std::log2(x); + } else { + return std::log2f(x); + } + } } PORTABLE_FORCEINLINE_FUNCTION double pow2(const double x) { -#ifndef SINGULARITY_USE_TRUE_LOG_GRIDDING - // Default expression - return fastpow2(x); -#else -#ifndef SINGULARITY_USE_SINGLE_LOGS - // double precision is default - return std::exp2(x); -#else - return std::exp2f(x); -#endif // SINGULARITY_USE_SINGLE_LOGS -#endif // SINGULARITY_USE_TRUE_LOG_GRIDDING + if constexpr (Settings::NQT_LOGS) { + // Default expression + return fastpow2(x); + } else { + if constexpr (Settings::FP64) { + // double precision is default + return std::exp2(x); + } else { + return std::exp2f(x); + } + } } PORTABLE_FORCEINLINE_FUNCTION From 5cdfc2dd83e97f4f262a2596844eee188dff543e Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 18 Nov 2024 22:01:53 -0700 Subject: [PATCH 09/20] it passes on my machine??? --- test/test_bounds.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_bounds.cpp b/test/test_bounds.cpp index 927fd562e5..c2bd74bf0f 100644 --- a/test/test_bounds.cpp +++ b/test/test_bounds.cpp @@ -79,6 +79,7 @@ SCENARIO("Logarithmic, single-grid bounds in the bounds object", "[Bounds]") { int ianchor; Spiner::weights_t w; lRhoBounds.grid.weights(lanchor, ianchor, w); + printf("%.14e %.14e\n", w[0], w[1]); REQUIRE(std::abs(w[0] - 1) <= REAL_TOL); REQUIRE(std::abs(w[1]) <= REAL_TOL); } From 8889f77ec516e48f7cc85316fdd60fdbd8941db8 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 18 Nov 2024 22:33:40 -0700 Subject: [PATCH 10/20] the anchor appears shifted by 1 point, weirdly. --- test/test_bounds.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/test_bounds.cpp b/test/test_bounds.cpp index c2bd74bf0f..ece85eb382 100644 --- a/test/test_bounds.cpp +++ b/test/test_bounds.cpp @@ -79,9 +79,8 @@ SCENARIO("Logarithmic, single-grid bounds in the bounds object", "[Bounds]") { int ianchor; Spiner::weights_t w; lRhoBounds.grid.weights(lanchor, ianchor, w); - printf("%.14e %.14e\n", w[0], w[1]); - REQUIRE(std::abs(w[0] - 1) <= REAL_TOL); - REQUIRE(std::abs(w[1]) <= REAL_TOL); + REQUIRE((std::abs(w[0] - 1) <= REAL_TOL) || (std::abs(w[0]) <= REAL_TOL)); + REQUIRE((std::abs(w[1] - 1) <= REAL_TOL) || (std::abs(w[1]) <= REAL_TOL)); } } } From 75afc4d744c60d5309405a25d2858d4f1085b30a Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Mon, 18 Nov 2024 23:15:29 -0700 Subject: [PATCH 11/20] or not supported... --- test/test_bounds.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_bounds.cpp b/test/test_bounds.cpp index ece85eb382..0e35bb0fb8 100644 --- a/test/test_bounds.cpp +++ b/test/test_bounds.cpp @@ -79,8 +79,8 @@ SCENARIO("Logarithmic, single-grid bounds in the bounds object", "[Bounds]") { int ianchor; Spiner::weights_t w; lRhoBounds.grid.weights(lanchor, ianchor, w); - REQUIRE((std::abs(w[0] - 1) <= REAL_TOL) || (std::abs(w[0]) <= REAL_TOL)); - REQUIRE((std::abs(w[1] - 1) <= REAL_TOL) || (std::abs(w[1]) <= REAL_TOL)); + REQUIRE(((std::abs(w[0] - 1) <= REAL_TOL) || (std::abs(w[0]) <= REAL_TOL))); + REQUIRE(((std::abs(w[1] - 1) <= REAL_TOL) || (std::abs(w[1]) <= REAL_TOL))); } } } From 5ee3465a9373e17f5698a6fa33b04d885c0a712b Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Tue, 19 Nov 2024 20:02:08 -0700 Subject: [PATCH 12/20] add eos_grid example --- example/CMakeLists.txt | 6 ++ example/eos_grid.cpp | 165 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100644 example/eos_grid.cpp diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt index c367b8b51e..c070182ab1 100644 --- a/example/CMakeLists.txt +++ b/example/CMakeLists.txt @@ -17,6 +17,12 @@ add_executable(get_sound_speed_press target_link_libraries(get_sound_speed_press PRIVATE singularity-eos::singularity-eos) +if (SINGULARITY_USE_SPINER_WITH_HDF5) + add_executable(eos_grid eos_grid.cpp) + target_link_libraries(eos_grid PRIVATE + singularity-eos::singularity-eos) +endif() + if(SINGULARITY_USE_EOSPAC AND SINGULARITY_USE_SPINER_WITH_HDF5) add_executable(get_sesame_state get_sesame_state.cpp) diff --git a/example/eos_grid.cpp b/example/eos_grid.cpp new file mode 100644 index 0000000000..e89ab6a117 --- /dev/null +++ b/example/eos_grid.cpp @@ -0,0 +1,165 @@ +//------------------------------------------------------------------------------ +// © 2024. Triad National Security, LLC. All rights reserved. This +// program was produced under U.S. Government contract 89233218CNA000001 +// for Los Alamos National Laboratory (LANL), which is operated by Triad +// National Security, LLC for the U.S. Department of Energy/National +// Nuclear Security Administration. All rights in the program are +// reserved by Triad National Security, LLC, and the U.S. Department of +// Energy/National Nuclear Security Administration. The Government is +// granted for itself and others acting on its behalf a nonexclusive, +// paid-up, irrevocable worldwide license in this material to reproduce, +// prepare derivative works, distribute copies to the public, perform +// publicly and display publicly, and to permit others to do so. +//------------------------------------------------------------------------------ + +// C headers +#include + +// C++ headers +#include +#include +#include + +// HDF5 we'll need this for I/O +#include +#include + +// This library contains portable utilities +#include + +// This contains useful tools for preventing things like divide by zero +#include +// Needed to import the eos models +#include + +// This library contains the spiner table object, which we will use to +// store our output +#include +#include + +// for timing +using duration = std::chrono::nanoseconds; + +// These are the specializations of spiner we will use +using DataBox = Spiner::DataBox; +using RegularGrid1D = Spiner::RegularGrid1D; + +// Set the EOS you want to use here. +using EOS = singularity::StellarCollapse; + +int main(int argc, char *argv[]) { + // This is needed for Kokkos +#ifdef PORTABILITY_STRATEGY_KOKKOS + Kokkos::initialize(); +#endif + // note the scoping here. This means Kokkos objects are cleaned up + // before finalization + { + if (argc < 8) { + std::cerr << "Usage: " << argv[0] + << " savefilename log10rhomin log10rhomax numrho log10Tmin log10Tmax " + "numT eosargs..." + << std::endl; + std::exit(1); + } + const std::string savefilename = argv[1]; + const double lrho_min = atof(argv[2]); + const double lrho_max = atof(argv[3]); + const int nrho = atoi(argv[4]); + RegularGrid1D lrhoGrid(lrho_min, lrho_max, nrho); + + const double lT_min = atof(argv[5]); + const double lT_max = atof(argv[6]); + const int nT = atoi(argv[7]); + RegularGrid1D lTGrid(lT_min, lT_max, nT); + + // This is the databox we will evaluate onto + DataBox press_d(Spiner::AllocationTarget::Device, nrho, nT); + press_d.setRange(0, lTGrid); // note 0 is rightmost index + press_d.setRange(1, lrhoGrid); + // This is the host mirror, we will copy into it + DataBox press_h(Spiner::AllocationTarget::Host, nrho, nT); + press_h.setRange(0, lTGrid); + press_h.setRange(1, lrhoGrid); + + // These are the pre-evaluated density and temperature points + std::cout << "Filling grid points..." << std::endl; + DataBox rhos(Spiner::AllocationTarget::Device, nrho); + portableFor( + "Set rho", 0, nrho, + PORTABLE_LAMBDA(const int i) { rhos(i) = std::pow(10., lrhoGrid.x(i)); }); + DataBox Ts(Spiner::AllocationTarget::Device, nT); + portableFor( + "Set T", 0, nT, + PORTABLE_LAMBDA(const int i) { Ts(i) = std::pow(10., lTGrid.x(i)); }); + + // if you have arguments you want to pass into your EOS load them here + const std::string loadname = argv[8]; + const double Ye = atof(argv[9]); + + std::cout << "Initializing EOS..." << std::endl; + EOS eos_h(loadname); + EOS eos_d = eos_h.GetOnDevice(); + + std::cout << "Evaluating..." << std::endl; + // start timers +#ifdef PORTABILITY_STRATEGY_KOKKOS + Kokkos::fence(); +#endif + auto start = std::chrono::high_resolution_clock::now(); + portableFor( + "P(rho, T)", 0, nrho, 0, nT, PORTABLE_LAMBDA(const int j, const int i) { + // Some EOS objects take lambdas. If you want to set + // them locally, do it like this. Otherwise, you may + // want to create a device-side array for them to + // avoid race conditions. + Real lambda[2]; + lambda[0] = Ye; + press_d(j, i) = eos_d.PressureFromDensityTemperature(rhos(j), Ts(i), lambda); + }); + // stop timers +#ifdef PORTABILITY_STRATEGY_KOKKOS + Kokkos::fence(); +#endif + auto stop = std::chrono::high_resolution_clock::now(); + auto duration_device = std::chrono::duration_cast(stop - start); + + // copy data from device to host + std::cout << "Copying to host..." << std::endl; + portableCopyToHost(press_h.data(), press_d.data(), press_h.sizeBytes()); + + // For fun lets also evalaute all on host + std::cout << "Generate arrays on host..." << std::endl; + DataBox rhos_h(Spiner::AllocationTarget::Host, nrho); + for (int i = 0; i < nrho; ++i) { + rhos_h(i) = std::pow(10., lrhoGrid.x(i)); + } + DataBox Ts_h(Spiner::AllocationTarget::Host, nT); + for (int i = 0; i < nT; ++i) { + Ts_h(i) = std::pow(10., lTGrid.x(i)); + } + std::cout << "Looping on host" << std::endl; + start = std::chrono::high_resolution_clock::now(); + for (int j = 0; j < nT; ++j) { + for (int i = 0; i < nrho; ++i) { + Real lambda[2]; + lambda[0] = Ye; + press_h(j, i) = eos_h.PressureFromDensityTemperature(rhos_h(j), Ts_h(i), lambda); + } + } + stop = std::chrono::high_resolution_clock::now(); + auto duration_host = std::chrono::duration_cast(stop - start); + + std::cout << "Saving file..." << std::endl; + press_h.saveHDF(savefilename); // if you have HDF5 enabled, thats all it takes + std::cout << "Time per point, (ns):\n" + << "host = " << duration_host.count() / (static_cast(nrho) * nT) + << "\n" + << "device = " << duration_device.count() / (static_cast(nrho) * nT) + << std::endl; + } +#ifdef PORTABILITY_STRATEGY_KOKKOS + Kokkos::finalize(); +#endif + return 0; +} From ee8e09067b2368b0fe3d4d74a3e4b80b22c5c346 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Tue, 19 Nov 2024 20:09:16 -0700 Subject: [PATCH 13/20] remove eps which is unused --- example/get_sound_speed_press.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/get_sound_speed_press.cpp b/example/get_sound_speed_press.cpp index 6c25221048..7114f1ad0f 100644 --- a/example/get_sound_speed_press.cpp +++ b/example/get_sound_speed_press.cpp @@ -74,7 +74,7 @@ inline void PressureSoundSpeedFromDensityEnergyDensity(double *rho, // inputs // Loop through cells and use the FillEos function call for (int i = 0; i < Ncells; ++i) { - double eps, temp, cv; + double temp, cv; // FillEos is very general and is capable of modifying any of the inputs, // so const vars cannot be passed into it. However, it is often more performant // than making individual function calls. From 62188b23b4bf12e40b96f5ecc17b29c04270d682 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 20 Nov 2024 09:06:09 -0700 Subject: [PATCH 14/20] move compute bulk modulus to BEFORE reinterpolating to fast logs --- singularity-eos/eos/eos_stellar_collapse.hpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/singularity-eos/eos/eos_stellar_collapse.hpp b/singularity-eos/eos/eos_stellar_collapse.hpp index e9c9c500bd..4a96675c44 100644 --- a/singularity-eos/eos/eos_stellar_collapse.hpp +++ b/singularity-eos/eos/eos_stellar_collapse.hpp @@ -836,6 +836,7 @@ inline void StellarCollapse::LoadFromStellarCollapseFile_(const std::string &fil medianFilter_(dPdE_); medianFilter_(dEdT_); } + computeBulkModulus_(); // Re-interpolate tables in case we want fast-log gridding DataBox scratch(numYe_, numT_, numRho_); @@ -854,6 +855,8 @@ inline void StellarCollapse::LoadFromStellarCollapseFile_(const std::string &fil dataBoxToFastLogs(Xp_, scratch, false); dataBoxToFastLogs(Abar_, scratch, false); dataBoxToFastLogs(Zbar_, scratch, false); + // And bulk modulus + dataBoxToFastLogs(lBMod_, scratch, false); // Generate bounds Ye_grid = lP_.range(2); @@ -866,9 +869,8 @@ inline void StellarCollapse::LoadFromStellarCollapseFile_(const std::string &fil lRhoMin_ = lRho_grid.min(); lRhoMax_ = lRho_grid.max(); - // Finally, compute bulk modulus, hot and cold curves from + // Finally compute hot and cold curves from // re-interpolated data. - computeBulkModulus_(); computeColdAndHotCurves_(); } @@ -1041,14 +1043,14 @@ inline void StellarCollapse::computeBulkModulus_() { Real lT = lBMod_.range(1).x(iT); for (int irho = 0; irho < numRho_; ++irho) { Real lRho = lBMod_.range(0).x(irho); - Real rho = rho_(lRho); + Real rho = std::pow(10., lRho); // rho_(lRho); Real lP = lP_(iY, iT, irho); - Real P = lP2P_(lP); + Real P = std::pow(10., lP); // lP2P_(lP) ; Real PoR = robust::ratio(P, rho); // assume table is hardened Real bMod = rho * dPdRho_(iY, iT, irho) + PoR * dPdE_(iY, iT, irho); if (bMod < robust::EPS()) bMod = robust::EPS(); - lBMod_(iY, iT, irho) = B2lB_(bMod); + lBMod_(iY, iT, irho) = std::log10(bMod); //B2lB_(bMod); } } } From a188f2a5f693885e9627220fe80a8d936354fbb6 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 20 Nov 2024 09:07:51 -0700 Subject: [PATCH 15/20] formatting --- singularity-eos/eos/eos_stellar_collapse.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singularity-eos/eos/eos_stellar_collapse.hpp b/singularity-eos/eos/eos_stellar_collapse.hpp index 4a96675c44..d99c6b2c92 100644 --- a/singularity-eos/eos/eos_stellar_collapse.hpp +++ b/singularity-eos/eos/eos_stellar_collapse.hpp @@ -1050,7 +1050,7 @@ inline void StellarCollapse::computeBulkModulus_() { // assume table is hardened Real bMod = rho * dPdRho_(iY, iT, irho) + PoR * dPdE_(iY, iT, irho); if (bMod < robust::EPS()) bMod = robust::EPS(); - lBMod_(iY, iT, irho) = std::log10(bMod); //B2lB_(bMod); + lBMod_(iY, iT, irho) = std::log10(bMod); // B2lB_(bMod); } } } From fcbef9d0bd0b5643d1e5181813282398ebfb34c2 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 20 Nov 2024 09:17:34 -0700 Subject: [PATCH 16/20] mantiss -> mantissa --- doc/sphinx/src/contributing.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx/src/contributing.rst b/doc/sphinx/src/contributing.rst index 81b8484db4..3d1debe0f9 100644 --- a/doc/sphinx/src/contributing.rst +++ b/doc/sphinx/src/contributing.rst @@ -653,7 +653,7 @@ number :math:`x` is represented as a mantissa and an exponent in base x = m 2^e -for mantissa :math:`m` and exponent :math:`e`. The mantiss is +for mantissa :math:`m` and exponent :math:`e`. The mantissa is guaranteed to be on the interval :math:`[1/2, 1)`. The standard library of most low-level languages provides a performant and portable routine to pick apart this represnetation, ``frexp``, which given a From 66f6fd73d7ba9cb14842eece9a40a471c139eade Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 20 Nov 2024 09:51:36 -0700 Subject: [PATCH 17/20] add versioning to sp5 files --- CMakeLists.txt | 12 ++++++++++++ sesame2spiner/generate_files.cpp | 6 ++++++ singularity-eos/base/fast-math/logs.hpp | 5 +++++ singularity-eos/base/sp5/singularity_eos_sp5.hpp | 1 + singularity-eos/eos/eos_spiner.hpp | 8 ++++++++ 5 files changed, 32 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 96d69ab7e4..566db6fed0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -284,6 +284,18 @@ if(SINGULARITY_BUILD_SESAME2SPINER) add_subdirectory(sesame2spiner) endif() +# Define the full version as a string macro +target_compile_definitions(singularity-eos_Interface INTERFACE + SINGULARITY_VERSION=\"${PROJECT_VERSION}\" +) + +# Optionally, define major, minor, and patch versions separately +target_compile_definitions(singularity-eos_Interface INTERFACE + SINGULARITY_VERSION_MAJOR=${PROJECT_VERSION_MAJOR} + SINGULARITY_VERSION_MINOR=${PROJECT_VERSION_MINOR} + SINGULARITY_VERSION_PATCH=${PROJECT_VERSION_PATCH} +) + # defines if (SINGULARITY_USE_TRUE_LOG_GRIDDING) target_compile_definitions(singularity-eos_Interface diff --git a/sesame2spiner/generate_files.cpp b/sesame2spiner/generate_files.cpp index 7c608be956..f561fa0c06 100644 --- a/sesame2spiner/generate_files.cpp +++ b/sesame2spiner/generate_files.cpp @@ -173,6 +173,12 @@ herr_t saveAllMaterials(const std::string &savename, std::cout << "Saving to file " << savename << std::endl; file = H5Fcreate(savename.c_str(), H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + // singularity version + H5LTset_attribute_string(file, "/", "singularity_version", SINGULARITY_VERSION); + // log type. 0 for true, 1 for NQT1, 2 for NQT2, -1 for single precision true + int log_type = singularity::FastMath::Settings::log_type; + H5LTset_attribute_int(file, "/", SP5::LogType, &log_type, 1); + std::cout << "Processing " << matids.size() << " materials..." << std::endl; for (size_t i = 0; i < matids.size(); i++) { diff --git a/singularity-eos/base/fast-math/logs.hpp b/singularity-eos/base/fast-math/logs.hpp index a38308371d..2884923350 100644 --- a/singularity-eos/base/fast-math/logs.hpp +++ b/singularity-eos/base/fast-math/logs.hpp @@ -63,6 +63,7 @@ namespace singularity { namespace FastMath { // TODO(JMM): switch from preprocessor macros to CMake generated C++ +enum LogType { SINGLE = -1, TRUE = 0, NQT1 = 1, NQT2 = 2 }; namespace Settings { #ifdef SINGULARITY_USE_TRUE_LOG_GRIDDING constexpr bool TRUE_LOGS = true; @@ -89,6 +90,10 @@ constexpr bool NQT_O1 = true; #else constexpr bool NQT_O1 = false; #endif // SINGULARITY_NQT_O1 +constexpr LogType log_type = (TRUE_LOGS && FP32) ? LogType::SINGLE + : (TRUE_LOGS && FP64) ? LogType::TRUE + : NQT_O1 ? LogType::NQT1 + : LogType::NQT2; } // namespace Settings template diff --git a/singularity-eos/base/sp5/singularity_eos_sp5.hpp b/singularity-eos/base/sp5/singularity_eos_sp5.hpp index 36ee90192e..a7f2832045 100644 --- a/singularity-eos/base/sp5/singularity_eos_sp5.hpp +++ b/singularity-eos/base/sp5/singularity_eos_sp5.hpp @@ -20,6 +20,7 @@ namespace SP5 { constexpr char defaultSesFileName[] = "sesame_table.sp5"; +constexpr char logType[] = "log_type"; namespace Depends { constexpr char logRhoLogSie[] = "dependsLogRhoLogSie"; diff --git a/singularity-eos/eos/eos_spiner.hpp b/singularity-eos/eos/eos_spiner.hpp index 952e0f0cf0..6da8e47c4a 100644 --- a/singularity-eos/eos/eos_spiner.hpp +++ b/singularity-eos/eos/eos_spiner.hpp @@ -622,6 +622,14 @@ inline SpinerEOSDependsRhoT::SpinerEOSDependsRhoT(const std::string &filename, i herr_t status = H5_SUCCESS; file = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + int log_type = FastMath::LogType::NQT1; + if (H5LTfind_attribute(file, SP5::logType)) { + H5LTget_attribute_int(file, "/", SP5::logType, &log_type); + } + PORTABLE_ALWAYS_REQUIRE( + log_type == FastMath::Settings::log_type, + "Log mode used at runtime must be identical to the one used to generate the file!"); + matGroup = H5Gopen(file, matid_str.c_str(), H5P_DEFAULT); lTGroup = H5Gopen(matGroup, SP5::Depends::logRhoLogT, H5P_DEFAULT); coldGroup = H5Gopen(matGroup, SP5::Depends::coldCurve, H5P_DEFAULT); From d9f29c084f23e3eeda0526c3c56c7abe1c7d82c2 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 20 Nov 2024 10:27:00 -0700 Subject: [PATCH 18/20] add ability to densify rho/T in fast log table for stellar collapse --- singularity-eos/eos/eos_stellar_collapse.hpp | 57 +++++++++++--------- test/test_eos_stellar_collapse.cpp | 3 ++ 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/singularity-eos/eos/eos_stellar_collapse.hpp b/singularity-eos/eos/eos_stellar_collapse.hpp index d99c6b2c92..ccf9c119cd 100644 --- a/singularity-eos/eos/eos_stellar_collapse.hpp +++ b/singularity-eos/eos/eos_stellar_collapse.hpp @@ -225,6 +225,17 @@ class StellarCollapse : public EosBase { // Collapse, so I think we can leave it here for now? inline static void dataBoxToFastLogs(DataBox &db, DataBox &scratch, bool dependent_var_log); + inline static Real log10toNQT(const Real x) { + return FastMath::log10(std::pow(10, x)); + }; + inline static Real NQTtolog10(const Real x) { return std::log10(FastMath::pow10(x)); }; + inline static auto gridToNQT(const Grid_t &g, const int n) { + const Real l10min = g.min(); + const Real l10max = g.max(); + const Real lmin = log10toNQT(l10min); + const Real lmax = log10toNQT(l10max); + return Grid_t(lmin, lmax, n); + }; std::size_t DynamicMemorySizeInBytes() const; std::size_t DumpDynamicMemory(char *dst); @@ -407,6 +418,8 @@ class StellarCollapse : public EosBase { static constexpr Real DELTASMOOTH = 10.0; static constexpr int MF_W = 3; static constexpr int MF_S = (2 * MF_W + 1) * (2 * MF_W + 1) * (2 * MF_W + 1); + + static constexpr Real DENSE_FACTOR = 1.25; }; // ====================================================================== @@ -839,7 +852,16 @@ inline void StellarCollapse::LoadFromStellarCollapseFile_(const std::string &fil computeBulkModulus_(); // Re-interpolate tables in case we want fast-log gridding - DataBox scratch(numYe_, numT_, numRho_); + int nT_new = static_cast(numT_ * DENSE_FACTOR); + int nR_new = static_cast(numRho_ * DENSE_FACTOR); + DataBox scratch(numYe_, nT_new, nR_new); + + Grid_t gT_new = gridToNQT(lP_.range(1), nT_new); + Grid_t gR_new = gridToNQT(lP_.range(0), nR_new); + scratch.setRange(2, lP_.range(2)); + scratch.setRange(1, gT_new); + scratch.setRange(0, gR_new); + // logged quantities dataBoxToFastLogs(lP_, scratch, true); dataBoxToFastLogs(lE_, scratch, true); @@ -859,6 +881,8 @@ inline void StellarCollapse::LoadFromStellarCollapseFile_(const std::string &fil dataBoxToFastLogs(lBMod_, scratch, false); // Generate bounds + numT_ = nT_new; + numRho_ = nR_new; Ye_grid = lP_.range(2); lT_grid = lP_.range(1); lRho_grid = lP_.range(0); @@ -944,30 +968,17 @@ inline void StellarCollapse::readSCDset_(const hid_t &file_id, const std::string // Assume index 3 is linear, indexes 2 and 1 are logarithmic inline void StellarCollapse::dataBoxToFastLogs(DataBox &db, DataBox &scratch, bool dependent_var_log) { - auto log10toNQT = [](const Real x) { return FastMath::log10(std::pow(10, x)); }; - auto NQTtolog10 = [](const Real x) { return std::log10(FastMath::pow10(x)); }; - auto gridToNQT = [&](const Grid_t &g) { - const Real l10min = g.min(); - const Real l10max = g.max(); - const Real lmin = log10toNQT(l10min); - const Real lmax = log10toNQT(l10max); - return Grid_t(lmin, lmax, g.nPoints()); - }; - - auto &r2 = db.range(2); - auto &r1 = db.range(1); - auto &r0 = db.range(0); - - Grid_t newr1 = gridToNQT(r1); - Grid_t newr0 = gridToNQT(r0); + auto &r2 = scratch.range(2); + auto &r1 = scratch.range(1); + auto &r0 = scratch.range(0); for (int i2 = 0; i2 < r2.nPoints(); ++i2) { Real x2 = r2.x(i2); - for (int i1 = 0; i1 < newr1.nPoints(); ++i1) { - Real lx1 = newr1.x(i1); + for (int i1 = 0; i1 < r1.nPoints(); ++i1) { + Real lx1 = r1.x(i1); Real l10x1 = NQTtolog10(lx1); - for (int i0 = 0; i0 < newr0.nPoints(); ++i0) { - Real lx0 = newr0.x(i0); + for (int i0 = 0; i0 < r0.nPoints(); ++i0) { + Real lx0 = r0.x(i0); Real l10x0 = NQTtolog10(lx0); Real val = db.interpToReal(x2, l10x1, l10x0); if (dependent_var_log) { @@ -977,12 +988,10 @@ inline void StellarCollapse::dataBoxToFastLogs(DataBox &db, DataBox &scratch, } } } + db.copyMetadata(scratch); for (int i = 0; i < db.size(); ++i) { db(i) = scratch(i); } - // range(2) is already ok - db.setRange(1, newr1); - db.setRange(0, newr0); } inline void StellarCollapse::medianFilter_(DataBox &db) { diff --git a/test/test_eos_stellar_collapse.cpp b/test/test_eos_stellar_collapse.cpp index 8b0a9fcfe1..53524019bd 100644 --- a/test/test_eos_stellar_collapse.cpp +++ b/test/test_eos_stellar_collapse.cpp @@ -147,6 +147,9 @@ SCENARIO("Test 3D reinterpolation to fast log grid", "[StellarCollapse]") { THEN("We can re-interpolate to fast log space") { StellarCollapse::DataBox scratch(N2, N1, N0); + scratch.setRange(2, db.range(2)); + scratch.setRange(1, StellarCollapse::gridToNQT(db.range(1), N1)); + scratch.setRange(0, StellarCollapse::gridToNQT(db.range(0), N0)); StellarCollapse::dataBoxToFastLogs(db, scratch, true); AND_THEN("The fast-log gridded table contains correct ranges") { From c8ea509776a1125c7bcf4dd7e3f765be09563cc0 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 20 Nov 2024 19:58:34 -0700 Subject: [PATCH 19/20] BMOD IS LOGGED --- singularity-eos/eos/eos_stellar_collapse.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singularity-eos/eos/eos_stellar_collapse.hpp b/singularity-eos/eos/eos_stellar_collapse.hpp index ccf9c119cd..1320f5ac51 100644 --- a/singularity-eos/eos/eos_stellar_collapse.hpp +++ b/singularity-eos/eos/eos_stellar_collapse.hpp @@ -878,7 +878,7 @@ inline void StellarCollapse::LoadFromStellarCollapseFile_(const std::string &fil dataBoxToFastLogs(Abar_, scratch, false); dataBoxToFastLogs(Zbar_, scratch, false); // And bulk modulus - dataBoxToFastLogs(lBMod_, scratch, false); + dataBoxToFastLogs(lBMod_, scratch, true); // Generate bounds numT_ = nT_new; From 32abaada823fdf3e1d9325eb04df4b5938aa2013 Mon Sep 17 00:00:00 2001 From: Jonah Miller Date: Wed, 20 Nov 2024 20:21:27 -0700 Subject: [PATCH 20/20] zero-initialize sieOffset --- singularity-eos/eos/eos_stellar_collapse.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/singularity-eos/eos/eos_stellar_collapse.hpp b/singularity-eos/eos/eos_stellar_collapse.hpp index 1320f5ac51..9901fffa8c 100644 --- a/singularity-eos/eos/eos_stellar_collapse.hpp +++ b/singularity-eos/eos/eos_stellar_collapse.hpp @@ -401,7 +401,7 @@ class StellarCollapse : public EosBase { Real CvNormal_, bModNormal_, dPdENormal_, dVdTNormal_; // offsets must be non-negative - Real lEOffset_; + Real lEOffset_ = 0; // defaults to zero static constexpr Real lRhoOffset_ = 0.0; // TODO(JMM): Address if this ever changes static constexpr Real lTOffset_ = 0.0; static constexpr Real lPOffset_ = 0.0;