lanl · Yurlungur · Nov 21, 2024 · Nov 17, 2024 · Nov 17, 2024 · Nov 18, 2024
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -45,4 +45,4 @@ jobs:
                   #..
             make
             make install
-            make test
+            ctest --output-on-failure
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -96,16 +96,17 @@ option(SINGULARITY_BETTER_DEBUG_FLAGS "Better debug flags for singularity" ON)
 option(SINGULARITY_HIDE_MORE_WARNINGS "hide more warnings" OFF)
 
 # toggle code options
-option(SINGULARITY_USE_SINGLE_LOGS
-       "Use single precision logs. Can harm accuracy." OFF)
 option(SINGULARITY_USE_TRUE_LOG_GRIDDING
-       "Use grids that conform to log spacing." OFF)
-# TODO(JMM): Should this automatically be activated when true log gridding is
-# off?
-cmake_dependent_option(
-  SINGULARITY_USE_HIGH_RISK_MATH
-  "Use integer aliased logs, may not be portable" OFF
-  "NOT SINGULARITY_USE_TRUE_LOG_GRIDDING" OFF)
+  "Use grids that conform to log spacing." OFF)
+cmake_dependent_option(SINGULARITY_USE_SINGLE_LOGS
+  "Use single precision logs. Only available for true log gridding. Can harm accuracy."
+  OFF "SINGULARITY_USE_TRUE_LOG_GRIDDING" OFF)
+option(SINGULARITY_NQT_ORDER_1
+  "In NQT logs, use first order. Faster but less accurate."
+  OFF)
+option(SINGULARITY_NQT_PORTABLE
+  "In NQT logs, use portable, rather than bithacked implementation. Slower, but more likely to function on exotic architectures."
+  OFF)
 
 # misc options
 option(SINGULARITY_FORCE_SUBMODULE_MODE "Submodule mode" OFF)
@@ -126,6 +127,26 @@ set(SINGULARITY_PLUGINS "" CACHE STRING "List of paths to plugin directories")
 set(SINGULARITY_VARIANT "singularity-eos/eos/default_variant.hpp" CACHE STRING
   "The include path for the file containing the definition of singularity::EOS.")
 
+# Detect ARM architecture
+set(SINGULARITY_ON_ARM OFF CACHE BOOL "We are running on an ARM system")
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)")
+    if (NOT SINGULARITY_USE_CUDA)
+      message(STATUS
+        "ARM architecture detected: ${CMAKE_SYSTEM_PROCESSOR}")
+      set(SINGULARITY_ON_ARM ON CACHE BOOL
+        "We are running on an ARM system")
+    endif()
+endif()
+
+if (SINGULAIRTY_ON_ARM)
+  if (NOT SINGULARITY_USE_TRUE_LOG_GRIDDING)
+    message(WARNING
+      "Fast logs not necessarily better on ARM CPU systems. "
+      "You may wish to build with "
+      "-DSINGULARITY_USE_TRUE_LOG_GRIDDING=ON.")
+  endif()
+endif()
+
 # ------------------------------------------------------------------------------#
 # singularity-eos Library
 # ------------------------------------------------------------------------------#
@@ -163,6 +184,15 @@ if(SINGULARITY_USE_FORTRAN)
   include(CMakeDetermineFortranCompiler)
 endif()
 
+# Big endianness
+include(TestBigEndian)
+TEST_BIG_ENDIAN(IS_BIG_ENDIAN)
+if (BIG_ENDIAN)
+  message(WARNING "Big endian detected! "
+    "Integer aliasing as currently implemented will not function. "
+    "Please set -DSINGULARITY_NQT_PORTABLE=ON.")
+endif()
+
 include(GNUInstallDirs)
 
 if(SINGULARITY_BUILD_PYTHON)
@@ -255,16 +285,21 @@ if(SINGULARITY_BUILD_SESAME2SPINER)
 endif()
 
 # defines
+if (SINGULARITY_USE_TRUE_LOG_GRIDDING)
+  target_compile_definitions(singularity-eos_Interface
+    INTERFACE SINGULARITY_USE_TRUE_LOG_GRIDDING)
+endif()
 if(SINGULARITY_USE_SINGLE_LOGS)
-  target_compile_definitions(singularity-eos_Interface INTERFACE SINGULARITY_USE_SINGLE_LOGS)
+  target_compile_definitions(singularity-eos_Interface
+    INTERFACE SINGULARITY_USE_SINGLE_LOGS)
 endif()
-if(SINGULARITY_USE_HIGH_RISK_MATH)
+if(SINGULARITY_NQT_ORDER_1)
   target_compile_definitions(singularity-eos_Interface
-                             INTERFACE SINGULARITY_USE_HIGH_RISK_MATH)
+    INTERFACE SINGULARITY_NQT_ORDER_1)
 endif()
-if (SINGULARITY_USE_TRUE_LOG_GRIDDING)
+if(SINGULARITY_NQT_PORTABLE)
   target_compile_definitions(singularity-eos_Interface
-    INTERFACE SINGULARITY_USE_TRUE_LOG_GRIDDING)
+    INTERFACE SINGULARITY_NQT_PORTABLE)
 endif()
 if(SINGULARITY_TEST_SESAME)
   target_compile_definitions(singularity-eos_Interface INTERFACE SINGULARITY_TEST_SESAME)

diff --git a/doc/sphinx/src/building.rst b/doc/sphinx/src/building.rst
@@ -114,8 +114,10 @@ The main CMake options to configure building are in the following table:
  ``SINGULARITY_BETTER_DEBUG_FLAGS``      ON       Enables nicer GPU debug flags. May interfere with in-tree builds as a submodule.
  ``SINGULARITY_HIDE_MORE_WARNINGS``      OFF      Makes warnings less verbose. May interfere with in-tree builds as a submodule.
  ``SINGULARITY_FORCE_SUBMODULE_MODE``    OFF      Force build in _submodule_ mode.
- ``SINGULARITY_USE_SINGLE_LOGS``         OFF      Use single precision logarithms (may degrade accuracy).
  ``SINGULARITY_USE_TRUE_LOG_GRIDDING``   OFF      Use grids that conform to logarithmic spacing.
+ ``SINGULARITY_USE_SINGLE_LOGS``         OFF      Use single precision logarithms (may degrade accuracy).
+ ``SINGULARITY_NQT_ORDER_1``             OFF      For fast logs, use the less accurate but faster 1st-order version.
+ ``SINGULARITY_NQT_PORTABLE``            OFF      For fast logs, use the slower but endianness-independent implementation.
 ====================================== ======= ===========================================
 
 More options are available to modify only if certain other options or

diff --git a/doc/sphinx/src/contributing.rst b/doc/sphinx/src/contributing.rst
@@ -667,34 +667,42 @@ of the mantissa plus the exponent:
    \lg(x) = \lg(m) + e
 
 Therefore, if we can find a fast, invertible approximation to
-:math:`\lg(m)`, we will have achieved our goal. It turns out the
-expression
+:math:`\lg(m)`, we will have achieved our goal. The linear
+interpolation of :math:`\lg(m)` on the given interval is
 
 .. math::
 
    2 (x - 1)
 
-works pretty well, so we use that. (To convince yourself of this note
-that for :math:`x=1/2` this expression returns -1 and for :math:`x=1`,
-it returns 0, which are the correct values of :math:`\lg(x)` at the
-bounds of the interval.) Thus our approximate, invertible expression
-for :math:`\lg` is just
+and the quadratic is
 
 .. math::
 
-   2 (m - 1) + e
-
-for the mantissa and exponent extracted via ``frexp``. This differs
-from :math:`lg` by a maximum of about 0.1, which translates to at most
-a 25 percent difference. As discussed above, however, the function
-itself is an exact representation of itself and the difference from
-:math:`lg` is acceptable.
-
-To invert, we use the built in function that inverts ``frexp``,
-``ldexp``, which combines the mantissa and exponent into the original
-floating point representation.
-
-This approach is described in more detail in our `short note`_ on the topic.
+  -\frac{4}{3} (m -2) (m - 1)
+
+where the former produces a function that is piecewise :math:`C^1` and
+everywhere continuous. The latter produces a function that is
+everywhere :math:`C^1` and piecewise :math:`C^2`. Both functions are
+exactly exactly invertible. To invert, we use the built in function
+that inverts ``frexp``, ``ldexp``, which combines the mantissa and
+exponent into the original floating point representation.
+
+While these functions are not exactly logarithms, they do work for
+building logarithmic grids. The smoothness of the transformation
+mapping from linear to "not-quite-log" space does matter for
+interpolation, however. Linear interpolation in "not-quite-log" space
+converges at second order only in the :math:`L^1` norm for the linear
+version of the approximate log. The quadratic version of the fast log
+provides second-order convergence in all norms, however.
+
+Finally, while ``frexp`` and ``ldexp`` are portable and performant,
+they are less performant than hand-implemented, low-level methods that
+leverage the bitwise structure of floating point numbers. These
+"bithacked" or "integer aliased" implementations are what are used in
+practice in the code.
+
+This approach is described in more detail in our `short note`_ on the
+topic.
 
 .. _Short note: https://arxiv.org/abs/2206.08957
 

diff --git a/doc/sphinx/src/models.rst b/doc/sphinx/src/models.rst
@@ -1736,12 +1736,17 @@ return a ``Real`` number.
 .. warning::
     As with the SpinerEOS models, the stellar collapse models use fast
     logs. You can switch the logs to true logs with the
-    ``SINGULARITY_USE_TRUE_LOG_GRIDDING`` cmake option.
+    ``SINGULARITY_USE_TRUE_LOG_GRIDDING`` cmake option. This may be
+    desirable on ARM-based architectures (e.g., ``aarch64``), where
+    a hardware log intrinsic is available.
+
 
 .. note::
-    A more performant implementation of fast logs is available, but it
-    might not be portable. Enable it with the
-    ``SINGULARITY_USE_HIGH_RISK_MATH`` cmake option.
+   The default implementation of our fast logs assumes little endian
+   numbers. If you are on a big-endian machine, they will not work
+   properly. If you encounter a big-endian machine, please report it
+   to us in the issues and (for now) enable the portable
+   implementation of fast logs with ``-DSINGULARITY_NQT_PORTABLE=ON``.
 
 .. _Stellar Collapse: https://stellarcollapse.org/equationofstate.html
 
@@ -1750,7 +1755,6 @@ return a ``Real`` number.
 .. _median filter: https://en.wikipedia.org/wiki/Median_filter
 
 
-
 Helmholtz EOS
 ``````````````
 

diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
@@ -17,6 +17,12 @@ add_executable(get_sound_speed_press
 target_link_libraries(get_sound_speed_press PRIVATE
   singularity-eos::singularity-eos)
 
+if (SINGULARITY_USE_SPINER_WITH_HDF5)
+  add_executable(eos_grid eos_grid.cpp)
+  target_link_libraries(eos_grid PRIVATE
+    singularity-eos::singularity-eos)
+endif()
+
 if(SINGULARITY_USE_EOSPAC AND SINGULARITY_USE_SPINER_WITH_HDF5)
   add_executable(get_sesame_state
     get_sesame_state.cpp)

diff --git a/example/eos_grid.cpp b/example/eos_grid.cpp
@@ -0,0 +1,165 @@
+//------------------------------------------------------------------------------
+// © 2024. Triad National Security, LLC. All rights reserved.  This
+// program was produced under U.S. Government contract 89233218CNA000001
+// for Los Alamos National Laboratory (LANL), which is operated by Triad
+// National Security, LLC for the U.S.  Department of Energy/National
+// Nuclear Security Administration. All rights in the program are
+// reserved by Triad National Security, LLC, and the U.S. Department of
+// Energy/National Nuclear Security Administration. The Government is
+// granted for itself and others acting on its behalf a nonexclusive,
+// paid-up, irrevocable worldwide license in this material to reproduce,
+// prepare derivative works, distribute copies to the public, perform
+// publicly and display publicly, and to permit others to do so.
+//------------------------------------------------------------------------------
+
+// C headers
+#include <cmath>
+
+// C++ headers
+#include <chrono>
+#include <iostream>
+#include <string>
+
+// HDF5 we'll need this for I/O
+#include <hdf5.h>
+#include <hdf5_hl.h>
+
+// This library contains portable utilities
+#include <ports-of-call/portability.hpp>
+
+// This contains useful tools for preventing things like divide by zero
+#include <singularity-eos/base/robust_utils.hpp>
+// Needed to import the eos models
+#include <singularity-eos/eos/eos.hpp>
+
+// This library contains the spiner table object, which we will use to
+// store our output
+#include <spiner/databox.hpp>
+#include <spiner/interpolation.hpp>
+
+// for timing
+using duration = std::chrono::nanoseconds;
+
+// These are the specializations of spiner we will use
+using DataBox = Spiner::DataBox<Real>;
+using RegularGrid1D = Spiner::RegularGrid1D<Real>;
+
+// Set the EOS you want to use here.
+using EOS = singularity::StellarCollapse;
+
+int main(int argc, char *argv[]) {
+  // This is needed for Kokkos
+#ifdef PORTABILITY_STRATEGY_KOKKOS
+  Kokkos::initialize();
+#endif
+  // note the scoping here. This means Kokkos objects are cleaned up
+  // before finalization
+  {
+    if (argc < 8) {
+      std::cerr << "Usage: " << argv[0]
+                << " savefilename log10rhomin log10rhomax numrho log10Tmin log10Tmax "
+                   "numT eosargs..."
+                << std::endl;
+      std::exit(1);
+    }
+    const std::string savefilename = argv[1];
+    const double lrho_min = atof(argv[2]);
+    const double lrho_max = atof(argv[3]);
+    const int nrho = atoi(argv[4]);
+    RegularGrid1D lrhoGrid(lrho_min, lrho_max, nrho);
+
+    const double lT_min = atof(argv[5]);
+    const double lT_max = atof(argv[6]);
+    const int nT = atoi(argv[7]);
+    RegularGrid1D lTGrid(lT_min, lT_max, nT);
+
+    // This is the databox we will evaluate onto
+    DataBox press_d(Spiner::AllocationTarget::Device, nrho, nT);
+    press_d.setRange(0, lTGrid); // note 0 is rightmost index
+    press_d.setRange(1, lrhoGrid);
+    // This is the host mirror, we will copy into it
+    DataBox press_h(Spiner::AllocationTarget::Host, nrho, nT);
+    press_h.setRange(0, lTGrid);
+    press_h.setRange(1, lrhoGrid);
+
+    // These are the pre-evaluated density and temperature points
+    std::cout << "Filling grid points..." << std::endl;
+    DataBox rhos(Spiner::AllocationTarget::Device, nrho);
+    portableFor(
+        "Set rho", 0, nrho,
+        PORTABLE_LAMBDA(const int i) { rhos(i) = std::pow(10., lrhoGrid.x(i)); });
+    DataBox Ts(Spiner::AllocationTarget::Device, nT);
+    portableFor(
+        "Set T", 0, nT,
+        PORTABLE_LAMBDA(const int i) { Ts(i) = std::pow(10., lTGrid.x(i)); });
+
+    // if you have arguments you want to pass into your EOS load them here
+    const std::string loadname = argv[8];
+    const double Ye = atof(argv[9]);
+
+    std::cout << "Initializing EOS..." << std::endl;
+    EOS eos_h(loadname);
+    EOS eos_d = eos_h.GetOnDevice();
+
+    std::cout << "Evaluating..." << std::endl;
+    // start timers
+#ifdef PORTABILITY_STRATEGY_KOKKOS
+    Kokkos::fence();
+#endif
+    auto start = std::chrono::high_resolution_clock::now();
+    portableFor(
+        "P(rho, T)", 0, nrho, 0, nT, PORTABLE_LAMBDA(const int j, const int i) {
+          // Some EOS objects take lambdas. If you want to set
+          // them locally, do it like this. Otherwise, you may
+          // want to create a device-side array for them to
+          // avoid race conditions.
+          Real lambda[2];
+          lambda[0] = Ye;
+          press_d(j, i) = eos_d.PressureFromDensityTemperature(rhos(j), Ts(i), lambda);
+        });
+    // stop timers
+#ifdef PORTABILITY_STRATEGY_KOKKOS
+    Kokkos::fence();
+#endif
+    auto stop = std::chrono::high_resolution_clock::now();
+    auto duration_device = std::chrono::duration_cast<duration>(stop - start);
+
+    // copy data from device to host
+    std::cout << "Copying to host..." << std::endl;
+    portableCopyToHost(press_h.data(), press_d.data(), press_h.sizeBytes());
+
+    // For fun lets also evalaute all on host
+    std::cout << "Generate arrays on host..." << std::endl;
+    DataBox rhos_h(Spiner::AllocationTarget::Host, nrho);
+    for (int i = 0; i < nrho; ++i) {
+      rhos_h(i) = std::pow(10., lrhoGrid.x(i));
+    }
+    DataBox Ts_h(Spiner::AllocationTarget::Host, nT);
+    for (int i = 0; i < nT; ++i) {
+      Ts_h(i) = std::pow(10., lTGrid.x(i));
+    }
+    std::cout << "Looping on host" << std::endl;
+    start = std::chrono::high_resolution_clock::now();
+    for (int j = 0; j < nT; ++j) {
+      for (int i = 0; i < nrho; ++i) {
+        Real lambda[2];
+        lambda[0] = Ye;
+        press_h(j, i) = eos_h.PressureFromDensityTemperature(rhos_h(j), Ts_h(i), lambda);
+      }
+    }
+    stop = std::chrono::high_resolution_clock::now();
+    auto duration_host = std::chrono::duration_cast<duration>(stop - start);
+
+    std::cout << "Saving file..." << std::endl;
+    press_h.saveHDF(savefilename); // if you have HDF5 enabled, thats all it takes
+    std::cout << "Time per point, (ns):\n"
+              << "host = " << duration_host.count() / (static_cast<Real>(nrho) * nT)
+              << "\n"
+              << "device = " << duration_device.count() / (static_cast<Real>(nrho) * nT)
+              << std::endl;
+  }
+#ifdef PORTABILITY_STRATEGY_KOKKOS
+  Kokkos::finalize();
+#endif
+  return 0;
+}
diff --git a/example/get_sound_speed_press.cpp b/example/get_sound_speed_press.cpp
@@ -74,7 +74,7 @@ inline void PressureSoundSpeedFromDensityEnergyDensity(double *rho, // inputs
 
   // Loop through cells and use the FillEos function call
   for (int i = 0; i < Ncells; ++i) {
-    double eps, temp, cv;
+    double temp, cv;
     // FillEos is very general and is capable of modifying any of the inputs,
     // so const vars cannot be passed into it. However, it is often more performant
     // than making individual function calls.