Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nqt o2 #433

Merged
merged 20 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,4 @@ jobs:
#..
make
make install
make test
ctest --output-on-failure
75 changes: 61 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -96,16 +96,17 @@ option(SINGULARITY_BETTER_DEBUG_FLAGS "Better debug flags for singularity" ON)
option(SINGULARITY_HIDE_MORE_WARNINGS "hide more warnings" OFF)

# toggle code options
option(SINGULARITY_USE_SINGLE_LOGS
"Use single precision logs. Can harm accuracy." OFF)
option(SINGULARITY_USE_TRUE_LOG_GRIDDING
"Use grids that conform to log spacing." OFF)
# TODO(JMM): Should this automatically be activated when true log gridding is
# off?
cmake_dependent_option(
SINGULARITY_USE_HIGH_RISK_MATH
"Use integer aliased logs, may not be portable" OFF
"NOT SINGULARITY_USE_TRUE_LOG_GRIDDING" OFF)
"Use grids that conform to log spacing." OFF)
cmake_dependent_option(SINGULARITY_USE_SINGLE_LOGS
"Use single precision logs. Only available for true log gridding. Can harm accuracy."
OFF "SINGULARITY_USE_TRUE_LOG_GRIDDING" OFF)
option(SINGULARITY_NQT_ORDER_1
Yurlungur marked this conversation as resolved.
Show resolved Hide resolved
"In NQT logs, use first order. Faster but less accurate."
OFF)
option(SINGULARITY_NQT_PORTABLE
"In NQT logs, use portable, rather than bithacked implementation. Slower, but more likely to function on exotic architectures."
OFF)

# misc options
option(SINGULARITY_FORCE_SUBMODULE_MODE "Submodule mode" OFF)
Expand All @@ -126,6 +127,26 @@ set(SINGULARITY_PLUGINS "" CACHE STRING "List of paths to plugin directories")
set(SINGULARITY_VARIANT "singularity-eos/eos/default_variant.hpp" CACHE STRING
"The include path for the file containing the definition of singularity::EOS.")

# Detect ARM architecture
set(SINGULARITY_ON_ARM OFF CACHE BOOL "We are running on an ARM system")
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm|aarch64)")
if (NOT SINGULARITY_USE_CUDA)
message(STATUS
"ARM architecture detected: ${CMAKE_SYSTEM_PROCESSOR}")
set(SINGULARITY_ON_ARM ON CACHE BOOL
"We are running on an ARM system")
endif()
endif()

if (SINGULAIRTY_ON_ARM)
if (NOT SINGULARITY_USE_TRUE_LOG_GRIDDING)
message(WARNING
"Fast logs not necessarily better on ARM CPU systems. "
"You may wish to build with "
"-DSINGULARITY_USE_TRUE_LOG_GRIDDING=ON.")
endif()
endif()

# ------------------------------------------------------------------------------#
# singularity-eos Library
# ------------------------------------------------------------------------------#
Expand Down Expand Up @@ -163,6 +184,15 @@ if(SINGULARITY_USE_FORTRAN)
include(CMakeDetermineFortranCompiler)
endif()

# Big endianness
include(TestBigEndian)
TEST_BIG_ENDIAN(IS_BIG_ENDIAN)
if (BIG_ENDIAN)
message(WARNING "Big endian detected! "
"Integer aliasing as currently implemented will not function. "
"Please set -DSINGULARITY_NQT_PORTABLE=ON.")
endif()

include(GNUInstallDirs)

if(SINGULARITY_BUILD_PYTHON)
Expand Down Expand Up @@ -254,17 +284,34 @@ if(SINGULARITY_BUILD_SESAME2SPINER)
add_subdirectory(sesame2spiner)
endif()

# Define the full version as a string macro
target_compile_definitions(singularity-eos_Interface INTERFACE
SINGULARITY_VERSION=\"${PROJECT_VERSION}\"
)

# Optionally, define major, minor, and patch versions separately
target_compile_definitions(singularity-eos_Interface INTERFACE
SINGULARITY_VERSION_MAJOR=${PROJECT_VERSION_MAJOR}
SINGULARITY_VERSION_MINOR=${PROJECT_VERSION_MINOR}
SINGULARITY_VERSION_PATCH=${PROJECT_VERSION_PATCH}
)

# defines
if (SINGULARITY_USE_TRUE_LOG_GRIDDING)
target_compile_definitions(singularity-eos_Interface
INTERFACE SINGULARITY_USE_TRUE_LOG_GRIDDING)
endif()
if(SINGULARITY_USE_SINGLE_LOGS)
target_compile_definitions(singularity-eos_Interface INTERFACE SINGULARITY_USE_SINGLE_LOGS)
target_compile_definitions(singularity-eos_Interface
INTERFACE SINGULARITY_USE_SINGLE_LOGS)
endif()
if(SINGULARITY_USE_HIGH_RISK_MATH)
if(SINGULARITY_NQT_ORDER_1)
target_compile_definitions(singularity-eos_Interface
INTERFACE SINGULARITY_USE_HIGH_RISK_MATH)
INTERFACE SINGULARITY_NQT_ORDER_1)
endif()
if (SINGULARITY_USE_TRUE_LOG_GRIDDING)
if(SINGULARITY_NQT_PORTABLE)
target_compile_definitions(singularity-eos_Interface
INTERFACE SINGULARITY_USE_TRUE_LOG_GRIDDING)
INTERFACE SINGULARITY_NQT_PORTABLE)
endif()
if(SINGULARITY_TEST_SESAME)
target_compile_definitions(singularity-eos_Interface INTERFACE SINGULARITY_TEST_SESAME)
Expand Down
4 changes: 3 additions & 1 deletion doc/sphinx/src/building.rst
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,10 @@ The main CMake options to configure building are in the following table:
``SINGULARITY_BETTER_DEBUG_FLAGS`` ON Enables nicer GPU debug flags. May interfere with in-tree builds as a submodule.
``SINGULARITY_HIDE_MORE_WARNINGS`` OFF Makes warnings less verbose. May interfere with in-tree builds as a submodule.
``SINGULARITY_FORCE_SUBMODULE_MODE`` OFF Force build in _submodule_ mode.
``SINGULARITY_USE_SINGLE_LOGS`` OFF Use single precision logarithms (may degrade accuracy).
``SINGULARITY_USE_TRUE_LOG_GRIDDING`` OFF Use grids that conform to logarithmic spacing.
``SINGULARITY_USE_SINGLE_LOGS`` OFF Use single precision logarithms (may degrade accuracy).
``SINGULARITY_NQT_ORDER_1`` OFF For fast logs, use the less accurate but faster 1st-order version.
``SINGULARITY_NQT_PORTABLE`` OFF For fast logs, use the slower but endianness-independent implementation.
====================================== ======= ===========================================

More options are available to modify only if certain other options or
Expand Down
50 changes: 29 additions & 21 deletions doc/sphinx/src/contributing.rst
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The change to this documentation is where the core of the method is. Essentially the quadratic interpolation of $\lg(mantissa)$ on the interval $[0.5, 1)$ has the advantage that the NQT function is C1 everywhere, which improves convergence of linear interpolation. The function itself is still a 100% accurate version of itself, but improving the continuity class of the method improves convergence of stencil ops.

Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,7 @@ number :math:`x` is represented as a mantissa and an exponent in base

x = m 2^e

for mantissa :math:`m` and exponent :math:`e`. The mantiss is
for mantissa :math:`m` and exponent :math:`e`. The mantissa is
guaranteed to be on the interval :math:`[1/2, 1)`. The standard
library of most low-level languages provides a performant and portable
routine to pick apart this represnetation, ``frexp``, which given a
Expand All @@ -667,34 +667,42 @@ of the mantissa plus the exponent:
\lg(x) = \lg(m) + e

Therefore, if we can find a fast, invertible approximation to
:math:`\lg(m)`, we will have achieved our goal. It turns out the
expression
:math:`\lg(m)`, we will have achieved our goal. The linear
interpolation of :math:`\lg(m)` on the given interval is

.. math::

2 (x - 1)

works pretty well, so we use that. (To convince yourself of this note
that for :math:`x=1/2` this expression returns -1 and for :math:`x=1`,
it returns 0, which are the correct values of :math:`\lg(x)` at the
bounds of the interval.) Thus our approximate, invertible expression
for :math:`\lg` is just
and the quadratic is

.. math::

2 (m - 1) + e

for the mantissa and exponent extracted via ``frexp``. This differs
from :math:`lg` by a maximum of about 0.1, which translates to at most
a 25 percent difference. As discussed above, however, the function
itself is an exact representation of itself and the difference from
:math:`lg` is acceptable.

To invert, we use the built in function that inverts ``frexp``,
``ldexp``, which combines the mantissa and exponent into the original
floating point representation.

This approach is described in more detail in our `short note`_ on the topic.
-\frac{4}{3} (m -2) (m - 1)

where the former produces a function that is piecewise :math:`C^1` and
everywhere continuous. The latter produces a function that is
everywhere :math:`C^1` and piecewise :math:`C^2`. Both functions are
exactly exactly invertible. To invert, we use the built in function
that inverts ``frexp``, ``ldexp``, which combines the mantissa and
exponent into the original floating point representation.

While these functions are not exactly logarithms, they do work for
building logarithmic grids. The smoothness of the transformation
mapping from linear to "not-quite-log" space does matter for
interpolation, however. Linear interpolation in "not-quite-log" space
converges at second order only in the :math:`L^1` norm for the linear
version of the approximate log. The quadratic version of the fast log
provides second-order convergence in all norms, however.

Finally, while ``frexp`` and ``ldexp`` are portable and performant,
they are less performant than hand-implemented, low-level methods that
leverage the bitwise structure of floating point numbers. These
"bithacked" or "integer aliased" implementations are what are used in
practice in the code.

This approach is described in more detail in our `short note`_ on the
topic.

Yurlungur marked this conversation as resolved.
Show resolved Hide resolved
.. _Short note: https://arxiv.org/abs/2206.08957

Expand Down
14 changes: 9 additions & 5 deletions doc/sphinx/src/models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1736,12 +1736,17 @@ return a ``Real`` number.
.. warning::
As with the SpinerEOS models, the stellar collapse models use fast
logs. You can switch the logs to true logs with the
``SINGULARITY_USE_TRUE_LOG_GRIDDING`` cmake option.
``SINGULARITY_USE_TRUE_LOG_GRIDDING`` cmake option. This may be
desirable on ARM-based architectures (e.g., ``aarch64``), where
a hardware log intrinsic is available.


.. note::
A more performant implementation of fast logs is available, but it
might not be portable. Enable it with the
``SINGULARITY_USE_HIGH_RISK_MATH`` cmake option.
The default implementation of our fast logs assumes little endian
numbers. If you are on a big-endian machine, they will not work
properly. If you encounter a big-endian machine, please report it
to us in the issues and (for now) enable the portable
implementation of fast logs with ``-DSINGULARITY_NQT_PORTABLE=ON``.

.. _Stellar Collapse: https://stellarcollapse.org/equationofstate.html

Expand All @@ -1750,7 +1755,6 @@ return a ``Real`` number.
.. _median filter: https://en.wikipedia.org/wiki/Median_filter



Helmholtz EOS
``````````````

Expand Down
6 changes: 6 additions & 0 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ add_executable(get_sound_speed_press
target_link_libraries(get_sound_speed_press PRIVATE
singularity-eos::singularity-eos)

if (SINGULARITY_USE_SPINER_WITH_HDF5)
add_executable(eos_grid eos_grid.cpp)
target_link_libraries(eos_grid PRIVATE
singularity-eos::singularity-eos)
endif()

if(SINGULARITY_USE_EOSPAC AND SINGULARITY_USE_SPINER_WITH_HDF5)
add_executable(get_sesame_state
get_sesame_state.cpp)
Expand Down
Loading
Loading