Merge branch 'develop' into value-alias

rcclay · Nov 10, 2023 · 6e86357 · 6e86357
2 parents 1776046 + 1c37a5b
commit 6e86357
Show file tree

Hide file tree

Showing 23 changed files with 329 additions and 280 deletions.
diff --git a/src/Numerics/SplineBound.hpp b/src/Numerics/SplineBound.hpp
@@ -0,0 +1,64 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2023 QMCPACK developers.
+//
+// File developed by: Ye Luo, [email protected], Argonne National Laboratory
+//
+// File created by: Ye Luo, [email protected], Argonne National Laboratory
+//
+//////////////////////////////////////////////////////////////////////////////////////
+
+#ifndef QMCPLUSPLUS_SPLINEBOUND_HPP
+#define QMCPLUSPLUS_SPLINEBOUND_HPP
+
+namespace qmcplusplus
+{
+
+/** break x into the integer part and residual part and apply bounds
+ * @param x input coordinate
+ * @param nmax input upper bound of the integer part
+ * @param ind output integer part
+ * @param dx output fractional part
+ *
+ * x in the range of [0, nmax+1) will be split correctly.
+ * x < 0, ind = 0, dx = 0
+ * x >= nmax+1, ind = nmax, dx = 1 - epsilon
+ *
+ * Attention: nmax is not the number grid points but the maximum allowed grid index
+ * For example, ng is the number of grid point.
+ * the actual grid points indices are 0, 1, ..., ng - 1.
+ * In a periodic/anti periodic spline, set nmax = ng - 1
+ * In a natural boundary spline, set nmax = ng - 2
+ * because the end point should be excluded and the last grid point has an index ng - 2.
+ */
+template<typename T, typename TRESIDUAL>
+inline void getSplineBound(const T x, const int nmax, int& ind, TRESIDUAL& dx)
+{
+  // lower bound
+  if (x < 0)
+  {
+    ind = 0;
+    dx  = T(0);
+  }
+  else
+  {
+#if defined(__INTEL_LLVM_COMPILER) || defined(__INTEL_CLANG_COMPILER)
+    T ipart = std::floor(x);
+    dx      = x - ipart;
+#else
+    T ipart;
+    dx = std::modf(x, &ipart);
+#endif
+    ind = static_cast<int>(ipart);
+    // upper bound
+    if (ind > nmax)
+    {
+      ind = nmax;
+      dx  = T(1) - std::numeric_limits<T>::epsilon();
+    }
+  }
+}
+} // namespace qmcplusplus
+#endif
diff --git a/src/Numerics/tests/CMakeLists.txt b/src/Numerics/tests/CMakeLists.txt
@@ -26,6 +26,7 @@ set(UTEST_SRCS
     test_OneDimCubicSplineLinearGrid.cpp
     test_one_dim_cubic_spline.cpp
     test_Quadrature.cpp
+    test_SplineBound.cpp
     test_RotationMatrix3D.cpp)
 
 # Run gen_gto.py to create these files.  They may take a long time to compile.

diff --git a/src/Numerics/tests/test_SplineBound.cpp b/src/Numerics/tests/test_SplineBound.cpp
@@ -0,0 +1,49 @@
+//////////////////////////////////////////////////////////////////////////////////////
+// This file is distributed under the University of Illinois/NCSA Open Source License.
+// See LICENSE file in top directory for details.
+//
+// Copyright (c) 2018 Jeongnim Kim and QMCPACK developers.
+//
+// File developed by: Mark Dewing, [email protected], Argonne National Laboratory
+//
+// File created by: Mark Dewing, [email protected], Argonne National Laboratory
+//////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "catch.hpp"
+#include "Numerics/SplineBound.hpp"
+
+namespace qmcplusplus
+{
+template<typename T>
+void test_spline_bounds()
+{
+  T x = 2.2;
+  T dx;
+  int ind;
+  int ng = 10;
+  getSplineBound(x, ng, ind, dx);
+  CHECK(dx == Approx(0.2));
+  REQUIRE(ind == 2);
+
+  // check clamping to a maximum index value
+  x = 10.5;
+  getSplineBound(x, ng, ind, dx);
+  CHECK(dx == Approx(0.5));
+  REQUIRE(ind == 10);
+
+  x = 11.5;
+  getSplineBound(x, ng, ind, dx);
+  CHECK(dx == Approx(1.0));
+  REQUIRE(ind == 10);
+
+  // check clamping to a zero index value
+  x = -1.3;
+  getSplineBound(x, ng, ind, dx);
+  CHECK(dx == Approx(0.0));
+  REQUIRE(ind == 0);
+}
+
+TEST_CASE("getSplineBound double", "[numerics]") { test_spline_bounds<double>(); }
+TEST_CASE("getSplineBound float", "[numerics]") { test_spline_bounds<float>(); }
+} // namespace qmcplusplus
diff --git a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.cpp b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.cpp
@@ -43,22 +43,25 @@ void BsplineFunctor<REAL>::mw_evaluateVGL(const int iat,
      * Bspline coefs device pointer sizeof(T*), DeltaRInv sizeof(T) and cutoff_radius sizeof(T)
      * these contents change based on the group of the target particle, so it is prepared per call.
      */
-  transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
+  transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2 + sizeof(int)) * num_groups);
   REAL** mw_coefs_ptr        = reinterpret_cast<REAL**>(transfer_buffer.data());
   REAL* mw_DeltaRInv_ptr     = reinterpret_cast<REAL*>(transfer_buffer.data() + sizeof(REAL*) * num_groups);
   REAL* mw_cutoff_radius_ptr = mw_DeltaRInv_ptr + num_groups;
+  int* mw_max_index_ptr      = reinterpret_cast<int*>(transfer_buffer.data() + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
   for (int ig = 0; ig < num_groups; ig++)
     if (functors[ig])
     {
       mw_coefs_ptr[ig]         = functors[ig]->spline_coefs_->device_data();
       mw_DeltaRInv_ptr[ig]     = functors[ig]->DeltaRInv;
       mw_cutoff_radius_ptr[ig] = functors[ig]->cutoff_radius;
+      mw_max_index_ptr[ig]     = functors[ig]->getMaxIndex();
     }
     else
     {
       mw_coefs_ptr[ig]         = nullptr;
       mw_DeltaRInv_ptr[ig]     = 0.0;
       mw_cutoff_radius_ptr[ig] = 0.0; // important! Prevent spline evaluation to access nullptr.
+      mw_max_index_ptr[ig]     = 0;
     }
 
   auto* transfer_buffer_ptr = transfer_buffer.data();
@@ -84,6 +87,7 @@ void BsplineFunctor<REAL>::mw_evaluateVGL(const int iat,
     REAL** mw_coefs        = reinterpret_cast<REAL**>(transfer_buffer_ptr);
     REAL* mw_DeltaRInv     = reinterpret_cast<REAL*>(transfer_buffer_ptr + sizeof(REAL*) * num_groups);
     REAL* mw_cutoff_radius = mw_DeltaRInv + num_groups;
+    int* mw_max_index      = reinterpret_cast<int*>(transfer_buffer_ptr + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
 
     REAL* cur_allu = mw_cur_allu + ip * n_padded * 3;
 
@@ -96,14 +100,15 @@ void BsplineFunctor<REAL>::mw_evaluateVGL(const int iat,
       const REAL* coefs  = mw_coefs[ig];
       REAL DeltaRInv     = mw_DeltaRInv[ig];
       REAL cutoff_radius = mw_cutoff_radius[ig];
+      const int max_index  = mw_max_index[ig];
 
       REAL r = dist[j];
       REAL u(0);
       REAL dudr(0);
       REAL d2udr2(0);
       if (r < cutoff_radius)
       {
-        u = evaluate_impl(dist[j], coefs, DeltaRInv, dudr, d2udr2);
+        u = evaluate_impl(r, coefs, DeltaRInv, max_index, dudr, d2udr2);
         dudr *= REAL(1) / r;
       }
       // save u, dudr/r and d2udr2 to cur_allu
@@ -142,22 +147,25 @@ void BsplineFunctor<REAL>::mw_evaluateV(const int num_groups,
      * Bspline coefs device pointer sizeof(REAL*), DeltaRInv sizeof(REAL), cutoff_radius sizeof(REAL)
      * these contents change based on the group of the target particle, so it is prepared per call.
      */
-  transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
+  transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2 + sizeof(int)) * num_groups);
   REAL** mw_coefs_ptr        = reinterpret_cast<REAL**>(transfer_buffer.data());
   REAL* mw_DeltaRInv_ptr     = reinterpret_cast<REAL*>(transfer_buffer.data() + sizeof(REAL*) * num_groups);
   REAL* mw_cutoff_radius_ptr = mw_DeltaRInv_ptr + num_groups;
+  int* mw_max_index_ptr      = reinterpret_cast<int*>(transfer_buffer.data() + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
   for (int ig = 0; ig < num_groups; ig++)
     if (functors[ig])
     {
       mw_coefs_ptr[ig]         = functors[ig]->spline_coefs_->device_data();
       mw_DeltaRInv_ptr[ig]     = functors[ig]->DeltaRInv;
       mw_cutoff_radius_ptr[ig] = functors[ig]->cutoff_radius;
+      mw_max_index_ptr[ig]     = functors[ig]->getMaxIndex();
     }
     else
     {
       mw_coefs_ptr[ig]         = nullptr;
       mw_DeltaRInv_ptr[ig]     = 0.0;
       mw_cutoff_radius_ptr[ig] = 0.0; // important! Prevent spline evaluation to access nullptr.
+      mw_max_index_ptr[ig]     = 0;
     }
 
   auto* transfer_buffer_ptr = transfer_buffer.data();
@@ -173,25 +181,19 @@ void BsplineFunctor<REAL>::mw_evaluateV(const int num_groups,
     REAL** mw_coefs        = reinterpret_cast<REAL**>(transfer_buffer_ptr);
     REAL* mw_DeltaRInv     = reinterpret_cast<REAL*>(transfer_buffer_ptr + sizeof(REAL*) * num_groups);
     REAL* mw_cutoff_radius = mw_DeltaRInv + num_groups;
+    int* mw_max_index      = reinterpret_cast<int*>(transfer_buffer_ptr + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
     PRAGMA_OFFLOAD("omp parallel for reduction(+: sum)")
     for (int j = 0; j < n_src; j++)
     {
       const int ig       = grp_ids[j];
       const REAL* coefs  = mw_coefs[ig];
       REAL DeltaRInv     = mw_DeltaRInv[ig];
       REAL cutoff_radius = mw_cutoff_radius[ig];
+      const int max_index  = mw_max_index[ig];
 
       REAL r = dist[j];
       if (j != ref_at[ip] && r < cutoff_radius)
-      {
-        r *= DeltaRInv;
-        REAL ipart;
-        const REAL t = std::modf(r, &ipart);
-        const int i  = (int)ipart;
-        sum += coefs[i + 0] * (((A0 * t + A1) * t + A2) * t + A3) + coefs[i + 1] * (((A4 * t + A5) * t + A6) * t + A7) +
-            coefs[i + 2] * (((A8 * t + A9) * t + A10) * t + A11) +
-            coefs[i + 3] * (((A12 * t + A13) * t + A14) * t + A15);
-      }
+        sum += evaluate_impl(r, coefs, DeltaRInv, max_index);
     }
     mw_vals[ip] = sum;
   }
@@ -221,25 +223,27 @@ void BsplineFunctor<REAL>::mw_updateVGL(const int iat,
      * and packed accept list at most nw * sizeof(int)
      * these contents change based on the group of the target particle, so it is prepared per call.
      */
-  transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2) * num_groups + nw * sizeof(int));
+  transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2 + sizeof(int)) * num_groups + nw * sizeof(int));
   REAL** mw_coefs_ptr        = reinterpret_cast<REAL**>(transfer_buffer.data());
   REAL* mw_DeltaRInv_ptr     = reinterpret_cast<REAL*>(transfer_buffer.data() + sizeof(REAL*) * num_groups);
   REAL* mw_cutoff_radius_ptr = mw_DeltaRInv_ptr + num_groups;
-  int* accepted_indices =
-      reinterpret_cast<int*>(transfer_buffer.data() + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
+  int* mw_max_index_ptr      = reinterpret_cast<int*>(transfer_buffer.data() + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
+  int* accepted_indices  = mw_max_index_ptr + num_groups;
 
   for (int ig = 0; ig < num_groups; ig++)
     if (functors[ig])
     {
       mw_coefs_ptr[ig]         = functors[ig]->spline_coefs_->device_data();
       mw_DeltaRInv_ptr[ig]     = functors[ig]->DeltaRInv;
       mw_cutoff_radius_ptr[ig] = functors[ig]->cutoff_radius;
+      mw_max_index_ptr[ig]     = functors[ig]->getMaxIndex();
     }
     else
     {
       mw_coefs_ptr[ig]         = nullptr;
       mw_DeltaRInv_ptr[ig]     = 0.0;
       mw_cutoff_radius_ptr[ig] = 0.0; // important! Prevent spline evaluation to access nullptr.
+      mw_max_index_ptr[ig]     = 0;
     }
 
   int nw_accepted = 0;
@@ -259,8 +263,8 @@ void BsplineFunctor<REAL>::mw_updateVGL(const int iat,
     REAL** mw_coefs        = reinterpret_cast<REAL**>(transfer_buffer_ptr);
     REAL* mw_DeltaRInv     = reinterpret_cast<REAL*>(transfer_buffer_ptr + sizeof(REAL*) * num_groups);
     REAL* mw_cutoff_radius = mw_DeltaRInv + num_groups;
-    int* accepted_indices =
-        reinterpret_cast<int*>(transfer_buffer_ptr + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
+    int* mw_max_index      = reinterpret_cast<int*>(transfer_buffer_ptr + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
+    int* accepted_indices  = mw_max_index + num_groups;
     const int ip = accepted_indices[iw];
 
     const REAL* dist_new   = mw_dist + ip * dist_stride;
@@ -290,14 +294,15 @@ void BsplineFunctor<REAL>::mw_updateVGL(const int iat,
       const REAL* coefs  = mw_coefs[ig];
       REAL DeltaRInv     = mw_DeltaRInv[ig];
       REAL cutoff_radius = mw_cutoff_radius[ig];
+      const int max_index  = mw_max_index[ig];
 
       REAL r = dist_old[j];
       REAL u(0);
       REAL dudr(0);
       REAL d2udr2(0);
       if (r < cutoff_radius)
       {
-        u = evaluate_impl(dist_old[j], coefs, DeltaRInv, dudr, d2udr2);
+        u = evaluate_impl(dist_old[j], coefs, DeltaRInv, max_index, dudr, d2udr2);
         dudr *= REAL(1) / r;
       }
       // update Uat, dUat, d2Uat