Skip to content

Commit

Permalink
Merge branch 'develop' into value-alias
Browse files Browse the repository at this point in the history
  • Loading branch information
prckent authored Nov 10, 2023
2 parents 1776046 + 1c37a5b commit 6e86357
Show file tree
Hide file tree
Showing 23 changed files with 329 additions and 280 deletions.
64 changes: 64 additions & 0 deletions src/Numerics/SplineBound.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2023 QMCPACK developers.
//
// File developed by: Ye Luo, [email protected], Argonne National Laboratory
//
// File created by: Ye Luo, [email protected], Argonne National Laboratory
//
//////////////////////////////////////////////////////////////////////////////////////

#ifndef QMCPLUSPLUS_SPLINEBOUND_HPP
#define QMCPLUSPLUS_SPLINEBOUND_HPP

namespace qmcplusplus
{

/** break x into the integer part and residual part and apply bounds
* @param x input coordinate
* @param nmax input upper bound of the integer part
* @param ind output integer part
* @param dx output fractional part
*
* x in the range of [0, nmax+1) will be split correctly.
* x < 0, ind = 0, dx = 0
* x >= nmax+1, ind = nmax, dx = 1 - epsilon
*
* Attention: nmax is not the number grid points but the maximum allowed grid index
* For example, ng is the number of grid point.
* the actual grid points indices are 0, 1, ..., ng - 1.
* In a periodic/anti periodic spline, set nmax = ng - 1
* In a natural boundary spline, set nmax = ng - 2
* because the end point should be excluded and the last grid point has an index ng - 2.
*/
template<typename T, typename TRESIDUAL>
inline void getSplineBound(const T x, const int nmax, int& ind, TRESIDUAL& dx)
{
// lower bound
if (x < 0)
{
ind = 0;
dx = T(0);
}
else
{
#if defined(__INTEL_LLVM_COMPILER) || defined(__INTEL_CLANG_COMPILER)
T ipart = std::floor(x);
dx = x - ipart;
#else
T ipart;
dx = std::modf(x, &ipart);
#endif
ind = static_cast<int>(ipart);
// upper bound
if (ind > nmax)
{
ind = nmax;
dx = T(1) - std::numeric_limits<T>::epsilon();
}
}
}
} // namespace qmcplusplus
#endif
1 change: 1 addition & 0 deletions src/Numerics/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ set(UTEST_SRCS
test_OneDimCubicSplineLinearGrid.cpp
test_one_dim_cubic_spline.cpp
test_Quadrature.cpp
test_SplineBound.cpp
test_RotationMatrix3D.cpp)

# Run gen_gto.py to create these files. They may take a long time to compile.
Expand Down
49 changes: 49 additions & 0 deletions src/Numerics/tests/test_SplineBound.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
//////////////////////////////////////////////////////////////////////////////////////
// This file is distributed under the University of Illinois/NCSA Open Source License.
// See LICENSE file in top directory for details.
//
// Copyright (c) 2018 Jeongnim Kim and QMCPACK developers.
//
// File developed by: Mark Dewing, [email protected], Argonne National Laboratory
//
// File created by: Mark Dewing, [email protected], Argonne National Laboratory
//////////////////////////////////////////////////////////////////////////////////////


#include "catch.hpp"
#include "Numerics/SplineBound.hpp"

namespace qmcplusplus
{
template<typename T>
void test_spline_bounds()
{
T x = 2.2;
T dx;
int ind;
int ng = 10;
getSplineBound(x, ng, ind, dx);
CHECK(dx == Approx(0.2));
REQUIRE(ind == 2);

// check clamping to a maximum index value
x = 10.5;
getSplineBound(x, ng, ind, dx);
CHECK(dx == Approx(0.5));
REQUIRE(ind == 10);

x = 11.5;
getSplineBound(x, ng, ind, dx);
CHECK(dx == Approx(1.0));
REQUIRE(ind == 10);

// check clamping to a zero index value
x = -1.3;
getSplineBound(x, ng, ind, dx);
CHECK(dx == Approx(0.0));
REQUIRE(ind == 0);
}

TEST_CASE("getSplineBound double", "[numerics]") { test_spline_bounds<double>(); }
TEST_CASE("getSplineBound float", "[numerics]") { test_spline_bounds<float>(); }
} // namespace qmcplusplus
41 changes: 23 additions & 18 deletions src/QMCWaveFunctions/Jastrow/BsplineFunctor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,22 +43,25 @@ void BsplineFunctor<REAL>::mw_evaluateVGL(const int iat,
* Bspline coefs device pointer sizeof(T*), DeltaRInv sizeof(T) and cutoff_radius sizeof(T)
* these contents change based on the group of the target particle, so it is prepared per call.
*/
transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2 + sizeof(int)) * num_groups);
REAL** mw_coefs_ptr = reinterpret_cast<REAL**>(transfer_buffer.data());
REAL* mw_DeltaRInv_ptr = reinterpret_cast<REAL*>(transfer_buffer.data() + sizeof(REAL*) * num_groups);
REAL* mw_cutoff_radius_ptr = mw_DeltaRInv_ptr + num_groups;
int* mw_max_index_ptr = reinterpret_cast<int*>(transfer_buffer.data() + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
for (int ig = 0; ig < num_groups; ig++)
if (functors[ig])
{
mw_coefs_ptr[ig] = functors[ig]->spline_coefs_->device_data();
mw_DeltaRInv_ptr[ig] = functors[ig]->DeltaRInv;
mw_cutoff_radius_ptr[ig] = functors[ig]->cutoff_radius;
mw_max_index_ptr[ig] = functors[ig]->getMaxIndex();
}
else
{
mw_coefs_ptr[ig] = nullptr;
mw_DeltaRInv_ptr[ig] = 0.0;
mw_cutoff_radius_ptr[ig] = 0.0; // important! Prevent spline evaluation to access nullptr.
mw_max_index_ptr[ig] = 0;
}

auto* transfer_buffer_ptr = transfer_buffer.data();
Expand All @@ -84,6 +87,7 @@ void BsplineFunctor<REAL>::mw_evaluateVGL(const int iat,
REAL** mw_coefs = reinterpret_cast<REAL**>(transfer_buffer_ptr);
REAL* mw_DeltaRInv = reinterpret_cast<REAL*>(transfer_buffer_ptr + sizeof(REAL*) * num_groups);
REAL* mw_cutoff_radius = mw_DeltaRInv + num_groups;
int* mw_max_index = reinterpret_cast<int*>(transfer_buffer_ptr + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);

REAL* cur_allu = mw_cur_allu + ip * n_padded * 3;

Expand All @@ -96,14 +100,15 @@ void BsplineFunctor<REAL>::mw_evaluateVGL(const int iat,
const REAL* coefs = mw_coefs[ig];
REAL DeltaRInv = mw_DeltaRInv[ig];
REAL cutoff_radius = mw_cutoff_radius[ig];
const int max_index = mw_max_index[ig];

REAL r = dist[j];
REAL u(0);
REAL dudr(0);
REAL d2udr2(0);
if (r < cutoff_radius)
{
u = evaluate_impl(dist[j], coefs, DeltaRInv, dudr, d2udr2);
u = evaluate_impl(r, coefs, DeltaRInv, max_index, dudr, d2udr2);
dudr *= REAL(1) / r;
}
// save u, dudr/r and d2udr2 to cur_allu
Expand Down Expand Up @@ -142,22 +147,25 @@ void BsplineFunctor<REAL>::mw_evaluateV(const int num_groups,
* Bspline coefs device pointer sizeof(REAL*), DeltaRInv sizeof(REAL), cutoff_radius sizeof(REAL)
* these contents change based on the group of the target particle, so it is prepared per call.
*/
transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2 + sizeof(int)) * num_groups);
REAL** mw_coefs_ptr = reinterpret_cast<REAL**>(transfer_buffer.data());
REAL* mw_DeltaRInv_ptr = reinterpret_cast<REAL*>(transfer_buffer.data() + sizeof(REAL*) * num_groups);
REAL* mw_cutoff_radius_ptr = mw_DeltaRInv_ptr + num_groups;
int* mw_max_index_ptr = reinterpret_cast<int*>(transfer_buffer.data() + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
for (int ig = 0; ig < num_groups; ig++)
if (functors[ig])
{
mw_coefs_ptr[ig] = functors[ig]->spline_coefs_->device_data();
mw_DeltaRInv_ptr[ig] = functors[ig]->DeltaRInv;
mw_cutoff_radius_ptr[ig] = functors[ig]->cutoff_radius;
mw_max_index_ptr[ig] = functors[ig]->getMaxIndex();
}
else
{
mw_coefs_ptr[ig] = nullptr;
mw_DeltaRInv_ptr[ig] = 0.0;
mw_cutoff_radius_ptr[ig] = 0.0; // important! Prevent spline evaluation to access nullptr.
mw_max_index_ptr[ig] = 0;
}

auto* transfer_buffer_ptr = transfer_buffer.data();
Expand All @@ -173,25 +181,19 @@ void BsplineFunctor<REAL>::mw_evaluateV(const int num_groups,
REAL** mw_coefs = reinterpret_cast<REAL**>(transfer_buffer_ptr);
REAL* mw_DeltaRInv = reinterpret_cast<REAL*>(transfer_buffer_ptr + sizeof(REAL*) * num_groups);
REAL* mw_cutoff_radius = mw_DeltaRInv + num_groups;
int* mw_max_index = reinterpret_cast<int*>(transfer_buffer_ptr + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
PRAGMA_OFFLOAD("omp parallel for reduction(+: sum)")
for (int j = 0; j < n_src; j++)
{
const int ig = grp_ids[j];
const REAL* coefs = mw_coefs[ig];
REAL DeltaRInv = mw_DeltaRInv[ig];
REAL cutoff_radius = mw_cutoff_radius[ig];
const int max_index = mw_max_index[ig];

REAL r = dist[j];
if (j != ref_at[ip] && r < cutoff_radius)
{
r *= DeltaRInv;
REAL ipart;
const REAL t = std::modf(r, &ipart);
const int i = (int)ipart;
sum += coefs[i + 0] * (((A0 * t + A1) * t + A2) * t + A3) + coefs[i + 1] * (((A4 * t + A5) * t + A6) * t + A7) +
coefs[i + 2] * (((A8 * t + A9) * t + A10) * t + A11) +
coefs[i + 3] * (((A12 * t + A13) * t + A14) * t + A15);
}
sum += evaluate_impl(r, coefs, DeltaRInv, max_index);
}
mw_vals[ip] = sum;
}
Expand Down Expand Up @@ -221,25 +223,27 @@ void BsplineFunctor<REAL>::mw_updateVGL(const int iat,
* and packed accept list at most nw * sizeof(int)
* these contents change based on the group of the target particle, so it is prepared per call.
*/
transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2) * num_groups + nw * sizeof(int));
transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2 + sizeof(int)) * num_groups + nw * sizeof(int));
REAL** mw_coefs_ptr = reinterpret_cast<REAL**>(transfer_buffer.data());
REAL* mw_DeltaRInv_ptr = reinterpret_cast<REAL*>(transfer_buffer.data() + sizeof(REAL*) * num_groups);
REAL* mw_cutoff_radius_ptr = mw_DeltaRInv_ptr + num_groups;
int* accepted_indices =
reinterpret_cast<int*>(transfer_buffer.data() + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
int* mw_max_index_ptr = reinterpret_cast<int*>(transfer_buffer.data() + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
int* accepted_indices = mw_max_index_ptr + num_groups;

for (int ig = 0; ig < num_groups; ig++)
if (functors[ig])
{
mw_coefs_ptr[ig] = functors[ig]->spline_coefs_->device_data();
mw_DeltaRInv_ptr[ig] = functors[ig]->DeltaRInv;
mw_cutoff_radius_ptr[ig] = functors[ig]->cutoff_radius;
mw_max_index_ptr[ig] = functors[ig]->getMaxIndex();
}
else
{
mw_coefs_ptr[ig] = nullptr;
mw_DeltaRInv_ptr[ig] = 0.0;
mw_cutoff_radius_ptr[ig] = 0.0; // important! Prevent spline evaluation to access nullptr.
mw_max_index_ptr[ig] = 0;
}

int nw_accepted = 0;
Expand All @@ -259,8 +263,8 @@ void BsplineFunctor<REAL>::mw_updateVGL(const int iat,
REAL** mw_coefs = reinterpret_cast<REAL**>(transfer_buffer_ptr);
REAL* mw_DeltaRInv = reinterpret_cast<REAL*>(transfer_buffer_ptr + sizeof(REAL*) * num_groups);
REAL* mw_cutoff_radius = mw_DeltaRInv + num_groups;
int* accepted_indices =
reinterpret_cast<int*>(transfer_buffer_ptr + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
int* mw_max_index = reinterpret_cast<int*>(transfer_buffer_ptr + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups);
int* accepted_indices = mw_max_index + num_groups;
const int ip = accepted_indices[iw];

const REAL* dist_new = mw_dist + ip * dist_stride;
Expand Down Expand Up @@ -290,14 +294,15 @@ void BsplineFunctor<REAL>::mw_updateVGL(const int iat,
const REAL* coefs = mw_coefs[ig];
REAL DeltaRInv = mw_DeltaRInv[ig];
REAL cutoff_radius = mw_cutoff_radius[ig];
const int max_index = mw_max_index[ig];

REAL r = dist_old[j];
REAL u(0);
REAL dudr(0);
REAL d2udr2(0);
if (r < cutoff_radius)
{
u = evaluate_impl(dist_old[j], coefs, DeltaRInv, dudr, d2udr2);
u = evaluate_impl(dist_old[j], coefs, DeltaRInv, max_index, dudr, d2udr2);
dudr *= REAL(1) / r;
}
// update Uat, dUat, d2Uat
Expand Down
Loading

0 comments on commit 6e86357

Please sign in to comment.