diff --git a/src/Numerics/SplineBound.hpp b/src/Numerics/SplineBound.hpp new file mode 100644 index 0000000000..15e9bac7c7 --- /dev/null +++ b/src/Numerics/SplineBound.hpp @@ -0,0 +1,64 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2023 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +////////////////////////////////////////////////////////////////////////////////////// + +#ifndef QMCPLUSPLUS_SPLINEBOUND_HPP +#define QMCPLUSPLUS_SPLINEBOUND_HPP + +namespace qmcplusplus +{ + +/** break x into the integer part and residual part and apply bounds + * @param x input coordinate + * @param nmax input upper bound of the integer part + * @param ind output integer part + * @param dx output fractional part + * + * x in the range of [0, nmax+1) will be split correctly. + * x < 0, ind = 0, dx = 0 + * x >= nmax+1, ind = nmax, dx = 1 - epsilon + * + * Attention: nmax is not the number grid points but the maximum allowed grid index + * For example, ng is the number of grid point. + * the actual grid points indices are 0, 1, ..., ng - 1. + * In a periodic/anti periodic spline, set nmax = ng - 1 + * In a natural boundary spline, set nmax = ng - 2 + * because the end point should be excluded and the last grid point has an index ng - 2. + */ +template +inline void getSplineBound(const T x, const int nmax, int& ind, TRESIDUAL& dx) +{ + // lower bound + if (x < 0) + { + ind = 0; + dx = T(0); + } + else + { +#if defined(__INTEL_LLVM_COMPILER) || defined(__INTEL_CLANG_COMPILER) + T ipart = std::floor(x); + dx = x - ipart; +#else + T ipart; + dx = std::modf(x, &ipart); +#endif + ind = static_cast(ipart); + // upper bound + if (ind > nmax) + { + ind = nmax; + dx = T(1) - std::numeric_limits::epsilon(); + } + } +} +} // namespace qmcplusplus +#endif diff --git a/src/Numerics/tests/CMakeLists.txt b/src/Numerics/tests/CMakeLists.txt index d7949ef275..ea19dfa549 100644 --- a/src/Numerics/tests/CMakeLists.txt +++ b/src/Numerics/tests/CMakeLists.txt @@ -26,6 +26,7 @@ set(UTEST_SRCS test_OneDimCubicSplineLinearGrid.cpp test_one_dim_cubic_spline.cpp test_Quadrature.cpp + test_SplineBound.cpp test_RotationMatrix3D.cpp) # Run gen_gto.py to create these files. They may take a long time to compile. diff --git a/src/Numerics/tests/test_SplineBound.cpp b/src/Numerics/tests/test_SplineBound.cpp new file mode 100644 index 0000000000..6afd540030 --- /dev/null +++ b/src/Numerics/tests/test_SplineBound.cpp @@ -0,0 +1,49 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2018 Jeongnim Kim and QMCPACK developers. +// +// File developed by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +// +// File created by: Mark Dewing, mdewing@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + + +#include "catch.hpp" +#include "Numerics/SplineBound.hpp" + +namespace qmcplusplus +{ +template +void test_spline_bounds() +{ + T x = 2.2; + T dx; + int ind; + int ng = 10; + getSplineBound(x, ng, ind, dx); + CHECK(dx == Approx(0.2)); + REQUIRE(ind == 2); + + // check clamping to a maximum index value + x = 10.5; + getSplineBound(x, ng, ind, dx); + CHECK(dx == Approx(0.5)); + REQUIRE(ind == 10); + + x = 11.5; + getSplineBound(x, ng, ind, dx); + CHECK(dx == Approx(1.0)); + REQUIRE(ind == 10); + + // check clamping to a zero index value + x = -1.3; + getSplineBound(x, ng, ind, dx); + CHECK(dx == Approx(0.0)); + REQUIRE(ind == 0); +} + +TEST_CASE("getSplineBound double", "[numerics]") { test_spline_bounds(); } +TEST_CASE("getSplineBound float", "[numerics]") { test_spline_bounds(); } +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.cpp b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.cpp index b809b0301e..f71d346ac7 100644 --- a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.cpp +++ b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.cpp @@ -43,22 +43,25 @@ void BsplineFunctor::mw_evaluateVGL(const int iat, * Bspline coefs device pointer sizeof(T*), DeltaRInv sizeof(T) and cutoff_radius sizeof(T) * these contents change based on the group of the target particle, so it is prepared per call. */ - transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2) * num_groups); + transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2 + sizeof(int)) * num_groups); REAL** mw_coefs_ptr = reinterpret_cast(transfer_buffer.data()); REAL* mw_DeltaRInv_ptr = reinterpret_cast(transfer_buffer.data() + sizeof(REAL*) * num_groups); REAL* mw_cutoff_radius_ptr = mw_DeltaRInv_ptr + num_groups; + int* mw_max_index_ptr = reinterpret_cast(transfer_buffer.data() + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups); for (int ig = 0; ig < num_groups; ig++) if (functors[ig]) { mw_coefs_ptr[ig] = functors[ig]->spline_coefs_->device_data(); mw_DeltaRInv_ptr[ig] = functors[ig]->DeltaRInv; mw_cutoff_radius_ptr[ig] = functors[ig]->cutoff_radius; + mw_max_index_ptr[ig] = functors[ig]->getMaxIndex(); } else { mw_coefs_ptr[ig] = nullptr; mw_DeltaRInv_ptr[ig] = 0.0; mw_cutoff_radius_ptr[ig] = 0.0; // important! Prevent spline evaluation to access nullptr. + mw_max_index_ptr[ig] = 0; } auto* transfer_buffer_ptr = transfer_buffer.data(); @@ -84,6 +87,7 @@ void BsplineFunctor::mw_evaluateVGL(const int iat, REAL** mw_coefs = reinterpret_cast(transfer_buffer_ptr); REAL* mw_DeltaRInv = reinterpret_cast(transfer_buffer_ptr + sizeof(REAL*) * num_groups); REAL* mw_cutoff_radius = mw_DeltaRInv + num_groups; + int* mw_max_index = reinterpret_cast(transfer_buffer_ptr + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups); REAL* cur_allu = mw_cur_allu + ip * n_padded * 3; @@ -96,6 +100,7 @@ void BsplineFunctor::mw_evaluateVGL(const int iat, const REAL* coefs = mw_coefs[ig]; REAL DeltaRInv = mw_DeltaRInv[ig]; REAL cutoff_radius = mw_cutoff_radius[ig]; + const int max_index = mw_max_index[ig]; REAL r = dist[j]; REAL u(0); @@ -103,7 +108,7 @@ void BsplineFunctor::mw_evaluateVGL(const int iat, REAL d2udr2(0); if (r < cutoff_radius) { - u = evaluate_impl(dist[j], coefs, DeltaRInv, dudr, d2udr2); + u = evaluate_impl(r, coefs, DeltaRInv, max_index, dudr, d2udr2); dudr *= REAL(1) / r; } // save u, dudr/r and d2udr2 to cur_allu @@ -142,22 +147,25 @@ void BsplineFunctor::mw_evaluateV(const int num_groups, * Bspline coefs device pointer sizeof(REAL*), DeltaRInv sizeof(REAL), cutoff_radius sizeof(REAL) * these contents change based on the group of the target particle, so it is prepared per call. */ - transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2) * num_groups); + transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2 + sizeof(int)) * num_groups); REAL** mw_coefs_ptr = reinterpret_cast(transfer_buffer.data()); REAL* mw_DeltaRInv_ptr = reinterpret_cast(transfer_buffer.data() + sizeof(REAL*) * num_groups); REAL* mw_cutoff_radius_ptr = mw_DeltaRInv_ptr + num_groups; + int* mw_max_index_ptr = reinterpret_cast(transfer_buffer.data() + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups); for (int ig = 0; ig < num_groups; ig++) if (functors[ig]) { mw_coefs_ptr[ig] = functors[ig]->spline_coefs_->device_data(); mw_DeltaRInv_ptr[ig] = functors[ig]->DeltaRInv; mw_cutoff_radius_ptr[ig] = functors[ig]->cutoff_radius; + mw_max_index_ptr[ig] = functors[ig]->getMaxIndex(); } else { mw_coefs_ptr[ig] = nullptr; mw_DeltaRInv_ptr[ig] = 0.0; mw_cutoff_radius_ptr[ig] = 0.0; // important! Prevent spline evaluation to access nullptr. + mw_max_index_ptr[ig] = 0; } auto* transfer_buffer_ptr = transfer_buffer.data(); @@ -173,6 +181,7 @@ void BsplineFunctor::mw_evaluateV(const int num_groups, REAL** mw_coefs = reinterpret_cast(transfer_buffer_ptr); REAL* mw_DeltaRInv = reinterpret_cast(transfer_buffer_ptr + sizeof(REAL*) * num_groups); REAL* mw_cutoff_radius = mw_DeltaRInv + num_groups; + int* mw_max_index = reinterpret_cast(transfer_buffer_ptr + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups); PRAGMA_OFFLOAD("omp parallel for reduction(+: sum)") for (int j = 0; j < n_src; j++) { @@ -180,18 +189,11 @@ void BsplineFunctor::mw_evaluateV(const int num_groups, const REAL* coefs = mw_coefs[ig]; REAL DeltaRInv = mw_DeltaRInv[ig]; REAL cutoff_radius = mw_cutoff_radius[ig]; + const int max_index = mw_max_index[ig]; REAL r = dist[j]; if (j != ref_at[ip] && r < cutoff_radius) - { - r *= DeltaRInv; - REAL ipart; - const REAL t = std::modf(r, &ipart); - const int i = (int)ipart; - sum += coefs[i + 0] * (((A0 * t + A1) * t + A2) * t + A3) + coefs[i + 1] * (((A4 * t + A5) * t + A6) * t + A7) + - coefs[i + 2] * (((A8 * t + A9) * t + A10) * t + A11) + - coefs[i + 3] * (((A12 * t + A13) * t + A14) * t + A15); - } + sum += evaluate_impl(r, coefs, DeltaRInv, max_index); } mw_vals[ip] = sum; } @@ -221,12 +223,12 @@ void BsplineFunctor::mw_updateVGL(const int iat, * and packed accept list at most nw * sizeof(int) * these contents change based on the group of the target particle, so it is prepared per call. */ - transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2) * num_groups + nw * sizeof(int)); + transfer_buffer.resize((sizeof(REAL*) + sizeof(REAL) * 2 + sizeof(int)) * num_groups + nw * sizeof(int)); REAL** mw_coefs_ptr = reinterpret_cast(transfer_buffer.data()); REAL* mw_DeltaRInv_ptr = reinterpret_cast(transfer_buffer.data() + sizeof(REAL*) * num_groups); REAL* mw_cutoff_radius_ptr = mw_DeltaRInv_ptr + num_groups; - int* accepted_indices = - reinterpret_cast(transfer_buffer.data() + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups); + int* mw_max_index_ptr = reinterpret_cast(transfer_buffer.data() + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups); + int* accepted_indices = mw_max_index_ptr + num_groups; for (int ig = 0; ig < num_groups; ig++) if (functors[ig]) @@ -234,12 +236,14 @@ void BsplineFunctor::mw_updateVGL(const int iat, mw_coefs_ptr[ig] = functors[ig]->spline_coefs_->device_data(); mw_DeltaRInv_ptr[ig] = functors[ig]->DeltaRInv; mw_cutoff_radius_ptr[ig] = functors[ig]->cutoff_radius; + mw_max_index_ptr[ig] = functors[ig]->getMaxIndex(); } else { mw_coefs_ptr[ig] = nullptr; mw_DeltaRInv_ptr[ig] = 0.0; mw_cutoff_radius_ptr[ig] = 0.0; // important! Prevent spline evaluation to access nullptr. + mw_max_index_ptr[ig] = 0; } int nw_accepted = 0; @@ -259,8 +263,8 @@ void BsplineFunctor::mw_updateVGL(const int iat, REAL** mw_coefs = reinterpret_cast(transfer_buffer_ptr); REAL* mw_DeltaRInv = reinterpret_cast(transfer_buffer_ptr + sizeof(REAL*) * num_groups); REAL* mw_cutoff_radius = mw_DeltaRInv + num_groups; - int* accepted_indices = - reinterpret_cast(transfer_buffer_ptr + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups); + int* mw_max_index = reinterpret_cast(transfer_buffer_ptr + (sizeof(REAL*) + sizeof(REAL) * 2) * num_groups); + int* accepted_indices = mw_max_index + num_groups; const int ip = accepted_indices[iw]; const REAL* dist_new = mw_dist + ip * dist_stride; @@ -290,6 +294,7 @@ void BsplineFunctor::mw_updateVGL(const int iat, const REAL* coefs = mw_coefs[ig]; REAL DeltaRInv = mw_DeltaRInv[ig]; REAL cutoff_radius = mw_cutoff_radius[ig]; + const int max_index = mw_max_index[ig]; REAL r = dist_old[j]; REAL u(0); @@ -297,7 +302,7 @@ void BsplineFunctor::mw_updateVGL(const int iat, REAL d2udr2(0); if (r < cutoff_radius) { - u = evaluate_impl(dist_old[j], coefs, DeltaRInv, dudr, d2udr2); + u = evaluate_impl(dist_old[j], coefs, DeltaRInv, max_index, dudr, d2udr2); dudr *= REAL(1) / r; } // update Uat, dUat, d2Uat diff --git a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h index 2ba85f7923..5f323e0463 100644 --- a/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h +++ b/src/QMCWaveFunctions/Jastrow/BsplineFunctor.h @@ -30,6 +30,7 @@ #include "OhmmsPETE/OhmmsVector.h" #include "Numerics/LinearFit.h" #include "OMPTarget/OffloadAlignedAllocators.hpp" +#include "Numerics/SplineBound.hpp" namespace qmcplusplus { @@ -94,6 +95,9 @@ struct BsplineFunctor : public OptimizableFunctorBase void setPeriodic(bool p) override { periodic = p; } + /// return the max allowed beginning index to access spline coefficients + int getMaxIndex() const { return spline_coefs_->size() - 4; } + void resize(int n) { NumParams = n; @@ -217,12 +221,12 @@ struct BsplineFunctor : public OptimizableFunctorBase REAL* mw_vals, Vector>& transfer_buffer); - inline static Real evaluate_impl(Real r, const Real* coefs, const Real DeltaRInv) + inline static Real evaluate_impl(Real r, const Real* coefs, const Real DeltaRInv, const int max_index) { r *= DeltaRInv; - REAL ipart; - const REAL t = std::modf(r, &ipart); - const int i = (int)ipart; + int i; + Real t; + getSplineBound(r, max_index, i, t); Real sCoef0 = coefs[i + 0]; Real sCoef1 = coefs[i + 1]; @@ -237,7 +241,7 @@ struct BsplineFunctor : public OptimizableFunctorBase { Real u(0); if (r < cutoff_radius) - u = evaluate_impl(r, spline_coefs_->data(), DeltaRInv); + u = evaluate_impl(r, spline_coefs_->data(), DeltaRInv, getMaxIndex()); return u; } @@ -245,12 +249,12 @@ struct BsplineFunctor : public OptimizableFunctorBase inline void evaluateAll(Real r, Real rinv) { Y = evaluate(r, dY, d2Y); } - inline static Real evaluate_impl(Real r, const Real* coefs, const Real DeltaRInv, Real& dudr, Real& d2udr2) + inline static Real evaluate_impl(Real r, const Real* coefs, const Real DeltaRInv, const int max_index, Real& dudr, Real& d2udr2) { r *= DeltaRInv; - REAL ipart; - const REAL t = std::modf(r, &ipart); - const int i = (int)ipart; + int i; + Real t; + getSplineBound(r, max_index, i, t); Real sCoef0 = coefs[i + 0]; Real sCoef1 = coefs[i + 1]; @@ -277,7 +281,7 @@ struct BsplineFunctor : public OptimizableFunctorBase d2udr2 = Real(0); if (r < cutoff_radius) - u = evaluate_impl(r, spline_coefs_->data(), DeltaRInv, dudr, d2udr2); + u = evaluate_impl(r, spline_coefs_->data(), DeltaRInv, getMaxIndex(), dudr, d2udr2); return u; } @@ -297,9 +301,9 @@ struct BsplineFunctor : public OptimizableFunctorBase // -2.0*evaluate(r-0.5*eps) // +1.0*evaluate(r-1.0*eps))/(eps*eps*eps); r *= DeltaRInv; - Real ipart, t; - t = std::modf(r, &ipart); - int i = (int)ipart; + int i; + Real t; + getSplineBound(r, getMaxIndex(), i, t); Real tp[4]; tp[0] = t * t * t; tp[1] = t * t; @@ -374,16 +378,15 @@ struct BsplineFunctor : public OptimizableFunctorBase if (r >= cutoff_radius) return false; r *= DeltaRInv; - Real ipart, t; - t = std::modf(r, &ipart); - int i = (int)ipart; + int i; + Real t; + getSplineBound(r, getMaxIndex(), i, t); Real tp[4]; tp[0] = t * t * t; tp[1] = t * t; tp[2] = t; tp[3] = 1.0; - auto& coefs = *spline_coefs_; SplineDerivs[0] = TinyVector(0.0); // d/dp_i u(r) SplineDerivs[i + 0][0] = A0 * tp[0] + A1 * tp[1] + A2 * tp[2] + A3 * tp[3]; @@ -440,8 +443,11 @@ struct BsplineFunctor : public OptimizableFunctorBase { if (r >= cutoff_radius) return false; - Real tp[4], v[4], ipart, t; - t = std::modf(r * DeltaRInv, &ipart); + r *= DeltaRInv; + int i; + Real t; + getSplineBound(r, getMaxIndex(), i, t); + Real tp[4], v[4]; tp[0] = t * t * t; tp[1] = t * t; tp[2] = t; @@ -450,7 +456,6 @@ struct BsplineFunctor : public OptimizableFunctorBase v[1] = A4 * tp[0] + A5 * tp[1] + A6 * tp[2] + A7 * tp[3]; v[2] = A8 * tp[0] + A9 * tp[1] + A10 * tp[2] + A11 * tp[3]; v[3] = A12 * tp[0] + A13 * tp[1] + A14 * tp[2] + A15 * tp[3]; - int i = (int)ipart; int imin = std::max(i, 1); int imax = std::min(i + 4, NumParams + 1) - 1; int n = imin - 1, j = imin - i; @@ -738,13 +743,15 @@ inline REAL BsplineFunctor::evaluateV(const int iat, Real d = 0.0; auto& coefs = *spline_coefs_; + const int max_index = getMaxIndex(); #pragma omp simd reduction(+ : d) for (int jat = 0; jat < iCount; jat++) { Real r = distArrayCompressed[jat]; r *= DeltaRInv; - const int i = (int)r; - const Real t = r - Real(i); + int i; + Real t; + getSplineBound(r, max_index, i, t); Real d1 = coefs[i + 0] * (((A0 * t + A1) * t + A2) * t + A3); Real d2 = coefs[i + 1] * (((A4 * t + A5) * t + A6) * t + A7); Real d3 = coefs[i + 2] * (((A8 * t + A9) * t + A10) * t + A11); @@ -792,6 +799,7 @@ inline void BsplineFunctor::evaluateVGL(const int iat, } auto& coefs = *spline_coefs_; + const int max_index = getMaxIndex(); #pragma omp simd for (int j = 0; j < iCount; j++) { @@ -799,8 +807,9 @@ inline void BsplineFunctor::evaluateVGL(const int iat, int iScatter = distIndices[j]; Real rinv = cOne / r; r *= DeltaRInv; - const int iGather = (int)r; - const Real t = r - Real(iGather); + int iGather; + Real t; + getSplineBound(r, max_index, iGather, t); Real sCoef0 = coefs[iGather + 0]; Real sCoef1 = coefs[iGather + 1]; diff --git a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.cpp b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.cpp index 588f323eff..6b556c40bb 100644 --- a/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.cpp +++ b/src/QMCWaveFunctions/LCAO/CuspCorrectionConstruction.cpp @@ -580,11 +580,11 @@ void applyCuspCorrection(const Matrix& info, ScopedTimer cuspApplyTimerWrapper(cuspApplyTimer); - LCAOrbitalSet phi("phi", std::unique_ptr(lcao.myBasisSet->makeClone())); - phi.setOrbitalSetSize(lcao.getOrbitalSetSize()); + LCAOrbitalSet phi("phi", std::unique_ptr(lcao.myBasisSet->makeClone()), + lcao.getOrbitalSetSize(), lcao.isIdentity()); - LCAOrbitalSet eta("eta", std::unique_ptr(lcao.myBasisSet->makeClone())); - eta.setOrbitalSetSize(lcao.getOrbitalSetSize()); + LCAOrbitalSet eta("eta", std::unique_ptr(lcao.myBasisSet->makeClone()), + lcao.getOrbitalSetSize(), lcao.isIdentity()); std::vector corrCenter(num_centers, "true"); @@ -675,12 +675,11 @@ void generateCuspInfo(Matrix& info, ScopedTimer createCuspTimerWrapper(cuspCreateTimer); - LCAOrbitalSet phi("phi", std::unique_ptr(lcao.myBasisSet->makeClone())); - phi.setOrbitalSetSize(lcao.getOrbitalSetSize()); - - LCAOrbitalSet eta("eta", std::unique_ptr(lcao.myBasisSet->makeClone())); - eta.setOrbitalSetSize(lcao.getOrbitalSetSize()); + LCAOrbitalSet phi("phi", std::unique_ptr(lcao.myBasisSet->makeClone()), + lcao.getOrbitalSetSize(), lcao.isIdentity()); + LCAOrbitalSet eta("eta", std::unique_ptr(lcao.myBasisSet->makeClone()), + lcao.getOrbitalSetSize(), lcao.isIdentity()); std::vector corrCenter(num_centers, "true"); @@ -705,11 +704,11 @@ void generateCuspInfo(Matrix& info, ParticleSet localTargetPtcl(targetPtcl); ParticleSet localSourcePtcl(sourcePtcl); - LCAOrbitalSet local_phi("local_phi", std::unique_ptr(phi.myBasisSet->makeClone())); - local_phi.setOrbitalSetSize(phi.getOrbitalSetSize()); + LCAOrbitalSet local_phi("local_phi", std::unique_ptr(phi.myBasisSet->makeClone()), + phi.getOrbitalSetSize(), phi.isIdentity()); - LCAOrbitalSet local_eta("local_eta", std::unique_ptr(eta.myBasisSet->makeClone())); - local_eta.setOrbitalSetSize(eta.getOrbitalSetSize()); + LCAOrbitalSet local_eta("local_eta", std::unique_ptr(eta.myBasisSet->makeClone()), + eta.getOrbitalSetSize(), eta.isIdentity()); #pragma omp critical app_log() << " Working on MO: " << mo_idx << " Center: " << center_idx << std::endl; diff --git a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.cpp b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.cpp index 8906c0f42c..4f0a395e57 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOSpinorBuilder.cpp @@ -32,10 +32,12 @@ std::unique_ptr LCAOSpinorBuilder::createSPOSetFromXML(xmlNodePtr cur) ReportEngine PRE(ClassName, "createSPO(xmlNodePtr)"); std::string spo_name(""), optimize("no"); std::string basisset_name("LCAOBSet"); + size_t norbs(0); OhmmsAttributeSet spoAttrib; spoAttrib.add(spo_name, "name"); spoAttrib.add(optimize, "optimize"); spoAttrib.add(basisset_name, "basisset"); + spoAttrib.add(norbs, "size"); spoAttrib.put(cur); BasisSet_t* myBasisSet = nullptr; @@ -48,10 +50,11 @@ std::unique_ptr LCAOSpinorBuilder::createSPOSetFromXML(xmlNodePtr cur) app_log() << " SPOSet " << spo_name << " is optimizable\n"; std::unique_ptr upspo = - std::make_unique(spo_name + "_up", std::unique_ptr(myBasisSet->makeClone())); + std::make_unique(spo_name + "_up", std::unique_ptr(myBasisSet->makeClone()), norbs, + false); std::unique_ptr dnspo = - std::make_unique(spo_name + "_dn", std::unique_ptr(myBasisSet->makeClone())); - + std::make_unique(spo_name + "_dn", std::unique_ptr(myBasisSet->makeClone()), norbs, + false); loadMO(*upspo, *dnspo, cur); //create spinor and register up/dn @@ -63,16 +66,11 @@ std::unique_ptr LCAOSpinorBuilder::createSPOSetFromXML(xmlNodePtr cur) bool LCAOSpinorBuilder::loadMO(LCAOrbitalSet& up, LCAOrbitalSet& dn, xmlNodePtr cur) { bool PBC = false; - int norb = up.getBasisSetSize(); std::string debugc("no"); OhmmsAttributeSet aAttrib; - aAttrib.add(norb, "size"); aAttrib.add(debugc, "debug"); aAttrib.put(cur); - up.setOrbitalSetSize(norb); - dn.setOrbitalSetSize(norb); - xmlNodePtr occ_ptr = nullptr; cur = cur->xmlChildrenNode; while (cur != nullptr) diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp index 30dd986c11..a823f473af 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalBuilder.cpp @@ -153,8 +153,8 @@ LCAOrbitalBuilder::LCAOrbitalBuilder(ParticleSet& els, ParticleSet& ions, Commun if (basisset_map_.size() == 0 && h5_path != "") { app_warning() << "!!!!!!! Deprecated input style: missing basisset element. " - << "LCAO needs an explicit basisset XML element. " - << "Fallback on loading an implicit one." << std::endl; + << "LCAO needs an explicit basisset XML element. " << "Fallback on loading an implicit one." + << std::endl; basisset_map_["LCAOBSet"] = loadBasisSetFromH5(cur); } @@ -458,19 +458,32 @@ std::unique_ptr LCAOrbitalBuilder::createSPOSetFromXML(xmlNodePtr cur) ReportEngine PRE(ClassName, "createSPO(xmlNodePtr)"); std::string spo_name(""), cusp_file(""), optimize("no"); std::string basisset_name("LCAOBSet"); + size_t norbs(0); OhmmsAttributeSet spoAttrib; spoAttrib.add(spo_name, "name"); spoAttrib.add(spo_name, "id"); spoAttrib.add(cusp_file, "cuspInfo"); spoAttrib.add(basisset_name, "basisset"); + spoAttrib.add(norbs, "size"); + spoAttrib.add(norbs, "orbitals", {}, TagStatus::DELETED); spoAttrib.put(cur); + // look for coefficients element. If found the MO coefficients matrix is not identity + bool identity = true; + processChildren(cur, [&](const std::string& cname, const xmlNodePtr element) { + if (cname.find("coeff") < cname.size() || cname == "parameter" || cname == "Var") + identity = false; + }); + std::unique_ptr myBasisSet; if (basisset_map_.find(basisset_name) == basisset_map_.end()) myComm->barrier_and_abort("basisset \"" + basisset_name + "\" cannot be found\n"); else myBasisSet.reset(basisset_map_[basisset_name]->makeClone()); + if (norbs == 0) + norbs = myBasisSet->getBasisSetSize(); + std::unique_ptr sposet; if (doCuspCorrection) { @@ -479,16 +492,18 @@ std::unique_ptr LCAOrbitalBuilder::createSPOSetFromXML(xmlNodePtr cur) "LCAOrbitalBuilder::createSPOSetFromXML cusp correction is not supported on complex LCAO."); #else app_summary() << " Using cusp correction." << std::endl; - auto lcwc = std::make_unique(spo_name, sourcePtcl, targetPtcl, std::move(myBasisSet)); - loadMO(lcwc->lcao, cur); - lcwc->setOrbitalSetSize(lcwc->lcao.getOrbitalSetSize()); + auto lcwc = std::make_unique(spo_name, std::move(myBasisSet), norbs, identity, + sourcePtcl, targetPtcl); + if (!identity) + loadMO(lcwc->lcao, cur); sposet = std::move(lcwc); #endif } else { - auto lcos = std::make_unique(spo_name, std::move(myBasisSet)); - loadMO(*lcos, cur); + auto lcos = std::make_unique(spo_name, std::move(myBasisSet), norbs, identity); + if (!identity) + loadMO(*lcos, cur); sposet = std::move(lcos); } @@ -561,13 +576,10 @@ bool LCAOrbitalBuilder::loadMO(LCAOrbitalSet& spo, xmlNodePtr cur) ReportEngine PRE("LCAOrbitalBuilder", "put(xmlNodePtr)"); //initialize the number of orbital by the basis set size - int norb = spo.getBasisSetSize(); std::string debugc("no"); double orbital_mix_magnitude = 0.0; bool PBC = false; OhmmsAttributeSet aAttrib; - aAttrib.add(norb, "orbitals"); - aAttrib.add(norb, "size"); aAttrib.add(debugc, "debug"); aAttrib.add(orbital_mix_magnitude, "orbital_mix_magnitude"); aAttrib.put(cur); @@ -592,7 +604,6 @@ bool LCAOrbitalBuilder::loadMO(LCAOrbitalSet& spo, xmlNodePtr cur) app_log() << " Using Identity for the LCOrbitalSet " << std::endl; return true; } - spo.setOrbitalSetSize(norb); bool success = putOccupation(spo, occ_ptr); if (h5_path == "") success = putFromXML(spo, coeff_ptr); @@ -645,7 +656,7 @@ bool LCAOrbitalBuilder::putFromXML(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) int norbs = 0; OhmmsAttributeSet aAttrib; aAttrib.add(norbs, "size"); - aAttrib.add(norbs, "orbitals"); + aAttrib.add(norbs, "orbitals", {}, TagStatus::DELETED); aAttrib.put(coeff_ptr); if (norbs < spo.getOrbitalSetSize()) { @@ -680,12 +691,12 @@ bool LCAOrbitalBuilder::putFromXML(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) */ bool LCAOrbitalBuilder::putFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) { - int neigs = spo.getBasisSetSize(); int setVal = -1; + int neig; OhmmsAttributeSet aAttrib; aAttrib.add(setVal, "spindataset"); - aAttrib.add(neigs, "size"); - aAttrib.add(neigs, "orbitals"); + aAttrib.add(neig, "size", {}, TagStatus::DEPRECATED); + aAttrib.add(neig, "orbitals", {}, TagStatus::DELETED); aAttrib.put(coeff_ptr); hdf_archive hin(myComm); if (myComm->rank() == 0) @@ -752,17 +763,16 @@ bool LCAOrbitalBuilder::putFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) bool LCAOrbitalBuilder::putPBCFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) { ReportEngine PRE("LCAOrbitalBuilder", "LCAOrbitalBuilder::putPBCFromH5"); - int norbs = spo.getOrbitalSetSize(); - int neigs = spo.getBasisSetSize(); int setVal = -1; + int neig; bool IsComplex = false; bool MultiDet = false; PosType SuperTwist(0.0); PosType SuperTwistH5(0.0); OhmmsAttributeSet aAttrib; aAttrib.add(setVal, "spindataset"); - aAttrib.add(neigs, "size"); - aAttrib.add(neigs, "orbitals"); + aAttrib.add(neig, "size", {}, TagStatus::DEPRECATED); + aAttrib.add(neig, "orbitals", {}, TagStatus::DELETED); aAttrib.put(coeff_ptr); hdf_archive hin(myComm); @@ -821,7 +831,7 @@ bool LCAOrbitalBuilder::putPBCFromH5(LCAOrbitalSet& spo, xmlNodePtr coeff_ptr) LoadFullCoefsFromH5(hin, setVal, SuperTwist, Ctemp, MultiDet); int n = 0, i = 0; - while (i < norbs) + while (i < spo.getOrbitalSetSize()) { if (Occ[n] > 0.0) { diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp index 026dc272ba..f35b2f3955 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp @@ -33,20 +33,28 @@ struct LCAOrbitalSet::LCAOMultiWalkerMem : public Resource OffloadMWVArray vp_basis_v_mw; // [NVPs][NumAO] }; -LCAOrbitalSet::LCAOrbitalSet(const std::string& my_name, std::unique_ptr&& bs) +LCAOrbitalSet::LCAOrbitalSet(const std::string& my_name, std::unique_ptr&& bs, size_t norbs, bool identity) : SPOSet(my_name), BasisSetSize(bs ? bs->getBasisSetSize() : 0), - Identity(true), + Identity(identity), basis_timer_(createGlobalTimer("LCAOrbitalSet::Basis", timer_level_fine)), mo_timer_(createGlobalTimer("LCAOrbitalSet::MO", timer_level_fine)) { if (!bs) throw std::runtime_error("LCAOrbitalSet cannot take nullptr as its basis set!"); - myBasisSet = std::move(bs); + myBasisSet = std::move(bs); + OrbitalSetSize = norbs; Temp.resize(BasisSetSize); Temph.resize(BasisSetSize); Tempgh.resize(BasisSetSize); - OrbitalSetSize = BasisSetSize; + OrbitalSetSize = norbs; + if (!Identity) + { + Tempv.resize(OrbitalSetSize); + Temphv.resize(OrbitalSetSize); + Tempghv.resize(OrbitalSetSize); + C = std::make_shared(OrbitalSetSize, BasisSetSize); + } LCAOrbitalSet::checkObject(); } @@ -74,16 +82,7 @@ LCAOrbitalSet::LCAOrbitalSet(const LCAOrbitalSet& in) void LCAOrbitalSet::setOrbitalSetSize(int norbs) { - if (C) - throw std::runtime_error("LCAOrbitalSet::setOrbitalSetSize cannot reset existing MO coefficients"); - - Identity = false; - OrbitalSetSize = norbs; - C = std::make_shared(OrbitalSetSize, BasisSetSize); - Tempv.resize(OrbitalSetSize); - Temphv.resize(OrbitalSetSize); - Tempghv.resize(OrbitalSetSize); - LCAOrbitalSet::checkObject(); + throw std::runtime_error("LCAOrbitalSet::setOrbitalSetSize should not be called"); } void LCAOrbitalSet::checkObject() const diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h index cf6706df95..5a3e48b45f 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h @@ -41,9 +41,12 @@ struct LCAOrbitalSet : public SPOSet std::shared_ptr C; /** constructor + * @param my_name name of the SPOSet object * @param bs pointer to the BasisSet + * @param norb number of orbitals + * @param identity if true, the MO coefficients matrix is identity */ - LCAOrbitalSet(const std::string& my_name, std::unique_ptr&& bs); + LCAOrbitalSet(const std::string& my_name, std::unique_ptr&& bs, size_t norbs, bool identity = false); LCAOrbitalSet(const LCAOrbitalSet& in); @@ -222,7 +225,7 @@ struct LCAOrbitalSet : public SPOSet std::shared_ptr C_copy; ///true if C is an identity matrix - bool Identity; + const bool Identity; ///Temp(BasisSetSize) : Row index=V,Gx,Gy,Gz,L vgl_type Temp; ///Tempv(OrbitalSetSize) Tempv=C*Temp diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.cpp b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.cpp index 674a9a6c1f..c9fedaa2a7 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.cpp +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.cpp @@ -15,17 +15,19 @@ namespace qmcplusplus { LCAOrbitalSetWithCorrection::LCAOrbitalSetWithCorrection(const std::string& my_name, + std::unique_ptr&& bs, + size_t norbs, + bool identity, ParticleSet& ions, - ParticleSet& els, - std::unique_ptr&& bs) - : SPOSet(my_name), lcao(my_name + "_modified", std::move(bs)), cusp(ions, els) -{} + ParticleSet& els) + : SPOSet(my_name), lcao(my_name + "_modified", std::move(bs), norbs, identity), cusp(ions, els, norbs) +{ + OrbitalSetSize = norbs; +} void LCAOrbitalSetWithCorrection::setOrbitalSetSize(int norbs) { - assert(lcao.getOrbitalSetSize() == norbs && "norbs doesn't agree with lcao!"); - OrbitalSetSize = norbs; - cusp.setOrbitalSetSize(norbs); + throw std::runtime_error("LCAOrbitalSetWithCorrection::setOrbitalSetSize should not be called"); } diff --git a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.h b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.h index 65185973d2..884eb7b174 100644 --- a/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.h +++ b/src/QMCWaveFunctions/LCAO/LCAOrbitalSetWithCorrection.h @@ -29,15 +29,20 @@ class LCAOrbitalSetWithCorrection : public SPOSet public: using basis_type = LCAOrbitalSet::basis_type; /** constructor + * @param my_name name of the SPOSet object + * @param bs pointer to the BasisSet + * @param norb number of orbitals + * @param identity if true, the MO coefficients matrix is identity * @param ions * @param els - * @param bs pointer to the BasisSet * @param rl report level */ LCAOrbitalSetWithCorrection(const std::string& my_name, + std::unique_ptr&& bs, + size_t norbs, + bool identity, ParticleSet& ions, - ParticleSet& els, - std::unique_ptr&& bs); + ParticleSet& els); LCAOrbitalSetWithCorrection(const LCAOrbitalSetWithCorrection& in) = default; diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp b/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp index 0b41857fb9..b8b177d754 100644 --- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp +++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.cpp @@ -17,21 +17,17 @@ namespace qmcplusplus { -SoaCuspCorrection::SoaCuspCorrection(ParticleSet& ions, ParticleSet& els) : myTableIndex(els.addTable(ions)) +SoaCuspCorrection::SoaCuspCorrection(ParticleSet& ions, ParticleSet& els, size_t norbs) + : myTableIndex(els.addTable(ions)), MaxOrbSize(norbs) { NumCenters = ions.getTotalNum(); NumTargets = els.getTotalNum(); LOBasisSet.resize(NumCenters); + myVGL.resize(5, MaxOrbSize); } SoaCuspCorrection::SoaCuspCorrection(const SoaCuspCorrection& a) = default; -void SoaCuspCorrection::setOrbitalSetSize(int norbs) -{ - MaxOrbSize = norbs; - myVGL.resize(5, MaxOrbSize); -} - inline void SoaCuspCorrection::evaluateVGL(const ParticleSet& P, int iat, VGLVector& vgl) { assert(MaxOrbSize >= vgl.size()); diff --git a/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.h b/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.h index ff7949d08d..7469bcaf21 100644 --- a/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.h +++ b/src/QMCWaveFunctions/LCAO/SoaCuspCorrection.h @@ -50,7 +50,7 @@ class SoaCuspCorrection /** Maximal number of supported MOs * this is not the AO basis because cusp correction is applied on the MO directly. */ - int MaxOrbSize = 0; + const size_t MaxOrbSize; ///COMPLEX WON'T WORK using COT = CuspCorrectionAtomicBasis; @@ -68,16 +68,13 @@ class SoaCuspCorrection /** constructor * @param ions ionic system * @param els electronic system + * @param norbs the number of orbitals this cusp correction may serve */ - SoaCuspCorrection(ParticleSet& ions, ParticleSet& els); + SoaCuspCorrection(ParticleSet& ions, ParticleSet& els, size_t norbs); /** copy constructor */ SoaCuspCorrection(const SoaCuspCorrection& a); - /** set the number of orbitals this cusp correction may serve. call this before adding any correction centers. - */ - void setOrbitalSetSize(int norbs); - /** compute VGL * @param P quantum particleset * @param iat active particle diff --git a/src/QMCWaveFunctions/tests/test_soa_cusp_corr.cpp b/src/QMCWaveFunctions/tests/test_soa_cusp_corr.cpp index 1c7e02bff9..9ec4d44d1d 100644 --- a/src/QMCWaveFunctions/tests/test_soa_cusp_corr.cpp +++ b/src/QMCWaveFunctions/tests/test_soa_cusp_corr.cpp @@ -120,11 +120,11 @@ TEST_CASE("applyCuspInfo", "[wavefunction]") LCAOrbitalSet& lcob = dynamic_cast(*sposet); - LCAOrbitalSet phi("phi", std::unique_ptr(lcob.myBasisSet->makeClone())); - phi.setOrbitalSetSize(lcob.getOrbitalSetSize()); + LCAOrbitalSet phi("phi", std::unique_ptr(lcob.myBasisSet->makeClone()), + lcob.getOrbitalSetSize(), lcob.isIdentity()); - LCAOrbitalSet eta("eta", std::unique_ptr(lcob.myBasisSet->makeClone())); - eta.setOrbitalSetSize(lcob.getOrbitalSetSize()); + LCAOrbitalSet eta("eta", std::unique_ptr(lcob.myBasisSet->makeClone()), + lcob.getOrbitalSetSize(), lcob.isIdentity()); *(eta.C) = *(lcob.C); *(phi.C) = *(lcob.C); diff --git a/src/spline2/MultiBspline1D.hpp b/src/spline2/MultiBspline1D.hpp index 467cb5a674..c70f758c28 100644 --- a/src/spline2/MultiBspline1D.hpp +++ b/src/spline2/MultiBspline1D.hpp @@ -37,11 +37,11 @@ class MultiBspline1D ///define the real type using real_type = typename bspline_traits::real_type; ///actual einspline multi-bspline object - SplineType *spline_m; + SplineType* spline_m; public: MultiBspline1D() : spline_m(nullptr) {} - MultiBspline1D(const MultiBspline1D& in) = delete; + MultiBspline1D(const MultiBspline1D& in) = delete; MultiBspline1D& operator=(const MultiBspline1D& in) = delete; ~MultiBspline1D() @@ -125,7 +125,7 @@ inline void MultiBspline1D::evaluate_v_impl(T x, T* restrict vals) const x -= spline_m->x_grid.start; T tx; int ix; - spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 2); + qmcplusplus::getSplineBound(x * spline_m->x_grid.delta_inv, spline_m->x_grid.num - 2, ix, tx); T a[4]; spline2::MultiBsplineData::compute_prefactors(a, tx); @@ -137,7 +137,7 @@ inline void MultiBspline1D::evaluate_v_impl(T x, T* restrict vals) const const T* restrict coefs2 = spline_m->coefs + ((ix + 2) * xs); const T* restrict coefs3 = spline_m->coefs + ((ix + 3) * xs); -#pragma omp simd aligned(vals, coefs0, coefs1, coefs2, coefs3: QMC_SIMD_ALIGNMENT) +#pragma omp simd aligned(vals, coefs0, coefs1, coefs2, coefs3 : QMC_SIMD_ALIGNMENT) for (int n = 0; n < spline_m->num_splines; n++) vals[n] = a[0] * coefs0[n] + a[1] * coefs1[n] + a[2] * coefs2[n] + a[3] * coefs3[n]; } @@ -148,7 +148,7 @@ inline void MultiBspline1D::evaluate_vgl_impl(T x, T* restrict vals, T* restr x -= spline_m->x_grid.start; T tx; int ix; - spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 2); + qmcplusplus::getSplineBound(x * spline_m->x_grid.delta_inv, spline_m->x_grid.num - 2, ix, tx); T a[4], da[4], d2a[4]; spline2::MultiBsplineData::compute_prefactors(a, da, d2a, tx); @@ -161,7 +161,7 @@ inline void MultiBspline1D::evaluate_vgl_impl(T x, T* restrict vals, T* restr const T* restrict coefs2 = spline_m->coefs + ((ix + 2) * xs); const T* restrict coefs3 = spline_m->coefs + ((ix + 3) * xs); -#pragma omp simd aligned(vals, grads, lapl, coefs0, coefs1, coefs2, coefs3: QMC_SIMD_ALIGNMENT) +#pragma omp simd aligned(vals, grads, lapl, coefs0, coefs1, coefs2, coefs3 : QMC_SIMD_ALIGNMENT) for (int n = 0; n < spline_m->num_splines; n++) { const T coef_0 = coefs0[n]; diff --git a/src/spline2/MultiBsplineEval_helper.hpp b/src/spline2/MultiBsplineEval_helper.hpp index 8df498af14..d87663cb5d 100644 --- a/src/spline2/MultiBsplineEval_helper.hpp +++ b/src/spline2/MultiBsplineEval_helper.hpp @@ -20,64 +20,27 @@ #include #include "bspline_traits.hpp" #include "spline2/MultiBsplineData.hpp" +#include "Numerics/SplineBound.hpp" namespace spline2 { -/** break x into the integer part and residual part and apply bounds - * @param x input coordinate - * @param dx fractional part - * @param ind integer part - * @param nmax upper bound of the integer part - * - * x in the range of [0, nmax+1) will be split correctly. - * x < 0, ind = 0, dx = 0 - * x >= nmax+1, ind = nmax, dx = 1 - epsilon - * - * Attention: nmax is not the number grid points but the maximum allowed grid index - * For example, ng is the number of grid point. - * the actual grid points indices are 0, 1, ..., ng - 1. - * In a periodic/anti periodic spline, set nmax = ng - 1 - * In a natural boundary spline, set nmax = ng - 2 - * because the end point should be excluded and the last grid point has an index ng - 2. - */ -template -inline void getSplineBound(T x, TRESIDUAL& dx, int& ind, int nmax) -{ - // lower bound - if (x < 0) - { - ind = 0; - dx = T(0); - } - else - { -#if defined(__INTEL_LLVM_COMPILER) || defined(__INTEL_CLANG_COMPILER) - T ipart = std::floor(x); - dx = x - ipart; -#else - T ipart; - dx = std::modf(x, &ipart); -#endif - ind = static_cast(ipart); - // upper bound - if (ind > nmax) - { - ind = nmax; - dx = T(1) - std::numeric_limits::epsilon(); - } - } -} - /** define computeLocationAndFractional: common to any implementation * compute the location of the spline grid point and residual coordinates * also it precomputes auxiliary array a, b and c */ template -inline void computeLocationAndFractional(const typename qmcplusplus::bspline_traits::SplineType* restrict spline_m, - T x, T y, T z, - int& ix, int& iy, int& iz, - T a[4], T b[4], T c[4]) +inline void computeLocationAndFractional( + const typename qmcplusplus::bspline_traits::SplineType* restrict spline_m, + T x, + T y, + T z, + int& ix, + int& iy, + int& iz, + T a[4], + T b[4], + T c[4]) { x -= spline_m->x_grid.start; y -= spline_m->y_grid.start; @@ -85,9 +48,9 @@ inline void computeLocationAndFractional(const typename qmcplusplus::bspline_tra T tx, ty, tz; - getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); - getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); - getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); + qmcplusplus::getSplineBound(x * spline_m->x_grid.delta_inv, spline_m->x_grid.num - 1, ix, tx); + qmcplusplus::getSplineBound(y * spline_m->y_grid.delta_inv, spline_m->y_grid.num - 1, iy, ty); + qmcplusplus::getSplineBound(z * spline_m->z_grid.delta_inv, spline_m->z_grid.num - 1, iz, tz); MultiBsplineData::compute_prefactors(a, tx); MultiBsplineData::compute_prefactors(b, ty); @@ -99,12 +62,23 @@ inline void computeLocationAndFractional(const typename qmcplusplus::bspline_tra * also it precomputes auxiliary array (a,b,c) (da,db,dc) (d2a,d2b,d2c) */ template -inline void computeLocationAndFractional(const typename qmcplusplus::bspline_traits::SplineType* restrict spline_m, - T x, T y, T z, - int& ix, int& iy, int& iz, - T a[4], T b[4], T c[4], - T da[4], T db[4], T dc[4], - T d2a[4], T d2b[4], T d2c[4]) +inline void computeLocationAndFractional( + const typename qmcplusplus::bspline_traits::SplineType* restrict spline_m, + T x, + T y, + T z, + int& ix, + int& iy, + int& iz, + T a[4], + T b[4], + T c[4], + T da[4], + T db[4], + T dc[4], + T d2a[4], + T d2b[4], + T d2c[4]) { x -= spline_m->x_grid.start; y -= spline_m->y_grid.start; @@ -112,9 +86,9 @@ inline void computeLocationAndFractional(const typename qmcplusplus::bspline_tra T tx, ty, tz; - getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); - getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); - getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); + qmcplusplus::getSplineBound(x * spline_m->x_grid.delta_inv, spline_m->x_grid.num - 1, ix, tx); + qmcplusplus::getSplineBound(y * spline_m->y_grid.delta_inv, spline_m->y_grid.num - 1, iy, ty); + qmcplusplus::getSplineBound(z * spline_m->z_grid.delta_inv, spline_m->z_grid.num - 1, iz, tz); MultiBsplineData::compute_prefactors(a, da, d2a, tx); MultiBsplineData::compute_prefactors(b, db, d2b, ty); diff --git a/src/spline2/MultiBsplineVGHGH.hpp b/src/spline2/MultiBsplineVGHGH.hpp index c0f444612d..e52e3bad8f 100644 --- a/src/spline2/MultiBsplineVGHGH.hpp +++ b/src/spline2/MultiBsplineVGHGH.hpp @@ -46,9 +46,9 @@ inline void evaluate_vghgh_impl(const typename qmcplusplus::bspline_traits x -= spline_m->x_grid.start; y -= spline_m->y_grid.start; z -= spline_m->z_grid.start; - getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); - getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); - getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); + qmcplusplus::getSplineBound(x * spline_m->x_grid.delta_inv, spline_m->x_grid.num - 1, ix, tx); + qmcplusplus::getSplineBound(y * spline_m->y_grid.delta_inv, spline_m->y_grid.num - 1, iy, ty); + qmcplusplus::getSplineBound(z * spline_m->z_grid.delta_inv, spline_m->z_grid.num - 1, iz, tz); MultiBsplineData::compute_prefactors(a, da, d2a, d3a, tx); MultiBsplineData::compute_prefactors(b, db, d2b, d3b, ty); @@ -126,7 +126,8 @@ inline void evaluate_vghgh_impl(const typename qmcplusplus::bspline_traits #pragma omp simd aligned(coefs, coefszs, coefs2zs, coefs3zs, gx, gy, gz, hxx, hxy, hxz, hyy, hyz, hzz, gh_xxx, gh_xxy, \ - gh_xxz, gh_xyy, gh_xyz, gh_xzz, gh_yyy, gh_yyz, gh_yzz, gh_zzz, vals: QMC_SIMD_ALIGNMENT) + gh_xxz, gh_xyy, gh_xyz, gh_xzz, gh_yyy, gh_yyz, gh_yzz, gh_zzz, \ + vals : QMC_SIMD_ALIGNMENT) for (int n = 0; n < num_splines; n++) { T coefsv = coefs[n]; @@ -150,15 +151,15 @@ inline void evaluate_vghgh_impl(const typename qmcplusplus::bspline_traits gh_yzz[n] += pre01 * sum2; gh_zzz[n] += pre00 * sum3; - hxx[n] += pre20 * sum0; - hxy[n] += pre11 * sum0; - hxz[n] += pre10 * sum1; - hyy[n] += pre02 * sum0; - hyz[n] += pre01 * sum1; - hzz[n] += pre00 * sum2; - gx[n] += pre10 * sum0; - gy[n] += pre01 * sum0; - gz[n] += pre00 * sum1; + hxx[n] += pre20 * sum0; + hxy[n] += pre11 * sum0; + hxz[n] += pre10 * sum1; + hyy[n] += pre02 * sum0; + hyz[n] += pre01 * sum1; + hzz[n] += pre00 * sum2; + gx[n] += pre10 * sum0; + gy[n] += pre01 * sum0; + gz[n] += pre00 * sum1; vals[n] += pre00 * sum0; } } @@ -185,12 +186,12 @@ inline void evaluate_vghgh_impl(const typename qmcplusplus::bspline_traits const T dzzz = dzInv * dzInv * dzInv; #pragma omp simd aligned(gx, gy, gz, hxx, hxy, hxz, hyy, hyz, hzz, gh_xxx, gh_xxy, gh_xxz, gh_xyy, gh_xyz, gh_xzz, \ - gh_yyy, gh_yyz, gh_yzz, gh_zzz: QMC_SIMD_ALIGNMENT) + gh_yyy, gh_yyz, gh_yzz, gh_zzz : QMC_SIMD_ALIGNMENT) for (int n = 0; n < num_splines; n++) { - gx[n] *= dxInv; - gy[n] *= dyInv; - gz[n] *= dzInv; + gx[n] *= dxInv; + gy[n] *= dyInv; + gz[n] *= dzInv; hxx[n] *= dxx; hyy[n] *= dyy; hzz[n] *= dzz; diff --git a/src/spline2/MultiBsplineVGLH_OMPoffload.hpp b/src/spline2/MultiBsplineVGLH_OMPoffload.hpp index 280d8eae46..03f7115ecd 100644 --- a/src/spline2/MultiBsplineVGLH_OMPoffload.hpp +++ b/src/spline2/MultiBsplineVGLH_OMPoffload.hpp @@ -41,20 +41,10 @@ inline void evaluate_vgl_impl(const typename qmcplusplus::bspline_traits:: int first, int last) { - x -= spline_m->x_grid.start; - y -= spline_m->y_grid.start; - z -= spline_m->z_grid.start; - T tx, ty, tz; int ix, iy, iz; - spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); - spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); - spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); - T a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - spline2::MultiBsplineData::compute_prefactors(a, da, d2a, tx); - spline2::MultiBsplineData::compute_prefactors(b, db, d2b, ty); - spline2::MultiBsplineData::compute_prefactors(c, dc, d2c, tz); + computeLocationAndFractional(spline_m, x, y, z, ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); const intptr_t xs = spline_m->x_stride; const intptr_t ys = spline_m->y_stride; @@ -152,19 +142,9 @@ inline void evaluate_vgh_impl(const typename qmcplusplus::bspline_traits:: int last) { int ix, iy, iz; - T tx, ty, tz; T a[4], b[4], c[4], da[4], db[4], dc[4], d2a[4], d2b[4], d2c[4]; - x -= spline_m->x_grid.start; - y -= spline_m->y_grid.start; - z -= spline_m->z_grid.start; - spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); - spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); - spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); - - spline2::MultiBsplineData::compute_prefactors(a, da, d2a, tx); - spline2::MultiBsplineData::compute_prefactors(b, db, d2b, ty); - spline2::MultiBsplineData::compute_prefactors(c, dc, d2c, tz); + computeLocationAndFractional(spline_m, x, y, z, ix, iy, iz, a, b, c, da, db, dc, d2a, d2b, d2c); const intptr_t xs = spline_m->x_stride; const intptr_t ys = spline_m->y_stride; @@ -212,8 +192,8 @@ inline void evaluate_vgh_impl(const typename qmcplusplus::bspline_traits:: #ifdef ENABLE_OFFLOAD #pragma omp for #else -#pragma omp simd aligned(coefs, coefszs, coefs2zs, coefs3zs, vals, gx, gy, gz, hxx, hyy, hzz, hxy, hxz, hyz \ - : QMC_SIMD_ALIGNMENT) +#pragma omp simd aligned(coefs, coefszs, coefs2zs, coefs3zs, vals, gx, gy, gz, hxx, hyy, hzz, hxy, hxz, \ + hyz : QMC_SIMD_ALIGNMENT) #endif for (int n = 0; n < num_splines; n++) { diff --git a/src/spline2/MultiBsplineValue_OMPoffload.hpp b/src/spline2/MultiBsplineValue_OMPoffload.hpp index 447dd693f7..9604343e4a 100644 --- a/src/spline2/MultiBsplineValue_OMPoffload.hpp +++ b/src/spline2/MultiBsplineValue_OMPoffload.hpp @@ -28,19 +28,10 @@ inline void evaluate_v_impl(const typename qmcplusplus::bspline_traits::Sp int first, int last) { - x -= spline_m->x_grid.start; - y -= spline_m->y_grid.start; - z -= spline_m->z_grid.start; - T tx, ty, tz; int ix, iy, iz; - spline2::getSplineBound(x * spline_m->x_grid.delta_inv, tx, ix, spline_m->x_grid.num - 1); - spline2::getSplineBound(y * spline_m->y_grid.delta_inv, ty, iy, spline_m->y_grid.num - 1); - spline2::getSplineBound(z * spline_m->z_grid.delta_inv, tz, iz, spline_m->z_grid.num - 1); T a[4], b[4], c[4]; - spline2::MultiBsplineData::compute_prefactors(a, tx); - spline2::MultiBsplineData::compute_prefactors(b, ty); - spline2::MultiBsplineData::compute_prefactors(c, tz); + computeLocationAndFractional(spline_m, x, y, z, ix, iy, iz, a, b, c); const intptr_t xs = spline_m->x_stride; const intptr_t ys = spline_m->y_stride; diff --git a/src/spline2/tests/test_multi_spline.cpp b/src/spline2/tests/test_multi_spline.cpp index 831e07b54e..79ca1ad0dd 100644 --- a/src/spline2/tests/test_multi_spline.cpp +++ b/src/spline2/tests/test_multi_spline.cpp @@ -20,40 +20,6 @@ namespace qmcplusplus { -template -void test_spline_bounds() -{ - T x = 2.2; - T dx; - int ind; - int ng = 10; - spline2::getSplineBound(x, dx, ind, ng); - CHECK(dx == Approx(0.2)); - REQUIRE(ind == 2); - - // check clamping to a maximum index value - x = 10.5; - spline2::getSplineBound(x, dx, ind, ng); - CHECK(dx == Approx(0.5)); - REQUIRE(ind == 10); - - x = 11.5; - spline2::getSplineBound(x, dx, ind, ng); - CHECK(dx == Approx(1.0)); - REQUIRE(ind == 10); - - // check clamping to a zero index value - x = -1.3; - spline2::getSplineBound(x, dx, ind, ng); - CHECK(dx == Approx(0.0)); - REQUIRE(ind == 0); -} - -TEST_CASE("getSplineBound double", "[spline2]") { test_spline_bounds(); } - - -TEST_CASE("getSplineBound float", "[spline2]") { test_spline_bounds(); } - TEST_CASE("SymTrace", "[spline2]") { diff --git a/tests/molecules/LiH_ae_gms/vmc_noj_edens_vor_long.in.xml b/tests/molecules/LiH_ae_gms/vmc_noj_edens_vor_long.in.xml index d03c0ec868..cf91d87abd 100644 --- a/tests/molecules/LiH_ae_gms/vmc_noj_edens_vor_long.in.xml +++ b/tests/molecules/LiH_ae_gms/vmc_noj_edens_vor_long.in.xml @@ -12028,7 +12028,7 @@ PROPERTY VALUES FOR THE ROHF SELF-CONSISTENT FIELD WAVEFUNCTION - + diff --git a/tests/molecules/LiH_ae_gms/vmc_noj_edens_vor_short.in.xml b/tests/molecules/LiH_ae_gms/vmc_noj_edens_vor_short.in.xml index 621dfca8aa..beee4ac9b7 100644 --- a/tests/molecules/LiH_ae_gms/vmc_noj_edens_vor_short.in.xml +++ b/tests/molecules/LiH_ae_gms/vmc_noj_edens_vor_short.in.xml @@ -12028,7 +12028,7 @@ PROPERTY VALUES FOR THE ROHF SELF-CONSISTENT FIELD WAVEFUNCTION - +