diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReader.cpp b/src/QMCWaveFunctions/BsplineFactory/BsplineReader.cpp index 619bd2c9d4..909b01bc94 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineReader.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReader.cpp @@ -2,7 +2,7 @@ // This file is distributed under the University of Illinois/NCSA Open Source License. // See LICENSE file in top directory for details. // -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// Copyright (c) 2023 QMCPACK developers. // // File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign // Paul R. C. Kent, kentpr@ornl.gov, Oak Ridge National Laboratory @@ -161,9 +161,9 @@ std::unique_ptr BsplineReader::create_spline_set(int spin, xmlNodePtr cu * At gamma or arbitrary kpoints with complex wavefunctions, spo2band[i]==i */ void BsplineReader::initialize_spo2band(int spin, - const std::vector& bigspace, - SPOSetInfo& sposet, - std::vector& spo2band) + const std::vector& bigspace, + SPOSetInfo& sposet, + std::vector& spo2band) { spo2band.reserve(bigspace.size()); int ns = 0; diff --git a/src/QMCWaveFunctions/BsplineFactory/BsplineReader.h b/src/QMCWaveFunctions/BsplineFactory/BsplineReader.h index 9dafd3c205..3b8571414d 100644 --- a/src/QMCWaveFunctions/BsplineFactory/BsplineReader.h +++ b/src/QMCWaveFunctions/BsplineFactory/BsplineReader.h @@ -2,7 +2,7 @@ // This file is distributed under the University of Illinois/NCSA Open Source License. // See LICENSE file in top directory for details. // -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. +// Copyright (c) 2023 QMCPACK developers. // // File developed by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign // Raymond Clay III, j.k.rofling@gmail.com, Lawrence Livermore National Laboratory @@ -20,9 +20,10 @@ #ifndef QMCPLUSPLUS_BSPLINE_READER_H #define QMCPLUSPLUS_BSPLINE_READER_H -#include "mpi/collectives.h" -#include "mpi/point2point.h" +#include #include +#include +#include "EinsplineSetBuilder.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilderCommon.cpp b/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilderCommon.cpp index 8db1238aa9..e1fcadfbce 100644 --- a/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilderCommon.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilderCommon.cpp @@ -22,14 +22,14 @@ * - */ #include "EinsplineSetBuilder.h" +#include +#include #include "OhmmsData/AttributeSet.h" #include "Message/CommOperators.h" #include #include "QMCWaveFunctions/BsplineFactory/BsplineReader.h" #include "Particle/DistanceTable.h" - -#include -#include +#include "mpi/collectives.h" namespace qmcplusplus { diff --git a/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder_createSPOs.cpp b/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder_createSPOs.cpp index d47ea25ece..d285db303d 100644 --- a/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder_createSPOs.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/EinsplineSetBuilder_createSPOs.cpp @@ -26,8 +26,6 @@ #include "Utilities/Timer.h" #include "ParticleBase/RandomSeqGenerator.h" #include "Particle/DistanceTable.h" -#include -#include "Utilities/ProgressReportEngine.h" #include "einspline_helper.hpp" #include "BsplineReader.h" #include "BsplineSet.h" @@ -257,23 +255,13 @@ std::unique_ptr EinsplineSetBuilder::createSPOSetFromXML(xmlNodePtr cur) { //if(TargetPtcl.Lattice.SuperCellEnum != SUPERCELL_BULK && truncate=="yes") if (MixedSplineReader == 0) - { - if (use_single) - MixedSplineReader = createBsplineRealSingle(this, hybrid_rep == "yes", useGPU); - else - MixedSplineReader = createBsplineRealDouble(this, hybrid_rep == "yes", useGPU); - } + MixedSplineReader = createBsplineReal(this, use_single, hybrid_rep == "yes", useGPU); } else #endif { if (MixedSplineReader == 0) - { - if (use_single) - MixedSplineReader = createBsplineComplexSingle(this, hybrid_rep == "yes", useGPU); - else - MixedSplineReader = createBsplineComplexDouble(this, hybrid_rep == "yes", useGPU); - } + MixedSplineReader = createBsplineComplex(this, use_single, hybrid_rep == "yes", useGPU); } MixedSplineReader->setCommon(XMLRoot); diff --git a/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.cpp b/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.cpp index 314c08f14c..07f9d6e821 100644 --- a/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/EinsplineSpinorSetBuilder.cpp @@ -177,23 +177,13 @@ std::unique_ptr EinsplineSpinorSetBuilder::createSPOSetFromXML(xmlNodePt if (use_real_splines_) { if (MixedSplineReader == 0) - { - if (use_single) - MixedSplineReader = createBsplineRealSingle(this, hybrid_rep == "yes", useGPU); - else - MixedSplineReader = createBsplineRealDouble(this, hybrid_rep == "yes", useGPU); - } + MixedSplineReader = createBsplineReal(this, use_single, hybrid_rep == "yes", useGPU); } else #endif { if (MixedSplineReader == 0) - { - if (use_single) - MixedSplineReader = createBsplineComplexSingle(this, hybrid_rep == "yes", useGPU); - else - MixedSplineReader = createBsplineComplexDouble(this, hybrid_rep == "yes", useGPU); - } + MixedSplineReader = createBsplineComplex(this, use_single, hybrid_rep == "yes", useGPU); } MixedSplineReader->setCommon(XMLRoot); diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepCplx.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepCplx.h index 266678e5fb..37e71b8141 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepCplx.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepCplx.h @@ -20,6 +20,7 @@ #include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h" #include "CPU/SIMD/inner_product.hpp" +#include "BsplineSet.h" namespace qmcplusplus { @@ -35,6 +36,7 @@ class HybridRepCplx : public SPLINEBASE, private HybridRepCenterOrbitals; using ST = typename SPLINEBASE::DataType; + using DataType = typename SPLINEBASE::DataType; using PointType = typename SPLINEBASE::PointType; using SingleSplineType = typename SPLINEBASE::SingleSplineType; using RealType = typename SPLINEBASE::RealType; diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepReal.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepReal.h index 33fed108a4..3c2cea2841 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepReal.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepReal.h @@ -20,6 +20,7 @@ #include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h" #include "CPU/SIMD/inner_product.hpp" +#include "BsplineSet.h" namespace qmcplusplus { @@ -35,6 +36,7 @@ class HybridRepReal : public SPLINEBASE, private HybridRepCenterOrbitals; using ST = typename SPLINEBASE::DataType; + using DataType = typename SPLINEBASE::DataType; using PointType = typename SPLINEBASE::PointType; using SingleSplineType = typename SPLINEBASE::SingleSplineType; using RealType = typename SPLINEBASE::RealType; diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.cpp b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.cpp new file mode 100644 index 0000000000..29fe5ea599 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.cpp @@ -0,0 +1,675 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2023 QMCPACK developers. +// +// File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + + +/** @file + * + * derived from SplineSetReader + */ + +#include "HybridRepSetReader.h" +#include "Numerics/Quadrature.h" +#include "Numerics/Bessel.h" +#include "OhmmsData/AttributeSet.h" +#include "CPU/math.hpp" +#include "Concurrency/OpenMP.h" +#include +#include "OneSplineOrbData.hpp" + +namespace qmcplusplus +{ +template +struct Gvectors +{ + using PosType = TinyVector; + using ValueType = std::complex; + + const LT& Lattice; + std::vector gvecs_cart; //Cartesian. + std::vector gmag; + const size_t NumGvecs; + + Gvectors(const std::vector>& gvecs_in, + const LT& Lattice_in, + const TinyVector& HalfG, + size_t first, + size_t last) + : Lattice(Lattice_in), NumGvecs(last - first) + { + gvecs_cart.resize(NumGvecs); + gmag.resize(NumGvecs); +#pragma omp parallel for + for (size_t ig = 0; ig < NumGvecs; ig++) + { + TinyVector gvec_shift; + gvec_shift = gvecs_in[ig + first] + HalfG * 0.5; + gvecs_cart[ig] = Lattice.k_cart(gvec_shift); + gmag[ig] = std::sqrt(dot(gvecs_cart[ig], gvecs_cart[ig])); + } + } + + template + void calc_Ylm_G(const size_t ig, YLM_ENGINE& Ylm, VVT& YlmG) const + { + PosType Ghat(0.0, 0.0, 1.0); + if (gmag[ig] > 0) + Ghat = gvecs_cart[ig] / gmag[ig]; + Ylm.evaluateV(Ghat[0], Ghat[1], Ghat[2], YlmG.data()); + } + + template + inline void calc_jlm_G(const int lmax, ST& r, const size_t ig, VVT& j_lm_G) const + { + bessel_steed_array_cpu(lmax, gmag[ig] * r, j_lm_G.data()); + for (size_t l = lmax; l > 0; l--) + for (size_t lm = l * l; lm < (l + 1) * (l + 1); lm++) + j_lm_G[lm] = j_lm_G[l]; + } + + template + inline void calc_phase_shift(const PT& RSoA, const size_t ig, VT& phase_shift_real, VT& phase_shift_imag) const + { + const ST* restrict px = RSoA.data(0); + const ST* restrict py = RSoA.data(1); + const ST* restrict pz = RSoA.data(2); + ST* restrict v_r = phase_shift_real.data(); + ST* restrict v_i = phase_shift_imag.data(); + const ST& gv_x = gvecs_cart[ig][0]; + const ST& gv_y = gvecs_cart[ig][1]; + const ST& gv_z = gvecs_cart[ig][2]; + +#pragma omp simd aligned(px, py, pz, v_r, v_i : QMC_SIMD_ALIGNMENT) + for (size_t iat = 0; iat < RSoA.size(); iat++) + qmcplusplus::sincos(px[iat] * gv_x + py[iat] * gv_y + pz[iat] * gv_z, v_i + iat, v_r + iat); + } + + template + ValueType evaluate_psi_r(const Vector>& cG, const PT& pos) + { + assert(cG.size() == NumGvecs); + std::complex val(0.0, 0.0); + for (size_t ig = 0; ig < NumGvecs; ig++) + { + ST s, c; + qmcplusplus::sincos(dot(gvecs_cart[ig], pos), &s, &c); + ValueType pw0(c, s); + val += cG[ig] * pw0; + } + return val; + } + + template + void evaluate_psi_r(const Vector>& cG, const PT& pos, ValueType& phi, ValueType& d2phi) + { + assert(cG.size() == NumGvecs); + d2phi = phi = 0.0; + for (size_t ig = 0; ig < NumGvecs; ig++) + { + ST s, c; + qmcplusplus::sincos(dot(gvecs_cart[ig], pos), &s, &c); + ValueType pw0(c, s); + phi += cG[ig] * pw0; + d2phi += cG[ig] * pw0 * (-dot(gvecs_cart[ig], gvecs_cart[ig])); + } + } + + double evaluate_KE(const Vector>& cG) + { + assert(cG.size() == NumGvecs); + double KE = 0; + for (size_t ig = 0; ig < NumGvecs; ig++) + KE += dot(gvecs_cart[ig], gvecs_cart[ig]) * (cG[ig].real() * cG[ig].real() + cG[ig].imag() * cG[ig].imag()); + return KE / 2.0; + } +}; + +template +HybridRepSetReader::HybridRepSetReader(EinsplineSetBuilder* e) : BsplineReader(e), spline_reader_(e) +{} + +template +std::unique_ptr HybridRepSetReader::create_spline_set(const std::string& my_name, + int spin, + const BandInfoGroup& bandgroup) +{ + auto bspline = std::make_unique(my_name); + app_log() << " ClassName = " << bspline->getClassName() << std::endl; + // set info for Hybrid + initialize_hybridrep_atomic_centers(*bspline); + bool foundspline = spline_reader_.createSplineDataSpaceLookforDumpFile(bandgroup, *bspline); + typename SA::HYBRIDBASE& hybrid_center_orbs = *bspline; + hybrid_center_orbs.resizeStorage(bspline->myV.size()); + if (foundspline) + { + Timer now; + hdf_archive h5f(myComm); + const auto splinefile = getSplineDumpFileName(bandgroup); + h5f.open(splinefile, H5F_ACC_RDONLY); + foundspline = bspline->read_splines(h5f); + if (foundspline) + app_log() << " Successfully restored 3D B-spline coefficients from " << splinefile << ". The reading time is " + << now.elapsed() << " sec." << std::endl; + } + + if (!foundspline) + { + hybrid_center_orbs.flush_zero(); + initialize_hybrid_pio_gather(spin, bandgroup, *bspline); + + if (saveSplineCoefs && myComm->rank() == 0) + { + Timer now; + const std::string splinefile(getSplineDumpFileName(bandgroup)); + hdf_archive h5f; + h5f.create(splinefile); + std::string classname = bspline->getClassName(); + h5f.write(classname, "class_name"); + int sizeD = sizeof(DataType); + h5f.write(sizeD, "sizeof"); + bspline->write_splines(h5f); + h5f.close(); + app_log() << " Stored spline coefficients in " << splinefile << " for potential reuse. The writing time is " + << now.elapsed() << " sec." << std::endl; + } + } + + { + Timer now; + bspline->bcast_tables(myComm); + app_log() << " Time to bcast the table = " << now.elapsed() << std::endl; + } + return bspline; +} + +template +void HybridRepSetReader::initialize_hybridrep_atomic_centers(SA& bspline) const +{ + auto& mybuilder = spline_reader_.mybuilder; + + OhmmsAttributeSet a; + std::string scheme_name("Consistent"); + std::string s_function_name("LEKS2018"); + a.add(scheme_name, "smoothing_scheme"); + a.add(s_function_name, "smoothing_function"); + a.put(mybuilder->XMLRoot); + // assign smooth_scheme + if (scheme_name == "Consistent") + bspline.smooth_scheme = SA::smoothing_schemes::CONSISTENT; + else if (scheme_name == "SmoothAll") + bspline.smooth_scheme = SA::smoothing_schemes::SMOOTHALL; + else if (scheme_name == "SmoothPartial") + bspline.smooth_scheme = SA::smoothing_schemes::SMOOTHPARTIAL; + else + APP_ABORT("initialize_hybridrep_atomic_centers wrong smoothing_scheme name! Only allows Consistent, SmoothAll or " + "SmoothPartial."); + + // assign smooth_function + if (s_function_name == "LEKS2018") + bspline.smooth_func_id = smoothing_functions::LEKS2018; + else if (s_function_name == "coscos") + bspline.smooth_func_id = smoothing_functions::COSCOS; + else if (s_function_name == "linear") + bspline.smooth_func_id = smoothing_functions::LINEAR; + else + APP_ABORT( + "initialize_hybridrep_atomic_centers wrong smoothing_function name! Only allows LEKS2018, coscos or linear."); + app_log() << "Hybrid orbital representation uses " << scheme_name << " smoothing scheme and " << s_function_name + << " smoothing function." << std::endl; + + bspline.set_info(*(mybuilder->SourcePtcl), mybuilder->TargetPtcl, mybuilder->Super2Prim); + auto& centers = bspline.AtomicCenters; + auto& ACInfo = mybuilder->AtomicCentersInfo; + // load atomic center info only when it is not initialized + if (centers.size() == 0) + { + bool success = true; + app_log() << "Reading atomic center info for hybrid representation" << std::endl; + for (int center_idx = 0; center_idx < ACInfo.Ncenters; center_idx++) + { + const int my_GroupID = ACInfo.GroupID[center_idx]; + if (ACInfo.cutoff[center_idx] < 0) + { + app_error() << "Hybrid orbital representation needs parameter 'cutoff_radius' for atom " << center_idx + << std::endl; + success = false; + } + + if (ACInfo.inner_cutoff[center_idx] < 0) + { + const double inner_cutoff = std::max(ACInfo.cutoff[center_idx] - 0.3, 0.0); + app_log() << "Hybrid orbital representation setting 'inner_cutoff' to " << inner_cutoff << " for group " + << my_GroupID << " as atom " << center_idx << std::endl; + // overwrite the inner_cutoff of all the atoms of the same species + for (int id = 0; id < ACInfo.Ncenters; id++) + if (my_GroupID == ACInfo.GroupID[id]) + ACInfo.inner_cutoff[id] = inner_cutoff; + } + else if (ACInfo.inner_cutoff[center_idx] > ACInfo.cutoff[center_idx]) + { + app_error() << "Hybrid orbital representation 'inner_cutoff' must be smaller than 'spline_radius' for atom " + << center_idx << std::endl; + success = false; + } + + if (ACInfo.cutoff[center_idx] > 0) + { + if (ACInfo.lmax[center_idx] < 0) + { + app_error() << "Hybrid orbital representation needs parameter 'lmax' for atom " << center_idx << std::endl; + success = false; + } + + if (ACInfo.spline_radius[center_idx] < 0 && ACInfo.spline_npoints[center_idx] < 0) + { + app_log() << "Parameters 'spline_radius' and 'spline_npoints' for group " << my_GroupID << " as atom " + << center_idx << " are not specified." << std::endl; + const double delta = std::min(0.02, ACInfo.cutoff[center_idx] / 4.0); + const int n_grid_point = std::ceil((ACInfo.cutoff[center_idx] + 1e-4) / delta) + 3; + for (int id = 0; id < ACInfo.Ncenters; id++) + if (my_GroupID == ACInfo.GroupID[id]) + { + ACInfo.spline_npoints[id] = n_grid_point; + ACInfo.spline_radius[id] = (n_grid_point - 1) * delta; + } + app_log() << " Based on default grid point distance " << delta << std::endl; + app_log() << " Setting 'spline_npoints' to " << ACInfo.spline_npoints[center_idx] << std::endl; + app_log() << " Setting 'spline_radius' to " << ACInfo.spline_radius[center_idx] << std::endl; + } + else + { + if (ACInfo.spline_radius[center_idx] < 0) + { + app_error() << "Hybrid orbital representation needs parameter 'spline_radius' for atom " << center_idx + << std::endl; + success = false; + } + + if (ACInfo.spline_npoints[center_idx] < 0) + { + app_error() << "Hybrid orbital representation needs parameter 'spline_npoints' for atom " << center_idx + << std::endl; + success = false; + } + } + + // check maximally allowed cutoff_radius + double max_allowed_cutoff = ACInfo.spline_radius[center_idx] - + 2.0 * ACInfo.spline_radius[center_idx] / (ACInfo.spline_npoints[center_idx] - 1); + if (success && ACInfo.cutoff[center_idx] > max_allowed_cutoff) + { + app_error() << "Hybrid orbital representation requires cutoff_radius<=" << max_allowed_cutoff + << " calculated by spline_radius-2*spline_radius/(spline_npoints-1) for atom " << center_idx + << std::endl; + success = false; + } + } + else + { + // no atomic regions for this atom type + ACInfo.spline_radius[center_idx] = 0.0; + ACInfo.spline_npoints[center_idx] = 0; + ACInfo.lmax[center_idx] = 0; + } + } + + if (!success) + spline_reader_.myComm->barrier_and_abort( + "initialize_hybridrep_atomic_centers Failed to initialize atomic centers " + "in hybrid orbital representation!"); + + for (int center_idx = 0; center_idx < ACInfo.Ncenters; center_idx++) + { + AtomicOrbitals oneCenter(ACInfo.lmax[center_idx]); + oneCenter.set_info(ACInfo.ion_pos[center_idx], ACInfo.cutoff[center_idx], ACInfo.inner_cutoff[center_idx], + ACInfo.spline_radius[center_idx], ACInfo.non_overlapping_radius[center_idx], + ACInfo.spline_npoints[center_idx]); + centers.push_back(oneCenter); + } + } +} + +template +void HybridRepSetReader::create_atomic_centers_Gspace(const Vector>& cG, + Communicate& band_group_comm, + const int iorb, + const std::complex& rotate_phase, + SA& bspline) const +{ + auto& mybuilder = spline_reader_.mybuilder; + double rotate_phase_r = rotate_phase.real(); + double rotate_phase_i = rotate_phase.imag(); + + band_group_comm.bcast(rotate_phase_r); + band_group_comm.bcast(rotate_phase_i); + //distribute G-vectors over processor groups + const int Ngvecs = mybuilder->Gvecs[0].size(); + const int Nprocs = band_group_comm.size(); + const int Ngvecgroups = std::min(Ngvecs, Nprocs); + Communicate gvec_group_comm(band_group_comm, Ngvecgroups); + std::vector gvec_groups(Ngvecgroups + 1, 0); + FairDivideLow(Ngvecs, Ngvecgroups, gvec_groups); + const int gvec_first = gvec_groups[gvec_group_comm.getGroupID()]; + const int gvec_last = gvec_groups[gvec_group_comm.getGroupID() + 1]; + + // prepare Gvecs Ylm(G) + using UnitCellType = typename EinsplineSetBuilder::UnitCellType; + Gvectors Gvecs(mybuilder->Gvecs[0], mybuilder->PrimCell, bspline.HalfG, gvec_first, gvec_last); + // if(band_group_comm.isGroupLeader()) std::cout << "print band=" << iorb << " KE=" << Gvecs.evaluate_KE(cG) << std::endl; + + std::vector>& centers = bspline.AtomicCenters; + app_log() << "Transforming band " << iorb << " on Rank 0" << std::endl; + // collect atomic centers by group + std::vector uniq_species; + for (int center_idx = 0; center_idx < centers.size(); center_idx++) + { + auto& ACInfo = mybuilder->AtomicCentersInfo; + const int my_GroupID = ACInfo.GroupID[center_idx]; + int found_idx = -1; + for (size_t idx = 0; idx < uniq_species.size(); idx++) + if (my_GroupID == uniq_species[idx]) + { + found_idx = idx; + break; + } + if (found_idx < 0) + uniq_species.push_back(my_GroupID); + } + // construct group list + std::vector> group_list(uniq_species.size()); + for (int center_idx = 0; center_idx < centers.size(); center_idx++) + { + auto& ACInfo = mybuilder->AtomicCentersInfo; + const int my_GroupID = ACInfo.GroupID[center_idx]; + for (size_t idx = 0; idx < uniq_species.size(); idx++) + if (my_GroupID == uniq_species[idx]) + { + group_list[idx].push_back(center_idx); + break; + } + } + + for (int group_idx = 0; group_idx < group_list.size(); group_idx++) + { + const auto& mygroup = group_list[group_idx]; + const double spline_radius = centers[mygroup[0]].getSplineRadius(); + const int spline_npoints = centers[mygroup[0]].getSplineNpoints(); + const int lmax = centers[mygroup[0]].getLmax(); + const double delta = spline_radius / static_cast(spline_npoints - 1); + const int lm_tot = (lmax + 1) * (lmax + 1); + const size_t natoms = mygroup.size(); + const int policy = lm_tot > natoms ? 0 : 1; + + std::vector> i_power(lm_tot); + // rotate phase is introduced here. + std::complex i_temp(rotate_phase_r, rotate_phase_i); + for (size_t l = 0; l <= lmax; l++) + { + for (size_t lm = l * l; lm < (l + 1) * (l + 1); lm++) + i_power[lm] = i_temp; + i_temp *= std::complex(0.0, 1.0); + } + + std::vector> all_vals(natoms); + std::vector>> vals_local(spline_npoints * omp_get_max_threads()); + VectorSoaContainer myRSoA(natoms); + for (size_t idx = 0; idx < natoms; idx++) + { + all_vals[idx].resize(spline_npoints, lm_tot * 2); + all_vals[idx] = 0.0; + myRSoA(idx) = centers[mygroup[idx]].getCenterPos(); + } + +#pragma omp parallel + { + const size_t tid = omp_get_thread_num(); + const size_t nt = omp_get_num_threads(); + + for (int ip = 0; ip < spline_npoints; ip++) + { + const size_t ip_idx = tid * spline_npoints + ip; + if (policy == 1) + { + vals_local[ip_idx].resize(lm_tot * 2); + for (size_t lm = 0; lm < lm_tot * 2; lm++) + { + auto& vals = vals_local[ip_idx][lm]; + vals.resize(natoms); + std::fill(vals.begin(), vals.end(), 0.0); + } + } + else + { + vals_local[ip_idx].resize(natoms * 2); + for (size_t iat = 0; iat < natoms * 2; iat++) + { + auto& vals = vals_local[ip_idx][iat]; + vals.resize(lm_tot); + std::fill(vals.begin(), vals.end(), 0.0); + } + } + } + + const size_t size_pw_tile = 32; + const size_t num_pw_tiles = (Gvecs.NumGvecs + size_pw_tile - 1) / size_pw_tile; + aligned_vector j_lm_G(lm_tot, 0.0); + std::vector> phase_shift_r(size_pw_tile); + std::vector> phase_shift_i(size_pw_tile); + std::vector> YlmG(size_pw_tile); + for (size_t ig = 0; ig < size_pw_tile; ig++) + { + phase_shift_r[ig].resize(natoms); + phase_shift_i[ig].resize(natoms); + YlmG[ig].resize(lm_tot); + } + SoaSphericalTensor Ylm(lmax); + +#pragma omp for + for (size_t tile_id = 0; tile_id < num_pw_tiles; tile_id++) + { + const size_t ig_first = tile_id * size_pw_tile; + const size_t ig_last = std::min((tile_id + 1) * size_pw_tile, Gvecs.NumGvecs); + for (size_t ig = ig_first; ig < ig_last; ig++) + { + const size_t ig_local = ig - ig_first; + // calculate phase shift for all the centers of this group + Gvecs.calc_phase_shift(myRSoA, ig, phase_shift_r[ig_local], phase_shift_i[ig_local]); + Gvecs.calc_Ylm_G(ig, Ylm, YlmG[ig_local]); + } + + for (int ip = 0; ip < spline_npoints; ip++) + { + double r = delta * static_cast(ip); + const size_t ip_idx = tid * spline_npoints + ip; + + for (size_t ig = ig_first; ig < ig_last; ig++) + { + const size_t ig_local = ig - ig_first; + // calculate spherical bessel function + Gvecs.calc_jlm_G(lmax, r, ig, j_lm_G); + for (size_t lm = 0; lm < lm_tot; lm++) + j_lm_G[lm] *= YlmG[ig_local][lm]; + + const double cG_r = cG[ig + gvec_first].real(); + const double cG_i = cG[ig + gvec_first].imag(); + if (policy == 1) + { + for (size_t lm = 0; lm < lm_tot; lm++) + { + double* restrict vals_r = vals_local[ip_idx][lm * 2].data(); + double* restrict vals_i = vals_local[ip_idx][lm * 2 + 1].data(); + const double* restrict ps_r_ptr = phase_shift_r[ig_local].data(); + const double* restrict ps_i_ptr = phase_shift_i[ig_local].data(); + double cG_j_r = cG_r * j_lm_G[lm]; + double cG_j_i = cG_i * j_lm_G[lm]; +#pragma omp simd aligned(vals_r, vals_i, ps_r_ptr, ps_i_ptr : QMC_SIMD_ALIGNMENT) + for (size_t idx = 0; idx < natoms; idx++) + { + const double ps_r = ps_r_ptr[idx]; + const double ps_i = ps_i_ptr[idx]; + vals_r[idx] += cG_j_r * ps_r - cG_j_i * ps_i; + vals_i[idx] += cG_j_i * ps_r + cG_j_r * ps_i; + } + } + } + else + { + for (size_t idx = 0; idx < natoms; idx++) + { + double* restrict vals_r = vals_local[ip_idx][idx * 2].data(); + double* restrict vals_i = vals_local[ip_idx][idx * 2 + 1].data(); + const double* restrict j_lm_G_ptr = j_lm_G.data(); + double cG_ps_r = cG_r * phase_shift_r[ig_local][idx] - cG_i * phase_shift_i[ig_local][idx]; + double cG_ps_i = cG_i * phase_shift_r[ig_local][idx] + cG_r * phase_shift_i[ig_local][idx]; +#pragma omp simd aligned(vals_r, vals_i, j_lm_G_ptr : QMC_SIMD_ALIGNMENT) + for (size_t lm = 0; lm < lm_tot; lm++) + { + const double jlm = j_lm_G_ptr[lm]; + vals_r[lm] += cG_ps_r * jlm; + vals_i[lm] += cG_ps_i * jlm; + } + } + } + } + } + } + +#pragma omp for collapse(2) + for (int ip = 0; ip < spline_npoints; ip++) + for (size_t idx = 0; idx < natoms; idx++) + { + double* vals = all_vals[idx][ip]; + for (size_t tid = 0; tid < nt; tid++) + for (size_t lm = 0; lm < lm_tot; lm++) + { + double vals_th_r, vals_th_i; + const size_t ip_idx = tid * spline_npoints + ip; + if (policy == 1) + { + vals_th_r = vals_local[ip_idx][lm * 2][idx]; + vals_th_i = vals_local[ip_idx][lm * 2 + 1][idx]; + } + else + { + vals_th_r = vals_local[ip_idx][idx * 2][lm]; + vals_th_i = vals_local[ip_idx][idx * 2 + 1][lm]; + } + const double real_tmp = 4.0 * M_PI * i_power[lm].real(); + const double imag_tmp = 4.0 * M_PI * i_power[lm].imag(); + vals[lm] += vals_th_r * real_tmp - vals_th_i * imag_tmp; + vals[lm + lm_tot] += vals_th_i * real_tmp + vals_th_r * imag_tmp; + } + } + } + //app_log() << "Building band " << iorb << " at center " << center_idx << std::endl; + + for (size_t idx = 0; idx < natoms; idx++) + { + // reduce all_vals + band_group_comm.reduce_in_place(all_vals[idx].data(), all_vals[idx].size()); + if (!band_group_comm.isGroupLeader()) + continue; +#pragma omp parallel for + for (int lm = 0; lm < lm_tot; lm++) + { + auto& mycenter = centers[mygroup[idx]]; + aligned_vector splineData_r(spline_npoints); + UBspline_1d_d* atomic_spline_r = nullptr; + for (size_t ip = 0; ip < spline_npoints; ip++) + splineData_r[ip] = all_vals[idx][ip][lm]; + atomic_spline_r = einspline::create(atomic_spline_r, 0.0, spline_radius, spline_npoints, splineData_r.data(), + ((lm == 0) || (lm > 3))); + if (!bspline.isComplex()) + { + mycenter.set_spline(atomic_spline_r, lm, iorb); + einspline::destroy(atomic_spline_r); + } + else + { + aligned_vector splineData_i(spline_npoints); + UBspline_1d_d* atomic_spline_i = nullptr; + for (size_t ip = 0; ip < spline_npoints; ip++) + splineData_i[ip] = all_vals[idx][ip][lm + lm_tot]; + atomic_spline_i = einspline::create(atomic_spline_i, 0.0, spline_radius, spline_npoints, splineData_i.data(), + ((lm == 0) || (lm > 3))); + mycenter.set_spline(atomic_spline_r, lm, iorb * 2); + mycenter.set_spline(atomic_spline_i, lm, iorb * 2 + 1); + einspline::destroy(atomic_spline_r); + einspline::destroy(atomic_spline_i); + } + } + } + } +} + +template +void HybridRepSetReader::initialize_hybrid_pio_gather(const int spin, + const BandInfoGroup& bandgroup, + SA& bspline) const +{ + auto& mybuilder = spline_reader_.mybuilder; + //distribute bands over processor groups + int Nbands = bandgroup.getNumDistinctOrbitals(); + const int Nprocs = myComm->size(); + const int Nbandgroups = std::min(Nbands, Nprocs); + Communicate band_group_comm(*myComm, Nbandgroups); + std::vector band_groups(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, band_groups); + int iorb_first = band_groups[band_group_comm.getGroupID()]; + int iorb_last = band_groups[band_group_comm.getGroupID() + 1]; + + app_log() << "Start transforming plane waves to 3D B-splines and atomic radial orbital 1D B-splines." << std::endl; + OneSplineOrbData oneband(mybuilder->MeshSize, bspline.HalfG, bspline.isComplex()); + hdf_archive h5f(&band_group_comm, false); + Vector> cG(mybuilder->Gvecs[0].size()); + const std::vector& cur_bands = bandgroup.myBands; + if (band_group_comm.isGroupLeader()) + h5f.open(mybuilder->H5FileName, H5F_ACC_RDONLY); + for (int iorb = iorb_first; iorb < iorb_last; iorb++) + { + if (band_group_comm.isGroupLeader()) + { + const auto& cur_band = cur_bands[bspline.BandIndexMap[iorb]]; + const int ti = cur_band.TwistIndex; + spline_reader_.readOneOrbitalCoefs(psi_g_path(ti, spin, cur_band.BandIndex), h5f, cG); + oneband.fft_spline(cG, mybuilder->Gvecs[0], mybuilder->primcell_kpoints[ti], rotate); + bspline.set_spline(&oneband.get_spline_r(), &oneband.get_spline_i(), cur_band.TwistIndex, iorb, 0); + } + band_group_comm.bcast(cG); + create_atomic_centers_Gspace(cG, band_group_comm, iorb, oneband.getRotatePhase(), bspline); + } + + myComm->barrier(); + if (band_group_comm.isGroupLeader()) + { + Timer now; + bspline.gather_tables(band_group_comm.getGroupLeaderComm()); + app_log() << " Time to gather the table = " << now.elapsed() << std::endl; + } +} + +#if defined(QMC_COMPLEX) +template class HybridRepSetReader>>; +template class HybridRepSetReader>>; +#if !defined(QMC_MIXED_PRECISION) +template class HybridRepSetReader>>; +template class HybridRepSetReader>>; +#endif +#else +template class HybridRepSetReader>>; +template class HybridRepSetReader>>; +template class HybridRepSetReader>>; +#if !defined(QMC_MIXED_PRECISION) +template class HybridRepSetReader>>; +template class HybridRepSetReader>>; +template class HybridRepSetReader>>; +#endif +#endif +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h index 678e4929fc..acedd177c5 100644 --- a/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h +++ b/src/QMCWaveFunctions/BsplineFactory/HybridRepSetReader.h @@ -2,7 +2,7 @@ // This file is distributed under the University of Illinois/NCSA Open Source License. // See LICENSE file in top directory for details. // -// Copyright (c) 2019 QMCPACK developers. +// Copyright (c) 2023 QMCPACK developers. // // File developed by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory // @@ -12,660 +12,82 @@ /** @file * - * derived from SplineSetReader + * derived from BsplineReader */ #ifndef QMCPLUSPLUS_HYBRIDREP_READER_H #define QMCPLUSPLUS_HYBRIDREP_READER_H -#include "Numerics/Quadrature.h" -#include "Numerics/Bessel.h" -#include "QMCWaveFunctions/BsplineFactory/HybridRepCenterOrbitals.h" -#include "OhmmsData/AttributeSet.h" -#include "CPU/math.hpp" -#include "Concurrency/OpenMP.h" +#include "BsplineReader.h" +#include "SplineSetReader.h" -namespace qmcplusplus -{ -template -struct Gvectors -{ - using PosType = TinyVector; - using ValueType = std::complex; - - const LT& Lattice; - std::vector gvecs_cart; //Cartesian. - std::vector gmag; - const size_t NumGvecs; - - Gvectors(const std::vector>& gvecs_in, - const LT& Lattice_in, - const TinyVector& HalfG, - size_t first, - size_t last) - : Lattice(Lattice_in), NumGvecs(last - first) - { - gvecs_cart.resize(NumGvecs); - gmag.resize(NumGvecs); -#pragma omp parallel for - for (size_t ig = 0; ig < NumGvecs; ig++) - { - TinyVector gvec_shift; - gvec_shift = gvecs_in[ig + first] + HalfG * 0.5; - gvecs_cart[ig] = Lattice.k_cart(gvec_shift); - gmag[ig] = std::sqrt(dot(gvecs_cart[ig], gvecs_cart[ig])); - } - } - - template - void calc_Ylm_G(const size_t ig, YLM_ENGINE& Ylm, VVT& YlmG) const - { - PosType Ghat(0.0, 0.0, 1.0); - if (gmag[ig] > 0) - Ghat = gvecs_cart[ig] / gmag[ig]; - Ylm.evaluateV(Ghat[0], Ghat[1], Ghat[2], YlmG.data()); - } - - template - inline void calc_jlm_G(const int lmax, ST& r, const size_t ig, VVT& j_lm_G) const - { - bessel_steed_array_cpu(lmax, gmag[ig] * r, j_lm_G.data()); - for (size_t l = lmax; l > 0; l--) - for (size_t lm = l * l; lm < (l + 1) * (l + 1); lm++) - j_lm_G[lm] = j_lm_G[l]; - } - - template - inline void calc_phase_shift(const PT& RSoA, const size_t ig, VT& phase_shift_real, VT& phase_shift_imag) const - { - const ST* restrict px = RSoA.data(0); - const ST* restrict py = RSoA.data(1); - const ST* restrict pz = RSoA.data(2); - ST* restrict v_r = phase_shift_real.data(); - ST* restrict v_i = phase_shift_imag.data(); - const ST& gv_x = gvecs_cart[ig][0]; - const ST& gv_y = gvecs_cart[ig][1]; - const ST& gv_z = gvecs_cart[ig][2]; - -#pragma omp simd aligned(px, py, pz, v_r, v_i : QMC_SIMD_ALIGNMENT) - for (size_t iat = 0; iat < RSoA.size(); iat++) - qmcplusplus::sincos(px[iat] * gv_x + py[iat] * gv_y + pz[iat] * gv_z, v_i + iat, v_r + iat); - } - - template - ValueType evaluate_psi_r(const Vector>& cG, const PT& pos) - { - assert(cG.size() == NumGvecs); - std::complex val(0.0, 0.0); - for (size_t ig = 0; ig < NumGvecs; ig++) - { - ST s, c; - qmcplusplus::sincos(dot(gvecs_cart[ig], pos), &s, &c); - ValueType pw0(c, s); - val += cG[ig] * pw0; - } - return val; - } - - template - void evaluate_psi_r(const Vector>& cG, const PT& pos, ValueType& phi, ValueType& d2phi) - { - assert(cG.size() == NumGvecs); - d2phi = phi = 0.0; - for (size_t ig = 0; ig < NumGvecs; ig++) - { - ST s, c; - qmcplusplus::sincos(dot(gvecs_cart[ig], pos), &s, &c); - ValueType pw0(c, s); - phi += cG[ig] * pw0; - d2phi += cG[ig] * pw0 * (-dot(gvecs_cart[ig], gvecs_cart[ig])); - } - } - - double evaluate_KE(const Vector>& cG) - { - assert(cG.size() == NumGvecs); - double KE = 0; - for (size_t ig = 0; ig < NumGvecs; ig++) - KE += dot(gvecs_cart[ig], gvecs_cart[ig]) * (cG[ig].real() * cG[ig].real() + cG[ig].imag() * cG[ig].imag()); - return KE / 2.0; - } -}; +#if !defined(QMC_COMPLEX) +#include "HybridRepReal.h" +#endif +#include "HybridRepCplx.h" + +#if defined(QMC_COMPLEX) +#include "SplineC2C.h" +#include "SplineC2COMPTarget.h" +#else +#include "SplineR2R.h" +#include "SplineC2R.h" +#include "SplineC2ROMPTarget.h" +#endif +namespace qmcplusplus +{ /** General HybridRepSetReader to handle any unitcell */ template class HybridRepSetReader : public BsplineReader { using SplineReader = SplineSetReader; - using DataType = typename SplineReader::DataType; + using DataType = typename SA::DataType; SplineReader spline_reader_; public: - HybridRepSetReader(EinsplineSetBuilder* e) : BsplineReader(e), spline_reader_(e) {} + HybridRepSetReader(EinsplineSetBuilder* e); std::unique_ptr create_spline_set(const std::string& my_name, int spin, - const BandInfoGroup& bandgroup) override - { - auto bspline = std::make_unique(my_name); - app_log() << " ClassName = " << bspline->getClassName() << std::endl; - // set info for Hybrid - initialize_hybridrep_atomic_centers(*bspline); - bool foundspline = spline_reader_.createSplineDataSpaceLookforDumpFile(bandgroup, *bspline); - typename SA::HYBRIDBASE& hybrid_center_orbs = *bspline; - hybrid_center_orbs.resizeStorage(bspline->myV.size()); - if (foundspline) - { - Timer now; - hdf_archive h5f(myComm); - const auto splinefile = getSplineDumpFileName(bandgroup); - h5f.open(splinefile, H5F_ACC_RDONLY); - foundspline = bspline->read_splines(h5f); - if (foundspline) - app_log() << " Successfully restored 3D B-spline coefficients from " << splinefile << ". The reading time is " - << now.elapsed() << " sec." << std::endl; - } - - if (!foundspline) - { - hybrid_center_orbs.flush_zero(); - initialize_hybrid_pio_gather(spin, bandgroup, *bspline); - - if (saveSplineCoefs && myComm->rank() == 0) - { - Timer now; - const std::string splinefile(getSplineDumpFileName(bandgroup)); - hdf_archive h5f; - h5f.create(splinefile); - std::string classname = bspline->getClassName(); - h5f.write(classname, "class_name"); - int sizeD = sizeof(DataType); - h5f.write(sizeD, "sizeof"); - bspline->write_splines(h5f); - h5f.close(); - app_log() << " Stored spline coefficients in " << splinefile << " for potential reuse. The writing time is " - << now.elapsed() << " sec." << std::endl; - } - } - - { - Timer now; - bspline->bcast_tables(myComm); - app_log() << " Time to bcast the table = " << now.elapsed() << std::endl; - } - return bspline; - } + const BandInfoGroup& bandgroup) override; /** initialize basic parameters of atomic orbitals */ - void initialize_hybridrep_atomic_centers(SA& bspline) const - { - auto& mybuilder = spline_reader_.mybuilder; - - OhmmsAttributeSet a; - std::string scheme_name("Consistent"); - std::string s_function_name("LEKS2018"); - a.add(scheme_name, "smoothing_scheme"); - a.add(s_function_name, "smoothing_function"); - a.put(mybuilder->XMLRoot); - // assign smooth_scheme - if (scheme_name == "Consistent") - bspline.smooth_scheme = SA::smoothing_schemes::CONSISTENT; - else if (scheme_name == "SmoothAll") - bspline.smooth_scheme = SA::smoothing_schemes::SMOOTHALL; - else if (scheme_name == "SmoothPartial") - bspline.smooth_scheme = SA::smoothing_schemes::SMOOTHPARTIAL; - else - APP_ABORT("initialize_hybridrep_atomic_centers wrong smoothing_scheme name! Only allows Consistent, SmoothAll or " - "SmoothPartial."); - - // assign smooth_function - if (s_function_name == "LEKS2018") - bspline.smooth_func_id = smoothing_functions::LEKS2018; - else if (s_function_name == "coscos") - bspline.smooth_func_id = smoothing_functions::COSCOS; - else if (s_function_name == "linear") - bspline.smooth_func_id = smoothing_functions::LINEAR; - else - APP_ABORT( - "initialize_hybridrep_atomic_centers wrong smoothing_function name! Only allows LEKS2018, coscos or linear."); - app_log() << "Hybrid orbital representation uses " << scheme_name << " smoothing scheme and " << s_function_name - << " smoothing function." << std::endl; - - bspline.set_info(*(mybuilder->SourcePtcl), mybuilder->TargetPtcl, mybuilder->Super2Prim); - auto& centers = bspline.AtomicCenters; - auto& ACInfo = mybuilder->AtomicCentersInfo; - // load atomic center info only when it is not initialized - if (centers.size() == 0) - { - bool success = true; - app_log() << "Reading atomic center info for hybrid representation" << std::endl; - for (int center_idx = 0; center_idx < ACInfo.Ncenters; center_idx++) - { - const int my_GroupID = ACInfo.GroupID[center_idx]; - if (ACInfo.cutoff[center_idx] < 0) - { - app_error() << "Hybrid orbital representation needs parameter 'cutoff_radius' for atom " << center_idx - << std::endl; - success = false; - } - - if (ACInfo.inner_cutoff[center_idx] < 0) - { - const double inner_cutoff = std::max(ACInfo.cutoff[center_idx] - 0.3, 0.0); - app_log() << "Hybrid orbital representation setting 'inner_cutoff' to " << inner_cutoff << " for group " - << my_GroupID << " as atom " << center_idx << std::endl; - // overwrite the inner_cutoff of all the atoms of the same species - for (int id = 0; id < ACInfo.Ncenters; id++) - if (my_GroupID == ACInfo.GroupID[id]) - ACInfo.inner_cutoff[id] = inner_cutoff; - } - else if (ACInfo.inner_cutoff[center_idx] > ACInfo.cutoff[center_idx]) - { - app_error() << "Hybrid orbital representation 'inner_cutoff' must be smaller than 'spline_radius' for atom " - << center_idx << std::endl; - success = false; - } - - if (ACInfo.cutoff[center_idx] > 0) - { - if (ACInfo.lmax[center_idx] < 0) - { - app_error() << "Hybrid orbital representation needs parameter 'lmax' for atom " << center_idx << std::endl; - success = false; - } - - if (ACInfo.spline_radius[center_idx] < 0 && ACInfo.spline_npoints[center_idx] < 0) - { - app_log() << "Parameters 'spline_radius' and 'spline_npoints' for group " << my_GroupID << " as atom " - << center_idx << " are not specified." << std::endl; - const double delta = std::min(0.02, ACInfo.cutoff[center_idx] / 4.0); - const int n_grid_point = std::ceil((ACInfo.cutoff[center_idx] + 1e-4) / delta) + 3; - for (int id = 0; id < ACInfo.Ncenters; id++) - if (my_GroupID == ACInfo.GroupID[id]) - { - ACInfo.spline_npoints[id] = n_grid_point; - ACInfo.spline_radius[id] = (n_grid_point - 1) * delta; - } - app_log() << " Based on default grid point distance " << delta << std::endl; - app_log() << " Setting 'spline_npoints' to " << ACInfo.spline_npoints[center_idx] << std::endl; - app_log() << " Setting 'spline_radius' to " << ACInfo.spline_radius[center_idx] << std::endl; - } - else - { - if (ACInfo.spline_radius[center_idx] < 0) - { - app_error() << "Hybrid orbital representation needs parameter 'spline_radius' for atom " << center_idx - << std::endl; - success = false; - } - - if (ACInfo.spline_npoints[center_idx] < 0) - { - app_error() << "Hybrid orbital representation needs parameter 'spline_npoints' for atom " << center_idx - << std::endl; - success = false; - } - } - - // check maximally allowed cutoff_radius - double max_allowed_cutoff = ACInfo.spline_radius[center_idx] - - 2.0 * ACInfo.spline_radius[center_idx] / (ACInfo.spline_npoints[center_idx] - 1); - if (success && ACInfo.cutoff[center_idx] > max_allowed_cutoff) - { - app_error() << "Hybrid orbital representation requires cutoff_radius<=" << max_allowed_cutoff - << " calculated by spline_radius-2*spline_radius/(spline_npoints-1) for atom " << center_idx - << std::endl; - success = false; - } - } - else - { - // no atomic regions for this atom type - ACInfo.spline_radius[center_idx] = 0.0; - ACInfo.spline_npoints[center_idx] = 0; - ACInfo.lmax[center_idx] = 0; - } - } - - if (!success) - spline_reader_.myComm->barrier_and_abort( - "initialize_hybridrep_atomic_centers Failed to initialize atomic centers " - "in hybrid orbital representation!"); - - for (int center_idx = 0; center_idx < ACInfo.Ncenters; center_idx++) - { - AtomicOrbitals oneCenter(ACInfo.lmax[center_idx]); - oneCenter.set_info(ACInfo.ion_pos[center_idx], ACInfo.cutoff[center_idx], ACInfo.inner_cutoff[center_idx], - ACInfo.spline_radius[center_idx], ACInfo.non_overlapping_radius[center_idx], - ACInfo.spline_npoints[center_idx]); - centers.push_back(oneCenter); - } - } - } + void initialize_hybridrep_atomic_centers(SA& bspline) const; /** initialize construct atomic orbital radial functions from plane waves */ inline void create_atomic_centers_Gspace(const Vector>& cG, Communicate& band_group_comm, const int iorb, const std::complex& rotate_phase, - SA& bspline) const - { - auto& mybuilder = spline_reader_.mybuilder; - double rotate_phase_r = rotate_phase.real(); - double rotate_phase_i = rotate_phase.imag(); - - band_group_comm.bcast(rotate_phase_r); - band_group_comm.bcast(rotate_phase_i); - //distribute G-vectors over processor groups - const int Ngvecs = mybuilder->Gvecs[0].size(); - const int Nprocs = band_group_comm.size(); - const int Ngvecgroups = std::min(Ngvecs, Nprocs); - Communicate gvec_group_comm(band_group_comm, Ngvecgroups); - std::vector gvec_groups(Ngvecgroups + 1, 0); - FairDivideLow(Ngvecs, Ngvecgroups, gvec_groups); - const int gvec_first = gvec_groups[gvec_group_comm.getGroupID()]; - const int gvec_last = gvec_groups[gvec_group_comm.getGroupID() + 1]; - - // prepare Gvecs Ylm(G) - using UnitCellType = typename EinsplineSetBuilder::UnitCellType; - Gvectors Gvecs(mybuilder->Gvecs[0], mybuilder->PrimCell, bspline.HalfG, gvec_first, - gvec_last); - // if(band_group_comm.isGroupLeader()) std::cout << "print band=" << iorb << " KE=" << Gvecs.evaluate_KE(cG) << std::endl; - - std::vector>& centers = bspline.AtomicCenters; - app_log() << "Transforming band " << iorb << " on Rank 0" << std::endl; - // collect atomic centers by group - std::vector uniq_species; - for (int center_idx = 0; center_idx < centers.size(); center_idx++) - { - auto& ACInfo = mybuilder->AtomicCentersInfo; - const int my_GroupID = ACInfo.GroupID[center_idx]; - int found_idx = -1; - for (size_t idx = 0; idx < uniq_species.size(); idx++) - if (my_GroupID == uniq_species[idx]) - { - found_idx = idx; - break; - } - if (found_idx < 0) - uniq_species.push_back(my_GroupID); - } - // construct group list - std::vector> group_list(uniq_species.size()); - for (int center_idx = 0; center_idx < centers.size(); center_idx++) - { - auto& ACInfo = mybuilder->AtomicCentersInfo; - const int my_GroupID = ACInfo.GroupID[center_idx]; - for (size_t idx = 0; idx < uniq_species.size(); idx++) - if (my_GroupID == uniq_species[idx]) - { - group_list[idx].push_back(center_idx); - break; - } - } - - for (int group_idx = 0; group_idx < group_list.size(); group_idx++) - { - const auto& mygroup = group_list[group_idx]; - const double spline_radius = centers[mygroup[0]].getSplineRadius(); - const int spline_npoints = centers[mygroup[0]].getSplineNpoints(); - const int lmax = centers[mygroup[0]].getLmax(); - const double delta = spline_radius / static_cast(spline_npoints - 1); - const int lm_tot = (lmax + 1) * (lmax + 1); - const size_t natoms = mygroup.size(); - const int policy = lm_tot > natoms ? 0 : 1; - - std::vector> i_power(lm_tot); - // rotate phase is introduced here. - std::complex i_temp(rotate_phase_r, rotate_phase_i); - for (size_t l = 0; l <= lmax; l++) - { - for (size_t lm = l * l; lm < (l + 1) * (l + 1); lm++) - i_power[lm] = i_temp; - i_temp *= std::complex(0.0, 1.0); - } - - std::vector> all_vals(natoms); - std::vector>> vals_local(spline_npoints * omp_get_max_threads()); - VectorSoaContainer myRSoA(natoms); - for (size_t idx = 0; idx < natoms; idx++) - { - all_vals[idx].resize(spline_npoints, lm_tot * 2); - all_vals[idx] = 0.0; - myRSoA(idx) = centers[mygroup[idx]].getCenterPos(); - } - -#pragma omp parallel - { - const size_t tid = omp_get_thread_num(); - const size_t nt = omp_get_num_threads(); - - for (int ip = 0; ip < spline_npoints; ip++) - { - const size_t ip_idx = tid * spline_npoints + ip; - if (policy == 1) - { - vals_local[ip_idx].resize(lm_tot * 2); - for (size_t lm = 0; lm < lm_tot * 2; lm++) - { - auto& vals = vals_local[ip_idx][lm]; - vals.resize(natoms); - std::fill(vals.begin(), vals.end(), 0.0); - } - } - else - { - vals_local[ip_idx].resize(natoms * 2); - for (size_t iat = 0; iat < natoms * 2; iat++) - { - auto& vals = vals_local[ip_idx][iat]; - vals.resize(lm_tot); - std::fill(vals.begin(), vals.end(), 0.0); - } - } - } - - const size_t size_pw_tile = 32; - const size_t num_pw_tiles = (Gvecs.NumGvecs + size_pw_tile - 1) / size_pw_tile; - aligned_vector j_lm_G(lm_tot, 0.0); - std::vector> phase_shift_r(size_pw_tile); - std::vector> phase_shift_i(size_pw_tile); - std::vector> YlmG(size_pw_tile); - for (size_t ig = 0; ig < size_pw_tile; ig++) - { - phase_shift_r[ig].resize(natoms); - phase_shift_i[ig].resize(natoms); - YlmG[ig].resize(lm_tot); - } - SoaSphericalTensor Ylm(lmax); - -#pragma omp for - for (size_t tile_id = 0; tile_id < num_pw_tiles; tile_id++) - { - const size_t ig_first = tile_id * size_pw_tile; - const size_t ig_last = std::min((tile_id + 1) * size_pw_tile, Gvecs.NumGvecs); - for (size_t ig = ig_first; ig < ig_last; ig++) - { - const size_t ig_local = ig - ig_first; - // calculate phase shift for all the centers of this group - Gvecs.calc_phase_shift(myRSoA, ig, phase_shift_r[ig_local], phase_shift_i[ig_local]); - Gvecs.calc_Ylm_G(ig, Ylm, YlmG[ig_local]); - } - - for (int ip = 0; ip < spline_npoints; ip++) - { - double r = delta * static_cast(ip); - const size_t ip_idx = tid * spline_npoints + ip; - - for (size_t ig = ig_first; ig < ig_last; ig++) - { - const size_t ig_local = ig - ig_first; - // calculate spherical bessel function - Gvecs.calc_jlm_G(lmax, r, ig, j_lm_G); - for (size_t lm = 0; lm < lm_tot; lm++) - j_lm_G[lm] *= YlmG[ig_local][lm]; - - const double cG_r = cG[ig + gvec_first].real(); - const double cG_i = cG[ig + gvec_first].imag(); - if (policy == 1) - { - for (size_t lm = 0; lm < lm_tot; lm++) - { - double* restrict vals_r = vals_local[ip_idx][lm * 2].data(); - double* restrict vals_i = vals_local[ip_idx][lm * 2 + 1].data(); - const double* restrict ps_r_ptr = phase_shift_r[ig_local].data(); - const double* restrict ps_i_ptr = phase_shift_i[ig_local].data(); - double cG_j_r = cG_r * j_lm_G[lm]; - double cG_j_i = cG_i * j_lm_G[lm]; -#pragma omp simd aligned(vals_r, vals_i, ps_r_ptr, ps_i_ptr : QMC_SIMD_ALIGNMENT) - for (size_t idx = 0; idx < natoms; idx++) - { - const double ps_r = ps_r_ptr[idx]; - const double ps_i = ps_i_ptr[idx]; - vals_r[idx] += cG_j_r * ps_r - cG_j_i * ps_i; - vals_i[idx] += cG_j_i * ps_r + cG_j_r * ps_i; - } - } - } - else - { - for (size_t idx = 0; idx < natoms; idx++) - { - double* restrict vals_r = vals_local[ip_idx][idx * 2].data(); - double* restrict vals_i = vals_local[ip_idx][idx * 2 + 1].data(); - const double* restrict j_lm_G_ptr = j_lm_G.data(); - double cG_ps_r = cG_r * phase_shift_r[ig_local][idx] - cG_i * phase_shift_i[ig_local][idx]; - double cG_ps_i = cG_i * phase_shift_r[ig_local][idx] + cG_r * phase_shift_i[ig_local][idx]; -#pragma omp simd aligned(vals_r, vals_i, j_lm_G_ptr : QMC_SIMD_ALIGNMENT) - for (size_t lm = 0; lm < lm_tot; lm++) - { - const double jlm = j_lm_G_ptr[lm]; - vals_r[lm] += cG_ps_r * jlm; - vals_i[lm] += cG_ps_i * jlm; - } - } - } - } - } - } - -#pragma omp for collapse(2) - for (int ip = 0; ip < spline_npoints; ip++) - for (size_t idx = 0; idx < natoms; idx++) - { - double* vals = all_vals[idx][ip]; - for (size_t tid = 0; tid < nt; tid++) - for (size_t lm = 0; lm < lm_tot; lm++) - { - double vals_th_r, vals_th_i; - const size_t ip_idx = tid * spline_npoints + ip; - if (policy == 1) - { - vals_th_r = vals_local[ip_idx][lm * 2][idx]; - vals_th_i = vals_local[ip_idx][lm * 2 + 1][idx]; - } - else - { - vals_th_r = vals_local[ip_idx][idx * 2][lm]; - vals_th_i = vals_local[ip_idx][idx * 2 + 1][lm]; - } - const double real_tmp = 4.0 * M_PI * i_power[lm].real(); - const double imag_tmp = 4.0 * M_PI * i_power[lm].imag(); - vals[lm] += vals_th_r * real_tmp - vals_th_i * imag_tmp; - vals[lm + lm_tot] += vals_th_i * real_tmp + vals_th_r * imag_tmp; - } - } - } - //app_log() << "Building band " << iorb << " at center " << center_idx << std::endl; - - for (size_t idx = 0; idx < natoms; idx++) - { - // reduce all_vals - band_group_comm.reduce_in_place(all_vals[idx].data(), all_vals[idx].size()); - if (!band_group_comm.isGroupLeader()) - continue; -#pragma omp parallel for - for (int lm = 0; lm < lm_tot; lm++) - { - auto& mycenter = centers[mygroup[idx]]; - aligned_vector splineData_r(spline_npoints); - UBspline_1d_d* atomic_spline_r = nullptr; - for (size_t ip = 0; ip < spline_npoints; ip++) - splineData_r[ip] = all_vals[idx][ip][lm]; - atomic_spline_r = einspline::create(atomic_spline_r, 0.0, spline_radius, spline_npoints, splineData_r.data(), - ((lm == 0) || (lm > 3))); - if (!bspline.isComplex()) - { - mycenter.set_spline(atomic_spline_r, lm, iorb); - einspline::destroy(atomic_spline_r); - } - else - { - aligned_vector splineData_i(spline_npoints); - UBspline_1d_d* atomic_spline_i = nullptr; - for (size_t ip = 0; ip < spline_npoints; ip++) - splineData_i[ip] = all_vals[idx][ip][lm + lm_tot]; - atomic_spline_i = einspline::create(atomic_spline_i, 0.0, spline_radius, spline_npoints, - splineData_i.data(), ((lm == 0) || (lm > 3))); - mycenter.set_spline(atomic_spline_r, lm, iorb * 2); - mycenter.set_spline(atomic_spline_i, lm, iorb * 2 + 1); - einspline::destroy(atomic_spline_r); - einspline::destroy(atomic_spline_i); - } - } - } - } - } + SA& bspline) const; /** transforming planewave orbitals to 3D B-spline orbitals and 1D B-spline radial orbitals in real space. * @param spin orbital dataset spin index * @param bandgroup band info * @param bspline the spline object being worked on */ - void initialize_hybrid_pio_gather(const int spin, const BandInfoGroup& bandgroup, SA& bspline) const - { - auto& mybuilder = spline_reader_.mybuilder; - //distribute bands over processor groups - int Nbands = bandgroup.getNumDistinctOrbitals(); - const int Nprocs = myComm->size(); - const int Nbandgroups = std::min(Nbands, Nprocs); - Communicate band_group_comm(*myComm, Nbandgroups); - std::vector band_groups(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, band_groups); - int iorb_first = band_groups[band_group_comm.getGroupID()]; - int iorb_last = band_groups[band_group_comm.getGroupID() + 1]; - - app_log() << "Start transforming plane waves to 3D B-splines and atomic radial orbital 1D B-splines." << std::endl; - OneSplineOrbData oneband(mybuilder->MeshSize, bspline.HalfG, bspline.isComplex()); - hdf_archive h5f(&band_group_comm, false); - Vector> cG(mybuilder->Gvecs[0].size()); - const std::vector& cur_bands = bandgroup.myBands; - if (band_group_comm.isGroupLeader()) - h5f.open(mybuilder->H5FileName, H5F_ACC_RDONLY); - for (int iorb = iorb_first; iorb < iorb_last; iorb++) - { - if (band_group_comm.isGroupLeader()) - { - const auto& cur_band = cur_bands[bspline.BandIndexMap[iorb]]; - const int ti = cur_band.TwistIndex; - spline_reader_.readOneOrbitalCoefs(psi_g_path(ti, spin, cur_band.BandIndex), h5f, cG); - oneband.fft_spline(cG, mybuilder->Gvecs[0], mybuilder->primcell_kpoints[ti], rotate); - bspline.set_spline(&oneband.get_spline_r(), &oneband.get_spline_i(), cur_band.TwistIndex, iorb, 0); - } - band_group_comm.bcast(cG); - create_atomic_centers_Gspace(cG, band_group_comm, iorb, oneband.getRotatePhase(), bspline); - } - - myComm->barrier(); - if (band_group_comm.isGroupLeader()) - { - Timer now; - bspline.gather_tables(band_group_comm.getGroupLeaderComm()); - app_log() << " Time to gather the table = " << now.elapsed() << std::endl; - } - } + void initialize_hybrid_pio_gather(const int spin, const BandInfoGroup& bandgroup, SA& bspline) const; }; + +#if defined(QMC_COMPLEX) +extern template class HybridRepSetReader>>; +extern template class HybridRepSetReader>>; +#if !defined(QMC_MIXED_PRECISION) +extern template class HybridRepSetReader>>; +extern template class HybridRepSetReader>>; +#endif +#else +extern template class HybridRepSetReader>>; +extern template class HybridRepSetReader>>; +extern template class HybridRepSetReader>>; +#if !defined(QMC_MIXED_PRECISION) +extern template class HybridRepSetReader>>; +extern template class HybridRepSetReader>>; +extern template class HybridRepSetReader>>; +#endif +#endif } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/OneSplineOrbData.cpp b/src/QMCWaveFunctions/BsplineFactory/OneSplineOrbData.cpp new file mode 100644 index 0000000000..43481d31b6 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/OneSplineOrbData.cpp @@ -0,0 +1,93 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2023 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@inte.com, Intel Corp. +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + + +#include "OneSplineOrbData.hpp" +#include +#include "einspline_helper.hpp" + +namespace qmcplusplus +{ +void OneSplineOrbData::create(const TinyVector& halfG) + { + const int nx = mesh_size_[0]; + const int ny = mesh_size_[1]; + const int nz = mesh_size_[2]; + //perform FFT using FFTW + FFTbox.resize(nx, ny, nz); + FFTplan = fftw_plan_dft_3d(nx, ny, nz, reinterpret_cast(FFTbox.data()), + reinterpret_cast(FFTbox.data()), +1, FFTW_ESTIMATE); + splineData_r.resize(nx, ny, nz); + if (isComplex_) + splineData_i.resize(nx, ny, nz); + + TinyVector start(0.0); + TinyVector end(1.0); + spline_r = einspline::create(spline_r, start, end, mesh_size_, halfG); + if (isComplex_) + spline_i = einspline::create(spline_i, start, end, mesh_size_, halfG); + } + + OneSplineOrbData::OneSplineOrbData(const TinyVector& mesh_size, const TinyVector& halfG, const bool isComplex) + : mesh_size_(mesh_size), isComplex_(isComplex) + { + create(halfG); + } + + OneSplineOrbData::~OneSplineOrbData() { clear(); } + + void OneSplineOrbData::clear() + { + einspline::destroy(spline_r); + einspline::destroy(spline_i); + if (FFTplan != nullptr) + fftw_destroy_plan(FFTplan); + FFTplan = nullptr; + } + + /** fft and spline cG + * @param cG psi_g to be processed + * @param ti twist index + * @param iorb orbital index + * + * Perform FFT and spline to spline_r and spline_i + */ + void OneSplineOrbData::fft_spline(const Vector>& cG, + const std::vector>& gvecs, + const TinyVector& primcell_kpoint, + const bool rotate) + { + unpack4fftw(cG, gvecs, mesh_size_, FFTbox); + fftw_execute(FFTplan); + if (isComplex_) + { + if (rotate) + fix_phase_rotate_c2c(FFTbox, splineData_r, splineData_i, primcell_kpoint, rotate_phase_r, rotate_phase_i); + else + { + split_real_components_c2c(FFTbox, splineData_r, splineData_i); + rotate_phase_r = 1.0; + rotate_phase_i = 0.0; + } + einspline::set(spline_r, splineData_r.data()); + einspline::set(spline_i, splineData_i.data()); + } + else + { + fix_phase_rotate_c2r(FFTbox, splineData_r, primcell_kpoint, rotate_phase_r, rotate_phase_i); + einspline::set(spline_r, splineData_r.data()); + } + } +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/OneSplineOrbData.hpp b/src/QMCWaveFunctions/BsplineFactory/OneSplineOrbData.hpp new file mode 100644 index 0000000000..9e785d5d37 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/OneSplineOrbData.hpp @@ -0,0 +1,66 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2023 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@inte.com, Intel Corp. +// +// File created by: Ye Luo, yeluo@anl.gov, Argonne National Laboratory +////////////////////////////////////////////////////////////////////////////////////// + + +#ifndef QMCPLUSPLUS_ONESPLINEORBDATA_H +#define QMCPLUSPLUS_ONESPLINEORBDATA_H + +#include +#include +#include +#include +#include + +namespace qmcplusplus +{ +class OneSplineOrbData +{ + Array, 3> FFTbox; + Array splineData_r, splineData_i; + double rotate_phase_r, rotate_phase_i; + UBspline_3d_d* spline_r = nullptr; + UBspline_3d_d* spline_i = nullptr; + fftw_plan FFTplan = nullptr; + + const TinyVector& mesh_size_; + const bool isComplex_; + + void create(const TinyVector& halfG); + +public: + OneSplineOrbData(const TinyVector& mesh_size, const TinyVector& halfG, const bool isComplex); + + ~OneSplineOrbData(); + + auto getRotatePhase() const { return std::complex(rotate_phase_r, rotate_phase_i); } + auto& get_spline_r() { return *spline_r; } + auto& get_spline_i() { return *spline_i; } + + void clear(); + + /** fft and spline cG + * @param cG psi_g to be processed + * @param ti twist index + * @param iorb orbital index + * + * Perform FFT and spline to spline_r and spline_i + */ + void fft_spline(const Vector>& cG, + const std::vector>& gvecs, + const TinyVector& primcell_kpoint, + const bool rotate); +}; +} // namespace qmcplusplus +#endif diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.cpp b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.cpp new file mode 100644 index 0000000000..bf37503294 --- /dev/null +++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.cpp @@ -0,0 +1,217 @@ +////////////////////////////////////////////////////////////////////////////////////// +// This file is distributed under the University of Illinois/NCSA Open Source License. +// See LICENSE file in top directory for details. +// +// Copyright (c) 2023 QMCPACK developers. +// +// File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign +// Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +// Ye Luo, yeluo@anl.gov, Argonne National Laboratory +// Mark A. Berrill, berrillma@ornl.gov, Oak Ridge National Laboratory +// Jeongnim Kim, jeongnim.kim@inte.com, Intel Corp. +// +// File created by: Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign +////////////////////////////////////////////////////////////////////////////////////// + + +#include "SplineSetReader.h" +#include "OneSplineOrbData.hpp" +#include "Utilities/FairDivide.h" +#include "einspline_helper.hpp" +#include +#if defined(QMC_COMPLEX) +#include "SplineC2C.h" +#include "SplineC2COMPTarget.h" +#else +#include "SplineR2R.h" +#include "SplineC2R.h" +#include "SplineC2ROMPTarget.h" +#endif + +namespace qmcplusplus +{ +template +SplineSetReader::SplineSetReader(EinsplineSetBuilder* e) : BsplineReader(e) +{} + +template +std::unique_ptr SplineSetReader::create_spline_set(const std::string& my_name, + int spin, + const BandInfoGroup& bandgroup) +{ + auto bspline = std::make_unique(my_name); + app_log() << " ClassName = " << bspline->getClassName() << std::endl; + bool foundspline = createSplineDataSpaceLookforDumpFile(bandgroup, *bspline); + if (foundspline) + { + Timer now; + hdf_archive h5f(myComm); + const auto splinefile = getSplineDumpFileName(bandgroup); + h5f.open(splinefile, H5F_ACC_RDONLY); + foundspline = bspline->read_splines(h5f); + if (foundspline) + app_log() << " Successfully restored 3D B-spline coefficients from " << splinefile << ". The reading time is " + << now.elapsed() << " sec." << std::endl; + } + + if (!foundspline) + { + bspline->flush_zero(); + + Timer now; + initialize_spline_pio_gather(spin, bandgroup, *bspline); + app_log() << " SplineSetReader initialize_spline_pio " << now.elapsed() << " sec" << std::endl; + + if (saveSplineCoefs && myComm->rank() == 0) + { + Timer now; + const std::string splinefile(getSplineDumpFileName(bandgroup)); + hdf_archive h5f; + h5f.create(splinefile); + std::string classname = bspline->getClassName(); + h5f.write(classname, "class_name"); + int sizeD = sizeof(typename SA::DataType); + h5f.write(sizeD, "sizeof"); + bspline->write_splines(h5f); + h5f.close(); + app_log() << " Stored spline coefficients in " << splinefile << " for potential reuse. The writing time is " + << now.elapsed() << " sec." << std::endl; + } + } + + { + Timer now; + bspline->bcast_tables(myComm); + app_log() << " Time to bcast the table = " << now.elapsed() << std::endl; + } + + return bspline; +} + +template +bool SplineSetReader::createSplineDataSpaceLookforDumpFile(const BandInfoGroup& bandgroup, SA& bspline) const +{ + if (bspline.isComplex()) + app_log() << " Using complex einspline table" << std::endl; + else + app_log() << " Using real einspline table" << std::endl; + + //baseclass handles twists + check_twists(bspline, bandgroup); + + Ugrid xyz_grid[3]; + + typename SA::BCType xyz_bc[3]; + bool havePsig = set_grid(bspline.HalfG, xyz_grid, xyz_bc); + if (!havePsig) + myComm->barrier_and_abort("SplineSetReader needs psi_g. Set precision=\"double\"."); + bspline.create_spline(xyz_grid, xyz_bc); + + int foundspline = 0; + Timer now; + if (myComm->rank() == 0) + { + now.restart(); + hdf_archive h5f(myComm); + foundspline = h5f.open(getSplineDumpFileName(bandgroup), H5F_ACC_RDONLY); + if (foundspline) + { + std::string aname("none"); + foundspline = h5f.readEntry(aname, "class_name"); + foundspline = (aname.find(bspline.getKeyword()) != std::string::npos); + } + if (foundspline) + { + int sizeD = 0; + foundspline = h5f.readEntry(sizeD, "sizeof"); + foundspline = (sizeD == sizeof(typename SA::DataType)); + } + h5f.close(); + } + myComm->bcast(foundspline); + return foundspline; +} + +template +void SplineSetReader::readOneOrbitalCoefs(const std::string& s, + hdf_archive& h5f, + Vector>& cG) const +{ + if (!h5f.readEntry(cG, s)) + { + std::ostringstream msg; + msg << "SplineSetReader Failed to read band(s) from h5 file. " + << "Attempted dataset " << s << " with " << cG.size() << " complex numbers." << std::endl; + throw std::runtime_error(msg.str()); + } + double total_norm = compute_norm(cG); + if ((checkNorm) && (std::abs(total_norm - 1.0) > PW_COEFF_NORM_TOLERANCE)) + { + std::ostringstream msg; + msg << "SplineSetReader The orbital dataset " << s << " has a wrong norm " << total_norm + << ", computed from plane wave coefficients!" << std::endl + << "This may indicate a problem with the HDF5 library versions used " + << "during wavefunction conversion or read." << std::endl; + throw std::runtime_error(msg.str()); + } +} + +template +void SplineSetReader::initialize_spline_pio_gather(const int spin, + const BandInfoGroup& bandgroup, + SA& bspline) const +{ + //distribute bands over processor groups + int Nbands = bandgroup.getNumDistinctOrbitals(); + const int Nprocs = myComm->size(); + const int Nbandgroups = std::min(Nbands, Nprocs); + Communicate band_group_comm(*myComm, Nbandgroups); + std::vector band_groups(Nbandgroups + 1, 0); + FairDivideLow(Nbands, Nbandgroups, band_groups); + int iorb_first = band_groups[band_group_comm.getGroupID()]; + int iorb_last = band_groups[band_group_comm.getGroupID() + 1]; + + app_log() << "Start transforming plane waves to 3D B-Splines." << std::endl; + OneSplineOrbData oneband(mybuilder->MeshSize, bspline.HalfG, bspline.isComplex()); + hdf_archive h5f(&band_group_comm, false); + Vector> cG(mybuilder->Gvecs[0].size()); + const std::vector& cur_bands = bandgroup.myBands; + if (band_group_comm.isGroupLeader()) + { + h5f.open(mybuilder->H5FileName, H5F_ACC_RDONLY); + for (int iorb = iorb_first; iorb < iorb_last; iorb++) + { + const auto& cur_band = cur_bands[bspline.BandIndexMap[iorb]]; + const int ti = cur_band.TwistIndex; + readOneOrbitalCoefs(psi_g_path(ti, spin, cur_band.BandIndex), h5f, cG); + oneband.fft_spline(cG, mybuilder->Gvecs[0], mybuilder->primcell_kpoints[ti], rotate); + bspline.set_spline(&oneband.get_spline_r(), &oneband.get_spline_i(), cur_band.TwistIndex, iorb, 0); + } + + { + band_group_comm.getGroupLeaderComm()->barrier(); + Timer now; + bspline.gather_tables(band_group_comm.getGroupLeaderComm()); + app_log() << " Time to gather the table = " << now.elapsed() << std::endl; + } + } +} + +#if defined(QMC_COMPLEX) +template class SplineSetReader>; +template class SplineSetReader>; +#if !defined(QMC_MIXED_PRECISION) +template class SplineSetReader>; +template class SplineSetReader>; +#endif +#else +template class SplineSetReader>; +template class SplineSetReader>; +template class SplineSetReader>; +#if !defined(QMC_MIXED_PRECISION) +template class SplineSetReader>; +template class SplineSetReader>; +template class SplineSetReader>; +#endif +#endif +} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h index 526ed8cee7..30a0edd55e 100644 --- a/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h +++ b/src/QMCWaveFunctions/BsplineFactory/SplineSetReader.h @@ -2,7 +2,7 @@ // This file is distributed under the University of Illinois/NCSA Open Source License. // See LICENSE file in top directory for details. // -// Copyright (c) 2019 QMCPACK developers. +// Copyright (c) 2023 QMCPACK developers. // // File developed by: Jeremy McMinnis, jmcminis@gmail.com, University of Illinois at Urbana-Champaign // Jeongnim Kim, jeongnim.kim@gmail.com, University of Illinois at Urbana-Champaign @@ -25,100 +25,26 @@ */ #ifndef QMCPLUSPLUS_SPLINESET_READER_H #define QMCPLUSPLUS_SPLINESET_READER_H -#include "mpi/collectives.h" -#include "mpi/point2point.h" -#include "Utilities/FairDivide.h" + +#include "BsplineReader.h" namespace qmcplusplus { -class OneSplineOrbData -{ - Array, 3> FFTbox; - Array splineData_r, splineData_i; - double rotate_phase_r, rotate_phase_i; - UBspline_3d_d* spline_r = nullptr; - UBspline_3d_d* spline_i = nullptr; - fftw_plan FFTplan = nullptr; - - const TinyVector& mesh_size_; - const bool isComplex_; - - void create(const TinyVector& halfG) - { - const int nx = mesh_size_[0]; - const int ny = mesh_size_[1]; - const int nz = mesh_size_[2]; - //perform FFT using FFTW - FFTbox.resize(nx, ny, nz); - FFTplan = fftw_plan_dft_3d(nx, ny, nz, reinterpret_cast(FFTbox.data()), - reinterpret_cast(FFTbox.data()), +1, FFTW_ESTIMATE); - splineData_r.resize(nx, ny, nz); - if (isComplex_) - splineData_i.resize(nx, ny, nz); - - TinyVector start(0.0); - TinyVector end(1.0); - spline_r = einspline::create(spline_r, start, end, mesh_size_, halfG); - if (isComplex_) - spline_i = einspline::create(spline_i, start, end, mesh_size_, halfG); - } - -public: - OneSplineOrbData(const TinyVector& mesh_size, const TinyVector& halfG, const bool isComplex) - : mesh_size_(mesh_size), isComplex_(isComplex) - { - create(halfG); - } - ~OneSplineOrbData() { clear(); } - - auto getRotatePhase() const { return std::complex(rotate_phase_r, rotate_phase_i); } - auto& get_spline_r() { return *spline_r; } - auto& get_spline_i() { return *spline_i; } - - void clear() - { - einspline::destroy(spline_r); - einspline::destroy(spline_i); - if (FFTplan != nullptr) - fftw_destroy_plan(FFTplan); - FFTplan = nullptr; - } - - /** fft and spline cG - * @param cG psi_g to be processed - * @param ti twist index - * @param iorb orbital index - * - * Perform FFT and spline to spline_r and spline_i - */ - void fft_spline(const Vector>& cG, - const std::vector>& gvecs, - const TinyVector& primcell_kpoint, - const bool rotate) - { - unpack4fftw(cG, gvecs, mesh_size_, FFTbox); - fftw_execute(FFTplan); - if (isComplex_) - { - if (rotate) - fix_phase_rotate_c2c(FFTbox, splineData_r, splineData_i, primcell_kpoint, rotate_phase_r, rotate_phase_i); - else - { - split_real_components_c2c(FFTbox, splineData_r, splineData_i); - rotate_phase_r = 1.0; - rotate_phase_i = 0.0; - } - einspline::set(spline_r, splineData_r.data()); - einspline::set(spline_i, splineData_i.data()); - } - else - { - fix_phase_rotate_c2r(FFTbox, splineData_r, primcell_kpoint, rotate_phase_r, rotate_phase_i); - einspline::set(spline_r, splineData_r.data()); - } - } -}; +// forward declaration +#if defined(QMC_COMPLEX) +template +class SplineC2C; +template +class SplineC2COMPTarget; +#else +template +class SplineR2R; +template +class SplineC2R; +template +class SplineC2ROMPTarget; +#endif /** General SplineSetReader to handle any unitcell */ @@ -126,180 +52,51 @@ template class SplineSetReader : public BsplineReader { public: - using DataType = typename SA::DataType; - using SplineType = typename SA::SplineType; - - SplineSetReader(EinsplineSetBuilder* e) : BsplineReader(e) {} + SplineSetReader(EinsplineSetBuilder* e); std::unique_ptr create_spline_set(const std::string& my_name, int spin, - const BandInfoGroup& bandgroup) override - { - auto bspline = std::make_unique(my_name); - app_log() << " ClassName = " << bspline->getClassName() << std::endl; - bool foundspline = createSplineDataSpaceLookforDumpFile(bandgroup, *bspline); - if (foundspline) - { - Timer now; - hdf_archive h5f(myComm); - const auto splinefile = getSplineDumpFileName(bandgroup); - h5f.open(splinefile, H5F_ACC_RDONLY); - foundspline = bspline->read_splines(h5f); - if (foundspline) - app_log() << " Successfully restored 3D B-spline coefficients from " << splinefile << ". The reading time is " - << now.elapsed() << " sec." << std::endl; - } - - if (!foundspline) - { - bspline->flush_zero(); - - Timer now; - initialize_spline_pio_gather(spin, bandgroup, *bspline); - app_log() << " SplineSetReader initialize_spline_pio " << now.elapsed() << " sec" << std::endl; - - if (saveSplineCoefs && myComm->rank() == 0) - { - Timer now; - const std::string splinefile(getSplineDumpFileName(bandgroup)); - hdf_archive h5f; - h5f.create(splinefile); - std::string classname = bspline->getClassName(); - h5f.write(classname, "class_name"); - int sizeD = sizeof(DataType); - h5f.write(sizeD, "sizeof"); - bspline->write_splines(h5f); - h5f.close(); - app_log() << " Stored spline coefficients in " << splinefile << " for potential reuse. The writing time is " - << now.elapsed() << " sec." << std::endl; - } - } - - { - Timer now; - bspline->bcast_tables(myComm); - app_log() << " Time to bcast the table = " << now.elapsed() << std::endl; - } - - return bspline; - } + const BandInfoGroup& bandgroup) override; /** create data space in the spline object and try open spline dump files. * @param bandgroup band info * @param bspline the spline object being worked on * @return true if dumpfile pass class name and data type size check */ - bool createSplineDataSpaceLookforDumpFile(const BandInfoGroup& bandgroup, SA& bspline) const - { - if (bspline.isComplex()) - app_log() << " Using complex einspline table" << std::endl; - else - app_log() << " Using real einspline table" << std::endl; - - //baseclass handles twists - check_twists(bspline, bandgroup); - - Ugrid xyz_grid[3]; - - typename SA::BCType xyz_bc[3]; - bool havePsig = set_grid(bspline.HalfG, xyz_grid, xyz_bc); - if (!havePsig) - myComm->barrier_and_abort("SplineSetReader needs psi_g. Set precision=\"double\"."); - bspline.create_spline(xyz_grid, xyz_bc); - - int foundspline = 0; - Timer now; - if (myComm->rank() == 0) - { - now.restart(); - hdf_archive h5f(myComm); - foundspline = h5f.open(getSplineDumpFileName(bandgroup), H5F_ACC_RDONLY); - if (foundspline) - { - std::string aname("none"); - foundspline = h5f.readEntry(aname, "class_name"); - foundspline = (aname.find(bspline.getKeyword()) != std::string::npos); - } - if (foundspline) - { - int sizeD = 0; - foundspline = h5f.readEntry(sizeD, "sizeof"); - foundspline = (sizeD == sizeof(DataType)); - } - h5f.close(); - } - myComm->bcast(foundspline); - return foundspline; - } + bool createSplineDataSpaceLookforDumpFile(const BandInfoGroup& bandgroup, SA& bspline) const; /** read planewave coefficients from h5 file * @param s data set full path in h5 * @param h5f hdf5 file handle * @param cG vector to store coefficients */ - void readOneOrbitalCoefs(const std::string& s, hdf_archive& h5f, Vector>& cG) const - { - if (!h5f.readEntry(cG, s)) - { - std::ostringstream msg; - msg << "SplineSetReader Failed to read band(s) from h5 file. " << "Attempted dataset " << s << " with " - << cG.size() << " complex numbers." << std::endl; - throw std::runtime_error(msg.str()); - } - double total_norm = compute_norm(cG); - if ((checkNorm) && (std::abs(total_norm - 1.0) > PW_COEFF_NORM_TOLERANCE)) - { - std::ostringstream msg; - msg << "SplineSetReader The orbital dataset " << s << " has a wrong norm " << total_norm - << ", computed from plane wave coefficients!" << std::endl - << "This may indicate a problem with the HDF5 library versions used " - << "during wavefunction conversion or read." << std::endl; - throw std::runtime_error(msg.str()); - } - } + void readOneOrbitalCoefs(const std::string& s, hdf_archive& h5f, Vector>& cG) const; /** transforming planewave orbitals to 3D B-spline orbitals in real space. * @param spin orbital dataset spin index * @param bandgroup band info * @param bspline the spline object being worked on */ - void initialize_spline_pio_gather(const int spin, const BandInfoGroup& bandgroup, SA& bspline) const - { - //distribute bands over processor groups - int Nbands = bandgroup.getNumDistinctOrbitals(); - const int Nprocs = myComm->size(); - const int Nbandgroups = std::min(Nbands, Nprocs); - Communicate band_group_comm(*myComm, Nbandgroups); - std::vector band_groups(Nbandgroups + 1, 0); - FairDivideLow(Nbands, Nbandgroups, band_groups); - int iorb_first = band_groups[band_group_comm.getGroupID()]; - int iorb_last = band_groups[band_group_comm.getGroupID() + 1]; + void initialize_spline_pio_gather(const int spin, const BandInfoGroup& bandgroup, SA& bspline) const; +}; - app_log() << "Start transforming plane waves to 3D B-Splines." << std::endl; - OneSplineOrbData oneband(mybuilder->MeshSize, bspline.HalfG, bspline.isComplex()); - hdf_archive h5f(&band_group_comm, false); - Vector> cG(mybuilder->Gvecs[0].size()); - const std::vector& cur_bands = bandgroup.myBands; - if (band_group_comm.isGroupLeader()) - { - h5f.open(mybuilder->H5FileName, H5F_ACC_RDONLY); - for (int iorb = iorb_first; iorb < iorb_last; iorb++) - { - const auto& cur_band = cur_bands[bspline.BandIndexMap[iorb]]; - const int ti = cur_band.TwistIndex; - readOneOrbitalCoefs(psi_g_path(ti, spin, cur_band.BandIndex), h5f, cG); - oneband.fft_spline(cG, mybuilder->Gvecs[0], mybuilder->primcell_kpoints[ti], rotate); - bspline.set_spline(&oneband.get_spline_r(), &oneband.get_spline_i(), cur_band.TwistIndex, iorb, 0); - } +#if defined(QMC_COMPLEX) +extern template class SplineSetReader>; +extern template class SplineSetReader>; +#if !defined(QMC_MIXED_PRECISION) +extern template class SplineSetReader>; +extern template class SplineSetReader>; +#endif +#else +extern template class SplineSetReader>; +extern template class SplineSetReader>; +extern template class SplineSetReader>; +#if !defined(QMC_MIXED_PRECISION) +extern template class SplineSetReader>; +extern template class SplineSetReader>; +extern template class SplineSetReader>; +#endif +#endif - { - band_group_comm.getGroupLeaderComm()->barrier(); - Timer now; - bspline.gather_tables(band_group_comm.getGroupLeaderComm()); - app_log() << " Time to gather the table = " << now.elapsed() << std::endl; - } - } - } -}; } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/createComplexDouble.cpp b/src/QMCWaveFunctions/BsplineFactory/createBsplineComplex.cpp similarity index 65% rename from src/QMCWaveFunctions/BsplineFactory/createComplexDouble.cpp rename to src/QMCWaveFunctions/BsplineFactory/createBsplineComplex.cpp index 72f5be8b89..cfaef667eb 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createComplexDouble.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineComplex.cpp @@ -12,43 +12,33 @@ #include "QMCWaveFunctions/BsplineFactory/createBsplineReader.h" #include -#include "CPU/e2iphi.h" -#include "CPU/SIMD/vmath.hpp" -#include "Utilities/ProgressReportEngine.h" -#include "EinsplineSetBuilder.h" -#include "BsplineSet.h" -#include "SplineC2R.h" -#include "SplineC2C.h" -#include "SplineC2ROMPTarget.h" -#include "SplineC2COMPTarget.h" -#include "HybridRepCplx.h" -#include -#include "einspline_helper.hpp" -#include "BsplineReader.h" #include "SplineSetReader.h" #include "HybridRepSetReader.h" namespace qmcplusplus { -std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU) +/** create a reader which handles complex (double size real) splines, C2R or C2C case + * spline storage and computation precision is ST + */ +template +std::unique_ptr createBsplineComplex(EinsplineSetBuilder* e, bool hybrid_rep, const std::string& useGPU) { using RealType = OHMMS_PRECISION; std::unique_ptr aReader; #if defined(QMC_COMPLEX) - app_summary() << " Using complex valued spline SPOs with complex double precision storage (C2C)." << std::endl; + app_summary() << " Using complex valued spline SPOs with complex " << SplineStoragePrecision::value + << " precision storage (C2C)." << std::endl; if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) { app_summary() << " Running OpenMP offload code path." << std::endl; if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else - aReader = std::make_unique>>(e); + aReader = std::make_unique>>(e); } else { @@ -56,23 +46,24 @@ std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilder* e if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else - aReader = std::make_unique>>(e); + aReader = std::make_unique>>(e); } #else //QMC_COMPLEX - app_summary() << " Using real valued spline SPOs with complex double precision storage (C2R)." << std::endl; + app_summary() << " Using real valued spline SPOs with complex " << SplineStoragePrecision::value + << " precision storage (C2R)." << std::endl; if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) { app_summary() << " Running OpenMP offload code path." << std::endl; if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else - aReader = std::make_unique>>(e); + aReader = std::make_unique>>(e); } else { @@ -80,13 +71,24 @@ std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilder* e if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else - aReader = std::make_unique>>(e); + aReader = std::make_unique>>(e); } #endif - return aReader; } + +std::unique_ptr createBsplineComplex(EinsplineSetBuilder* e, + bool use_single, + bool hybrid_rep, + const std::string& useGPU) +{ + if (use_single) + return createBsplineComplex(e, hybrid_rep, useGPU); + else + return createBsplineComplex(e, hybrid_rep, useGPU); +} + } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/createBsplineReader.h b/src/QMCWaveFunctions/BsplineFactory/createBsplineReader.h index cd33a5bd36..e23ec218c1 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createBsplineReader.h +++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineReader.h @@ -22,33 +22,36 @@ namespace qmcplusplus struct BsplineReader; class EinsplineSetBuilder; -/** create a reader which handles complex (double size real) splines, C2R or C2C case - * spline storage and computation precision is double - */ -std::unique_ptr createBsplineComplexDouble(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU); +template +struct SplineStoragePrecision; -/** create a reader which handles complex (double size real) splines, C2R or C2C case - * spline storage and computation precision is float - */ -std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU); +template<> +struct SplineStoragePrecision +{ + constexpr static std::string_view value = "single"; +}; -/** create a reader which handles real splines, R2R case +template<> +struct SplineStoragePrecision +{ + constexpr static std::string_view value = "double"; +}; + +/** create a reader which handles complex (double size real) splines, C2R or C2C case * spline storage and computation precision is double */ -std::unique_ptr createBsplineRealDouble(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU); +std::unique_ptr createBsplineComplex(EinsplineSetBuilder* e, + bool use_single, + bool hybrid_rep, + const std::string& useGPU); /** create a reader which handles real splines, R2R case - * spline storage and computation precision is float + * spline storage and computation precision is double */ -std::unique_ptr createBsplineRealSingle(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU); +std::unique_ptr createBsplineReal(EinsplineSetBuilder* e, + bool use_single, + bool hybrid_rep, + const std::string& useGPU); } // namespace qmcplusplus #endif diff --git a/src/QMCWaveFunctions/BsplineFactory/createRealDouble.cpp b/src/QMCWaveFunctions/BsplineFactory/createBsplineReal.cpp similarity index 56% rename from src/QMCWaveFunctions/BsplineFactory/createRealDouble.cpp rename to src/QMCWaveFunctions/BsplineFactory/createBsplineReal.cpp index 73f0844aec..bd275adfd4 100644 --- a/src/QMCWaveFunctions/BsplineFactory/createRealDouble.cpp +++ b/src/QMCWaveFunctions/BsplineFactory/createBsplineReal.cpp @@ -12,24 +12,20 @@ #include "QMCWaveFunctions/BsplineFactory/createBsplineReader.h" #include -#include "Utilities/ProgressReportEngine.h" -#include "EinsplineSetBuilder.h" -#include "BsplineSet.h" -#include "SplineR2R.h" -#include "HybridRepReal.h" -#include -#include "einspline_helper.hpp" -#include "BsplineReader.h" #include "SplineSetReader.h" #include "HybridRepSetReader.h" namespace qmcplusplus { -std::unique_ptr createBsplineRealDouble(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU) +/** create a reader which handles real splines, R2R case + * spline storage and computation precision is ST + */ +template +std::unique_ptr createBsplineReal(EinsplineSetBuilder* e, bool hybrid_rep, const std::string& useGPU) { - app_summary() << " Using real valued spline SPOs with real double precision storage (R2R)." << std::endl; + app_summary() + << " Using real valued spline SPOs with real " << SplineStoragePrecision::value + << " precision storage (R2R)." << std::endl; if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) app_summary() << "OpenMP offload has not been implemented to support real valued spline SPOs with real storage!" << std::endl; @@ -39,10 +35,22 @@ std::unique_ptr createBsplineRealDouble(EinsplineSetBuilder* e, if (hybrid_rep) { app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); + aReader = std::make_unique>>>(e); } else - aReader = std::make_unique>>(e); + aReader = std::make_unique>>(e); return aReader; } + +std::unique_ptr createBsplineReal(EinsplineSetBuilder* e, + bool use_single, + bool hybrid_rep, + const std::string& useGPU) +{ + if (use_single) + return createBsplineReal(e, hybrid_rep, useGPU); + else + return createBsplineReal(e, hybrid_rep, useGPU); +} + } // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/createComplexSingle.cpp b/src/QMCWaveFunctions/BsplineFactory/createComplexSingle.cpp deleted file mode 100644 index 8618b863bb..0000000000 --- a/src/QMCWaveFunctions/BsplineFactory/createComplexSingle.cpp +++ /dev/null @@ -1,91 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: -// -// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -////////////////////////////////////////////////////////////////////////////////////// - - -#include "QMCWaveFunctions/BsplineFactory/createBsplineReader.h" -#include -#include "CPU/e2iphi.h" -#include "CPU/SIMD/vmath.hpp" -#include "Utilities/ProgressReportEngine.h" -#include "EinsplineSetBuilder.h" -#include "BsplineSet.h" -#include "SplineC2R.h" -#include "SplineC2C.h" -#include "SplineC2ROMPTarget.h" -#include "SplineC2COMPTarget.h" -#include "HybridRepCplx.h" -#include -#include "einspline_helper.hpp" -#include "BsplineReader.h" -#include "SplineSetReader.h" -#include "HybridRepSetReader.h" - -namespace qmcplusplus -{ -std::unique_ptr createBsplineComplexSingle(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU) -{ - using RealType = OHMMS_PRECISION; - std::unique_ptr aReader; - -#if defined(QMC_COMPLEX) - app_summary() << " Using complex valued spline SPOs with complex single precision storage (C2C)." << std::endl; - if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) - { - app_summary() << " Running OpenMP offload code path." << std::endl; - if (hybrid_rep) - { - app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); - } - else - aReader = std::make_unique>>(e); - } - else - { - app_summary() << " Running on CPU." << std::endl; - if (hybrid_rep) - { - app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); - } - else - aReader = std::make_unique>>(e); - } -#else //QMC_COMPLEX - app_summary() << " Using real valued spline SPOs with complex single precision storage (C2R)." << std::endl; - if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) - { - app_summary() << " Running OpenMP offload code path." << std::endl; - if (hybrid_rep) - { - app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); - } - else - aReader = std::make_unique>>(e); - } - else - { - app_summary() << " Running on CPU." << std::endl; - if (hybrid_rep) - { - app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); - } - else - aReader = std::make_unique>>(e); - } -#endif - return aReader; -} -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/BsplineFactory/createRealSingle.cpp b/src/QMCWaveFunctions/BsplineFactory/createRealSingle.cpp deleted file mode 100644 index 8f99ebfc4f..0000000000 --- a/src/QMCWaveFunctions/BsplineFactory/createRealSingle.cpp +++ /dev/null @@ -1,48 +0,0 @@ -////////////////////////////////////////////////////////////////////////////////////// -// This file is distributed under the University of Illinois/NCSA Open Source License. -// See LICENSE file in top directory for details. -// -// Copyright (c) 2016 Jeongnim Kim and QMCPACK developers. -// -// File developed by: -// -// File created by: Jeongnim Kim, jeongnim.kim@intel.com, Intel Corp. -////////////////////////////////////////////////////////////////////////////////////// - - -#include "QMCWaveFunctions/BsplineFactory/createBsplineReader.h" -#include -#include "Utilities/ProgressReportEngine.h" -#include "EinsplineSetBuilder.h" -#include "BsplineSet.h" -#include "SplineR2R.h" -#include "HybridRepReal.h" -#include -#include "einspline_helper.hpp" -#include "BsplineReader.h" -#include "SplineSetReader.h" -#include "HybridRepSetReader.h" - -namespace qmcplusplus -{ -std::unique_ptr createBsplineRealSingle(EinsplineSetBuilder* e, - bool hybrid_rep, - const std::string& useGPU) -{ - app_summary() << " Using real valued spline SPOs with real single precision storage (R2R)." << std::endl; - if (CPUOMPTargetSelector::selectPlatform(useGPU) == PlatformKind::OMPTARGET) - app_summary() << "OpenMP offload has not been implemented to support real valued spline SPOs with real storage!" - << std::endl; - app_summary() << " Running on CPU." << std::endl; - - std::unique_ptr aReader; - if (hybrid_rep) - { - app_summary() << " Using hybrid orbital representation." << std::endl; - aReader = std::make_unique>>>(e); - } - else - aReader = std::make_unique>>(e); - return aReader; -} -} // namespace qmcplusplus diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index 0ddce93d61..864f6c9083 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -83,9 +83,11 @@ if(OHMMS_DIM MATCHES 3) BsplineFactory/EinsplineSetBuilderCommon.cpp BsplineFactory/EinsplineSetBuilderESHDF.fft.cpp BsplineFactory/EinsplineSetBuilder_createSPOs.cpp - BsplineFactory/createComplexDouble.cpp - BsplineFactory/createComplexSingle.cpp + BsplineFactory/createBsplineComplex.cpp BsplineFactory/HybridRepCenterOrbitals.cpp + BsplineFactory/SplineSetReader.cpp + BsplineFactory/HybridRepSetReader.cpp + BsplineFactory/OneSplineOrbData.cpp BandInfo.cpp BsplineFactory/BsplineReader.cpp) set(FERMION_OMPTARGET_SRCS Fermion/DiracDeterminantBatched.cpp Fermion/MultiDiracDeterminant.2.cpp) @@ -93,7 +95,7 @@ if(OHMMS_DIM MATCHES 3) set(FERMION_SRCS ${FERMION_SRCS} BsplineFactory/EinsplineSpinorSetBuilder.cpp BsplineFactory/SplineC2C.cpp) set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2COMPTarget.cpp) else(QMC_COMPLEX) - set(FERMION_SRCS ${FERMION_SRCS} BsplineFactory/createRealSingle.cpp BsplineFactory/createRealDouble.cpp + set(FERMION_SRCS ${FERMION_SRCS} BsplineFactory/createBsplineReal.cpp BsplineFactory/SplineC2R.cpp BsplineFactory/SplineR2R.cpp) set(FERMION_OMPTARGET_SRCS ${FERMION_OMPTARGET_SRCS} BsplineFactory/SplineC2ROMPTarget.cpp) endif(QMC_COMPLEX) diff --git a/src/einspline/bspline_structs.h b/src/einspline/bspline_structs.h index f850b89002..8455d57229 100644 --- a/src/einspline/bspline_structs.h +++ b/src/einspline/bspline_structs.h @@ -7,6 +7,7 @@ #ifndef BSPLINE_STRUCTS_STD_H #define BSPLINE_STRUCTS_STD_H #include +#include "bspline_base.h" /////////////////////////// // Single precision real //