From 44ac15341e93c432922b1dcd65300717f8e5f284 Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Mon, 6 May 2024 17:22:47 +0800 Subject: [PATCH 01/23] replace ParaV in module gint --- .../operator_lcao/veff_lcao.cpp | 4 +-- .../hamilt_lcaodft/spar_dh.cpp | 2 +- .../module_gint/gint_gamma.h | 2 +- .../module_gint/gint_gamma_vl.cpp | 2 +- .../module_hamilt_lcao/module_gint/gint_k.h | 21 +++++++++----- .../module_gint/gint_k_pvpr.cpp | 3 +- .../module_gint/gint_k_sparse.cpp | 20 +++++++------ .../module_gint/gint_k_sparse1.cpp | 29 ++++++++++--------- source/module_io/write_Vxc.hpp | 2 +- 9 files changed, 48 insertions(+), 37 deletions(-) diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp index e6bfbc4c78..ceab9d6d02 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp @@ -141,12 +141,12 @@ void Veff>::contributeHR(void) if(XC_Functional::get_func_type()==3 || XC_Functional::get_func_type()==5) { Gint_inout inout(vr_eff1, vofk_eff1, Gint_Tools::job_type::vlocal_meta); - this->GG->cal_vlocal(&inout, this->LM, this->new_e_iteration); + this->GG->cal_vlocal(&inout, this->new_e_iteration); } else { Gint_inout inout(vr_eff1, Gint_Tools::job_type::vlocal); - this->GG->cal_vlocal(&inout, this->LM, this->new_e_iteration); + this->GG->cal_vlocal(&inout, this->new_e_iteration); } this->GG->transfer_pvpR(this->hR); diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp index f10f176474..5e3911ccb2 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp @@ -42,7 +42,7 @@ void sparse_format::cal_dH( delete[] lm.DHloc_fixedR_y; delete[] lm.DHloc_fixedR_z; - gint_k.cal_dvlocal_R_sparseMatrix(current_spin, sparse_thr, &lm); + gint_k.cal_dvlocal_R_sparseMatrix(current_spin, sparse_thr, &lm, lm.ParaV); return; } diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma.h b/source/module_hamilt_lcao/module_gint/gint_gamma.h index 2e053a913a..d6f31da80e 100644 --- a/source/module_hamilt_lcao/module_gint/gint_gamma.h +++ b/source/module_hamilt_lcao/module_gint/gint_gamma.h @@ -33,7 +33,7 @@ class Gint_Gamma : public Gint // there is an additional step in calculating vlocal for gamma point // namely the redistribution of Hamiltonian from grid to 2D block format // hence we have an additional layer outside the unified interface - void cal_vlocal(Gint_inout* inout, LCAO_Matrix* lm, const bool new_e_iteration); + void cal_vlocal(Gint_inout* inout, const bool new_e_iteration); //------------------------------------------------------ // in gint_gamma_env.cpp diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp b/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp index 159527102b..163f127fc2 100644 --- a/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp @@ -28,7 +28,7 @@ extern "C" void Cblacs_pcoord(int icontxt, int pnum, int *prow, int *pcol); } -void Gint_Gamma::cal_vlocal(Gint_inout* inout, LCAO_Matrix* lm, bool new_e_iteration) +void Gint_Gamma::cal_vlocal(Gint_inout* inout,bool new_e_iteration) { const int max_size = this->gridt->max_atom; const int lgd = this->gridt->lgd; diff --git a/source/module_hamilt_lcao/module_gint/gint_k.h b/source/module_hamilt_lcao/module_gint/gint_k.h index 17c136b838..36c4d196da 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k.h +++ b/source/module_hamilt_lcao/module_gint/gint_k.h @@ -59,7 +59,7 @@ class Gint_k : public Gint // // V is (Vl + Vh + Vxc) if no Vna is used, // and is (Vna + delta_Vh + Vxc) if Vna is used. - void folding_vl_k(const int &ik, LCAO_Matrix* LM, const std::vector>& kvec_d); + void folding_vl_k(const int &ik, LCAO_Matrix* LM, Parallel_Orbitals *PV,const std::vector>& kvec_d); /** * @brief transfer pvpR to this->hRGint @@ -88,19 +88,23 @@ class Gint_k : public Gint const double &sparse_threshold, const std::map, std::map>> &pvpR_sparseMatrix, - LCAO_Matrix *LM); + LCAO_Matrix *LM, + Parallel_Orbitals *PV); void distribute_pvpR_soc_sparseMatrix( const double &sparse_threshold, const std::map, std::map>>> &pvpR_soc_sparseMatrix, - LCAO_Matrix *LM); + LCAO_Matrix *LM, + Parallel_Orbitals *PV + ); void cal_vlocal_R_sparseMatrix( const int ¤t_spin, const double &sparse_threshold, - LCAO_Matrix *LM); + LCAO_Matrix *LM, + Parallel_Orbitals *PV); //------------------------------------------------------ // in gint_k_sparse1.cpp @@ -112,19 +116,22 @@ class Gint_k : public Gint const double &sparse_threshold, const std::map, std::map>> &pvdpR_sparseMatrix, - LCAO_Matrix *LM); + LCAO_Matrix *LM, + Parallel_Orbitals *PV); void distribute_pvdpR_soc_sparseMatrix( const int dim, const double &sparse_threshold, const std::map, std::map>>> &pvdpR_soc_sparseMatrix, - LCAO_Matrix *LM); + LCAO_Matrix *LM, + Parallel_Orbitals *PV); void cal_dvlocal_R_sparseMatrix( const int ¤t_spin, const double &sparse_threshold, - LCAO_Matrix *LM); + LCAO_Matrix *LM, + Parallel_Orbitals *PV); private: diff --git a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp index 8413035f11..693254060f 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp @@ -60,6 +60,7 @@ void Gint_k::destroy_pvpR(void) // H(k)=\sum{R} H(R)exp(ikR) void Gint_k::folding_vl_k(const int &ik, LCAO_Matrix *LM, + Parallel_Orbitals *PV, const std::vector>& kvec_d) { ModuleBase::TITLE("Gint_k","folding_vl_k"); @@ -388,7 +389,7 @@ void Gint_k::folding_vl_k(const int &ik, #endif for (int j=0; jParaV->in_this_processor(i,j)) + if (!PV->in_this_processor(i,j)) { continue; } diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp index a3b77194a3..6bac72f1c8 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp @@ -15,7 +15,8 @@ void Gint_k::distribute_pvpR_sparseMatrix( const double &sparse_threshold, const std::map, std::map>> &pvpR_sparseMatrix, - LCAO_Matrix *LM + LCAO_Matrix *LM, + Parallel_Orbitals *PV ) { ModuleBase::TITLE("Gint_k","distribute_pvpR_sparseMatrix"); @@ -110,11 +111,11 @@ void Gint_k::distribute_pvpR_sparseMatrix( Parallel_Reduce::reduce_pool(tmp, GlobalV::NLOCAL); - if (LM->ParaV->global2local_row(row) >= 0) + if (PV->global2local_row(row) >= 0) { for(int col = 0; col < GlobalV::NLOCAL; ++col) { - if(LM->ParaV->global2local_col(col) >= 0) + if(PV->global2local_col(col) >= 0) { if (std::abs(tmp[col]) > sparse_threshold) { @@ -150,7 +151,8 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix( const double &sparse_threshold, const std::map, std::map>>> &pvpR_soc_sparseMatrix, - LCAO_Matrix *LM + LCAO_Matrix *LM, + Parallel_Orbitals *PV ) { ModuleBase::TITLE("Gint_k","distribute_pvpR_soc_sparseMatrix"); @@ -244,11 +246,11 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix( Parallel_Reduce::reduce_pool(tmp_soc, GlobalV::NLOCAL); - if (LM->ParaV->global2local_row(row) >= 0) + if (PV->global2local_row(row) >= 0) { for(int col = 0; col < GlobalV::NLOCAL; ++col) { - if(LM->ParaV->global2local_col(col) >= 0) + if(PV->global2local_col(col) >= 0) { if (std::abs(tmp_soc[col]) > sparse_threshold) { @@ -281,7 +283,7 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix( } -void Gint_k::cal_vlocal_R_sparseMatrix(const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM) +void Gint_k::cal_vlocal_R_sparseMatrix(const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *PV) { ModuleBase::TITLE("Gint_k","cal_vlocal_R_sparseMatrix"); @@ -418,11 +420,11 @@ void Gint_k::cal_vlocal_R_sparseMatrix(const int ¤t_spin, const double &sp if (GlobalV::NSPIN != 4) { - distribute_pvpR_sparseMatrix(current_spin, sparse_threshold, pvpR_sparseMatrix, LM); + distribute_pvpR_sparseMatrix(current_spin, sparse_threshold, pvpR_sparseMatrix, LM,PV); } else { - distribute_pvpR_soc_sparseMatrix(sparse_threshold, pvpR_soc_sparseMatrix, LM); + distribute_pvpR_soc_sparseMatrix(sparse_threshold, pvpR_soc_sparseMatrix, LM,PV); } return; diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp index 36bdf3c317..eaeaede6a1 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp @@ -15,8 +15,8 @@ void Gint_k::distribute_pvdpR_sparseMatrix( const int dim, const double &sparse_threshold, const std::map, std::map>> &pvdpR_sparseMatrix, - LCAO_Matrix *LM -) + LCAO_Matrix *LM, + Parallel_Orbitals *PV) { ModuleBase::TITLE("Gint_k","distribute_pvdpR_sparseMatrix"); @@ -110,11 +110,11 @@ void Gint_k::distribute_pvdpR_sparseMatrix( Parallel_Reduce::reduce_pool(tmp, GlobalV::NLOCAL); - if (LM->ParaV->global2local_row(row) >= 0) + if (PV->global2local_row(row) >= 0) { for(int col = 0; col < GlobalV::NLOCAL; ++col) { - if(LM->ParaV->global2local_col(col) >= 0) + if(PV->global2local_col(col) >= 0) { if (std::abs(tmp[col]) > sparse_threshold) { @@ -172,7 +172,8 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix( const double &sparse_threshold, const std::map, std::map>>> &pvdpR_soc_sparseMatrix, - LCAO_Matrix *LM + LCAO_Matrix *LM, + Parallel_Orbitals *PV ) { ModuleBase::TITLE("Gint_k","distribute_pvdpR_soc_sparseMatrix"); @@ -266,11 +267,11 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix( Parallel_Reduce::reduce_pool(tmp_soc, GlobalV::NLOCAL); - if (LM->ParaV->global2local_row(row) >= 0) + if (PV->global2local_row(row) >= 0) { for(int col = 0; col < GlobalV::NLOCAL; ++col) { - if(LM->ParaV->global2local_col(col) >= 0) + if(PV->global2local_col(col) >= 0) { if (std::abs(tmp_soc[col]) > sparse_threshold) { @@ -324,7 +325,7 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix( } -void Gint_k::cal_dvlocal_R_sparseMatrix(const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM) +void Gint_k::cal_dvlocal_R_sparseMatrix(const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *PV) { ModuleBase::TITLE("Gint_k","cal_vlocal_R_sparseMatrix"); @@ -528,15 +529,15 @@ void Gint_k::cal_dvlocal_R_sparseMatrix(const int ¤t_spin, const double &s if (GlobalV::NSPIN != 4) { - distribute_pvdpR_sparseMatrix(current_spin, 0, sparse_threshold, pvdpRx_sparseMatrix, LM); - distribute_pvdpR_sparseMatrix(current_spin, 1, sparse_threshold, pvdpRy_sparseMatrix, LM); - distribute_pvdpR_sparseMatrix(current_spin, 2, sparse_threshold, pvdpRz_sparseMatrix, LM); + distribute_pvdpR_sparseMatrix(current_spin, 0, sparse_threshold, pvdpRx_sparseMatrix, LM,PV); + distribute_pvdpR_sparseMatrix(current_spin, 1, sparse_threshold, pvdpRy_sparseMatrix, LM,PV); + distribute_pvdpR_sparseMatrix(current_spin, 2, sparse_threshold, pvdpRz_sparseMatrix, LM,PV); } else { - distribute_pvdpR_soc_sparseMatrix(0, sparse_threshold, pvdpRx_soc_sparseMatrix, LM); - distribute_pvdpR_soc_sparseMatrix(1, sparse_threshold, pvdpRy_soc_sparseMatrix, LM); - distribute_pvdpR_soc_sparseMatrix(2, sparse_threshold, pvdpRz_soc_sparseMatrix, LM); + distribute_pvdpR_soc_sparseMatrix(0, sparse_threshold, pvdpRx_soc_sparseMatrix, LM,PV); + distribute_pvdpR_soc_sparseMatrix(1, sparse_threshold, pvdpRy_soc_sparseMatrix, LM,PV); + distribute_pvdpR_soc_sparseMatrix(2, sparse_threshold, pvdpRz_soc_sparseMatrix, LM,PV); } return; diff --git a/source/module_io/write_Vxc.hpp b/source/module_io/write_Vxc.hpp index 236ccafb77..e384daf45e 100644 --- a/source/module_io/write_Vxc.hpp +++ b/source/module_io/write_Vxc.hpp @@ -25,7 +25,7 @@ namespace ModuleIO Gint_inout& io, LCAO_Matrix& lm) { - gg.cal_vlocal(&io, &lm, false); + gg.cal_vlocal(&io, false); }; inline void gint_vl( From 43fbbd42031dfc91de748db6960cc70b9b871b3f Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Tue, 7 May 2024 15:18:43 +0800 Subject: [PATCH 02/23] change PV to pv in module gint --- .../module_hamilt_lcao/module_gint/gint_k.h | 14 +++++----- .../module_gint/gint_k_pvpr.cpp | 4 +-- .../module_gint/gint_k_sparse.cpp | 18 ++++++------- .../module_gint/gint_k_sparse1.cpp | 26 +++++++++---------- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/gint_k.h b/source/module_hamilt_lcao/module_gint/gint_k.h index 36c4d196da..094dc7d591 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k.h +++ b/source/module_hamilt_lcao/module_gint/gint_k.h @@ -59,7 +59,7 @@ class Gint_k : public Gint // // V is (Vl + Vh + Vxc) if no Vna is used, // and is (Vna + delta_Vh + Vxc) if Vna is used. - void folding_vl_k(const int &ik, LCAO_Matrix* LM, Parallel_Orbitals *PV,const std::vector>& kvec_d); + void folding_vl_k(const int &ik, LCAO_Matrix* LM, Parallel_Orbitals *pv,const std::vector>& kvec_d); /** * @brief transfer pvpR to this->hRGint @@ -89,7 +89,7 @@ class Gint_k : public Gint const std::map, std::map>> &pvpR_sparseMatrix, LCAO_Matrix *LM, - Parallel_Orbitals *PV); + Parallel_Orbitals *pv); void distribute_pvpR_soc_sparseMatrix( const double &sparse_threshold, @@ -97,14 +97,14 @@ class Gint_k : public Gint std::map>>> &pvpR_soc_sparseMatrix, LCAO_Matrix *LM, - Parallel_Orbitals *PV + Parallel_Orbitals *pv ); void cal_vlocal_R_sparseMatrix( const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM, - Parallel_Orbitals *PV); + Parallel_Orbitals *pv); //------------------------------------------------------ // in gint_k_sparse1.cpp @@ -117,7 +117,7 @@ class Gint_k : public Gint const std::map, std::map>> &pvdpR_sparseMatrix, LCAO_Matrix *LM, - Parallel_Orbitals *PV); + Parallel_Orbitals *pv); void distribute_pvdpR_soc_sparseMatrix( const int dim, @@ -125,13 +125,13 @@ class Gint_k : public Gint const std::map, std::map>>> &pvdpR_soc_sparseMatrix, LCAO_Matrix *LM, - Parallel_Orbitals *PV); + Parallel_Orbitals *pv); void cal_dvlocal_R_sparseMatrix( const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM, - Parallel_Orbitals *PV); + Parallel_Orbitals *pv); private: diff --git a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp index 693254060f..f39af59f16 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp @@ -60,7 +60,7 @@ void Gint_k::destroy_pvpR(void) // H(k)=\sum{R} H(R)exp(ikR) void Gint_k::folding_vl_k(const int &ik, LCAO_Matrix *LM, - Parallel_Orbitals *PV, + Parallel_Orbitals *pv, const std::vector>& kvec_d) { ModuleBase::TITLE("Gint_k","folding_vl_k"); @@ -389,7 +389,7 @@ void Gint_k::folding_vl_k(const int &ik, #endif for (int j=0; jin_this_processor(i,j)) + if (!pv->in_this_processor(i,j)) { continue; } diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp index 6bac72f1c8..5efec28e2d 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp @@ -16,7 +16,7 @@ void Gint_k::distribute_pvpR_sparseMatrix( const std::map, std::map>> &pvpR_sparseMatrix, LCAO_Matrix *LM, - Parallel_Orbitals *PV + Parallel_Orbitals *pv ) { ModuleBase::TITLE("Gint_k","distribute_pvpR_sparseMatrix"); @@ -111,11 +111,11 @@ void Gint_k::distribute_pvpR_sparseMatrix( Parallel_Reduce::reduce_pool(tmp, GlobalV::NLOCAL); - if (PV->global2local_row(row) >= 0) + if (pv->global2local_row(row) >= 0) { for(int col = 0; col < GlobalV::NLOCAL; ++col) { - if(PV->global2local_col(col) >= 0) + if(pv->global2local_col(col) >= 0) { if (std::abs(tmp[col]) > sparse_threshold) { @@ -152,7 +152,7 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix( const std::map, std::map>>> &pvpR_soc_sparseMatrix, LCAO_Matrix *LM, - Parallel_Orbitals *PV + Parallel_Orbitals *pv ) { ModuleBase::TITLE("Gint_k","distribute_pvpR_soc_sparseMatrix"); @@ -246,11 +246,11 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix( Parallel_Reduce::reduce_pool(tmp_soc, GlobalV::NLOCAL); - if (PV->global2local_row(row) >= 0) + if (pv->global2local_row(row) >= 0) { for(int col = 0; col < GlobalV::NLOCAL; ++col) { - if(PV->global2local_col(col) >= 0) + if(pv->global2local_col(col) >= 0) { if (std::abs(tmp_soc[col]) > sparse_threshold) { @@ -283,7 +283,7 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix( } -void Gint_k::cal_vlocal_R_sparseMatrix(const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *PV) +void Gint_k::cal_vlocal_R_sparseMatrix(const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *pv) { ModuleBase::TITLE("Gint_k","cal_vlocal_R_sparseMatrix"); @@ -420,11 +420,11 @@ void Gint_k::cal_vlocal_R_sparseMatrix(const int ¤t_spin, const double &sp if (GlobalV::NSPIN != 4) { - distribute_pvpR_sparseMatrix(current_spin, sparse_threshold, pvpR_sparseMatrix, LM,PV); + distribute_pvpR_sparseMatrix(current_spin, sparse_threshold, pvpR_sparseMatrix, LM,pv); } else { - distribute_pvpR_soc_sparseMatrix(sparse_threshold, pvpR_soc_sparseMatrix, LM,PV); + distribute_pvpR_soc_sparseMatrix(sparse_threshold, pvpR_soc_sparseMatrix, LM,pv); } return; diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp index eaeaede6a1..e5c7ac5729 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp @@ -16,7 +16,7 @@ void Gint_k::distribute_pvdpR_sparseMatrix( const double &sparse_threshold, const std::map, std::map>> &pvdpR_sparseMatrix, LCAO_Matrix *LM, - Parallel_Orbitals *PV) + Parallel_Orbitals *pv) { ModuleBase::TITLE("Gint_k","distribute_pvdpR_sparseMatrix"); @@ -110,11 +110,11 @@ void Gint_k::distribute_pvdpR_sparseMatrix( Parallel_Reduce::reduce_pool(tmp, GlobalV::NLOCAL); - if (PV->global2local_row(row) >= 0) + if (pv->global2local_row(row) >= 0) { for(int col = 0; col < GlobalV::NLOCAL; ++col) { - if(PV->global2local_col(col) >= 0) + if(pv->global2local_col(col) >= 0) { if (std::abs(tmp[col]) > sparse_threshold) { @@ -173,7 +173,7 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix( const std::map, std::map>>> &pvdpR_soc_sparseMatrix, LCAO_Matrix *LM, - Parallel_Orbitals *PV + Parallel_Orbitals *pv ) { ModuleBase::TITLE("Gint_k","distribute_pvdpR_soc_sparseMatrix"); @@ -267,11 +267,11 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix( Parallel_Reduce::reduce_pool(tmp_soc, GlobalV::NLOCAL); - if (PV->global2local_row(row) >= 0) + if (pv->global2local_row(row) >= 0) { for(int col = 0; col < GlobalV::NLOCAL; ++col) { - if(PV->global2local_col(col) >= 0) + if(pv->global2local_col(col) >= 0) { if (std::abs(tmp_soc[col]) > sparse_threshold) { @@ -325,7 +325,7 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix( } -void Gint_k::cal_dvlocal_R_sparseMatrix(const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *PV) +void Gint_k::cal_dvlocal_R_sparseMatrix(const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *pv) { ModuleBase::TITLE("Gint_k","cal_vlocal_R_sparseMatrix"); @@ -529,15 +529,15 @@ void Gint_k::cal_dvlocal_R_sparseMatrix(const int ¤t_spin, const double &s if (GlobalV::NSPIN != 4) { - distribute_pvdpR_sparseMatrix(current_spin, 0, sparse_threshold, pvdpRx_sparseMatrix, LM,PV); - distribute_pvdpR_sparseMatrix(current_spin, 1, sparse_threshold, pvdpRy_sparseMatrix, LM,PV); - distribute_pvdpR_sparseMatrix(current_spin, 2, sparse_threshold, pvdpRz_sparseMatrix, LM,PV); + distribute_pvdpR_sparseMatrix(current_spin, 0, sparse_threshold, pvdpRx_sparseMatrix, LM,pv); + distribute_pvdpR_sparseMatrix(current_spin, 1, sparse_threshold, pvdpRy_sparseMatrix, LM,pv); + distribute_pvdpR_sparseMatrix(current_spin, 2, sparse_threshold, pvdpRz_sparseMatrix, LM,pv); } else { - distribute_pvdpR_soc_sparseMatrix(0, sparse_threshold, pvdpRx_soc_sparseMatrix, LM,PV); - distribute_pvdpR_soc_sparseMatrix(1, sparse_threshold, pvdpRy_soc_sparseMatrix, LM,PV); - distribute_pvdpR_soc_sparseMatrix(2, sparse_threshold, pvdpRz_soc_sparseMatrix, LM,PV); + distribute_pvdpR_soc_sparseMatrix(0, sparse_threshold, pvdpRx_soc_sparseMatrix, LM,pv); + distribute_pvdpR_soc_sparseMatrix(1, sparse_threshold, pvdpRy_soc_sparseMatrix, LM,pv); + distribute_pvdpR_soc_sparseMatrix(2, sparse_threshold, pvdpRz_soc_sparseMatrix, LM,pv); } return; From 3a67c2b82682cdd70cfffd7aac359ea6ea90539f Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Mon, 13 May 2024 16:12:35 +0800 Subject: [PATCH 03/23] change GlobalC in module gint --- .../module_elecstate/elecstate_lcao_tddft.cpp | 1 - .../module_esolver/esolver_ks_lcao_elec.cpp | 4 +- .../operator_lcao/veff_lcao.cpp | 7 +- .../hamilt_lcaodft/operator_lcao/veff_lcao.h | 7 +- .../hamilt_lcaodft/spar_dh.cpp | 2 +- .../module_hamilt_lcao/module_gint/gint.cpp | 29 ++-- source/module_hamilt_lcao/module_gint/gint.h | 21 ++- .../module_gint/gint_fvl.cpp | 10 +- .../module_gint/gint_gamma.h | 4 +- .../module_gint/gint_gamma_env.cpp | 17 +- .../module_gint/gint_gamma_vl.cpp | 4 +- .../module_hamilt_lcao/module_gint/gint_k.h | 19 ++- .../module_gint/gint_k_env.cpp | 20 +-- .../module_gint/gint_k_pvpr.cpp | 94 +++++------ .../module_gint/gint_k_sparse.cpp | 37 ++--- .../module_gint/gint_k_sparse1.cpp | 41 ++--- .../module_gint/gint_rho.cpp | 4 +- .../module_gint/gint_tau.cpp | 9 +- .../module_gint/gint_tools.cpp | 6 +- .../module_gint/gint_tools.h | 6 +- .../module_gint/gint_vl.cpp | 38 +++-- .../module_gint/grid_bigcell.cpp | 56 +++---- .../module_gint/grid_bigcell.h | 6 +- .../module_gint/grid_meshcell.cpp | 20 +-- .../module_gint/grid_meshcell.h | 4 +- .../module_gint/grid_technique.cpp | 148 +++++++++--------- .../module_gint/grid_technique.h | 19 ++- source/module_io/istate_envelope.cpp | 4 +- 28 files changed, 345 insertions(+), 292 deletions(-) diff --git a/source/module_elecstate/elecstate_lcao_tddft.cpp b/source/module_elecstate/elecstate_lcao_tddft.cpp index cc0e7133e4..c4c1e21e19 100644 --- a/source/module_elecstate/elecstate_lcao_tddft.cpp +++ b/source/module_elecstate/elecstate_lcao_tddft.cpp @@ -5,7 +5,6 @@ #include "module_base/timer.h" #include "module_hamilt_lcao/module_gint/grid_technique.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" - namespace elecstate { diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp index aa8e53bd7c..b74d1c5425 100644 --- a/source/module_esolver/esolver_ks_lcao_elec.cpp +++ b/source/module_esolver/esolver_ks_lcao_elec.cpp @@ -64,7 +64,9 @@ void ESolver_KS_LCAO::set_matrix_grid(Record_adj& ra) this->pw_big->nbzp, this->pw_rho->ny, this->pw_rho->nplane, - this->pw_rho->startz_current); + this->pw_rho->startz_current, + GlobalC::ucell, + GlobalC::ORB); // (2)For each atom, calculate the adjacent atoms in different cells // and allocate the space for H(R) and S(R). diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp index ceab9d6d02..2fd691d4f4 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp @@ -2,7 +2,7 @@ #include "module_base/timer.h" #include "module_base/tool_title.h" #include "module_hamilt_general/module_xc/xc_functional.h" - +#include "module_cell/unitcell.h" namespace hamilt { @@ -112,8 +112,7 @@ void Veff>::contributeHR() } } } - - this->GK->transfer_pvpR(this->hR); + this->GK->transfer_pvpR(this->hR,this->ucell,GlobalC::ORB,this->gd); ModuleBase::timer::tick("Veff", "contributeHR"); return; @@ -148,7 +147,7 @@ void Veff>::contributeHR(void) Gint_inout inout(vr_eff1, Gint_Tools::job_type::vlocal); this->GG->cal_vlocal(&inout, this->new_e_iteration); } - this->GG->transfer_pvpR(this->hR); + this->GG->transfer_pvpR(this->hR,this->ucell,GlobalC::ORB); this->new_e_iteration = false; ModuleBase::timer::tick("Veff", "contributeHR"); diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h index 41bd719731..65e8b89765 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h @@ -48,6 +48,8 @@ class Veff> : public OperatorLCAO : GK(GK_in), loc(loc_in), pot(pot_in), + ucell(ucell_in), + gd(GridD_in), OperatorLCAO(LM_in, kvec_d_in, hR_in, hK_in) { this->cal_type = lcao_gint; @@ -74,7 +76,6 @@ class Veff> : public OperatorLCAO OperatorLCAO(LM_in, kvec_d_in, hR_in, hK_in) { this->cal_type = lcao_gint; - this->initialize_HR(ucell_in, GridD_in, paraV); GG_in->initialize_pvpR(*ucell_in, GridD_in); @@ -89,7 +90,9 @@ class Veff> : public OperatorLCAO * grid integration is used to calculate the contribution Hamiltonian of effective potential */ virtual void contributeHR() override; - + + const UnitCell* ucell; + Grid_Driver* gd; private: // used for k-dependent grid integration. Gint_k* GK = nullptr; diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp index 5e3911ccb2..3eb079006b 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp @@ -42,7 +42,7 @@ void sparse_format::cal_dH( delete[] lm.DHloc_fixedR_y; delete[] lm.DHloc_fixedR_z; - gint_k.cal_dvlocal_R_sparseMatrix(current_spin, sparse_thr, &lm, lm.ParaV); + gint_k.cal_dvlocal_R_sparseMatrix(current_spin, sparse_thr, &lm, lm.ParaV,GlobalC::ORB,GlobalC::ucell,GlobalC::GridD); return; } diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp index 2392d21df2..864a53ab77 100644 --- a/source/module_hamilt_lcao/module_gint/gint.cpp +++ b/source/module_hamilt_lcao/module_gint/gint.cpp @@ -293,14 +293,14 @@ void Gint::cal_gint(Gint_inout* inout) //int* vindex = Gint_Tools::get_vindex(ncyz, ibx, jby, kbz); int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz, this->nplane, this->gridt->start_ind[grid_index], ncyz); - this->gint_kernel_rho(na_grid, grid_index, delta_r, vindex, LD_pool, inout); + this->gint_kernel_rho(na_grid, grid_index, delta_r, vindex, LD_pool, GlobalC::ucell,inout); delete[] vindex; } else if(inout->job == Gint_Tools::job_type::tau) { int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz, this->nplane, this->gridt->start_ind[grid_index], ncyz); - this->gint_kernel_tau(na_grid, grid_index, delta_r, vindex, LD_pool, inout); + this->gint_kernel_tau(na_grid, grid_index, delta_r, vindex, LD_pool, inout,GlobalC::ucell); delete[] vindex; } else if(inout->job == Gint_Tools::job_type::force) @@ -322,11 +322,11 @@ void Gint::cal_gint(Gint_inout* inout) #ifdef _OPENMP this->gint_kernel_force(na_grid, grid_index, delta_r, vldr3, LD_pool, DM_in, inout->ispin, inout->isforce, inout->isstress, - &fvl_dphi_thread, &svl_dphi_thread); + &fvl_dphi_thread, &svl_dphi_thread,GlobalC::ucell); #else this->gint_kernel_force(na_grid, grid_index, delta_r, vldr3, LD_pool, DM_in, inout->ispin, inout->isforce, inout->isstress, - inout->fvl_dphi, inout->svl_dphi); + inout->fvl_dphi, inout->svl_dphi,GlobalC::ucell); #endif delete[] vldr3; } @@ -338,17 +338,17 @@ void Gint::cal_gint(Gint_inout* inout) if((GlobalV::GAMMA_ONLY_LOCAL && lgd>0) || !GlobalV::GAMMA_ONLY_LOCAL) { this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool, - pvpR_thread, hRGint_thread); + pvpR_thread,GlobalC::ucell,hRGint_thread); } #else if(GlobalV::GAMMA_ONLY_LOCAL && lgd>0) { - this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool, nullptr); + this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,GlobalC::ucell,nullptr); } if(!GlobalV::GAMMA_ONLY_LOCAL) { this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool, - this->pvpR_reduced[inout->ispin]); + GlobalC::ucell,this->pvpR_reduced[inout->ispin]); } #endif delete[] vldr3; @@ -359,12 +359,13 @@ void Gint::cal_gint(Gint_inout* inout) this->nplane, this->gridt->start_ind[grid_index], ncyz, dv); #ifdef _OPENMP this->gint_kernel_dvlocal(na_grid, grid_index, delta_r, vldr3, LD_pool, - pvdpRx_thread, pvdpRy_thread, pvdpRz_thread); + pvdpRx_thread, pvdpRy_thread, pvdpRz_thread,GlobalC::ucell); #else this->gint_kernel_dvlocal(na_grid, grid_index, delta_r, vldr3, LD_pool, this->pvdpRx_reduced[inout->ispin], this->pvdpRy_reduced[inout->ispin], - this->pvdpRz_reduced[inout->ispin]); + this->pvdpRz_reduced[inout->ispin], + GlobalC::ucell); #endif delete[] vldr3; } @@ -378,16 +379,16 @@ void Gint::cal_gint(Gint_inout* inout) if((GlobalV::GAMMA_ONLY_LOCAL && lgd>0) || !GlobalV::GAMMA_ONLY_LOCAL) { this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, - pvpR_thread, hRGint_thread); + pvpR_thread, GlobalC::ucell,hRGint_thread); } #else if(GlobalV::GAMMA_ONLY_LOCAL && lgd>0) { - this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, nullptr); + this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, GlobalC::ucell,nullptr); } if(!GlobalV::GAMMA_ONLY_LOCAL) { - this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, + this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,GlobalC::ucell, this->pvpR_reduced[inout->ispin]); } #endif @@ -415,11 +416,11 @@ void Gint::cal_gint(Gint_inout* inout) #ifdef _OPENMP this->gint_kernel_force_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, DM_in, inout->ispin, inout->isforce, inout->isstress, - &fvl_dphi_thread, &svl_dphi_thread); + &fvl_dphi_thread, &svl_dphi_thread,GlobalC::ucell); #else this->gint_kernel_force_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, DM_in, inout->ispin, inout->isforce, inout->isstress, - inout->fvl_dphi, inout->svl_dphi); + inout->fvl_dphi, inout->svl_dphi,GlobalC::ucell); #endif delete[] vldr3; delete[] vkdr3; diff --git a/source/module_hamilt_lcao/module_gint/gint.h b/source/module_hamilt_lcao/module_gint/gint.h index 8a90250c47..d7a51c1a03 100644 --- a/source/module_hamilt_lcao/module_gint/gint.h +++ b/source/module_hamilt_lcao/module_gint/gint.h @@ -12,7 +12,7 @@ #include "gint_tools.h" #include "module_hamilt_lcao/module_hcontainer/hcontainer.h" #include "module_cell/module_neighbor/sltk_grid_driver.h" - +#include "module_hamilt_lcao/module_gint/grid_technique.h" class Gint { public: @@ -80,6 +80,7 @@ class Gint double* vldr3, const int LD_pool, double* pvpR_reduced, + const UnitCell& ucell, hamilt::HContainer* hR = nullptr); // calculate < phi_0 | vlocal | dphi_R > @@ -91,7 +92,8 @@ class Gint const int LD_pool, double* pvdpRx_reduced, double* pvdpRy_reduced, - double* pvdpRz_reduced); + double* pvdpRz_reduced, + const UnitCell& ucell); void gint_kernel_vlocal_meta( const int na_grid, @@ -101,6 +103,7 @@ class Gint double* vkdr3, const int LD_pool, double* pvpR_reduced, + const UnitCell& ucell, hamilt::HContainer* hR = nullptr); void cal_meshball_vlocal_gamma( @@ -125,7 +128,8 @@ class Gint bool** cal_flag, double** psir_ylm, double** psir_vlbr3, - double* pvpR); + double* pvpR, + const UnitCell& ucell); //------------------------------------------------------ // in gint_fvl.cpp @@ -142,7 +146,8 @@ class Gint const bool isforce, const bool isstress, ModuleBase::matrix* fvl_dphi, - ModuleBase::matrix* svl_dphi); + ModuleBase::matrix* svl_dphi, + const UnitCell& ucell); void gint_kernel_force_meta( const int na_grid, @@ -156,7 +161,8 @@ class Gint const bool isforce, const bool isstress, ModuleBase::matrix* fvl_dphi, - ModuleBase::matrix* svl_dphi); + ModuleBase::matrix* svl_dphi, + const UnitCell& ucell); void cal_meshball_force( const int grid_index, @@ -191,6 +197,7 @@ class Gint const double delta_r, int* vindex, const int LD_pool, + const UnitCell& ucell, Gint_inout *inout); void cal_meshball_rho( @@ -207,7 +214,8 @@ class Gint const double delta_r, int* vindex, const int LD_pool, - Gint_inout *inout); + Gint_inout *inout, + const UnitCell& ucell); void cal_meshball_tau( const int na_grid, @@ -221,7 +229,6 @@ class Gint double** dpsiz_dm, double* rho); - // dimension: [GlobalC::LNNR.nnrg] // save the < phi_0i | V | phi_Rj > in sparse H matrix. bool pvpR_alloc_flag = false; double** pvpR_reduced = nullptr; //stores Hamiltonian in reduced format, for multi-l diff --git a/source/module_hamilt_lcao/module_gint/gint_fvl.cpp b/source/module_hamilt_lcao/module_gint/gint_fvl.cpp index 59359dc674..36de330336 100644 --- a/source/module_hamilt_lcao/module_gint/gint_fvl.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_fvl.cpp @@ -14,7 +14,8 @@ void Gint::gint_kernel_force( const bool isforce, const bool isstress, ModuleBase::matrix* fvl_dphi, - ModuleBase::matrix* svl_dphi) + ModuleBase::matrix* svl_dphi, + const UnitCell& ucell) { //prepare block information int* block_iw=nullptr; @@ -30,7 +31,7 @@ void Gint::gint_kernel_force( Gint_Tools::Array_Pool dpsir_ylm_z(this->bxyz, LD_pool); Gint_Tools::cal_dpsir_ylm(*this->gridt, this->bxyz, na_grid, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D); + psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D,ucell); //calculating f_mu(r) = v(r)*psi_mu(r)*dv const Gint_Tools::Array_Pool psir_vlbr3 @@ -117,7 +118,8 @@ void Gint::gint_kernel_force_meta( const bool isforce, const bool isstress, ModuleBase::matrix* fvl_dphi, - ModuleBase::matrix* svl_dphi) + ModuleBase::matrix* svl_dphi, + const UnitCell& ucell) { //prepare block information int* block_iw=nullptr; @@ -168,7 +170,7 @@ void Gint::gint_kernel_force_meta( //psi and gradient of psi Gint_Tools::cal_dpsir_ylm(*this->gridt, this->bxyz, na_grid, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D); + psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D,ucell); /* Gint_Tools::cal_dpsir_ylm(na_grid, grid_index, delta_r, block_index, block_size, cal_flag, psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D, displ1); diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma.h b/source/module_hamilt_lcao/module_gint/gint_gamma.h index d6f31da80e..7ed700828f 100644 --- a/source/module_hamilt_lcao/module_gint/gint_gamma.h +++ b/source/module_hamilt_lcao/module_gint/gint_gamma.h @@ -39,13 +39,13 @@ class Gint_Gamma : public Gint // in gint_gamma_env.cpp //------------------------------------------------------ // calcualte the envelope function - void cal_env(const double* wfc, double* rho); + void cal_env(const double* wfc, double* rho,LCAO_Orbitals &orb,UnitCell &ucell); //------------------------------------------------------ // in veff_lcao.cpp //------------------------------------------------------ /// transfer this->hRGint to Veff::hR - void transfer_pvpR(hamilt::HContainer* hR); + void transfer_pvpR(hamilt::HContainer* hR,const UnitCell* ucell,const LCAO_Orbitals& orb); private: diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp b/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp index b63b108411..29325a8e8b 100644 --- a/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp @@ -5,14 +5,14 @@ #include "module_base/ylm.h" #include "module_base/timer.h" -void Gint_Gamma::cal_env(const double* wfc, double* rho) +void Gint_Gamma::cal_env(const double* wfc, double* rho,LCAO_Orbitals &orb,UnitCell &ucell) { ModuleBase::TITLE("Grid_Integral","cal_env"); // it's a uniform grid to save orbital values, so the delta_r is a constant. - const double delta_r = GlobalC::ORB.dr_uniform; + const double delta_r = orb.dr_uniform; const int max_size = this->gridt->max_atom; - const int LD_pool = max_size*GlobalC::ucell.nwmax; + const int LD_pool = max_size*ucell.nwmax; if(max_size!=0) { @@ -40,7 +40,8 @@ void Gint_Gamma::cal_env(const double* wfc, double* rho) size, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D); + psir_ylm.ptr_2D, + ucell); int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz, this->nplane, this->gridt->start_ind[grid_index], ncyz); @@ -49,11 +50,11 @@ void Gint_Gamma::cal_env(const double* wfc, double* rho) { const int mcell_index1 = this->gridt->bcell_start[grid_index] + ia1; const int iat = this->gridt->which_atom[mcell_index1]; - const int T1 = GlobalC::ucell.iat2it[iat]; - Atom *atom1 = &GlobalC::ucell.atoms[T1]; - const int I1 = GlobalC::ucell.iat2ia[iat]; + const int T1 = ucell.iat2it[iat]; + Atom *atom1 = &ucell.atoms[T1]; + const int I1 = ucell.iat2ia[iat]; // get the start index of local orbitals. - const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0); + const int start1 = ucell.itiaiw2iwt(T1, I1, 0); for (int ib=0; ibbxyz; ib++) { if(cal_flag[ib][ia1]) diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp b/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp index 163f127fc2..5177036c11 100644 --- a/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp @@ -289,11 +289,11 @@ void Gint_Gamma::vl_grid_to_2D( #ifdef __MPI #include "module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h" #endif -void Gint_Gamma::transfer_pvpR(hamilt::HContainer* hR) +void Gint_Gamma::transfer_pvpR(hamilt::HContainer* hR,const UnitCell* ucell,const LCAO_Orbitals& orb) { ModuleBase::TITLE("Gint_Gamma","transfer_pvpR"); ModuleBase::timer::tick("Gint_Gamma","transfer_pvpR"); - + for(int iap=0;iaphRGint->size_atom_pairs();iap++) { auto& ap = this->hRGint->get_atom_pair(iap); diff --git a/source/module_hamilt_lcao/module_gint/gint_k.h b/source/module_hamilt_lcao/module_gint/gint_k.h index 094dc7d591..396eda3423 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k.h +++ b/source/module_hamilt_lcao/module_gint/gint_k.h @@ -59,14 +59,16 @@ class Gint_k : public Gint // // V is (Vl + Vh + Vxc) if no Vna is used, // and is (Vna + delta_Vh + Vxc) if Vna is used. - void folding_vl_k(const int &ik, LCAO_Matrix* LM, Parallel_Orbitals *pv,const std::vector>& kvec_d); + void folding_vl_k(const int &ik, LCAO_Matrix* LM, Parallel_Orbitals *pv, + const std::vector>& kvec_d, + const UnitCell& ucell,const LCAO_Orbitals& orb,Grid_Driver& gd); /** * @brief transfer pvpR to this->hRGint * then pass this->hRGint to Veff::hR */ - void transfer_pvpR(hamilt::HContainer *hR); - void transfer_pvpR(hamilt::HContainer> *hR); + void transfer_pvpR(hamilt::HContainer *hR,const UnitCell* ucell_in,const LCAO_Orbitals& orb,Grid_Driver* gd); + void transfer_pvpR(hamilt::HContainer> *hR,const UnitCell* ucell_in,const LCAO_Orbitals& orb,Grid_Driver* gd); //------------------------------------------------------ // in gint_k_env.cpp @@ -76,7 +78,8 @@ class Gint_k : public Gint const std::complex* psi_k, double* rho, const std::vector>& kvec_c, - const std::vector>& kvec_d); + const std::vector>& kvec_d, + LCAO_Orbitals &orb,UnitCell &ucell); //------------------------------------------------------ // in gint_k_sparse.cpp @@ -104,7 +107,9 @@ class Gint_k : public Gint const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM, - Parallel_Orbitals *pv); + Parallel_Orbitals *pv, + LCAO_Orbitals &orb,UnitCell &ucell, + Grid_Driver &gdriver); //------------------------------------------------------ // in gint_k_sparse1.cpp @@ -131,7 +136,9 @@ class Gint_k : public Gint const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM, - Parallel_Orbitals *pv); + Parallel_Orbitals *pv, + LCAO_Orbitals &orb,UnitCell &ucell, + Grid_Driver &gdriver); private: diff --git a/source/module_hamilt_lcao/module_gint/gint_k_env.cpp b/source/module_hamilt_lcao/module_gint/gint_k_env.cpp index 33c22526bf..76499b2e5a 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_env.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_env.cpp @@ -9,15 +9,16 @@ void Gint_k::cal_env_k(int ik, const std::complex* psi_k, double* rho, const std::vector>& kvec_c, - const std::vector>& kvec_d) + const std::vector>& kvec_d, + LCAO_Orbitals &orb,UnitCell &ucell) { ModuleBase::TITLE("Gint_k", "cal_env_k"); ModuleBase::timer::tick("Gint_k", "cal_env_k"); // it's a uniform grid to save orbital values, so the delta_r is a constant. - const double delta_r = GlobalC::ORB.dr_uniform; + const double delta_r = orb.dr_uniform; const int max_size = this->gridt->max_atom; - const int LD_pool = max_size*GlobalC::ucell.nwmax; + const int LD_pool = max_size*ucell.nwmax; if (max_size != 0) { @@ -49,7 +50,8 @@ void Gint_k::cal_env_k(int ik, this->bxyz, size, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D); + psir_ylm.ptr_2D, + ucell); int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz, this->nplane, this->gridt->start_ind[grid_index], ncyz); @@ -58,9 +60,9 @@ void Gint_k::cal_env_k(int ik, { const int mcell_index1 = this->gridt->bcell_start[grid_index] + ia1; const int iat = this->gridt->which_atom[mcell_index1]; - const int T1 = GlobalC::ucell.iat2it[iat]; - Atom* atom1 = &GlobalC::ucell.atoms[T1]; - const int I1 = GlobalC::ucell.iat2ia[iat]; + const int T1 = ucell.iat2it[iat]; + Atom* atom1 = &ucell.atoms[T1]; + const int I1 = ucell.iat2ia[iat]; //find R by which_unitcell and cal kphase const int id_ucell = this->gridt->which_unitcell[mcell_index1]; @@ -72,12 +74,12 @@ void Gint_k::cal_env_k(int ik, //std::cout << "kvec_c: " << kvec_c[ik].x << " " << kvec_c[ik].y << " " << kvec_c[ik].z << std::endl; //std::cout << "R: " << R.x << " " << R.y << " " << R.z << std::endl; const double arg = (kvec_d[ik] * R) * ModuleBase::TWO_PI; - const double arg1 = (kvec_c[ik] * (R.x * GlobalC::ucell.a1 + R.y * GlobalC::ucell.a2 + R.z * GlobalC::ucell.a3)) * ModuleBase::TWO_PI; + const double arg1 = (kvec_c[ik] * (R.x * ucell.a1 + R.y * ucell.a2 + R.z * ucell.a3)) * ModuleBase::TWO_PI; //std::cout << "arg0=" << arg << ", arg1=" << arg1 << std::endl; const std::complex kphase = std::complex (cos(arg), sin(arg)); // get the start index of local orbitals. - const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0); + const int start1 = ucell.itiaiw2iwt(T1, I1, 0); for (int ib = 0; ib < this->bxyz; ib++) { if (cal_flag[ib][ia1]) diff --git a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp index f39af59f16..3f62519bba 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp @@ -60,8 +60,12 @@ void Gint_k::destroy_pvpR(void) // H(k)=\sum{R} H(R)exp(ikR) void Gint_k::folding_vl_k(const int &ik, LCAO_Matrix *LM, - Parallel_Orbitals *pv, - const std::vector>& kvec_d) + Parallel_Orbitals *pv, + const std::vector>& kvec_d, + const UnitCell& ucell, + const LCAO_Orbitals& orb, + Grid_Driver& gd + ) { ModuleBase::TITLE("Gint_k","folding_vl_k"); ModuleBase::timer::tick("Gint_k","folding_vl_k"); @@ -131,25 +135,25 @@ void Gint_k::folding_vl_k(const int &ik, #ifdef _OPENMP #pragma omp for schedule(dynamic) #endif - for(int iat=0; iatgridt->in_this_processor[iat]) { - Atom* atom1 = &GlobalC::ucell.atoms[T1]; - const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0); + Atom* atom1 = &ucell.atoms[T1]; + const int start1 = ucell.itiaiw2iwt(T1, I1, 0); // get the start positions of elements. const int DM_start = this->gridt->nlocstartg[iat]; // get the coordinates of adjacent atoms. - tau1 = GlobalC::ucell.atoms[T1].tau[I1]; - //GlobalC::GridD.Find_atom(tau1); + tau1 = ucell.atoms[T1].tau[I1]; + //GridD.Find_atom(tau1); AdjacentAtomInfo adjs; - GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs); + gd.Find_atom(ucell, tau1, T1, I1, &adjs); // search for the adjacent atoms. int nad = 0; @@ -158,25 +162,25 @@ void Gint_k::folding_vl_k(const int &ik, // get iat2 const int T2 = adjs.ntype[ad]; const int I2 = adjs.natom[ad]; - const int iat2 = GlobalC::ucell.itia2iat(T2, I2); + const int iat2 = ucell.itia2iat(T2, I2); // adjacent atom is also on the grid. if(this->gridt->in_this_processor[iat2]) { - Atom* atom2 = &GlobalC::ucell.atoms[T2]; + Atom* atom2 = &ucell.atoms[T2]; dtau = adjs.adjacent_tau[ad] - tau1; - double distance = dtau.norm() * GlobalC::ucell.lat0; - double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut(); + double distance = dtau.norm() * ucell.lat0; + double rcut = orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut(); // for the local part, only need to calculate within range // mohan note 2012-07-06 if(distance < rcut) { - const int start2 = GlobalC::ucell.itiaiw2iwt(T2, I2, 0); + const int start2 = ucell.itiaiw2iwt(T2, I2, 0); // calculate the distance between iat1 and iat2. - // ModuleBase::Vector3 dR = GlobalC::GridD.getAdjacentTau(ad) - tau1; + // ModuleBase::Vector3 dR = GridD.getAdjacentTau(ad) - tau1; dR.x = adjs.box[ad].x; dR.y = adjs.box[ad].y; dR.z = adjs.box[ad].z; @@ -422,7 +426,7 @@ void Gint_k::folding_vl_k(const int &ik, #include "module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h" //transfer_pvpR, NSPIN = 1 or 2 -void Gint_k::transfer_pvpR(hamilt::HContainer *hR) +void Gint_k::transfer_pvpR(hamilt::HContainer *hR,const UnitCell* ucell_in,const LCAO_Orbitals& orb,Grid_Driver* gd) { ModuleBase::TITLE("Gint_k","transfer_pvpR"); ModuleBase::timer::tick("Gint_k","transfer_pvpR"); @@ -434,25 +438,25 @@ void Gint_k::transfer_pvpR(hamilt::HContainer *hR) this->hRGint->set_zero(); const int npol = GlobalV::NPOL; - - for(int iat=0; iatgridt->in_this_processor[iat]) { - Atom* atom1 = &GlobalC::ucell.atoms[T1]; + Atom* atom1 = &ucell.atoms[T1]; // get the start positions of elements. const int DM_start = this->gridt->nlocstartg[iat]; // get the coordinates of adjacent atoms. - auto& tau1 = GlobalC::ucell.atoms[T1].tau[I1]; - //GlobalC::GridD.Find_atom(tau1); + auto& tau1 = ucell.atoms[T1].tau[I1]; + //gd.Find_atom(tau1); AdjacentAtomInfo adjs; - GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs); + gd->Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs); // search for the adjacent atoms. int nad = 0; @@ -461,16 +465,16 @@ void Gint_k::transfer_pvpR(hamilt::HContainer *hR) // get iat2 const int T2 = adjs.ntype[ad]; const int I2 = adjs.natom[ad]; - const int iat2 = GlobalC::ucell.itia2iat(T2, I2); + const int iat2 = ucell.itia2iat(T2, I2); // adjacent atom is also on the grid. if(this->gridt->in_this_processor[iat2]) { - Atom* atom2 = &GlobalC::ucell.atoms[T2]; + Atom* atom2 = &ucell.atoms[T2]; auto dtau = adjs.adjacent_tau[ad] - tau1; - double distance = dtau.norm() * GlobalC::ucell.lat0; - double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut(); + double distance = dtau.norm() * ucell.lat0; + double rcut = orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut(); if(distance < rcut) { @@ -480,7 +484,7 @@ void Gint_k::transfer_pvpR(hamilt::HContainer *hR) continue; } // calculate the distance between iat1 and iat2. - // ModuleBase::Vector3 dR = GlobalC::GridD.getAdjacentTau(ad) - tau1; + // ModuleBase::Vector3 dR = gd.getAdjacentTau(ad) - tau1; auto& dR = adjs.box[ad]; //dR.x = adjs.box[ad].x; //dR.y = adjs.box[ad].y; @@ -548,11 +552,10 @@ void Gint_k::transfer_pvpR(hamilt::HContainer *hR) } //transfer_pvpR, NSPIN = 4 -void Gint_k::transfer_pvpR(hamilt::HContainer> *hR) +void Gint_k::transfer_pvpR(hamilt::HContainer> *hR,const UnitCell* ucell_in,const LCAO_Orbitals& orb,Grid_Driver* gd) { ModuleBase::TITLE("Gint_k","transfer_pvpR"); ModuleBase::timer::tick("Gint_k","transfer_pvpR"); - if(!pvpR_alloc_flag || this->hRGintCd == nullptr) { ModuleBase::WARNING_QUIT("Gint_k::destroy_pvpR","pvpR hasnot been allocated yet!"); @@ -560,25 +563,26 @@ void Gint_k::transfer_pvpR(hamilt::HContainer> *hR) this->hRGintCd->set_zero(); const int npol = GlobalV::NPOL; - - for(int iat=0; iatgridt->in_this_processor[iat]) { - Atom* atom1 = &GlobalC::ucell.atoms[T1]; + Atom* atom1 = &ucell.atoms[T1]; // get the start positions of elements. const int DM_start = this->gridt->nlocstartg[iat]; // get the coordinates of adjacent atoms. - auto& tau1 = GlobalC::ucell.atoms[T1].tau[I1]; - //GlobalC::GridD.Find_atom(tau1); + auto& tau1 = ucell.atoms[T1].tau[I1]; + //gd.Find_atom(tau1); AdjacentAtomInfo adjs; - GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs); + gd->Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs); // search for the adjacent atoms. int nad = 0; @@ -587,15 +591,15 @@ void Gint_k::transfer_pvpR(hamilt::HContainer> *hR) // get iat2 const int T2 = adjs.ntype[ad]; const int I2 = adjs.natom[ad]; - const int iat2 = GlobalC::ucell.itia2iat(T2, I2); + const int iat2 = ucell.itia2iat(T2, I2); // adjacent atom is also on the grid. if(this->gridt->in_this_processor[iat2]) { - Atom* atom2 = &GlobalC::ucell.atoms[T2]; + Atom* atom2 = &ucell.atoms[T2]; auto dtau = adjs.adjacent_tau[ad] - tau1; - double distance = dtau.norm() * GlobalC::ucell.lat0; - double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut(); + double distance = dtau.norm() * ucell.lat0; + double rcut = orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut(); if(distance < rcut) { @@ -605,7 +609,7 @@ void Gint_k::transfer_pvpR(hamilt::HContainer> *hR) continue; } // calculate the distance between iat1 and iat2. - // ModuleBase::Vector3 dR = GlobalC::GridD.getAdjacentTau(ad) - tau1; + // ModuleBase::Vector3 dR = gd.getAdjacentTau(ad) - tau1; auto& dR = adjs.box[ad]; //dR.x = adjs.box[ad].x; //dR.y = adjs.box[ad].y; diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp index 5efec28e2d..55337badf9 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp @@ -283,7 +283,8 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix( } -void Gint_k::cal_vlocal_R_sparseMatrix(const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *pv) +void Gint_k::cal_vlocal_R_sparseMatrix(const int ¤t_spin, const double &sparse_threshold, + LCAO_Matrix *LM,Parallel_Orbitals *pv,LCAO_Orbitals &orb,UnitCell &ucell,Grid_Driver &gdriver) { ModuleBase::TITLE("Gint_k","cal_vlocal_R_sparseMatrix"); @@ -295,38 +296,38 @@ void Gint_k::cal_vlocal_R_sparseMatrix(const int ¤t_spin, const double &sp std::complex temp_value_complex; ModuleBase::Vector3 tau1, dtau; - for(int T1=0; T1gridt->in_this_processor[iat]) { - Atom* atom1 = &GlobalC::ucell.atoms[T1]; - const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0); + Atom* atom1 = &ucell.atoms[T1]; + const int start1 = ucell.itiaiw2iwt(T1, I1, 0); const int DM_start = this->gridt->nlocstartg[iat]; - tau1 = GlobalC::ucell.atoms[T1].tau[I1]; - GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1); + tau1 = ucell.atoms[T1].tau[I1]; + gdriver.Find_atom(ucell, tau1, T1, I1); int nad2 = 0; - for(int ad = 0; ad < GlobalC::GridD.getAdjacentNum()+1; ad++) + for(int ad = 0; ad < gdriver.getAdjacentNum()+1; ad++) { - const int T2 = GlobalC::GridD.getType(ad); - const int I2 = GlobalC::GridD.getNatom(ad); - const int iat2 = GlobalC::ucell.itia2iat(T2, I2); + const int T2 = gdriver.getType(ad); + const int I2 = gdriver.getNatom(ad); + const int iat2 = ucell.itia2iat(T2, I2); if(this->gridt->in_this_processor[iat2]) { - Atom* atom2 = &GlobalC::ucell.atoms[T2]; - dtau = GlobalC::GridD.getAdjacentTau(ad) - tau1; - double distance = dtau.norm() * GlobalC::ucell.lat0; - double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut(); + Atom* atom2 = &ucell.atoms[T2]; + dtau = gdriver.getAdjacentTau(ad) - tau1; + double distance = dtau.norm() * ucell.lat0; + double rcut = orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut(); if(distance < rcut) { - const int start2 = GlobalC::ucell.itiaiw2iwt(T2, I2, 0); - Abfs::Vector3_Order dR(GlobalC::GridD.getBox(ad).x, GlobalC::GridD.getBox(ad).y, GlobalC::GridD.getBox(ad).z); + const int start2 = ucell.itiaiw2iwt(T2, I2, 0); + Abfs::Vector3_Order dR(gdriver.getBox(ad).x, gdriver.getBox(ad).y, gdriver.getBox(ad).z); int ixxx = DM_start + this->gridt->find_R2st[iat][nad2]; for(int iw=0; iwnw * GlobalV::NPOL; iw++) { diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp index e5c7ac5729..637067d719 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp @@ -325,7 +325,8 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix( } -void Gint_k::cal_dvlocal_R_sparseMatrix(const int ¤t_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *pv) +void Gint_k::cal_dvlocal_R_sparseMatrix(const int ¤t_spin, const double &sparse_threshold, + LCAO_Matrix *LM,Parallel_Orbitals *pv,LCAO_Orbitals &orb,UnitCell &ucell,Grid_Driver &gdriver) { ModuleBase::TITLE("Gint_k","cal_vlocal_R_sparseMatrix"); @@ -341,40 +342,40 @@ void Gint_k::cal_dvlocal_R_sparseMatrix(const int ¤t_spin, const double &s std::complex temp_value_complex; ModuleBase::Vector3 tau1, dtau; - for(int T1=0; T1gridt->in_this_processor[iat]) { - Atom* atom1 = &GlobalC::ucell.atoms[T1]; - const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0); + Atom* atom1 = &ucell.atoms[T1]; + const int start1 = ucell.itiaiw2iwt(T1, I1, 0); const int DM_start = this->gridt->nlocstartg[iat]; - tau1 = GlobalC::ucell.atoms[T1].tau[I1]; - GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1); + tau1 = ucell.atoms[T1].tau[I1]; + gdriver.Find_atom(ucell, tau1, T1, I1); int nad2 = 0; - for(int ad = 0; ad < GlobalC::GridD.getAdjacentNum()+1; ad++) + for(int ad = 0; ad < gdriver.getAdjacentNum()+1; ad++) { - const int T2 = GlobalC::GridD.getType(ad); - const int I2 = GlobalC::GridD.getNatom(ad); - const int iat2 = GlobalC::ucell.itia2iat(T2, I2); + const int T2 = gdriver.getType(ad); + const int I2 = gdriver.getNatom(ad); + const int iat2 = ucell.itia2iat(T2, I2); if(this->gridt->in_this_processor[iat2]) { - Atom* atom2 = &GlobalC::ucell.atoms[T2]; - dtau = GlobalC::GridD.getAdjacentTau(ad) - tau1; - double distance = dtau.norm() * GlobalC::ucell.lat0; - double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut(); + Atom* atom2 = &ucell.atoms[T2]; + dtau = gdriver.getAdjacentTau(ad) - tau1; + double distance = dtau.norm() * ucell.lat0; + double rcut = orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut(); if(distance < rcut) { - const int start2 = GlobalC::ucell.itiaiw2iwt(T2, I2, 0); - Abfs::Vector3_Order dR(GlobalC::GridD.getBox(ad).x, - GlobalC::GridD.getBox(ad).y, - GlobalC::GridD.getBox(ad).z); + const int start2 = ucell.itiaiw2iwt(T2, I2, 0); + Abfs::Vector3_Order dR(gdriver.getBox(ad).x, + gdriver.getBox(ad).y, + gdriver.getBox(ad).z); int ixxx = DM_start + this->gridt->find_R2st[iat][nad2]; diff --git a/source/module_hamilt_lcao/module_gint/gint_rho.cpp b/source/module_hamilt_lcao/module_gint/gint_rho.cpp index 4f9a58b64f..ff5bda6d7c 100644 --- a/source/module_hamilt_lcao/module_gint/gint_rho.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_rho.cpp @@ -15,6 +15,7 @@ void Gint::gint_kernel_rho( const double delta_r, int* vindex, const int LD_pool, + const UnitCell& ucell, Gint_inout *inout) { //prepare block information @@ -28,7 +29,8 @@ void Gint::gint_kernel_rho( this->bxyz, na_grid, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D); + psir_ylm.ptr_2D, + ucell); for(int is=0; is ylma; @@ -298,7 +299,8 @@ namespace Gint_Tools double*const*const psir_ylm, double*const*const dpsir_ylm_x, double*const*const dpsir_ylm_y, - double*const*const dpsir_ylm_z) + double*const*const dpsir_ylm_z, + const UnitCell& ucell) { ModuleBase::timer::tick("Gint_Tools", "cal_dpsir_ylm"); for (int id=0; id* hR) { //prepare block information @@ -37,7 +38,8 @@ void Gint::gint_kernel_vlocal( this->bxyz, na_grid, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D); + psir_ylm.ptr_2D, + ucell); //calculating f_mu(r) = v(r)*psi_mu(r)*dv const Gint_Tools::Array_Pool psir_vlbr3 = Gint_Tools::get_psir_vlbr3( @@ -56,7 +58,7 @@ void Gint::gint_kernel_vlocal( { this->cal_meshball_vlocal_k( na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag, - psir_ylm.ptr_2D, psir_vlbr3.ptr_2D, pvpR_in); + psir_ylm.ptr_2D, psir_vlbr3.ptr_2D, pvpR_in,ucell); } //release memories @@ -80,7 +82,8 @@ void Gint::gint_kernel_dvlocal( const int LD_pool, double* pvdpRx, double* pvdpRy, - double* pvdpRz) + double* pvdpRz, + const UnitCell& ucell) { //prepare block information int * block_iw, * block_index, * block_size; @@ -94,7 +97,7 @@ void Gint::gint_kernel_dvlocal( Gint_Tools::Array_Pool dpsir_ylm_z(this->bxyz, LD_pool); Gint_Tools::cal_dpsir_ylm(*this->gridt, this->bxyz, na_grid, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D); + psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D,ucell); //calculating f_mu(r) = v(r)*psi_mu(r)*dv const Gint_Tools::Array_Pool psir_vlbr3 = Gint_Tools::get_psir_vlbr3( @@ -104,13 +107,13 @@ void Gint::gint_kernel_dvlocal( //and accumulates to the corresponding element in Hamiltonian this->cal_meshball_vlocal_k( na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag, - psir_vlbr3.ptr_2D, dpsir_ylm_x.ptr_2D, pvdpRx); + psir_vlbr3.ptr_2D, dpsir_ylm_x.ptr_2D, pvdpRx,ucell); this->cal_meshball_vlocal_k( na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag, - psir_vlbr3.ptr_2D, dpsir_ylm_y.ptr_2D, pvdpRy); + psir_vlbr3.ptr_2D, dpsir_ylm_y.ptr_2D, pvdpRy,ucell); this->cal_meshball_vlocal_k( na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag, - psir_vlbr3.ptr_2D, dpsir_ylm_z.ptr_2D, pvdpRz); + psir_vlbr3.ptr_2D, dpsir_ylm_z.ptr_2D, pvdpRz,ucell); //release memories delete[] block_iw; @@ -133,6 +136,7 @@ void Gint::gint_kernel_vlocal_meta( double* vkdr3, const int LD_pool, double* pvpR_in, + const UnitCell& ucell, hamilt::HContainer* hR) { //prepare block information @@ -153,7 +157,8 @@ void Gint::gint_kernel_vlocal_meta( psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, - dpsir_ylm_z.ptr_2D + dpsir_ylm_z.ptr_2D, + ucell ); //calculating f_mu(r) = v(r)*psi_mu(r)*dv @@ -192,16 +197,16 @@ void Gint::gint_kernel_vlocal_meta( { this->cal_meshball_vlocal_k( na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag, - psir_ylm.ptr_2D, psir_vlbr3.ptr_2D, pvpR_in); + psir_ylm.ptr_2D, psir_vlbr3.ptr_2D, pvpR_in,ucell); this->cal_meshball_vlocal_k( na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag, - dpsir_ylm_x.ptr_2D, dpsix_vlbr3.ptr_2D, pvpR_in); + dpsir_ylm_x.ptr_2D, dpsix_vlbr3.ptr_2D, pvpR_in,ucell); this->cal_meshball_vlocal_k( na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag, - dpsir_ylm_y.ptr_2D, dpsiy_vlbr3.ptr_2D, pvpR_in); + dpsir_ylm_y.ptr_2D, dpsiy_vlbr3.ptr_2D, pvpR_in,ucell); this->cal_meshball_vlocal_k( na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag, - dpsir_ylm_z.ptr_2D, dpsiz_vlbr3.ptr_2D, pvpR_in); + dpsir_ylm_z.ptr_2D, dpsiz_vlbr3.ptr_2D, pvpR_in,ucell); } //release memories @@ -317,7 +322,8 @@ void Gint::cal_meshball_vlocal_k( bool** cal_flag, double** psir_ylm, double** psir_vlbr3, - double* pvpR) + double* pvpR, + const UnitCell& ucell) { auto find_offset = [&](const int id1, const int id2, const int iat1, const int iat2)->int { @@ -352,14 +358,14 @@ void Gint::cal_meshball_vlocal_k( int m=block_size[ia1]; const int mcell_index1 = this->gridt->bcell_start[grid_index] + ia1; const int iat1= this->gridt->which_atom[mcell_index1]; - const int T1 = GlobalC::ucell.iat2it[iat1]; + const int T1 = ucell.iat2it[iat1]; const int id1 = this->gridt->which_unitcell[mcell_index1]; const int DM_start = this->gridt->nlocstartg[iat1]; for(int ia2=0; ia2gridt->bcell_start[grid_index] + ia2; const int iat2 = this->gridt->which_atom[mcell_index2]; - const int T2 = GlobalC::ucell.iat2it[iat2]; + const int T2 = ucell.iat2it[iat2]; if (iat1 <= iat2) { int cal_num=0; @@ -373,7 +379,7 @@ void Gint::cal_meshball_vlocal_k( const int idx2=block_index[ia2]; int n=block_size[ia2]; - //const int I2 = GlobalC::ucell.iat2ia[iat2]; + //const int I2 = ucell.iat2ia[iat2]; const int mcell_index2 = this->gridt->bcell_start[grid_index] + ia2; const int id2 = this->gridt->which_unitcell[mcell_index2]; int offset; diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp index 8f23f842eb..8aa3219c15 100644 --- a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp +++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp @@ -4,7 +4,7 @@ #include "module_base/timer.h" #include "module_basis/module_ao/ORB_read.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" - +#include "module_cell/unitcell.h" Grid_BigCell::Grid_BigCell() { this->flag_tib = false; @@ -37,7 +37,7 @@ Grid_BigCell::~Grid_BigCell() delete[] index_atom; } -void Grid_BigCell::init_big_latvec(void) +void Grid_BigCell::init_big_latvec(const UnitCell& ucell) { ModuleBase::TITLE("Grid_BigCell","init_big_latvec"); // initialize the mesh cell vectors. @@ -46,17 +46,17 @@ void Grid_BigCell::init_big_latvec(void) assert(nbz>=0); //size of each big room (same shape with unitcell) - this->bigcell_vec1[0]= GlobalC::ucell.a1.x / (double)nbx * GlobalC::ucell.lat0; - this->bigcell_vec1[1]= GlobalC::ucell.a1.y / (double)nbx * GlobalC::ucell.lat0; - this->bigcell_vec1[2]= GlobalC::ucell.a1.z / (double)nbx * GlobalC::ucell.lat0; + this->bigcell_vec1[0]= ucell.a1.x / (double)nbx * ucell.lat0; + this->bigcell_vec1[1]= ucell.a1.y / (double)nbx * ucell.lat0; + this->bigcell_vec1[2]= ucell.a1.z / (double)nbx * ucell.lat0; - this->bigcell_vec2[0]= GlobalC::ucell.a2.x / (double)nby * GlobalC::ucell.lat0; - this->bigcell_vec2[1]= GlobalC::ucell.a2.y / (double)nby * GlobalC::ucell.lat0; - this->bigcell_vec2[2]= GlobalC::ucell.a2.z / (double)nby * GlobalC::ucell.lat0; + this->bigcell_vec2[0]= ucell.a2.x / (double)nby * ucell.lat0; + this->bigcell_vec2[1]= ucell.a2.y / (double)nby * ucell.lat0; + this->bigcell_vec2[2]= ucell.a2.z / (double)nby * ucell.lat0; - this->bigcell_vec3[0]= GlobalC::ucell.a3.x / (double)nbz * GlobalC::ucell.lat0; - this->bigcell_vec3[1]= GlobalC::ucell.a3.y / (double)nbz * GlobalC::ucell.lat0; - this->bigcell_vec3[2]= GlobalC::ucell.a3.z / (double)nbz * GlobalC::ucell.lat0; + this->bigcell_vec3[0]= ucell.a3.x / (double)nbz * ucell.lat0; + this->bigcell_vec3[1]= ucell.a3.y / (double)nbz * ucell.lat0; + this->bigcell_vec3[2]= ucell.a3.z / (double)nbz * ucell.lat0; this->bigcell_latvec0.e11 = this->bigcell_vec1[0]; this->bigcell_latvec0.e12 = this->bigcell_vec1[1]; @@ -105,15 +105,15 @@ void Grid_BigCell::init_big_latvec(void) } -void Grid_BigCell::init_grid_expansion(void) +void Grid_BigCell::init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals &orb) { ModuleBase::TITLE("Grid_BigCell","init_grid_expansion"); // calculate the max cutoff radius among all orbitals. // then we will use this parameter to generate grid expansion. - for(int T=0; Torbital_rmax = std::max( GlobalC::ORB.Phi[T].getRcut(), this->orbital_rmax); + this->orbital_rmax = std::max( orb.Phi[T].getRcut(), this->orbital_rmax); } if(GlobalV::test_gridt)ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"rmax of periodic grid (bohr)",orbital_rmax); @@ -160,7 +160,7 @@ void Grid_BigCell::init_grid_expansion(void) } -void Grid_BigCell::init_tau_in_bigcell(void) +void Grid_BigCell::init_tau_in_bigcell(const UnitCell& ucell) { ModuleBase::TITLE("Grid_BigCell","init_tau_in_bigcell"); @@ -169,8 +169,8 @@ void Grid_BigCell::init_tau_in_bigcell(void) if(!flag_tib) { - this->tau_in_bigcell = new double* [GlobalC::ucell.nat]; - for(int i=0; itau_in_bigcell = new double* [ucell.nat]; + for(int i=0; itau_in_bigcell[i] = new double[3]; } @@ -179,9 +179,9 @@ void Grid_BigCell::init_tau_in_bigcell(void) // allocate space, these arrays record which meshcell // the atom is in. delete[] index_atom; - this->index_atom = new int[GlobalC::ucell.nat]; + this->index_atom = new int[ucell.nat]; - ModuleBase::Memory::record("tau_in_bigcell", sizeof(double) * GlobalC::ucell.nat*3); + ModuleBase::Memory::record("tau_in_bigcell", sizeof(double) * ucell.nat*3); } // get the fraction number of (i,j,k) @@ -189,22 +189,22 @@ void Grid_BigCell::init_tau_in_bigcell(void) int iat=0; int ii,jj,kk; double delta[3]; - for(int it=0; itbigcell_GT; + //fraction = ( ucell.atoms[it].tau[ia] * ucell.lat0 )* this->bigcell_GT; //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // mohan add 2012-07-03, // this can make sure faction are always larger than 0. //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - fraction.x = GlobalC::ucell.atoms[it].taud[ia].x / (1.0/(double)nbx); - fraction.y = GlobalC::ucell.atoms[it].taud[ia].y / (1.0/(double)nby); - fraction.z = GlobalC::ucell.atoms[it].taud[ia].z / (1.0/(double)nbz); + fraction.x = ucell.atoms[it].taud[ia].x / (1.0/(double)nbx); + fraction.y = ucell.atoms[it].taud[ia].y / (1.0/(double)nby); + fraction.z = ucell.atoms[it].taud[ia].z / (1.0/(double)nbz); // never use the following, especially for k-algorithm, // it may move the atom to a cell that it doesn't belong @@ -222,9 +222,9 @@ void Grid_BigCell::init_tau_in_bigcell(void) if( fraction.x < 0 || fraction.y < 0 || fraction.z < 0) { std::cout << " Atom positions " << std::endl; - std::cout << GlobalC::ucell.atoms[it].tau[ia].x << " " ; - std::cout << GlobalC::ucell.atoms[it].tau[ia].y << " " ; - std::cout << GlobalC::ucell.atoms[it].tau[ia].z << " " ; + std::cout << ucell.atoms[it].tau[ia].x << " " ; + std::cout << ucell.atoms[it].tau[ia].y << " " ; + std::cout << ucell.atoms[it].tau[ia].z << " " ; std::cout << " fraction " << std::endl; std::cout << fraction.x << " "; std::cout << fraction.y << " "; diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.h b/source/module_hamilt_lcao/module_gint/grid_bigcell.h index 0cc94d000d..bae7272652 100644 --- a/source/module_hamilt_lcao/module_gint/grid_bigcell.h +++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.h @@ -20,7 +20,7 @@ class Grid_BigCell: public Grid_MeshCell protected: //--------------------------------- - void init_big_latvec(void); + void init_big_latvec(const UnitCell &ucell); double bigcell_vec1[3]; double bigcell_vec2[3]; @@ -32,7 +32,7 @@ class Grid_BigCell: public Grid_MeshCell //--------------------------------- - void init_grid_expansion(void); + void init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals &orb); // get the max radius of all orbitals. // which will use to generate grid expansion, @@ -59,7 +59,7 @@ class Grid_BigCell: public Grid_MeshCell //--------------------------------- - void init_tau_in_bigcell(void); + void init_tau_in_bigcell(const UnitCell& ucell); //this flag will be false at first and turned to true after memory of tau_in_meshcell has been allocated. bool flag_tib; diff --git a/source/module_hamilt_lcao/module_gint/grid_meshcell.cpp b/source/module_hamilt_lcao/module_gint/grid_meshcell.cpp index 05eacedede..7f1914560a 100644 --- a/source/module_hamilt_lcao/module_gint/grid_meshcell.cpp +++ b/source/module_hamilt_lcao/module_gint/grid_meshcell.cpp @@ -73,7 +73,7 @@ void Grid_MeshCell::set_grid_dim( // (1) -void Grid_MeshCell::init_latvec(void) +void Grid_MeshCell::init_latvec(const UnitCell &ucell) { ModuleBase::TITLE("Grid_MeshCell","init_latvec"); // initialize the mesh cell vectors. @@ -82,17 +82,17 @@ void Grid_MeshCell::init_latvec(void) assert(ncz>0); //size of each room (same shape with unitcell) - this->meshcell_vec1[0]= GlobalC::ucell.a1.x / (double)ncx * GlobalC::ucell.lat0; - this->meshcell_vec1[1]= GlobalC::ucell.a1.y / (double)ncx * GlobalC::ucell.lat0; - this->meshcell_vec1[2]= GlobalC::ucell.a1.z / (double)ncx * GlobalC::ucell.lat0; + this->meshcell_vec1[0]= ucell.a1.x / (double)ncx * ucell.lat0; + this->meshcell_vec1[1]= ucell.a1.y / (double)ncx * ucell.lat0; + this->meshcell_vec1[2]= ucell.a1.z / (double)ncx * ucell.lat0; - this->meshcell_vec2[0]= GlobalC::ucell.a2.x / (double)ncy * GlobalC::ucell.lat0; - this->meshcell_vec2[1]= GlobalC::ucell.a2.y / (double)ncy * GlobalC::ucell.lat0; - this->meshcell_vec2[2]= GlobalC::ucell.a2.z / (double)ncy * GlobalC::ucell.lat0; + this->meshcell_vec2[0]= ucell.a2.x / (double)ncy * ucell.lat0; + this->meshcell_vec2[1]= ucell.a2.y / (double)ncy * ucell.lat0; + this->meshcell_vec2[2]= ucell.a2.z / (double)ncy * ucell.lat0; - this->meshcell_vec3[0]= GlobalC::ucell.a3.x / (double)ncz * GlobalC::ucell.lat0; - this->meshcell_vec3[1]= GlobalC::ucell.a3.y / (double)ncz * GlobalC::ucell.lat0; - this->meshcell_vec3[2]= GlobalC::ucell.a3.z / (double)ncz * GlobalC::ucell.lat0; + this->meshcell_vec3[0]= ucell.a3.x / (double)ncz * ucell.lat0; + this->meshcell_vec3[1]= ucell.a3.y / (double)ncz * ucell.lat0; + this->meshcell_vec3[2]= ucell.a3.z / (double)ncz * ucell.lat0; this->meshcell_latvec0.e11 = this->meshcell_vec1[0]; this->meshcell_latvec0.e12 = this->meshcell_vec1[1]; diff --git a/source/module_hamilt_lcao/module_gint/grid_meshcell.h b/source/module_hamilt_lcao/module_gint/grid_meshcell.h index 74762abf59..71253ed747 100644 --- a/source/module_hamilt_lcao/module_gint/grid_meshcell.h +++ b/source/module_hamilt_lcao/module_gint/grid_meshcell.h @@ -4,7 +4,7 @@ #include "module_base/global_variable.h" #include "module_base/matrix3.h" #include "grid_meshk.h" - +#include "module_cell/unitcell.h" class Grid_MeshCell: public Grid_MeshK { public: @@ -39,7 +39,7 @@ class Grid_MeshCell: public Grid_MeshK const int &nbzp_in); - void init_latvec(void); + void init_latvec(const UnitCell &ucell); void init_meshcell_pos(void); void cal_extended_cell(const int &dxe, const int &dye, const int &dze); diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp index 691650aa63..6286dcb831 100644 --- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp +++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp @@ -5,6 +5,7 @@ #include "module_base/timer.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" #include "module_hsolver/kernels/cuda/helper_cuda.h" + Grid_Technique::Grid_Technique() { this->nlocdimg = nullptr; @@ -101,7 +102,7 @@ Grid_Technique::~Grid_Technique() #if ((defined __CUDA) /* || (defined __ROCM) */) if(GlobalV::device_flag == "gpu") { - free_gpu_gint_variables(); + free_gpu_gint_variables(GlobalC::ucell); } #endif } @@ -124,7 +125,9 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in, const int& nbzp_in, const int& ny, const int& nplane, - const int& startz_current) + const int& startz_current, + const UnitCell& ucell, + const LCAO_Orbitals &orb) { ModuleBase::TITLE("Grid_Technique", "init"); ModuleBase::timer::tick("Grid_Technique", "init"); @@ -150,32 +153,32 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in, nbzp_start_in, nbzp_in); - this->init_latvec(); + this->init_latvec(ucell); - this->init_big_latvec(); + this->init_big_latvec(ucell); this->init_meshcell_pos(); // (2) expand the grid - this->init_grid_expansion(); + this->init_grid_expansion(ucell,orb); // (3) calculate the extended grid. this->cal_extended_cell(this->dxe, this->dye, this->dze); - this->init_tau_in_bigcell(); + this->init_tau_in_bigcell(ucell); // init meshball this->delete_meshball_positions(); // LiuXh add 2018-12-14 this->init_meshball(); - this->init_atoms_on_grid(ny, nplane, startz_current); + this->init_atoms_on_grid(ny, nplane, startz_current,ucell); - this->cal_trace_lo(); + this->cal_trace_lo(ucell); #if ((defined __CUDA) /* || (defined __ROCM) */) if(GlobalV::device_flag == "gpu") { - this->init_gpu_gint_variables(); + this->init_gpu_gint_variables(ucell,LCAO_Orbitals &orb); } #endif @@ -237,7 +240,8 @@ void Grid_Technique::get_startind(const int& ny, // mohan add 2021-04-06 void Grid_Technique::init_atoms_on_grid(const int& ny, const int& nplane, - const int& startz_current) + const int& startz_current, + const UnitCell& ucell) { ModuleBase::TITLE("Grid_Technique", "init_atoms_on_grid"); @@ -269,8 +273,8 @@ void Grid_Technique::init_atoms_on_grid(const int& ny, // (3) Find the atoms using // when doing grid integration. delete[] in_this_processor; - this->in_this_processor = new bool[GlobalC::ucell.nat]; - for (int i = 0; i < GlobalC::ucell.nat; i++) + this->in_this_processor = new bool[ucell.nat]; + for (int i = 0; i < ucell.nat; i++) { in_this_processor[i] = false; } @@ -288,9 +292,9 @@ void Grid_Technique::init_atoms_on_grid(const int& ny, int normal=0; this->total_atoms_on_grid = 0; int nat_local = 0; - for(int it=0; itmeshball_ncells; im++) { @@ -351,7 +355,7 @@ void Grid_Technique::init_atoms_on_grid(const int& ny, if (nat_local > 0) { this->trace_iat.resize(nat_local); - for (int iat = GlobalC::ucell.nat - 1; iat >= 0; iat--) + for (int iat = ucell.nat - 1; iat >= 0; iat--) { if (this->in_this_processor[iat]) { @@ -363,7 +367,7 @@ void Grid_Technique::init_atoms_on_grid(const int& ny, // need how_many_atoms first. this->cal_grid_integration_index(); // bcell_start is needed. - this->init_atoms_on_grid2(index2normal); + this->init_atoms_on_grid2(index2normal,ucell); delete[] index2normal; return; } @@ -410,7 +414,7 @@ void Grid_Technique::check_bigcell(int*& ind_bigcell, return; } -void Grid_Technique::init_atoms_on_grid2(const int* index2normal) +void Grid_Technique::init_atoms_on_grid2(const int* index2normal,const UnitCell& ucell) { ModuleBase::TITLE("Grid_Techinique", "init_atoms_on_grid2"); @@ -455,9 +459,9 @@ void Grid_Technique::init_atoms_on_grid2(const int* index2normal) int count = 0; int iat = 0; ModuleBase::GlobalFunc::ZEROS(this->how_many_atoms, nbxx); - for(int it=0; itmeshball_ncells; im++) @@ -555,12 +559,12 @@ void Grid_Technique::cal_grid_integration_index(void) } // set 'lgd' variable -void Grid_Technique::cal_trace_lo(void) +void Grid_Technique::cal_trace_lo(const UnitCell& ucell) { ModuleBase::TITLE("Grid_Technique", "cal_trace_lo"); // save the atom information in trace_lo, // in fact the trace_lo dimension can be reduced - // to GlobalC::ucell.nat, but I think this is another way. + // to ucell.nat, but I think this is another way. delete[] trace_lo; this->trace_lo = new int[GlobalV::NLOCAL]; for (int i = 0; i < GlobalV::NLOCAL; i++) @@ -575,14 +579,14 @@ void Grid_Technique::cal_trace_lo(void) int iw_all = 0; int iw_local = 0; - for(int it=0; itin_this_processor[iat]) { ++lnat; - int nw0 = GlobalC::ucell.atoms[it].nw; + int nw0 = ucell.atoms[it].nw; if(GlobalV::NSPIN==4) {//added by zhengdy-soc, need to be double in soc nw0 *= 2; @@ -590,7 +594,7 @@ void Grid_Technique::cal_trace_lo(void) } else { - this->lgd += GlobalC::ucell.atoms[it].nw; + this->lgd += ucell.atoms[it].nw; } for(int iw=0; iw max_cut) + if (orb.Phi[i].getRcut() > max_cut) { - max_cut = GlobalC::ORB.Phi[i].getRcut(); + max_cut = orb.Phi[i].getRcut(); } } - int atom_nw_now[GlobalC::ucell.ntype]; - int ucell_atom_nwl_now[GlobalC::ucell.ntype]; - for (int i = 0; i < GlobalC::ucell.ntype; i++) + int atom_nw_now[ucell.ntype]; + int ucell_atom_nwl_now[ucell.ntype]; + for (int i = 0; i < ucell.ntype; i++) { - atom_nw_now[i] = GlobalC::ucell.atoms[i].nw; - ucell_atom_nwl_now[i] = GlobalC::ucell.atoms[i].nwl; + atom_nw_now[i] = ucell.atoms[i].nw; + ucell_atom_nwl_now[i] = ucell.atoms[i].nwl; } nr_max = static_cast(1000 * max_cut) + 10; - // double psi_u_now[GlobalC::ucell.ntype * GlobalC::ucell.nwmax * nr_max * + // double psi_u_now[ucell.ntype * ucell.nwmax * nr_max * // 2]; double* psi_u_now - = (double*)malloc(GlobalC::ucell.ntype * GlobalC::ucell.nwmax * nr_max + = (double*)malloc(ucell.ntype * ucell.nwmax * nr_max * 2 * sizeof(double)); memset(psi_u_now, 0, - GlobalC::ucell.ntype * GlobalC::ucell.nwmax * nr_max * 2 + ucell.ntype * ucell.nwmax * nr_max * 2 * sizeof(double)); bool* atom_iw2_new_now = (bool*)malloc( - GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(bool)); + ucell.ntype * ucell.nwmax * sizeof(bool)); memset(atom_iw2_new_now, 0, - GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(bool)); - int* atom_iw2_ylm_now = (int*)malloc(GlobalC::ucell.ntype - * GlobalC::ucell.nwmax * sizeof(int)); + ucell.ntype * ucell.nwmax * sizeof(bool)); + int* atom_iw2_ylm_now = (int*)malloc(ucell.ntype + * ucell.nwmax * sizeof(int)); memset(atom_iw2_ylm_now, 0, - GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(int)); - int* atom_iw2_l_now = (int*)malloc(GlobalC::ucell.ntype - * GlobalC::ucell.nwmax * sizeof(int)); + ucell.ntype * ucell.nwmax * sizeof(int)); + int* atom_iw2_l_now = (int*)malloc(ucell.ntype + * ucell.nwmax * sizeof(int)); memset(atom_iw2_l_now, 0, - GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(int)); + ucell.ntype * ucell.nwmax * sizeof(int)); Atom* atomx; - for (int i = 0; i < GlobalC::ucell.ntype; i++) + for (int i = 0; i < ucell.ntype; i++) { - atomx = &GlobalC::ucell.atoms[i]; - for (int j = 0; j < GlobalC::ucell.nwmax; j++) + atomx = &ucell.atoms[i]; + for (int j = 0; j < ucell.nwmax; j++) { if (j < atomx->nw) { - atom_iw2_new_now[i * GlobalC::ucell.nwmax + j] + atom_iw2_new_now[i * ucell.nwmax + j] = atomx->iw2_new[j]; - atom_iw2_ylm_now[i * GlobalC::ucell.nwmax + j] + atom_iw2_ylm_now[i * ucell.nwmax + j] = atomx->iw2_ylm[j]; - atom_iw2_l_now[i * GlobalC::ucell.nwmax + j] = atomx->iw2l[j]; - pointer = &GlobalC::ORB.Phi[i].PhiLN(atomx->iw2l[j], + atom_iw2_l_now[i * ucell.nwmax + j] = atomx->iw2l[j]; + pointer = &orb.Phi[i].PhiLN(atomx->iw2l[j], atomx->iw2n[j]); for (int k = 0; k < nr_max; k++) { int index_temp - = (i * GlobalC::ucell.nwmax * nr_max + j * nr_max + k) + = (i * ucell.nwmax * nr_max + j * nr_max + k) * 2; if (k < pointer->nr_uniform) { @@ -721,55 +725,55 @@ void Grid_Technique::init_gpu_gint_variables() } checkCudaErrors( - cudaMalloc((void**)&atom_nw_g, GlobalC::ucell.ntype * sizeof(int))); + cudaMalloc((void**)&atom_nw_g, ucell.ntype * sizeof(int))); checkCudaErrors(cudaMemcpy(atom_nw_g, atom_nw_now, - GlobalC::ucell.ntype * sizeof(int), + ucell.ntype * sizeof(int), cudaMemcpyHostToDevice)); checkCudaErrors(cudaMalloc((void**)&atom_nwl_g, - GlobalC::ucell.ntype * sizeof(int))); + ucell.ntype * sizeof(int))); checkCudaErrors(cudaMemcpy(atom_nwl_g, ucell_atom_nwl_now, - GlobalC::ucell.ntype * sizeof(int), + ucell.ntype * sizeof(int), cudaMemcpyHostToDevice)); checkCudaErrors(cudaMalloc((void**)&psi_u_g, - GlobalC::ucell.ntype * GlobalC::ucell.nwmax + ucell.ntype * ucell.nwmax * nr_max * sizeof(double) * 2)); checkCudaErrors(cudaMemcpy(psi_u_g, psi_u_now, - GlobalC::ucell.ntype * GlobalC::ucell.nwmax + ucell.ntype * ucell.nwmax * nr_max * sizeof(double) * 2, cudaMemcpyHostToDevice)); checkCudaErrors( cudaMalloc((void**)&atom_new_g, - GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(bool))); + ucell.ntype * ucell.nwmax * sizeof(bool))); checkCudaErrors( cudaMalloc((void**)&atom_ylm_g, - GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(int))); + ucell.ntype * ucell.nwmax * sizeof(int))); checkCudaErrors( cudaMalloc((void**)&atom_l_g, - GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(int))); + ucell.ntype * ucell.nwmax * sizeof(int))); checkCudaErrors( cudaMemcpy(atom_new_g, atom_iw2_new_now, - GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(bool), + ucell.ntype * ucell.nwmax * sizeof(bool), cudaMemcpyHostToDevice)); checkCudaErrors( cudaMemcpy(atom_ylm_g, atom_iw2_ylm_now, - GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(int), + ucell.ntype * ucell.nwmax * sizeof(int), cudaMemcpyHostToDevice)); checkCudaErrors( cudaMemcpy(atom_l_g, atom_iw2_l_now, - GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(int), + ucell.ntype * ucell.nwmax * sizeof(int), cudaMemcpyHostToDevice)); - const int max_atom_pair_number = GlobalC::ucell.nat * GlobalC::ucell.nat; + const int max_atom_pair_number = ucell.nat * ucell.nat; checkCudaErrors(cudaMallocHost( (void**)&grid_vlocal_g, max_atom_pair_number @@ -781,7 +785,7 @@ void Grid_Technique::init_gpu_gint_variables() grid_vlocal_g[iat] = nullptr; } - psir_size = nbzp * max_atom * bxyz * GlobalC::ucell.nwmax; + psir_size = nbzp * max_atom * bxyz * ucell.nwmax; checkCudaErrors(cudaMalloc((void**)&left_global_g, psir_size * nstreams * sizeof(double))); @@ -941,7 +945,7 @@ void Grid_Technique::init_gpu_gint_variables() free(atom_iw2_ylm_now); } -void Grid_Technique::free_gpu_gint_variables() +void Grid_Technique::free_gpu_gint_variables(const UnitCell& ucell) { if (!is_malloced) { @@ -1018,7 +1022,7 @@ void Grid_Technique::free_gpu_gint_variables() checkCudaErrors(cudaFree(dot_product_g)); checkCudaErrors(cudaFree(rho_g)); - const int max_atom_pair_number = GlobalC::ucell.nat * GlobalC::ucell.nat; + const int max_atom_pair_number = ucell.nat * ucell.nat; for (int i = 0; i < max_atom_pair_number; i++) { if (grid_vlocal_g[i] != nullptr) diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h index a32eb7feb2..2b95cd8a5f 100644 --- a/source/module_hamilt_lcao/module_gint/grid_technique.h +++ b/source/module_hamilt_lcao/module_gint/grid_technique.h @@ -4,6 +4,9 @@ #include "grid_index.h" #include "grid_meshball.h" #include "module_basis/module_ao/parallel_orbitals.h" +#include "module_cell/unitcell.h" +#include "module_basis/module_ao/ORB_read.h" +#include "module_cell/module_neighbor/sltk_grid_driver.h" #if ((defined __CUDA) /* || (defined __ROCM) */) #include @@ -93,7 +96,9 @@ class Grid_Technique : public Grid_MeshBall const int& nbzp_in, const int& ny, const int& nplane, - const int& startz_current); + const int& startz_current, + const UnitCell& ucell, + const LCAO_Orbitals &orb); /// number of elements(basis-pairs) in this processon /// on all adjacent atoms-pairs(Grid division) @@ -123,10 +128,12 @@ class Grid_Technique : public Grid_MeshBall // atoms on meshball void init_atoms_on_grid(const int& ny, const int& nplane, - const int& startz_current); - void init_atoms_on_grid2(const int* index2normal); + const int& startz_current, + const UnitCell& ucell); + void init_atoms_on_grid2(const int* index2normal, + const UnitCell& ucell); void cal_grid_integration_index(void); - void cal_trace_lo(void); + void cal_trace_lo(const UnitCell& ucell); void check_bigcell(int*& ind_bigcell, bool*& bigcell_on_processor); void get_startind(const int& ny, const int& nplane, @@ -218,8 +225,8 @@ class Grid_Technique : public Grid_MeshBall matrix_multiple_func_type fastest_matrix_mul; private: - void init_gpu_gint_variables(); - void free_gpu_gint_variables(); + void init_gpu_gint_variables(const UnitCell& ucell,LCAO_Orbitals &orb); + void free_gpu_gint_variables(const UnitCell& ucell); #endif }; diff --git a/source/module_io/istate_envelope.cpp b/source/module_io/istate_envelope.cpp index 1e369a36e3..8c95b0b3e6 100644 --- a/source/module_io/istate_envelope.cpp +++ b/source/module_io/istate_envelope.cpp @@ -109,7 +109,7 @@ void IState_Envelope::begin(const psi::Psi* psid, wfc_gamma_grid[is][i][j] = psid[0](i, j); } #endif - gg.cal_env(wfc_gamma_grid[is][ib], pes->charge->rho[is]); + gg.cal_env(wfc_gamma_grid[is][ib], pes->charge->rho[is],GlobalC::ORB,GlobalC::ucell); pes->charge->save_rho_before_sum_band(); //xiaohui add 2014-12-09 @@ -260,7 +260,7 @@ void IState_Envelope::begin(const psi::Psi>* psi, } #endif //deal with NSPIN=4 - gk.cal_env_k(ik, lowf.wfc_k_grid[ik][ib], pes->charge->rho[ispin], kv.kvec_c, kv.kvec_d); + gk.cal_env_k(ik, lowf.wfc_k_grid[ik][ib], pes->charge->rho[ispin], kv.kvec_c, kv.kvec_d,GlobalC::ORB,GlobalC::ucell); std::stringstream ss; ss << global_out_dir << "BAND" << ib + 1 << "_k_" << ik / nspin0 + 1 << "_s_" << ispin + 1 << "_ENV.cube"; From e50ee792d66ee65359e122e878835971b199b3f0 Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Mon, 13 May 2024 16:29:56 +0800 Subject: [PATCH 04/23] fix LCAO_Orbitals in module gint --- source/module_hamilt_lcao/module_gint/grid_bigcell.cpp | 2 +- source/module_hamilt_lcao/module_gint/grid_bigcell.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp index 8aa3219c15..9f470030b6 100644 --- a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp +++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp @@ -105,7 +105,7 @@ void Grid_BigCell::init_big_latvec(const UnitCell& ucell) } -void Grid_BigCell::init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals &orb) +void Grid_BigCell::init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals& orb) { ModuleBase::TITLE("Grid_BigCell","init_grid_expansion"); diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.h b/source/module_hamilt_lcao/module_gint/grid_bigcell.h index bae7272652..dbb823bfe7 100644 --- a/source/module_hamilt_lcao/module_gint/grid_bigcell.h +++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.h @@ -32,7 +32,7 @@ class Grid_BigCell: public Grid_MeshCell //--------------------------------- - void init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals &orb); + void init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals& orb); // get the max radius of all orbitals. // which will use to generate grid expansion, From 83ebe2ef2196dc2d07274c380af23e63c8368e19 Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Mon, 13 May 2024 17:10:31 +0800 Subject: [PATCH 05/23] fix error in compile without abacus --- source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp | 4 ++-- source/module_hamilt_lcao/module_gint/grid_bigcell.cpp | 5 +++-- source/module_hamilt_lcao/module_gint/grid_bigcell.h | 2 +- source/module_hamilt_lcao/module_gint/grid_technique.cpp | 7 ++++++- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp index 3f62519bba..14f2aa3288 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp @@ -456,7 +456,7 @@ void Gint_k::transfer_pvpR(hamilt::HContainer *hR,const UnitCell* ucell_ auto& tau1 = ucell.atoms[T1].tau[I1]; //gd.Find_atom(tau1); AdjacentAtomInfo adjs; - gd->Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs); + gd->Find_atom(ucell, tau1, T1, I1, &adjs); // search for the adjacent atoms. int nad = 0; @@ -582,7 +582,7 @@ void Gint_k::transfer_pvpR(hamilt::HContainer> *hR,const Un auto& tau1 = ucell.atoms[T1].tau[I1]; //gd.Find_atom(tau1); AdjacentAtomInfo adjs; - gd->Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs); + gd->Find_atom(ucell, tau1, T1, I1, &adjs); // search for the adjacent atoms. int nad = 0; diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp index 9f470030b6..1257f731df 100644 --- a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp +++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp @@ -105,15 +105,16 @@ void Grid_BigCell::init_big_latvec(const UnitCell& ucell) } -void Grid_BigCell::init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals& orb) +void Grid_BigCell::init_grid_expansion(const UnitCell& ucell,double* rcut) { ModuleBase::TITLE("Grid_BigCell","init_grid_expansion"); // calculate the max cutoff radius among all orbitals. // then we will use this parameter to generate grid expansion. + for(int T=0; Torbital_rmax = std::max( orb.Phi[T].getRcut(), this->orbital_rmax); + this->orbital_rmax = std::max( rcut[T], this->orbital_rmax); } if(GlobalV::test_gridt)ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"rmax of periodic grid (bohr)",orbital_rmax); diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.h b/source/module_hamilt_lcao/module_gint/grid_bigcell.h index dbb823bfe7..b6efd5669a 100644 --- a/source/module_hamilt_lcao/module_gint/grid_bigcell.h +++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.h @@ -32,7 +32,7 @@ class Grid_BigCell: public Grid_MeshCell //--------------------------------- - void init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals& orb); + void init_grid_expansion(const UnitCell& ucell,double* rcut); // get the max radius of all orbitals. // which will use to generate grid expansion, diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp index 6286dcb831..005daa41aa 100644 --- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp +++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp @@ -160,7 +160,12 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in, this->init_meshcell_pos(); // (2) expand the grid - this->init_grid_expansion(ucell,orb); + double* rcut=new double[ucell.ntype]; + for(int T=0; Tinit_grid_expansion(ucell,rcut); // (3) calculate the extended grid. this->cal_extended_cell(this->dxe, this->dye, this->dze); From ff3e2629255658ffb34b82e5e65302fab7435e97 Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Mon, 13 May 2024 19:32:43 +0800 Subject: [PATCH 06/23] fix error in init_gpu_gint_variables --- source/module_hamilt_lcao/module_gint/grid_technique.cpp | 4 ++-- source/module_hamilt_lcao/module_gint/grid_technique.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp index 005daa41aa..e4a39fc23b 100644 --- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp +++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp @@ -183,7 +183,7 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in, #if ((defined __CUDA) /* || (defined __ROCM) */) if(GlobalV::device_flag == "gpu") { - this->init_gpu_gint_variables(ucell,LCAO_Orbitals &orb); + this->init_gpu_gint_variables(ucell,orb); } #endif @@ -637,7 +637,7 @@ void Grid_Technique::cal_trace_lo(const UnitCell& ucell) #if ((defined __CUDA) /* || (defined __ROCM) */) -void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,LCAO_Orbitals &orb) +void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb) { if (is_malloced) { diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h index 2b95cd8a5f..1824d2ae9e 100644 --- a/source/module_hamilt_lcao/module_gint/grid_technique.h +++ b/source/module_hamilt_lcao/module_gint/grid_technique.h @@ -225,7 +225,7 @@ class Grid_Technique : public Grid_MeshBall matrix_multiple_func_type fastest_matrix_mul; private: - void init_gpu_gint_variables(const UnitCell& ucell,LCAO_Orbitals &orb); + void init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb); void free_gpu_gint_variables(const UnitCell& ucell); #endif From 7d06aa299525ed844dc72f7cdb4940b154f4f90a Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Mon, 13 May 2024 20:03:26 +0800 Subject: [PATCH 07/23] remove GlobalC in grid_technique and grid_bigcell --- .../hamilt_lcaodft/grid_init.cpp | 11 ++- .../module_hamilt_lcao/module_gint/gint.cpp | 73 ++++++++++--------- source/module_hamilt_lcao/module_gint/gint.h | 7 +- .../module_gint/grid_bigcell.cpp | 3 +- .../module_gint/grid_bigcell.h | 2 +- .../module_gint/grid_technique.cpp | 10 +-- .../module_gint/grid_technique.h | 2 +- 7 files changed, 61 insertions(+), 47 deletions(-) diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/grid_init.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/grid_init.cpp index a385c14d03..69c51fef43 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/grid_init.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/grid_init.cpp @@ -17,7 +17,8 @@ void grid_prepare( { ModuleBase::TITLE("LCAO_domain","grid_prepare"); ModuleBase::timer::tick("LCAO_domain","grid_prepare"); - + const UnitCell* ucell = &GlobalC::ucell; + const LCAO_Orbitals* orb = &GlobalC::ORB; if(GlobalV::GAMMA_ONLY_LOCAL) { gint_gamma.prep_grid( @@ -34,7 +35,9 @@ void grid_prepare( bigpw.nbxx, rhopw.ny, rhopw.nplane, - rhopw.startz_current); + rhopw.startz_current, + ucell, + orb); } else // multiple k-points { @@ -53,7 +56,9 @@ void grid_prepare( bigpw.nbxx, rhopw.ny, rhopw.nplane, - rhopw.startz_current); + rhopw.startz_current, + ucell, + orb); } ModuleBase::timer::tick("LCAO_domain","grid_prepare"); diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp index 864a53ab77..2b86e881dc 100644 --- a/source/module_hamilt_lcao/module_gint/gint.cpp +++ b/source/module_hamilt_lcao/module_gint/gint.cpp @@ -67,9 +67,10 @@ void Gint::cal_gint(Gint_inout* inout) ModuleBase::TITLE("Gint_interface","cal_gint_force_meta"); ModuleBase::timer::tick("Gint_interface","cal_gint_force_meta"); } - + const UnitCell& ucell = *this->ucell; + const LCAO_Orbitals& orb = *this->orb; const int max_size = this->gridt->max_atom; - const int LD_pool = max_size * GlobalC::ucell.nwmax; + const int LD_pool = max_size * ucell.nwmax; const int lgd = this->gridt->lgd; const int nnrg = this->gridt->nnrg; @@ -86,21 +87,21 @@ void Gint::cal_gint(Gint_inout* inout) ylmcoef[i] = ModuleBase::Ylm::ylmcoef[i]; } - const int ntype = GlobalC::ORB.get_ntype(); + const int ntype = orb.get_ntype(); double* rcut = new double[ntype]; for (int it = 0; it < ntype; it++) { - rcut[it] = GlobalC::ORB.Phi[it].getRcut(); + rcut[it] = orb.Phi[it].getRcut(); } - const double dr = GlobalC::ORB.dr_uniform; + const double dr = orb.dr_uniform; if (inout->job == Gint_Tools::job_type::vlocal) { GintKernel::gint_gamma_vl_gpu(this->hRGint, lgd, max_size, - GlobalC::ucell.omega + ucell.omega / this->ncxyz, inout->vl, ylmcoef, @@ -109,7 +110,7 @@ void Gint::cal_gint(Gint_inout* inout) dr, rcut, *this->gridt, - GlobalC::ucell); + ucell); } else if (inout->job == Gint_Tools::job_type::rho) { @@ -123,17 +124,17 @@ void Gint::cal_gint(Gint_inout* inout) dr, rcut, *this->gridt, - GlobalC::ucell, + ucell, inout->rho[is]); } } else if (inout->job == Gint_Tools::job_type::force) { const int ncyz = this->ny * this->nplane; - int nat = GlobalC::ucell.nat; + int nat = ucell.nat; // for (int is = 0; is < GlobalV::NSPIN; ++is) // { - double *force = new double[GlobalC::ucell.nat * 3]; + double *force = new double[ucell.nat * 3]; for (int i = 0; i < nat * 3; i++) { force[i] = 0.0; @@ -144,7 +145,7 @@ void Gint::cal_gint(Gint_inout* inout) stress[i] = 0.0; } GintKernel::gint_gamma_force_gpu(this->DMRGint[inout->ispin], - GlobalC::ucell.omega + ucell.omega / this->ncxyz, inout->vl, force, @@ -153,7 +154,7 @@ void Gint::cal_gint(Gint_inout* inout) dr, rcut, *this->gridt, - GlobalC::ucell); + ucell); for (int iat = 0; iat < nat; iat++) { inout->fvl_dphi[0](iat, 0) += force[iat * 3]; @@ -187,11 +188,11 @@ void Gint::cal_gint(Gint_inout* inout) // prepare some constants const int ncyz = this->ny * this->nplane; // mohan add 2012-03-25 - const double dv = GlobalC::ucell.omega / this->ncxyz; + const double dv = ucell.omega / this->ncxyz; // it's a uniform grid to save orbital values, so the delta_r is // a constant. - const double delta_r = GlobalC::ORB.dr_uniform; + const double delta_r = orb.dr_uniform; if((inout->job==Gint_Tools::job_type::vlocal || inout->job==Gint_Tools::job_type::vlocal_meta) @@ -293,14 +294,14 @@ void Gint::cal_gint(Gint_inout* inout) //int* vindex = Gint_Tools::get_vindex(ncyz, ibx, jby, kbz); int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz, this->nplane, this->gridt->start_ind[grid_index], ncyz); - this->gint_kernel_rho(na_grid, grid_index, delta_r, vindex, LD_pool, GlobalC::ucell,inout); + this->gint_kernel_rho(na_grid, grid_index, delta_r, vindex, LD_pool, ucell,inout); delete[] vindex; } else if(inout->job == Gint_Tools::job_type::tau) { int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz, this->nplane, this->gridt->start_ind[grid_index], ncyz); - this->gint_kernel_tau(na_grid, grid_index, delta_r, vindex, LD_pool, inout,GlobalC::ucell); + this->gint_kernel_tau(na_grid, grid_index, delta_r, vindex, LD_pool, inout,ucell); delete[] vindex; } else if(inout->job == Gint_Tools::job_type::force) @@ -322,11 +323,11 @@ void Gint::cal_gint(Gint_inout* inout) #ifdef _OPENMP this->gint_kernel_force(na_grid, grid_index, delta_r, vldr3, LD_pool, DM_in, inout->ispin, inout->isforce, inout->isstress, - &fvl_dphi_thread, &svl_dphi_thread,GlobalC::ucell); + &fvl_dphi_thread, &svl_dphi_thread,ucell); #else this->gint_kernel_force(na_grid, grid_index, delta_r, vldr3, LD_pool, DM_in, inout->ispin, inout->isforce, inout->isstress, - inout->fvl_dphi, inout->svl_dphi,GlobalC::ucell); + inout->fvl_dphi, inout->svl_dphi,ucell); #endif delete[] vldr3; } @@ -338,17 +339,17 @@ void Gint::cal_gint(Gint_inout* inout) if((GlobalV::GAMMA_ONLY_LOCAL && lgd>0) || !GlobalV::GAMMA_ONLY_LOCAL) { this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool, - pvpR_thread,GlobalC::ucell,hRGint_thread); + pvpR_thread,ucell,hRGint_thread); } #else if(GlobalV::GAMMA_ONLY_LOCAL && lgd>0) { - this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,GlobalC::ucell,nullptr); + this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,ucell,nullptr); } if(!GlobalV::GAMMA_ONLY_LOCAL) { this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool, - GlobalC::ucell,this->pvpR_reduced[inout->ispin]); + ucell,this->pvpR_reduced[inout->ispin]); } #endif delete[] vldr3; @@ -359,13 +360,13 @@ void Gint::cal_gint(Gint_inout* inout) this->nplane, this->gridt->start_ind[grid_index], ncyz, dv); #ifdef _OPENMP this->gint_kernel_dvlocal(na_grid, grid_index, delta_r, vldr3, LD_pool, - pvdpRx_thread, pvdpRy_thread, pvdpRz_thread,GlobalC::ucell); + pvdpRx_thread, pvdpRy_thread, pvdpRz_thread,ucell); #else this->gint_kernel_dvlocal(na_grid, grid_index, delta_r, vldr3, LD_pool, this->pvdpRx_reduced[inout->ispin], this->pvdpRy_reduced[inout->ispin], this->pvdpRz_reduced[inout->ispin], - GlobalC::ucell); + ucell); #endif delete[] vldr3; } @@ -379,16 +380,16 @@ void Gint::cal_gint(Gint_inout* inout) if((GlobalV::GAMMA_ONLY_LOCAL && lgd>0) || !GlobalV::GAMMA_ONLY_LOCAL) { this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, - pvpR_thread, GlobalC::ucell,hRGint_thread); + pvpR_thread, ucell,hRGint_thread); } #else if(GlobalV::GAMMA_ONLY_LOCAL && lgd>0) { - this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, GlobalC::ucell,nullptr); + this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, ucell,nullptr); } if(!GlobalV::GAMMA_ONLY_LOCAL) { - this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,GlobalC::ucell, + this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,ucell, this->pvpR_reduced[inout->ispin]); } #endif @@ -416,11 +417,11 @@ void Gint::cal_gint(Gint_inout* inout) #ifdef _OPENMP this->gint_kernel_force_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, DM_in, inout->ispin, inout->isforce, inout->isstress, - &fvl_dphi_thread, &svl_dphi_thread,GlobalC::ucell); + &fvl_dphi_thread, &svl_dphi_thread,ucell); #else this->gint_kernel_force_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, DM_in, inout->ispin, inout->isforce, inout->isstress, - inout->fvl_dphi, inout->svl_dphi,GlobalC::ucell); + inout->fvl_dphi, inout->svl_dphi,ucell); #endif delete[] vldr3; delete[] vkdr3; @@ -505,7 +506,9 @@ void Gint::prep_grid(const Grid_Technique& gt, const int& nbxx_in, const int& ny_in, const int& nplane_in, - const int& startz_current_in) + const int& startz_current_in, + const UnitCell* ucell_in, + const LCAO_Orbitals* orb_in) { ModuleBase::TITLE(GlobalV::ofs_running, "Gint_k", "prep_grid"); @@ -523,6 +526,9 @@ void Gint::prep_grid(const Grid_Technique& gt, this->ny = ny_in; this->nplane = nplane_in; this->startz_current = startz_current_in; + this->ucell= ucell_in; + this->orb = orb_in; + assert(nbx > 0); assert(nby > 0); assert(nbz >= 0); @@ -535,8 +541,7 @@ void Gint::prep_grid(const Grid_Technique& gt, assert(ny > 0); assert(nplane >= 0); assert(startz_current >= 0); - - assert(GlobalC::ucell.omega > 0.0); + assert(this->ucell->omega > 0.0); return; } @@ -641,15 +646,15 @@ void Gint::initialize_pvpR(const UnitCell& ucell_in, Grid_Driver* gd) { ModuleBase::Vector3 dtau = gd->getAdjacentTau(ad) - tau1; double distance = dtau.norm() * ucell_in.lat0; - double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut(); + double rcut = this->orb->Phi[T1].getRcut() + this->orb->Phi[T2].getRcut(); //if(distance < rcut) // mohan reset this 2013-07-02 in Princeton - // we should make absolutely sure that the distance is smaller than GlobalC::ORB.Phi[it].getRcut + // we should make absolutely sure that the distance is smaller than orb.Phi[it].getRcut // this should be consistant with LCAO_nnr::cal_nnrg function // typical example : 7 Bohr cutoff Si orbital in 14 Bohr length of cell. // distance = 7.0000000000000000 - // GlobalC::ORB.Phi[it].getRcut = 7.0000000000000008 + // orb.Phi[it].getRcut = 7.0000000000000008 if(distance < rcut - 1.0e-15) { // calculate R index diff --git a/source/module_hamilt_lcao/module_gint/gint.h b/source/module_hamilt_lcao/module_gint/gint.h index d7a51c1a03..e07db55d12 100644 --- a/source/module_hamilt_lcao/module_gint/gint.h +++ b/source/module_hamilt_lcao/module_gint/gint.h @@ -37,7 +37,9 @@ class Gint const int& nbxx_in, const int& ny_in, const int& nplane_in, - const int& startz_current_in); + const int& startz_current_in, + const UnitCell* ucell_in, + const LCAO_Orbitals* orb_in); /** * @brief calculate the neighbor atoms of each atom in this processor @@ -54,7 +56,8 @@ class Gint void transfer_DM2DtoGrid(std::vector*> DM2D); const Grid_Technique* gridt = nullptr; - + const UnitCell* ucell; + const LCAO_Orbitals* orb ; protected: // variables related to FFT grid diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp index 1257f731df..4ad0bb2591 100644 --- a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp +++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp @@ -28,7 +28,7 @@ Grid_BigCell::~Grid_BigCell() // delete tau positions. if(this->flag_tib) { - for(int i=0; inat; i++) { delete[] tau_in_bigcell[i]; } @@ -45,6 +45,7 @@ void Grid_BigCell::init_big_latvec(const UnitCell& ucell) assert(nby>0); assert(nbz>=0); + this->nat=ucell.nat; //size of each big room (same shape with unitcell) this->bigcell_vec1[0]= ucell.a1.x / (double)nbx * ucell.lat0; this->bigcell_vec1[1]= ucell.a1.y / (double)nbx * ucell.lat0; diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.h b/source/module_hamilt_lcao/module_gint/grid_bigcell.h index b6efd5669a..99e069e706 100644 --- a/source/module_hamilt_lcao/module_gint/grid_bigcell.h +++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.h @@ -16,7 +16,7 @@ class Grid_BigCell: public Grid_MeshCell // save the relative cartesian position // to bigcell of each atom. double** tau_in_bigcell; - + int nat; protected: //--------------------------------- diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp index e4a39fc23b..7c1f69e9b0 100644 --- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp +++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp @@ -88,7 +88,7 @@ Grid_Technique::~Grid_Technique() if (allocate_find_R2) { - for (int iat = 0; iat < GlobalC::ucell.nat; iat++) + for (int iat = 0; iat < this->nat; iat++) { delete[] find_R2[iat]; delete[] find_R2st[iat]; @@ -102,7 +102,7 @@ Grid_Technique::~Grid_Technique() #if ((defined __CUDA) /* || (defined __ROCM) */) if(GlobalV::device_flag == "gpu") { - free_gpu_gint_variables(GlobalC::ucell); + free_gpu_gint_variables(this->nat); } #endif } @@ -641,7 +641,7 @@ void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Or { if (is_malloced) { - free_gpu_gint_variables(ucell); + free_gpu_gint_variables(this->nat); } double ylmcoef[100]; ModuleBase::GlobalFunc::ZEROS(ylmcoef, 100); @@ -950,7 +950,7 @@ void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Or free(atom_iw2_ylm_now); } -void Grid_Technique::free_gpu_gint_variables(const UnitCell& ucell) +void Grid_Technique::free_gpu_gint_variables(int nat) { if (!is_malloced) { @@ -1027,7 +1027,7 @@ void Grid_Technique::free_gpu_gint_variables(const UnitCell& ucell) checkCudaErrors(cudaFree(dot_product_g)); checkCudaErrors(cudaFree(rho_g)); - const int max_atom_pair_number = ucell.nat * ucell.nat; + const int max_atom_pair_number = nat * nat; for (int i = 0; i < max_atom_pair_number; i++) { if (grid_vlocal_g[i] != nullptr) diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h index 1824d2ae9e..83cfc9dc75 100644 --- a/source/module_hamilt_lcao/module_gint/grid_technique.h +++ b/source/module_hamilt_lcao/module_gint/grid_technique.h @@ -226,7 +226,7 @@ class Grid_Technique : public Grid_MeshBall private: void init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb); - void free_gpu_gint_variables(const UnitCell& ucell); + void free_gpu_gint_variables(int nat); #endif }; From da3522cf5d38a4b8782f16532a02bcea1f66e87f Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Mon, 13 May 2024 20:30:39 +0800 Subject: [PATCH 08/23] remove GlobalC in gint_tools and vbatch matrix --- .../module_gint/gint_fvl.cpp | 4 +- .../module_gint/gint_gamma_env.cpp | 3 +- .../module_gint/gint_k_env.cpp | 3 +- .../module_gint/gint_rho.cpp | 3 +- .../module_gint/gint_tau.cpp | 3 +- .../module_gint/gint_tools.cpp | 84 ++++++++++--------- .../module_gint/gint_tools.h | 6 +- .../module_gint/gint_vl.cpp | 8 +- .../module_gint/grid_technique.cpp | 7 +- .../module_gint/grid_technique.h | 6 +- .../kernels/cuda/vbatch_matrix_mul.cu | 16 ++-- .../kernels/cuda/vbatch_matrix_mul.cuh | 4 +- 12 files changed, 76 insertions(+), 71 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/gint_fvl.cpp b/source/module_hamilt_lcao/module_gint/gint_fvl.cpp index 36de330336..45706d4dcd 100644 --- a/source/module_hamilt_lcao/module_gint/gint_fvl.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_fvl.cpp @@ -31,7 +31,7 @@ void Gint::gint_kernel_force( Gint_Tools::Array_Pool dpsir_ylm_z(this->bxyz, LD_pool); Gint_Tools::cal_dpsir_ylm(*this->gridt, this->bxyz, na_grid, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D,ucell); + psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D); //calculating f_mu(r) = v(r)*psi_mu(r)*dv const Gint_Tools::Array_Pool psir_vlbr3 @@ -170,7 +170,7 @@ void Gint::gint_kernel_force_meta( //psi and gradient of psi Gint_Tools::cal_dpsir_ylm(*this->gridt, this->bxyz, na_grid, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D,ucell); + psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D); /* Gint_Tools::cal_dpsir_ylm(na_grid, grid_index, delta_r, block_index, block_size, cal_flag, psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D, displ1); diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp b/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp index 29325a8e8b..02bbcd99d0 100644 --- a/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp @@ -40,8 +40,7 @@ void Gint_Gamma::cal_env(const double* wfc, double* rho,LCAO_Orbitals &orb,UnitC size, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D, - ucell); + psir_ylm.ptr_2D); int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz, this->nplane, this->gridt->start_ind[grid_index], ncyz); diff --git a/source/module_hamilt_lcao/module_gint/gint_k_env.cpp b/source/module_hamilt_lcao/module_gint/gint_k_env.cpp index 76499b2e5a..859b8d1a4e 100644 --- a/source/module_hamilt_lcao/module_gint/gint_k_env.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_k_env.cpp @@ -50,8 +50,7 @@ void Gint_k::cal_env_k(int ik, this->bxyz, size, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D, - ucell); + psir_ylm.ptr_2D); int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz, this->nplane, this->gridt->start_ind[grid_index], ncyz); diff --git a/source/module_hamilt_lcao/module_gint/gint_rho.cpp b/source/module_hamilt_lcao/module_gint/gint_rho.cpp index ff5bda6d7c..ea9688b8e2 100644 --- a/source/module_hamilt_lcao/module_gint/gint_rho.cpp +++ b/source/module_hamilt_lcao/module_gint/gint_rho.cpp @@ -29,8 +29,7 @@ void Gint::gint_kernel_rho( this->bxyz, na_grid, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D, - ucell); + psir_ylm.ptr_2D); for(int is=0; is GlobalC::ORB.Phi[it].getRcut() - 1.0e-10) + if(distance > orb.Phi[it].getRcut() - 1.0e-10) cal_flag[ib][id]=false; else cal_flag[ib][id]=true; @@ -184,11 +185,12 @@ namespace Gint_Tools const int*const block_index, // block_index[na_grid+1], count total number of atomis orbitals const int*const block_size, // block_size[na_grid], number of columns of a band const bool*const*const cal_flag, - double*const*const psir_ylm, - const UnitCell& ucell) // cal_flag[bxyz][na_grid], whether the atom-grid distance is larger than cutoff + double*const*const psir_ylm) // cal_flag[bxyz][na_grid], whether the atom-grid distance is larger than cutoff { ModuleBase::timer::tick("Gint_Tools", "cal_psir_ylm"); std::vector ylma; + const UnitCell& ucell=*gt.ucell; + const LCAO_Orbitals& orb=*gt.orb; for (int id=0; id it_psi_uniform(atom->nw); std::vector it_dpsi_uniform(atom->nw); // preprocess index @@ -248,7 +250,7 @@ namespace Gint_Tools // spherical harmonic functions Ylm //------------------------------------------------------ // Ylm::get_ylm_real(this->nnn[it], this->dr[id], ylma); - ModuleBase::Ylm::sph_harm ( GlobalC::ucell.atoms[it].nwl, + ModuleBase::Ylm::sph_harm ( ucell.atoms[it].nwl, dr[0] / distance, dr[1] / distance, dr[2] / distance, @@ -280,7 +282,7 @@ namespace Gint_Tools } p[iw]=phi * ylma[atom->iw2_ylm[iw]]; } // end iw - }// end distance<=(GlobalC::ORB.Phi[it].getRcut()-1.0e-15) + }// end distance<=(orb.Phi[it].getRcut()-1.0e-15) }// end ib }// end id ModuleBase::timer::tick("Gint_Tools", "cal_psir_ylm"); @@ -299,18 +301,19 @@ namespace Gint_Tools double*const*const psir_ylm, double*const*const dpsir_ylm_x, double*const*const dpsir_ylm_y, - double*const*const dpsir_ylm_z, - const UnitCell& ucell) + double*const*const dpsir_ylm_z) { ModuleBase::timer::tick("Gint_Tools", "cal_dpsir_ylm"); + const UnitCell& ucell=*gt.ucell; + const LCAO_Orbitals& orb=*gt.orb; for (int id=0; id rly; std::vector> grly; - ModuleBase::Ylm::grad_rl_sph_harm(GlobalC::ucell.atoms[it].nwl, dr[0], dr[1], dr[2], rly, grly); + ModuleBase::Ylm::grad_rl_sph_harm(ucell.atoms[it].nwl, dr[0], dr[1], dr[2], rly, grly); if(distance < 1e-9) distance = 1e-9; const double position = distance / delta_r; @@ -362,7 +365,7 @@ namespace Gint_Tools // function from interpolation method. if ( atom->iw2_new[iw] ) { - const Numerical_Orbital_Lm &philn = GlobalC::ORB.Phi[it].PhiLN( + const Numerical_Orbital_Lm &philn = orb.Phi[it].PhiLN( atom->iw2l[iw], atom->iw2n[iw]); @@ -429,14 +432,16 @@ namespace Gint_Tools double*const*const ddpsir_ylm_zz) { ModuleBase::timer::tick("Gint_Tools", "cal_ddpsir_ylm"); + const UnitCell& ucell=*gt.ucell; + const LCAO_Orbitals& orb=*gt.orb; for (int id=0; id rly; std::vector> grly; - ModuleBase::Ylm::grad_rl_sph_harm(GlobalC::ucell.atoms[it].nwl, dr1[0], dr1[1], dr1[2], rly, grly); + ModuleBase::Ylm::grad_rl_sph_harm(ucell.atoms[it].nwl, dr1[0], dr1[1], dr1[2], rly, grly); double distance1 = std::sqrt(dr1[0]*dr1[0] + dr1[1]*dr1[1] + dr1[2]*dr1[2]); if(distance1 < 1e-9) distance1 = 1e-9; @@ -532,7 +537,7 @@ namespace Gint_Tools // function from interpolation method. if ( atom->iw2_new[iw] ) { - const Numerical_Orbital_Lm &philn = GlobalC::ORB.Phi[it].PhiLN( + const Numerical_Orbital_Lm &philn = orb.Phi[it].PhiLN( atom->iw2l[iw], atom->iw2n[iw]); @@ -609,8 +614,8 @@ namespace Gint_Tools std::vector rly; std::vector> grly; std::vector> hrly; - ModuleBase::Ylm::grad_rl_sph_harm(GlobalC::ucell.atoms[it].nwl, dr[0], dr[1], dr[2], rly, grly); - ModuleBase::Ylm::hes_rl_sph_harm(GlobalC::ucell.atoms[it].nwl, dr[0], dr[1], dr[2], hrly); + ModuleBase::Ylm::grad_rl_sph_harm(ucell.atoms[it].nwl, dr[0], dr[1], dr[2], rly, grly); + ModuleBase::Ylm::hes_rl_sph_harm(ucell.atoms[it].nwl, dr[0], dr[1], dr[2], hrly); const double position = distance / delta_r; const double iq = static_cast(position); @@ -629,7 +634,7 @@ namespace Gint_Tools // function from interpolation method. if ( atom->iw2_new[iw] ) { - const Numerical_Orbital_Lm &philn = GlobalC::ORB.Phi[it].PhiLN( + const Numerical_Orbital_Lm &philn = orb.Phi[it].PhiLN( atom->iw2l[iw], atom->iw2n[iw]); @@ -723,13 +728,15 @@ namespace Gint_Tools double*const*const dpsir_ylm_zz) { ModuleBase::timer::tick("Gint_Tools", "cal_dpsirr_ylm"); + const UnitCell& ucell=*gt.ucell; + const LCAO_Orbitals& orb=*gt.orb; for (int id=0; idbxyz, na_grid, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D, - ucell); + psir_ylm.ptr_2D); //calculating f_mu(r) = v(r)*psi_mu(r)*dv const Gint_Tools::Array_Pool psir_vlbr3 = Gint_Tools::get_psir_vlbr3( @@ -97,7 +96,7 @@ void Gint::gint_kernel_dvlocal( Gint_Tools::Array_Pool dpsir_ylm_z(this->bxyz, LD_pool); Gint_Tools::cal_dpsir_ylm(*this->gridt, this->bxyz, na_grid, grid_index, delta_r, block_index, block_size, cal_flag, - psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D,ucell); + psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D); //calculating f_mu(r) = v(r)*psi_mu(r)*dv const Gint_Tools::Array_Pool psir_vlbr3 = Gint_Tools::get_psir_vlbr3( @@ -157,8 +156,7 @@ void Gint::gint_kernel_vlocal_meta( psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, - dpsir_ylm_z.ptr_2D, - ucell + dpsir_ylm_z.ptr_2D ); //calculating f_mu(r) = v(r)*psi_mu(r)*dv diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp index 7c1f69e9b0..fdcfdc32eb 100644 --- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp +++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp @@ -127,7 +127,7 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in, const int& nplane, const int& startz_current, const UnitCell& ucell, - const LCAO_Orbitals &orb) + const LCAO_Orbitals& orb) { ModuleBase::TITLE("Grid_Technique", "init"); ModuleBase::timer::tick("Grid_Technique", "init"); @@ -152,7 +152,8 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in, nbxx_in, nbzp_start_in, nbzp_in); - + this->ucell=&ucell; + this->orb=&orb; this->init_latvec(ucell); this->init_big_latvec(ucell); @@ -941,7 +942,7 @@ void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Or checkCudaErrors(cudaStreamCreate(&streams[i])); } - gemm_algo_selector(bxyz, fastest_matrix_mul); + gemm_algo_selector(bxyz, fastest_matrix_mul,ucell); is_malloced = true; diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h index 83cfc9dc75..03cca8e924 100644 --- a/source/module_hamilt_lcao/module_gint/grid_technique.h +++ b/source/module_hamilt_lcao/module_gint/grid_technique.h @@ -74,6 +74,10 @@ class Grid_Technique : public Grid_MeshBall bool allocate_find_R2; int binary_search_find_R2_offset(int val, int iat) const; + //UnitCell and LCAO_Obrbitals + const UnitCell* ucell; + const LCAO_Orbitals* orb; + // indexes for nnrg -> orbital index + R index std::vector nnrg_index; @@ -98,7 +102,7 @@ class Grid_Technique : public Grid_MeshBall const int& nplane, const int& startz_current, const UnitCell& ucell, - const LCAO_Orbitals &orb); + const LCAO_Orbitals& orb); /// number of elements(basis-pairs) in this processon /// on all adjacent atoms-pairs(Grid division) diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cu index ed710edf5a..b84b76a840 100644 --- a/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cu +++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cu @@ -534,17 +534,17 @@ void gemm_time_measure(int max_m, * in compilation time (TODO: so in the future, it will be necessary to split * this large section of code into multiple files, multiple compilation units). */ -void gemm_algo_selector(int matrix_k, matrix_multiple_func_type& fastest_algo) +void gemm_algo_selector(int matrix_k, matrix_multiple_func_type& fastest_algo,const UnitCell& ucell) { int batchCount_per_type = 32; int batchCount - = batchCount_per_type * GlobalC::ucell.ntype * GlobalC::ucell.ntype; + = batchCount_per_type * ucell.ntype * ucell.ntype; Cuda_Mem_Wrapper m(batchCount); Cuda_Mem_Wrapper n(batchCount); Cuda_Mem_Wrapper k(batchCount); - int max_m = GlobalC::ucell.nwmax, max_n = GlobalC::ucell.nwmax; + int max_m = ucell.nwmax, max_n = ucell.nwmax; Cuda_Mem_Wrapper A(batchCount * max_m * matrix_k); Cuda_Mem_Wrapper B(batchCount * max_n * matrix_k); @@ -572,17 +572,17 @@ void gemm_algo_selector(int matrix_k, matrix_multiple_func_type& fastest_algo) int index = 0; for (int i = 0; i < batchCount_per_type; ++i) { - for (int j = 0; j < GlobalC::ucell.ntype; j++) + for (int j = 0; j < ucell.ntype; j++) { - for (int l = 0; l < GlobalC::ucell.ntype; l++) + for (int l = 0; l < ucell.ntype; l++) { - m.get_host_pointer()[index] = GlobalC::ucell.atoms[j].nw; - n.get_host_pointer()[index] = GlobalC::ucell.atoms[l].nw; + m.get_host_pointer()[index] = ucell.atoms[j].nw; + n.get_host_pointer()[index] = ucell.atoms[l].nw; k.get_host_pointer()[index] = matrix_k; lda.get_host_pointer()[index] = matrix_k; ldb.get_host_pointer()[index] = matrix_k; - ldc.get_host_pointer()[index] = GlobalC::ucell.atoms[l].nw; + ldc.get_host_pointer()[index] = ucell.atoms[l].nw; A_array.get_host_pointer()[index] = &A.get_device_pointer()[index * max_m * matrix_k]; diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cuh b/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cuh index 3972918675..ea4e42e521 100644 --- a/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cuh +++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cuh @@ -7,7 +7,7 @@ #include // for fprintf and stderr #include - +#include "module_cell/unitcell.h" /** * Performs a batched matrix multiplication using the vbatched_gemm_impl * function. @@ -111,5 +111,5 @@ typedef std::function matrix_multiple_func_type; -void gemm_algo_selector(int k, matrix_multiple_func_type& func); +void gemm_algo_selector(int k, matrix_multiple_func_type& func,const UnitCell& ucell); #endif // VBATCH_MATRIX_MUL_H \ No newline at end of file From 939ed76dfcc62ccc365924d4d541f5721c2f2eec Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Tue, 21 May 2024 20:16:41 +0800 Subject: [PATCH 09/23] fix relax have compute stress and change GPU force compute to acclerate --- .../module_hamilt_lcao/module_gint/gint.cpp | 82 +++++++++++-------- .../module_gint/gint_force.h | 4 +- .../module_gint/gint_force_gpu.cu | 17 ++-- .../module_gint/kernels/cuda/gint_force.cu | 2 +- 4 files changed, 61 insertions(+), 44 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp index 2b86e881dc..a25f1deb11 100644 --- a/source/module_hamilt_lcao/module_gint/gint.cpp +++ b/source/module_hamilt_lcao/module_gint/gint.cpp @@ -36,7 +36,6 @@ Gint::~Gint() void Gint::cal_gint(Gint_inout* inout) { ModuleBase::timer::tick("Gint_interface", "cal_gint"); - if(inout->job==Gint_Tools::job_type::vlocal) { ModuleBase::TITLE("Gint_interface","cal_gint_vlocal"); @@ -132,44 +131,62 @@ void Gint::cal_gint(Gint_inout* inout) { const int ncyz = this->ny * this->nplane; int nat = ucell.nat; + const int isforce = inout->isforce; + const int isstress =inout->isstress; // for (int is = 0; is < GlobalV::NSPIN; ++is) // { - double *force = new double[ucell.nat * 3]; - for (int i = 0; i < nat * 3; i++) + if (isforce || isstress){ + double *force = new double[ucell.nat * 3]; + for (int i = 0; i < nat * 3; i++) + { + force[i] = 0.0; + } + double *stress = new double[6]; + for (int i = 0; i < 6; i++) + { + stress[i] = 0.0; + } + GintKernel::gint_gamma_force_gpu(this->DMRGint[inout->ispin], + ucell.omega + / this->ncxyz, + inout->vl, + force, + stress, + this->nplane, + dr, + rcut, + isforce, + isstress, + *this->gridt, + ucell); + + if (inout->isforce) { - force[i] = 0.0; + for (int iat = 0; iat < nat; iat++) + { + inout->fvl_dphi[0](iat, 0) += force[iat * 3]; + inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1]; + inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2]; + } + delete[] force; } - double *stress = new double[6]; - for (int i = 0; i < 6; i++) - { - stress[i] = 0.0; + if (inout->isstress){ + for (int iat = 0; iat < nat; iat++) + { + inout->fvl_dphi[0](iat, 0) += force[iat * 3]; + inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1]; + inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2]; + } + inout->svl_dphi[0](0, 0) += stress[0]; + inout->svl_dphi[0](0, 1) += stress[1]; + inout->svl_dphi[0](0, 2) += stress[2]; + inout->svl_dphi[0](1, 1) += stress[3]; + inout->svl_dphi[0](1, 2) += stress[4]; + inout->svl_dphi[0](2, 2) += stress[5]; + delete[] stress; } - GintKernel::gint_gamma_force_gpu(this->DMRGint[inout->ispin], - ucell.omega - / this->ncxyz, - inout->vl, - force, - stress, - this->nplane, - dr, - rcut, - *this->gridt, - ucell); - for (int iat = 0; iat < nat; iat++) - { - inout->fvl_dphi[0](iat, 0) += force[iat * 3]; - inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1]; - inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2]; } - inout->svl_dphi[0](0, 0) += stress[0]; - inout->svl_dphi[0](0, 1) += stress[1]; - inout->svl_dphi[0](0, 2) += stress[2]; - inout->svl_dphi[0](1, 1) += stress[3]; - inout->svl_dphi[0](1, 2) += stress[4]; - inout->svl_dphi[0](2, 2) += stress[5]; - delete[] force; - delete[] stress; // } } } @@ -310,7 +327,6 @@ void Gint::cal_gint(Gint_inout* inout) this->nplane, this->gridt->start_ind[grid_index], ncyz, dv); double** DM_in; - if(GlobalV::GAMMA_ONLY_LOCAL) { DM_in = inout->DM[GlobalV::CURRENT_SPIN]; diff --git a/source/module_hamilt_lcao/module_gint/gint_force.h b/source/module_hamilt_lcao/module_gint/gint_force.h index 6b6718e142..1cc22eaa59 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force.h +++ b/source/module_hamilt_lcao/module_gint/gint_force.h @@ -92,6 +92,8 @@ void gint_gamma_force_gpu(hamilt::HContainer* dm, const int nczp, double dr, double* rcut, + int isforce, + int isstress, const Grid_Technique& gridt, const UnitCell& ucell); @@ -208,7 +210,7 @@ void cal_init(ForceStressIat& f_s_iat, const int cuda_block, const int atom_num_grid, const int max_size, - const ForceStressIatGlobal& f_s_iatg); + ForceStressIatGlobal& f_s_iatg); /** * @brief GridParameter memCpy,from Host to Device * diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu index 30ea8fa6ab..2d4186a8c3 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu +++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu @@ -63,6 +63,8 @@ void gint_gamma_force_gpu(hamilt::HContainer* dm, const int nczp, double dr, double* rcut, + int isforce, + int isstress, const Grid_Technique& gridt, const UnitCell& ucell) { @@ -110,7 +112,7 @@ void gint_gamma_force_gpu(hamilt::HContainer* dm, dim3 block_dot_force(cuda_threads); dim3 grid_dot(cuda_block); dim3 block_dot(cuda_threads); - + para_init(para, iter_num, nbz, gridt); cal_init(f_s_iat, para.stream_num, @@ -118,8 +120,6 @@ void gint_gamma_force_gpu(hamilt::HContainer* dm, atom_num_grid, max_size, f_s_iat_dev); - checkCuda(cudaStreamSynchronize(gridt.streams[para.stream_num])); - /*gpu task compute in CPU */ gpu_task_generator_force(gridt, ucell, @@ -195,7 +195,7 @@ void gint_gamma_force_gpu(hamilt::HContainer* dm, gridt.streams[para.stream_num], nullptr); - checkCuda(cudaStreamSynchronize(gridt.streams[para.stream_num])); + /* force compute in GPU */ dot_product_force<<* dm, nwmax, max_size, gridt.psir_size / nwmax); - /* force compute in CPU*/ - cal_force_add(f_s_iat, force, atom_num_grid); + // /* force compute in CPU*/ + /*stress compute in GPU*/ dot_product_stress<<* dm, gridt.psir_size); /* stress compute in CPU*/ cal_stress_add(f_s_iat, stress, cuda_block); + cal_force_add(f_s_iat, force, atom_num_grid); + iter_num++; } } - // cudaFree(f_s_iat.stress_device); - // cudaFree(f_s_iat.force_device); - // cudaFree(f_s_iat.iat_device); delete[] f_s_iat.stress_host; delete[] f_s_iat.force_host; delete[] f_s_iat.iat_host; diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu index bbc1d7dcb8..3153b7efd8 100644 --- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu +++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu @@ -393,7 +393,7 @@ void cal_init(ForceStressIat& f_s_iat, const int cuda_block, const int atom_num_grid, const int max_size, - const ForceStressIatGlobal& f_s_iat_dev) + ForceStressIatGlobal& f_s_iat_dev) { const int iat_min = -max_size - 1; f_s_iat.stress_host = new double[6 * cuda_block]; From 01bde3e3baf35e51e8b80a227ab16badc6311b18 Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Tue, 21 May 2024 20:59:40 +0800 Subject: [PATCH 10/23] fix num stream in input.md and use num_stream in input --- docs/advanced/input_files/input-main.md | 3 +-- source/module_esolver/esolver_ks_lcao_elec.cpp | 3 ++- .../module_hamilt_lcao/module_gint/gint_force_gpu.cu | 10 +++++++++- .../module_hamilt_lcao/module_gint/grid_technique.cpp | 9 ++++++--- source/module_hamilt_lcao/module_gint/grid_technique.h | 9 +++++---- 5 files changed, 23 insertions(+), 11 deletions(-) diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index d1c996127d..8ae8fcafba 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -61,7 +61,7 @@ - [search\_radius](#search_radius) - [search\_pbc](#search_pbc) - [bx, by, bz](#bx-by-bz) - - [num\_stream] (#num_stream) + - [num\_stream](#num_stream) - [Electronic structure](#electronic-structure) - [basis\_type](#basis_type) - [ks\_solver](#ks_solver) @@ -912,7 +912,6 @@ These variables are used to control the numerical atomic orbitals related parame - **Description**: choose the number of streams in GPU when we compute the `LCAO`. According to different devices , we may have different effects.For most devices,the stream is enough when the number is bigger then 2. - **Default** : "4" -[back to top](#full-list-of-input-keywords) ## Electronic structure diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp index b3d8e975b5..263520e5f1 100644 --- a/source/module_esolver/esolver_ks_lcao_elec.cpp +++ b/source/module_esolver/esolver_ks_lcao_elec.cpp @@ -66,7 +66,8 @@ void ESolver_KS_LCAO::set_matrix_grid(Record_adj& ra) this->pw_rho->nplane, this->pw_rho->startz_current, GlobalC::ucell, - GlobalC::ORB); + GlobalC::ORB, + GlobalV::NUM_STREAM); // (2)For each atom, calculate the adjacent atoms in different cells // and allocate the space for H(R) and S(R). diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu index 2d4186a8c3..c9a6905595 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu +++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu @@ -150,6 +150,7 @@ void gint_gamma_force_gpu(hamilt::HContainer* dm, para.stream_num); checkCuda(cudaStreamSynchronize(gridt.streams[para.stream_num])); /* cuda stream compute and Multiplication of multinomial matrices */ + get_psi_force<<* dm, /* force compute in GPU */ + if (isforce){ dot_product_force<<* dm, nwmax, max_size, gridt.psir_size / nwmax); + } // /* force compute in CPU*/ /*stress compute in GPU*/ + if (isstress){ dot_product_stress<<* dm, para.psir_dm_device, f_s_iat.stress_device, gridt.psir_size); + } /* stress compute in CPU*/ + if (isstress){ cal_stress_add(f_s_iat, stress, cuda_block); + } + if (isforce){ cal_force_add(f_s_iat, force, atom_num_grid); - + } iter_num++; } } diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp index fdcfdc32eb..2eb74becc1 100644 --- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp +++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp @@ -127,7 +127,8 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in, const int& nplane, const int& startz_current, const UnitCell& ucell, - const LCAO_Orbitals& orb) + const LCAO_Orbitals& orb, + const int num_stream) { ModuleBase::TITLE("Grid_Technique", "init"); ModuleBase::timer::tick("Grid_Technique", "init"); @@ -184,7 +185,7 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in, #if ((defined __CUDA) /* || (defined __ROCM) */) if(GlobalV::device_flag == "gpu") { - this->init_gpu_gint_variables(ucell,orb); + this->init_gpu_gint_variables(ucell,orb,num_stream); } #endif @@ -638,12 +639,14 @@ void Grid_Technique::cal_trace_lo(const UnitCell& ucell) #if ((defined __CUDA) /* || (defined __ROCM) */) -void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb) +void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb,const int num_stream) { if (is_malloced) { free_gpu_gint_variables(this->nat); } + nstreams = num_stream; + streams=new cudaStream_t[nstreams]; double ylmcoef[100]; ModuleBase::GlobalFunc::ZEROS(ylmcoef, 100); for (int i = 0; i < 100; i++) diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h index 03cca8e924..a40ad212de 100644 --- a/source/module_hamilt_lcao/module_gint/grid_technique.h +++ b/source/module_hamilt_lcao/module_gint/grid_technique.h @@ -102,7 +102,8 @@ class Grid_Technique : public Grid_MeshBall const int& nplane, const int& startz_current, const UnitCell& ucell, - const LCAO_Orbitals& orb); + const LCAO_Orbitals& orb, + const int num_stream); /// number of elements(basis-pairs) in this processon /// on all adjacent atoms-pairs(Grid division) @@ -162,8 +163,8 @@ class Grid_Technique : public Grid_MeshBall int atom_pair_mesh; int atom_pair_nbz; - const int nstreams = 4; - cudaStream_t streams[4]; + int nstreams ; + cudaStream_t* streams; // streams[nstreams] // TODO it needs to be implemented through configuration files @@ -229,7 +230,7 @@ class Grid_Technique : public Grid_MeshBall matrix_multiple_func_type fastest_matrix_mul; private: - void init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb); + void init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb,const int num_stream); void free_gpu_gint_variables(int nat); #endif From 6ebb7a372035fbff096bf20171c1857fd5eda4a9 Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Wed, 22 May 2024 11:46:03 +0800 Subject: [PATCH 11/23] fix error in compute force --- .../module_hamilt_lcao/module_gint/gint.cpp | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp index a25f1deb11..c15b8ba993 100644 --- a/source/module_hamilt_lcao/module_gint/gint.cpp +++ b/source/module_hamilt_lcao/module_gint/gint.cpp @@ -171,19 +171,13 @@ void Gint::cal_gint(Gint_inout* inout) delete[] force; } if (inout->isstress){ - for (int iat = 0; iat < nat; iat++) - { - inout->fvl_dphi[0](iat, 0) += force[iat * 3]; - inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1]; - inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2]; - } - inout->svl_dphi[0](0, 0) += stress[0]; - inout->svl_dphi[0](0, 1) += stress[1]; - inout->svl_dphi[0](0, 2) += stress[2]; - inout->svl_dphi[0](1, 1) += stress[3]; - inout->svl_dphi[0](1, 2) += stress[4]; - inout->svl_dphi[0](2, 2) += stress[5]; - delete[] stress; + inout->svl_dphi[0](0, 0) += stress[0]; + inout->svl_dphi[0](0, 1) += stress[1]; + inout->svl_dphi[0](0, 2) += stress[2]; + inout->svl_dphi[0](1, 1) += stress[3]; + inout->svl_dphi[0](1, 2) += stress[4]; + inout->svl_dphi[0](2, 2) += stress[5]; + delete[] stress; } } From fbd2f7b60080c5dd8bfd48bbc5598efbdb77fee0 Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Thu, 23 May 2024 20:25:30 +0800 Subject: [PATCH 12/23] fix memory error in force compute --- source/module_hamilt_lcao/module_gint/gint_force_gpu.cu | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu index c9a6905595..309515955c 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu +++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu @@ -240,11 +240,12 @@ void gint_gamma_force_gpu(hamilt::HContainer* dm, cal_force_add(f_s_iat, force, atom_num_grid); } iter_num++; + delete[] f_s_iat.stress_host; + delete[] f_s_iat.force_host; + delete[] f_s_iat.iat_host; } } - delete[] f_s_iat.stress_host; - delete[] f_s_iat.force_host; - delete[] f_s_iat.iat_host; + delete[] denstiy_mat.density_mat_h; /*free variables in CPU host*/ for (int i = 0; i < gridt.nstreams; i++) { From c0e99902a5e731f18369a7413e5b30c43950ce09 Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Fri, 24 May 2024 10:25:16 +0800 Subject: [PATCH 13/23] use std instead of double * and add const --- source/module_hamilt_lcao/module_gint/gint.cpp | 18 ++++++------------ .../module_gint/gint_force.h | 12 ++++++------ .../module_gint/gint_force_gpu.cu | 8 ++++---- .../module_gint/kernels/cuda/gint_force.cu | 4 ++-- 4 files changed, 18 insertions(+), 24 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp index c15b8ba993..aa3ce5e6fb 100644 --- a/source/module_hamilt_lcao/module_gint/gint.cpp +++ b/source/module_hamilt_lcao/module_gint/gint.cpp @@ -136,16 +136,8 @@ void Gint::cal_gint(Gint_inout* inout) // for (int is = 0; is < GlobalV::NSPIN; ++is) // { if (isforce || isstress){ - double *force = new double[ucell.nat * 3]; - for (int i = 0; i < nat * 3; i++) - { - force[i] = 0.0; - } - double *stress = new double[6]; - for (int i = 0; i < 6; i++) - { - stress[i] = 0.0; - } + std::vector force(nat * 3, 0.0); + std::vector stress(6, 0.0); GintKernel::gint_gamma_force_gpu(this->DMRGint[inout->ispin], ucell.omega / this->ncxyz, @@ -168,7 +160,7 @@ void Gint::cal_gint(Gint_inout* inout) inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1]; inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2]; } - delete[] force; + } if (inout->isstress){ inout->svl_dphi[0](0, 0) += stress[0]; @@ -177,8 +169,10 @@ void Gint::cal_gint(Gint_inout* inout) inout->svl_dphi[0](1, 1) += stress[3]; inout->svl_dphi[0](1, 2) += stress[4]; inout->svl_dphi[0](2, 2) += stress[5]; - delete[] stress; + } + force.clear(); + stress.clear(); } // } diff --git a/source/module_hamilt_lcao/module_gint/gint_force.h b/source/module_hamilt_lcao/module_gint/gint_force.h index 1cc22eaa59..b968ccbfa2 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force.h +++ b/source/module_hamilt_lcao/module_gint/gint_force.h @@ -87,13 +87,13 @@ typedef struct void gint_gamma_force_gpu(hamilt::HContainer* dm, const double vfactor, const double* vlocal, - double* force, - double* stress, + std::vector force, + std::vector stress, const int nczp, double dr, double* rcut, - int isforce, - int isstress, + const int isforce, + const int isstress, const Grid_Technique& gridt, const UnitCell& ucell); @@ -249,7 +249,7 @@ void cal_mem_cpy(ForceStressIat& f_s_iat, * @param atom_num_grid in force calculate,used for Block nums */ void cal_force_add(ForceStressIat& f_s_iat, - double* force, + std::vector force, const int atom_num_grid); /** * @brief Stress Calculate on Host @@ -260,7 +260,7 @@ void cal_force_add(ForceStressIat& f_s_iat, * @param cuda_block in stress compute,used for Block nums */ void cal_stress_add(ForceStressIat& f_s_iat, - double* stress, + std::vector stress, const int cuda_block); } // namespace GintKernel #endif diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu index 309515955c..ba87f02a11 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu +++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu @@ -58,13 +58,13 @@ namespace GintKernel void gint_gamma_force_gpu(hamilt::HContainer* dm, const double vfactor, const double* vlocal, - double* force, - double* stress, + std::vector force, + std::vector stress, const int nczp, double dr, double* rcut, - int isforce, - int isstress, + const int isforce, + const int isstress, const Grid_Technique& gridt, const UnitCell& ucell) { diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu index 3153b7efd8..60a9f06f95 100644 --- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu +++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu @@ -572,7 +572,7 @@ void cal_mem_cpy(ForceStressIat& f_s_iat, * @param atom_num_grid in force calculate,used for Block nums */ void cal_force_add(ForceStressIat& f_s_iat, - double* force, + std::vector force, const int atom_num_grid) { checkCuda(cudaMemcpy(f_s_iat.force_host, @@ -601,7 +601,7 @@ void cal_force_add(ForceStressIat& f_s_iat, * @param cuda_block in stress compute,used for Block nums */ void cal_stress_add(ForceStressIat& f_s_iat, - double* stress, + std::vector stress, const int cuda_block) { checkCuda(cudaMemcpy(f_s_iat.stress_host, From 20f60abe9dc03e226d05197745c3fbb7a75ea42f Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Fri, 24 May 2024 10:53:43 +0800 Subject: [PATCH 14/23] fix error in vector use --- source/module_hamilt_lcao/module_gint/gint.cpp | 1 - source/module_hamilt_lcao/module_gint/gint_force.h | 8 ++++---- .../module_gint/gint_force_gpu.cu | 13 ++++--------- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp index aa3ce5e6fb..5c7bccb559 100644 --- a/source/module_hamilt_lcao/module_gint/gint.cpp +++ b/source/module_hamilt_lcao/module_gint/gint.cpp @@ -160,7 +160,6 @@ void Gint::cal_gint(Gint_inout* inout) inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1]; inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2]; } - } if (inout->isstress){ inout->svl_dphi[0](0, 0) += stress[0]; diff --git a/source/module_hamilt_lcao/module_gint/gint_force.h b/source/module_hamilt_lcao/module_gint/gint_force.h index b968ccbfa2..4f59d77520 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force.h +++ b/source/module_hamilt_lcao/module_gint/gint_force.h @@ -87,8 +87,8 @@ typedef struct void gint_gamma_force_gpu(hamilt::HContainer* dm, const double vfactor, const double* vlocal, - std::vector force, - std::vector stress, + std::vector& force, + std::vector& stress, const int nczp, double dr, double* rcut, @@ -249,7 +249,7 @@ void cal_mem_cpy(ForceStressIat& f_s_iat, * @param atom_num_grid in force calculate,used for Block nums */ void cal_force_add(ForceStressIat& f_s_iat, - std::vector force, + std::vector& force, const int atom_num_grid); /** * @brief Stress Calculate on Host @@ -260,7 +260,7 @@ void cal_force_add(ForceStressIat& f_s_iat, * @param cuda_block in stress compute,used for Block nums */ void cal_stress_add(ForceStressIat& f_s_iat, - std::vector stress, + std::vector& stress, const int cuda_block); } // namespace GintKernel #endif diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu index ba87f02a11..a9c6d86632 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu +++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu @@ -58,8 +58,8 @@ namespace GintKernel void gint_gamma_force_gpu(hamilt::HContainer* dm, const double vfactor, const double* vlocal, - std::vector force, - std::vector stress, + std::vector& force, + std::vector& stress, const int nczp, double dr, double* rcut, @@ -195,8 +195,6 @@ void gint_gamma_force_gpu(hamilt::HContainer* dm, atom_pair_num, gridt.streams[para.stream_num], nullptr); - - /* force compute in GPU */ if (isforce){ dot_product_force<<* dm, max_size, gridt.psir_size / nwmax); } - // /* force compute in CPU*/ - - /*stress compute in GPU*/ if (isstress){ dot_product_stress<<* dm, } /* stress compute in CPU*/ if (isstress){ - cal_stress_add(f_s_iat, stress, cuda_block); + cal_stress_add(f_s_iat, stress, cuda_block); } if (isforce){ - cal_force_add(f_s_iat, force, atom_num_grid); + cal_force_add(f_s_iat, force, atom_num_grid); } iter_num++; delete[] f_s_iat.stress_host; From 8626c7a6ee236a3e77d0f80a8886c184027fd11c Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Fri, 24 May 2024 11:20:41 +0800 Subject: [PATCH 15/23] fix error in compile --- .../module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu index 60a9f06f95..ffc9ab3e97 100644 --- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu +++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu @@ -572,7 +572,7 @@ void cal_mem_cpy(ForceStressIat& f_s_iat, * @param atom_num_grid in force calculate,used for Block nums */ void cal_force_add(ForceStressIat& f_s_iat, - std::vector force, + std::vector Date: Fri, 24 May 2024 11:45:46 +0800 Subject: [PATCH 16/23] fix error in compile with force --- .../module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu index ffc9ab3e97..ad5cb2477a 100644 --- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu +++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu @@ -572,7 +572,7 @@ void cal_mem_cpy(ForceStressIat& f_s_iat, * @param atom_num_grid in force calculate,used for Block nums */ void cal_force_add(ForceStressIat& f_s_iat, - std::vector& force, const int atom_num_grid) { checkCuda(cudaMemcpy(f_s_iat.force_host, From cfae1f62aad719bd6e09e08f6e5000507319e57e Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Fri, 24 May 2024 12:52:19 +0800 Subject: [PATCH 17/23] fix compile error --- .../module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu index ad5cb2477a..f93db3d417 100644 --- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu +++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu @@ -601,7 +601,7 @@ void cal_force_add(ForceStressIat& f_s_iat, * @param cuda_block in stress compute,used for Block nums */ void cal_stress_add(ForceStressIat& f_s_iat, - std::vector stress, + std::vector& stress, const int cuda_block) { checkCuda(cudaMemcpy(f_s_iat.stress_host, From 892918061e4609bf0482f863d45c53c7d69c5cbd Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Sun, 26 May 2024 16:35:02 +0800 Subject: [PATCH 18/23] fix paramter name and function name --- docs/advanced/input_files/input-main.md | 2 + .../module_hamilt_lcao/module_gint/gint.cpp | 2 +- .../module_gint/gint_force.h | 40 +++++++++---------- .../module_gint/gint_force_gpu.cu | 12 +++--- .../module_gint/gtask_force.cpp | 2 +- .../module_gint/kernels/cuda/gint_force.cu | 28 ++++++------- 6 files changed, 44 insertions(+), 42 deletions(-) diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md index b22e35e881..970dd4fd72 100644 --- a/docs/advanced/input_files/input-main.md +++ b/docs/advanced/input_files/input-main.md @@ -916,6 +916,8 @@ These variables are used to control the numerical atomic orbitals related parame enough when the number is bigger then 2. - **Default** : "4" +[back to top](#full-list-of-input-keywords) + ## Electronic structure These variables are used to control the electronic structure and geometry relaxation diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp index 5c7bccb559..6a1ad203ee 100644 --- a/source/module_hamilt_lcao/module_gint/gint.cpp +++ b/source/module_hamilt_lcao/module_gint/gint.cpp @@ -138,7 +138,7 @@ void Gint::cal_gint(Gint_inout* inout) if (isforce || isstress){ std::vector force(nat * 3, 0.0); std::vector stress(6, 0.0); - GintKernel::gint_gamma_force_gpu(this->DMRGint[inout->ispin], + GintKernel::gint_fvl_gamma_gpu(this->DMRGint[inout->ispin], ucell.omega / this->ncxyz, inout->vl, diff --git a/source/module_hamilt_lcao/module_gint/gint_force.h b/source/module_hamilt_lcao/module_gint/gint_force.h index 4f59d77520..51851082de 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force.h +++ b/source/module_hamilt_lcao/module_gint/gint_force.h @@ -44,7 +44,7 @@ typedef struct double** matrix_A_device; double** matrix_B_device; double** matrix_C_device; -} SGridParameter; +} grid_para; typedef struct { @@ -55,14 +55,14 @@ typedef struct int* iat_device; int* iat_host; -} ForceStressIat; +} frc_strs_iat; typedef struct { double* stress_global; double* force_global; int* iat_global; -} ForceStressIatGlobal; +} frc_strs_iat_gbl; typedef struct { @@ -84,7 +84,7 @@ typedef struct * @param ylmcoef_now Coefficients for spherical harmonics. * @param gridt Reference to Grid_Technique object. */ -void gint_gamma_force_gpu(hamilt::HContainer* dm, +void gint_fvl_gamma_gpu(hamilt::HContainer* dm, const double vfactor, const double* vlocal, std::vector& force, @@ -138,7 +138,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt, int& max_m, int& max_n, int& atom_pair_num, - SGridParameter& para); + grid_para& para); /** * @brief Density Matrix,force Stress Iat Init * @@ -146,7 +146,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt, * * @param denstiy_mat DensityMat,contained the density_mat_dice and * destiyMatHost - * @param f_s_iat_dev ForceStressIatGlobal,contined the Force Stress and + * @param f_s_iat_dev frc_strs_iat_gbl,contined the Force Stress and * Iat Number * @param dm hamilt::HContainer,denstiy stored in the Hcontainer * @param gridt Grid_Technique,stored the major method in the the gint. @@ -156,7 +156,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt, * @param atom_num_grid in force calculate,used for Block nums */ void calculateInit(DensityMat& denstiy_mat, - ForceStressIatGlobal& f_s_iat_dev, + frc_strs_iat_gbl& f_s_iat_dev, hamilt::HContainer* dm, const Grid_Technique& gridt, const UnitCell& ucell, @@ -189,28 +189,28 @@ void allocateDm(double* matrix_host, * @param nbz int,stand for the number of Z-axis * @param gridt Grid_Technique,stored the major method in the the gint. */ -void para_init(SGridParameter& para, +void para_init(grid_para& para, const int iter_num, const int nbz, const Grid_Technique& gridt); /** - * @brief ForceStressIat on host and device Init + * @brief frc_strs_iat on host and device Init * * GridParameter init * - * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Host + * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Host * @param stream_num int , record the stream in GPU * @param cuda_block in stress compute,used for Block nums * @param atom_num_grid in force calculate,used for Block nums * @param max_size Maximum size of atoms on a grid. - * @param ForceStressIatGlobal ForceStressIatGlobal,contains the Force Stree Iat on Host + * @param frc_strs_iat_gbl frc_strs_iat_gbl,contains the Force Stree Iat on Host */ -void cal_init(ForceStressIat& f_s_iat, +void cal_init(frc_strs_iat& f_s_iat, const int stream_num, const int cuda_block, const int atom_num_grid, const int max_size, - ForceStressIatGlobal& f_s_iatg); + frc_strs_iat_gbl& f_s_iatg); /** * @brief GridParameter memCpy,from Host to Device * @@ -221,21 +221,21 @@ void cal_init(ForceStressIat& f_s_iat, * @param nbz int,stand for the number of Z-axis * @param atom_num_grid in force calculate,used for Block nums */ -void para_mem_copy(SGridParameter& para, +void para_mem_copy(grid_para& para, const Grid_Technique& gridt, const int nbz, const int atom_num_grid); /** * @brief Force Stress Force Iat memCpy,from Host to Device * - * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device + * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device * and Host * @param gridt Grid_Technique,stored the major method in the the gint. * @param atom_num_grid in force calculate,used for Block nums * @param cuda_block in stress compute,used for Block nums * @param stream_num int , record the stream in GPU */ -void cal_mem_cpy(ForceStressIat& f_s_iat, +void cal_mem_cpy(frc_strs_iat& f_s_iat, const Grid_Technique& gridt, const int atom_num_grid, const int cuda_block, @@ -243,23 +243,23 @@ void cal_mem_cpy(ForceStressIat& f_s_iat, /** * @brief Force Calculate on Host * - * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device + * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device * and Host * @param force stored the force for each atom on each directions * @param atom_num_grid in force calculate,used for Block nums */ -void cal_force_add(ForceStressIat& f_s_iat, +void cal_force_add(frc_strs_iat& f_s_iat, std::vector& force, const int atom_num_grid); /** * @brief Stress Calculate on Host * - * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device + * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device * and Host * @param stress stored the stress for each directions * @param cuda_block in stress compute,used for Block nums */ -void cal_stress_add(ForceStressIat& f_s_iat, +void cal_stress_add(frc_strs_iat& f_s_iat, std::vector& stress, const int cuda_block); } // namespace GintKernel diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu index a9c6d86632..29e71d03b1 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu +++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu @@ -14,7 +14,7 @@ namespace GintKernel // Function to calculate forces using GPU-accelerated gamma point Gint /** - * @brief Calculate forces and stresses for the `gint_gamma_force_gpu` function. + * @brief Calculate forces and stresses for the `gint_fvl_gamma_gpu` function. * * This function calculates forces and stresses based on given parameters. * @@ -30,7 +30,7 @@ namespace GintKernel */ /** * Function to calculate forces using GPU-accelerated gamma point Gint - * @brief Calculate forces and stresses for the `gint_gamma_force_gpu` function. + * @brief Calculate forces and stresses for the `gint_fvl_gamma_gpu` function. * * This function calculates forces and stresses based on given parameters. * @@ -55,7 +55,7 @@ namespace GintKernel * 6. force dot on the GPU. * 7. Copy the results back to the host. */ -void gint_gamma_force_gpu(hamilt::HContainer* dm, +void gint_fvl_gamma_gpu(hamilt::HContainer* dm, const double vfactor, const double* vlocal, std::vector& force, @@ -79,9 +79,9 @@ void gint_gamma_force_gpu(hamilt::HContainer* dm, = std::min(64, (gridt.psir_size + cuda_threads - 1) / cuda_threads); int iter_num = 0; DensityMat denstiy_mat; - ForceStressIatGlobal f_s_iat_dev; - SGridParameter para; - ForceStressIat f_s_iat; + frc_strs_iat_gbl f_s_iat_dev; + grid_para para; + frc_strs_iat f_s_iat; calculateInit(denstiy_mat, f_s_iat_dev, diff --git a/source/module_hamilt_lcao/module_gint/gtask_force.cpp b/source/module_hamilt_lcao/module_gint/gtask_force.cpp index 7c30d3db83..e785b4192b 100644 --- a/source/module_hamilt_lcao/module_gint/gtask_force.cpp +++ b/source/module_hamilt_lcao/module_gint/gtask_force.cpp @@ -56,7 +56,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt, int& max_m, int& max_n, int& atom_pair_num, - SGridParameter& para) + grid_para& para) { const int grid_index_ij = i * gridt.nby * gridt.nbzp + j * gridt.nbzp; const int nwmax = ucell.nwmax; diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu index f93db3d417..ae89dd2f2c 100644 --- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu +++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu @@ -253,7 +253,7 @@ __global__ void dot_product_force(double* psir_lx, } } void calculateInit(DensityMat& denstiy_mat, - ForceStressIatGlobal& f_s_iat_dev, + frc_strs_iat_gbl& f_s_iat_dev, hamilt::HContainer* dm, const Grid_Technique& gridt, const UnitCell& ucell, @@ -300,7 +300,7 @@ void calculateInit(DensityMat& denstiy_mat, * @param nbz int,stand for the number of Z-axis * @param gridt Grid_Technique,stored the major method in the the gint. */ -void para_init(SGridParameter& para, +void para_init(grid_para& para, int iter_num, int nbz, const Grid_Technique& gridt) @@ -377,23 +377,23 @@ void para_init(SGridParameter& para, = &gridt.ap_output_gbl_g[gridt.atom_pair_nbz * para.stream_num]; } /** - * @brief ForceStressIat on host and device Init + * @brief frc_strs_iat on host and device Init * * GridParameter init * - * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Host + * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Host * @param stream_num int , record the stream in GPU * @param cuda_block in stress compute,used for Block nums * @param atom_num_grid in force calculate,used for Block nums * @param max_size Maximum size of atoms on a grid. - * @param ForceStressIatGlobal ForceStressIatGlobal,contains the Force Stree Iat on Host + * @param frc_strs_iat_gbl frc_strs_iat_gbl,contains the Force Stree Iat on Host */ -void cal_init(ForceStressIat& f_s_iat, +void cal_init(frc_strs_iat& f_s_iat, const int stream_num, const int cuda_block, const int atom_num_grid, const int max_size, - ForceStressIatGlobal& f_s_iat_dev) + frc_strs_iat_gbl& f_s_iat_dev) { const int iat_min = -max_size - 1; f_s_iat.stress_host = new double[6 * cuda_block]; @@ -423,7 +423,7 @@ void cal_init(ForceStressIat& f_s_iat, * @param nbz int,stand for the number of Z-axis * @param atom_num_grid in force calculate,used for Block nums */ -void para_mem_copy(SGridParameter& para, +void para_mem_copy(grid_para& para, const Grid_Technique& gridt, const int nbz, const int atom_num_grid) @@ -536,14 +536,14 @@ void para_mem_copy(SGridParameter& para, /** * @brief Force Stress Force Iat memCpy,from Host to Device * - * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device + * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device * and Host * @param gridt Grid_Technique,stored the major method in the the gint. * @param atom_num_grid in force calculate,used for Block nums * @param cuda_block in stress compute,used for Block nums * @param stream_num int , record the stream in GPU */ -void cal_mem_cpy(ForceStressIat& f_s_iat, +void cal_mem_cpy(frc_strs_iat& f_s_iat, const Grid_Technique& gridt, const int atom_num_grid, const int cuda_block, @@ -566,12 +566,12 @@ void cal_mem_cpy(ForceStressIat& f_s_iat, /* * @brief Force Calculate on Host * - * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device + * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device * and Host * @param force stored the force for each atom on each directions * @param atom_num_grid in force calculate,used for Block nums */ -void cal_force_add(ForceStressIat& f_s_iat, +void cal_force_add(frc_strs_iat& f_s_iat, std::vector& force, const int atom_num_grid) { @@ -595,12 +595,12 @@ void cal_force_add(ForceStressIat& f_s_iat, /** * @brief Stress Calculate on Host * - * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device + * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device * and Host * @param stress stored the stress for each directions * @param cuda_block in stress compute,used for Block nums */ -void cal_stress_add(ForceStressIat& f_s_iat, +void cal_stress_add(frc_strs_iat& f_s_iat, std::vector& stress, const int cuda_block) { From 7056269aba6b136b2079592b95a16123f0278b8e Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Sun, 26 May 2024 21:12:14 +0800 Subject: [PATCH 19/23] add time ticker and fix nspin transport --- .../module_hamilt_lcao/module_gint/gint.cpp | 77 +++++++++---------- .../module_gint/gint_force_gpu.cu | 1 - 2 files changed, 37 insertions(+), 41 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp index 6a1ad203ee..8c558232ae 100644 --- a/source/module_hamilt_lcao/module_gint/gint.cpp +++ b/source/module_hamilt_lcao/module_gint/gint.cpp @@ -133,48 +133,45 @@ void Gint::cal_gint(Gint_inout* inout) int nat = ucell.nat; const int isforce = inout->isforce; const int isstress =inout->isstress; - // for (int is = 0; is < GlobalV::NSPIN; ++is) - // { - if (isforce || isstress){ - std::vector force(nat * 3, 0.0); - std::vector stress(6, 0.0); - GintKernel::gint_fvl_gamma_gpu(this->DMRGint[inout->ispin], - ucell.omega - / this->ncxyz, - inout->vl, - force, - stress, - this->nplane, - dr, - rcut, - isforce, - isstress, - *this->gridt, - ucell); - - if (inout->isforce) + ModuleBase::TITLE("Gint_interface","cal_force_gpu"); + ModuleBase::timer::tick("Gint_interface","cal_force_gpu"); + if (isforce || isstress){ + std::vector force(nat * 3, 0.0); + std::vector stress(6, 0.0); + GintKernel::gint_fvl_gamma_gpu(this->DMRGint[inout->ispin], + ucell.omega + / this->ncxyz, + inout->vl, + force, + stress, + this->nplane, + dr, + rcut, + isforce, + isstress, + *this->gridt, + ucell); + if (inout->isforce) + { + for (int iat = 0; iat < nat; iat++) { - for (int iat = 0; iat < nat; iat++) - { - inout->fvl_dphi[0](iat, 0) += force[iat * 3]; - inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1]; - inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2]; - } - } - if (inout->isstress){ - inout->svl_dphi[0](0, 0) += stress[0]; - inout->svl_dphi[0](0, 1) += stress[1]; - inout->svl_dphi[0](0, 2) += stress[2]; - inout->svl_dphi[0](1, 1) += stress[3]; - inout->svl_dphi[0](1, 2) += stress[4]; - inout->svl_dphi[0](2, 2) += stress[5]; - + inout->fvl_dphi[0](iat, 0) = force[iat * 3]; + inout->fvl_dphi[0](iat, 1) = force[iat * 3 + 1]; + inout->fvl_dphi[0](iat, 2) = force[iat * 3 + 2]; } - force.clear(); - stress.clear(); - } - - // } + } + if (inout->isstress){ + inout->svl_dphi[0](0, 0) = stress[0]; + inout->svl_dphi[0](0, 1) = stress[1]; + inout->svl_dphi[0](0, 2) = stress[2]; + inout->svl_dphi[0](1, 1) = stress[3]; + inout->svl_dphi[0](1, 2) = stress[4]; + inout->svl_dphi[0](2, 2) = stress[5]; + } + force.clear(); + stress.clear(); + } + ModuleBase::timer::tick("Gint_interface","cal_force_gpu"); } } else diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu index 29e71d03b1..38decf7de0 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu +++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu @@ -8,7 +8,6 @@ #include "kernels/cuda/gint_force.cuh" #include "module_base/ylm.h" #include "module_hamilt_lcao/module_gint/gint_tools.h" - namespace GintKernel { From 7ccc51f1d2c15b7f8725f8fdcf7c3e44d389acad Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Sun, 26 May 2024 21:13:02 +0800 Subject: [PATCH 20/23] delete printf in files --- source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu index ae89dd2f2c..a86c16cb5f 100644 --- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu +++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu @@ -612,7 +612,6 @@ void cal_stress_add(frc_strs_iat& f_s_iat, { for (int index = 0; index < cuda_block; index++) { - // printf("the stress is %f\n",stress[i]); stress[i] += f_s_iat.stress_host[i * cuda_block + index]; } } From 60b1b4d39599b6de76cc0b51fac6a4bb640639b3 Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Sun, 26 May 2024 21:50:06 +0800 Subject: [PATCH 21/23] fix test bug and fix grid_size --- source/module_hamilt_lcao/module_gint/gint.cpp | 18 +++++++++--------- .../module_gint/gint_force_gpu.cu | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp index 8c558232ae..ae3205a187 100644 --- a/source/module_hamilt_lcao/module_gint/gint.cpp +++ b/source/module_hamilt_lcao/module_gint/gint.cpp @@ -155,18 +155,18 @@ void Gint::cal_gint(Gint_inout* inout) { for (int iat = 0; iat < nat; iat++) { - inout->fvl_dphi[0](iat, 0) = force[iat * 3]; - inout->fvl_dphi[0](iat, 1) = force[iat * 3 + 1]; - inout->fvl_dphi[0](iat, 2) = force[iat * 3 + 2]; + inout->fvl_dphi[0](iat, 0) += force[iat * 3]; + inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1]; + inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2]; } } if (inout->isstress){ - inout->svl_dphi[0](0, 0) = stress[0]; - inout->svl_dphi[0](0, 1) = stress[1]; - inout->svl_dphi[0](0, 2) = stress[2]; - inout->svl_dphi[0](1, 1) = stress[3]; - inout->svl_dphi[0](1, 2) = stress[4]; - inout->svl_dphi[0](2, 2) = stress[5]; + inout->svl_dphi[0](0, 0) += stress[0]; + inout->svl_dphi[0](0, 1) += stress[1]; + inout->svl_dphi[0](0, 2) += stress[2]; + inout->svl_dphi[0](1, 1) += stress[3]; + inout->svl_dphi[0](1, 2) += stress[4]; + inout->svl_dphi[0](2, 2) += stress[5]; } force.clear(); stress.clear(); diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu index 38decf7de0..3718f48819 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu +++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu @@ -105,7 +105,7 @@ void gint_fvl_gamma_gpu(hamilt::HContainer* dm, int max_m = 0; int max_n = 0; int atom_pair_num = 0; - dim3 grid_psi(nbz, 8); + dim3 grid_psi(nbz, 32); dim3 block_psi(64); dim3 grid_dot_force(cuda_block); dim3 block_dot_force(cuda_threads); From 5359609d2e4d8733157b85934f4a3e82e555f3c0 Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Mon, 27 May 2024 09:41:27 +0800 Subject: [PATCH 22/23] init nstreams --- source/module_hamilt_lcao/module_gint/grid_technique.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h index 19f02878cf..b51cb30686 100644 --- a/source/module_hamilt_lcao/module_gint/grid_technique.h +++ b/source/module_hamilt_lcao/module_gint/grid_technique.h @@ -163,7 +163,7 @@ class Grid_Technique : public Grid_MeshBall int atom_pair_mesh; int atom_pair_nbz; - int nstreams ; + int nstreams=4 ; cudaStream_t* streams; // streams[nstreams] // TODO it needs to be implemented through configuration files From 25d40d173a5fb21841edb47d0175bc2b8960f07d Mon Sep 17 00:00:00 2001 From: A-006 <3158793232@qq.com> Date: Mon, 27 May 2024 17:10:23 +0800 Subject: [PATCH 23/23] move cpp function from cu file to cpp file --- .../module_gint/gint_force.h | 2 + .../module_gint/gint_force_gpu.cu | 19 +- .../module_gint/gtask_force.cpp | 365 ++++++++++++++++++ .../module_gint/kernels/cuda/gint_force.cu | 363 ----------------- 4 files changed, 378 insertions(+), 371 deletions(-) diff --git a/source/module_hamilt_lcao/module_gint/gint_force.h b/source/module_hamilt_lcao/module_gint/gint_force.h index 51851082de..c4118f4611 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force.h +++ b/source/module_hamilt_lcao/module_gint/gint_force.h @@ -192,6 +192,7 @@ void allocateDm(double* matrix_host, void para_init(grid_para& para, const int iter_num, const int nbz, + const int pipeline_index, const Grid_Technique& gridt); /** * @brief frc_strs_iat on host and device Init @@ -224,6 +225,7 @@ void cal_init(frc_strs_iat& f_s_iat, void para_mem_copy(grid_para& para, const Grid_Technique& gridt, const int nbz, + const int pipeline_index, const int atom_num_grid); /** * @brief Force Stress Force Iat memCpy,from Host to Device diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu index 3718f48819..5824ba494b 100644 --- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu +++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu @@ -77,6 +77,7 @@ void gint_fvl_gamma_gpu(hamilt::HContainer* dm, const int cuda_block = std::min(64, (gridt.psir_size + cuda_threads - 1) / cuda_threads); int iter_num = 0; + int pipeline_index = 0; DensityMat denstiy_mat; frc_strs_iat_gbl f_s_iat_dev; grid_para para; @@ -112,9 +113,10 @@ void gint_fvl_gamma_gpu(hamilt::HContainer* dm, dim3 grid_dot(cuda_block); dim3 block_dot(cuda_threads); - para_init(para, iter_num, nbz, gridt); + pipeline_index = iter_num % gridt.nstreams; + para_init(para, iter_num, nbz, pipeline_index,gridt); cal_init(f_s_iat, - para.stream_num, + pipeline_index, cuda_block, atom_num_grid, max_size, @@ -141,19 +143,20 @@ void gint_fvl_gamma_gpu(hamilt::HContainer* dm, para_mem_copy(para, gridt, nbz, + pipeline_index, atom_num_grid); cal_mem_cpy(f_s_iat, gridt, atom_num_grid, cuda_block, - para.stream_num); - checkCuda(cudaStreamSynchronize(gridt.streams[para.stream_num])); + pipeline_index); + checkCuda(cudaStreamSynchronize(gridt.streams[pipeline_index])); /* cuda stream compute and Multiplication of multinomial matrices */ get_psi_force<<>>( + gridt.streams[pipeline_index]>>>( gridt.ylmcoef_g, dr, gridt.bxyz, @@ -192,14 +195,14 @@ void gint_fvl_gamma_gpu(hamilt::HContainer* dm, para.matrix_C_device, para.ldc_device, atom_pair_num, - gridt.streams[para.stream_num], + gridt.streams[pipeline_index], nullptr); /* force compute in GPU */ if (isforce){ dot_product_force<<>>( + gridt.streams[pipeline_index]>>>( para.psir_lx_device, para.psir_ly_device, para.psir_lz_device, @@ -215,7 +218,7 @@ void gint_fvl_gamma_gpu(hamilt::HContainer* dm, dot_product_stress<<>>( + gridt.streams[pipeline_index]>>>( para.psir_lxx_device, para.psir_lxy_device, para.psir_lxz_device, diff --git a/source/module_hamilt_lcao/module_gint/gtask_force.cpp b/source/module_hamilt_lcao/module_gint/gtask_force.cpp index e785b4192b..9aa7e9c386 100644 --- a/source/module_hamilt_lcao/module_gint/gtask_force.cpp +++ b/source/module_hamilt_lcao/module_gint/gtask_force.cpp @@ -259,5 +259,370 @@ void allocateDm(double* matrixHost, } return; } +void calculateInit(DensityMat& denstiy_mat, + frc_strs_iat_gbl& f_s_iat_dev, + hamilt::HContainer* dm, + const Grid_Technique& gridt, + const UnitCell& ucell, + int lgd, + int cuda_block, + int atom_num_grid) +{ + denstiy_mat.density_mat_h = new double[lgd * lgd]; + allocateDm(denstiy_mat.density_mat_h, dm, gridt, ucell); + + checkCuda(cudaMalloc((void**)&denstiy_mat.density_mat_d, + lgd * lgd * sizeof(double))); + checkCuda(cudaMemcpy(denstiy_mat.density_mat_d, + denstiy_mat.density_mat_h, + lgd * lgd * sizeof(double), + cudaMemcpyHostToDevice)); + + checkCuda(cudaMalloc((void**)&f_s_iat_dev.stress_global, + 6 * cuda_block * gridt.nstreams * sizeof(double))); + checkCuda(cudaMemset(f_s_iat_dev.stress_global, + 0, + 6 * cuda_block * gridt.nstreams * sizeof(double))); + + checkCuda(cudaMalloc((void**)&f_s_iat_dev.force_global, + 3 * atom_num_grid * gridt.nstreams * sizeof(double))); + checkCuda(cudaMemset(f_s_iat_dev.force_global, + 0, + 3 * atom_num_grid * gridt.nstreams * sizeof(double))); + + checkCuda(cudaMalloc((void**)&f_s_iat_dev.iat_global, + atom_num_grid * gridt.nstreams * sizeof(int))); + checkCuda(cudaMemset(f_s_iat_dev.iat_global, + 0, + atom_num_grid * gridt.nstreams * sizeof(int))); +} + +/** + * @brief grid parameter Init + * + * GridParameter init + * + * @param para double *,contained the destiyMatHost + * @param iter_num int , used for calcute the stream + * @param nbz int,stand for the number of Z-axis + * @param gridt Grid_Technique,stored the major method in the the gint. + */ +void para_init(grid_para& para, + const int iter_num, + const int nbz, + const int pipeline_index, + const Grid_Technique& gridt) +{ + + // stream_num stand for nstreams + + //input_dou and input _int used for the Spherical Harmonics + para.input_dou + = &gridt.psi_dbl_gbl[gridt.psi_size_max * pipeline_index * 5]; + para.input_int + = &gridt.psi_int_gbl[gridt.psi_size_max * pipeline_index * 2]; + para.num_psir = &gridt.num_psir_gbl[nbz * pipeline_index]; + //one dimension,record the length and the leading dimension of three matrix + para.atom_pair_A_m + = &gridt.l_info_global[gridt.atom_pair_nbz * pipeline_index]; + para.atom_pair_B_n + = &gridt.r_info_global[gridt.atom_pair_nbz * pipeline_index]; + para.atom_pair_K + = &gridt.k_info_global[gridt.atom_pair_nbz * pipeline_index]; + para.atom_pair_lda + = &gridt.lda_info_global[gridt.atom_pair_nbz * pipeline_index]; + para.atom_pair_ldb + = &gridt.ldb_info_global[gridt.atom_pair_nbz * pipeline_index]; + para.atom_pair_ldc + = &gridt.ldc_info_global[gridt.atom_pair_nbz * pipeline_index]; + //input_double_g and input_int_g used for the Spherical Harmonics on GPU + para.input_double_g + = &gridt.psi_dbl_gbl_g[gridt.psi_size_max * pipeline_index * 5]; + para.input_int_g + = &gridt.psi_int_gbl_g[gridt.psi_size_max * pipeline_index * 2]; + para.num_psir_g = &gridt.num_psir_gbl_g[nbz * pipeline_index]; + para.psir_dm_device = &gridt.dm_global_g[gridt.psir_size * pipeline_index]; + para.psir_r_device + = &gridt.right_global_g[gridt.psir_size * pipeline_index]; + //psi function ,record the force in x y z,and the stress in six dimension + para.psir_lx_device = &gridt.d_left_x_g[gridt.psir_size * pipeline_index]; + para.psir_ly_device = &gridt.d_left_y_g[gridt.psir_size * pipeline_index]; + para.psir_lz_device = &gridt.d_left_z_g[gridt.psir_size * pipeline_index]; + para.psir_lxx_device + = &gridt.dd_left_xx_g[gridt.psir_size * pipeline_index]; + para.psir_lxy_device + = &gridt.dd_left_xy_g[gridt.psir_size * pipeline_index]; + para.psir_lxz_device + = &gridt.dd_left_xz_g[gridt.psir_size * pipeline_index]; + para.psir_lyy_device + = &gridt.dd_left_yy_g[gridt.psir_size * pipeline_index]; + para.psir_lyz_device + = &gridt.dd_left_yz_g[gridt.psir_size * pipeline_index]; + para.psir_lzz_device + = &gridt.dd_left_zz_g[gridt.psir_size * pipeline_index]; + //one dimension,record the length and the leading dimension of three matrix on GPU + para.A_m_device + = &gridt.l_info_global_g[gridt.atom_pair_nbz * pipeline_index]; + para.B_n_device + = &gridt.r_info_global_g[gridt.atom_pair_nbz * pipeline_index]; + para.K_device + = &gridt.k_info_global_g[gridt.atom_pair_nbz * pipeline_index]; + para.lda_device + = &gridt.lda_info_gbl_g[gridt.atom_pair_nbz * pipeline_index]; + para.ldb_device + = &gridt.ldb_info_gbl_g[gridt.atom_pair_nbz * pipeline_index]; + para.ldc_device + = &gridt.ldc_info_gbl_g[gridt.atom_pair_nbz * pipeline_index]; + //two dimension,record number to compute + para.matrix_A = &gridt.ap_left_gbl[gridt.atom_pair_nbz * pipeline_index]; + para.matrix_B = &gridt.ap_right_gbl[gridt.atom_pair_nbz * pipeline_index]; + para.matrix_C = &gridt.ap_output_gbl[gridt.atom_pair_nbz * pipeline_index]; + para.matrix_A_device + = &gridt.ap_left_gbl_g[gridt.atom_pair_nbz * pipeline_index]; + para.matrix_B_device + = &gridt.ap_right_gbl_g[gridt.atom_pair_nbz * pipeline_index]; + para.matrix_C_device + = &gridt.ap_output_gbl_g[gridt.atom_pair_nbz * pipeline_index]; +} +/** + * @brief frc_strs_iat on host and device Init + * + * GridParameter init + * + * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Host + * @param stream_num int , record the stream in GPU + * @param cuda_block in stress compute,used for Block nums + * @param atom_num_grid in force calculate,used for Block nums + * @param max_size Maximum size of atoms on a grid. + * @param frc_strs_iat_gbl frc_strs_iat_gbl,contains the Force Stree Iat on Host + */ +void cal_init(frc_strs_iat& f_s_iat, + const int stream_num, + const int cuda_block, + const int atom_num_grid, + const int max_size, + frc_strs_iat_gbl& f_s_iat_dev) +{ + const int iat_min = -max_size - 1; + f_s_iat.stress_host = new double[6 * cuda_block]; + f_s_iat.stress_device + = &f_s_iat_dev.stress_global[6 * cuda_block * stream_num]; + f_s_iat.force_device + = &f_s_iat_dev.force_global[3 * atom_num_grid * stream_num]; + f_s_iat.iat_device + = &f_s_iat_dev.iat_global[atom_num_grid * stream_num]; + f_s_iat.iat_host = new int[atom_num_grid]; + for (int index = 0; index < atom_num_grid; index++) + { + f_s_iat.iat_host[index] = iat_min; + } + f_s_iat.force_host = new double[3 * atom_num_grid]; + ModuleBase::GlobalFunc::ZEROS(f_s_iat.force_host, + 3 * atom_num_grid); +} +/** + * @brief GridParameter memCpy,from Host to Device + * + * parameter init,which contains the gpu task and multi matrix multiplication + * + * @param para Grid parameter in task generator, + * @param gridt Grid_Technique,stored the major method in the the gint. + * @param nbz int,stand for the number of Z-axis + * @param atom_num_grid in force calculate,used for Block nums + */ +void para_mem_copy(grid_para& para, + const Grid_Technique& gridt, + const int nbz, + const int pipeline_index, + const int atom_num_grid) +{ + checkCuda(cudaMemcpyAsync(para.input_double_g, + para.input_dou, + gridt.psi_size_max * 5 * sizeof(double), + cudaMemcpyHostToDevice, + gridt.streams[pipeline_index])); + checkCuda(cudaMemcpyAsync(para.input_int_g, + para.input_int, + gridt.psi_size_max * 2 * sizeof(int), + cudaMemcpyHostToDevice, + gridt.streams[pipeline_index])); + checkCuda(cudaMemcpyAsync(para.num_psir_g, + para.num_psir, + nbz * sizeof(int), + cudaMemcpyHostToDevice, + gridt.streams[pipeline_index])); + checkCuda(cudaMemcpyAsync(para.A_m_device, + para.atom_pair_A_m, + gridt.atom_pair_nbz * sizeof(int), + cudaMemcpyHostToDevice, + gridt.streams[pipeline_index])); + checkCuda(cudaMemcpyAsync(para.B_n_device, + para.atom_pair_B_n, + gridt.atom_pair_nbz * sizeof(int), + cudaMemcpyHostToDevice, + gridt.streams[pipeline_index])); + checkCuda(cudaMemcpyAsync(para.K_device, + para.atom_pair_K, + gridt.atom_pair_nbz * sizeof(int), + cudaMemcpyHostToDevice, + gridt.streams[pipeline_index])); + checkCuda(cudaMemcpyAsync(para.lda_device, + para.atom_pair_lda, + gridt.atom_pair_nbz * sizeof(int), + cudaMemcpyHostToDevice, + gridt.streams[pipeline_index])); + checkCuda(cudaMemcpyAsync(para.ldb_device, + para.atom_pair_ldb, + gridt.atom_pair_nbz * sizeof(int), + cudaMemcpyHostToDevice, + gridt.streams[pipeline_index])); + checkCuda(cudaMemcpyAsync(para.ldc_device, + para.atom_pair_ldc, + gridt.atom_pair_nbz * sizeof(int), + cudaMemcpyHostToDevice, + gridt.streams[pipeline_index])); + checkCuda(cudaMemcpyAsync(para.matrix_A_device, + para.matrix_A, + gridt.atom_pair_nbz * sizeof(double*), + cudaMemcpyHostToDevice, + gridt.streams[pipeline_index])); + checkCuda(cudaMemcpyAsync(para.matrix_B_device, + para.matrix_B, + gridt.atom_pair_nbz * sizeof(double*), + cudaMemcpyHostToDevice, + gridt.streams[pipeline_index])); + checkCuda(cudaMemcpyAsync(para.matrix_C_device, + para.matrix_C, + gridt.atom_pair_nbz * sizeof(double*), + cudaMemcpyHostToDevice, + gridt.streams[pipeline_index])); + checkCuda(cudaMemsetAsync(para.psir_dm_device, + 0, + gridt.psir_size * sizeof(double), + gridt.streams[pipeline_index])); + checkCuda(cudaMemsetAsync(para.psir_r_device, + 0, + gridt.psir_size * sizeof(double), + gridt.streams[pipeline_index])); + checkCuda(cudaMemsetAsync(para.psir_lx_device, + 0, + gridt.psir_size * sizeof(double), + gridt.streams[pipeline_index])); + checkCuda(cudaMemsetAsync(para.psir_ly_device, + 0, + gridt.psir_size * sizeof(double), + gridt.streams[pipeline_index])); + checkCuda(cudaMemsetAsync(para.psir_lz_device, + 0, + gridt.psir_size * sizeof(double), + gridt.streams[pipeline_index])); + checkCuda(cudaMemsetAsync(para.psir_lxx_device, + 0, + gridt.psir_size * sizeof(double), + gridt.streams[pipeline_index])); + checkCuda(cudaMemsetAsync(para.psir_lxy_device, + 0, + gridt.psir_size * sizeof(double), + gridt.streams[pipeline_index])); + checkCuda(cudaMemsetAsync(para.psir_lxz_device, + 0, + gridt.psir_size * sizeof(double), + gridt.streams[pipeline_index])); + checkCuda(cudaMemsetAsync(para.psir_lyy_device, + 0, + gridt.psir_size * sizeof(double), + gridt.streams[pipeline_index])); + checkCuda(cudaMemsetAsync(para.psir_lyz_device, + 0, + gridt.psir_size * sizeof(double), + gridt.streams[pipeline_index])); + checkCuda(cudaMemsetAsync(para.psir_lzz_device, + 0, + gridt.psir_size * sizeof(double), + gridt.streams[pipeline_index])); +} +/** + * @brief Force Stress Force Iat memCpy,from Host to Device + * + * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device + * and Host + * @param gridt Grid_Technique,stored the major method in the the gint. + * @param atom_num_grid in force calculate,used for Block nums + * @param cuda_block in stress compute,used for Block nums + * @param stream_num int , record the stream in GPU + */ +void cal_mem_cpy(frc_strs_iat& f_s_iat, + const Grid_Technique& gridt, + const int atom_num_grid, + const int cuda_block, + const int stream_num) +{ + checkCuda(cudaMemcpyAsync(f_s_iat.iat_device, + f_s_iat.iat_host, + atom_num_grid * sizeof(int), + cudaMemcpyHostToDevice, + gridt.streams[stream_num])); + checkCuda(cudaMemsetAsync(f_s_iat.stress_device, + 0, + 6 * cuda_block * sizeof(double), + gridt.streams[stream_num])); + checkCuda(cudaMemsetAsync(f_s_iat.force_device, + 0, + 3 * atom_num_grid * sizeof(double), + gridt.streams[stream_num])); +} +/* + * @brief Force Calculate on Host + * + * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device + * and Host + * @param force stored the force for each atom on each directions + * @param atom_num_grid in force calculate,used for Block nums + */ +void cal_force_add(frc_strs_iat& f_s_iat, + std::vector& force, + const int atom_num_grid) +{ + checkCuda(cudaMemcpy(f_s_iat.force_host, + f_s_iat.force_device, + 3 * atom_num_grid * sizeof(double), + cudaMemcpyDeviceToHost)); + for (int index1 = 0; index1 < atom_num_grid; index1++) + { + int iat1 = f_s_iat.iat_host[index1]; + if (iat1 >= 0) + { + for (int index2 = 0; index2 < 3; index2++) + { + force[iat1 * 3 + index2] + += f_s_iat.force_host[index1 * 3 + index2]; + } + } + } +} +/** + * @brief Stress Calculate on Host + * + * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device + * and Host + * @param stress stored the stress for each directions + * @param cuda_block in stress compute,used for Block nums + */ +void cal_stress_add(frc_strs_iat& f_s_iat, + std::vector& stress, + const int cuda_block) +{ + checkCuda(cudaMemcpy(f_s_iat.stress_host, + f_s_iat.stress_device, + 6 * cuda_block * sizeof(double), + cudaMemcpyDeviceToHost)); + for (int i = 0; i < 6; i++) + { + for (int index = 0; index < cuda_block; index++) + { + stress[i] += f_s_iat.stress_host[i * cuda_block + index]; + } + } +} } // namespace GintKernel diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu index a86c16cb5f..a6c8b762b2 100644 --- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu +++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu @@ -252,368 +252,5 @@ __global__ void dot_product_force(double* psir_lx, tid += blockDim.x * gridDim.x; } } -void calculateInit(DensityMat& denstiy_mat, - frc_strs_iat_gbl& f_s_iat_dev, - hamilt::HContainer* dm, - const Grid_Technique& gridt, - const UnitCell& ucell, - int lgd, - int cuda_block, - int atom_num_grid) -{ - denstiy_mat.density_mat_h = new double[lgd * lgd]; - allocateDm(denstiy_mat.density_mat_h, dm, gridt, ucell); - - checkCuda(cudaMalloc((void**)&denstiy_mat.density_mat_d, - lgd * lgd * sizeof(double))); - checkCuda(cudaMemcpy(denstiy_mat.density_mat_d, - denstiy_mat.density_mat_h, - lgd * lgd * sizeof(double), - cudaMemcpyHostToDevice)); - - checkCuda(cudaMalloc((void**)&f_s_iat_dev.stress_global, - 6 * cuda_block * gridt.nstreams * sizeof(double))); - checkCuda(cudaMemset(f_s_iat_dev.stress_global, - 0, - 6 * cuda_block * gridt.nstreams * sizeof(double))); - - checkCuda(cudaMalloc((void**)&f_s_iat_dev.force_global, - 3 * atom_num_grid * gridt.nstreams * sizeof(double))); - checkCuda(cudaMemset(f_s_iat_dev.force_global, - 0, - 3 * atom_num_grid * gridt.nstreams * sizeof(double))); - - checkCuda(cudaMalloc((void**)&f_s_iat_dev.iat_global, - atom_num_grid * gridt.nstreams * sizeof(int))); - checkCuda(cudaMemset(f_s_iat_dev.iat_global, - 0, - atom_num_grid * gridt.nstreams * sizeof(int))); -} - -/** - * @brief grid parameter Init - * - * GridParameter init - * - * @param para double *,contained the destiyMatHost - * @param iter_num int , used for calcute the stream - * @param nbz int,stand for the number of Z-axis - * @param gridt Grid_Technique,stored the major method in the the gint. - */ -void para_init(grid_para& para, - int iter_num, - int nbz, - const Grid_Technique& gridt) -{ - // stream_num stand for nstreams - para.stream_num = iter_num % gridt.nstreams; - //input_dou and input _int used for the Spherical Harmonics - para.input_dou - = &gridt.psi_dbl_gbl[gridt.psi_size_max * para.stream_num * 5]; - para.input_int - = &gridt.psi_int_gbl[gridt.psi_size_max * para.stream_num * 2]; - para.num_psir = &gridt.num_psir_gbl[nbz * para.stream_num]; - //one dimension,record the length and the leading dimension of three matrix - para.atom_pair_A_m - = &gridt.l_info_global[gridt.atom_pair_nbz * para.stream_num]; - para.atom_pair_B_n - = &gridt.r_info_global[gridt.atom_pair_nbz * para.stream_num]; - para.atom_pair_K - = &gridt.k_info_global[gridt.atom_pair_nbz * para.stream_num]; - para.atom_pair_lda - = &gridt.lda_info_global[gridt.atom_pair_nbz * para.stream_num]; - para.atom_pair_ldb - = &gridt.ldb_info_global[gridt.atom_pair_nbz * para.stream_num]; - para.atom_pair_ldc - = &gridt.ldc_info_global[gridt.atom_pair_nbz * para.stream_num]; - //input_double_g and input_int_g used for the Spherical Harmonics on GPU - para.input_double_g - = &gridt.psi_dbl_gbl_g[gridt.psi_size_max * para.stream_num * 5]; - para.input_int_g - = &gridt.psi_int_gbl_g[gridt.psi_size_max * para.stream_num * 2]; - para.num_psir_g = &gridt.num_psir_gbl_g[nbz * para.stream_num]; - para.psir_dm_device = &gridt.dm_global_g[gridt.psir_size * para.stream_num]; - para.psir_r_device - = &gridt.right_global_g[gridt.psir_size * para.stream_num]; - //psi function ,record the force in x y z,and the stress in six dimension - para.psir_lx_device = &gridt.d_left_x_g[gridt.psir_size * para.stream_num]; - para.psir_ly_device = &gridt.d_left_y_g[gridt.psir_size * para.stream_num]; - para.psir_lz_device = &gridt.d_left_z_g[gridt.psir_size * para.stream_num]; - para.psir_lxx_device - = &gridt.dd_left_xx_g[gridt.psir_size * para.stream_num]; - para.psir_lxy_device - = &gridt.dd_left_xy_g[gridt.psir_size * para.stream_num]; - para.psir_lxz_device - = &gridt.dd_left_xz_g[gridt.psir_size * para.stream_num]; - para.psir_lyy_device - = &gridt.dd_left_yy_g[gridt.psir_size * para.stream_num]; - para.psir_lyz_device - = &gridt.dd_left_yz_g[gridt.psir_size * para.stream_num]; - para.psir_lzz_device - = &gridt.dd_left_zz_g[gridt.psir_size * para.stream_num]; - //one dimension,record the length and the leading dimension of three matrix on GPU - para.A_m_device - = &gridt.l_info_global_g[gridt.atom_pair_nbz * para.stream_num]; - para.B_n_device - = &gridt.r_info_global_g[gridt.atom_pair_nbz * para.stream_num]; - para.K_device - = &gridt.k_info_global_g[gridt.atom_pair_nbz * para.stream_num]; - para.lda_device - = &gridt.lda_info_gbl_g[gridt.atom_pair_nbz * para.stream_num]; - para.ldb_device - = &gridt.ldb_info_gbl_g[gridt.atom_pair_nbz * para.stream_num]; - para.ldc_device - = &gridt.ldc_info_gbl_g[gridt.atom_pair_nbz * para.stream_num]; - //two dimension,record number to compute - para.matrix_A = &gridt.ap_left_gbl[gridt.atom_pair_nbz * para.stream_num]; - para.matrix_B = &gridt.ap_right_gbl[gridt.atom_pair_nbz * para.stream_num]; - para.matrix_C = &gridt.ap_output_gbl[gridt.atom_pair_nbz * para.stream_num]; - para.matrix_A_device - = &gridt.ap_left_gbl_g[gridt.atom_pair_nbz * para.stream_num]; - para.matrix_B_device - = &gridt.ap_right_gbl_g[gridt.atom_pair_nbz * para.stream_num]; - para.matrix_C_device - = &gridt.ap_output_gbl_g[gridt.atom_pair_nbz * para.stream_num]; -} -/** - * @brief frc_strs_iat on host and device Init - * - * GridParameter init - * - * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Host - * @param stream_num int , record the stream in GPU - * @param cuda_block in stress compute,used for Block nums - * @param atom_num_grid in force calculate,used for Block nums - * @param max_size Maximum size of atoms on a grid. - * @param frc_strs_iat_gbl frc_strs_iat_gbl,contains the Force Stree Iat on Host - */ -void cal_init(frc_strs_iat& f_s_iat, - const int stream_num, - const int cuda_block, - const int atom_num_grid, - const int max_size, - frc_strs_iat_gbl& f_s_iat_dev) -{ - const int iat_min = -max_size - 1; - f_s_iat.stress_host = new double[6 * cuda_block]; - f_s_iat.stress_device - = &f_s_iat_dev.stress_global[6 * cuda_block * stream_num]; - f_s_iat.force_device - = &f_s_iat_dev.force_global[3 * atom_num_grid * stream_num]; - f_s_iat.iat_device - = &f_s_iat_dev.iat_global[atom_num_grid * stream_num]; - f_s_iat.iat_host = new int[atom_num_grid]; - for (int index = 0; index < atom_num_grid; index++) - { - f_s_iat.iat_host[index] = iat_min; - } - f_s_iat.force_host = new double[3 * atom_num_grid]; - ModuleBase::GlobalFunc::ZEROS(f_s_iat.force_host, - 3 * atom_num_grid); -} - -/** - * @brief GridParameter memCpy,from Host to Device - * - * parameter init,which contains the gpu task and multi matrix multiplication - * - * @param para Grid parameter in task generator, - * @param gridt Grid_Technique,stored the major method in the the gint. - * @param nbz int,stand for the number of Z-axis - * @param atom_num_grid in force calculate,used for Block nums - */ -void para_mem_copy(grid_para& para, - const Grid_Technique& gridt, - const int nbz, - const int atom_num_grid) -{ - checkCuda(cudaMemcpyAsync(para.input_double_g, - para.input_dou, - gridt.psi_size_max * 5 * sizeof(double), - cudaMemcpyHostToDevice, - gridt.streams[para.stream_num])); - checkCuda(cudaMemcpyAsync(para.input_int_g, - para.input_int, - gridt.psi_size_max * 2 * sizeof(int), - cudaMemcpyHostToDevice, - gridt.streams[para.stream_num])); - checkCuda(cudaMemcpyAsync(para.num_psir_g, - para.num_psir, - nbz * sizeof(int), - cudaMemcpyHostToDevice, - gridt.streams[para.stream_num])); - checkCuda(cudaMemcpyAsync(para.A_m_device, - para.atom_pair_A_m, - gridt.atom_pair_nbz * sizeof(int), - cudaMemcpyHostToDevice, - gridt.streams[para.stream_num])); - checkCuda(cudaMemcpyAsync(para.B_n_device, - para.atom_pair_B_n, - gridt.atom_pair_nbz * sizeof(int), - cudaMemcpyHostToDevice, - gridt.streams[para.stream_num])); - checkCuda(cudaMemcpyAsync(para.K_device, - para.atom_pair_K, - gridt.atom_pair_nbz * sizeof(int), - cudaMemcpyHostToDevice, - gridt.streams[para.stream_num])); - checkCuda(cudaMemcpyAsync(para.lda_device, - para.atom_pair_lda, - gridt.atom_pair_nbz * sizeof(int), - cudaMemcpyHostToDevice, - gridt.streams[para.stream_num])); - checkCuda(cudaMemcpyAsync(para.ldb_device, - para.atom_pair_ldb, - gridt.atom_pair_nbz * sizeof(int), - cudaMemcpyHostToDevice, - gridt.streams[para.stream_num])); - checkCuda(cudaMemcpyAsync(para.ldc_device, - para.atom_pair_ldc, - gridt.atom_pair_nbz * sizeof(int), - cudaMemcpyHostToDevice, - gridt.streams[para.stream_num])); - checkCuda(cudaMemcpyAsync(para.matrix_A_device, - para.matrix_A, - gridt.atom_pair_nbz * sizeof(double*), - cudaMemcpyHostToDevice, - gridt.streams[para.stream_num])); - checkCuda(cudaMemcpyAsync(para.matrix_B_device, - para.matrix_B, - gridt.atom_pair_nbz * sizeof(double*), - cudaMemcpyHostToDevice, - gridt.streams[para.stream_num])); - checkCuda(cudaMemcpyAsync(para.matrix_C_device, - para.matrix_C, - gridt.atom_pair_nbz * sizeof(double*), - cudaMemcpyHostToDevice, - gridt.streams[para.stream_num])); - checkCuda(cudaMemsetAsync(para.psir_dm_device, - 0, - gridt.psir_size * sizeof(double), - gridt.streams[para.stream_num])); - checkCuda(cudaMemsetAsync(para.psir_r_device, - 0, - gridt.psir_size * sizeof(double), - gridt.streams[para.stream_num])); - checkCuda(cudaMemsetAsync(para.psir_lx_device, - 0, - gridt.psir_size * sizeof(double), - gridt.streams[para.stream_num])); - checkCuda(cudaMemsetAsync(para.psir_ly_device, - 0, - gridt.psir_size * sizeof(double), - gridt.streams[para.stream_num])); - checkCuda(cudaMemsetAsync(para.psir_lz_device, - 0, - gridt.psir_size * sizeof(double), - gridt.streams[para.stream_num])); - checkCuda(cudaMemsetAsync(para.psir_lxx_device, - 0, - gridt.psir_size * sizeof(double), - gridt.streams[para.stream_num])); - checkCuda(cudaMemsetAsync(para.psir_lxy_device, - 0, - gridt.psir_size * sizeof(double), - gridt.streams[para.stream_num])); - checkCuda(cudaMemsetAsync(para.psir_lxz_device, - 0, - gridt.psir_size * sizeof(double), - gridt.streams[para.stream_num])); - checkCuda(cudaMemsetAsync(para.psir_lyy_device, - 0, - gridt.psir_size * sizeof(double), - gridt.streams[para.stream_num])); - checkCuda(cudaMemsetAsync(para.psir_lyz_device, - 0, - gridt.psir_size * sizeof(double), - gridt.streams[para.stream_num])); - checkCuda(cudaMemsetAsync(para.psir_lzz_device, - 0, - gridt.psir_size * sizeof(double), - gridt.streams[para.stream_num])); -} -/** - * @brief Force Stress Force Iat memCpy,from Host to Device - * - * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device - * and Host - * @param gridt Grid_Technique,stored the major method in the the gint. - * @param atom_num_grid in force calculate,used for Block nums - * @param cuda_block in stress compute,used for Block nums - * @param stream_num int , record the stream in GPU - */ -void cal_mem_cpy(frc_strs_iat& f_s_iat, - const Grid_Technique& gridt, - const int atom_num_grid, - const int cuda_block, - const int stream_num) -{ - checkCuda(cudaMemcpyAsync(f_s_iat.iat_device, - f_s_iat.iat_host, - atom_num_grid * sizeof(int), - cudaMemcpyHostToDevice, - gridt.streams[stream_num])); - checkCuda(cudaMemsetAsync(f_s_iat.stress_device, - 0, - 6 * cuda_block * sizeof(double), - gridt.streams[stream_num])); - checkCuda(cudaMemsetAsync(f_s_iat.force_device, - 0, - 3 * atom_num_grid * sizeof(double), - gridt.streams[stream_num])); -} -/* - * @brief Force Calculate on Host - * - * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device - * and Host - * @param force stored the force for each atom on each directions - * @param atom_num_grid in force calculate,used for Block nums - */ -void cal_force_add(frc_strs_iat& f_s_iat, - std::vector& force, - const int atom_num_grid) -{ - checkCuda(cudaMemcpy(f_s_iat.force_host, - f_s_iat.force_device, - 3 * atom_num_grid * sizeof(double), - cudaMemcpyDeviceToHost)); - for (int index1 = 0; index1 < atom_num_grid; index1++) - { - int iat1 = f_s_iat.iat_host[index1]; - if (iat1 >= 0) - { - for (int index2 = 0; index2 < 3; index2++) - { - force[iat1 * 3 + index2] - += f_s_iat.force_host[index1 * 3 + index2]; - } - } - } -} -/** - * @brief Stress Calculate on Host - * - * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device - * and Host - * @param stress stored the stress for each directions - * @param cuda_block in stress compute,used for Block nums - */ -void cal_stress_add(frc_strs_iat& f_s_iat, - std::vector& stress, - const int cuda_block) -{ - checkCuda(cudaMemcpy(f_s_iat.stress_host, - f_s_iat.stress_device, - 6 * cuda_block * sizeof(double), - cudaMemcpyDeviceToHost)); - for (int i = 0; i < 6; i++) - { - for (int index = 0; index < cuda_block; index++) - { - stress[i] += f_s_iat.stress_host[i * cuda_block + index]; - } - } -} } // namespace GintKernel