From 44ac15341e93c432922b1dcd65300717f8e5f284 Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Mon, 6 May 2024 17:22:47 +0800
Subject: [PATCH 01/23] replace ParaV in module gint

---
 .../operator_lcao/veff_lcao.cpp               |  4 +--
 .../hamilt_lcaodft/spar_dh.cpp                |  2 +-
 .../module_gint/gint_gamma.h                  |  2 +-
 .../module_gint/gint_gamma_vl.cpp             |  2 +-
 .../module_hamilt_lcao/module_gint/gint_k.h   | 21 +++++++++-----
 .../module_gint/gint_k_pvpr.cpp               |  3 +-
 .../module_gint/gint_k_sparse.cpp             | 20 +++++++------
 .../module_gint/gint_k_sparse1.cpp            | 29 ++++++++++---------
 source/module_io/write_Vxc.hpp                |  2 +-
 9 files changed, 48 insertions(+), 37 deletions(-)

diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
index e6bfbc4c78..ceab9d6d02 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
@@ -141,12 +141,12 @@ void Veff<OperatorLCAO<double, double>>::contributeHR(void)
     if(XC_Functional::get_func_type()==3 || XC_Functional::get_func_type()==5)
     {
         Gint_inout inout(vr_eff1, vofk_eff1, Gint_Tools::job_type::vlocal_meta);
-        this->GG->cal_vlocal(&inout, this->LM, this->new_e_iteration);
+        this->GG->cal_vlocal(&inout,  this->new_e_iteration);
     }
     else
     {
         Gint_inout inout(vr_eff1, Gint_Tools::job_type::vlocal);
-        this->GG->cal_vlocal(&inout, this->LM, this->new_e_iteration);
+        this->GG->cal_vlocal(&inout,  this->new_e_iteration);
     }
     this->GG->transfer_pvpR(this->hR);
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
index f10f176474..5e3911ccb2 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
@@ -42,7 +42,7 @@ void sparse_format::cal_dH(
     delete[] lm.DHloc_fixedR_y;
     delete[] lm.DHloc_fixedR_z;
 
-    gint_k.cal_dvlocal_R_sparseMatrix(current_spin, sparse_thr, &lm);
+    gint_k.cal_dvlocal_R_sparseMatrix(current_spin, sparse_thr, &lm, lm.ParaV);
 
     return;
 }
diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma.h b/source/module_hamilt_lcao/module_gint/gint_gamma.h
index 2e053a913a..d6f31da80e 100644
--- a/source/module_hamilt_lcao/module_gint/gint_gamma.h
+++ b/source/module_hamilt_lcao/module_gint/gint_gamma.h
@@ -33,7 +33,7 @@ class Gint_Gamma : public Gint
     // there is an additional step in calculating vlocal for gamma point
     // namely the redistribution of Hamiltonian from grid to 2D block format
     // hence we have an additional layer outside the unified interface
-    void cal_vlocal(Gint_inout* inout, LCAO_Matrix* lm, const bool new_e_iteration);
+    void cal_vlocal(Gint_inout* inout, const bool new_e_iteration);
 
     //------------------------------------------------------
     // in gint_gamma_env.cpp 
diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp b/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp
index 159527102b..163f127fc2 100644
--- a/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp
@@ -28,7 +28,7 @@ extern "C"
     void Cblacs_pcoord(int icontxt, int pnum, int *prow, int *pcol);
 }
 
-void Gint_Gamma::cal_vlocal(Gint_inout* inout, LCAO_Matrix* lm, bool new_e_iteration)
+void Gint_Gamma::cal_vlocal(Gint_inout* inout,bool new_e_iteration)
 {
 	const int max_size = this->gridt->max_atom;
 	const int lgd = this->gridt->lgd;
diff --git a/source/module_hamilt_lcao/module_gint/gint_k.h b/source/module_hamilt_lcao/module_gint/gint_k.h
index 17c136b838..36c4d196da 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k.h
+++ b/source/module_hamilt_lcao/module_gint/gint_k.h
@@ -59,7 +59,7 @@ class Gint_k : public Gint
     // <phi_0i | V | phi_0j>
     // V is (Vl + Vh + Vxc) if no Vna is used,
     // and is (Vna + delta_Vh + Vxc) if Vna is used.
-    void folding_vl_k(const int &ik, LCAO_Matrix* LM, const std::vector<ModuleBase::Vector3<double>>& kvec_d);
+    void folding_vl_k(const int &ik, LCAO_Matrix* LM, Parallel_Orbitals *PV,const std::vector<ModuleBase::Vector3<double>>& kvec_d);
 
     /**
      * @brief transfer pvpR to this->hRGint
@@ -88,19 +88,23 @@ class Gint_k : public Gint
         const double &sparse_threshold, 
         const std::map<Abfs::Vector3_Order<int>,
         std::map<size_t, std::map<size_t, double>>> &pvpR_sparseMatrix,
-        LCAO_Matrix *LM);
+        LCAO_Matrix *LM,
+        Parallel_Orbitals *PV);
 
     void distribute_pvpR_soc_sparseMatrix(
         const double &sparse_threshold, 
         const std::map<Abfs::Vector3_Order<int>,
         std::map<size_t,
         std::map<size_t, std::complex<double>>>> &pvpR_soc_sparseMatrix,
-        LCAO_Matrix *LM);
+        LCAO_Matrix *LM,
+        Parallel_Orbitals *PV
+        );
 
     void cal_vlocal_R_sparseMatrix(
         const int &current_spin,
         const double &sparse_threshold,
-        LCAO_Matrix *LM);
+        LCAO_Matrix *LM,
+        Parallel_Orbitals *PV);
 
     //------------------------------------------------------
     // in gint_k_sparse1.cpp 
@@ -112,19 +116,22 @@ class Gint_k : public Gint
         const double &sparse_threshold, 
         const std::map<Abfs::Vector3_Order<int>,
         std::map<size_t, std::map<size_t, double>>> &pvdpR_sparseMatrix,
-        LCAO_Matrix *LM);
+        LCAO_Matrix *LM,
+        Parallel_Orbitals *PV);
 
     void distribute_pvdpR_soc_sparseMatrix(
         const int dim,
         const double &sparse_threshold, 
         const std::map<Abfs::Vector3_Order<int>,
         std::map<size_t, std::map<size_t, std::complex<double>>>> &pvdpR_soc_sparseMatrix,
-        LCAO_Matrix *LM);
+        LCAO_Matrix *LM,
+        Parallel_Orbitals *PV);
 
     void cal_dvlocal_R_sparseMatrix(
         const int &current_spin,
         const double &sparse_threshold,
-        LCAO_Matrix *LM);
+        LCAO_Matrix *LM,
+        Parallel_Orbitals *PV);
 
     private:
 
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
index 8413035f11..693254060f 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
@@ -60,6 +60,7 @@ void Gint_k::destroy_pvpR(void)
 // H(k)=\sum{R} H(R)exp(ikR) 
 void Gint_k::folding_vl_k(const int &ik, 
                         LCAO_Matrix *LM, 
+                         Parallel_Orbitals *PV,
                         const std::vector<ModuleBase::Vector3<double>>& kvec_d)
 {
     ModuleBase::TITLE("Gint_k","folding_vl_k");
@@ -388,7 +389,7 @@ void Gint_k::folding_vl_k(const int &ik,
 #endif
         for (int j=0; j<nlocal; j++)
         {
-            if (!LM->ParaV->in_this_processor(i,j))
+            if (!PV->in_this_processor(i,j))
             {
                 continue;
             }
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp
index a3b77194a3..6bac72f1c8 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp
@@ -15,7 +15,8 @@ void Gint_k::distribute_pvpR_sparseMatrix(
     const double &sparse_threshold, 
     const std::map<Abfs::Vector3_Order<int>,
     std::map<size_t, std::map<size_t, double>>> &pvpR_sparseMatrix,
-    LCAO_Matrix *LM
+    LCAO_Matrix *LM,
+    Parallel_Orbitals *PV
 )
 {
     ModuleBase::TITLE("Gint_k","distribute_pvpR_sparseMatrix");
@@ -110,11 +111,11 @@ void Gint_k::distribute_pvpR_sparseMatrix(
                 
                 Parallel_Reduce::reduce_pool(tmp, GlobalV::NLOCAL);
 
-                if (LM->ParaV->global2local_row(row) >= 0)
+                if (PV->global2local_row(row) >= 0)
                 {
                     for(int col = 0; col < GlobalV::NLOCAL; ++col)
                     {
-                        if(LM->ParaV->global2local_col(col) >= 0)
+                        if(PV->global2local_col(col) >= 0)
                         {
                             if (std::abs(tmp[col]) > sparse_threshold)
                             {
@@ -150,7 +151,8 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix(
     const double &sparse_threshold, 
     const std::map<Abfs::Vector3_Order<int>,
     std::map<size_t, std::map<size_t, std::complex<double>>>> &pvpR_soc_sparseMatrix,
-    LCAO_Matrix *LM
+    LCAO_Matrix *LM,
+    Parallel_Orbitals *PV
 )
 {
     ModuleBase::TITLE("Gint_k","distribute_pvpR_soc_sparseMatrix");
@@ -244,11 +246,11 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix(
                 
                 Parallel_Reduce::reduce_pool(tmp_soc, GlobalV::NLOCAL);
 
-                if (LM->ParaV->global2local_row(row) >= 0)
+                if (PV->global2local_row(row) >= 0)
                 {
                     for(int col = 0; col < GlobalV::NLOCAL; ++col)
                     {
-                        if(LM->ParaV->global2local_col(col) >= 0)
+                        if(PV->global2local_col(col) >= 0)
                         {
                             if (std::abs(tmp_soc[col]) > sparse_threshold)
                             {
@@ -281,7 +283,7 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix(
 
 }
 
-void Gint_k::cal_vlocal_R_sparseMatrix(const int &current_spin, const double &sparse_threshold, LCAO_Matrix *LM)
+void Gint_k::cal_vlocal_R_sparseMatrix(const int &current_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *PV)
 {
     ModuleBase::TITLE("Gint_k","cal_vlocal_R_sparseMatrix");
 
@@ -418,11 +420,11 @@ void Gint_k::cal_vlocal_R_sparseMatrix(const int &current_spin, const double &sp
 
     if (GlobalV::NSPIN != 4)
     {
-        distribute_pvpR_sparseMatrix(current_spin, sparse_threshold, pvpR_sparseMatrix, LM);
+        distribute_pvpR_sparseMatrix(current_spin, sparse_threshold, pvpR_sparseMatrix, LM,PV);
     }
     else
     {
-        distribute_pvpR_soc_sparseMatrix(sparse_threshold, pvpR_soc_sparseMatrix, LM);
+        distribute_pvpR_soc_sparseMatrix(sparse_threshold, pvpR_soc_sparseMatrix, LM,PV);
     }
 
     return;
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp
index 36bdf3c317..eaeaede6a1 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp
@@ -15,8 +15,8 @@ void Gint_k::distribute_pvdpR_sparseMatrix(
     const int dim,
     const double &sparse_threshold, 
     const std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>> &pvdpR_sparseMatrix,
-    LCAO_Matrix *LM
-)
+    LCAO_Matrix *LM,
+    Parallel_Orbitals *PV)
 {
     ModuleBase::TITLE("Gint_k","distribute_pvdpR_sparseMatrix");
 
@@ -110,11 +110,11 @@ void Gint_k::distribute_pvdpR_sparseMatrix(
                 
                 Parallel_Reduce::reduce_pool(tmp, GlobalV::NLOCAL);
 
-                if (LM->ParaV->global2local_row(row) >= 0)
+                if (PV->global2local_row(row) >= 0)
                 {
                     for(int col = 0; col < GlobalV::NLOCAL; ++col)
                     {
-                        if(LM->ParaV->global2local_col(col) >= 0)
+                        if(PV->global2local_col(col) >= 0)
                         {
                             if (std::abs(tmp[col]) > sparse_threshold)
                             {
@@ -172,7 +172,8 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix(
     const double &sparse_threshold, 
     const std::map<Abfs::Vector3_Order<int>,
     std::map<size_t, std::map<size_t, std::complex<double>>>> &pvdpR_soc_sparseMatrix,
-    LCAO_Matrix *LM
+    LCAO_Matrix *LM,
+    Parallel_Orbitals *PV
 )
 {
     ModuleBase::TITLE("Gint_k","distribute_pvdpR_soc_sparseMatrix");
@@ -266,11 +267,11 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix(
                 
                 Parallel_Reduce::reduce_pool(tmp_soc, GlobalV::NLOCAL);
 
-                if (LM->ParaV->global2local_row(row) >= 0)
+                if (PV->global2local_row(row) >= 0)
                 {
                     for(int col = 0; col < GlobalV::NLOCAL; ++col)
                     {
-                        if(LM->ParaV->global2local_col(col) >= 0)
+                        if(PV->global2local_col(col) >= 0)
                         {
                             if (std::abs(tmp_soc[col]) > sparse_threshold)
                             {
@@ -324,7 +325,7 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix(
 
 }
 
-void Gint_k::cal_dvlocal_R_sparseMatrix(const int &current_spin, const double &sparse_threshold, LCAO_Matrix *LM)
+void Gint_k::cal_dvlocal_R_sparseMatrix(const int &current_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *PV)
 {
     ModuleBase::TITLE("Gint_k","cal_vlocal_R_sparseMatrix");
 
@@ -528,15 +529,15 @@ void Gint_k::cal_dvlocal_R_sparseMatrix(const int &current_spin, const double &s
 
     if (GlobalV::NSPIN != 4)
     {
-        distribute_pvdpR_sparseMatrix(current_spin, 0, sparse_threshold, pvdpRx_sparseMatrix, LM);
-        distribute_pvdpR_sparseMatrix(current_spin, 1, sparse_threshold, pvdpRy_sparseMatrix, LM);
-        distribute_pvdpR_sparseMatrix(current_spin, 2, sparse_threshold, pvdpRz_sparseMatrix, LM);
+        distribute_pvdpR_sparseMatrix(current_spin, 0, sparse_threshold, pvdpRx_sparseMatrix, LM,PV);
+        distribute_pvdpR_sparseMatrix(current_spin, 1, sparse_threshold, pvdpRy_sparseMatrix, LM,PV);
+        distribute_pvdpR_sparseMatrix(current_spin, 2, sparse_threshold, pvdpRz_sparseMatrix, LM,PV);
     }
     else
     {
-        distribute_pvdpR_soc_sparseMatrix(0, sparse_threshold, pvdpRx_soc_sparseMatrix, LM);
-        distribute_pvdpR_soc_sparseMatrix(1, sparse_threshold, pvdpRy_soc_sparseMatrix, LM);
-        distribute_pvdpR_soc_sparseMatrix(2, sparse_threshold, pvdpRz_soc_sparseMatrix, LM);
+        distribute_pvdpR_soc_sparseMatrix(0, sparse_threshold, pvdpRx_soc_sparseMatrix, LM,PV);
+        distribute_pvdpR_soc_sparseMatrix(1, sparse_threshold, pvdpRy_soc_sparseMatrix, LM,PV);
+        distribute_pvdpR_soc_sparseMatrix(2, sparse_threshold, pvdpRz_soc_sparseMatrix, LM,PV);
     }
 
     return;
diff --git a/source/module_io/write_Vxc.hpp b/source/module_io/write_Vxc.hpp
index 236ccafb77..e384daf45e 100644
--- a/source/module_io/write_Vxc.hpp
+++ b/source/module_io/write_Vxc.hpp
@@ -25,7 +25,7 @@ namespace ModuleIO
 			Gint_inout& io, 
 			LCAO_Matrix& lm) 
 	{ 
-		gg.cal_vlocal(&io, &lm, false); 
+		gg.cal_vlocal(&io, false); 
 	};
 
 	inline void gint_vl(

From 43fbbd42031dfc91de748db6960cc70b9b871b3f Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Tue, 7 May 2024 15:18:43 +0800
Subject: [PATCH 02/23] change PV to pv in module gint

---
 .../module_hamilt_lcao/module_gint/gint_k.h   | 14 +++++-----
 .../module_gint/gint_k_pvpr.cpp               |  4 +--
 .../module_gint/gint_k_sparse.cpp             | 18 ++++++-------
 .../module_gint/gint_k_sparse1.cpp            | 26 +++++++++----------
 4 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/gint_k.h b/source/module_hamilt_lcao/module_gint/gint_k.h
index 36c4d196da..094dc7d591 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k.h
+++ b/source/module_hamilt_lcao/module_gint/gint_k.h
@@ -59,7 +59,7 @@ class Gint_k : public Gint
     // <phi_0i | V | phi_0j>
     // V is (Vl + Vh + Vxc) if no Vna is used,
     // and is (Vna + delta_Vh + Vxc) if Vna is used.
-    void folding_vl_k(const int &ik, LCAO_Matrix* LM, Parallel_Orbitals *PV,const std::vector<ModuleBase::Vector3<double>>& kvec_d);
+    void folding_vl_k(const int &ik, LCAO_Matrix* LM, Parallel_Orbitals *pv,const std::vector<ModuleBase::Vector3<double>>& kvec_d);
 
     /**
      * @brief transfer pvpR to this->hRGint
@@ -89,7 +89,7 @@ class Gint_k : public Gint
         const std::map<Abfs::Vector3_Order<int>,
         std::map<size_t, std::map<size_t, double>>> &pvpR_sparseMatrix,
         LCAO_Matrix *LM,
-        Parallel_Orbitals *PV);
+        Parallel_Orbitals *pv);
 
     void distribute_pvpR_soc_sparseMatrix(
         const double &sparse_threshold, 
@@ -97,14 +97,14 @@ class Gint_k : public Gint
         std::map<size_t,
         std::map<size_t, std::complex<double>>>> &pvpR_soc_sparseMatrix,
         LCAO_Matrix *LM,
-        Parallel_Orbitals *PV
+        Parallel_Orbitals *pv
         );
 
     void cal_vlocal_R_sparseMatrix(
         const int &current_spin,
         const double &sparse_threshold,
         LCAO_Matrix *LM,
-        Parallel_Orbitals *PV);
+        Parallel_Orbitals *pv);
 
     //------------------------------------------------------
     // in gint_k_sparse1.cpp 
@@ -117,7 +117,7 @@ class Gint_k : public Gint
         const std::map<Abfs::Vector3_Order<int>,
         std::map<size_t, std::map<size_t, double>>> &pvdpR_sparseMatrix,
         LCAO_Matrix *LM,
-        Parallel_Orbitals *PV);
+        Parallel_Orbitals *pv);
 
     void distribute_pvdpR_soc_sparseMatrix(
         const int dim,
@@ -125,13 +125,13 @@ class Gint_k : public Gint
         const std::map<Abfs::Vector3_Order<int>,
         std::map<size_t, std::map<size_t, std::complex<double>>>> &pvdpR_soc_sparseMatrix,
         LCAO_Matrix *LM,
-        Parallel_Orbitals *PV);
+        Parallel_Orbitals *pv);
 
     void cal_dvlocal_R_sparseMatrix(
         const int &current_spin,
         const double &sparse_threshold,
         LCAO_Matrix *LM,
-        Parallel_Orbitals *PV);
+        Parallel_Orbitals *pv);
 
     private:
 
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
index 693254060f..f39af59f16 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
@@ -60,7 +60,7 @@ void Gint_k::destroy_pvpR(void)
 // H(k)=\sum{R} H(R)exp(ikR) 
 void Gint_k::folding_vl_k(const int &ik, 
                         LCAO_Matrix *LM, 
-                         Parallel_Orbitals *PV,
+                         Parallel_Orbitals *pv,
                         const std::vector<ModuleBase::Vector3<double>>& kvec_d)
 {
     ModuleBase::TITLE("Gint_k","folding_vl_k");
@@ -389,7 +389,7 @@ void Gint_k::folding_vl_k(const int &ik,
 #endif
         for (int j=0; j<nlocal; j++)
         {
-            if (!PV->in_this_processor(i,j))
+            if (!pv->in_this_processor(i,j))
             {
                 continue;
             }
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp
index 6bac72f1c8..5efec28e2d 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp
@@ -16,7 +16,7 @@ void Gint_k::distribute_pvpR_sparseMatrix(
     const std::map<Abfs::Vector3_Order<int>,
     std::map<size_t, std::map<size_t, double>>> &pvpR_sparseMatrix,
     LCAO_Matrix *LM,
-    Parallel_Orbitals *PV
+    Parallel_Orbitals *pv
 )
 {
     ModuleBase::TITLE("Gint_k","distribute_pvpR_sparseMatrix");
@@ -111,11 +111,11 @@ void Gint_k::distribute_pvpR_sparseMatrix(
                 
                 Parallel_Reduce::reduce_pool(tmp, GlobalV::NLOCAL);
 
-                if (PV->global2local_row(row) >= 0)
+                if (pv->global2local_row(row) >= 0)
                 {
                     for(int col = 0; col < GlobalV::NLOCAL; ++col)
                     {
-                        if(PV->global2local_col(col) >= 0)
+                        if(pv->global2local_col(col) >= 0)
                         {
                             if (std::abs(tmp[col]) > sparse_threshold)
                             {
@@ -152,7 +152,7 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix(
     const std::map<Abfs::Vector3_Order<int>,
     std::map<size_t, std::map<size_t, std::complex<double>>>> &pvpR_soc_sparseMatrix,
     LCAO_Matrix *LM,
-    Parallel_Orbitals *PV
+    Parallel_Orbitals *pv
 )
 {
     ModuleBase::TITLE("Gint_k","distribute_pvpR_soc_sparseMatrix");
@@ -246,11 +246,11 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix(
                 
                 Parallel_Reduce::reduce_pool(tmp_soc, GlobalV::NLOCAL);
 
-                if (PV->global2local_row(row) >= 0)
+                if (pv->global2local_row(row) >= 0)
                 {
                     for(int col = 0; col < GlobalV::NLOCAL; ++col)
                     {
-                        if(PV->global2local_col(col) >= 0)
+                        if(pv->global2local_col(col) >= 0)
                         {
                             if (std::abs(tmp_soc[col]) > sparse_threshold)
                             {
@@ -283,7 +283,7 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix(
 
 }
 
-void Gint_k::cal_vlocal_R_sparseMatrix(const int &current_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *PV)
+void Gint_k::cal_vlocal_R_sparseMatrix(const int &current_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *pv)
 {
     ModuleBase::TITLE("Gint_k","cal_vlocal_R_sparseMatrix");
 
@@ -420,11 +420,11 @@ void Gint_k::cal_vlocal_R_sparseMatrix(const int &current_spin, const double &sp
 
     if (GlobalV::NSPIN != 4)
     {
-        distribute_pvpR_sparseMatrix(current_spin, sparse_threshold, pvpR_sparseMatrix, LM,PV);
+        distribute_pvpR_sparseMatrix(current_spin, sparse_threshold, pvpR_sparseMatrix, LM,pv);
     }
     else
     {
-        distribute_pvpR_soc_sparseMatrix(sparse_threshold, pvpR_soc_sparseMatrix, LM,PV);
+        distribute_pvpR_soc_sparseMatrix(sparse_threshold, pvpR_soc_sparseMatrix, LM,pv);
     }
 
     return;
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp
index eaeaede6a1..e5c7ac5729 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp
@@ -16,7 +16,7 @@ void Gint_k::distribute_pvdpR_sparseMatrix(
     const double &sparse_threshold, 
     const std::map<Abfs::Vector3_Order<int>, std::map<size_t, std::map<size_t, double>>> &pvdpR_sparseMatrix,
     LCAO_Matrix *LM,
-    Parallel_Orbitals *PV)
+    Parallel_Orbitals *pv)
 {
     ModuleBase::TITLE("Gint_k","distribute_pvdpR_sparseMatrix");
 
@@ -110,11 +110,11 @@ void Gint_k::distribute_pvdpR_sparseMatrix(
                 
                 Parallel_Reduce::reduce_pool(tmp, GlobalV::NLOCAL);
 
-                if (PV->global2local_row(row) >= 0)
+                if (pv->global2local_row(row) >= 0)
                 {
                     for(int col = 0; col < GlobalV::NLOCAL; ++col)
                     {
-                        if(PV->global2local_col(col) >= 0)
+                        if(pv->global2local_col(col) >= 0)
                         {
                             if (std::abs(tmp[col]) > sparse_threshold)
                             {
@@ -173,7 +173,7 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix(
     const std::map<Abfs::Vector3_Order<int>,
     std::map<size_t, std::map<size_t, std::complex<double>>>> &pvdpR_soc_sparseMatrix,
     LCAO_Matrix *LM,
-    Parallel_Orbitals *PV
+    Parallel_Orbitals *pv
 )
 {
     ModuleBase::TITLE("Gint_k","distribute_pvdpR_soc_sparseMatrix");
@@ -267,11 +267,11 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix(
                 
                 Parallel_Reduce::reduce_pool(tmp_soc, GlobalV::NLOCAL);
 
-                if (PV->global2local_row(row) >= 0)
+                if (pv->global2local_row(row) >= 0)
                 {
                     for(int col = 0; col < GlobalV::NLOCAL; ++col)
                     {
-                        if(PV->global2local_col(col) >= 0)
+                        if(pv->global2local_col(col) >= 0)
                         {
                             if (std::abs(tmp_soc[col]) > sparse_threshold)
                             {
@@ -325,7 +325,7 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix(
 
 }
 
-void Gint_k::cal_dvlocal_R_sparseMatrix(const int &current_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *PV)
+void Gint_k::cal_dvlocal_R_sparseMatrix(const int &current_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *pv)
 {
     ModuleBase::TITLE("Gint_k","cal_vlocal_R_sparseMatrix");
 
@@ -529,15 +529,15 @@ void Gint_k::cal_dvlocal_R_sparseMatrix(const int &current_spin, const double &s
 
     if (GlobalV::NSPIN != 4)
     {
-        distribute_pvdpR_sparseMatrix(current_spin, 0, sparse_threshold, pvdpRx_sparseMatrix, LM,PV);
-        distribute_pvdpR_sparseMatrix(current_spin, 1, sparse_threshold, pvdpRy_sparseMatrix, LM,PV);
-        distribute_pvdpR_sparseMatrix(current_spin, 2, sparse_threshold, pvdpRz_sparseMatrix, LM,PV);
+        distribute_pvdpR_sparseMatrix(current_spin, 0, sparse_threshold, pvdpRx_sparseMatrix, LM,pv);
+        distribute_pvdpR_sparseMatrix(current_spin, 1, sparse_threshold, pvdpRy_sparseMatrix, LM,pv);
+        distribute_pvdpR_sparseMatrix(current_spin, 2, sparse_threshold, pvdpRz_sparseMatrix, LM,pv);
     }
     else
     {
-        distribute_pvdpR_soc_sparseMatrix(0, sparse_threshold, pvdpRx_soc_sparseMatrix, LM,PV);
-        distribute_pvdpR_soc_sparseMatrix(1, sparse_threshold, pvdpRy_soc_sparseMatrix, LM,PV);
-        distribute_pvdpR_soc_sparseMatrix(2, sparse_threshold, pvdpRz_soc_sparseMatrix, LM,PV);
+        distribute_pvdpR_soc_sparseMatrix(0, sparse_threshold, pvdpRx_soc_sparseMatrix, LM,pv);
+        distribute_pvdpR_soc_sparseMatrix(1, sparse_threshold, pvdpRy_soc_sparseMatrix, LM,pv);
+        distribute_pvdpR_soc_sparseMatrix(2, sparse_threshold, pvdpRz_soc_sparseMatrix, LM,pv);
     }
 
     return;

From 3a67c2b82682cdd70cfffd7aac359ea6ea90539f Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Mon, 13 May 2024 16:12:35 +0800
Subject: [PATCH 03/23] change GlobalC in module gint

---
 .../module_elecstate/elecstate_lcao_tddft.cpp |   1 -
 .../module_esolver/esolver_ks_lcao_elec.cpp   |   4 +-
 .../operator_lcao/veff_lcao.cpp               |   7 +-
 .../hamilt_lcaodft/operator_lcao/veff_lcao.h  |   7 +-
 .../hamilt_lcaodft/spar_dh.cpp                |   2 +-
 .../module_hamilt_lcao/module_gint/gint.cpp   |  29 ++--
 source/module_hamilt_lcao/module_gint/gint.h  |  21 ++-
 .../module_gint/gint_fvl.cpp                  |  10 +-
 .../module_gint/gint_gamma.h                  |   4 +-
 .../module_gint/gint_gamma_env.cpp            |  17 +-
 .../module_gint/gint_gamma_vl.cpp             |   4 +-
 .../module_hamilt_lcao/module_gint/gint_k.h   |  19 ++-
 .../module_gint/gint_k_env.cpp                |  20 +--
 .../module_gint/gint_k_pvpr.cpp               |  94 +++++------
 .../module_gint/gint_k_sparse.cpp             |  37 ++---
 .../module_gint/gint_k_sparse1.cpp            |  41 ++---
 .../module_gint/gint_rho.cpp                  |   4 +-
 .../module_gint/gint_tau.cpp                  |   9 +-
 .../module_gint/gint_tools.cpp                |   6 +-
 .../module_gint/gint_tools.h                  |   6 +-
 .../module_gint/gint_vl.cpp                   |  38 +++--
 .../module_gint/grid_bigcell.cpp              |  56 +++----
 .../module_gint/grid_bigcell.h                |   6 +-
 .../module_gint/grid_meshcell.cpp             |  20 +--
 .../module_gint/grid_meshcell.h               |   4 +-
 .../module_gint/grid_technique.cpp            | 148 +++++++++---------
 .../module_gint/grid_technique.h              |  19 ++-
 source/module_io/istate_envelope.cpp          |   4 +-
 28 files changed, 345 insertions(+), 292 deletions(-)

diff --git a/source/module_elecstate/elecstate_lcao_tddft.cpp b/source/module_elecstate/elecstate_lcao_tddft.cpp
index cc0e7133e4..c4c1e21e19 100644
--- a/source/module_elecstate/elecstate_lcao_tddft.cpp
+++ b/source/module_elecstate/elecstate_lcao_tddft.cpp
@@ -5,7 +5,6 @@
 #include "module_base/timer.h"
 #include "module_hamilt_lcao/module_gint/grid_technique.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
-
 namespace elecstate
 {
 
diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp
index aa8e53bd7c..b74d1c5425 100644
--- a/source/module_esolver/esolver_ks_lcao_elec.cpp
+++ b/source/module_esolver/esolver_ks_lcao_elec.cpp
@@ -64,7 +64,9 @@ void ESolver_KS_LCAO<TK, TR>::set_matrix_grid(Record_adj& ra)
                              this->pw_big->nbzp,
                              this->pw_rho->ny,
                              this->pw_rho->nplane,
-                             this->pw_rho->startz_current);
+                             this->pw_rho->startz_current,
+                             GlobalC::ucell,
+                             GlobalC::ORB);
 
     // (2)For each atom, calculate the adjacent atoms in different cells
     // and allocate the space for H(R) and S(R).
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
index ceab9d6d02..2fd691d4f4 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
@@ -2,7 +2,7 @@
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
 #include "module_hamilt_general/module_xc/xc_functional.h"
-
+#include "module_cell/unitcell.h"
 namespace hamilt
 {
 
@@ -112,8 +112,7 @@ void Veff<OperatorLCAO<TK, TR>>::contributeHR()
             }
         }
     }
-
-    this->GK->transfer_pvpR(this->hR);
+    this->GK->transfer_pvpR(this->hR,this->ucell,GlobalC::ORB,this->gd);
 
     ModuleBase::timer::tick("Veff", "contributeHR");
     return;
@@ -148,7 +147,7 @@ void Veff<OperatorLCAO<double, double>>::contributeHR(void)
         Gint_inout inout(vr_eff1, Gint_Tools::job_type::vlocal);
         this->GG->cal_vlocal(&inout,  this->new_e_iteration);
     }
-    this->GG->transfer_pvpR(this->hR);
+    this->GG->transfer_pvpR(this->hR,this->ucell,GlobalC::ORB);
 
     this->new_e_iteration = false;
     ModuleBase::timer::tick("Veff", "contributeHR");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
index 41bd719731..65e8b89765 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
@@ -48,6 +48,8 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
         : GK(GK_in),
           loc(loc_in),
           pot(pot_in),
+          ucell(ucell_in),
+          gd(GridD_in),
           OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in)
     {
         this->cal_type = lcao_gint;
@@ -74,7 +76,6 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
         OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in)
     {
         this->cal_type = lcao_gint;
-
         this->initialize_HR(ucell_in, GridD_in, paraV);
 
         GG_in->initialize_pvpR(*ucell_in, GridD_in);
@@ -89,7 +90,9 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * grid integration is used to calculate the contribution Hamiltonian of effective potential
      */
     virtual void contributeHR() override;
-
+  
+  const UnitCell* ucell;
+  Grid_Driver* gd;
   private:
     // used for k-dependent grid integration.
     Gint_k* GK = nullptr;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
index 5e3911ccb2..3eb079006b 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
@@ -42,7 +42,7 @@ void sparse_format::cal_dH(
     delete[] lm.DHloc_fixedR_y;
     delete[] lm.DHloc_fixedR_z;
 
-    gint_k.cal_dvlocal_R_sparseMatrix(current_spin, sparse_thr, &lm, lm.ParaV);
+    gint_k.cal_dvlocal_R_sparseMatrix(current_spin, sparse_thr, &lm, lm.ParaV,GlobalC::ORB,GlobalC::ucell,GlobalC::GridD);
 
     return;
 }
diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp
index 2392d21df2..864a53ab77 100644
--- a/source/module_hamilt_lcao/module_gint/gint.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint.cpp
@@ -293,14 +293,14 @@ void Gint::cal_gint(Gint_inout* inout)
 					//int* vindex = Gint_Tools::get_vindex(ncyz, ibx, jby, kbz);
                     int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz,
                         this->nplane, this->gridt->start_ind[grid_index], ncyz);
-                    this->gint_kernel_rho(na_grid, grid_index, delta_r, vindex, LD_pool, inout);
+                    this->gint_kernel_rho(na_grid, grid_index, delta_r, vindex, LD_pool, GlobalC::ucell,inout);
 					delete[] vindex;
 				}
 				else if(inout->job == Gint_Tools::job_type::tau)
 				{
                     int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz,
                         this->nplane, this->gridt->start_ind[grid_index], ncyz);
-                    this->gint_kernel_tau(na_grid, grid_index, delta_r, vindex, LD_pool, inout);
+                    this->gint_kernel_tau(na_grid, grid_index, delta_r, vindex, LD_pool, inout,GlobalC::ucell);
 					delete[] vindex;
 				}
 				else if(inout->job == Gint_Tools::job_type::force)
@@ -322,11 +322,11 @@ void Gint::cal_gint(Gint_inout* inout)
 #ifdef _OPENMP
 						this->gint_kernel_force(na_grid, grid_index, delta_r, vldr3, LD_pool,
 							DM_in, inout->ispin, inout->isforce, inout->isstress,
-							&fvl_dphi_thread, &svl_dphi_thread);
+							&fvl_dphi_thread, &svl_dphi_thread,GlobalC::ucell);
 #else
 						this->gint_kernel_force(na_grid, grid_index, delta_r, vldr3, LD_pool,
 							DM_in, inout->ispin, inout->isforce, inout->isstress,
-							inout->fvl_dphi, inout->svl_dphi);
+							inout->fvl_dphi, inout->svl_dphi,GlobalC::ucell);
 #endif
 					delete[] vldr3;
 				}
@@ -338,17 +338,17 @@ void Gint::cal_gint(Gint_inout* inout)
 					if((GlobalV::GAMMA_ONLY_LOCAL && lgd>0) || !GlobalV::GAMMA_ONLY_LOCAL)
 					{
 						this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,
-								pvpR_thread, hRGint_thread);
+								pvpR_thread,GlobalC::ucell,hRGint_thread);
 					}
 #else
 					if(GlobalV::GAMMA_ONLY_LOCAL && lgd>0)
 					{
-						this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool, nullptr);
+						this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,GlobalC::ucell,nullptr);
 					}
 					if(!GlobalV::GAMMA_ONLY_LOCAL)
 					{
 						this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,
-								this->pvpR_reduced[inout->ispin]);
+								GlobalC::ucell,this->pvpR_reduced[inout->ispin]);
 					}
 #endif
 					delete[] vldr3;
@@ -359,12 +359,13 @@ void Gint::cal_gint(Gint_inout* inout)
 							this->nplane, this->gridt->start_ind[grid_index], ncyz, dv);
 #ifdef _OPENMP
 					this->gint_kernel_dvlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,
-							pvdpRx_thread, pvdpRy_thread, pvdpRz_thread);
+							pvdpRx_thread, pvdpRy_thread, pvdpRz_thread,GlobalC::ucell);
 #else
 					this->gint_kernel_dvlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,
 							this->pvdpRx_reduced[inout->ispin],
                             this->pvdpRy_reduced[inout->ispin],
-                            this->pvdpRz_reduced[inout->ispin]);
+                            this->pvdpRz_reduced[inout->ispin],
+                            GlobalC::ucell);
 #endif
 					delete[] vldr3;
 				}
@@ -378,16 +379,16 @@ void Gint::cal_gint(Gint_inout* inout)
 					if((GlobalV::GAMMA_ONLY_LOCAL && lgd>0) || !GlobalV::GAMMA_ONLY_LOCAL)
 					{
 						this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,
-								pvpR_thread, hRGint_thread);
+								pvpR_thread, GlobalC::ucell,hRGint_thread);
 					}
 #else
 					if(GlobalV::GAMMA_ONLY_LOCAL && lgd>0)
 					{
-						this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, nullptr);
+						this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, GlobalC::ucell,nullptr);
 					}
 					if(!GlobalV::GAMMA_ONLY_LOCAL)
 					{
-						this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,
+						this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,GlobalC::ucell,
 								this->pvpR_reduced[inout->ispin]);
 					}
 #endif
@@ -415,11 +416,11 @@ void Gint::cal_gint(Gint_inout* inout)
 #ifdef _OPENMP
 					this->gint_kernel_force_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,
 							DM_in, inout->ispin, inout->isforce, inout->isstress,
-							&fvl_dphi_thread, &svl_dphi_thread);
+							&fvl_dphi_thread, &svl_dphi_thread,GlobalC::ucell);
 #else
 					this->gint_kernel_force_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,
 							DM_in, inout->ispin, inout->isforce, inout->isstress,
-							inout->fvl_dphi, inout->svl_dphi);
+							inout->fvl_dphi, inout->svl_dphi,GlobalC::ucell);
 #endif
 					delete[] vldr3;
 					delete[] vkdr3;
diff --git a/source/module_hamilt_lcao/module_gint/gint.h b/source/module_hamilt_lcao/module_gint/gint.h
index 8a90250c47..d7a51c1a03 100644
--- a/source/module_hamilt_lcao/module_gint/gint.h
+++ b/source/module_hamilt_lcao/module_gint/gint.h
@@ -12,7 +12,7 @@
 #include "gint_tools.h"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
 #include "module_cell/module_neighbor/sltk_grid_driver.h"
-
+#include "module_hamilt_lcao/module_gint/grid_technique.h"
 class Gint
 {
     public:
@@ -80,6 +80,7 @@ class Gint
         double* vldr3,
         const int LD_pool,
         double* pvpR_reduced,
+        const UnitCell& ucell,
         hamilt::HContainer<double>* hR = nullptr);
 
     // calculate < phi_0 | vlocal | dphi_R >
@@ -91,7 +92,8 @@ class Gint
         const int LD_pool,
         double* pvdpRx_reduced,
         double* pvdpRy_reduced,
-        double* pvdpRz_reduced);
+        double* pvdpRz_reduced,
+        const UnitCell& ucell);
 
     void gint_kernel_vlocal_meta(
         const int na_grid,
@@ -101,6 +103,7 @@ class Gint
         double* vkdr3,
         const int LD_pool,
         double* pvpR_reduced,
+        const UnitCell& ucell,
         hamilt::HContainer<double>* hR = nullptr);
 
 	void cal_meshball_vlocal_gamma(
@@ -125,7 +128,8 @@ class Gint
         bool** cal_flag, 
         double** psir_ylm,
         double** psir_vlbr3,
-        double* pvpR);
+        double* pvpR,
+        const UnitCell& ucell);
 
     //------------------------------------------------------
     // in gint_fvl.cpp 
@@ -142,7 +146,8 @@ class Gint
         const bool isforce,
         const bool isstress,
         ModuleBase::matrix* fvl_dphi,
-        ModuleBase::matrix* svl_dphi);
+        ModuleBase::matrix* svl_dphi,
+        const UnitCell& ucell);
 
     void gint_kernel_force_meta(
         const int na_grid,
@@ -156,7 +161,8 @@ class Gint
         const bool isforce,
         const bool isstress,
         ModuleBase::matrix* fvl_dphi,
-        ModuleBase::matrix* svl_dphi);
+        ModuleBase::matrix* svl_dphi,
+        const UnitCell& ucell);
 
     void cal_meshball_force(
         const int grid_index,
@@ -191,6 +197,7 @@ class Gint
         const double delta_r,
         int* vindex,
         const int LD_pool,
+        const UnitCell& ucell,
         Gint_inout *inout);
 
     void cal_meshball_rho(
@@ -207,7 +214,8 @@ class Gint
         const double delta_r,
         int* vindex,
         const int LD_pool,
-        Gint_inout *inout);
+        Gint_inout *inout,
+        const UnitCell& ucell);
 
     void cal_meshball_tau(
         const int na_grid,
@@ -221,7 +229,6 @@ class Gint
         double** dpsiz_dm,
         double* rho);
 
-    // dimension: [GlobalC::LNNR.nnrg] 
     // save the < phi_0i | V | phi_Rj > in sparse H matrix.
     bool pvpR_alloc_flag = false;
     double** pvpR_reduced = nullptr; //stores Hamiltonian in reduced format, for multi-l
diff --git a/source/module_hamilt_lcao/module_gint/gint_fvl.cpp b/source/module_hamilt_lcao/module_gint/gint_fvl.cpp
index 59359dc674..36de330336 100644
--- a/source/module_hamilt_lcao/module_gint/gint_fvl.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_fvl.cpp
@@ -14,7 +14,8 @@ void Gint::gint_kernel_force(
     const bool isforce,
     const bool isstress,
     ModuleBase::matrix* fvl_dphi,
-    ModuleBase::matrix* svl_dphi)
+    ModuleBase::matrix* svl_dphi,
+	const UnitCell& ucell)
 {
     //prepare block information
 	int* block_iw=nullptr;
@@ -30,7 +31,7 @@ void Gint::gint_kernel_force(
 	Gint_Tools::Array_Pool<double> dpsir_ylm_z(this->bxyz, LD_pool);
 
 	Gint_Tools::cal_dpsir_ylm(*this->gridt, this->bxyz, na_grid, grid_index, delta_r,	block_index, block_size, cal_flag,
-		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D);
+		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D,ucell);
 
     //calculating f_mu(r) = v(r)*psi_mu(r)*dv
 	const Gint_Tools::Array_Pool<double> psir_vlbr3 
@@ -117,7 +118,8 @@ void Gint::gint_kernel_force_meta(
     const bool isforce,
     const bool isstress,
     ModuleBase::matrix* fvl_dphi,
-    ModuleBase::matrix* svl_dphi)
+    ModuleBase::matrix* svl_dphi,
+	const UnitCell& ucell)
 {
     //prepare block information
 	int* block_iw=nullptr;
@@ -168,7 +170,7 @@ void Gint::gint_kernel_force_meta(
 
 	//psi and gradient of psi
 	Gint_Tools::cal_dpsir_ylm(*this->gridt, this->bxyz, na_grid, grid_index, delta_r,	block_index, block_size, cal_flag,
-		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D);
+		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D,ucell);
 	/*
 	Gint_Tools::cal_dpsir_ylm(na_grid, grid_index, delta_r,	block_index, block_size, cal_flag,
 		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D, displ1);
diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma.h b/source/module_hamilt_lcao/module_gint/gint_gamma.h
index d6f31da80e..7ed700828f 100644
--- a/source/module_hamilt_lcao/module_gint/gint_gamma.h
+++ b/source/module_hamilt_lcao/module_gint/gint_gamma.h
@@ -39,13 +39,13 @@ class Gint_Gamma : public Gint
     // in gint_gamma_env.cpp 
     //------------------------------------------------------
 	// calcualte the envelope function
-	void cal_env(const double* wfc, double* rho);
+	void cal_env(const double* wfc, double* rho,LCAO_Orbitals &orb,UnitCell &ucell);
 
     //------------------------------------------------------
     // in veff_lcao.cpp
     //------------------------------------------------------
     /// transfer this->hRGint to Veff::hR
-    void transfer_pvpR(hamilt::HContainer<double>* hR);
+    void transfer_pvpR(hamilt::HContainer<double>* hR,const UnitCell* ucell,const LCAO_Orbitals& orb);
 
 private:
 
diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp b/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp
index b63b108411..29325a8e8b 100644
--- a/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp
@@ -5,14 +5,14 @@
 #include "module_base/ylm.h"
 #include "module_base/timer.h"
 
-void Gint_Gamma::cal_env(const double* wfc, double* rho)
+void Gint_Gamma::cal_env(const double* wfc, double* rho,LCAO_Orbitals &orb,UnitCell &ucell)
 {
     ModuleBase::TITLE("Grid_Integral","cal_env");
 
     // it's a uniform grid to save orbital values, so the delta_r is a constant.
-    const double delta_r = GlobalC::ORB.dr_uniform;
+    const double delta_r = orb.dr_uniform;
 	const int max_size = this->gridt->max_atom;
-	const int LD_pool = max_size*GlobalC::ucell.nwmax;
+	const int LD_pool = max_size*ucell.nwmax;
 
 	if(max_size!=0) 
 	{
@@ -40,7 +40,8 @@ void Gint_Gamma::cal_env(const double* wfc, double* rho)
                 size, grid_index, delta_r,
 				block_index, block_size, 
 				cal_flag,
-				psir_ylm.ptr_2D);
+				psir_ylm.ptr_2D,
+				ucell);
 
             int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz,
                 this->nplane, this->gridt->start_ind[grid_index], ncyz);
@@ -49,11 +50,11 @@ void Gint_Gamma::cal_env(const double* wfc, double* rho)
 			{
 				const int mcell_index1 = this->gridt->bcell_start[grid_index] + ia1;
 				const int iat = this->gridt->which_atom[mcell_index1];
-				const int T1 = GlobalC::ucell.iat2it[iat];
-				Atom *atom1 = &GlobalC::ucell.atoms[T1];
-				const int I1 = GlobalC::ucell.iat2ia[iat];
+				const int T1 = ucell.iat2it[iat];
+				Atom *atom1 = &ucell.atoms[T1];
+				const int I1 = ucell.iat2ia[iat];
 				// get the start index of local orbitals.
-				const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0);
+				const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
 				for (int ib=0; ib<this->bxyz; ib++)
 				{
 					if(cal_flag[ib][ia1])
diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp b/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp
index 163f127fc2..5177036c11 100644
--- a/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_gamma_vl.cpp
@@ -289,11 +289,11 @@ void Gint_Gamma::vl_grid_to_2D(
 #ifdef __MPI
 #include "module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h"
 #endif
-void Gint_Gamma::transfer_pvpR(hamilt::HContainer<double>* hR)
+void Gint_Gamma::transfer_pvpR(hamilt::HContainer<double>* hR,const UnitCell* ucell,const LCAO_Orbitals& orb)
 {
     ModuleBase::TITLE("Gint_Gamma","transfer_pvpR");
     ModuleBase::timer::tick("Gint_Gamma","transfer_pvpR");
-
+    
     for(int iap=0;iap<this->hRGint->size_atom_pairs();iap++)
     {
         auto& ap = this->hRGint->get_atom_pair(iap);
diff --git a/source/module_hamilt_lcao/module_gint/gint_k.h b/source/module_hamilt_lcao/module_gint/gint_k.h
index 094dc7d591..396eda3423 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k.h
+++ b/source/module_hamilt_lcao/module_gint/gint_k.h
@@ -59,14 +59,16 @@ class Gint_k : public Gint
     // <phi_0i | V | phi_0j>
     // V is (Vl + Vh + Vxc) if no Vna is used,
     // and is (Vna + delta_Vh + Vxc) if Vna is used.
-    void folding_vl_k(const int &ik, LCAO_Matrix* LM, Parallel_Orbitals *pv,const std::vector<ModuleBase::Vector3<double>>& kvec_d);
+    void folding_vl_k(const int &ik, LCAO_Matrix* LM, Parallel_Orbitals *pv,
+                    const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                    const UnitCell& ucell,const LCAO_Orbitals& orb,Grid_Driver& gd);
 
     /**
      * @brief transfer pvpR to this->hRGint
      * then pass this->hRGint to Veff<OperatorLCAO>::hR
     */
-    void transfer_pvpR(hamilt::HContainer<double> *hR);
-    void transfer_pvpR(hamilt::HContainer<std::complex<double>> *hR);
+    void transfer_pvpR(hamilt::HContainer<double> *hR,const UnitCell* ucell_in,const LCAO_Orbitals& orb,Grid_Driver* gd);
+    void transfer_pvpR(hamilt::HContainer<std::complex<double>> *hR,const UnitCell* ucell_in,const LCAO_Orbitals& orb,Grid_Driver* gd);
 
     //------------------------------------------------------
     // in gint_k_env.cpp 
@@ -76,7 +78,8 @@ class Gint_k : public Gint
                    const std::complex<double>* psi_k,
                    double* rho,
                    const std::vector<ModuleBase::Vector3<double>>& kvec_c,
-                   const std::vector<ModuleBase::Vector3<double>>& kvec_d);
+                   const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                   LCAO_Orbitals &orb,UnitCell &ucell);
 
     //------------------------------------------------------
     // in gint_k_sparse.cpp 
@@ -104,7 +107,9 @@ class Gint_k : public Gint
         const int &current_spin,
         const double &sparse_threshold,
         LCAO_Matrix *LM,
-        Parallel_Orbitals *pv);
+        Parallel_Orbitals *pv,
+        LCAO_Orbitals &orb,UnitCell &ucell,
+        Grid_Driver &gdriver);
 
     //------------------------------------------------------
     // in gint_k_sparse1.cpp 
@@ -131,7 +136,9 @@ class Gint_k : public Gint
         const int &current_spin,
         const double &sparse_threshold,
         LCAO_Matrix *LM,
-        Parallel_Orbitals *pv);
+        Parallel_Orbitals *pv,
+        LCAO_Orbitals &orb,UnitCell &ucell,
+        Grid_Driver &gdriver);
 
     private:
 
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_env.cpp b/source/module_hamilt_lcao/module_gint/gint_k_env.cpp
index 33c22526bf..76499b2e5a 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_env.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_env.cpp
@@ -9,15 +9,16 @@ void Gint_k::cal_env_k(int ik,
                        const std::complex<double>* psi_k, 
                        double* rho, 
                        const std::vector<ModuleBase::Vector3<double>>& kvec_c, 
-                       const std::vector<ModuleBase::Vector3<double>>& kvec_d)
+                       const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                       LCAO_Orbitals &orb,UnitCell &ucell)
 {
     ModuleBase::TITLE("Gint_k", "cal_env_k");
     ModuleBase::timer::tick("Gint_k", "cal_env_k");
 
     // it's a uniform grid to save orbital values, so the delta_r is a constant.
-    const double delta_r = GlobalC::ORB.dr_uniform;
+    const double delta_r = orb.dr_uniform;
 	const int max_size = this->gridt->max_atom;
-	const int LD_pool = max_size*GlobalC::ucell.nwmax;
+	const int LD_pool = max_size*ucell.nwmax;
 
     if (max_size != 0)
     {
@@ -49,7 +50,8 @@ void Gint_k::cal_env_k(int ik,
                 this->bxyz, size, grid_index, delta_r,
                 block_index, block_size,
                 cal_flag,
-                psir_ylm.ptr_2D);
+                psir_ylm.ptr_2D,
+                ucell);
 
             int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz,
                 this->nplane, this->gridt->start_ind[grid_index], ncyz);
@@ -58,9 +60,9 @@ void Gint_k::cal_env_k(int ik,
             {
                 const int mcell_index1 = this->gridt->bcell_start[grid_index] + ia1;
                 const int iat = this->gridt->which_atom[mcell_index1];
-                const int T1 = GlobalC::ucell.iat2it[iat];
-                Atom* atom1 = &GlobalC::ucell.atoms[T1];
-                const int I1 = GlobalC::ucell.iat2ia[iat];
+                const int T1 = ucell.iat2it[iat];
+                Atom* atom1 = &ucell.atoms[T1];
+                const int I1 = ucell.iat2ia[iat];
 
                 //find R by which_unitcell and cal kphase
                 const int id_ucell = this->gridt->which_unitcell[mcell_index1];
@@ -72,12 +74,12 @@ void Gint_k::cal_env_k(int ik,
                 //std::cout << "kvec_c: " << kvec_c[ik].x << " " << kvec_c[ik].y << " " << kvec_c[ik].z << std::endl;
                 //std::cout << "R: " << R.x << " " << R.y << " " << R.z << std::endl;
                 const double arg = (kvec_d[ik] * R) * ModuleBase::TWO_PI;
-                const double arg1 = (kvec_c[ik] * (R.x * GlobalC::ucell.a1 + R.y * GlobalC::ucell.a2 + R.z * GlobalC::ucell.a3)) * ModuleBase::TWO_PI;
+                const double arg1 = (kvec_c[ik] * (R.x * ucell.a1 + R.y * ucell.a2 + R.z * ucell.a3)) * ModuleBase::TWO_PI;
                 //std::cout << "arg0=" << arg << ", arg1=" << arg1 << std::endl;
                 const std::complex<double> kphase = std::complex <double>(cos(arg), sin(arg));
 
                 // get the start index of local orbitals.
-                const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0);
+                const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
                 for (int ib = 0; ib < this->bxyz; ib++)
                 {
                     if (cal_flag[ib][ia1])
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
index f39af59f16..3f62519bba 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
@@ -60,8 +60,12 @@ void Gint_k::destroy_pvpR(void)
 // H(k)=\sum{R} H(R)exp(ikR) 
 void Gint_k::folding_vl_k(const int &ik, 
                         LCAO_Matrix *LM, 
-                         Parallel_Orbitals *pv,
-                        const std::vector<ModuleBase::Vector3<double>>& kvec_d)
+                        Parallel_Orbitals *pv,
+                        const std::vector<ModuleBase::Vector3<double>>& kvec_d,
+                        const UnitCell& ucell,
+                        const LCAO_Orbitals& orb,
+                        Grid_Driver& gd
+                        )
 {
     ModuleBase::TITLE("Gint_k","folding_vl_k");
     ModuleBase::timer::tick("Gint_k","folding_vl_k");
@@ -131,25 +135,25 @@ void Gint_k::folding_vl_k(const int &ik,
 #ifdef _OPENMP
 #pragma omp for schedule(dynamic)
 #endif
-    for(int iat=0; iat<GlobalC::ucell.nat; ++iat)
+    for(int iat=0; iat<ucell.nat; ++iat)
     {
-        const int T1 = GlobalC::ucell.iat2it[iat];
-        const int I1 = GlobalC::ucell.iat2ia[iat];
+        const int T1 = ucell.iat2it[iat];
+        const int I1 = ucell.iat2ia[iat];
         {
             // atom in this grid piece.
             if(this->gridt->in_this_processor[iat])
             {
-                Atom* atom1 = &GlobalC::ucell.atoms[T1];
-                const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0);
+                Atom* atom1 = &ucell.atoms[T1];
+                const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
 
                 // get the start positions of elements.
                 const int DM_start = this->gridt->nlocstartg[iat];
 
                 // get the coordinates of adjacent atoms.
-                tau1 = GlobalC::ucell.atoms[T1].tau[I1];
-                //GlobalC::GridD.Find_atom(tau1);	
+                tau1 = ucell.atoms[T1].tau[I1];
+                //GridD.Find_atom(tau1);	
                 AdjacentAtomInfo adjs;
-                GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs);	
+                gd.Find_atom(ucell, tau1, T1, I1, &adjs);	
                 // search for the adjacent atoms.
                 int nad = 0;
 
@@ -158,25 +162,25 @@ void Gint_k::folding_vl_k(const int &ik,
                     // get iat2
                     const int T2 = adjs.ntype[ad];
                     const int I2 = adjs.natom[ad];
-                    const int iat2 = GlobalC::ucell.itia2iat(T2, I2);
+                    const int iat2 = ucell.itia2iat(T2, I2);
 
 
                     // adjacent atom is also on the grid.
                     if(this->gridt->in_this_processor[iat2])
                     {
-                        Atom* atom2 = &GlobalC::ucell.atoms[T2];
+                        Atom* atom2 = &ucell.atoms[T2];
                         dtau = adjs.adjacent_tau[ad] - tau1;
-                        double distance = dtau.norm() * GlobalC::ucell.lat0;
-                        double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
+                        double distance = dtau.norm() * ucell.lat0;
+                        double rcut = orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut();
 
                         // for the local part, only need to calculate <phi_i | phi_j> within range
                         // mohan note 2012-07-06
                         if(distance < rcut)
                         {
-                            const int start2 = GlobalC::ucell.itiaiw2iwt(T2, I2, 0); 
+                            const int start2 = ucell.itiaiw2iwt(T2, I2, 0); 
 
                             // calculate the distance between iat1 and iat2.
-                            // ModuleBase::Vector3<double> dR = GlobalC::GridD.getAdjacentTau(ad) - tau1;
+                            // ModuleBase::Vector3<double> dR = GridD.getAdjacentTau(ad) - tau1;
                             dR.x = adjs.box[ad].x;
                             dR.y = adjs.box[ad].y;
                             dR.z = adjs.box[ad].z;
@@ -422,7 +426,7 @@ void Gint_k::folding_vl_k(const int &ik,
 #include "module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h"
 
 //transfer_pvpR, NSPIN = 1 or 2
-void Gint_k::transfer_pvpR(hamilt::HContainer<double> *hR)
+void Gint_k::transfer_pvpR(hamilt::HContainer<double> *hR,const UnitCell* ucell_in,const LCAO_Orbitals& orb,Grid_Driver* gd)
 {
     ModuleBase::TITLE("Gint_k","transfer_pvpR");
     ModuleBase::timer::tick("Gint_k","transfer_pvpR");
@@ -434,25 +438,25 @@ void Gint_k::transfer_pvpR(hamilt::HContainer<double> *hR)
     this->hRGint->set_zero();
 
     const int npol = GlobalV::NPOL;
-
-    for(int iat=0; iat<GlobalC::ucell.nat; ++iat)
+    const UnitCell& ucell = *ucell_in;
+    for(int iat=0; iat<ucell.nat; ++iat)
     {
-        const int T1 = GlobalC::ucell.iat2it[iat];
-        const int I1 = GlobalC::ucell.iat2ia[iat];
+        const int T1 = ucell.iat2it[iat];
+        const int I1 = ucell.iat2ia[iat];
         {
             // atom in this grid piece.
             if(this->gridt->in_this_processor[iat])
             {
-                Atom* atom1 = &GlobalC::ucell.atoms[T1];
+                Atom* atom1 = &ucell.atoms[T1];
 
                 // get the start positions of elements.
                 const int DM_start = this->gridt->nlocstartg[iat];
 
                 // get the coordinates of adjacent atoms.
-                auto& tau1 = GlobalC::ucell.atoms[T1].tau[I1];
-                //GlobalC::GridD.Find_atom(tau1);	
+                auto& tau1 = ucell.atoms[T1].tau[I1];
+                //gd.Find_atom(tau1);	
                 AdjacentAtomInfo adjs;
-                GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs);	
+                gd->Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs);	
                 // search for the adjacent atoms.
                 int nad = 0;
 
@@ -461,16 +465,16 @@ void Gint_k::transfer_pvpR(hamilt::HContainer<double> *hR)
                     // get iat2
                     const int T2 = adjs.ntype[ad];
                     const int I2 = adjs.natom[ad];
-                    const int iat2 = GlobalC::ucell.itia2iat(T2, I2);
+                    const int iat2 = ucell.itia2iat(T2, I2);
 
 
                     // adjacent atom is also on the grid.
                     if(this->gridt->in_this_processor[iat2])
                     {
-                        Atom* atom2 = &GlobalC::ucell.atoms[T2];
+                        Atom* atom2 = &ucell.atoms[T2];
                         auto dtau = adjs.adjacent_tau[ad] - tau1;
-                        double distance = dtau.norm() * GlobalC::ucell.lat0;
-                        double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
+                        double distance = dtau.norm() * ucell.lat0;
+                        double rcut = orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut();
 
                         if(distance < rcut)
                         {
@@ -480,7 +484,7 @@ void Gint_k::transfer_pvpR(hamilt::HContainer<double> *hR)
                                 continue;
                             }
                             // calculate the distance between iat1 and iat2.
-                            // ModuleBase::Vector3<double> dR = GlobalC::GridD.getAdjacentTau(ad) - tau1;
+                            // ModuleBase::Vector3<double> dR = gd.getAdjacentTau(ad) - tau1;
                             auto& dR = adjs.box[ad];
                             //dR.x = adjs.box[ad].x;
                             //dR.y = adjs.box[ad].y;
@@ -548,11 +552,10 @@ void Gint_k::transfer_pvpR(hamilt::HContainer<double> *hR)
 }
 
 //transfer_pvpR, NSPIN = 4
-void Gint_k::transfer_pvpR(hamilt::HContainer<std::complex<double>> *hR)
+void Gint_k::transfer_pvpR(hamilt::HContainer<std::complex<double>> *hR,const UnitCell* ucell_in,const LCAO_Orbitals& orb,Grid_Driver* gd)
 {
     ModuleBase::TITLE("Gint_k","transfer_pvpR");
     ModuleBase::timer::tick("Gint_k","transfer_pvpR");
-
     if(!pvpR_alloc_flag || this->hRGintCd == nullptr)
     {
         ModuleBase::WARNING_QUIT("Gint_k::destroy_pvpR","pvpR hasnot been allocated yet!");
@@ -560,25 +563,26 @@ void Gint_k::transfer_pvpR(hamilt::HContainer<std::complex<double>> *hR)
     this->hRGintCd->set_zero();
 
     const int npol = GlobalV::NPOL;
-
-    for(int iat=0; iat<GlobalC::ucell.nat; ++iat)
+    const UnitCell& ucell = *ucell_in;
+    
+    for(int iat=0; iat<ucell.nat; ++iat)
     {
-        const int T1 = GlobalC::ucell.iat2it[iat];
-        const int I1 = GlobalC::ucell.iat2ia[iat];
+        const int T1 = ucell.iat2it[iat];
+        const int I1 = ucell.iat2ia[iat];
         {
             // atom in this grid piece.
             if(this->gridt->in_this_processor[iat])
             {
-                Atom* atom1 = &GlobalC::ucell.atoms[T1];
+                Atom* atom1 = &ucell.atoms[T1];
 
                 // get the start positions of elements.
                 const int DM_start = this->gridt->nlocstartg[iat];
 
                 // get the coordinates of adjacent atoms.
-                auto& tau1 = GlobalC::ucell.atoms[T1].tau[I1];
-                //GlobalC::GridD.Find_atom(tau1);	
+                auto& tau1 = ucell.atoms[T1].tau[I1];
+                //gd.Find_atom(tau1);	
                 AdjacentAtomInfo adjs;
-                GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs);	
+                gd->Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs);	
                 // search for the adjacent atoms.
                 int nad = 0;
 
@@ -587,15 +591,15 @@ void Gint_k::transfer_pvpR(hamilt::HContainer<std::complex<double>> *hR)
                     // get iat2
                     const int T2 = adjs.ntype[ad];
                     const int I2 = adjs.natom[ad];
-                    const int iat2 = GlobalC::ucell.itia2iat(T2, I2);
+                    const int iat2 = ucell.itia2iat(T2, I2);
 
                     // adjacent atom is also on the grid.
                     if(this->gridt->in_this_processor[iat2])
                     {
-                        Atom* atom2 = &GlobalC::ucell.atoms[T2];
+                        Atom* atom2 = &ucell.atoms[T2];
                         auto dtau = adjs.adjacent_tau[ad] - tau1;
-                        double distance = dtau.norm() * GlobalC::ucell.lat0;
-                        double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
+                        double distance = dtau.norm() * ucell.lat0;
+                        double rcut = orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut();
 
                         if(distance < rcut)
                         {
@@ -605,7 +609,7 @@ void Gint_k::transfer_pvpR(hamilt::HContainer<std::complex<double>> *hR)
                                 continue;
                             }
                             // calculate the distance between iat1 and iat2.
-                            // ModuleBase::Vector3<double> dR = GlobalC::GridD.getAdjacentTau(ad) - tau1;
+                            // ModuleBase::Vector3<double> dR = gd.getAdjacentTau(ad) - tau1;
                             auto& dR = adjs.box[ad];
                             //dR.x = adjs.box[ad].x;
                             //dR.y = adjs.box[ad].y;
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp
index 5efec28e2d..55337badf9 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse.cpp
@@ -283,7 +283,8 @@ void Gint_k::distribute_pvpR_soc_sparseMatrix(
 
 }
 
-void Gint_k::cal_vlocal_R_sparseMatrix(const int &current_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *pv)
+void Gint_k::cal_vlocal_R_sparseMatrix(const int &current_spin, const double &sparse_threshold, 
+                LCAO_Matrix *LM,Parallel_Orbitals *pv,LCAO_Orbitals &orb,UnitCell &ucell,Grid_Driver &gdriver)
 {
     ModuleBase::TITLE("Gint_k","cal_vlocal_R_sparseMatrix");
 
@@ -295,38 +296,38 @@ void Gint_k::cal_vlocal_R_sparseMatrix(const int &current_spin, const double &sp
     std::complex<double> temp_value_complex;
 
     ModuleBase::Vector3<double> tau1, dtau;
-    for(int T1=0; T1<GlobalC::ucell.ntype; ++T1)
+    for(int T1=0; T1<ucell.ntype; ++T1)
     {
-        for(int I1=0; I1<GlobalC::ucell.atoms[T1].na; ++I1)
+        for(int I1=0; I1<ucell.atoms[T1].na; ++I1)
         {
-            const int iat = GlobalC::ucell.itia2iat(T1,I1);
+            const int iat = ucell.itia2iat(T1,I1);
             if(this->gridt->in_this_processor[iat])
             {
-                Atom* atom1 = &GlobalC::ucell.atoms[T1];
-                const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0);
+                Atom* atom1 = &ucell.atoms[T1];
+                const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
 
                 const int DM_start = this->gridt->nlocstartg[iat];
-                tau1 = GlobalC::ucell.atoms[T1].tau[I1];
-                GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
+                tau1 = ucell.atoms[T1].tau[I1];
+                gdriver.Find_atom(ucell, tau1, T1, I1);
                 int nad2 = 0;
 
-                for(int ad = 0; ad < GlobalC::GridD.getAdjacentNum()+1; ad++)
+                for(int ad = 0; ad < gdriver.getAdjacentNum()+1; ad++)
                 {
-                    const int T2 = GlobalC::GridD.getType(ad);
-                    const int I2 = GlobalC::GridD.getNatom(ad);
-                    const int iat2 = GlobalC::ucell.itia2iat(T2, I2);
+                    const int T2 = gdriver.getType(ad);
+                    const int I2 = gdriver.getNatom(ad);
+                    const int iat2 = ucell.itia2iat(T2, I2);
 
                     if(this->gridt->in_this_processor[iat2])
                     {
-                        Atom* atom2 = &GlobalC::ucell.atoms[T2];
-                        dtau = GlobalC::GridD.getAdjacentTau(ad) - tau1;
-                        double distance = dtau.norm() * GlobalC::ucell.lat0;
-                        double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
+                        Atom* atom2 = &ucell.atoms[T2];
+                        dtau = gdriver.getAdjacentTau(ad) - tau1;
+                        double distance = dtau.norm() * ucell.lat0;
+                        double rcut = orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut();
 
                         if(distance < rcut)
                         {
-                            const int start2 = GlobalC::ucell.itiaiw2iwt(T2, I2, 0);
-                            Abfs::Vector3_Order<int> dR(GlobalC::GridD.getBox(ad).x, GlobalC::GridD.getBox(ad).y, GlobalC::GridD.getBox(ad).z);
+                            const int start2 = ucell.itiaiw2iwt(T2, I2, 0);
+                            Abfs::Vector3_Order<int> dR(gdriver.getBox(ad).x, gdriver.getBox(ad).y, gdriver.getBox(ad).z);
                             int ixxx = DM_start + this->gridt->find_R2st[iat][nad2];
                             for(int iw=0; iw<atom1->nw * GlobalV::NPOL; iw++)
                             {
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp
index e5c7ac5729..637067d719 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_sparse1.cpp
@@ -325,7 +325,8 @@ void Gint_k::distribute_pvdpR_soc_sparseMatrix(
 
 }
 
-void Gint_k::cal_dvlocal_R_sparseMatrix(const int &current_spin, const double &sparse_threshold, LCAO_Matrix *LM,Parallel_Orbitals *pv)
+void Gint_k::cal_dvlocal_R_sparseMatrix(const int &current_spin, const double &sparse_threshold, 
+            LCAO_Matrix *LM,Parallel_Orbitals *pv,LCAO_Orbitals &orb,UnitCell &ucell,Grid_Driver &gdriver)
 {
     ModuleBase::TITLE("Gint_k","cal_vlocal_R_sparseMatrix");
 
@@ -341,40 +342,40 @@ void Gint_k::cal_dvlocal_R_sparseMatrix(const int &current_spin, const double &s
     std::complex<double> temp_value_complex;
 
     ModuleBase::Vector3<double> tau1, dtau;
-    for(int T1=0; T1<GlobalC::ucell.ntype; ++T1)
+    for(int T1=0; T1<ucell.ntype; ++T1)
     {
-        for(int I1=0; I1<GlobalC::ucell.atoms[T1].na; ++I1)
+        for(int I1=0; I1<ucell.atoms[T1].na; ++I1)
         {
-            const int iat = GlobalC::ucell.itia2iat(T1,I1);
+            const int iat = ucell.itia2iat(T1,I1);
             if(this->gridt->in_this_processor[iat])
             {
-                Atom* atom1 = &GlobalC::ucell.atoms[T1];
-                const int start1 = GlobalC::ucell.itiaiw2iwt(T1, I1, 0);
+                Atom* atom1 = &ucell.atoms[T1];
+                const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
 
                 const int DM_start = this->gridt->nlocstartg[iat];
-                tau1 = GlobalC::ucell.atoms[T1].tau[I1];
-                GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
+                tau1 = ucell.atoms[T1].tau[I1];
+                gdriver.Find_atom(ucell, tau1, T1, I1);
                 int nad2 = 0;
 
-                for(int ad = 0; ad < GlobalC::GridD.getAdjacentNum()+1; ad++)
+                for(int ad = 0; ad < gdriver.getAdjacentNum()+1; ad++)
                 {
-                    const int T2 = GlobalC::GridD.getType(ad);
-                    const int I2 = GlobalC::GridD.getNatom(ad);
-                    const int iat2 = GlobalC::ucell.itia2iat(T2, I2);
+                    const int T2 = gdriver.getType(ad);
+                    const int I2 = gdriver.getNatom(ad);
+                    const int iat2 = ucell.itia2iat(T2, I2);
 
                     if(this->gridt->in_this_processor[iat2])
                     {
-                        Atom* atom2 = &GlobalC::ucell.atoms[T2];
-                        dtau = GlobalC::GridD.getAdjacentTau(ad) - tau1;
-                        double distance = dtau.norm() * GlobalC::ucell.lat0;
-                        double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
+                        Atom* atom2 = &ucell.atoms[T2];
+                        dtau = gdriver.getAdjacentTau(ad) - tau1;
+                        double distance = dtau.norm() * ucell.lat0;
+                        double rcut = orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut();
 
                         if(distance < rcut)
                         {
-                            const int start2 = GlobalC::ucell.itiaiw2iwt(T2, I2, 0);
-							Abfs::Vector3_Order<int> dR(GlobalC::GridD.getBox(ad).x, 
-									GlobalC::GridD.getBox(ad).y, 
-									GlobalC::GridD.getBox(ad).z);
+                            const int start2 = ucell.itiaiw2iwt(T2, I2, 0);
+							Abfs::Vector3_Order<int> dR(gdriver.getBox(ad).x, 
+									gdriver.getBox(ad).y, 
+									gdriver.getBox(ad).z);
 
                             int ixxx = DM_start + this->gridt->find_R2st[iat][nad2];
 
diff --git a/source/module_hamilt_lcao/module_gint/gint_rho.cpp b/source/module_hamilt_lcao/module_gint/gint_rho.cpp
index 4f9a58b64f..ff5bda6d7c 100644
--- a/source/module_hamilt_lcao/module_gint/gint_rho.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_rho.cpp
@@ -15,6 +15,7 @@ void Gint::gint_kernel_rho(
 	const double delta_r,
 	int* vindex,
 	const int LD_pool,
+	const UnitCell& ucell,
 	Gint_inout *inout)
 {
 	//prepare block information
@@ -28,7 +29,8 @@ void Gint::gint_kernel_rho(
 		this->bxyz, na_grid, grid_index, delta_r,
 		block_index, block_size, 
 		cal_flag,
-		psir_ylm.ptr_2D);
+		psir_ylm.ptr_2D,
+		ucell);
 
 	for(int is=0; is<GlobalV::NSPIN; ++is)
 	{
diff --git a/source/module_hamilt_lcao/module_gint/gint_tau.cpp b/source/module_hamilt_lcao/module_gint/gint_tau.cpp
index f6f82f8a1c..8cb8665d0b 100644
--- a/source/module_hamilt_lcao/module_gint/gint_tau.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_tau.cpp
@@ -8,14 +8,15 @@
 #include "module_base/blas_connector.h"
 #include "module_base/timer.h"
 #include "gint_tools.h"
-
+#include "module_hamilt_lcao/module_gint/grid_technique.h"
 void Gint::gint_kernel_tau(
 	const int na_grid,
 	const int grid_index,
 	const double delta_r,
 	int* vindex,
 	const int LD_pool,
-	Gint_inout *inout)
+	Gint_inout *inout,
+	const UnitCell &ucell)
 {
 	//prepare block information
 	int * block_iw, * block_index, * block_size;
@@ -35,8 +36,8 @@ void Gint::gint_kernel_tau(
 		psir_ylm.ptr_2D,
 		dpsir_ylm_x.ptr_2D,
 		dpsir_ylm_y.ptr_2D,
-		dpsir_ylm_z.ptr_2D
-	);
+		dpsir_ylm_z.ptr_2D,
+		ucell);
 
 	for(int is=0; is<GlobalV::NSPIN; ++is)
 	{
diff --git a/source/module_hamilt_lcao/module_gint/gint_tools.cpp b/source/module_hamilt_lcao/module_gint/gint_tools.cpp
index a8756ef402..739c2cc7c2 100644
--- a/source/module_hamilt_lcao/module_gint/gint_tools.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_tools.cpp
@@ -184,7 +184,8 @@ namespace Gint_Tools
 		const int*const block_index,  		// block_index[na_grid+1], count total number of atomis orbitals
 		const int*const block_size, 		// block_size[na_grid],	number of columns of a band
 		const bool*const*const cal_flag,
-		double*const*const psir_ylm) 	// cal_flag[bxyz][na_grid],	whether the atom-grid distance is larger than cutoff
+		double*const*const psir_ylm,
+		const UnitCell& ucell) 	// cal_flag[bxyz][na_grid],	whether the atom-grid distance is larger than cutoff
     {
 		ModuleBase::timer::tick("Gint_Tools", "cal_psir_ylm");
         std::vector<double> ylma;
@@ -298,7 +299,8 @@ namespace Gint_Tools
 		double*const*const psir_ylm,
 		double*const*const dpsir_ylm_x,
 		double*const*const dpsir_ylm_y,
-		double*const*const dpsir_ylm_z)
+		double*const*const dpsir_ylm_z,
+		const UnitCell& ucell)
 	{
 		ModuleBase::timer::tick("Gint_Tools", "cal_dpsir_ylm");
 		for (int id=0; id<na_grid; id++)
diff --git a/source/module_hamilt_lcao/module_gint/gint_tools.h b/source/module_hamilt_lcao/module_gint/gint_tools.h
index 12c93ae699..99c1028e44 100644
--- a/source/module_hamilt_lcao/module_gint/gint_tools.h
+++ b/source/module_hamilt_lcao/module_gint/gint_tools.h
@@ -203,7 +203,8 @@ namespace Gint_Tools
 		const int*const block_index,  // count total number of atomis orbitals
 		const int*const block_size, 
 		const bool*const*const cal_flag,
-		double*const*const psir_ylm); // whether the atom-grid distance is larger than cutoff
+		double*const*const psir_ylm,
+        const UnitCell& ucell); // whether the atom-grid distance is larger than cutoff
 
 	// psir_ylm and dpsir_ylm, both[pw.bxyz][LD_pool]
     void cal_dpsir_ylm(
@@ -218,7 +219,8 @@ namespace Gint_Tools
 		double*const*const psir_ylm,
 		double*const*const dpsir_ylm_x,
 		double*const*const dpsir_ylm_y,
-		double*const*const dpsir_ylm_z);
+		double*const*const dpsir_ylm_z,
+        const UnitCell& ucell);
 
 	// dpsir_ylm * (r-R), R is the atomic position
     void cal_dpsirr_ylm(
diff --git a/source/module_hamilt_lcao/module_gint/gint_vl.cpp b/source/module_hamilt_lcao/module_gint/gint_vl.cpp
index cf443ed75e..4e79e3c384 100644
--- a/source/module_hamilt_lcao/module_gint/gint_vl.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_vl.cpp
@@ -24,6 +24,7 @@ void Gint::gint_kernel_vlocal(
 	double* vldr3,
 	const int LD_pool,
 	double* pvpR_in,
+	const UnitCell& ucell,
 	hamilt::HContainer<double>* hR)
 {
 	//prepare block information
@@ -37,7 +38,8 @@ void Gint::gint_kernel_vlocal(
 		this->bxyz, na_grid, grid_index, delta_r,
 		block_index, block_size, 
 		cal_flag,
-		psir_ylm.ptr_2D);
+		psir_ylm.ptr_2D,
+		ucell);
 	
 	//calculating f_mu(r) = v(r)*psi_mu(r)*dv
 	const Gint_Tools::Array_Pool<double> psir_vlbr3 = Gint_Tools::get_psir_vlbr3(
@@ -56,7 +58,7 @@ void Gint::gint_kernel_vlocal(
     {
         this->cal_meshball_vlocal_k(
             na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag,
-            psir_ylm.ptr_2D, psir_vlbr3.ptr_2D, pvpR_in);
+            psir_ylm.ptr_2D, psir_vlbr3.ptr_2D, pvpR_in,ucell);
     }
 
     //release memories
@@ -80,7 +82,8 @@ void Gint::gint_kernel_dvlocal(
 	const int LD_pool,
 	double* pvdpRx,
 	double* pvdpRy,
-	double* pvdpRz)
+	double* pvdpRz,
+	const UnitCell& ucell)
 {
 	//prepare block information
 	int * block_iw, * block_index, * block_size;
@@ -94,7 +97,7 @@ void Gint::gint_kernel_dvlocal(
 	Gint_Tools::Array_Pool<double> dpsir_ylm_z(this->bxyz, LD_pool);
 
 	Gint_Tools::cal_dpsir_ylm(*this->gridt, this->bxyz, na_grid, grid_index, delta_r,	block_index, block_size, cal_flag,
-		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D);
+		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D,ucell);
 
 	//calculating f_mu(r) = v(r)*psi_mu(r)*dv
 	const Gint_Tools::Array_Pool<double> psir_vlbr3 = Gint_Tools::get_psir_vlbr3(
@@ -104,13 +107,13 @@ void Gint::gint_kernel_dvlocal(
 	//and accumulates to the corresponding element in Hamiltonian
 	this->cal_meshball_vlocal_k(
 		na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag,
-		psir_vlbr3.ptr_2D, dpsir_ylm_x.ptr_2D, pvdpRx);
+		psir_vlbr3.ptr_2D, dpsir_ylm_x.ptr_2D, pvdpRx,ucell);
 	this->cal_meshball_vlocal_k(
 		na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag,
-		psir_vlbr3.ptr_2D, dpsir_ylm_y.ptr_2D, pvdpRy);
+		psir_vlbr3.ptr_2D, dpsir_ylm_y.ptr_2D, pvdpRy,ucell);
 	this->cal_meshball_vlocal_k(
 		na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag,
-		psir_vlbr3.ptr_2D, dpsir_ylm_z.ptr_2D, pvdpRz);
+		psir_vlbr3.ptr_2D, dpsir_ylm_z.ptr_2D, pvdpRz,ucell);
 
     //release memories
 	delete[] block_iw;
@@ -133,6 +136,7 @@ void Gint::gint_kernel_vlocal_meta(
 	double* vkdr3,
 	const int LD_pool,
 	double* pvpR_in,
+	const UnitCell& ucell,
 	hamilt::HContainer<double>* hR)
 {
 	//prepare block information
@@ -153,7 +157,8 @@ void Gint::gint_kernel_vlocal_meta(
 		psir_ylm.ptr_2D,
 		dpsir_ylm_x.ptr_2D,
 		dpsir_ylm_y.ptr_2D,
-		dpsir_ylm_z.ptr_2D
+		dpsir_ylm_z.ptr_2D,
+		ucell
 	);
 	
 	//calculating f_mu(r) = v(r)*psi_mu(r)*dv
@@ -192,16 +197,16 @@ void Gint::gint_kernel_vlocal_meta(
     {
         this->cal_meshball_vlocal_k(
             na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag,
-            psir_ylm.ptr_2D, psir_vlbr3.ptr_2D, pvpR_in);
+            psir_ylm.ptr_2D, psir_vlbr3.ptr_2D, pvpR_in,ucell);
 		this->cal_meshball_vlocal_k(
             na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag,
-			dpsir_ylm_x.ptr_2D, dpsix_vlbr3.ptr_2D, pvpR_in);
+			dpsir_ylm_x.ptr_2D, dpsix_vlbr3.ptr_2D, pvpR_in,ucell);
 		this->cal_meshball_vlocal_k(
             na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag,
-			dpsir_ylm_y.ptr_2D, dpsiy_vlbr3.ptr_2D, pvpR_in);
+			dpsir_ylm_y.ptr_2D, dpsiy_vlbr3.ptr_2D, pvpR_in,ucell);
 		this->cal_meshball_vlocal_k(
             na_grid, LD_pool, grid_index, block_size, block_index, block_iw, cal_flag,
-			dpsir_ylm_z.ptr_2D, dpsiz_vlbr3.ptr_2D, pvpR_in);
+			dpsir_ylm_z.ptr_2D, dpsiz_vlbr3.ptr_2D, pvpR_in,ucell);
     }
 
     //release memories
@@ -317,7 +322,8 @@ void Gint::cal_meshball_vlocal_k(
 	bool** cal_flag,  
 	double** psir_ylm,
 	double** psir_vlbr3,
-	double* pvpR)
+	double* pvpR,
+	const UnitCell& ucell)
 {
     auto find_offset = [&](const int id1, const int id2, const int iat1, const int iat2)->int
     {
@@ -352,14 +358,14 @@ void Gint::cal_meshball_vlocal_k(
 		int m=block_size[ia1];
 		const int mcell_index1 = this->gridt->bcell_start[grid_index] + ia1;
 		const int iat1= this->gridt->which_atom[mcell_index1];
-		const int T1 = GlobalC::ucell.iat2it[iat1];
+		const int T1 = ucell.iat2it[iat1];
 		const int id1 = this->gridt->which_unitcell[mcell_index1];
 		const int DM_start = this->gridt->nlocstartg[iat1];
 		for(int ia2=0; ia2<na_grid; ++ia2)
 		{
 			const int mcell_index2 = this->gridt->bcell_start[grid_index] + ia2;
 			const int iat2 = this->gridt->which_atom[mcell_index2];
-			const int T2 = GlobalC::ucell.iat2it[iat2];
+			const int T2 = ucell.iat2it[iat2];
 			if (iat1 <= iat2)
 			{
     			int cal_num=0;
@@ -373,7 +379,7 @@ void Gint::cal_meshball_vlocal_k(
     			
                 const int idx2=block_index[ia2];
         		int n=block_size[ia2];
-				//const int I2 = GlobalC::ucell.iat2ia[iat2];
+				//const int I2 = ucell.iat2ia[iat2];
 				const int mcell_index2 = this->gridt->bcell_start[grid_index] + ia2;
 				const int id2 = this->gridt->which_unitcell[mcell_index2];
 				int offset;
diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp
index 8f23f842eb..8aa3219c15 100644
--- a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp
@@ -4,7 +4,7 @@
 #include "module_base/timer.h"
 #include "module_basis/module_ao/ORB_read.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
-
+#include "module_cell/unitcell.h"
 Grid_BigCell::Grid_BigCell()
 {
 	this->flag_tib = false;
@@ -37,7 +37,7 @@ Grid_BigCell::~Grid_BigCell()
 	delete[] index_atom;
 }
 
-void Grid_BigCell::init_big_latvec(void)
+void Grid_BigCell::init_big_latvec(const UnitCell& ucell)
 {
 	ModuleBase::TITLE("Grid_BigCell","init_big_latvec");
 	// initialize the mesh cell vectors.
@@ -46,17 +46,17 @@ void Grid_BigCell::init_big_latvec(void)
 	assert(nbz>=0);
 
 	//size of each big room (same shape with unitcell)
-	this->bigcell_vec1[0]= GlobalC::ucell.a1.x / (double)nbx * GlobalC::ucell.lat0;
-	this->bigcell_vec1[1]= GlobalC::ucell.a1.y / (double)nbx * GlobalC::ucell.lat0;
-	this->bigcell_vec1[2]= GlobalC::ucell.a1.z / (double)nbx * GlobalC::ucell.lat0;
+	this->bigcell_vec1[0]= ucell.a1.x / (double)nbx * ucell.lat0;
+	this->bigcell_vec1[1]= ucell.a1.y / (double)nbx * ucell.lat0;
+	this->bigcell_vec1[2]= ucell.a1.z / (double)nbx * ucell.lat0;
 
-	this->bigcell_vec2[0]= GlobalC::ucell.a2.x / (double)nby * GlobalC::ucell.lat0;
-	this->bigcell_vec2[1]= GlobalC::ucell.a2.y / (double)nby * GlobalC::ucell.lat0;
-	this->bigcell_vec2[2]= GlobalC::ucell.a2.z / (double)nby * GlobalC::ucell.lat0;
+	this->bigcell_vec2[0]= ucell.a2.x / (double)nby * ucell.lat0;
+	this->bigcell_vec2[1]= ucell.a2.y / (double)nby * ucell.lat0;
+	this->bigcell_vec2[2]= ucell.a2.z / (double)nby * ucell.lat0;
 
-	this->bigcell_vec3[0]= GlobalC::ucell.a3.x / (double)nbz * GlobalC::ucell.lat0;
-	this->bigcell_vec3[1]= GlobalC::ucell.a3.y / (double)nbz * GlobalC::ucell.lat0;
-	this->bigcell_vec3[2]= GlobalC::ucell.a3.z / (double)nbz * GlobalC::ucell.lat0;
+	this->bigcell_vec3[0]= ucell.a3.x / (double)nbz * ucell.lat0;
+	this->bigcell_vec3[1]= ucell.a3.y / (double)nbz * ucell.lat0;
+	this->bigcell_vec3[2]= ucell.a3.z / (double)nbz * ucell.lat0;
 
 	this->bigcell_latvec0.e11 = this->bigcell_vec1[0];
 	this->bigcell_latvec0.e12 = this->bigcell_vec1[1];
@@ -105,15 +105,15 @@ void Grid_BigCell::init_big_latvec(void)
 }
 
 
-void Grid_BigCell::init_grid_expansion(void)
+void Grid_BigCell::init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals &orb)
 {
 	ModuleBase::TITLE("Grid_BigCell","init_grid_expansion");
 
 	// calculate the max cutoff radius among all orbitals.
 	// then we will use this parameter to generate grid expansion.
-	for(int T=0; T<GlobalC::ucell.ntype; T++)
+	for(int T=0; T<ucell.ntype; T++)
 	{
-		this->orbital_rmax = std::max( GlobalC::ORB.Phi[T].getRcut(), this->orbital_rmax);
+		this->orbital_rmax = std::max( orb.Phi[T].getRcut(), this->orbital_rmax);
 	}
 	if(GlobalV::test_gridt)ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"rmax of periodic grid (bohr)",orbital_rmax);
 
@@ -160,7 +160,7 @@ void Grid_BigCell::init_grid_expansion(void)
 }
 
 
-void Grid_BigCell::init_tau_in_bigcell(void)
+void Grid_BigCell::init_tau_in_bigcell(const UnitCell& ucell)
 {
 	ModuleBase::TITLE("Grid_BigCell","init_tau_in_bigcell");
 	
@@ -169,8 +169,8 @@ void Grid_BigCell::init_tau_in_bigcell(void)
 
 	if(!flag_tib)
 	{
-		this->tau_in_bigcell = new double* [GlobalC::ucell.nat];
-		for(int i=0; i<GlobalC::ucell.nat; i++)
+		this->tau_in_bigcell = new double* [ucell.nat];
+		for(int i=0; i<ucell.nat; i++)
 		{
 			this->tau_in_bigcell[i] = new double[3];
 		}
@@ -179,9 +179,9 @@ void Grid_BigCell::init_tau_in_bigcell(void)
 		// allocate space, these arrays record which meshcell
 		// the atom is in.
 		delete[] index_atom;
-		this->index_atom = new int[GlobalC::ucell.nat];
+		this->index_atom = new int[ucell.nat];
 
-		ModuleBase::Memory::record("tau_in_bigcell", sizeof(double) * GlobalC::ucell.nat*3);
+		ModuleBase::Memory::record("tau_in_bigcell", sizeof(double) * ucell.nat*3);
 	}
 	
 	// get the fraction number of (i,j,k)
@@ -189,22 +189,22 @@ void Grid_BigCell::init_tau_in_bigcell(void)
 	int iat=0;
 	int ii,jj,kk;
 	double delta[3];
-	for(int it=0; it<GlobalC::ucell.ntype; it++)
+	for(int it=0; it<ucell.ntype; it++)
 	{
-		for(int ia=0; ia<GlobalC::ucell.atoms[it].na; ia++)
+		for(int ia=0; ia<ucell.atoms[it].na; ia++)
 		{
 			// direct positions of atoms calculated from cartesian coordinates.
 			// not used because the factrion may be <0 (although very small, such as
 			// -1.0e-15) mohan note 2012-07-03
-			//fraction = ( GlobalC::ucell.atoms[it].tau[ia] * GlobalC::ucell.lat0 )* this->bigcell_GT;
+			//fraction = ( ucell.atoms[it].tau[ia] * ucell.lat0 )* this->bigcell_GT;
 
 			//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 			// mohan add 2012-07-03,
 			// this can make sure faction are always larger than 0.
 			//!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-			fraction.x = GlobalC::ucell.atoms[it].taud[ia].x / (1.0/(double)nbx);
-			fraction.y = GlobalC::ucell.atoms[it].taud[ia].y / (1.0/(double)nby);
-			fraction.z = GlobalC::ucell.atoms[it].taud[ia].z / (1.0/(double)nbz);
+			fraction.x = ucell.atoms[it].taud[ia].x / (1.0/(double)nbx);
+			fraction.y = ucell.atoms[it].taud[ia].y / (1.0/(double)nby);
+			fraction.z = ucell.atoms[it].taud[ia].z / (1.0/(double)nbz);
 
 			// never use the following, especially for k-algorithm,
 			// it may move the atom to a cell that it doesn't belong 
@@ -222,9 +222,9 @@ void Grid_BigCell::init_tau_in_bigcell(void)
 			if( fraction.x < 0 || fraction.y < 0 || fraction.z < 0)
 			{
 				std::cout << " Atom positions " << std::endl;
-				std::cout << GlobalC::ucell.atoms[it].tau[ia].x << " " ;
-				std::cout << GlobalC::ucell.atoms[it].tau[ia].y << " " ;
-				std::cout << GlobalC::ucell.atoms[it].tau[ia].z << " " ;
+				std::cout << ucell.atoms[it].tau[ia].x << " " ;
+				std::cout << ucell.atoms[it].tau[ia].y << " " ;
+				std::cout << ucell.atoms[it].tau[ia].z << " " ;
 				std::cout << " fraction " << std::endl;
 				std::cout << fraction.x << " ";
 				std::cout << fraction.y << " ";
diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.h b/source/module_hamilt_lcao/module_gint/grid_bigcell.h
index 0cc94d000d..bae7272652 100644
--- a/source/module_hamilt_lcao/module_gint/grid_bigcell.h
+++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.h
@@ -20,7 +20,7 @@ class Grid_BigCell: public Grid_MeshCell
 	protected:
 
 	//---------------------------------
-	void init_big_latvec(void);
+	void init_big_latvec(const UnitCell &ucell);
 
 	double bigcell_vec1[3];
 	double bigcell_vec2[3];
@@ -32,7 +32,7 @@ class Grid_BigCell: public Grid_MeshCell
 
 
 	//---------------------------------
-	void init_grid_expansion(void);
+	void init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals &orb);
 
 	// get the max radius of all orbitals.
 	// which will use to generate grid expansion,
@@ -59,7 +59,7 @@ class Grid_BigCell: public Grid_MeshCell
 
 
 	//---------------------------------
-	void init_tau_in_bigcell(void);
+	void init_tau_in_bigcell(const UnitCell& ucell);
 
 	//this flag will be false at first and turned to true after memory of tau_in_meshcell has been allocated.  
 	bool flag_tib;
diff --git a/source/module_hamilt_lcao/module_gint/grid_meshcell.cpp b/source/module_hamilt_lcao/module_gint/grid_meshcell.cpp
index 05eacedede..7f1914560a 100644
--- a/source/module_hamilt_lcao/module_gint/grid_meshcell.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_meshcell.cpp
@@ -73,7 +73,7 @@ void Grid_MeshCell::set_grid_dim(
 
 
 // (1)
-void Grid_MeshCell::init_latvec(void)
+void Grid_MeshCell::init_latvec(const UnitCell &ucell)
 {
 	ModuleBase::TITLE("Grid_MeshCell","init_latvec");
 	// initialize the mesh cell vectors.
@@ -82,17 +82,17 @@ void Grid_MeshCell::init_latvec(void)
 	assert(ncz>0);
 
 	//size of each room (same shape with unitcell)
-	this->meshcell_vec1[0]= GlobalC::ucell.a1.x / (double)ncx * GlobalC::ucell.lat0;
-	this->meshcell_vec1[1]= GlobalC::ucell.a1.y / (double)ncx * GlobalC::ucell.lat0;
-	this->meshcell_vec1[2]= GlobalC::ucell.a1.z / (double)ncx * GlobalC::ucell.lat0;
+	this->meshcell_vec1[0]= ucell.a1.x / (double)ncx * ucell.lat0;
+	this->meshcell_vec1[1]= ucell.a1.y / (double)ncx * ucell.lat0;
+	this->meshcell_vec1[2]= ucell.a1.z / (double)ncx * ucell.lat0;
 
-	this->meshcell_vec2[0]= GlobalC::ucell.a2.x / (double)ncy * GlobalC::ucell.lat0;
-	this->meshcell_vec2[1]= GlobalC::ucell.a2.y / (double)ncy * GlobalC::ucell.lat0;
-	this->meshcell_vec2[2]= GlobalC::ucell.a2.z / (double)ncy * GlobalC::ucell.lat0;
+	this->meshcell_vec2[0]= ucell.a2.x / (double)ncy * ucell.lat0;
+	this->meshcell_vec2[1]= ucell.a2.y / (double)ncy * ucell.lat0;
+	this->meshcell_vec2[2]= ucell.a2.z / (double)ncy * ucell.lat0;
 
-	this->meshcell_vec3[0]= GlobalC::ucell.a3.x / (double)ncz * GlobalC::ucell.lat0;
-	this->meshcell_vec3[1]= GlobalC::ucell.a3.y / (double)ncz * GlobalC::ucell.lat0;
-	this->meshcell_vec3[2]= GlobalC::ucell.a3.z / (double)ncz * GlobalC::ucell.lat0;
+	this->meshcell_vec3[0]= ucell.a3.x / (double)ncz * ucell.lat0;
+	this->meshcell_vec3[1]= ucell.a3.y / (double)ncz * ucell.lat0;
+	this->meshcell_vec3[2]= ucell.a3.z / (double)ncz * ucell.lat0;
 
 	this->meshcell_latvec0.e11 = this->meshcell_vec1[0];
 	this->meshcell_latvec0.e12 = this->meshcell_vec1[1];
diff --git a/source/module_hamilt_lcao/module_gint/grid_meshcell.h b/source/module_hamilt_lcao/module_gint/grid_meshcell.h
index 74762abf59..71253ed747 100644
--- a/source/module_hamilt_lcao/module_gint/grid_meshcell.h
+++ b/source/module_hamilt_lcao/module_gint/grid_meshcell.h
@@ -4,7 +4,7 @@
 #include "module_base/global_variable.h"
 #include "module_base/matrix3.h"
 #include "grid_meshk.h"
-
+#include "module_cell/unitcell.h"
 class Grid_MeshCell: public Grid_MeshK
 {
 	public:
@@ -39,7 +39,7 @@ class Grid_MeshCell: public Grid_MeshK
 			const int &nbzp_in);
 
 
-	void init_latvec(void);
+	void init_latvec(const UnitCell &ucell);
     void init_meshcell_pos(void);
 
     void cal_extended_cell(const int &dxe, const int &dye, const int &dze);
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
index 691650aa63..6286dcb831 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
@@ -5,6 +5,7 @@
 #include "module_base/timer.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
 #include "module_hsolver/kernels/cuda/helper_cuda.h"
+
 Grid_Technique::Grid_Technique()
 {
     this->nlocdimg = nullptr;
@@ -101,7 +102,7 @@ Grid_Technique::~Grid_Technique()
 #if ((defined __CUDA) /* || (defined __ROCM) */)
     if(GlobalV::device_flag == "gpu")
     {
-        free_gpu_gint_variables();
+        free_gpu_gint_variables(GlobalC::ucell);
     }
 #endif
 }
@@ -124,7 +125,9 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in,
                                   const int& nbzp_in,
                                   const int& ny,
                                   const int& nplane,
-                                  const int& startz_current)
+                                  const int& startz_current,
+                                  const UnitCell& ucell,
+                                  const LCAO_Orbitals &orb)
 {
     ModuleBase::TITLE("Grid_Technique", "init");
     ModuleBase::timer::tick("Grid_Technique", "init");
@@ -150,32 +153,32 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in,
                        nbzp_start_in,
                        nbzp_in);
 
-    this->init_latvec();
+    this->init_latvec(ucell);
 
-    this->init_big_latvec();
+    this->init_big_latvec(ucell);
 
     this->init_meshcell_pos();
 
     // (2) expand the grid
-    this->init_grid_expansion();
+    this->init_grid_expansion(ucell,orb);
 
     // (3) calculate the extended grid.
     this->cal_extended_cell(this->dxe, this->dye, this->dze);
 
-    this->init_tau_in_bigcell();
+    this->init_tau_in_bigcell(ucell);
 
     // init meshball
     this->delete_meshball_positions(); // LiuXh add 2018-12-14
 
     this->init_meshball();
 
-    this->init_atoms_on_grid(ny, nplane, startz_current);
+    this->init_atoms_on_grid(ny, nplane, startz_current,ucell);
 
-    this->cal_trace_lo();
+    this->cal_trace_lo(ucell);
 #if ((defined __CUDA) /* || (defined __ROCM) */)
     if(GlobalV::device_flag == "gpu")
     {
-        this->init_gpu_gint_variables();
+        this->init_gpu_gint_variables(ucell,LCAO_Orbitals &orb);
     }
 #endif
 
@@ -237,7 +240,8 @@ void Grid_Technique::get_startind(const int& ny,
 // mohan add 2021-04-06
 void Grid_Technique::init_atoms_on_grid(const int& ny,
                                         const int& nplane,
-                                        const int& startz_current)
+                                        const int& startz_current,
+                                        const UnitCell& ucell)
 {
     ModuleBase::TITLE("Grid_Technique", "init_atoms_on_grid");
 
@@ -269,8 +273,8 @@ void Grid_Technique::init_atoms_on_grid(const int& ny,
     // (3) Find the atoms using
     // when doing grid integration.
     delete[] in_this_processor;
-    this->in_this_processor = new bool[GlobalC::ucell.nat];
-    for (int i = 0; i < GlobalC::ucell.nat; i++)
+    this->in_this_processor = new bool[ucell.nat];
+    for (int i = 0; i < ucell.nat; i++)
     {
         in_this_processor[i] = false;
     }
@@ -288,9 +292,9 @@ void Grid_Technique::init_atoms_on_grid(const int& ny,
 	int normal=0;
 	this->total_atoms_on_grid = 0;
 	int nat_local = 0;
-	for(int it=0; it<GlobalC::ucell.ntype; it++)
+	for(int it=0; it<ucell.ntype; it++)
 	{
-		for(int ia=0; ia<GlobalC::ucell.atoms[it].na; ia++)
+		for(int ia=0; ia<ucell.atoms[it].na; ia++)
 		{
 			for(int im=0; im< this->meshball_ncells; im++)
 			{
@@ -351,7 +355,7 @@ void Grid_Technique::init_atoms_on_grid(const int& ny,
     if (nat_local > 0)
     {
         this->trace_iat.resize(nat_local);
-        for (int iat = GlobalC::ucell.nat - 1; iat >= 0; iat--)
+        for (int iat = ucell.nat - 1; iat >= 0; iat--)
         {
             if (this->in_this_processor[iat])
             {
@@ -363,7 +367,7 @@ void Grid_Technique::init_atoms_on_grid(const int& ny,
     // need how_many_atoms first.
     this->cal_grid_integration_index();
     // bcell_start is needed.
-    this->init_atoms_on_grid2(index2normal);
+    this->init_atoms_on_grid2(index2normal,ucell);
     delete[] index2normal;
     return;
 }
@@ -410,7 +414,7 @@ void Grid_Technique::check_bigcell(int*& ind_bigcell,
     return;
 }
 
-void Grid_Technique::init_atoms_on_grid2(const int* index2normal)
+void Grid_Technique::init_atoms_on_grid2(const int* index2normal,const UnitCell& ucell)
 {
     ModuleBase::TITLE("Grid_Techinique", "init_atoms_on_grid2");
 
@@ -455,9 +459,9 @@ void Grid_Technique::init_atoms_on_grid2(const int* index2normal)
 	int count = 0;
 	int iat = 0;
 	ModuleBase::GlobalFunc::ZEROS(this->how_many_atoms, nbxx);
-	for(int it=0; it<GlobalC::ucell.ntype; it++)
+	for(int it=0; it<ucell.ntype; it++)
 	{
-		for(int ia=0; ia<GlobalC::ucell.atoms[it].na; ia++)
+		for(int ia=0; ia<ucell.atoms[it].na; ia++)
 		{
 			// zero bigcell of meshball indicate ?
 			for(int im=0; im< this->meshball_ncells; im++)
@@ -555,12 +559,12 @@ void Grid_Technique::cal_grid_integration_index(void)
 }
 
 // set 'lgd' variable
-void Grid_Technique::cal_trace_lo(void)
+void Grid_Technique::cal_trace_lo(const UnitCell& ucell)
 {
     ModuleBase::TITLE("Grid_Technique", "cal_trace_lo");
     // save the atom information in trace_lo,
     // in fact the trace_lo dimension can be reduced
-    // to GlobalC::ucell.nat, but I think this is another way.
+    // to ucell.nat, but I think this is another way.
     delete[] trace_lo;
     this->trace_lo = new int[GlobalV::NLOCAL];
     for (int i = 0; i < GlobalV::NLOCAL; i++)
@@ -575,14 +579,14 @@ void Grid_Technique::cal_trace_lo(void)
     int iw_all = 0;
     int iw_local = 0;
 
-	for(int it=0; it<GlobalC::ucell.ntype; it++)
+	for(int it=0; it<ucell.ntype; it++)
 	{
-		for(int ia=0; ia<GlobalC::ucell.atoms[it].na; ia++)
+		for(int ia=0; ia<ucell.atoms[it].na; ia++)
 		{
 			if(this->in_this_processor[iat])
 			{
 				++lnat;
-				int nw0 = GlobalC::ucell.atoms[it].nw;
+				int nw0 = ucell.atoms[it].nw;
 				if(GlobalV::NSPIN==4)
 				{//added by zhengdy-soc, need to be double in soc
 					nw0 *= 2;
@@ -590,7 +594,7 @@ void Grid_Technique::cal_trace_lo(void)
 				}
 				else
 				{
-					this->lgd += GlobalC::ucell.atoms[it].nw;
+					this->lgd += ucell.atoms[it].nw;
 				}
 				
 				for(int iw=0; iw<nw0; iw++)
@@ -603,8 +607,8 @@ void Grid_Technique::cal_trace_lo(void)
 			else
 			{
 				// global index of atomic orbitals
-				iw_all += GlobalC::ucell.atoms[it].nw;
-				if(GlobalV::NSPIN==4) iw_all += GlobalC::ucell.atoms[it].nw;
+				iw_all += ucell.atoms[it].nw;
+				if(GlobalV::NSPIN==4) iw_all += ucell.atoms[it].nw;
 			}
 			++iat;
 		}
@@ -628,11 +632,11 @@ void Grid_Technique::cal_trace_lo(void)
 
 #if ((defined __CUDA) /* || (defined __ROCM) */)
 
-void Grid_Technique::init_gpu_gint_variables()
+void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,LCAO_Orbitals &orb)
 {
     if (is_malloced)
     {
-        free_gpu_gint_variables();
+        free_gpu_gint_variables(ucell);
     }
     double ylmcoef[100];
     ModuleBase::GlobalFunc::ZEROS(ylmcoef, 100);
@@ -648,67 +652,67 @@ void Grid_Technique::init_gpu_gint_variables()
 
     const Numerical_Orbital_Lm* pointer;
     double max_cut = 0;
-    for (int i = 0; i < GlobalC::ucell.ntype; i++)
+    for (int i = 0; i < ucell.ntype; i++)
     {
-        if (GlobalC::ORB.Phi[i].getRcut() > max_cut)
+        if (orb.Phi[i].getRcut() > max_cut)
         {
-            max_cut = GlobalC::ORB.Phi[i].getRcut();
+            max_cut = orb.Phi[i].getRcut();
         }
     }
 
-    int atom_nw_now[GlobalC::ucell.ntype];
-    int ucell_atom_nwl_now[GlobalC::ucell.ntype];
-    for (int i = 0; i < GlobalC::ucell.ntype; i++)
+    int atom_nw_now[ucell.ntype];
+    int ucell_atom_nwl_now[ucell.ntype];
+    for (int i = 0; i < ucell.ntype; i++)
     {
-        atom_nw_now[i] = GlobalC::ucell.atoms[i].nw;
-        ucell_atom_nwl_now[i] = GlobalC::ucell.atoms[i].nwl;
+        atom_nw_now[i] = ucell.atoms[i].nw;
+        ucell_atom_nwl_now[i] = ucell.atoms[i].nwl;
     }
 
     nr_max = static_cast<int>(1000 * max_cut) + 10;
-    // double psi_u_now[GlobalC::ucell.ntype * GlobalC::ucell.nwmax * nr_max *
+    // double psi_u_now[ucell.ntype * ucell.nwmax * nr_max *
     // 2];
     double* psi_u_now
-        = (double*)malloc(GlobalC::ucell.ntype * GlobalC::ucell.nwmax * nr_max
+        = (double*)malloc(ucell.ntype * ucell.nwmax * nr_max
                           * 2 * sizeof(double));
     memset(psi_u_now,
            0,
-           GlobalC::ucell.ntype * GlobalC::ucell.nwmax * nr_max * 2
+           ucell.ntype * ucell.nwmax * nr_max * 2
                * sizeof(double));
     bool* atom_iw2_new_now = (bool*)malloc(
-        GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(bool));
+        ucell.ntype * ucell.nwmax * sizeof(bool));
     memset(atom_iw2_new_now,
            0,
-           GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(bool));
-    int* atom_iw2_ylm_now = (int*)malloc(GlobalC::ucell.ntype
-                                         * GlobalC::ucell.nwmax * sizeof(int));
+           ucell.ntype * ucell.nwmax * sizeof(bool));
+    int* atom_iw2_ylm_now = (int*)malloc(ucell.ntype
+                                         * ucell.nwmax * sizeof(int));
     memset(atom_iw2_ylm_now,
            0,
-           GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(int));
-    int* atom_iw2_l_now = (int*)malloc(GlobalC::ucell.ntype
-                                       * GlobalC::ucell.nwmax * sizeof(int));
+           ucell.ntype * ucell.nwmax * sizeof(int));
+    int* atom_iw2_l_now = (int*)malloc(ucell.ntype
+                                       * ucell.nwmax * sizeof(int));
     memset(atom_iw2_l_now,
            0,
-           GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(int));
+           ucell.ntype * ucell.nwmax * sizeof(int));
 
     Atom* atomx;
-    for (int i = 0; i < GlobalC::ucell.ntype; i++)
+    for (int i = 0; i < ucell.ntype; i++)
     {
-        atomx = &GlobalC::ucell.atoms[i];
-        for (int j = 0; j < GlobalC::ucell.nwmax; j++)
+        atomx = &ucell.atoms[i];
+        for (int j = 0; j < ucell.nwmax; j++)
         {
             if (j < atomx->nw)
             {
-                atom_iw2_new_now[i * GlobalC::ucell.nwmax + j]
+                atom_iw2_new_now[i * ucell.nwmax + j]
                     = atomx->iw2_new[j];
-                atom_iw2_ylm_now[i * GlobalC::ucell.nwmax + j]
+                atom_iw2_ylm_now[i * ucell.nwmax + j]
                     = atomx->iw2_ylm[j];
-                atom_iw2_l_now[i * GlobalC::ucell.nwmax + j] = atomx->iw2l[j];
-                pointer = &GlobalC::ORB.Phi[i].PhiLN(atomx->iw2l[j],
+                atom_iw2_l_now[i * ucell.nwmax + j] = atomx->iw2l[j];
+                pointer = &orb.Phi[i].PhiLN(atomx->iw2l[j],
                                                      atomx->iw2n[j]);
                 for (int k = 0; k < nr_max; k++)
                 {
                     int index_temp
-                        = (i * GlobalC::ucell.nwmax * nr_max + j * nr_max + k)
+                        = (i * ucell.nwmax * nr_max + j * nr_max + k)
                           * 2;
                     if (k < pointer->nr_uniform)
                     {
@@ -721,55 +725,55 @@ void Grid_Technique::init_gpu_gint_variables()
     }
 
     checkCudaErrors(
-        cudaMalloc((void**)&atom_nw_g, GlobalC::ucell.ntype * sizeof(int)));
+        cudaMalloc((void**)&atom_nw_g, ucell.ntype * sizeof(int)));
     checkCudaErrors(cudaMemcpy(atom_nw_g,
                                atom_nw_now,
-                               GlobalC::ucell.ntype * sizeof(int),
+                               ucell.ntype * sizeof(int),
                                cudaMemcpyHostToDevice));
 
     checkCudaErrors(cudaMalloc((void**)&atom_nwl_g,
-                               GlobalC::ucell.ntype * sizeof(int)));
+                               ucell.ntype * sizeof(int)));
     checkCudaErrors(cudaMemcpy(atom_nwl_g,
                                ucell_atom_nwl_now,
-                               GlobalC::ucell.ntype * sizeof(int),
+                               ucell.ntype * sizeof(int),
                                cudaMemcpyHostToDevice));
 
     checkCudaErrors(cudaMalloc((void**)&psi_u_g,
-                               GlobalC::ucell.ntype * GlobalC::ucell.nwmax
+                               ucell.ntype * ucell.nwmax
                                    * nr_max * sizeof(double) * 2));
     checkCudaErrors(cudaMemcpy(psi_u_g,
                                psi_u_now,
-                               GlobalC::ucell.ntype * GlobalC::ucell.nwmax
+                               ucell.ntype * ucell.nwmax
                                    * nr_max * sizeof(double) * 2,
                                cudaMemcpyHostToDevice));
 
     checkCudaErrors(
         cudaMalloc((void**)&atom_new_g,
-                   GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(bool)));
+                   ucell.ntype * ucell.nwmax * sizeof(bool)));
     checkCudaErrors(
         cudaMalloc((void**)&atom_ylm_g,
-                   GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(int)));
+                   ucell.ntype * ucell.nwmax * sizeof(int)));
     checkCudaErrors(
         cudaMalloc((void**)&atom_l_g,
-                   GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(int)));
+                   ucell.ntype * ucell.nwmax * sizeof(int)));
 
     checkCudaErrors(
         cudaMemcpy(atom_new_g,
                    atom_iw2_new_now,
-                   GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(bool),
+                   ucell.ntype * ucell.nwmax * sizeof(bool),
                    cudaMemcpyHostToDevice));
     checkCudaErrors(
         cudaMemcpy(atom_ylm_g,
                    atom_iw2_ylm_now,
-                   GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(int),
+                   ucell.ntype * ucell.nwmax * sizeof(int),
                    cudaMemcpyHostToDevice));
     checkCudaErrors(
         cudaMemcpy(atom_l_g,
                    atom_iw2_l_now,
-                   GlobalC::ucell.ntype * GlobalC::ucell.nwmax * sizeof(int),
+                   ucell.ntype * ucell.nwmax * sizeof(int),
                    cudaMemcpyHostToDevice));
 
-    const int max_atom_pair_number = GlobalC::ucell.nat * GlobalC::ucell.nat;
+    const int max_atom_pair_number = ucell.nat * ucell.nat;
     checkCudaErrors(cudaMallocHost(
         (void**)&grid_vlocal_g,
         max_atom_pair_number
@@ -781,7 +785,7 @@ void Grid_Technique::init_gpu_gint_variables()
         grid_vlocal_g[iat] = nullptr;
     }
 
-    psir_size = nbzp * max_atom * bxyz * GlobalC::ucell.nwmax;
+    psir_size = nbzp * max_atom * bxyz * ucell.nwmax;
 
     checkCudaErrors(cudaMalloc((void**)&left_global_g,
                                psir_size * nstreams * sizeof(double)));
@@ -941,7 +945,7 @@ void Grid_Technique::init_gpu_gint_variables()
     free(atom_iw2_ylm_now);
 }
 
-void Grid_Technique::free_gpu_gint_variables()
+void Grid_Technique::free_gpu_gint_variables(const UnitCell& ucell)
 {
     if (!is_malloced)
     {
@@ -1018,7 +1022,7 @@ void Grid_Technique::free_gpu_gint_variables()
     checkCudaErrors(cudaFree(dot_product_g));
     checkCudaErrors(cudaFree(rho_g));
 
-    const int max_atom_pair_number = GlobalC::ucell.nat * GlobalC::ucell.nat;
+    const int max_atom_pair_number = ucell.nat * ucell.nat;
     for (int i = 0; i < max_atom_pair_number; i++)
     {
         if (grid_vlocal_g[i] != nullptr)
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h
index a32eb7feb2..2b95cd8a5f 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.h
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.h
@@ -4,6 +4,9 @@
 #include "grid_index.h"
 #include "grid_meshball.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
+#include  "module_cell/unitcell.h"
+#include "module_basis/module_ao/ORB_read.h"
+#include "module_cell/module_neighbor/sltk_grid_driver.h"
 #if ((defined __CUDA) /* || (defined __ROCM) */)
 #include <cuda_runtime.h>
 
@@ -93,7 +96,9 @@ class Grid_Technique : public Grid_MeshBall
                       const int& nbzp_in,
                       const int& ny,
                       const int& nplane,
-                      const int& startz_current);
+                      const int& startz_current,
+                      const UnitCell& ucell,
+                      const LCAO_Orbitals &orb);
 
     /// number of elements(basis-pairs) in this processon
     /// on all adjacent atoms-pairs(Grid division)
@@ -123,10 +128,12 @@ class Grid_Technique : public Grid_MeshBall
     // atoms on meshball
     void init_atoms_on_grid(const int& ny,
                             const int& nplane,
-                            const int& startz_current);
-    void init_atoms_on_grid2(const int* index2normal);
+                            const int& startz_current,
+                            const UnitCell& ucell);
+    void init_atoms_on_grid2(const int* index2normal,
+                            const UnitCell& ucell);
     void cal_grid_integration_index(void);
-    void cal_trace_lo(void);
+    void cal_trace_lo(const UnitCell& ucell);
     void check_bigcell(int*& ind_bigcell, bool*& bigcell_on_processor);
     void get_startind(const int& ny,
                       const int& nplane,
@@ -218,8 +225,8 @@ class Grid_Technique : public Grid_MeshBall
     matrix_multiple_func_type fastest_matrix_mul;
 
   private:
-    void init_gpu_gint_variables();
-    void free_gpu_gint_variables();
+    void init_gpu_gint_variables(const UnitCell& ucell,LCAO_Orbitals &orb);
+    void free_gpu_gint_variables(const UnitCell& ucell);
 
 #endif
 };
diff --git a/source/module_io/istate_envelope.cpp b/source/module_io/istate_envelope.cpp
index 1e369a36e3..8c95b0b3e6 100644
--- a/source/module_io/istate_envelope.cpp
+++ b/source/module_io/istate_envelope.cpp
@@ -109,7 +109,7 @@ void IState_Envelope::begin(const psi::Psi<double>* psid,
                         wfc_gamma_grid[is][i][j] = psid[0](i, j);
                 }
 #endif
-                gg.cal_env(wfc_gamma_grid[is][ib], pes->charge->rho[is]);
+                gg.cal_env(wfc_gamma_grid[is][ib], pes->charge->rho[is],GlobalC::ORB,GlobalC::ucell);
 
 
                 pes->charge->save_rho_before_sum_band(); //xiaohui add 2014-12-09
@@ -260,7 +260,7 @@ void IState_Envelope::begin(const psi::Psi<std::complex<double>>* psi,
                 }
 #endif
                 //deal with NSPIN=4
-                gk.cal_env_k(ik, lowf.wfc_k_grid[ik][ib], pes->charge->rho[ispin], kv.kvec_c, kv.kvec_d);
+                gk.cal_env_k(ik, lowf.wfc_k_grid[ik][ib], pes->charge->rho[ispin], kv.kvec_c, kv.kvec_d,GlobalC::ORB,GlobalC::ucell);
 
                 std::stringstream ss;
                 ss << global_out_dir << "BAND" << ib + 1 << "_k_" << ik / nspin0 + 1 << "_s_" << ispin + 1 << "_ENV.cube";

From e50ee792d66ee65359e122e878835971b199b3f0 Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Mon, 13 May 2024 16:29:56 +0800
Subject: [PATCH 04/23] fix LCAO_Orbitals in module gint

---
 source/module_hamilt_lcao/module_gint/grid_bigcell.cpp | 2 +-
 source/module_hamilt_lcao/module_gint/grid_bigcell.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp
index 8aa3219c15..9f470030b6 100644
--- a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp
@@ -105,7 +105,7 @@ void Grid_BigCell::init_big_latvec(const UnitCell& ucell)
 }
 
 
-void Grid_BigCell::init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals &orb)
+void Grid_BigCell::init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals& orb)
 {
 	ModuleBase::TITLE("Grid_BigCell","init_grid_expansion");
 
diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.h b/source/module_hamilt_lcao/module_gint/grid_bigcell.h
index bae7272652..dbb823bfe7 100644
--- a/source/module_hamilt_lcao/module_gint/grid_bigcell.h
+++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.h
@@ -32,7 +32,7 @@ class Grid_BigCell: public Grid_MeshCell
 
 
 	//---------------------------------
-	void init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals &orb);
+	void init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals& orb);
 
 	// get the max radius of all orbitals.
 	// which will use to generate grid expansion,

From 83ebe2ef2196dc2d07274c380af23e63c8368e19 Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Mon, 13 May 2024 17:10:31 +0800
Subject: [PATCH 05/23] fix error in compile without abacus

---
 source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp    | 4 ++--
 source/module_hamilt_lcao/module_gint/grid_bigcell.cpp   | 5 +++--
 source/module_hamilt_lcao/module_gint/grid_bigcell.h     | 2 +-
 source/module_hamilt_lcao/module_gint/grid_technique.cpp | 7 ++++++-
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
index 3f62519bba..14f2aa3288 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_pvpr.cpp
@@ -456,7 +456,7 @@ void Gint_k::transfer_pvpR(hamilt::HContainer<double> *hR,const UnitCell* ucell_
                 auto& tau1 = ucell.atoms[T1].tau[I1];
                 //gd.Find_atom(tau1);	
                 AdjacentAtomInfo adjs;
-                gd->Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs);	
+                gd->Find_atom(ucell, tau1, T1, I1, &adjs);	
                 // search for the adjacent atoms.
                 int nad = 0;
 
@@ -582,7 +582,7 @@ void Gint_k::transfer_pvpR(hamilt::HContainer<std::complex<double>> *hR,const Un
                 auto& tau1 = ucell.atoms[T1].tau[I1];
                 //gd.Find_atom(tau1);	
                 AdjacentAtomInfo adjs;
-                gd->Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs);	
+                gd->Find_atom(ucell, tau1, T1, I1, &adjs);	
                 // search for the adjacent atoms.
                 int nad = 0;
 
diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp
index 9f470030b6..1257f731df 100644
--- a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp
@@ -105,15 +105,16 @@ void Grid_BigCell::init_big_latvec(const UnitCell& ucell)
 }
 
 
-void Grid_BigCell::init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals& orb)
+void Grid_BigCell::init_grid_expansion(const UnitCell& ucell,double* rcut)
 {
 	ModuleBase::TITLE("Grid_BigCell","init_grid_expansion");
 
 	// calculate the max cutoff radius among all orbitals.
 	// then we will use this parameter to generate grid expansion.
+
 	for(int T=0; T<ucell.ntype; T++)
 	{
-		this->orbital_rmax = std::max( orb.Phi[T].getRcut(), this->orbital_rmax);
+		this->orbital_rmax = std::max( rcut[T], this->orbital_rmax);
 	}
 	if(GlobalV::test_gridt)ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"rmax of periodic grid (bohr)",orbital_rmax);
 
diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.h b/source/module_hamilt_lcao/module_gint/grid_bigcell.h
index dbb823bfe7..b6efd5669a 100644
--- a/source/module_hamilt_lcao/module_gint/grid_bigcell.h
+++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.h
@@ -32,7 +32,7 @@ class Grid_BigCell: public Grid_MeshCell
 
 
 	//---------------------------------
-	void init_grid_expansion(const UnitCell& ucell,const LCAO_Orbitals& orb);
+	void init_grid_expansion(const UnitCell& ucell,double* rcut);
 
 	// get the max radius of all orbitals.
 	// which will use to generate grid expansion,
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
index 6286dcb831..005daa41aa 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
@@ -160,7 +160,12 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in,
     this->init_meshcell_pos();
 
     // (2) expand the grid
-    this->init_grid_expansion(ucell,orb);
+    double* rcut=new double[ucell.ntype];
+    for(int T=0; T<ucell.ntype; T++)
+	{
+		rcut[T]=orb.Phi[T].getRcut();
+	}
+    this->init_grid_expansion(ucell,rcut);
 
     // (3) calculate the extended grid.
     this->cal_extended_cell(this->dxe, this->dye, this->dze);

From ff3e2629255658ffb34b82e5e65302fab7435e97 Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Mon, 13 May 2024 19:32:43 +0800
Subject: [PATCH 06/23] fix error in init_gpu_gint_variables

---
 source/module_hamilt_lcao/module_gint/grid_technique.cpp | 4 ++--
 source/module_hamilt_lcao/module_gint/grid_technique.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
index 005daa41aa..e4a39fc23b 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
@@ -183,7 +183,7 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in,
 #if ((defined __CUDA) /* || (defined __ROCM) */)
     if(GlobalV::device_flag == "gpu")
     {
-        this->init_gpu_gint_variables(ucell,LCAO_Orbitals &orb);
+        this->init_gpu_gint_variables(ucell,orb);
     }
 #endif
 
@@ -637,7 +637,7 @@ void Grid_Technique::cal_trace_lo(const UnitCell& ucell)
 
 #if ((defined __CUDA) /* || (defined __ROCM) */)
 
-void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,LCAO_Orbitals &orb)
+void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb)
 {
     if (is_malloced)
     {
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h
index 2b95cd8a5f..1824d2ae9e 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.h
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.h
@@ -225,7 +225,7 @@ class Grid_Technique : public Grid_MeshBall
     matrix_multiple_func_type fastest_matrix_mul;
 
   private:
-    void init_gpu_gint_variables(const UnitCell& ucell,LCAO_Orbitals &orb);
+    void init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb);
     void free_gpu_gint_variables(const UnitCell& ucell);
 
 #endif

From 7d06aa299525ed844dc72f7cdb4940b154f4f90a Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Mon, 13 May 2024 20:03:26 +0800
Subject: [PATCH 07/23] remove GlobalC in grid_technique and grid_bigcell

---
 .../hamilt_lcaodft/grid_init.cpp              | 11 ++-
 .../module_hamilt_lcao/module_gint/gint.cpp   | 73 ++++++++++---------
 source/module_hamilt_lcao/module_gint/gint.h  |  7 +-
 .../module_gint/grid_bigcell.cpp              |  3 +-
 .../module_gint/grid_bigcell.h                |  2 +-
 .../module_gint/grid_technique.cpp            | 10 +--
 .../module_gint/grid_technique.h              |  2 +-
 7 files changed, 61 insertions(+), 47 deletions(-)

diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/grid_init.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/grid_init.cpp
index a385c14d03..69c51fef43 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/grid_init.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/grid_init.cpp
@@ -17,7 +17,8 @@ void grid_prepare(
 {
     ModuleBase::TITLE("LCAO_domain","grid_prepare");
     ModuleBase::timer::tick("LCAO_domain","grid_prepare");
-
+	const UnitCell* ucell = &GlobalC::ucell;
+	const LCAO_Orbitals* orb = &GlobalC::ORB;
     if(GlobalV::GAMMA_ONLY_LOCAL)
     {
 		gint_gamma.prep_grid(
@@ -34,7 +35,9 @@ void grid_prepare(
 				bigpw.nbxx,
 				rhopw.ny, 
 				rhopw.nplane, 
-				rhopw.startz_current);
+				rhopw.startz_current,
+				ucell,
+				orb);
 	}
     else // multiple k-points
     {
@@ -53,7 +56,9 @@ void grid_prepare(
 				bigpw.nbxx,
 				rhopw.ny, 
 				rhopw.nplane, 
-				rhopw.startz_current);
+				rhopw.startz_current,
+				ucell,
+				orb);
 	}
 
     ModuleBase::timer::tick("LCAO_domain","grid_prepare");
diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp
index 864a53ab77..2b86e881dc 100644
--- a/source/module_hamilt_lcao/module_gint/gint.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint.cpp
@@ -67,9 +67,10 @@ void Gint::cal_gint(Gint_inout* inout)
 		ModuleBase::TITLE("Gint_interface","cal_gint_force_meta");
         ModuleBase::timer::tick("Gint_interface","cal_gint_force_meta");
 	}
-
+    const UnitCell& ucell = *this->ucell;
+    const LCAO_Orbitals& orb = *this->orb;
     const int max_size = this->gridt->max_atom;
-    const int LD_pool = max_size * GlobalC::ucell.nwmax;
+    const int LD_pool = max_size * ucell.nwmax;
     const int lgd = this->gridt->lgd;
     const int nnrg = this->gridt->nnrg;
 
@@ -86,21 +87,21 @@ void Gint::cal_gint(Gint_inout* inout)
                 ylmcoef[i] = ModuleBase::Ylm::ylmcoef[i];
             }
 
-            const int ntype = GlobalC::ORB.get_ntype();
+            const int ntype = orb.get_ntype();
             double* rcut = new double[ntype];
             for (int it = 0; it < ntype; it++)
             {
-                rcut[it] = GlobalC::ORB.Phi[it].getRcut();
+                rcut[it] = orb.Phi[it].getRcut();
             }
 
-            const double dr = GlobalC::ORB.dr_uniform;
+            const double dr = orb.dr_uniform;
 
             if (inout->job == Gint_Tools::job_type::vlocal)
             {
                 GintKernel::gint_gamma_vl_gpu(this->hRGint,
                                               lgd,
                                               max_size,
-                                              GlobalC::ucell.omega
+                                              ucell.omega
                                                   / this->ncxyz,
                                               inout->vl,
                                               ylmcoef,
@@ -109,7 +110,7 @@ void Gint::cal_gint(Gint_inout* inout)
                                               dr,
                                               rcut,
                                               *this->gridt,
-                                              GlobalC::ucell);
+                                              ucell);
             }
             else if (inout->job == Gint_Tools::job_type::rho)
             {
@@ -123,17 +124,17 @@ void Gint::cal_gint(Gint_inout* inout)
                                                    dr,
                                                    rcut,
                                                    *this->gridt,
-                                                   GlobalC::ucell,
+                                                   ucell,
                                                    inout->rho[is]);
                 }
             }
             else if (inout->job == Gint_Tools::job_type::force)
             {
                 const int ncyz = this->ny * this->nplane;
-                int nat = GlobalC::ucell.nat;
+                int nat = ucell.nat;
                 // for (int is = 0; is < GlobalV::NSPIN; ++is)
                 // {
-                    double *force = new double[GlobalC::ucell.nat * 3];
+                    double *force = new double[ucell.nat * 3];
                     for (int i = 0; i < nat * 3; i++)
                     {
                         force[i] = 0.0;
@@ -144,7 +145,7 @@ void Gint::cal_gint(Gint_inout* inout)
                         stress[i] = 0.0;
                     }
                     GintKernel::gint_gamma_force_gpu(this->DMRGint[inout->ispin],
-                                                     GlobalC::ucell.omega
+                                                     ucell.omega
                                                          / this->ncxyz,
                                                      inout->vl,
                                                      force,
@@ -153,7 +154,7 @@ void Gint::cal_gint(Gint_inout* inout)
                                                      dr,
                                                      rcut,
                                                      *this->gridt,
-                                                     GlobalC::ucell);
+                                                     ucell);
                     for (int iat = 0; iat < nat; iat++)
                     {
                         inout->fvl_dphi[0](iat, 0) += force[iat * 3];
@@ -187,11 +188,11 @@ void Gint::cal_gint(Gint_inout* inout)
                 // prepare some constants
                 const int ncyz
                     = this->ny * this->nplane; // mohan add 2012-03-25
-                const double dv = GlobalC::ucell.omega / this->ncxyz;
+                const double dv = ucell.omega / this->ncxyz;
 
                 // it's a uniform grid to save orbital values, so the delta_r is
                 // a constant.
-                const double delta_r = GlobalC::ORB.dr_uniform;
+                const double delta_r = orb.dr_uniform;
 
             if((inout->job==Gint_Tools::job_type::vlocal
                 || inout->job==Gint_Tools::job_type::vlocal_meta)
@@ -293,14 +294,14 @@ void Gint::cal_gint(Gint_inout* inout)
 					//int* vindex = Gint_Tools::get_vindex(ncyz, ibx, jby, kbz);
                     int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz,
                         this->nplane, this->gridt->start_ind[grid_index], ncyz);
-                    this->gint_kernel_rho(na_grid, grid_index, delta_r, vindex, LD_pool, GlobalC::ucell,inout);
+                    this->gint_kernel_rho(na_grid, grid_index, delta_r, vindex, LD_pool, ucell,inout);
 					delete[] vindex;
 				}
 				else if(inout->job == Gint_Tools::job_type::tau)
 				{
                     int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz,
                         this->nplane, this->gridt->start_ind[grid_index], ncyz);
-                    this->gint_kernel_tau(na_grid, grid_index, delta_r, vindex, LD_pool, inout,GlobalC::ucell);
+                    this->gint_kernel_tau(na_grid, grid_index, delta_r, vindex, LD_pool, inout,ucell);
 					delete[] vindex;
 				}
 				else if(inout->job == Gint_Tools::job_type::force)
@@ -322,11 +323,11 @@ void Gint::cal_gint(Gint_inout* inout)
 #ifdef _OPENMP
 						this->gint_kernel_force(na_grid, grid_index, delta_r, vldr3, LD_pool,
 							DM_in, inout->ispin, inout->isforce, inout->isstress,
-							&fvl_dphi_thread, &svl_dphi_thread,GlobalC::ucell);
+							&fvl_dphi_thread, &svl_dphi_thread,ucell);
 #else
 						this->gint_kernel_force(na_grid, grid_index, delta_r, vldr3, LD_pool,
 							DM_in, inout->ispin, inout->isforce, inout->isstress,
-							inout->fvl_dphi, inout->svl_dphi,GlobalC::ucell);
+							inout->fvl_dphi, inout->svl_dphi,ucell);
 #endif
 					delete[] vldr3;
 				}
@@ -338,17 +339,17 @@ void Gint::cal_gint(Gint_inout* inout)
 					if((GlobalV::GAMMA_ONLY_LOCAL && lgd>0) || !GlobalV::GAMMA_ONLY_LOCAL)
 					{
 						this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,
-								pvpR_thread,GlobalC::ucell,hRGint_thread);
+								pvpR_thread,ucell,hRGint_thread);
 					}
 #else
 					if(GlobalV::GAMMA_ONLY_LOCAL && lgd>0)
 					{
-						this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,GlobalC::ucell,nullptr);
+						this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,ucell,nullptr);
 					}
 					if(!GlobalV::GAMMA_ONLY_LOCAL)
 					{
 						this->gint_kernel_vlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,
-								GlobalC::ucell,this->pvpR_reduced[inout->ispin]);
+								ucell,this->pvpR_reduced[inout->ispin]);
 					}
 #endif
 					delete[] vldr3;
@@ -359,13 +360,13 @@ void Gint::cal_gint(Gint_inout* inout)
 							this->nplane, this->gridt->start_ind[grid_index], ncyz, dv);
 #ifdef _OPENMP
 					this->gint_kernel_dvlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,
-							pvdpRx_thread, pvdpRy_thread, pvdpRz_thread,GlobalC::ucell);
+							pvdpRx_thread, pvdpRy_thread, pvdpRz_thread,ucell);
 #else
 					this->gint_kernel_dvlocal(na_grid, grid_index, delta_r, vldr3, LD_pool,
 							this->pvdpRx_reduced[inout->ispin],
                             this->pvdpRy_reduced[inout->ispin],
                             this->pvdpRz_reduced[inout->ispin],
-                            GlobalC::ucell);
+                            ucell);
 #endif
 					delete[] vldr3;
 				}
@@ -379,16 +380,16 @@ void Gint::cal_gint(Gint_inout* inout)
 					if((GlobalV::GAMMA_ONLY_LOCAL && lgd>0) || !GlobalV::GAMMA_ONLY_LOCAL)
 					{
 						this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,
-								pvpR_thread, GlobalC::ucell,hRGint_thread);
+								pvpR_thread, ucell,hRGint_thread);
 					}
 #else
 					if(GlobalV::GAMMA_ONLY_LOCAL && lgd>0)
 					{
-						this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, GlobalC::ucell,nullptr);
+						this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool, ucell,nullptr);
 					}
 					if(!GlobalV::GAMMA_ONLY_LOCAL)
 					{
-						this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,GlobalC::ucell,
+						this->gint_kernel_vlocal_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,ucell,
 								this->pvpR_reduced[inout->ispin]);
 					}
 #endif
@@ -416,11 +417,11 @@ void Gint::cal_gint(Gint_inout* inout)
 #ifdef _OPENMP
 					this->gint_kernel_force_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,
 							DM_in, inout->ispin, inout->isforce, inout->isstress,
-							&fvl_dphi_thread, &svl_dphi_thread,GlobalC::ucell);
+							&fvl_dphi_thread, &svl_dphi_thread,ucell);
 #else
 					this->gint_kernel_force_meta(na_grid, grid_index, delta_r, vldr3, vkdr3, LD_pool,
 							DM_in, inout->ispin, inout->isforce, inout->isstress,
-							inout->fvl_dphi, inout->svl_dphi,GlobalC::ucell);
+							inout->fvl_dphi, inout->svl_dphi,ucell);
 #endif
 					delete[] vldr3;
 					delete[] vkdr3;
@@ -505,7 +506,9 @@ void Gint::prep_grid(const Grid_Technique& gt,
                      const int& nbxx_in,
                      const int& ny_in,
                      const int& nplane_in,
-                     const int& startz_current_in)
+                     const int& startz_current_in,
+                     const UnitCell* ucell_in,
+                     const LCAO_Orbitals* orb_in)
 {
     ModuleBase::TITLE(GlobalV::ofs_running, "Gint_k", "prep_grid");
 
@@ -523,6 +526,9 @@ void Gint::prep_grid(const Grid_Technique& gt,
     this->ny = ny_in;
     this->nplane = nplane_in;
     this->startz_current = startz_current_in;
+    this->ucell= ucell_in;
+    this->orb = orb_in;
+
     assert(nbx > 0);
     assert(nby > 0);
     assert(nbz >= 0);
@@ -535,8 +541,7 @@ void Gint::prep_grid(const Grid_Technique& gt,
     assert(ny > 0);
     assert(nplane >= 0);
     assert(startz_current >= 0);
-
-    assert(GlobalC::ucell.omega > 0.0);
+    assert(this->ucell->omega > 0.0);
 
     return;
 }
@@ -641,15 +646,15 @@ void Gint::initialize_pvpR(const UnitCell& ucell_in, Grid_Driver* gd)
 					{
 						ModuleBase::Vector3<double> dtau = gd->getAdjacentTau(ad) - tau1;
 						double distance = dtau.norm() * ucell_in.lat0;
-						double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
+						double rcut = this->orb->Phi[T1].getRcut() + this->orb->Phi[T2].getRcut();
 
 						//if(distance < rcut)
 						// mohan reset this 2013-07-02 in Princeton
-						// we should make absolutely sure that the distance is smaller than GlobalC::ORB.Phi[it].getRcut
+						// we should make absolutely sure that the distance is smaller than orb.Phi[it].getRcut
 						// this should be consistant with LCAO_nnr::cal_nnrg function
 						// typical example : 7 Bohr cutoff Si orbital in 14 Bohr length of cell.
 						// distance = 7.0000000000000000
-						// GlobalC::ORB.Phi[it].getRcut = 7.0000000000000008
+						// orb.Phi[it].getRcut = 7.0000000000000008
 						if(distance < rcut - 1.0e-15)
 						{
 							// calculate R index
diff --git a/source/module_hamilt_lcao/module_gint/gint.h b/source/module_hamilt_lcao/module_gint/gint.h
index d7a51c1a03..e07db55d12 100644
--- a/source/module_hamilt_lcao/module_gint/gint.h
+++ b/source/module_hamilt_lcao/module_gint/gint.h
@@ -37,7 +37,9 @@ class Gint
         const int& nbxx_in,
         const int& ny_in,
         const int& nplane_in,
-        const int& startz_current_in);
+        const int& startz_current_in,
+        const UnitCell* ucell_in,
+        const LCAO_Orbitals* orb_in);
     
     /**
      * @brief calculate the neighbor atoms of each atom in this processor
@@ -54,7 +56,8 @@ class Gint
     void transfer_DM2DtoGrid(std::vector<hamilt::HContainer<double>*> DM2D);
 
     const Grid_Technique* gridt = nullptr;
-
+    const UnitCell* ucell;
+    const LCAO_Orbitals* orb ;
     protected:
 
     // variables related to FFT grid
diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp
index 1257f731df..4ad0bb2591 100644
--- a/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.cpp
@@ -28,7 +28,7 @@ Grid_BigCell::~Grid_BigCell()
 	// delete tau positions.
 	if(this->flag_tib)
 	{
-		for(int i=0; i<GlobalC::ucell.nat; i++)
+		for(int i=0; i<this->nat; i++)
 		{
 			delete[] tau_in_bigcell[i];
 		}
@@ -45,6 +45,7 @@ void Grid_BigCell::init_big_latvec(const UnitCell& ucell)
 	assert(nby>0);
 	assert(nbz>=0);
 
+	this->nat=ucell.nat;
 	//size of each big room (same shape with unitcell)
 	this->bigcell_vec1[0]= ucell.a1.x / (double)nbx * ucell.lat0;
 	this->bigcell_vec1[1]= ucell.a1.y / (double)nbx * ucell.lat0;
diff --git a/source/module_hamilt_lcao/module_gint/grid_bigcell.h b/source/module_hamilt_lcao/module_gint/grid_bigcell.h
index b6efd5669a..99e069e706 100644
--- a/source/module_hamilt_lcao/module_gint/grid_bigcell.h
+++ b/source/module_hamilt_lcao/module_gint/grid_bigcell.h
@@ -16,7 +16,7 @@ class Grid_BigCell: public Grid_MeshCell
 	// save the relative cartesian position
 	// to bigcell of each atom.
 	double** tau_in_bigcell;
-
+	int nat;
 	protected:
 
 	//---------------------------------
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
index e4a39fc23b..7c1f69e9b0 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
@@ -88,7 +88,7 @@ Grid_Technique::~Grid_Technique()
     
     if (allocate_find_R2)
     {
-        for (int iat = 0; iat < GlobalC::ucell.nat; iat++)
+        for (int iat = 0; iat < this->nat; iat++)
         {
             delete[] find_R2[iat];
             delete[] find_R2st[iat];
@@ -102,7 +102,7 @@ Grid_Technique::~Grid_Technique()
 #if ((defined __CUDA) /* || (defined __ROCM) */)
     if(GlobalV::device_flag == "gpu")
     {
-        free_gpu_gint_variables(GlobalC::ucell);
+        free_gpu_gint_variables(this->nat);
     }
 #endif
 }
@@ -641,7 +641,7 @@ void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Or
 {
     if (is_malloced)
     {
-        free_gpu_gint_variables(ucell);
+        free_gpu_gint_variables(this->nat);
     }
     double ylmcoef[100];
     ModuleBase::GlobalFunc::ZEROS(ylmcoef, 100);
@@ -950,7 +950,7 @@ void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Or
     free(atom_iw2_ylm_now);
 }
 
-void Grid_Technique::free_gpu_gint_variables(const UnitCell& ucell)
+void Grid_Technique::free_gpu_gint_variables(int nat)
 {
     if (!is_malloced)
     {
@@ -1027,7 +1027,7 @@ void Grid_Technique::free_gpu_gint_variables(const UnitCell& ucell)
     checkCudaErrors(cudaFree(dot_product_g));
     checkCudaErrors(cudaFree(rho_g));
 
-    const int max_atom_pair_number = ucell.nat * ucell.nat;
+    const int max_atom_pair_number = nat * nat;
     for (int i = 0; i < max_atom_pair_number; i++)
     {
         if (grid_vlocal_g[i] != nullptr)
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h
index 1824d2ae9e..83cfc9dc75 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.h
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.h
@@ -226,7 +226,7 @@ class Grid_Technique : public Grid_MeshBall
 
   private:
     void init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb);
-    void free_gpu_gint_variables(const UnitCell& ucell);
+    void free_gpu_gint_variables(int nat);
 
 #endif
 };

From da3522cf5d38a4b8782f16532a02bcea1f66e87f Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Mon, 13 May 2024 20:30:39 +0800
Subject: [PATCH 08/23] remove GlobalC in gint_tools and vbatch matrix

---
 .../module_gint/gint_fvl.cpp                  |  4 +-
 .../module_gint/gint_gamma_env.cpp            |  3 +-
 .../module_gint/gint_k_env.cpp                |  3 +-
 .../module_gint/gint_rho.cpp                  |  3 +-
 .../module_gint/gint_tau.cpp                  |  3 +-
 .../module_gint/gint_tools.cpp                | 84 ++++++++++---------
 .../module_gint/gint_tools.h                  |  6 +-
 .../module_gint/gint_vl.cpp                   |  8 +-
 .../module_gint/grid_technique.cpp            |  7 +-
 .../module_gint/grid_technique.h              |  6 +-
 .../kernels/cuda/vbatch_matrix_mul.cu         | 16 ++--
 .../kernels/cuda/vbatch_matrix_mul.cuh        |  4 +-
 12 files changed, 76 insertions(+), 71 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/gint_fvl.cpp b/source/module_hamilt_lcao/module_gint/gint_fvl.cpp
index 36de330336..45706d4dcd 100644
--- a/source/module_hamilt_lcao/module_gint/gint_fvl.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_fvl.cpp
@@ -31,7 +31,7 @@ void Gint::gint_kernel_force(
 	Gint_Tools::Array_Pool<double> dpsir_ylm_z(this->bxyz, LD_pool);
 
 	Gint_Tools::cal_dpsir_ylm(*this->gridt, this->bxyz, na_grid, grid_index, delta_r,	block_index, block_size, cal_flag,
-		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D,ucell);
+		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D);
 
     //calculating f_mu(r) = v(r)*psi_mu(r)*dv
 	const Gint_Tools::Array_Pool<double> psir_vlbr3 
@@ -170,7 +170,7 @@ void Gint::gint_kernel_force_meta(
 
 	//psi and gradient of psi
 	Gint_Tools::cal_dpsir_ylm(*this->gridt, this->bxyz, na_grid, grid_index, delta_r,	block_index, block_size, cal_flag,
-		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D,ucell);
+		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D);
 	/*
 	Gint_Tools::cal_dpsir_ylm(na_grid, grid_index, delta_r,	block_index, block_size, cal_flag,
 		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D, displ1);
diff --git a/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp b/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp
index 29325a8e8b..02bbcd99d0 100644
--- a/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_gamma_env.cpp
@@ -40,8 +40,7 @@ void Gint_Gamma::cal_env(const double* wfc, double* rho,LCAO_Orbitals &orb,UnitC
                 size, grid_index, delta_r,
 				block_index, block_size, 
 				cal_flag,
-				psir_ylm.ptr_2D,
-				ucell);
+				psir_ylm.ptr_2D);
 
             int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz,
                 this->nplane, this->gridt->start_ind[grid_index], ncyz);
diff --git a/source/module_hamilt_lcao/module_gint/gint_k_env.cpp b/source/module_hamilt_lcao/module_gint/gint_k_env.cpp
index 76499b2e5a..859b8d1a4e 100644
--- a/source/module_hamilt_lcao/module_gint/gint_k_env.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_k_env.cpp
@@ -50,8 +50,7 @@ void Gint_k::cal_env_k(int ik,
                 this->bxyz, size, grid_index, delta_r,
                 block_index, block_size,
                 cal_flag,
-                psir_ylm.ptr_2D,
-                ucell);
+                psir_ylm.ptr_2D);
 
             int* vindex = Gint_Tools::get_vindex(this->bxyz, this->bx, this->by, this->bz,
                 this->nplane, this->gridt->start_ind[grid_index], ncyz);
diff --git a/source/module_hamilt_lcao/module_gint/gint_rho.cpp b/source/module_hamilt_lcao/module_gint/gint_rho.cpp
index ff5bda6d7c..ea9688b8e2 100644
--- a/source/module_hamilt_lcao/module_gint/gint_rho.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_rho.cpp
@@ -29,8 +29,7 @@ void Gint::gint_kernel_rho(
 		this->bxyz, na_grid, grid_index, delta_r,
 		block_index, block_size, 
 		cal_flag,
-		psir_ylm.ptr_2D,
-		ucell);
+		psir_ylm.ptr_2D);
 
 	for(int is=0; is<GlobalV::NSPIN; ++is)
 	{
diff --git a/source/module_hamilt_lcao/module_gint/gint_tau.cpp b/source/module_hamilt_lcao/module_gint/gint_tau.cpp
index 8cb8665d0b..e870d84982 100644
--- a/source/module_hamilt_lcao/module_gint/gint_tau.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_tau.cpp
@@ -36,8 +36,7 @@ void Gint::gint_kernel_tau(
 		psir_ylm.ptr_2D,
 		dpsir_ylm_x.ptr_2D,
 		dpsir_ylm_y.ptr_2D,
-		dpsir_ylm_z.ptr_2D,
-		ucell);
+		dpsir_ylm_z.ptr_2D);
 
 	for(int is=0; is<GlobalV::NSPIN; ++is)
 	{
diff --git a/source/module_hamilt_lcao/module_gint/gint_tools.cpp b/source/module_hamilt_lcao/module_gint/gint_tools.cpp
index 739c2cc7c2..4d4155430f 100644
--- a/source/module_hamilt_lcao/module_gint/gint_tools.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_tools.cpp
@@ -139,18 +139,19 @@ namespace Gint_Tools
 		{
 			cal_flag[ib] = new bool[na_grid];
 		}
-
+		const UnitCell& ucell=*gt.ucell;
+		const LCAO_Orbitals& orb=*gt.orb;
 		block_index[0] = 0;
 		for (int id=0; id<na_grid; id++)
 		{
 			const int mcell_index=gt.bcell_start[grid_index] + id;
 			const int iat=gt.which_atom[mcell_index]; // index of atom
-			const int it=GlobalC::ucell.iat2it[ iat ]; // index of atom type
-			const int ia=GlobalC::ucell.iat2ia[ iat ]; // index of atoms within each type
-			const int start=GlobalC::ucell.itiaiw2iwt(it, ia, 0); // the index of the first wave function for atom (it,ia)
+			const int it=ucell.iat2it[ iat ]; // index of atom type
+			const int ia=ucell.iat2ia[ iat ]; // index of atoms within each type
+			const int start=ucell.itiaiw2iwt(it, ia, 0); // the index of the first wave function for atom (it,ia)
 			block_iw[id]=gt.trace_lo[start];
-			block_index[id+1] = block_index[id]+GlobalC::ucell.atoms[it].nw;
-			block_size[id]=GlobalC::ucell.atoms[it].nw;
+			block_index[id+1] = block_index[id]+ucell.atoms[it].nw;
+			block_size[id]=ucell.atoms[it].nw;
 
 			const int imcell=gt.which_bigcell[mcell_index];
 			const double mt[3] = {
@@ -167,7 +168,7 @@ namespace Gint_Tools
 					gt.meshcell_pos[ib][2] + mt[2]};
 				const double distance = std::sqrt(dr[0]*dr[0] + dr[1]*dr[1] + dr[2]*dr[2]);	// distance between atom and grid
 
-				if(distance > GlobalC::ORB.Phi[it].getRcut() - 1.0e-10)
+				if(distance > orb.Phi[it].getRcut() - 1.0e-10)
 					cal_flag[ib][id]=false;
 				else
 					cal_flag[ib][id]=true;
@@ -184,11 +185,12 @@ namespace Gint_Tools
 		const int*const block_index,  		// block_index[na_grid+1], count total number of atomis orbitals
 		const int*const block_size, 		// block_size[na_grid],	number of columns of a band
 		const bool*const*const cal_flag,
-		double*const*const psir_ylm,
-		const UnitCell& ucell) 	// cal_flag[bxyz][na_grid],	whether the atom-grid distance is larger than cutoff
+		double*const*const psir_ylm) 	// cal_flag[bxyz][na_grid],	whether the atom-grid distance is larger than cutoff
     {
 		ModuleBase::timer::tick("Gint_Tools", "cal_psir_ylm");
         std::vector<double> ylma;
+		const UnitCell& ucell=*gt.ucell;
+		const LCAO_Orbitals& orb=*gt.orb;
         for (int id=0; id<na_grid; id++)
 		{
 			// there are two parameters we want to know here:
@@ -197,9 +199,9 @@ namespace Gint_Tools
 			const int mcell_index=gt.bcell_start[grid_index] + id;
 
 			const int iat=gt.which_atom[mcell_index]; // index of atom
-			const int it=GlobalC::ucell.iat2it[iat]; // index of atom type
-			const Atom*const atom=&GlobalC::ucell.atoms[it];
-			auto &OrbPhi = GlobalC::ORB.Phi[it];
+			const int it=ucell.iat2it[iat]; // index of atom type
+			const Atom*const atom=&ucell.atoms[it];
+			auto &OrbPhi = orb.Phi[it];
 			std::vector<const double*> it_psi_uniform(atom->nw);
 			std::vector<const double*> it_dpsi_uniform(atom->nw);
 			// preprocess index
@@ -248,7 +250,7 @@ namespace Gint_Tools
 					// spherical harmonic functions Ylm
 					//------------------------------------------------------
 					//	Ylm::get_ylm_real(this->nnn[it], this->dr[id], ylma);
-					ModuleBase::Ylm::sph_harm ( GlobalC::ucell.atoms[it].nwl,
+					ModuleBase::Ylm::sph_harm ( ucell.atoms[it].nwl,
 							dr[0] / distance,
 							dr[1] / distance,
 							dr[2] / distance,
@@ -280,7 +282,7 @@ namespace Gint_Tools
 						}
 						p[iw]=phi * ylma[atom->iw2_ylm[iw]];
 					} // end iw
-				}// end distance<=(GlobalC::ORB.Phi[it].getRcut()-1.0e-15)
+				}// end distance<=(orb.Phi[it].getRcut()-1.0e-15)
 			}// end ib
 		}// end id
 		ModuleBase::timer::tick("Gint_Tools", "cal_psir_ylm");
@@ -299,18 +301,19 @@ namespace Gint_Tools
 		double*const*const psir_ylm,
 		double*const*const dpsir_ylm_x,
 		double*const*const dpsir_ylm_y,
-		double*const*const dpsir_ylm_z,
-		const UnitCell& ucell)
+		double*const*const dpsir_ylm_z)
 	{
 		ModuleBase::timer::tick("Gint_Tools", "cal_dpsir_ylm");
+		const UnitCell& ucell=*gt.ucell;
+		const LCAO_Orbitals& orb=*gt.orb;
 		for (int id=0; id<na_grid; id++)
 		{
 			const int mcell_index = gt.bcell_start[grid_index] + id;
 			const int imcell = gt.which_bigcell[mcell_index];
 			int iat = gt.which_atom[mcell_index];
-			const int it = GlobalC::ucell.iat2it[iat];
-			const int ia = GlobalC::ucell.iat2ia[iat];
-			Atom *atom = &GlobalC::ucell.atoms[it];
+			const int it = ucell.iat2it[iat];
+			const int ia = ucell.iat2ia[iat];
+			Atom *atom = &ucell.atoms[it];
 
 			const double mt[3]={
 				gt.meshball_positions[imcell][0] - gt.tau_in_bigcell[iat][0],
@@ -341,7 +344,7 @@ namespace Gint_Tools
 					//array to store spherical harmonics and its derivatives
 					std::vector<double> rly;
 					std::vector<std::vector<double>> grly;
-					ModuleBase::Ylm::grad_rl_sph_harm(GlobalC::ucell.atoms[it].nwl, dr[0], dr[1], dr[2], rly, grly);
+					ModuleBase::Ylm::grad_rl_sph_harm(ucell.atoms[it].nwl, dr[0], dr[1], dr[2], rly, grly);
 					if(distance < 1e-9)  distance = 1e-9;
 
 					const double position = distance / delta_r;
@@ -362,7 +365,7 @@ namespace Gint_Tools
 						// function from interpolation method.
 						if ( atom->iw2_new[iw] )
 						{
-							const Numerical_Orbital_Lm &philn = GlobalC::ORB.Phi[it].PhiLN(
+							const Numerical_Orbital_Lm &philn = orb.Phi[it].PhiLN(
 									atom->iw2l[iw],
 									atom->iw2n[iw]);
 
@@ -429,14 +432,16 @@ namespace Gint_Tools
 		double*const*const ddpsir_ylm_zz)
 	{
 		ModuleBase::timer::tick("Gint_Tools", "cal_ddpsir_ylm");
+		const UnitCell& ucell=*gt.ucell;
+		const LCAO_Orbitals& orb=*gt.orb;
 		for (int id=0; id<na_grid; id++)
 		{
 			const int mcell_index = gt.bcell_start[grid_index] + id;
 			const int imcell = gt.which_bigcell[mcell_index];
 			int iat = gt.which_atom[mcell_index];
-			const int it = GlobalC::ucell.iat2it[iat];
-			const int ia = GlobalC::ucell.iat2ia[iat];
-			Atom *atom = &GlobalC::ucell.atoms[it];
+			const int it = ucell.iat2it[iat];
+			const int ia = ucell.iat2ia[iat];
+			Atom *atom = &ucell.atoms[it];
 
 			const double mt[3]={
 				gt.meshball_positions[imcell][0] - gt.tau_in_bigcell[iat][0],
@@ -509,7 +514,7 @@ namespace Gint_Tools
 							//array to store spherical harmonics and its derivatives
 							std::vector<double> rly;
 							std::vector<std::vector<double>> grly;
-							ModuleBase::Ylm::grad_rl_sph_harm(GlobalC::ucell.atoms[it].nwl, dr1[0], dr1[1], dr1[2], rly, grly);
+							ModuleBase::Ylm::grad_rl_sph_harm(ucell.atoms[it].nwl, dr1[0], dr1[1], dr1[2], rly, grly);
 
 							double distance1 = std::sqrt(dr1[0]*dr1[0] + dr1[1]*dr1[1] + dr1[2]*dr1[2]);
 							if(distance1 < 1e-9)  distance1 = 1e-9;
@@ -532,7 +537,7 @@ namespace Gint_Tools
 								// function from interpolation method.
 								if ( atom->iw2_new[iw] )
 								{
-									const Numerical_Orbital_Lm &philn = GlobalC::ORB.Phi[it].PhiLN(
+									const Numerical_Orbital_Lm &philn = orb.Phi[it].PhiLN(
 											atom->iw2l[iw],
 											atom->iw2n[iw]);
 
@@ -609,8 +614,8 @@ namespace Gint_Tools
 						std::vector<double> rly;
 						std::vector<std::vector<double>> grly;
 						std::vector<std::vector<double>> hrly;
-						ModuleBase::Ylm::grad_rl_sph_harm(GlobalC::ucell.atoms[it].nwl, dr[0], dr[1], dr[2], rly, grly);
-						ModuleBase::Ylm::hes_rl_sph_harm(GlobalC::ucell.atoms[it].nwl, dr[0], dr[1], dr[2], hrly);
+						ModuleBase::Ylm::grad_rl_sph_harm(ucell.atoms[it].nwl, dr[0], dr[1], dr[2], rly, grly);
+						ModuleBase::Ylm::hes_rl_sph_harm(ucell.atoms[it].nwl, dr[0], dr[1], dr[2], hrly);
 						const double position = distance / delta_r;
 
 						const double iq = static_cast<int>(position);
@@ -629,7 +634,7 @@ namespace Gint_Tools
 							// function from interpolation method.
 							if ( atom->iw2_new[iw] )
 							{
-								const Numerical_Orbital_Lm &philn = GlobalC::ORB.Phi[it].PhiLN(
+								const Numerical_Orbital_Lm &philn = orb.Phi[it].PhiLN(
 										atom->iw2l[iw],
 										atom->iw2n[iw]);
 
@@ -723,13 +728,15 @@ namespace Gint_Tools
 		double*const*const dpsir_ylm_zz)
 	{
 		ModuleBase::timer::tick("Gint_Tools", "cal_dpsirr_ylm");
+		const UnitCell& ucell=*gt.ucell;
+		const LCAO_Orbitals& orb=*gt.orb;
 		for (int id=0; id<na_grid; id++)
 		{
 			const int mcell_index = gt.bcell_start[grid_index] + id;
 			const int imcell = gt.which_bigcell[mcell_index];
 			int iat = gt.which_atom[mcell_index];
-			const int it = GlobalC::ucell.iat2it[iat];
-			Atom *atom = &GlobalC::ucell.atoms[it];
+			const int it = ucell.iat2it[iat];
+			Atom *atom = &ucell.atoms[it];
 
 			const double mt[3]={
 				gt.meshball_positions[imcell][0] - gt.tau_in_bigcell[iat][0],
@@ -1139,8 +1146,9 @@ namespace Gint_Tools
 	{
 		double *psi2, *psi2_dmr;
 		int iwi, iww;
-		const int LD_pool = gt.max_atom*GlobalC::ucell.nwmax;
-
+		const UnitCell& ucell=*gt.ucell;
+		const LCAO_Orbitals& orb=*gt.orb;
+		const int LD_pool = gt.max_atom*ucell.nwmax;
 		bool *all_out_of_range = new bool[na_grid];
 		for(int ia=0; ia<na_grid; ++ia) //number of atoms
 		{
@@ -1168,9 +1176,9 @@ namespace Gint_Tools
 
 			const int mcell_index1 = gt.bcell_start[grid_index] + ia1;
 			const int iat = gt.which_atom[mcell_index1];
-			const int T1 = GlobalC::ucell.iat2it[iat];
-			const int I1 = GlobalC::ucell.iat2ia[iat];
-			Atom *atom1 = &GlobalC::ucell.atoms[T1];
+			const int T1 = ucell.iat2it[iat];
+			const int I1 = ucell.iat2ia[iat];
+			Atom *atom1 = &ucell.atoms[T1];
 
 			//~~~~~~~~~~~~~~~~
 			// get cell R1.
@@ -1274,10 +1282,10 @@ namespace Gint_Tools
 				if(!same_flag) continue;
 
 				const int bcell2 = gt.bcell_start[grid_index] + ia2;
-				const int T2 = GlobalC::ucell.iat2it[ gt.which_atom[bcell2]];
+				const int T2 = ucell.iat2it[ gt.which_atom[bcell2]];
 				const int iat2 = gt.which_atom[bcell2];
 
-				Atom *atom2 = &GlobalC::ucell.atoms[T2];
+				Atom *atom2 = &ucell.atoms[T2];
 
 				//---------------
 				// get cell R2.
diff --git a/source/module_hamilt_lcao/module_gint/gint_tools.h b/source/module_hamilt_lcao/module_gint/gint_tools.h
index 99c1028e44..12c93ae699 100644
--- a/source/module_hamilt_lcao/module_gint/gint_tools.h
+++ b/source/module_hamilt_lcao/module_gint/gint_tools.h
@@ -203,8 +203,7 @@ namespace Gint_Tools
 		const int*const block_index,  // count total number of atomis orbitals
 		const int*const block_size, 
 		const bool*const*const cal_flag,
-		double*const*const psir_ylm,
-        const UnitCell& ucell); // whether the atom-grid distance is larger than cutoff
+		double*const*const psir_ylm); // whether the atom-grid distance is larger than cutoff
 
 	// psir_ylm and dpsir_ylm, both[pw.bxyz][LD_pool]
     void cal_dpsir_ylm(
@@ -219,8 +218,7 @@ namespace Gint_Tools
 		double*const*const psir_ylm,
 		double*const*const dpsir_ylm_x,
 		double*const*const dpsir_ylm_y,
-		double*const*const dpsir_ylm_z,
-        const UnitCell& ucell);
+		double*const*const dpsir_ylm_z);
 
 	// dpsir_ylm * (r-R), R is the atomic position
     void cal_dpsirr_ylm(
diff --git a/source/module_hamilt_lcao/module_gint/gint_vl.cpp b/source/module_hamilt_lcao/module_gint/gint_vl.cpp
index 4e79e3c384..28094d01ec 100644
--- a/source/module_hamilt_lcao/module_gint/gint_vl.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint_vl.cpp
@@ -38,8 +38,7 @@ void Gint::gint_kernel_vlocal(
 		this->bxyz, na_grid, grid_index, delta_r,
 		block_index, block_size, 
 		cal_flag,
-		psir_ylm.ptr_2D,
-		ucell);
+		psir_ylm.ptr_2D);
 	
 	//calculating f_mu(r) = v(r)*psi_mu(r)*dv
 	const Gint_Tools::Array_Pool<double> psir_vlbr3 = Gint_Tools::get_psir_vlbr3(
@@ -97,7 +96,7 @@ void Gint::gint_kernel_dvlocal(
 	Gint_Tools::Array_Pool<double> dpsir_ylm_z(this->bxyz, LD_pool);
 
 	Gint_Tools::cal_dpsir_ylm(*this->gridt, this->bxyz, na_grid, grid_index, delta_r,	block_index, block_size, cal_flag,
-		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D,ucell);
+		psir_ylm.ptr_2D, dpsir_ylm_x.ptr_2D, dpsir_ylm_y.ptr_2D, dpsir_ylm_z.ptr_2D);
 
 	//calculating f_mu(r) = v(r)*psi_mu(r)*dv
 	const Gint_Tools::Array_Pool<double> psir_vlbr3 = Gint_Tools::get_psir_vlbr3(
@@ -157,8 +156,7 @@ void Gint::gint_kernel_vlocal_meta(
 		psir_ylm.ptr_2D,
 		dpsir_ylm_x.ptr_2D,
 		dpsir_ylm_y.ptr_2D,
-		dpsir_ylm_z.ptr_2D,
-		ucell
+		dpsir_ylm_z.ptr_2D
 	);
 	
 	//calculating f_mu(r) = v(r)*psi_mu(r)*dv
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
index 7c1f69e9b0..fdcfdc32eb 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
@@ -127,7 +127,7 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in,
                                   const int& nplane,
                                   const int& startz_current,
                                   const UnitCell& ucell,
-                                  const LCAO_Orbitals &orb)
+                                  const LCAO_Orbitals& orb)
 {
     ModuleBase::TITLE("Grid_Technique", "init");
     ModuleBase::timer::tick("Grid_Technique", "init");
@@ -152,7 +152,8 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in,
                        nbxx_in,
                        nbzp_start_in,
                        nbzp_in);
-
+    this->ucell=&ucell;
+    this->orb=&orb;
     this->init_latvec(ucell);
 
     this->init_big_latvec(ucell);
@@ -941,7 +942,7 @@ void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Or
         checkCudaErrors(cudaStreamCreate(&streams[i]));
     }
 
-    gemm_algo_selector(bxyz, fastest_matrix_mul);
+    gemm_algo_selector(bxyz, fastest_matrix_mul,ucell);
 
     is_malloced = true;
 
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h
index 83cfc9dc75..03cca8e924 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.h
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.h
@@ -74,6 +74,10 @@ class Grid_Technique : public Grid_MeshBall
     bool allocate_find_R2;
     int binary_search_find_R2_offset(int val, int iat) const;
 
+    //UnitCell and LCAO_Obrbitals
+    const UnitCell* ucell;
+    const LCAO_Orbitals* orb;
+
     // indexes for nnrg -> orbital index + R index
     std::vector<gridIntegral::gridIndex> nnrg_index;
 
@@ -98,7 +102,7 @@ class Grid_Technique : public Grid_MeshBall
                       const int& nplane,
                       const int& startz_current,
                       const UnitCell& ucell,
-                      const LCAO_Orbitals &orb);
+                      const LCAO_Orbitals& orb);
 
     /// number of elements(basis-pairs) in this processon
     /// on all adjacent atoms-pairs(Grid division)
diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cu
index ed710edf5a..b84b76a840 100644
--- a/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cu
+++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cu
@@ -534,17 +534,17 @@ void gemm_time_measure(int max_m,
  * in compilation time (TODO: so in the future, it will be necessary to split
  * this large section of code into multiple files, multiple compilation units).
  */
-void gemm_algo_selector(int matrix_k, matrix_multiple_func_type& fastest_algo)
+void gemm_algo_selector(int matrix_k, matrix_multiple_func_type& fastest_algo,const UnitCell& ucell)
 {
     int batchCount_per_type = 32;
     int batchCount
-        = batchCount_per_type * GlobalC::ucell.ntype * GlobalC::ucell.ntype;
+        = batchCount_per_type * ucell.ntype * ucell.ntype;
 
     Cuda_Mem_Wrapper<int> m(batchCount);
     Cuda_Mem_Wrapper<int> n(batchCount);
     Cuda_Mem_Wrapper<int> k(batchCount);
 
-    int max_m = GlobalC::ucell.nwmax, max_n = GlobalC::ucell.nwmax;
+    int max_m = ucell.nwmax, max_n = ucell.nwmax;
 
     Cuda_Mem_Wrapper<double> A(batchCount * max_m * matrix_k);
     Cuda_Mem_Wrapper<double> B(batchCount * max_n * matrix_k);
@@ -572,17 +572,17 @@ void gemm_algo_selector(int matrix_k, matrix_multiple_func_type& fastest_algo)
     int index = 0;
     for (int i = 0; i < batchCount_per_type; ++i)
     {
-        for (int j = 0; j < GlobalC::ucell.ntype; j++)
+        for (int j = 0; j < ucell.ntype; j++)
         {
-            for (int l = 0; l < GlobalC::ucell.ntype; l++)
+            for (int l = 0; l < ucell.ntype; l++)
             {
-                m.get_host_pointer()[index] = GlobalC::ucell.atoms[j].nw;
-                n.get_host_pointer()[index] = GlobalC::ucell.atoms[l].nw;
+                m.get_host_pointer()[index] = ucell.atoms[j].nw;
+                n.get_host_pointer()[index] = ucell.atoms[l].nw;
                 k.get_host_pointer()[index] = matrix_k;
 
                 lda.get_host_pointer()[index] = matrix_k;
                 ldb.get_host_pointer()[index] = matrix_k;
-                ldc.get_host_pointer()[index] = GlobalC::ucell.atoms[l].nw;
+                ldc.get_host_pointer()[index] = ucell.atoms[l].nw;
 
                 A_array.get_host_pointer()[index]
                     = &A.get_device_pointer()[index * max_m * matrix_k];
diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cuh b/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cuh
index 3972918675..ea4e42e521 100644
--- a/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cuh
+++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/vbatch_matrix_mul.cuh
@@ -7,7 +7,7 @@
 #include <stdio.h> // for fprintf and stderr
 
 #include <functional>
-
+#include "module_cell/unitcell.h"
 /**
  * Performs a batched matrix multiplication using the vbatched_gemm_impl
  * function.
@@ -111,5 +111,5 @@ typedef std::function<void(int,
                            double* alpha)>
     matrix_multiple_func_type;
 
-void gemm_algo_selector(int k, matrix_multiple_func_type& func);
+void gemm_algo_selector(int k, matrix_multiple_func_type& func,const UnitCell& ucell);
 #endif // VBATCH_MATRIX_MUL_H
\ No newline at end of file

From 939ed76dfcc62ccc365924d4d541f5721c2f2eec Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Tue, 21 May 2024 20:16:41 +0800
Subject: [PATCH 09/23] fix relax have compute stress and change GPU force
 compute to acclerate

---
 .../module_hamilt_lcao/module_gint/gint.cpp   | 82 +++++++++++--------
 .../module_gint/gint_force.h                  |  4 +-
 .../module_gint/gint_force_gpu.cu             | 17 ++--
 .../module_gint/kernels/cuda/gint_force.cu    |  2 +-
 4 files changed, 61 insertions(+), 44 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp
index 2b86e881dc..a25f1deb11 100644
--- a/source/module_hamilt_lcao/module_gint/gint.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint.cpp
@@ -36,7 +36,6 @@ Gint::~Gint()
 void Gint::cal_gint(Gint_inout* inout)
 {
 	ModuleBase::timer::tick("Gint_interface", "cal_gint");
-
 	if(inout->job==Gint_Tools::job_type::vlocal)
 	{
 		ModuleBase::TITLE("Gint_interface","cal_gint_vlocal");
@@ -132,44 +131,62 @@ void Gint::cal_gint(Gint_inout* inout)
             {
                 const int ncyz = this->ny * this->nplane;
                 int nat = ucell.nat;
+                const int isforce = inout->isforce;
+                const int isstress =inout->isstress;
                 // for (int is = 0; is < GlobalV::NSPIN; ++is)
                 // {
-                    double *force = new double[ucell.nat * 3];
-                    for (int i = 0; i < nat * 3; i++)
+                    if (isforce || isstress){
+                        double *force = new double[ucell.nat * 3];
+                            for (int i = 0; i < nat * 3; i++)
+                            {
+                                force[i] = 0.0;
+                            }
+                        double *stress = new double[6];
+                        for (int i = 0; i < 6; i++)
+                        {
+                            stress[i] = 0.0;
+                        }
+                        GintKernel::gint_gamma_force_gpu(this->DMRGint[inout->ispin],
+                                                        ucell.omega
+                                                            / this->ncxyz,
+                                                        inout->vl,
+                                                        force,
+                                                        stress,
+                                                        this->nplane,
+                                                        dr,
+                                                        rcut,
+                                                        isforce,
+                                                        isstress,
+                                                        *this->gridt,
+                                                        ucell);
+                    
+                    if (inout->isforce)
                     {
-                        force[i] = 0.0;
+                        for (int iat = 0; iat < nat; iat++)
+                        {
+                            inout->fvl_dphi[0](iat, 0) += force[iat * 3];
+                            inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1];
+                            inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2];
+                        }
+                        delete[] force;
                     }
-                    double *stress = new double[6];
-                    for (int i = 0; i < 6; i++)
-                    {
-                        stress[i] = 0.0;
+                    if (inout->isstress){
+                        for (int iat = 0; iat < nat; iat++)
+                        {
+                            inout->fvl_dphi[0](iat, 0) += force[iat * 3];
+                            inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1];
+                            inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2];
+                        }
+                            inout->svl_dphi[0](0, 0) += stress[0];
+                            inout->svl_dphi[0](0, 1) += stress[1];
+                            inout->svl_dphi[0](0, 2) += stress[2];
+                            inout->svl_dphi[0](1, 1) += stress[3];
+                            inout->svl_dphi[0](1, 2) += stress[4];
+                            inout->svl_dphi[0](2, 2) += stress[5];
+                            delete[] stress;
                     }
-                    GintKernel::gint_gamma_force_gpu(this->DMRGint[inout->ispin],
-                                                     ucell.omega
-                                                         / this->ncxyz,
-                                                     inout->vl,
-                                                     force,
-                                                     stress,
-                                                     this->nplane,
-                                                     dr,
-                                                     rcut,
-                                                     *this->gridt,
-                                                     ucell);
-                    for (int iat = 0; iat < nat; iat++)
-                    {
-                        inout->fvl_dphi[0](iat, 0) += force[iat * 3];
-                        inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1];
-                        inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2];
                     }
-                        inout->svl_dphi[0](0, 0) += stress[0];
-                        inout->svl_dphi[0](0, 1) += stress[1];
-                        inout->svl_dphi[0](0, 2) += stress[2];
-                        inout->svl_dphi[0](1, 1) += stress[3];
-                        inout->svl_dphi[0](1, 2) += stress[4];
-                        inout->svl_dphi[0](2, 2) += stress[5];
                     
-                    delete[] force;
-                    delete[] stress;
                 // }
             }
         }
@@ -310,7 +327,6 @@ void Gint::cal_gint(Gint_inout* inout)
                         this->nplane, this->gridt->start_ind[grid_index], ncyz, dv);
 
                     double** DM_in;
-
 					if(GlobalV::GAMMA_ONLY_LOCAL)
 					{
 						DM_in = inout->DM[GlobalV::CURRENT_SPIN];
diff --git a/source/module_hamilt_lcao/module_gint/gint_force.h b/source/module_hamilt_lcao/module_gint/gint_force.h
index 6b6718e142..1cc22eaa59 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force.h
+++ b/source/module_hamilt_lcao/module_gint/gint_force.h
@@ -92,6 +92,8 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                           const int nczp,
                           double dr,
                           double* rcut,
+                          int isforce,
+                          int isstress,
                           const Grid_Technique& gridt,
                           const UnitCell& ucell);
 
@@ -208,7 +210,7 @@ void cal_init(ForceStressIat& f_s_iat,
                         const int cuda_block,
                         const int atom_num_grid,
                         const int max_size,
-                        const ForceStressIatGlobal& f_s_iatg);
+                        ForceStressIatGlobal& f_s_iatg);
 /**
  * @brief GridParameter memCpy,from Host to Device
  *
diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
index 30ea8fa6ab..2d4186a8c3 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
+++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
@@ -63,6 +63,8 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                           const int nczp,
                           double dr,
                           double* rcut,
+                          int isforce,
+                          int isstress,
                           const Grid_Technique& gridt,
                           const UnitCell& ucell)
 {
@@ -110,7 +112,7 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
             dim3 block_dot_force(cuda_threads);
             dim3 grid_dot(cuda_block);
             dim3 block_dot(cuda_threads);
-
+            
             para_init(para, iter_num, nbz, gridt);
             cal_init(f_s_iat,
                                para.stream_num,
@@ -118,8 +120,6 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                                atom_num_grid,
                                max_size,
                                f_s_iat_dev);
-            checkCuda(cudaStreamSynchronize(gridt.streams[para.stream_num]));
-
             /*gpu task compute in CPU */
             gpu_task_generator_force(gridt,
                                      ucell,
@@ -195,7 +195,7 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                                      gridt.streams[para.stream_num],
                                      nullptr);
 
-            checkCuda(cudaStreamSynchronize(gridt.streams[para.stream_num]));
+            
             /* force compute in GPU */
             dot_product_force<<<grid_dot_force,
                                 block_dot_force,
@@ -210,8 +210,8 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                 nwmax,
                 max_size,
                 gridt.psir_size / nwmax);
-            /* force compute in CPU*/
-            cal_force_add(f_s_iat, force, atom_num_grid);
+            // /* force compute in CPU*/
+            
 
             /*stress compute in GPU*/
             dot_product_stress<<<grid_dot,
@@ -229,12 +229,11 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                 gridt.psir_size);
             /* stress compute in CPU*/
             cal_stress_add(f_s_iat, stress, cuda_block);
+            cal_force_add(f_s_iat, force, atom_num_grid);
+            
             iter_num++;
         }
     }
-    // cudaFree(f_s_iat.stress_device);
-    // cudaFree(f_s_iat.force_device);
-    // cudaFree(f_s_iat.iat_device);
     delete[] f_s_iat.stress_host;
     delete[] f_s_iat.force_host;
     delete[] f_s_iat.iat_host;
diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
index bbc1d7dcb8..3153b7efd8 100644
--- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
+++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
@@ -393,7 +393,7 @@ void cal_init(ForceStressIat& f_s_iat,
                         const int cuda_block,
                         const int atom_num_grid,
                         const int max_size,
-                        const ForceStressIatGlobal& f_s_iat_dev)
+                        ForceStressIatGlobal& f_s_iat_dev)
 {
     const int iat_min = -max_size - 1;
     f_s_iat.stress_host = new double[6 * cuda_block];

From 01bde3e3baf35e51e8b80a227ab16badc6311b18 Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Tue, 21 May 2024 20:59:40 +0800
Subject: [PATCH 10/23] fix num stream in input.md and use num_stream in input

---
 docs/advanced/input_files/input-main.md                |  3 +--
 source/module_esolver/esolver_ks_lcao_elec.cpp         |  3 ++-
 .../module_hamilt_lcao/module_gint/gint_force_gpu.cu   | 10 +++++++++-
 .../module_hamilt_lcao/module_gint/grid_technique.cpp  |  9 ++++++---
 source/module_hamilt_lcao/module_gint/grid_technique.h |  9 +++++----
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index d1c996127d..8ae8fcafba 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -61,7 +61,7 @@
     - [search\_radius](#search_radius)
     - [search\_pbc](#search_pbc)
     - [bx, by, bz](#bx-by-bz)
-    - [num\_stream] (#num_stream)
+    - [num\_stream](#num_stream)
   - [Electronic structure](#electronic-structure)
     - [basis\_type](#basis_type)
     - [ks\_solver](#ks_solver)
@@ -912,7 +912,6 @@ These variables are used to control the numerical atomic orbitals related parame
 - **Description**: choose the number of streams in GPU when we compute the `LCAO`. According to different devices , we may have different effects.For most devices,the stream is
 enough when the number is bigger then 2.
 - **Default** : "4" 
-[back to top](#full-list-of-input-keywords)
 
 ## Electronic structure
 
diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp
index b3d8e975b5..263520e5f1 100644
--- a/source/module_esolver/esolver_ks_lcao_elec.cpp
+++ b/source/module_esolver/esolver_ks_lcao_elec.cpp
@@ -66,7 +66,8 @@ void ESolver_KS_LCAO<TK, TR>::set_matrix_grid(Record_adj& ra)
                              this->pw_rho->nplane,
                              this->pw_rho->startz_current,
                              GlobalC::ucell,
-                             GlobalC::ORB);
+                             GlobalC::ORB,
+                             GlobalV::NUM_STREAM);
 
     // (2)For each atom, calculate the adjacent atoms in different cells
     // and allocate the space for H(R) and S(R).
diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
index 2d4186a8c3..c9a6905595 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
+++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
@@ -150,6 +150,7 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                                  para.stream_num);
             checkCuda(cudaStreamSynchronize(gridt.streams[para.stream_num]));
             /* cuda stream compute and Multiplication of multinomial matrices */
+            
             get_psi_force<<<grid_psi,
                             block_psi,
                             0,
@@ -197,6 +198,7 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
 
             
             /* force compute in GPU */
+            if (isforce){
             dot_product_force<<<grid_dot_force,
                                 block_dot_force,
                                 0,
@@ -210,10 +212,12 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                 nwmax,
                 max_size,
                 gridt.psir_size / nwmax);
+            }
             // /* force compute in CPU*/
             
 
             /*stress compute in GPU*/
+            if (isstress){
             dot_product_stress<<<grid_dot,
                                  block_dot,
                                  0,
@@ -227,10 +231,14 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                 para.psir_dm_device,
                 f_s_iat.stress_device,
                 gridt.psir_size);
+            }
             /* stress compute in CPU*/
+            if (isstress){
             cal_stress_add(f_s_iat, stress, cuda_block);
+            }
+            if (isforce){
             cal_force_add(f_s_iat, force, atom_num_grid);
-            
+            }
             iter_num++;
         }
     }
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.cpp b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
index fdcfdc32eb..2eb74becc1 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.cpp
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.cpp
@@ -127,7 +127,8 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in,
                                   const int& nplane,
                                   const int& startz_current,
                                   const UnitCell& ucell,
-                                  const LCAO_Orbitals& orb)
+                                  const LCAO_Orbitals& orb,
+                                  const int num_stream)
 {
     ModuleBase::TITLE("Grid_Technique", "init");
     ModuleBase::timer::tick("Grid_Technique", "init");
@@ -184,7 +185,7 @@ void Grid_Technique::set_pbc_grid(const int& ncx_in,
 #if ((defined __CUDA) /* || (defined __ROCM) */)
     if(GlobalV::device_flag == "gpu")
     {
-        this->init_gpu_gint_variables(ucell,orb);
+        this->init_gpu_gint_variables(ucell,orb,num_stream);
     }
 #endif
 
@@ -638,12 +639,14 @@ void Grid_Technique::cal_trace_lo(const UnitCell& ucell)
 
 #if ((defined __CUDA) /* || (defined __ROCM) */)
 
-void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb)
+void Grid_Technique::init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb,const int num_stream)
 {
     if (is_malloced)
     {
         free_gpu_gint_variables(this->nat);
     }
+    nstreams = num_stream;
+    streams=new cudaStream_t[nstreams];
     double ylmcoef[100];
     ModuleBase::GlobalFunc::ZEROS(ylmcoef, 100);
     for (int i = 0; i < 100; i++)
diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h
index 03cca8e924..a40ad212de 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.h
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.h
@@ -102,7 +102,8 @@ class Grid_Technique : public Grid_MeshBall
                       const int& nplane,
                       const int& startz_current,
                       const UnitCell& ucell,
-                      const LCAO_Orbitals& orb);
+                      const LCAO_Orbitals& orb,
+                      const int num_stream);
 
     /// number of elements(basis-pairs) in this processon
     /// on all adjacent atoms-pairs(Grid division)
@@ -162,8 +163,8 @@ class Grid_Technique : public Grid_MeshBall
     int atom_pair_mesh;
     int atom_pair_nbz;
 
-    const int nstreams = 4;
-    cudaStream_t streams[4];
+    int nstreams ;
+    cudaStream_t* streams;
     // streams[nstreams]
     // TODO it needs to be implemented through configuration files
 
@@ -229,7 +230,7 @@ class Grid_Technique : public Grid_MeshBall
     matrix_multiple_func_type fastest_matrix_mul;
 
   private:
-    void init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb);
+    void init_gpu_gint_variables(const UnitCell& ucell,const LCAO_Orbitals &orb,const int num_stream);
     void free_gpu_gint_variables(int nat);
 
 #endif

From 6ebb7a372035fbff096bf20171c1857fd5eda4a9 Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Wed, 22 May 2024 11:46:03 +0800
Subject: [PATCH 11/23] fix error in compute force

---
 .../module_hamilt_lcao/module_gint/gint.cpp   | 20 +++++++------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp
index a25f1deb11..c15b8ba993 100644
--- a/source/module_hamilt_lcao/module_gint/gint.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint.cpp
@@ -171,19 +171,13 @@ void Gint::cal_gint(Gint_inout* inout)
                         delete[] force;
                     }
                     if (inout->isstress){
-                        for (int iat = 0; iat < nat; iat++)
-                        {
-                            inout->fvl_dphi[0](iat, 0) += force[iat * 3];
-                            inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1];
-                            inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2];
-                        }
-                            inout->svl_dphi[0](0, 0) += stress[0];
-                            inout->svl_dphi[0](0, 1) += stress[1];
-                            inout->svl_dphi[0](0, 2) += stress[2];
-                            inout->svl_dphi[0](1, 1) += stress[3];
-                            inout->svl_dphi[0](1, 2) += stress[4];
-                            inout->svl_dphi[0](2, 2) += stress[5];
-                            delete[] stress;
+                        inout->svl_dphi[0](0, 0) += stress[0];
+                        inout->svl_dphi[0](0, 1) += stress[1];
+                        inout->svl_dphi[0](0, 2) += stress[2];
+                        inout->svl_dphi[0](1, 1) += stress[3];
+                        inout->svl_dphi[0](1, 2) += stress[4];
+                        inout->svl_dphi[0](2, 2) += stress[5];
+                        delete[] stress;
                     }
                     }
                     

From fbd2f7b60080c5dd8bfd48bbc5598efbdb77fee0 Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Thu, 23 May 2024 20:25:30 +0800
Subject: [PATCH 12/23] fix memory error in force compute

---
 source/module_hamilt_lcao/module_gint/gint_force_gpu.cu | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
index c9a6905595..309515955c 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
+++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
@@ -240,11 +240,12 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
             cal_force_add(f_s_iat, force, atom_num_grid);
             }
             iter_num++;
+            delete[] f_s_iat.stress_host;
+            delete[] f_s_iat.force_host;
+            delete[] f_s_iat.iat_host;
         }
     }
-    delete[] f_s_iat.stress_host;
-    delete[] f_s_iat.force_host;
-    delete[] f_s_iat.iat_host;
+    delete[] denstiy_mat.density_mat_h;
     /*free variables in CPU host*/
     for (int i = 0; i < gridt.nstreams; i++)
     {

From c0e99902a5e731f18369a7413e5b30c43950ce09 Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Fri, 24 May 2024 10:25:16 +0800
Subject: [PATCH 13/23] use std instead of double * and add const

---
 source/module_hamilt_lcao/module_gint/gint.cpp | 18 ++++++------------
 .../module_gint/gint_force.h                   | 12 ++++++------
 .../module_gint/gint_force_gpu.cu              |  8 ++++----
 .../module_gint/kernels/cuda/gint_force.cu     |  4 ++--
 4 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp
index c15b8ba993..aa3ce5e6fb 100644
--- a/source/module_hamilt_lcao/module_gint/gint.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint.cpp
@@ -136,16 +136,8 @@ void Gint::cal_gint(Gint_inout* inout)
                 // for (int is = 0; is < GlobalV::NSPIN; ++is)
                 // {
                     if (isforce || isstress){
-                        double *force = new double[ucell.nat * 3];
-                            for (int i = 0; i < nat * 3; i++)
-                            {
-                                force[i] = 0.0;
-                            }
-                        double *stress = new double[6];
-                        for (int i = 0; i < 6; i++)
-                        {
-                            stress[i] = 0.0;
-                        }
+                        std::vector<double> force(nat * 3, 0.0);
+                        std::vector<double> stress(6, 0.0);
                         GintKernel::gint_gamma_force_gpu(this->DMRGint[inout->ispin],
                                                         ucell.omega
                                                             / this->ncxyz,
@@ -168,7 +160,7 @@ void Gint::cal_gint(Gint_inout* inout)
                             inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1];
                             inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2];
                         }
-                        delete[] force;
+                        
                     }
                     if (inout->isstress){
                         inout->svl_dphi[0](0, 0) += stress[0];
@@ -177,8 +169,10 @@ void Gint::cal_gint(Gint_inout* inout)
                         inout->svl_dphi[0](1, 1) += stress[3];
                         inout->svl_dphi[0](1, 2) += stress[4];
                         inout->svl_dphi[0](2, 2) += stress[5];
-                        delete[] stress;
+                        
                     }
+                    force.clear();
+                    stress.clear();
                     }
                     
                 // }
diff --git a/source/module_hamilt_lcao/module_gint/gint_force.h b/source/module_hamilt_lcao/module_gint/gint_force.h
index 1cc22eaa59..b968ccbfa2 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force.h
+++ b/source/module_hamilt_lcao/module_gint/gint_force.h
@@ -87,13 +87,13 @@ typedef struct
 void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                           const double vfactor,
                           const double* vlocal,
-                          double* force,
-                          double* stress,
+                          std::vector<double> force,
+                          std::vector<double> stress,
                           const int nczp,
                           double dr,
                           double* rcut,
-                          int isforce,
-                          int isstress,
+                          const int isforce,
+                          const int isstress,
                           const Grid_Technique& gridt,
                           const UnitCell& ucell);
 
@@ -249,7 +249,7 @@ void cal_mem_cpy(ForceStressIat& f_s_iat,
  * @param atom_num_grid in force calculate,used for Block nums
  */
 void cal_force_add(ForceStressIat& f_s_iat,
-                    double* force,
+                    std::vector<double> force,
                     const int atom_num_grid);
 /**
  * @brief Stress Calculate on Host
@@ -260,7 +260,7 @@ void cal_force_add(ForceStressIat& f_s_iat,
  * @param cuda_block in stress compute,used for Block nums
  */
 void cal_stress_add(ForceStressIat& f_s_iat,
-                     double* stress,
+                     std::vector<double> stress,
                      const int cuda_block);
 } // namespace GintKernel
 #endif
diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
index 309515955c..ba87f02a11 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
+++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
@@ -58,13 +58,13 @@ namespace GintKernel
 void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                           const double vfactor,
                           const double* vlocal,
-                          double* force,
-                          double* stress,
+                          std::vector<double> force,
+                          std::vector<double> stress,
                           const int nczp,
                           double dr,
                           double* rcut,
-                          int isforce,
-                          int isstress,
+                          const int isforce,
+                          const int isstress,
                           const Grid_Technique& gridt,
                           const UnitCell& ucell)
 {
diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
index 3153b7efd8..60a9f06f95 100644
--- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
+++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
@@ -572,7 +572,7 @@ void cal_mem_cpy(ForceStressIat& f_s_iat,
  * @param atom_num_grid in force calculate,used for Block nums
  */
 void cal_force_add(ForceStressIat& f_s_iat,
-                     double* force,
+                     std::vector<double> force,
                     const int atom_num_grid)
 {
     checkCuda(cudaMemcpy(f_s_iat.force_host,
@@ -601,7 +601,7 @@ void cal_force_add(ForceStressIat& f_s_iat,
  * @param cuda_block in stress compute,used for Block nums
  */
 void cal_stress_add(ForceStressIat& f_s_iat,
-                     double* stress,
+                     std::vector<double> stress,
                      const int cuda_block)
 {
     checkCuda(cudaMemcpy(f_s_iat.stress_host,

From 20f60abe9dc03e226d05197745c3fbb7a75ea42f Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Fri, 24 May 2024 10:53:43 +0800
Subject: [PATCH 14/23] fix error in vector use

---
 source/module_hamilt_lcao/module_gint/gint.cpp      |  1 -
 source/module_hamilt_lcao/module_gint/gint_force.h  |  8 ++++----
 .../module_gint/gint_force_gpu.cu                   | 13 ++++---------
 3 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp
index aa3ce5e6fb..5c7bccb559 100644
--- a/source/module_hamilt_lcao/module_gint/gint.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint.cpp
@@ -160,7 +160,6 @@ void Gint::cal_gint(Gint_inout* inout)
                             inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1];
                             inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2];
                         }
-                        
                     }
                     if (inout->isstress){
                         inout->svl_dphi[0](0, 0) += stress[0];
diff --git a/source/module_hamilt_lcao/module_gint/gint_force.h b/source/module_hamilt_lcao/module_gint/gint_force.h
index b968ccbfa2..4f59d77520 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force.h
+++ b/source/module_hamilt_lcao/module_gint/gint_force.h
@@ -87,8 +87,8 @@ typedef struct
 void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                           const double vfactor,
                           const double* vlocal,
-                          std::vector<double> force,
-                          std::vector<double> stress,
+                          std::vector<double>& force,
+                          std::vector<double>& stress,
                           const int nczp,
                           double dr,
                           double* rcut,
@@ -249,7 +249,7 @@ void cal_mem_cpy(ForceStressIat& f_s_iat,
  * @param atom_num_grid in force calculate,used for Block nums
  */
 void cal_force_add(ForceStressIat& f_s_iat,
-                    std::vector<double> force,
+                    std::vector<double>& force,
                     const int atom_num_grid);
 /**
  * @brief Stress Calculate on Host
@@ -260,7 +260,7 @@ void cal_force_add(ForceStressIat& f_s_iat,
  * @param cuda_block in stress compute,used for Block nums
  */
 void cal_stress_add(ForceStressIat& f_s_iat,
-                     std::vector<double> stress,
+                     std::vector<double>& stress,
                      const int cuda_block);
 } // namespace GintKernel
 #endif
diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
index ba87f02a11..a9c6d86632 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
+++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
@@ -58,8 +58,8 @@ namespace GintKernel
 void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                           const double vfactor,
                           const double* vlocal,
-                          std::vector<double> force,
-                          std::vector<double> stress,
+                          std::vector<double>& force,
+                          std::vector<double>& stress,
                           const int nczp,
                           double dr,
                           double* rcut,
@@ -195,8 +195,6 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                                      atom_pair_num,
                                      gridt.streams[para.stream_num],
                                      nullptr);
-
-            
             /* force compute in GPU */
             if (isforce){
             dot_product_force<<<grid_dot_force,
@@ -213,9 +211,6 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
                 max_size,
                 gridt.psir_size / nwmax);
             }
-            // /* force compute in CPU*/
-            
-
             /*stress compute in GPU*/
             if (isstress){
             dot_product_stress<<<grid_dot,
@@ -234,10 +229,10 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
             }
             /* stress compute in CPU*/
             if (isstress){
-            cal_stress_add(f_s_iat, stress, cuda_block);
+                cal_stress_add(f_s_iat, stress, cuda_block);
             }
             if (isforce){
-            cal_force_add(f_s_iat, force, atom_num_grid);
+                cal_force_add(f_s_iat, force, atom_num_grid);
             }
             iter_num++;
             delete[] f_s_iat.stress_host;

From 8626c7a6ee236a3e77d0f80a8886c184027fd11c Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Fri, 24 May 2024 11:20:41 +0800
Subject: [PATCH 15/23] fix error in compile

---
 .../module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
index 60a9f06f95..ffc9ab3e97 100644
--- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
+++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
@@ -572,7 +572,7 @@ void cal_mem_cpy(ForceStressIat& f_s_iat,
  * @param atom_num_grid in force calculate,used for Block nums
  */
 void cal_force_add(ForceStressIat& f_s_iat,
-                     std::vector<double> force,
+                     std::vector<double& force,
                     const int atom_num_grid)
 {
     checkCuda(cudaMemcpy(f_s_iat.force_host,

From 5a81037f41b66d07c4a1ef10d66ecec53c62a126 Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Fri, 24 May 2024 11:45:46 +0800
Subject: [PATCH 16/23] fix error in compile with force

---
 .../module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
index ffc9ab3e97..ad5cb2477a 100644
--- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
+++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
@@ -572,7 +572,7 @@ void cal_mem_cpy(ForceStressIat& f_s_iat,
  * @param atom_num_grid in force calculate,used for Block nums
  */
 void cal_force_add(ForceStressIat& f_s_iat,
-                     std::vector<double& force,
+                     std::vector<double>& force,
                     const int atom_num_grid)
 {
     checkCuda(cudaMemcpy(f_s_iat.force_host,

From cfae1f62aad719bd6e09e08f6e5000507319e57e Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Fri, 24 May 2024 12:52:19 +0800
Subject: [PATCH 17/23] fix compile error

---
 .../module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
index ad5cb2477a..f93db3d417 100644
--- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
+++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
@@ -601,7 +601,7 @@ void cal_force_add(ForceStressIat& f_s_iat,
  * @param cuda_block in stress compute,used for Block nums
  */
 void cal_stress_add(ForceStressIat& f_s_iat,
-                     std::vector<double> stress,
+                     std::vector<double>& stress,
                      const int cuda_block)
 {
     checkCuda(cudaMemcpy(f_s_iat.stress_host,

From 892918061e4609bf0482f863d45c53c7d69c5cbd Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Sun, 26 May 2024 16:35:02 +0800
Subject: [PATCH 18/23] fix paramter name and function name

---
 docs/advanced/input_files/input-main.md       |  2 +
 .../module_hamilt_lcao/module_gint/gint.cpp   |  2 +-
 .../module_gint/gint_force.h                  | 40 +++++++++----------
 .../module_gint/gint_force_gpu.cu             | 12 +++---
 .../module_gint/gtask_force.cpp               |  2 +-
 .../module_gint/kernels/cuda/gint_force.cu    | 28 ++++++-------
 6 files changed, 44 insertions(+), 42 deletions(-)

diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
index b22e35e881..970dd4fd72 100644
--- a/docs/advanced/input_files/input-main.md
+++ b/docs/advanced/input_files/input-main.md
@@ -916,6 +916,8 @@ These variables are used to control the numerical atomic orbitals related parame
 enough when the number is bigger then 2.
 - **Default** : "4" 
 
+[back to top](#full-list-of-input-keywords)
+
 ## Electronic structure
 
 These variables are used to control the electronic structure and geometry relaxation
diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp
index 5c7bccb559..6a1ad203ee 100644
--- a/source/module_hamilt_lcao/module_gint/gint.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint.cpp
@@ -138,7 +138,7 @@ void Gint::cal_gint(Gint_inout* inout)
                     if (isforce || isstress){
                         std::vector<double> force(nat * 3, 0.0);
                         std::vector<double> stress(6, 0.0);
-                        GintKernel::gint_gamma_force_gpu(this->DMRGint[inout->ispin],
+                        GintKernel::gint_fvl_gamma_gpu(this->DMRGint[inout->ispin],
                                                         ucell.omega
                                                             / this->ncxyz,
                                                         inout->vl,
diff --git a/source/module_hamilt_lcao/module_gint/gint_force.h b/source/module_hamilt_lcao/module_gint/gint_force.h
index 4f59d77520..51851082de 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force.h
+++ b/source/module_hamilt_lcao/module_gint/gint_force.h
@@ -44,7 +44,7 @@ typedef struct
     double** matrix_A_device;
     double** matrix_B_device;
     double** matrix_C_device;
-} SGridParameter;
+} grid_para;
 
 typedef struct
 {
@@ -55,14 +55,14 @@ typedef struct
     int* iat_device;
     int* iat_host;
 
-} ForceStressIat;
+} frc_strs_iat;
 
 typedef struct
 {
     double* stress_global;
     double* force_global;
     int* iat_global;
-} ForceStressIatGlobal;
+} frc_strs_iat_gbl;
 
 typedef struct
 {
@@ -84,7 +84,7 @@ typedef struct
  * @param ylmcoef_now Coefficients for spherical harmonics.
  * @param gridt Reference to Grid_Technique object.
  */
-void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
+void gint_fvl_gamma_gpu(hamilt::HContainer<double>* dm,
                           const double vfactor,
                           const double* vlocal,
                           std::vector<double>& force,
@@ -138,7 +138,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt,
                               int& max_m,
                               int& max_n,
                               int& atom_pair_num,
-                              SGridParameter& para);
+                              grid_para& para);
 /**
  * @brief Density Matrix,force Stress Iat Init
  *
@@ -146,7 +146,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt,
  *
  * @param denstiy_mat DensityMat,contained the density_mat_dice and
  * destiyMatHost
- * @param f_s_iat_dev ForceStressIatGlobal,contined the Force Stress and
+ * @param f_s_iat_dev frc_strs_iat_gbl,contined the Force Stress and
  * Iat Number
  * @param dm hamilt::HContainer,denstiy stored in the Hcontainer
  * @param gridt Grid_Technique,stored the major method in the the gint.
@@ -156,7 +156,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt,
  * @param atom_num_grid in force calculate,used for Block nums
  */
 void calculateInit(DensityMat& denstiy_mat,
-                   ForceStressIatGlobal& f_s_iat_dev,
+                   frc_strs_iat_gbl& f_s_iat_dev,
                    hamilt::HContainer<double>* dm,
                    const Grid_Technique& gridt,
                    const UnitCell& ucell,
@@ -189,28 +189,28 @@ void allocateDm(double* matrix_host,
  * @param nbz int,stand for the number of Z-axis
  * @param gridt Grid_Technique,stored the major method in the the gint.
  */
-void para_init(SGridParameter& para,
+void para_init(grid_para& para,
                        const int iter_num,
                        const int nbz,
                        const Grid_Technique& gridt);
 /**
- * @brief ForceStressIat on host and device Init
+ * @brief frc_strs_iat on host and device Init
  *
  * GridParameter init
  *
- * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Host
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Host
  * @param stream_num int , record the stream in GPU
  * @param cuda_block in stress compute,used for Block nums
  * @param atom_num_grid in force calculate,used for Block nums
  * @param max_size Maximum size of atoms on a grid.
- * @param ForceStressIatGlobal ForceStressIatGlobal,contains the Force Stree Iat on Host
+ * @param frc_strs_iat_gbl frc_strs_iat_gbl,contains the Force Stree Iat on Host
  */
-void cal_init(ForceStressIat& f_s_iat,
+void cal_init(frc_strs_iat& f_s_iat,
                         const int stream_num,
                         const int cuda_block,
                         const int atom_num_grid,
                         const int max_size,
-                        ForceStressIatGlobal& f_s_iatg);
+                        frc_strs_iat_gbl& f_s_iatg);
 /**
  * @brief GridParameter memCpy,from Host to Device
  *
@@ -221,21 +221,21 @@ void cal_init(ForceStressIat& f_s_iat,
  * @param nbz int,stand for the number of Z-axis
  * @param atom_num_grid in force calculate,used for Block nums
  */
-void para_mem_copy(SGridParameter& para,
+void para_mem_copy(grid_para& para,
                          const Grid_Technique& gridt,
                          const int nbz,
                          const int atom_num_grid);
 /**
  * @brief Force Stress Force Iat memCpy,from Host to Device
  *
- *  @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device
+ *  @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
  * and Host
  *  @param gridt Grid_Technique,stored the major method in the the gint.
  *  @param atom_num_grid in force calculate,used for Block nums
  *  @param cuda_block in stress compute,used for Block nums
  *  @param stream_num int , record the stream in GPU
  */
-void cal_mem_cpy(ForceStressIat& f_s_iat,
+void cal_mem_cpy(frc_strs_iat& f_s_iat,
                           const Grid_Technique& gridt,
                           const int atom_num_grid,
                           const int cuda_block,
@@ -243,23 +243,23 @@ void cal_mem_cpy(ForceStressIat& f_s_iat,
 /**
  * @brief Force Calculate on Host
  *
- * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
  * and Host
  * @param force stored the force for each atom on each directions
  * @param atom_num_grid in force calculate,used for Block nums
  */
-void cal_force_add(ForceStressIat& f_s_iat,
+void cal_force_add(frc_strs_iat& f_s_iat,
                     std::vector<double>& force,
                     const int atom_num_grid);
 /**
  * @brief Stress Calculate on Host
  *
- * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
  * and Host
  * @param stress stored the stress for each directions
  * @param cuda_block in stress compute,used for Block nums
  */
-void cal_stress_add(ForceStressIat& f_s_iat,
+void cal_stress_add(frc_strs_iat& f_s_iat,
                      std::vector<double>& stress,
                      const int cuda_block);
 } // namespace GintKernel
diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
index a9c6d86632..29e71d03b1 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
+++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
@@ -14,7 +14,7 @@ namespace GintKernel
 
 // Function to calculate forces using GPU-accelerated gamma point Gint
 /**
- * @brief Calculate forces and stresses for the `gint_gamma_force_gpu` function.
+ * @brief Calculate forces and stresses for the `gint_fvl_gamma_gpu` function.
  *
  * This function calculates forces and stresses based on given parameters.
  *
@@ -30,7 +30,7 @@ namespace GintKernel
  */
 /**
  * Function to calculate forces using GPU-accelerated gamma point Gint
- * @brief Calculate forces and stresses for the `gint_gamma_force_gpu` function.
+ * @brief Calculate forces and stresses for the `gint_fvl_gamma_gpu` function.
  *
  * This function calculates forces and stresses based on given parameters.
  *
@@ -55,7 +55,7 @@ namespace GintKernel
  * 6. force dot on the GPU.
  * 7. Copy the results back to the host.
  */
-void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
+void gint_fvl_gamma_gpu(hamilt::HContainer<double>* dm,
                           const double vfactor,
                           const double* vlocal,
                           std::vector<double>& force,
@@ -79,9 +79,9 @@ void gint_gamma_force_gpu(hamilt::HContainer<double>* dm,
         = std::min(64, (gridt.psir_size + cuda_threads - 1) / cuda_threads);
     int iter_num = 0;
     DensityMat denstiy_mat;
-    ForceStressIatGlobal f_s_iat_dev;
-    SGridParameter para;
-    ForceStressIat f_s_iat;
+    frc_strs_iat_gbl f_s_iat_dev;
+    grid_para para;
+    frc_strs_iat f_s_iat;
 
     calculateInit(denstiy_mat,
                   f_s_iat_dev,
diff --git a/source/module_hamilt_lcao/module_gint/gtask_force.cpp b/source/module_hamilt_lcao/module_gint/gtask_force.cpp
index 7c30d3db83..e785b4192b 100644
--- a/source/module_hamilt_lcao/module_gint/gtask_force.cpp
+++ b/source/module_hamilt_lcao/module_gint/gtask_force.cpp
@@ -56,7 +56,7 @@ void gpu_task_generator_force(const Grid_Technique& gridt,
                               int& max_m,
                               int& max_n,
                               int& atom_pair_num,
-                              SGridParameter& para)
+                              grid_para& para)
 {
     const int grid_index_ij = i * gridt.nby * gridt.nbzp + j * gridt.nbzp;
     const int nwmax = ucell.nwmax;
diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
index f93db3d417..ae89dd2f2c 100644
--- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
+++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
@@ -253,7 +253,7 @@ __global__ void dot_product_force(double* psir_lx,
     }
 }
 void calculateInit(DensityMat& denstiy_mat,
-                   ForceStressIatGlobal& f_s_iat_dev,
+                   frc_strs_iat_gbl& f_s_iat_dev,
                    hamilt::HContainer<double>* dm,
                    const Grid_Technique& gridt,
                    const UnitCell& ucell,
@@ -300,7 +300,7 @@ void calculateInit(DensityMat& denstiy_mat,
  * @param nbz int,stand for the number of Z-axis
  * @param gridt Grid_Technique,stored the major method in the the gint.
  */
-void para_init(SGridParameter& para,
+void para_init(grid_para& para,
                        int iter_num,
                        int nbz,
                        const Grid_Technique& gridt)
@@ -377,23 +377,23 @@ void para_init(SGridParameter& para,
         = &gridt.ap_output_gbl_g[gridt.atom_pair_nbz * para.stream_num];
 }
 /**
- * @brief ForceStressIat on host and device Init
+ * @brief frc_strs_iat on host and device Init
  *
  * GridParameter init
  *
- * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Host
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Host
  * @param stream_num int , record the stream in GPU
  * @param cuda_block in stress compute,used for Block nums
  * @param atom_num_grid in force calculate,used for Block nums
  * @param max_size Maximum size of atoms on a grid.
- * @param ForceStressIatGlobal ForceStressIatGlobal,contains the Force Stree Iat on Host
+ * @param frc_strs_iat_gbl frc_strs_iat_gbl,contains the Force Stree Iat on Host
  */
-void cal_init(ForceStressIat& f_s_iat,
+void cal_init(frc_strs_iat& f_s_iat,
                         const int stream_num,
                         const int cuda_block,
                         const int atom_num_grid,
                         const int max_size,
-                        ForceStressIatGlobal& f_s_iat_dev)
+                        frc_strs_iat_gbl& f_s_iat_dev)
 {
     const int iat_min = -max_size - 1;
     f_s_iat.stress_host = new double[6 * cuda_block];
@@ -423,7 +423,7 @@ void cal_init(ForceStressIat& f_s_iat,
  * @param nbz int,stand for the number of Z-axis
  * @param atom_num_grid in force calculate,used for Block nums
  */
-void para_mem_copy(SGridParameter& para,
+void para_mem_copy(grid_para& para,
                          const Grid_Technique& gridt,
                          const int nbz,
                          const int atom_num_grid)
@@ -536,14 +536,14 @@ void para_mem_copy(SGridParameter& para,
 /**
  * @brief Force Stress Force Iat memCpy,from Host to Device
  *
- *  @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device
+ *  @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
  * and Host
  *  @param gridt Grid_Technique,stored the major method in the the gint.
  *  @param atom_num_grid in force calculate,used for Block nums
  *  @param cuda_block in stress compute,used for Block nums
  *  @param stream_num int , record the stream in GPU
  */
-void cal_mem_cpy(ForceStressIat& f_s_iat,
+void cal_mem_cpy(frc_strs_iat& f_s_iat,
                           const Grid_Technique& gridt,
                           const int atom_num_grid,
                           const int cuda_block,
@@ -566,12 +566,12 @@ void cal_mem_cpy(ForceStressIat& f_s_iat,
 /*
  * @brief Force Calculate on Host
  *
- * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
  * and Host
  * @param force stored the force for each atom on each directions
  * @param atom_num_grid in force calculate,used for Block nums
  */
-void cal_force_add(ForceStressIat& f_s_iat,
+void cal_force_add(frc_strs_iat& f_s_iat,
                      std::vector<double>& force,
                     const int atom_num_grid)
 {
@@ -595,12 +595,12 @@ void cal_force_add(ForceStressIat& f_s_iat,
 /**
  * @brief Stress Calculate on Host
  *
- * @param ForceStressIat ForceStressIat,contains the Force Stree Iat on Device
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
  * and Host
  * @param stress stored the stress for each directions
  * @param cuda_block in stress compute,used for Block nums
  */
-void cal_stress_add(ForceStressIat& f_s_iat,
+void cal_stress_add(frc_strs_iat& f_s_iat,
                      std::vector<double>& stress,
                      const int cuda_block)
 {

From 7056269aba6b136b2079592b95a16123f0278b8e Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Sun, 26 May 2024 21:12:14 +0800
Subject: [PATCH 19/23] add time ticker and fix nspin transport

---
 .../module_hamilt_lcao/module_gint/gint.cpp   | 77 +++++++++----------
 .../module_gint/gint_force_gpu.cu             |  1 -
 2 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp
index 6a1ad203ee..8c558232ae 100644
--- a/source/module_hamilt_lcao/module_gint/gint.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint.cpp
@@ -133,48 +133,45 @@ void Gint::cal_gint(Gint_inout* inout)
                 int nat = ucell.nat;
                 const int isforce = inout->isforce;
                 const int isstress =inout->isstress;
-                // for (int is = 0; is < GlobalV::NSPIN; ++is)
-                // {
-                    if (isforce || isstress){
-                        std::vector<double> force(nat * 3, 0.0);
-                        std::vector<double> stress(6, 0.0);
-                        GintKernel::gint_fvl_gamma_gpu(this->DMRGint[inout->ispin],
-                                                        ucell.omega
-                                                            / this->ncxyz,
-                                                        inout->vl,
-                                                        force,
-                                                        stress,
-                                                        this->nplane,
-                                                        dr,
-                                                        rcut,
-                                                        isforce,
-                                                        isstress,
-                                                        *this->gridt,
-                                                        ucell);
-                    
-                    if (inout->isforce)
+                ModuleBase::TITLE("Gint_interface","cal_force_gpu");
+                ModuleBase::timer::tick("Gint_interface","cal_force_gpu");
+                if (isforce || isstress){
+                    std::vector<double> force(nat * 3, 0.0);
+                    std::vector<double> stress(6, 0.0);
+                    GintKernel::gint_fvl_gamma_gpu(this->DMRGint[inout->ispin],
+                                                    ucell.omega
+                                                        / this->ncxyz,
+                                                    inout->vl,
+                                                    force,
+                                                    stress,
+                                                    this->nplane,
+                                                    dr,
+                                                    rcut,
+                                                    isforce,
+                                                    isstress,
+                                                    *this->gridt,
+                                                    ucell);
+                if (inout->isforce)
+                {
+                    for (int iat = 0; iat < nat; iat++)
                     {
-                        for (int iat = 0; iat < nat; iat++)
-                        {
-                            inout->fvl_dphi[0](iat, 0) += force[iat * 3];
-                            inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1];
-                            inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2];
-                        }
-                    }
-                    if (inout->isstress){
-                        inout->svl_dphi[0](0, 0) += stress[0];
-                        inout->svl_dphi[0](0, 1) += stress[1];
-                        inout->svl_dphi[0](0, 2) += stress[2];
-                        inout->svl_dphi[0](1, 1) += stress[3];
-                        inout->svl_dphi[0](1, 2) += stress[4];
-                        inout->svl_dphi[0](2, 2) += stress[5];
-                        
+                        inout->fvl_dphi[0](iat, 0) = force[iat * 3];
+                        inout->fvl_dphi[0](iat, 1) = force[iat * 3 + 1];
+                        inout->fvl_dphi[0](iat, 2) = force[iat * 3 + 2];
                     }
-                    force.clear();
-                    stress.clear();
-                    }
-                    
-                // }
+                }
+                if (inout->isstress){
+                    inout->svl_dphi[0](0, 0) = stress[0];
+                    inout->svl_dphi[0](0, 1) = stress[1];
+                    inout->svl_dphi[0](0, 2) = stress[2];
+                    inout->svl_dphi[0](1, 1) = stress[3];
+                    inout->svl_dphi[0](1, 2) = stress[4];
+                    inout->svl_dphi[0](2, 2) = stress[5];
+                }
+                force.clear();
+                stress.clear();
+                }
+                ModuleBase::timer::tick("Gint_interface","cal_force_gpu");   
             }
         }
         else
diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
index 29e71d03b1..38decf7de0 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
+++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
@@ -8,7 +8,6 @@
 #include "kernels/cuda/gint_force.cuh"
 #include "module_base/ylm.h"
 #include "module_hamilt_lcao/module_gint/gint_tools.h"
-
 namespace GintKernel
 {
 

From 7ccc51f1d2c15b7f8725f8fdcf7c3e44d389acad Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Sun, 26 May 2024 21:13:02 +0800
Subject: [PATCH 20/23] delete printf in files

---
 source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu | 1 -
 1 file changed, 1 deletion(-)

diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
index ae89dd2f2c..a86c16cb5f 100644
--- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
+++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
@@ -612,7 +612,6 @@ void cal_stress_add(frc_strs_iat& f_s_iat,
     {
         for (int index = 0; index < cuda_block; index++)
         {
-            // printf("the stress is %f\n",stress[i]);
             stress[i] += f_s_iat.stress_host[i * cuda_block + index];
         }
     }

From 60b1b4d39599b6de76cc0b51fac6a4bb640639b3 Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Sun, 26 May 2024 21:50:06 +0800
Subject: [PATCH 21/23] fix test bug and fix grid_size

---
 source/module_hamilt_lcao/module_gint/gint.cpp | 18 +++++++++---------
 .../module_gint/gint_force_gpu.cu              |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/gint.cpp b/source/module_hamilt_lcao/module_gint/gint.cpp
index 8c558232ae..ae3205a187 100644
--- a/source/module_hamilt_lcao/module_gint/gint.cpp
+++ b/source/module_hamilt_lcao/module_gint/gint.cpp
@@ -155,18 +155,18 @@ void Gint::cal_gint(Gint_inout* inout)
                 {
                     for (int iat = 0; iat < nat; iat++)
                     {
-                        inout->fvl_dphi[0](iat, 0) = force[iat * 3];
-                        inout->fvl_dphi[0](iat, 1) = force[iat * 3 + 1];
-                        inout->fvl_dphi[0](iat, 2) = force[iat * 3 + 2];
+                        inout->fvl_dphi[0](iat, 0) += force[iat * 3];
+                        inout->fvl_dphi[0](iat, 1) += force[iat * 3 + 1];
+                        inout->fvl_dphi[0](iat, 2) += force[iat * 3 + 2];
                     }
                 }
                 if (inout->isstress){
-                    inout->svl_dphi[0](0, 0) = stress[0];
-                    inout->svl_dphi[0](0, 1) = stress[1];
-                    inout->svl_dphi[0](0, 2) = stress[2];
-                    inout->svl_dphi[0](1, 1) = stress[3];
-                    inout->svl_dphi[0](1, 2) = stress[4];
-                    inout->svl_dphi[0](2, 2) = stress[5];
+                    inout->svl_dphi[0](0, 0) += stress[0];
+                    inout->svl_dphi[0](0, 1) += stress[1];
+                    inout->svl_dphi[0](0, 2) += stress[2];
+                    inout->svl_dphi[0](1, 1) += stress[3];
+                    inout->svl_dphi[0](1, 2) += stress[4];
+                    inout->svl_dphi[0](2, 2) += stress[5];
                 }
                 force.clear();
                 stress.clear();
diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
index 38decf7de0..3718f48819 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
+++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
@@ -105,7 +105,7 @@ void gint_fvl_gamma_gpu(hamilt::HContainer<double>* dm,
             int max_m = 0;
             int max_n = 0;
             int atom_pair_num = 0;
-            dim3 grid_psi(nbz, 8);
+            dim3 grid_psi(nbz, 32);
             dim3 block_psi(64);
             dim3 grid_dot_force(cuda_block);
             dim3 block_dot_force(cuda_threads);

From 5359609d2e4d8733157b85934f4a3e82e555f3c0 Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Mon, 27 May 2024 09:41:27 +0800
Subject: [PATCH 22/23] init nstreams

---
 source/module_hamilt_lcao/module_gint/grid_technique.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/module_hamilt_lcao/module_gint/grid_technique.h b/source/module_hamilt_lcao/module_gint/grid_technique.h
index 19f02878cf..b51cb30686 100644
--- a/source/module_hamilt_lcao/module_gint/grid_technique.h
+++ b/source/module_hamilt_lcao/module_gint/grid_technique.h
@@ -163,7 +163,7 @@ class Grid_Technique : public Grid_MeshBall
     int atom_pair_mesh;
     int atom_pair_nbz;
 
-    int nstreams ;
+    int nstreams=4 ;
     cudaStream_t* streams;
     // streams[nstreams]
     // TODO it needs to be implemented through configuration files

From 25d40d173a5fb21841edb47d0175bc2b8960f07d Mon Sep 17 00:00:00 2001
From: A-006 <3158793232@qq.com>
Date: Mon, 27 May 2024 17:10:23 +0800
Subject: [PATCH 23/23] move cpp function from cu file to cpp file

---
 .../module_gint/gint_force.h                  |   2 +
 .../module_gint/gint_force_gpu.cu             |  19 +-
 .../module_gint/gtask_force.cpp               | 365 ++++++++++++++++++
 .../module_gint/kernels/cuda/gint_force.cu    | 363 -----------------
 4 files changed, 378 insertions(+), 371 deletions(-)

diff --git a/source/module_hamilt_lcao/module_gint/gint_force.h b/source/module_hamilt_lcao/module_gint/gint_force.h
index 51851082de..c4118f4611 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force.h
+++ b/source/module_hamilt_lcao/module_gint/gint_force.h
@@ -192,6 +192,7 @@ void allocateDm(double* matrix_host,
 void para_init(grid_para& para,
                        const int iter_num,
                        const int nbz,
+                       const int pipeline_index,
                        const Grid_Technique& gridt);
 /**
  * @brief frc_strs_iat on host and device Init
@@ -224,6 +225,7 @@ void cal_init(frc_strs_iat& f_s_iat,
 void para_mem_copy(grid_para& para,
                          const Grid_Technique& gridt,
                          const int nbz,
+                         const int pipeline_index,
                          const int atom_num_grid);
 /**
  * @brief Force Stress Force Iat memCpy,from Host to Device
diff --git a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
index 3718f48819..5824ba494b 100644
--- a/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
+++ b/source/module_hamilt_lcao/module_gint/gint_force_gpu.cu
@@ -77,6 +77,7 @@ void gint_fvl_gamma_gpu(hamilt::HContainer<double>* dm,
     const int cuda_block
         = std::min(64, (gridt.psir_size + cuda_threads - 1) / cuda_threads);
     int iter_num = 0;
+    int pipeline_index = 0;
     DensityMat denstiy_mat;
     frc_strs_iat_gbl f_s_iat_dev;
     grid_para para;
@@ -112,9 +113,10 @@ void gint_fvl_gamma_gpu(hamilt::HContainer<double>* dm,
             dim3 grid_dot(cuda_block);
             dim3 block_dot(cuda_threads);
             
-            para_init(para, iter_num, nbz, gridt);
+            pipeline_index = iter_num % gridt.nstreams;
+            para_init(para, iter_num, nbz, pipeline_index,gridt);
             cal_init(f_s_iat,
-                               para.stream_num,
+                               pipeline_index,
                                cuda_block,
                                atom_num_grid,
                                max_size,
@@ -141,19 +143,20 @@ void gint_fvl_gamma_gpu(hamilt::HContainer<double>* dm,
             para_mem_copy(para, 
                                  gridt, 
                                  nbz, 
+                                 pipeline_index,
                                  atom_num_grid);
             cal_mem_cpy(f_s_iat,
                                  gridt,
                                  atom_num_grid,
                                  cuda_block,
-                                 para.stream_num);
-            checkCuda(cudaStreamSynchronize(gridt.streams[para.stream_num]));
+                                 pipeline_index);
+            checkCuda(cudaStreamSynchronize(gridt.streams[pipeline_index]));
             /* cuda stream compute and Multiplication of multinomial matrices */
             
             get_psi_force<<<grid_psi,
                             block_psi,
                             0,
-                            gridt.streams[para.stream_num]>>>(
+                            gridt.streams[pipeline_index]>>>(
                 gridt.ylmcoef_g,
                 dr,
                 gridt.bxyz,
@@ -192,14 +195,14 @@ void gint_fvl_gamma_gpu(hamilt::HContainer<double>* dm,
                                      para.matrix_C_device,
                                      para.ldc_device,
                                      atom_pair_num,
-                                     gridt.streams[para.stream_num],
+                                     gridt.streams[pipeline_index],
                                      nullptr);
             /* force compute in GPU */
             if (isforce){
             dot_product_force<<<grid_dot_force,
                                 block_dot_force,
                                 0,
-                                gridt.streams[para.stream_num]>>>(
+                                gridt.streams[pipeline_index]>>>(
                 para.psir_lx_device,
                 para.psir_ly_device,
                 para.psir_lz_device,
@@ -215,7 +218,7 @@ void gint_fvl_gamma_gpu(hamilt::HContainer<double>* dm,
             dot_product_stress<<<grid_dot,
                                  block_dot,
                                  0,
-                                 gridt.streams[para.stream_num]>>>(
+                                 gridt.streams[pipeline_index]>>>(
                 para.psir_lxx_device,
                 para.psir_lxy_device,
                 para.psir_lxz_device,
diff --git a/source/module_hamilt_lcao/module_gint/gtask_force.cpp b/source/module_hamilt_lcao/module_gint/gtask_force.cpp
index e785b4192b..9aa7e9c386 100644
--- a/source/module_hamilt_lcao/module_gint/gtask_force.cpp
+++ b/source/module_hamilt_lcao/module_gint/gtask_force.cpp
@@ -259,5 +259,370 @@ void allocateDm(double* matrixHost,
     }
     return;
 }
+void calculateInit(DensityMat& denstiy_mat,
+                   frc_strs_iat_gbl& f_s_iat_dev,
+                   hamilt::HContainer<double>* dm,
+                   const Grid_Technique& gridt,
+                   const UnitCell& ucell,
+                   int lgd,
+                   int cuda_block,
+                   int atom_num_grid)
+{
+    denstiy_mat.density_mat_h = new double[lgd * lgd];
+    allocateDm(denstiy_mat.density_mat_h, dm, gridt, ucell);
+
+    checkCuda(cudaMalloc((void**)&denstiy_mat.density_mat_d,
+                         lgd * lgd * sizeof(double)));
+    checkCuda(cudaMemcpy(denstiy_mat.density_mat_d,
+                         denstiy_mat.density_mat_h,
+                         lgd * lgd * sizeof(double),
+                         cudaMemcpyHostToDevice));
+
+    checkCuda(cudaMalloc((void**)&f_s_iat_dev.stress_global,
+                         6 * cuda_block * gridt.nstreams * sizeof(double)));
+    checkCuda(cudaMemset(f_s_iat_dev.stress_global,
+                         0,
+                         6 * cuda_block * gridt.nstreams * sizeof(double)));
+
+    checkCuda(cudaMalloc((void**)&f_s_iat_dev.force_global,
+                         3 * atom_num_grid * gridt.nstreams * sizeof(double)));
+    checkCuda(cudaMemset(f_s_iat_dev.force_global,
+                         0,
+                         3 * atom_num_grid * gridt.nstreams * sizeof(double)));
+
+    checkCuda(cudaMalloc((void**)&f_s_iat_dev.iat_global,
+                         atom_num_grid * gridt.nstreams * sizeof(int)));
+    checkCuda(cudaMemset(f_s_iat_dev.iat_global,
+                         0,
+                         atom_num_grid * gridt.nstreams * sizeof(int)));
+}
+
+/**
+ * @brief grid parameter Init
+ *
+ * GridParameter init
+ *
+ * @param para double *,contained the destiyMatHost
+ * @param iter_num int , used for calcute the stream
+ * @param nbz int,stand for the number of Z-axis
+ * @param gridt Grid_Technique,stored the major method in the the gint.
+ */
+void para_init(grid_para& para,
+                       const int iter_num,
+                       const int nbz,
+                       const int pipeline_index,
+                       const Grid_Technique& gridt)
+{
+
+    // stream_num stand for nstreams
+    
+    //input_dou and input _int used for the Spherical Harmonics
+    para.input_dou
+        = &gridt.psi_dbl_gbl[gridt.psi_size_max * pipeline_index * 5];
+    para.input_int
+        = &gridt.psi_int_gbl[gridt.psi_size_max * pipeline_index * 2];
+    para.num_psir = &gridt.num_psir_gbl[nbz * pipeline_index];
+    //one dimension,record the length and the leading dimension of three matrix
+    para.atom_pair_A_m
+        = &gridt.l_info_global[gridt.atom_pair_nbz * pipeline_index];
+    para.atom_pair_B_n
+        = &gridt.r_info_global[gridt.atom_pair_nbz * pipeline_index];
+    para.atom_pair_K
+        = &gridt.k_info_global[gridt.atom_pair_nbz * pipeline_index];
+    para.atom_pair_lda
+        = &gridt.lda_info_global[gridt.atom_pair_nbz * pipeline_index];
+    para.atom_pair_ldb
+        = &gridt.ldb_info_global[gridt.atom_pair_nbz * pipeline_index];
+    para.atom_pair_ldc
+        = &gridt.ldc_info_global[gridt.atom_pair_nbz * pipeline_index];
+    //input_double_g and input_int_g used for the Spherical Harmonics on GPU
+    para.input_double_g
+        = &gridt.psi_dbl_gbl_g[gridt.psi_size_max * pipeline_index * 5];
+    para.input_int_g
+        = &gridt.psi_int_gbl_g[gridt.psi_size_max * pipeline_index * 2];
+    para.num_psir_g = &gridt.num_psir_gbl_g[nbz * pipeline_index];
+    para.psir_dm_device = &gridt.dm_global_g[gridt.psir_size * pipeline_index];
+    para.psir_r_device
+        = &gridt.right_global_g[gridt.psir_size * pipeline_index];
+    //psi function ,record the force in x y z,and the stress in six dimension
+    para.psir_lx_device = &gridt.d_left_x_g[gridt.psir_size * pipeline_index];
+    para.psir_ly_device = &gridt.d_left_y_g[gridt.psir_size * pipeline_index];
+    para.psir_lz_device = &gridt.d_left_z_g[gridt.psir_size * pipeline_index];
+    para.psir_lxx_device
+        = &gridt.dd_left_xx_g[gridt.psir_size * pipeline_index];
+    para.psir_lxy_device
+        = &gridt.dd_left_xy_g[gridt.psir_size * pipeline_index];
+    para.psir_lxz_device
+        = &gridt.dd_left_xz_g[gridt.psir_size * pipeline_index];
+    para.psir_lyy_device
+        = &gridt.dd_left_yy_g[gridt.psir_size * pipeline_index];
+    para.psir_lyz_device
+        = &gridt.dd_left_yz_g[gridt.psir_size * pipeline_index];
+    para.psir_lzz_device
+        = &gridt.dd_left_zz_g[gridt.psir_size * pipeline_index];
+    //one dimension,record the length and the leading dimension of three matrix on GPU
+    para.A_m_device
+        = &gridt.l_info_global_g[gridt.atom_pair_nbz * pipeline_index];
+    para.B_n_device
+        = &gridt.r_info_global_g[gridt.atom_pair_nbz * pipeline_index];
+    para.K_device
+        = &gridt.k_info_global_g[gridt.atom_pair_nbz * pipeline_index];
+    para.lda_device
+        = &gridt.lda_info_gbl_g[gridt.atom_pair_nbz * pipeline_index];
+    para.ldb_device
+        = &gridt.ldb_info_gbl_g[gridt.atom_pair_nbz * pipeline_index];
+    para.ldc_device
+        = &gridt.ldc_info_gbl_g[gridt.atom_pair_nbz * pipeline_index];
+    //two dimension,record number to compute
+    para.matrix_A = &gridt.ap_left_gbl[gridt.atom_pair_nbz * pipeline_index];
+    para.matrix_B = &gridt.ap_right_gbl[gridt.atom_pair_nbz * pipeline_index];
+    para.matrix_C = &gridt.ap_output_gbl[gridt.atom_pair_nbz * pipeline_index];
+    para.matrix_A_device
+        = &gridt.ap_left_gbl_g[gridt.atom_pair_nbz * pipeline_index];
+    para.matrix_B_device
+        = &gridt.ap_right_gbl_g[gridt.atom_pair_nbz * pipeline_index];
+    para.matrix_C_device
+        = &gridt.ap_output_gbl_g[gridt.atom_pair_nbz * pipeline_index];
+}
+/**
+ * @brief frc_strs_iat on host and device Init
+ *
+ * GridParameter init
+ *
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Host
+ * @param stream_num int , record the stream in GPU
+ * @param cuda_block in stress compute,used for Block nums
+ * @param atom_num_grid in force calculate,used for Block nums
+ * @param max_size Maximum size of atoms on a grid.
+ * @param frc_strs_iat_gbl frc_strs_iat_gbl,contains the Force Stree Iat on Host
+ */
+void cal_init(frc_strs_iat& f_s_iat,
+                        const int stream_num,
+                        const int cuda_block,
+                        const int atom_num_grid,
+                        const int max_size,
+                        frc_strs_iat_gbl& f_s_iat_dev)
+{
+    const int iat_min = -max_size - 1;
+    f_s_iat.stress_host = new double[6 * cuda_block];
+    f_s_iat.stress_device
+        = &f_s_iat_dev.stress_global[6 * cuda_block * stream_num];
+    f_s_iat.force_device
+        = &f_s_iat_dev.force_global[3 * atom_num_grid * stream_num];
+    f_s_iat.iat_device
+        = &f_s_iat_dev.iat_global[atom_num_grid * stream_num];
+    f_s_iat.iat_host = new int[atom_num_grid];
+    for (int index = 0; index < atom_num_grid; index++)
+    {
+        f_s_iat.iat_host[index] = iat_min;
+    }
+    f_s_iat.force_host = new double[3 * atom_num_grid];
+    ModuleBase::GlobalFunc::ZEROS(f_s_iat.force_host,
+                                  3 * atom_num_grid);
+}
 
+/**
+ * @brief GridParameter memCpy,from Host to Device
+ *
+ * parameter init,which contains the gpu task and multi matrix multiplication
+ *
+ * @param para Grid parameter in task generator,
+ * @param gridt Grid_Technique,stored the major method in the the gint.
+ * @param nbz int,stand for the number of Z-axis
+ * @param atom_num_grid in force calculate,used for Block nums
+ */
+void para_mem_copy(grid_para& para,
+                         const Grid_Technique& gridt,
+                         const int nbz,
+                         const int pipeline_index,
+                         const int atom_num_grid)
+{
+    checkCuda(cudaMemcpyAsync(para.input_double_g,
+                              para.input_dou,
+                              gridt.psi_size_max * 5 * sizeof(double),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemcpyAsync(para.input_int_g,
+                              para.input_int,
+                              gridt.psi_size_max * 2 * sizeof(int),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemcpyAsync(para.num_psir_g,
+                              para.num_psir,
+                              nbz * sizeof(int),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemcpyAsync(para.A_m_device,
+                              para.atom_pair_A_m,
+                              gridt.atom_pair_nbz * sizeof(int),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemcpyAsync(para.B_n_device,
+                              para.atom_pair_B_n,
+                              gridt.atom_pair_nbz * sizeof(int),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemcpyAsync(para.K_device,
+                              para.atom_pair_K,
+                              gridt.atom_pair_nbz * sizeof(int),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemcpyAsync(para.lda_device,
+                              para.atom_pair_lda,
+                              gridt.atom_pair_nbz * sizeof(int),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemcpyAsync(para.ldb_device,
+                              para.atom_pair_ldb,
+                              gridt.atom_pair_nbz * sizeof(int),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemcpyAsync(para.ldc_device,
+                              para.atom_pair_ldc,
+                              gridt.atom_pair_nbz * sizeof(int),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemcpyAsync(para.matrix_A_device,
+                              para.matrix_A,
+                              gridt.atom_pair_nbz * sizeof(double*),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemcpyAsync(para.matrix_B_device,
+                              para.matrix_B,
+                              gridt.atom_pair_nbz * sizeof(double*),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemcpyAsync(para.matrix_C_device,
+                              para.matrix_C,
+                              gridt.atom_pair_nbz * sizeof(double*),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemsetAsync(para.psir_dm_device,
+                              0,
+                              gridt.psir_size * sizeof(double),
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemsetAsync(para.psir_r_device,
+                              0,
+                              gridt.psir_size * sizeof(double),
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemsetAsync(para.psir_lx_device,
+                              0,
+                              gridt.psir_size * sizeof(double),
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemsetAsync(para.psir_ly_device,
+                              0,
+                              gridt.psir_size * sizeof(double),
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemsetAsync(para.psir_lz_device,
+                              0,
+                              gridt.psir_size * sizeof(double),
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemsetAsync(para.psir_lxx_device,
+                              0,
+                              gridt.psir_size * sizeof(double),
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemsetAsync(para.psir_lxy_device,
+                              0,
+                              gridt.psir_size * sizeof(double),
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemsetAsync(para.psir_lxz_device,
+                              0,
+                              gridt.psir_size * sizeof(double),
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemsetAsync(para.psir_lyy_device,
+                              0,
+                              gridt.psir_size * sizeof(double),
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemsetAsync(para.psir_lyz_device,
+                              0,
+                              gridt.psir_size * sizeof(double),
+                              gridt.streams[pipeline_index]));
+    checkCuda(cudaMemsetAsync(para.psir_lzz_device,
+                              0,
+                              gridt.psir_size * sizeof(double),
+                              gridt.streams[pipeline_index]));
+}
+/**
+ * @brief Force Stress Force Iat memCpy,from Host to Device
+ *
+ *  @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
+ * and Host
+ *  @param gridt Grid_Technique,stored the major method in the the gint.
+ *  @param atom_num_grid in force calculate,used for Block nums
+ *  @param cuda_block in stress compute,used for Block nums
+ *  @param stream_num int , record the stream in GPU
+ */
+void cal_mem_cpy(frc_strs_iat& f_s_iat,
+                          const Grid_Technique& gridt,
+                          const int atom_num_grid,
+                          const int cuda_block,
+                          const int stream_num)
+{
+    checkCuda(cudaMemcpyAsync(f_s_iat.iat_device,
+                              f_s_iat.iat_host,
+                              atom_num_grid * sizeof(int),
+                              cudaMemcpyHostToDevice,
+                              gridt.streams[stream_num]));
+    checkCuda(cudaMemsetAsync(f_s_iat.stress_device,
+                              0,
+                              6 * cuda_block * sizeof(double),
+                              gridt.streams[stream_num]));
+    checkCuda(cudaMemsetAsync(f_s_iat.force_device,
+                              0,
+                              3 * atom_num_grid * sizeof(double),
+                              gridt.streams[stream_num]));
+}
+/*
+ * @brief Force Calculate on Host
+ *
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
+ * and Host
+ * @param force stored the force for each atom on each directions
+ * @param atom_num_grid in force calculate,used for Block nums
+ */
+void cal_force_add(frc_strs_iat& f_s_iat,
+                     std::vector<double>& force,
+                    const int atom_num_grid)
+{
+    checkCuda(cudaMemcpy(f_s_iat.force_host,
+                         f_s_iat.force_device,
+                         3 * atom_num_grid * sizeof(double),
+                         cudaMemcpyDeviceToHost));
+    for (int index1 = 0; index1 < atom_num_grid; index1++)
+    {
+        int iat1 = f_s_iat.iat_host[index1];
+        if (iat1 >= 0)
+        {
+            for (int index2 = 0; index2 < 3; index2++)
+            {
+                force[iat1 * 3 + index2]
+                    += f_s_iat.force_host[index1 * 3 + index2];
+            }
+        }
+    }
+}
+/**
+ * @brief Stress Calculate on Host
+ *
+ * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
+ * and Host
+ * @param stress stored the stress for each directions
+ * @param cuda_block in stress compute,used for Block nums
+ */
+void cal_stress_add(frc_strs_iat& f_s_iat,
+                     std::vector<double>& stress,
+                     const int cuda_block)
+{
+    checkCuda(cudaMemcpy(f_s_iat.stress_host,
+                         f_s_iat.stress_device,
+                         6 * cuda_block * sizeof(double),
+                         cudaMemcpyDeviceToHost));
+    for (int i = 0; i < 6; i++)
+    {
+        for (int index = 0; index < cuda_block; index++)
+        {
+            stress[i] += f_s_iat.stress_host[i * cuda_block + index];
+        }
+    }
+}
 } // namespace GintKernel
diff --git a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
index a86c16cb5f..a6c8b762b2 100644
--- a/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
+++ b/source/module_hamilt_lcao/module_gint/kernels/cuda/gint_force.cu
@@ -252,368 +252,5 @@ __global__ void dot_product_force(double* psir_lx,
         tid += blockDim.x * gridDim.x;
     }
 }
-void calculateInit(DensityMat& denstiy_mat,
-                   frc_strs_iat_gbl& f_s_iat_dev,
-                   hamilt::HContainer<double>* dm,
-                   const Grid_Technique& gridt,
-                   const UnitCell& ucell,
-                   int lgd,
-                   int cuda_block,
-                   int atom_num_grid)
-{
-    denstiy_mat.density_mat_h = new double[lgd * lgd];
-    allocateDm(denstiy_mat.density_mat_h, dm, gridt, ucell);
-
-    checkCuda(cudaMalloc((void**)&denstiy_mat.density_mat_d,
-                         lgd * lgd * sizeof(double)));
-    checkCuda(cudaMemcpy(denstiy_mat.density_mat_d,
-                         denstiy_mat.density_mat_h,
-                         lgd * lgd * sizeof(double),
-                         cudaMemcpyHostToDevice));
-
-    checkCuda(cudaMalloc((void**)&f_s_iat_dev.stress_global,
-                         6 * cuda_block * gridt.nstreams * sizeof(double)));
-    checkCuda(cudaMemset(f_s_iat_dev.stress_global,
-                         0,
-                         6 * cuda_block * gridt.nstreams * sizeof(double)));
-
-    checkCuda(cudaMalloc((void**)&f_s_iat_dev.force_global,
-                         3 * atom_num_grid * gridt.nstreams * sizeof(double)));
-    checkCuda(cudaMemset(f_s_iat_dev.force_global,
-                         0,
-                         3 * atom_num_grid * gridt.nstreams * sizeof(double)));
-
-    checkCuda(cudaMalloc((void**)&f_s_iat_dev.iat_global,
-                         atom_num_grid * gridt.nstreams * sizeof(int)));
-    checkCuda(cudaMemset(f_s_iat_dev.iat_global,
-                         0,
-                         atom_num_grid * gridt.nstreams * sizeof(int)));
-}
-
-/**
- * @brief grid parameter Init
- *
- * GridParameter init
- *
- * @param para double *,contained the destiyMatHost
- * @param iter_num int , used for calcute the stream
- * @param nbz int,stand for the number of Z-axis
- * @param gridt Grid_Technique,stored the major method in the the gint.
- */
-void para_init(grid_para& para,
-                       int iter_num,
-                       int nbz,
-                       const Grid_Technique& gridt)
-{
 
-    // stream_num stand for nstreams
-    para.stream_num = iter_num % gridt.nstreams;
-    //input_dou and input _int used for the Spherical Harmonics
-    para.input_dou
-        = &gridt.psi_dbl_gbl[gridt.psi_size_max * para.stream_num * 5];
-    para.input_int
-        = &gridt.psi_int_gbl[gridt.psi_size_max * para.stream_num * 2];
-    para.num_psir = &gridt.num_psir_gbl[nbz * para.stream_num];
-    //one dimension,record the length and the leading dimension of three matrix
-    para.atom_pair_A_m
-        = &gridt.l_info_global[gridt.atom_pair_nbz * para.stream_num];
-    para.atom_pair_B_n
-        = &gridt.r_info_global[gridt.atom_pair_nbz * para.stream_num];
-    para.atom_pair_K
-        = &gridt.k_info_global[gridt.atom_pair_nbz * para.stream_num];
-    para.atom_pair_lda
-        = &gridt.lda_info_global[gridt.atom_pair_nbz * para.stream_num];
-    para.atom_pair_ldb
-        = &gridt.ldb_info_global[gridt.atom_pair_nbz * para.stream_num];
-    para.atom_pair_ldc
-        = &gridt.ldc_info_global[gridt.atom_pair_nbz * para.stream_num];
-    //input_double_g and input_int_g used for the Spherical Harmonics on GPU
-    para.input_double_g
-        = &gridt.psi_dbl_gbl_g[gridt.psi_size_max * para.stream_num * 5];
-    para.input_int_g
-        = &gridt.psi_int_gbl_g[gridt.psi_size_max * para.stream_num * 2];
-    para.num_psir_g = &gridt.num_psir_gbl_g[nbz * para.stream_num];
-    para.psir_dm_device = &gridt.dm_global_g[gridt.psir_size * para.stream_num];
-    para.psir_r_device
-        = &gridt.right_global_g[gridt.psir_size * para.stream_num];
-    //psi function ,record the force in x y z,and the stress in six dimension
-    para.psir_lx_device = &gridt.d_left_x_g[gridt.psir_size * para.stream_num];
-    para.psir_ly_device = &gridt.d_left_y_g[gridt.psir_size * para.stream_num];
-    para.psir_lz_device = &gridt.d_left_z_g[gridt.psir_size * para.stream_num];
-    para.psir_lxx_device
-        = &gridt.dd_left_xx_g[gridt.psir_size * para.stream_num];
-    para.psir_lxy_device
-        = &gridt.dd_left_xy_g[gridt.psir_size * para.stream_num];
-    para.psir_lxz_device
-        = &gridt.dd_left_xz_g[gridt.psir_size * para.stream_num];
-    para.psir_lyy_device
-        = &gridt.dd_left_yy_g[gridt.psir_size * para.stream_num];
-    para.psir_lyz_device
-        = &gridt.dd_left_yz_g[gridt.psir_size * para.stream_num];
-    para.psir_lzz_device
-        = &gridt.dd_left_zz_g[gridt.psir_size * para.stream_num];
-    //one dimension,record the length and the leading dimension of three matrix on GPU
-    para.A_m_device
-        = &gridt.l_info_global_g[gridt.atom_pair_nbz * para.stream_num];
-    para.B_n_device
-        = &gridt.r_info_global_g[gridt.atom_pair_nbz * para.stream_num];
-    para.K_device
-        = &gridt.k_info_global_g[gridt.atom_pair_nbz * para.stream_num];
-    para.lda_device
-        = &gridt.lda_info_gbl_g[gridt.atom_pair_nbz * para.stream_num];
-    para.ldb_device
-        = &gridt.ldb_info_gbl_g[gridt.atom_pair_nbz * para.stream_num];
-    para.ldc_device
-        = &gridt.ldc_info_gbl_g[gridt.atom_pair_nbz * para.stream_num];
-    //two dimension,record number to compute
-    para.matrix_A = &gridt.ap_left_gbl[gridt.atom_pair_nbz * para.stream_num];
-    para.matrix_B = &gridt.ap_right_gbl[gridt.atom_pair_nbz * para.stream_num];
-    para.matrix_C = &gridt.ap_output_gbl[gridt.atom_pair_nbz * para.stream_num];
-    para.matrix_A_device
-        = &gridt.ap_left_gbl_g[gridt.atom_pair_nbz * para.stream_num];
-    para.matrix_B_device
-        = &gridt.ap_right_gbl_g[gridt.atom_pair_nbz * para.stream_num];
-    para.matrix_C_device
-        = &gridt.ap_output_gbl_g[gridt.atom_pair_nbz * para.stream_num];
-}
-/**
- * @brief frc_strs_iat on host and device Init
- *
- * GridParameter init
- *
- * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Host
- * @param stream_num int , record the stream in GPU
- * @param cuda_block in stress compute,used for Block nums
- * @param atom_num_grid in force calculate,used for Block nums
- * @param max_size Maximum size of atoms on a grid.
- * @param frc_strs_iat_gbl frc_strs_iat_gbl,contains the Force Stree Iat on Host
- */
-void cal_init(frc_strs_iat& f_s_iat,
-                        const int stream_num,
-                        const int cuda_block,
-                        const int atom_num_grid,
-                        const int max_size,
-                        frc_strs_iat_gbl& f_s_iat_dev)
-{
-    const int iat_min = -max_size - 1;
-    f_s_iat.stress_host = new double[6 * cuda_block];
-    f_s_iat.stress_device
-        = &f_s_iat_dev.stress_global[6 * cuda_block * stream_num];
-    f_s_iat.force_device
-        = &f_s_iat_dev.force_global[3 * atom_num_grid * stream_num];
-    f_s_iat.iat_device
-        = &f_s_iat_dev.iat_global[atom_num_grid * stream_num];
-    f_s_iat.iat_host = new int[atom_num_grid];
-    for (int index = 0; index < atom_num_grid; index++)
-    {
-        f_s_iat.iat_host[index] = iat_min;
-    }
-    f_s_iat.force_host = new double[3 * atom_num_grid];
-    ModuleBase::GlobalFunc::ZEROS(f_s_iat.force_host,
-                                  3 * atom_num_grid);
-}
-
-/**
- * @brief GridParameter memCpy,from Host to Device
- *
- * parameter init,which contains the gpu task and multi matrix multiplication
- *
- * @param para Grid parameter in task generator,
- * @param gridt Grid_Technique,stored the major method in the the gint.
- * @param nbz int,stand for the number of Z-axis
- * @param atom_num_grid in force calculate,used for Block nums
- */
-void para_mem_copy(grid_para& para,
-                         const Grid_Technique& gridt,
-                         const int nbz,
-                         const int atom_num_grid)
-{
-    checkCuda(cudaMemcpyAsync(para.input_double_g,
-                              para.input_dou,
-                              gridt.psi_size_max * 5 * sizeof(double),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemcpyAsync(para.input_int_g,
-                              para.input_int,
-                              gridt.psi_size_max * 2 * sizeof(int),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemcpyAsync(para.num_psir_g,
-                              para.num_psir,
-                              nbz * sizeof(int),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemcpyAsync(para.A_m_device,
-                              para.atom_pair_A_m,
-                              gridt.atom_pair_nbz * sizeof(int),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemcpyAsync(para.B_n_device,
-                              para.atom_pair_B_n,
-                              gridt.atom_pair_nbz * sizeof(int),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemcpyAsync(para.K_device,
-                              para.atom_pair_K,
-                              gridt.atom_pair_nbz * sizeof(int),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemcpyAsync(para.lda_device,
-                              para.atom_pair_lda,
-                              gridt.atom_pair_nbz * sizeof(int),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemcpyAsync(para.ldb_device,
-                              para.atom_pair_ldb,
-                              gridt.atom_pair_nbz * sizeof(int),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemcpyAsync(para.ldc_device,
-                              para.atom_pair_ldc,
-                              gridt.atom_pair_nbz * sizeof(int),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemcpyAsync(para.matrix_A_device,
-                              para.matrix_A,
-                              gridt.atom_pair_nbz * sizeof(double*),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemcpyAsync(para.matrix_B_device,
-                              para.matrix_B,
-                              gridt.atom_pair_nbz * sizeof(double*),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemcpyAsync(para.matrix_C_device,
-                              para.matrix_C,
-                              gridt.atom_pair_nbz * sizeof(double*),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemsetAsync(para.psir_dm_device,
-                              0,
-                              gridt.psir_size * sizeof(double),
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemsetAsync(para.psir_r_device,
-                              0,
-                              gridt.psir_size * sizeof(double),
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemsetAsync(para.psir_lx_device,
-                              0,
-                              gridt.psir_size * sizeof(double),
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemsetAsync(para.psir_ly_device,
-                              0,
-                              gridt.psir_size * sizeof(double),
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemsetAsync(para.psir_lz_device,
-                              0,
-                              gridt.psir_size * sizeof(double),
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemsetAsync(para.psir_lxx_device,
-                              0,
-                              gridt.psir_size * sizeof(double),
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemsetAsync(para.psir_lxy_device,
-                              0,
-                              gridt.psir_size * sizeof(double),
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemsetAsync(para.psir_lxz_device,
-                              0,
-                              gridt.psir_size * sizeof(double),
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemsetAsync(para.psir_lyy_device,
-                              0,
-                              gridt.psir_size * sizeof(double),
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemsetAsync(para.psir_lyz_device,
-                              0,
-                              gridt.psir_size * sizeof(double),
-                              gridt.streams[para.stream_num]));
-    checkCuda(cudaMemsetAsync(para.psir_lzz_device,
-                              0,
-                              gridt.psir_size * sizeof(double),
-                              gridt.streams[para.stream_num]));
-}
-/**
- * @brief Force Stress Force Iat memCpy,from Host to Device
- *
- *  @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
- * and Host
- *  @param gridt Grid_Technique,stored the major method in the the gint.
- *  @param atom_num_grid in force calculate,used for Block nums
- *  @param cuda_block in stress compute,used for Block nums
- *  @param stream_num int , record the stream in GPU
- */
-void cal_mem_cpy(frc_strs_iat& f_s_iat,
-                          const Grid_Technique& gridt,
-                          const int atom_num_grid,
-                          const int cuda_block,
-                          const int stream_num)
-{
-    checkCuda(cudaMemcpyAsync(f_s_iat.iat_device,
-                              f_s_iat.iat_host,
-                              atom_num_grid * sizeof(int),
-                              cudaMemcpyHostToDevice,
-                              gridt.streams[stream_num]));
-    checkCuda(cudaMemsetAsync(f_s_iat.stress_device,
-                              0,
-                              6 * cuda_block * sizeof(double),
-                              gridt.streams[stream_num]));
-    checkCuda(cudaMemsetAsync(f_s_iat.force_device,
-                              0,
-                              3 * atom_num_grid * sizeof(double),
-                              gridt.streams[stream_num]));
-}
-/*
- * @brief Force Calculate on Host
- *
- * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
- * and Host
- * @param force stored the force for each atom on each directions
- * @param atom_num_grid in force calculate,used for Block nums
- */
-void cal_force_add(frc_strs_iat& f_s_iat,
-                     std::vector<double>& force,
-                    const int atom_num_grid)
-{
-    checkCuda(cudaMemcpy(f_s_iat.force_host,
-                         f_s_iat.force_device,
-                         3 * atom_num_grid * sizeof(double),
-                         cudaMemcpyDeviceToHost));
-    for (int index1 = 0; index1 < atom_num_grid; index1++)
-    {
-        int iat1 = f_s_iat.iat_host[index1];
-        if (iat1 >= 0)
-        {
-            for (int index2 = 0; index2 < 3; index2++)
-            {
-                force[iat1 * 3 + index2]
-                    += f_s_iat.force_host[index1 * 3 + index2];
-            }
-        }
-    }
-}
-/**
- * @brief Stress Calculate on Host
- *
- * @param frc_strs_iat frc_strs_iat,contains the Force Stree Iat on Device
- * and Host
- * @param stress stored the stress for each directions
- * @param cuda_block in stress compute,used for Block nums
- */
-void cal_stress_add(frc_strs_iat& f_s_iat,
-                     std::vector<double>& stress,
-                     const int cuda_block)
-{
-    checkCuda(cudaMemcpy(f_s_iat.stress_host,
-                         f_s_iat.stress_device,
-                         6 * cuda_block * sizeof(double),
-                         cudaMemcpyDeviceToHost));
-    for (int i = 0; i < 6; i++)
-    {
-        for (int index = 0; index < cuda_block; index++)
-        {
-            stress[i] += f_s_iat.stress_host[i * cuda_block + index];
-        }
-    }
-}
 } // namespace GintKernel