From 88e234513e931bee2d9cb800181fa65991b7d683 Mon Sep 17 00:00:00 2001
From: dyzheng <zhengdy@bjaisi.com>
Date: Fri, 28 Jun 2024 17:36:07 +0800
Subject: [PATCH 01/11] Refactor: remove Hloc, Hloc2, Sloc, Sloc2 in
 LCAO_Matrix

---
 source/Makefile.Objects                       |   1 -
 source/module_esolver/esolver_ks_lcao.cpp     |   8 +-
 .../module_esolver/esolver_ks_lcao_elec.cpp   |   5 +-
 .../esolver_ks_lcao_tmpfunc.cpp               |   3 +-
 source/module_hamilt_general/matrixblock.h    |   2 +-
 .../hamilt_lcaodft/CMakeLists.txt             |   3 +-
 .../hamilt_lcaodft/FORCE_STRESS.cpp           |   8 +-
 .../hamilt_lcaodft/FORCE_gamma.cpp            |  17 +-
 .../hamilt_lcaodft/FORCE_k.cpp                |  12 +-
 .../hamilt_lcaodft/LCAO_domain.h              |   8 -
 .../hamilt_lcaodft/LCAO_matrix.cpp            | 199 ---------------
 .../hamilt_lcaodft/LCAO_matrix.h              |  30 ---
 .../hamilt_lcaodft/LCAO_nl_beta.cpp           | 240 ------------------
 .../hamilt_lcaodft/LCAO_nnr.cpp               | 218 ----------------
 .../hamilt_lcaodft/hamilt_lcao.cpp            | 194 ++++----------
 .../hamilt_lcaodft/hamilt_lcao.h              |  24 +-
 .../hamilt_lcaodft/hs_matrix_k.hpp            |  32 +++
 .../operator_lcao/deepks_lcao.cpp             |  25 +-
 .../operator_lcao/deepks_lcao.h               |   7 +-
 .../operator_lcao/dftu_force_stress.hpp       |   2 +-
 .../operator_lcao/dftu_lcao.cpp               |  15 +-
 .../hamilt_lcaodft/operator_lcao/dftu_lcao.h  |  10 +-
 .../operator_lcao/ekinetic_new.cpp            |  17 +-
 .../operator_lcao/ekinetic_new.h              |   8 +-
 .../operator_lcao/meta_lcao.cpp               |  55 ----
 .../hamilt_lcaodft/operator_lcao/meta_lcao.h  |  38 +--
 .../operator_lcao/nonlocal_new.cpp            |  16 +-
 .../operator_lcao/nonlocal_new.h              |  12 +-
 .../operator_lcao/op_dftu_lcao.cpp            |  27 +-
 .../operator_lcao/op_dftu_lcao.h              |   5 +-
 .../operator_lcao/op_exx_lcao.cpp             |   6 +-
 .../operator_lcao/op_exx_lcao.h               |   7 +-
 .../operator_lcao/op_exx_lcao.hpp             |  11 +-
 .../operator_lcao/operator_lcao.cpp           |  42 +--
 .../operator_lcao/operator_lcao.h             |  32 +--
 .../operator_lcao/overlap_new.cpp             |  27 +-
 .../operator_lcao/overlap_new.h               |  11 +-
 .../operator_lcao/sc_lambda_lcao.cpp          |  18 +-
 .../operator_lcao/sc_lambda_lcao.h            |   5 +-
 .../operator_lcao/td_ekinetic_lcao.cpp        |  25 +-
 .../operator_lcao/td_ekinetic_lcao.h          |   7 +-
 .../operator_lcao/td_nonlocal_lcao.cpp        |  25 +-
 .../operator_lcao/td_nonlocal_lcao.h          |  10 +-
 .../operator_lcao/test/test_T_NL_cd.cpp       |  18 +-
 .../operator_lcao/test/test_dftu.cpp          |  14 +-
 .../operator_lcao/test/test_ekineticnew.cpp   |  22 +-
 .../operator_lcao/test/test_nonlocalnew.cpp   |  20 +-
 .../operator_lcao/test/test_overlapnew.cpp    |  26 +-
 .../operator_lcao/test/test_overlapnew_cd.cpp |  24 +-
 .../test/test_sc_lambda_lcao.cpp              |  62 ++---
 .../operator_lcao/test/tmp_mocks.cpp          |   2 +-
 .../operator_lcao/veff_lcao.cpp               |   5 +-
 .../hamilt_lcaodft/operator_lcao/veff_lcao.h  |  22 +-
 .../hamilt_lcaodft/spar_dh.cpp                |   2 +-
 .../module_deltaspin/cal_h_lambda.cpp         |   2 +-
 .../module_deltaspin/cal_mw.cpp               |   9 +-
 .../module_deltaspin/spin_constrain.h         |   2 +-
 .../module_deltaspin/template_helpers.cpp     |   2 +-
 .../test/cal_h_lambda_test.cpp                |  12 +-
 .../test/template_helpers_test.cpp            |   2 +-
 source/module_hamilt_lcao/module_dftu/dftu.h  |   6 +-
 .../module_dftu/dftu_folding.cpp              |   6 +-
 .../module_dftu/dftu_hamilt.cpp               |   8 +-
 .../module_dftu/dftu_occup.cpp                |  15 +-
 .../module_hcontainer/hcontainer.h            |   6 +
 source/module_io/output_sk.cpp                |  10 +-
 source/module_io/write_Vxc.hpp                |  26 +-
 source/module_io/write_dos_lcao.cpp           |  17 +-
 source/module_io/write_proj_band_lcao.cpp     |  16 +-
 source/module_ri/RI_2D_Comm.h                 |   2 +-
 source/module_ri/RI_2D_Comm.hpp               |   4 +-
 71 files changed, 495 insertions(+), 1334 deletions(-)
 delete mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_beta.cpp
 create mode 100644 source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp

diff --git a/source/Makefile.Objects b/source/Makefile.Objects
index 635b842c46..21336c06e4 100644
--- a/source/Makefile.Objects
+++ b/source/Makefile.Objects
@@ -528,7 +528,6 @@ OBJS_LCAO=DM_gamma.o\
       LCAO_set_fs.o\
       LCAO_set_st.o\
       LCAO_nl_mu.o\
-      LCAO_nl_beta.o\
       LCAO_nnr.o\
       LCAO_set_zero.o\
       center2_orb-orb11.o\
diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp
index 0f522ab7a8..c00713bd21 100644
--- a/source/module_esolver/esolver_ks_lcao.cpp
+++ b/source/module_esolver/esolver_ks_lcao.cpp
@@ -970,16 +970,16 @@ void ESolver_KS_LCAO<TK, TR>::iter_finish(int iter)
     if (GlobalC::restart.info_save.save_H && two_level_step > 0
         && (!GlobalC::exx_info.info_global.separate_loop || iter == 1)) // to avoid saving the same value repeatedly
     {
-        std::vector<TK> Hexxk_save(this->orb_con.ParaV.get_local_size());
+        hamilt::HS_Matrix_K<TK> Hexxk_save(&this->orb_con.ParaV, 1);
         for (int ik = 0; ik < this->kv.get_nks(); ++ik)
         {
-            ModuleBase::GlobalFunc::ZEROS(Hexxk_save.data(), Hexxk_save.size());
+            Hexxk_save.set_zero_hk();
 
-            hamilt::OperatorEXX<hamilt::OperatorLCAO<TK, TR>> opexx_save(&this->LM, nullptr, &Hexxk_save, this->kv);
+            hamilt::OperatorEXX<hamilt::OperatorLCAO<TK, TR>> opexx_save(&Hexxk_save, &this->LM, nullptr, this->kv);
 
             opexx_save.contributeHk(ik);
 
-            GlobalC::restart.save_disk("Hexx", ik, this->orb_con.ParaV.get_local_size(), Hexxk_save.data());
+            GlobalC::restart.save_disk("Hexx", ik, this->orb_con.ParaV.get_local_size(), Hexxk_save.get_hk());
         }
         if (GlobalV::MY_RANK == 0)
         {
diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp
index 41426c335c..0ec24d696e 100644
--- a/source/module_esolver/esolver_ks_lcao_elec.cpp
+++ b/source/module_esolver/esolver_ks_lcao_elec.cpp
@@ -165,6 +165,7 @@ void ESolver_KS_LCAO<TK, TR>::beforesolver(const int istep)
             GlobalV::GAMMA_ONLY_LOCAL ? &(this->GG) : nullptr,
             GlobalV::GAMMA_ONLY_LOCAL ? nullptr : &(this->GK),
             &(this->LM),
+            this->LM.ParaV,
             &(this->LOC),
             this->pelec->pot,
             this->kv,
@@ -533,7 +534,7 @@ void ESolver_KS_LCAO<std::complex<double>, double>::get_S(void)
 
     if (this->p_hamilt == nullptr)
     {
-        this->p_hamilt = new hamilt::HamiltLCAO<std::complex<double>, double>(&this->LM,
+        this->p_hamilt = new hamilt::HamiltLCAO<std::complex<double>, double>(this->LM.ParaV,
                                                                               this->kv,
                                                                               *(two_center_bundle_.overlap_orb));
         dynamic_cast<hamilt::OperatorLCAO<std::complex<double>, double>*>(this->p_hamilt->ops)->contributeHR();
@@ -572,7 +573,7 @@ void ESolver_KS_LCAO<std::complex<double>, std::complex<double>>::get_S(void)
     if (this->p_hamilt == nullptr)
     {
         this->p_hamilt
-            = new hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>(&this->LM,
+            = new hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>(this->LM.ParaV,
                                                                                  this->kv,
                                                                                  *(two_center_bundle_.overlap_orb));
         dynamic_cast<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt->ops)
diff --git a/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp b/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp
index 2a16b09b17..7a5f9fdbcd 100644
--- a/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp
+++ b/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp
@@ -19,7 +19,8 @@ namespace ModuleESolver
 		GlobalC::dftu.cal_occup_m_gamma(
 				iter, 
 				dm, 
-				this->p_chgmix->get_mixing_beta());
+				this->p_chgmix->get_mixing_beta(),
+                this->p_hamilt);
     }
 
     //! dftu occupation matrix for multiple k-points using dm(complex)
diff --git a/source/module_hamilt_general/matrixblock.h b/source/module_hamilt_general/matrixblock.h
index e75cf4cac5..481a95b8a5 100644
--- a/source/module_hamilt_general/matrixblock.h
+++ b/source/module_hamilt_general/matrixblock.h
@@ -12,7 +12,7 @@ template <typename T> struct MatrixBlock
     T* p;
     size_t row;
     size_t col;
-    int* desc;
+    const int* desc;
 };
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt b/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt
index ea16b4cb64..27229c81f6 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/CMakeLists.txt
@@ -36,8 +36,7 @@ if(ENABLE_LCAO)
         LCAO_nnr.cpp
 		LCAO_set_fs.cpp
 		LCAO_set_st.cpp
-		LCAO_nl_mu.cpp
-		LCAO_nl_beta.cpp
+		LCAO_nl_mu.cpp	
         LCAO_set_zero.cpp
 		record_adj.cpp
         center2_orb-orb11.cpp
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
index ada447c66d..fc5133470c 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
@@ -249,15 +249,13 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
         }
         else
         {
-            hamilt::DFTU<hamilt::OperatorLCAO<T, double>> tmp_dftu(&lm,
+            hamilt::DFTU<hamilt::OperatorLCAO<T, double>> tmp_dftu(nullptr, // HK and SK are not used for force&stress
                                                                    kv.kvec_d,
-                                                                   nullptr,
-                                                                   nullptr,
+                                                                   nullptr, // HR are not used for force&stress
                                                                    GlobalC::ucell,
                                                                    &GlobalC::GridD,
                                                                    two_center_bundle.overlap_orb_onsite.get(),
-                                                                   &GlobalC::dftu,
-                                                                   *(lm.ParaV));
+                                                                   &GlobalC::dftu);
 
             tmp_dftu.cal_force_stress(isforce, isstress, force_dftu, stress_dftu);
         }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
index 512d90658c..e5e0681e70 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_gamma.cpp
@@ -77,7 +77,7 @@ void Force_LCAO<double>::allocate(const Parallel_Orbitals& pv,
                               pv,
                               two_center_bundle,
                               &GlobalC::GridD,
-                              lm.Sloc.data());
+                              nullptr);
 
     // calculate dT in LCAP
     // allocation dt
@@ -101,11 +101,11 @@ void Force_LCAO<double>::allocate(const Parallel_Orbitals& pv,
                               pv,
                               two_center_bundle,
                               &GlobalC::GridD,
-                              lm.Hloc_fixed.data());
+                              nullptr);
 
     LCAO_domain::build_Nonlocal_mu_new(lm,
                                        fsr,
-                                       lm.Hloc_fixed.data(),
+                                       nullptr,
                                        cal_deri,
                                        GlobalC::ucell,
                                        GlobalC::ORB,
@@ -116,8 +116,9 @@ void Force_LCAO<double>::allocate(const Parallel_Orbitals& pv,
     if (INPUT.cal_syns)
     {
         cal_deri = false;
-
-        lm.zeros_HSgamma('S');
+        ModuleBase::WARNING_QUIT("cal_syns", "this function has been broken and will be fixed later.");
+        /*
+        std::vector<double> Sloc(pv.nloc, 0.0);
 
         LCAO_domain::build_ST_new(lm,
                                   fsr,
@@ -128,7 +129,7 @@ void Force_LCAO<double>::allocate(const Parallel_Orbitals& pv,
                                   pv,
                                   two_center_bundle,
                                   &GlobalC::GridD,
-                                  lm.Sloc.data(),
+                                  Sloc.data(),
                                   INPUT.cal_syns,
                                   INPUT.dmax);
 
@@ -147,7 +148,7 @@ void Force_LCAO<double>::allocate(const Parallel_Orbitals& pv,
                            GlobalV::DRANK);
 
         ModuleIO::save_mat(0,
-                           lm.Sloc.data(),
+                           Sloc.data(),
                            GlobalV::NLOCAL,
                            bit,
                            GlobalV::out_ndigits,
@@ -156,7 +157,7 @@ void Force_LCAO<double>::allocate(const Parallel_Orbitals& pv,
                            "S",
                            "data-" + std::to_string(0),
                            pv,
-                           GlobalV::DRANK);
+                           GlobalV::DRANK);*/
     }
 
     ModuleBase::timer::tick("Force_LCAO", "allocate");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
index e0044d709e..56953592fd 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
@@ -131,7 +131,7 @@ void Force_LCAO<std::complex<double>>::allocate(const Parallel_Orbitals& pv,
     // calculate dVnl=<phi|dVnl|dphi> in LCAO
     LCAO_domain::build_Nonlocal_mu_new(lm,
                                        fsr,
-                                       lm.Hloc_fixed.data(),
+                                       nullptr,
                                        cal_deri,
                                        GlobalC::ucell,
                                        GlobalC::ORB,
@@ -143,6 +143,9 @@ void Force_LCAO<std::complex<double>>::allocate(const Parallel_Orbitals& pv,
     {
         cal_deri = false;
 
+        ModuleBase::WARNING_QUIT("cal_syns",
+                                 "This function has been broken and will be fixed later.");
+
         LCAO_domain::build_ST_new(lm,
                                   fsr,
                                   'S',
@@ -158,10 +161,9 @@ void Force_LCAO<std::complex<double>>::allocate(const Parallel_Orbitals& pv,
 
         for (int ik = 0; ik < nks; ik++)
         {
-            lm.zeros_HSk('S');
-            lm.folding_fixedH(ik, kvec_d, 1);
+            
             bool bit = false; // LiuXh, 2017-03-21
-            ModuleIO::save_mat(0,
+            /*ModuleIO::save_mat(0,
                                lm.Hloc2.data(),
                                GlobalV::NLOCAL,
                                bit,
@@ -183,7 +185,7 @@ void Force_LCAO<std::complex<double>>::allocate(const Parallel_Orbitals& pv,
                                "S",
                                "data-" + std::to_string(ik),
                                pv,
-                               GlobalV::DRANK);
+                               GlobalV::DRANK);*/
         }
     }
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h
index bf081ef344..7469eca53e 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h
@@ -16,14 +16,6 @@
 namespace LCAO_domain
 {
 
-// can used in gamma algorithm.
-void build_Nonlocal_beta_new(LCAO_Matrix& lm,
-                             double* Hloc,
-                             const UnitCell& ucell,
-                             const LCAO_Orbitals& orb,
-                             const TwoCenterIntegrator& intor_orb_beta,
-                             Grid_Driver* GridD);
-
 void build_Nonlocal_mu_new(LCAO_Matrix& lm,
                            ForceStressArrays& fsr, // mohan 2024-06-16
                            double* HlocR,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.cpp
index ad069a522f..c5c3d1c42b 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.cpp
@@ -22,15 +22,6 @@ void LCAO_Matrix::divide_HS_in_frag(const bool isGamma, Parallel_Orbitals& pv, c
     //(1), (2): set up matrix division have been moved into ORB_control
     // just pass `ParaV` as pointer is enough
     this->ParaV = &pv;
-    // (3) allocate for S, H_fixed, H, and S_diag
-    if (isGamma)
-    {
-        allocate_HS_gamma(this->ParaV->nloc);
-    }
-    else
-    {
-        allocate_HS_k(this->ParaV->nloc);
-    }
 #ifdef __DEEPKS
     // wenfei 2021-12-19
     // preparation for DeePKS
@@ -63,202 +54,12 @@ void LCAO_Matrix::divide_HS_in_frag(const bool isGamma, Parallel_Orbitals& pv, c
     return;
 }
 
-void LCAO_Matrix::allocate_HS_gamma(const long& nloc)
-{
-    ModuleBase::TITLE("LCAO_Matrix", "allocate_HS_gamma");
-
-    ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nloc", nloc);
-
-    if (nloc == 0)
-    {
-        return;
-    }
-
-    // because we initilize in the constructor function
-    // with dimension '1', so here we reconstruct these
-    // matrices
-
-    this->Sloc.resize(nloc);
-    this->Hloc_fixed.resize(nloc);
-    this->Hloc.resize(nloc);
-
-    ModuleBase::GlobalFunc::ZEROS(Sloc.data(), nloc);
-    ModuleBase::GlobalFunc::ZEROS(Hloc_fixed.data(), nloc);
-    ModuleBase::GlobalFunc::ZEROS(Hloc.data(), nloc);
-
-    return;
-}
-
-void LCAO_Matrix::allocate_HS_k(const long& nloc)
-{
-    ModuleBase::TITLE("LCAO_Matrix", "allocate_HS_k");
-
-    ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nloc", nloc);
-
-    if (nloc == 0)
-    {
-        return; // mohan fix bug 2012-05-25
-    }
-
-    // because we initilize in the constructor function
-    // with dimension '1', so here we reconstruct these
-    // matrices
-    this->Sloc2.resize(nloc);
-    this->Hloc_fixed2.resize(nloc);
-    this->Hloc2.resize(nloc);
-
-    ModuleBase::GlobalFunc::ZEROS(Sloc2.data(), nloc);
-    ModuleBase::GlobalFunc::ZEROS(Hloc_fixed2.data(), nloc);
-    ModuleBase::GlobalFunc::ZEROS(Hloc2.data(), nloc);
-
-    return;
-}
-
 void LCAO_Matrix::set_HSgamma(const int& iw1_all, const int& iw2_all, const double& v, double* HSloc)
 {
     LCAO_Matrix::set_mat2d<double>(iw1_all, iw2_all, v, *this->ParaV, HSloc);
     return;
 }
 
-void LCAO_Matrix::zeros_HSgamma(const char& mtype)
-{
-    auto zeros_HSgamma_ker = [&](int num_threads, int thread_id) {
-        long long beg, len;
-        if (mtype == 'S')
-        {
-            ModuleBase::BLOCK_TASK_DIST_1D(num_threads,
-                                           thread_id,
-                                           (long long)this->Sloc.size(),
-                                           (long long)512,
-                                           beg,
-                                           len);
-
-            ModuleBase::GlobalFunc::ZEROS(this->Sloc.data() + beg, len);
-        }
-        else if (mtype == 'T')
-        {
-            ModuleBase::BLOCK_TASK_DIST_1D(num_threads,
-                                           thread_id,
-                                           (long long)this->Hloc_fixed.size(),
-                                           (long long)512,
-                                           beg,
-                                           len);
-
-            ModuleBase::GlobalFunc::ZEROS(this->Hloc_fixed.data() + beg, len);
-        }
-        else if (mtype == 'H')
-        {
-            ModuleBase::BLOCK_TASK_DIST_1D(num_threads,
-                                           thread_id,
-                                           (long long)this->Hloc.size(),
-                                           (long long)512,
-                                           beg,
-                                           len);
-
-            ModuleBase::GlobalFunc::ZEROS(this->Hloc.data() + beg, len);
-        }
-    };
-    ModuleBase::OMP_PARALLEL(zeros_HSgamma_ker);
-    return;
-}
-
-void LCAO_Matrix::zeros_HSk(const char& mtype)
-{
-    auto zeros_HSk_ker = [&](int num_threads, int thread_id) {
-        long long beg, len;
-        if (mtype == 'S')
-        {
-            ModuleBase::BLOCK_TASK_DIST_1D(num_threads,
-                                           thread_id,
-                                           (long long)this->Sloc2.size(),
-                                           (long long)256,
-                                           beg,
-                                           len);
-            ModuleBase::GlobalFunc::ZEROS(this->Sloc2.data() + beg, len);
-        }
-        else if (mtype == 'T')
-        {
-            ModuleBase::BLOCK_TASK_DIST_1D(num_threads,
-                                           thread_id,
-                                           (long long)this->Hloc_fixed2.size(),
-                                           (long long)256,
-                                           beg,
-                                           len);
-            ModuleBase::GlobalFunc::ZEROS(this->Hloc_fixed2.data() + beg, len);
-        }
-        else if (mtype == 'H')
-        {
-            ModuleBase::BLOCK_TASK_DIST_1D(num_threads,
-                                           thread_id,
-                                           (long long)this->Hloc2.size(),
-                                           (long long)256,
-                                           beg,
-                                           len);
-            ModuleBase::GlobalFunc::ZEROS(this->Hloc2.data() + beg, len);
-        }
-    };
-    ModuleBase::OMP_PARALLEL(zeros_HSk_ker);
-    return;
-}
-
-// becareful! Update Hloc, we add new members to it.
-void LCAO_Matrix::update_Hloc()
-{
-    ModuleBase::TITLE("LCAO_Matrix", "update_Hloc");
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 1024)
-#endif
-    for (long i = 0; i < this->ParaV->nloc; i++)
-    {
-        Hloc[i] += Hloc_fixed[i];
-    }
-    return;
-}
-
-void LCAO_Matrix::update_Hloc2(const int& ik)
-{
-    ModuleBase::TITLE("LCAO_Matrix", "update_Hloc2");
-#ifdef _OPENMP
-#pragma omp parallel for schedule(static, 1024)
-#endif
-    for (long i = 0; i < this->ParaV->nloc; i++)
-    {
-        Hloc2[i] += Hloc_fixed2[i];
-#ifdef __DEEPKS
-        if (GlobalV::deepks_scf)
-        {
-            Hloc2[i] += GlobalC::ld.H_V_delta_k[ik][i];
-        }
-#endif
-    }
-
-    return;
-}
-
-void LCAO_Matrix::output_HSk(const char& mtype, std::string& fn)
-{
-    ModuleBase::TITLE("LCAO_Matrix", "output_HSk");
-    std::stringstream ss;
-    ss << GlobalV::global_out_dir << fn;
-    std::ofstream ofs(ss.str().c_str());
-    ofs << GlobalV::NLOCAL << std::endl;
-    for (int i = 0; i < GlobalV::NLOCAL; i++)
-    {
-        for (int j = 0; j < GlobalV::NLOCAL; j++)
-        {
-            const int index = i * GlobalV::NLOCAL + j;
-            if (mtype == 'S')
-                ofs << Sloc2[index].real() << " " << Sloc2[index].imag() << std::endl;
-            else if (mtype == 'T')
-                ofs << Hloc_fixed2[index].real() << " " << Hloc_fixed2[index].imag() << std::endl;
-            else if (mtype == 'H')
-                ofs << Hloc2[index].real() << " " << Hloc2[index].imag() << std::endl;
-        }
-    }
-    ofs.close();
-    return;
-}
-
 void LCAO_Matrix::set_HR_tr(const int& Rx,
                             const int& Ry,
                             const int& Rz,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h
index 698b80a23a..1ac8af26a2 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h
@@ -22,10 +22,6 @@ class LCAO_Matrix
 
     void divide_HS_in_frag(const bool isGamma, Parallel_Orbitals& pv, const int& nks);
 
-    // folding the fixed Hamiltonian (T+Vnl) if
-    // k-point algorithm is used.
-    void folding_fixedH(const int& ik, const std::vector<ModuleBase::Vector3<double>>& kvec_d, bool cal_syns = false);
-
     Parallel_Orbitals* ParaV;
 
 #ifdef __EXX
@@ -37,33 +33,7 @@ class LCAO_Matrix
     std::vector<std::vector<std::complex<double>>> Hexxc_k_load;
 #endif
 
-    void allocate_HS_k(const long& nloc);
-
-  private:
-    void allocate_HS_gamma(const long& nloc);
-
   public:
-    //------------------------------
-    // H, S, Hfixed
-    // used in gamma only algorithm.
-    // thse matrix are used to
-    // diagonalize.
-    //------------------------------
-    std::vector<double> Hloc;
-    std::vector<double> Sloc;
-    std::vector<double> Hloc_fixed;
-
-    //------------------------------
-    // 1. Hamiltonian(vl),
-    // 2. overlap matrix Sloc2
-    // 3. fixed (vna+T+Vnl) matrix.
-    // used in kpoint algorithm.
-    // these matrix are used to
-    // diagonalize.
-    //------------------------------
-    std::vector<std::complex<double>> Hloc2;
-    std::vector<std::complex<double>> Sloc2;
-    std::vector<std::complex<double>> Hloc_fixed2;
 
     // LiuXh add 2019-07-15
     double**** Hloc_fixedR_tr;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_beta.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_beta.cpp
deleted file mode 100644
index e4f526a502..0000000000
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nl_beta.cpp
+++ /dev/null
@@ -1,240 +0,0 @@
-#include "module_base/timer.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_domain.h"
-
-#ifdef __MKL
-#include <mkl_service.h> // mkl_get_max_threads
-#endif
-
-namespace LCAO_domain
-{
-
-void build_Nonlocal_beta_new(LCAO_Matrix& lm,
-                             double* HSloc,
-                             const UnitCell& ucell,
-                             const LCAO_Orbitals& orb,
-                             const TwoCenterIntegrator& intor_orb_beta,
-                             Grid_Driver* GridD) // update by liuyu 2021-04-07
-{
-    ModuleBase::TITLE("LCAO_domain", "b_NL_beta_new");
-    ModuleBase::timer::tick("LCAO_domain", "b_NL_beta_new");
-
-    const Parallel_Orbitals* pv = lm.ParaV;
-    const int npol = GlobalV::NPOL;
-
-#ifdef __MKL
-    const int mkl_threads = mkl_get_max_threads();
-    mkl_set_num_threads(1);
-#endif
-
-    const std::vector<AdjacentAtomInfo> adjs_all = GridD->get_adjs(ucell);
-
-#ifdef _OPENMP
-#pragma omp parallel
-    {
-        double* Nonlocal_thread;
-        Nonlocal_thread = new double[pv->nloc];
-        ModuleBase::GlobalFunc::ZEROS(Nonlocal_thread, pv->nloc);
-#pragma omp for schedule(dynamic)
-#endif
-        for (int iat = 0; iat < ucell.nat; iat++)
-        {
-            const int T0 = ucell.iat2it[iat];
-            const int I0 = ucell.iat2ia[iat];
-            Atom* atom0 = &ucell.atoms[T0];
-
-//=======================================================
-// Step1:
-// saves <beta|psi>, where beta runs over L0,M0 on atom I0
-// and psi runs over atomic basis sets on the current core
-//=======================================================
-#ifdef _OPENMP
-            std::vector<std::unordered_map<int, std::vector<double>>> nlm_tot_thread;
-            nlm_tot_thread.resize(adjs_all[iat].adj_num + 1);
-#else
-        std::vector<std::unordered_map<int, std::vector<double>>> nlm_tot;
-        nlm_tot.resize(adjs_all[iat].adj_num + 1);
-#endif
-
-            const ModuleBase::Vector3<double> tau0 = atom0->tau[I0];
-            const double Rcut_Beta = ucell.infoNL.Beta[T0].get_rcut_max();
-
-            // outermost loop : all adjacent atoms
-            for (int ad_count = 0; ad_count < adjs_all[iat].adj_num + 1; ad_count++)
-            {
-                const int T1 = adjs_all[iat].ntype[ad_count];
-                const int I1 = adjs_all[iat].natom[ad_count];
-                const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
-                const double Rcut_AO1 = orb.Phi[T1].getRcut();
-                const ModuleBase::Vector3<double> tau1 = adjs_all[iat].adjacent_tau[ad_count];
-                const Atom* atom1 = &ucell.atoms[T1];
-                const int nw1_tot = atom1->nw * npol;
-
-#ifdef _OPENMP
-                nlm_tot_thread[ad_count].clear();
-#else
-            nlm_tot[ad_count].clear();
-#endif
-
-                // middle loop : atomic basis on current processor (either row or column)
-                const double dist1 = (tau1 - tau0).norm() * ucell.lat0;
-                if (dist1 > Rcut_Beta + Rcut_AO1)
-                {
-                    continue;
-                }
-
-                for (int iw1 = 0; iw1 < nw1_tot; ++iw1)
-                {
-                    const int iw1_all = start1 + iw1;
-                    const int iw1_local = pv->global2local_row(iw1_all);
-                    const int iw2_local = pv->global2local_col(iw1_all);
-
-                    if (iw1_local < 0 && iw2_local < 0)
-                        continue;
-
-                    const int iw1_0 = iw1 / npol;
-                    std::vector<std::vector<double>> nlm;
-                    // 2D, but first dimension is only 1 here
-                    // for force, the right hand side is the gradient
-                    // and the first dimension is then 3
-                    // inner loop : all projectors (L0,M0)
-
-                    int L1 = atom1->iw2l[iw1_0];
-                    int N1 = atom1->iw2n[iw1_0];
-                    int m1 = atom1->iw2m[iw1_0];
-
-                    // convert m (0,1,...2l) to M (-l, -l+1, ..., l-1, l)
-                    int M1 = (m1 % 2 == 0) ? -m1 / 2 : (m1 + 1) / 2;
-
-                    ModuleBase::Vector3<double> dtau = ucell.atoms[T0].tau[I0] - tau1;
-                    intor_orb_beta.snap(T1, L1, N1, M1, T0, dtau * ucell.lat0, false, nlm);
-
-#ifdef _OPENMP
-                    nlm_tot_thread[ad_count].insert({iw1_all, nlm[0]});
-#else
-                nlm_tot[ad_count].insert({iw1_all, nlm[0]});
-#endif
-                } // end iw
-            }     // end ad
-
-            //=======================================================
-            // Step2:
-            // calculate sum_(L0,M0) beta<psi_i|beta><beta|psi_j>
-            // and accumulate the value to Hloc_fixed(i,j)
-            //=======================================================
-            for (int ad1_count = 0; ad1_count < adjs_all[iat].adj_num + 1; ad1_count++)
-            {
-                const int T1 = adjs_all[iat].ntype[ad1_count];
-                const int I1 = adjs_all[iat].natom[ad1_count];
-                const int start1 = ucell.itiaiw2iwt(T1, I1, 0);
-                const ModuleBase::Vector3<double> tau1 = adjs_all[iat].adjacent_tau[ad1_count];
-                const Atom* atom1 = &ucell.atoms[T1];
-                const int nw1_tot = atom1->nw * npol;
-                const double Rcut_AO1 = orb.Phi[T1].getRcut();
-
-                for (int ad2_count = 0; ad2_count < adjs_all[iat].adj_num + 1; ad2_count++)
-                {
-                    const int T2 = adjs_all[iat].ntype[ad2_count];
-                    const int I2 = adjs_all[iat].natom[ad2_count];
-                    const int start2 = ucell.itiaiw2iwt(T2, I2, 0);
-                    const ModuleBase::Vector3<double> tau2 = adjs_all[iat].adjacent_tau[ad2_count];
-                    const Atom* atom2 = &ucell.atoms[T2];
-                    const int nw2_tot = atom2->nw * npol;
-                    const double Rcut_AO2 = orb.Phi[T2].getRcut();
-                    const double dist1 = (tau1 - tau0).norm() * ucell.lat0;
-                    const double dist2 = (tau2 - tau0).norm() * ucell.lat0;
-
-                    if (dist1 > Rcut_Beta + Rcut_AO1 || dist2 > Rcut_Beta + Rcut_AO2)
-                    {
-                        continue;
-                    }
-
-                    for (int iw1 = 0; iw1 < nw1_tot; ++iw1)
-                    {
-                        const int iw1_all = start1 + iw1;
-                        const int iw1_local = pv->global2local_row(iw1_all);
-                        if (iw1_local < 0)
-                            continue;
-                        const int iw1_0 = iw1 / npol;
-                        for (int iw2 = 0; iw2 < nw2_tot; ++iw2)
-                        {
-                            const int iw2_all = start2 + iw2;
-                            const int iw2_local = pv->global2local_col(iw2_all);
-                            if (iw2_local < 0)
-                                continue;
-                            const int iw2_0 = iw2 / npol;
-#ifdef _OPENMP
-                            std::vector<double> nlm1 = nlm_tot_thread[ad1_count][iw1_all];
-                            std::vector<double> nlm2 = nlm_tot_thread[ad2_count][iw2_all];
-#else
-                        std::vector<double> nlm1 = nlm_tot[ad1_count][iw1_all];
-                        std::vector<double> nlm2 = nlm_tot[ad2_count][iw2_all];
-#endif
-
-                            assert(nlm1.size() == nlm2.size());
-#ifdef _OPENMP
-                            double nlm_thread = 0.0;
-#else
-                        double nlm = 0.0;
-#endif
-                            const int nproj = ucell.infoNL.nproj[T0];
-                            int ib = 0;
-                            for (int nb = 0; nb < nproj; nb++)
-                            {
-                                const int L0 = ucell.infoNL.Beta[T0].Proj[nb].getL();
-                                for (int m = 0; m < 2 * L0 + 1; m++)
-                                {
-#ifdef _OPENMP
-                                    nlm_thread += nlm1[ib] * nlm2[ib] * ucell.atoms[T0].ncpp.dion(nb, nb);
-#else
-                                nlm += nlm1[ib] * nlm2[ib] * ucell.atoms[T0].ncpp.dion(nb, nb);
-#endif
-                                    ib += 1;
-                                }
-                            }
-                            assert(ib == nlm1.size());
-
-                            const int ir = pv->global2local_row(iw1_all);
-                            const int ic = pv->global2local_col(iw2_all);
-                            long index = 0;
-                            if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
-                            {
-                                index = ic * pv->nrow + ir;
-                            }
-                            else
-                            {
-                                index = ir * pv->ncol + ic;
-                            }
-#ifdef _OPENMP
-                            Nonlocal_thread[index] += nlm_thread;
-#else
-                        lm.set_HSgamma(iw1_all, iw2_all, nlm, HSloc);
-#endif
-                        } // iw2
-                    }     // iw1
-                }         // ad2
-            }             // ad1
-        }                 // end iat
-
-#ifdef _OPENMP
-#pragma omp critical(cal_nonlocal)
-        {
-            for (int i = 0; i < pv->nloc; i++)
-            {
-                lm.Hloc_fixed[i] += Nonlocal_thread[i];
-            }
-        }
-        delete[] Nonlocal_thread;
-#endif
-#ifdef _OPENMP
-    }
-#endif
-
-#ifdef __MKL
-    mkl_set_num_threads(mkl_threads);
-#endif
-
-    ModuleBase::timer::tick("LCAO_domain", "b_NL_beta_new");
-    return;
-}
-
-} // namespace LCAO_domain
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
index 25a0214cc7..13399b6722 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
@@ -289,221 +289,3 @@ int Grid_Technique::binary_search_find_R2_offset(int val, int iat) const
     }
     return -1;
 }
-
-// be called in LCAO_Hamilt::calculate_Hk.
-void LCAO_Matrix::folding_fixedH(
-						const int &ik, 
-						const std::vector<ModuleBase::Vector3<double>>& kvec_d,
-						bool cal_syns)
-{
-	ModuleBase::TITLE("LCAO_nnr","folding_fixedH");
-    ModuleBase::timer::tick("LCAO_nnr", "folding_fixedH");
-    const Parallel_Orbitals* pv = this->ParaV;
-
-	int tot_index = 0;
-#ifdef _OPENMP
-#pragma omp parallel reduction(+:tot_index)
-{
-	const int num_threads = omp_get_num_threads();
-	const int thread_id = omp_get_thread_num();
-#else
-	const int num_threads = 1;
-	const int thread_id = 0;
-#endif
-	ModuleBase::Vector3<double> dtau;
-	ModuleBase::Vector3<double> tau1;
-	ModuleBase::Vector3<double> tau2;
-
-	ModuleBase::Vector3<double> dtau1;
-	ModuleBase::Vector3<double> dtau2;
-	ModuleBase::Vector3<double> tau0;
-
-#ifdef __DEEPKS
-	if (GlobalV::deepks_scf)
-    {
-		int beg, len;
-		ModuleBase::BLOCK_TASK_DIST_1D(num_threads, thread_id, (int)pv->nloc, 1024, beg, len);
-		ModuleBase::GlobalFunc::ZEROS(GlobalC::ld.H_V_delta_k[ik].data() + beg, len);
-	}
-#endif
-
-#ifdef _OPENMP
-#pragma omp for schedule(dynamic)
-#endif
-	for (int iat=0; iat<GlobalC::ucell.nat; ++iat)
-	{
-		const int T1 = GlobalC::ucell.iat2it[iat];
-		Atom* atom1 = &GlobalC::ucell.atoms[T1];
-		const int I1 = GlobalC::ucell.iat2ia[iat];
-		{
-			tau1 = atom1->tau[I1];
-			//GlobalC::GridD.Find_atom(tau1);
-			AdjacentAtomInfo adjs;
-			GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1, &adjs);
-			Atom* atom1 = &GlobalC::ucell.atoms[T1];
-			const int start = GlobalC::ucell.itiaiw2iwt(T1,I1,0);
-			int index = pv->nlocstart[iat];
-
-			if (cal_syns)
-            {
-                for (int k = 0; k < 3; k++)
-                {
-                    tau1[k] = tau1[k] - atom1->vel[I1][k] * INPUT.mdp.md_dt / GlobalC::ucell.lat0 ;
-                }
-            }
-
-			// (2) search among all adjacent atoms.
-			for (int ad = 0; ad < adjs.adj_num+1; ++ad)
-			{
-				const int T2 = adjs.ntype[ad];
-				const int I2 = adjs.natom[ad];
-				Atom* atom2 = &GlobalC::ucell.atoms[T2];
-
-				tau2 = adjs.adjacent_tau[ad];
-				dtau = tau2 - tau1;
-				double distance = dtau.norm() * GlobalC::ucell.lat0;
-				double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
-
-				bool adj = false;
-
-				if(distance < rcut) 
-				{
-					adj = true;
-				}
-				else if(distance >= rcut)
-				{
-					for (int ad0 = 0; ad0 < adjs.adj_num+1; ++ad0)
-					{
-						const int T0 = adjs.ntype[ad0]; 
-						const int I0 = adjs.natom[ad0]; 
-						//const int iat0 = GlobalC::ucell.itia2iat(T0, I0);
-						//const int start0 = GlobalC::ucell.itiaiw2iwt(T0, I0, 0);
-
-						tau0 = adjs.adjacent_tau[ad0];
-						dtau1 = tau0 - tau1;
-						dtau2 = tau0 - tau2;
-
-						double distance1 = dtau1.norm() * GlobalC::ucell.lat0;
-						double distance2 = dtau2.norm() * GlobalC::ucell.lat0;
-
-						double rcut1 = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ucell.infoNL.Beta[T0].get_rcut_max();
-						double rcut2 = GlobalC::ORB.Phi[T2].getRcut() + GlobalC::ucell.infoNL.Beta[T0].get_rcut_max();
-
-						if( distance1 < rcut1 && distance2 < rcut2 )
-						{
-							adj = true;
-							break;
-						}
-					}
-				}
-
-				if(adj) // mohan fix bug 2011-06-26, should not be '<='
-				{
-					// (3) calculate the nu of atom (T2, I2)
-					const int start2 = GlobalC::ucell.itiaiw2iwt(T2,I2,0);
-					//------------------------------------------------
-					// exp(k dot dR)
-					// dR is the index of box in Crystal coordinates
-					//------------------------------------------------
-					ModuleBase::Vector3<double> dR(adjs.box[ad].x, adjs.box[ad].y, adjs.box[ad].z); 
-					const double arg = ( kvec_d[ik] * dR ) * ModuleBase::TWO_PI;
-					//const double arg = ( kvec_d[ik] * GlobalC::GridD.getBox(ad) ) * ModuleBase::TWO_PI;
-					double sinp, cosp;
-					ModuleBase::libm::sincos(arg, &sinp, &cosp);
-					const std::complex<double> kphase = std::complex <double> ( cosp,  sinp );
-
-					//--------------------------------------------------
-					// calculate how many matrix elements are in 
-					// this processor.
-					//--------------------------------------------------
-					//########################### EXPLAIN ###############################
-					// 1. overlap matrix with k point
-					// this->SlocR = < phi_0i | phi_Rj >, where 0, R are the cell index
-					// while i,j are the orbital index.
-
-					// 2. H_fixed=T+Vnl matrix element with k point (if Vna is not used).
-					// H_fixed=T+Vnl+Vna matrix element with k point (if Vna is used).
-					// this->Hloc_fixed = < phi_0i | H_fixed | phi_Rj>
-
-					// 3. H(k) |psi(k)> = S(k) | psi(k)> 
-					// Sloc2 is used to diagonalize for a give k point.
-					// Hloc_fixed2 is used to diagonalize (eliminate index R).
-					//###################################################################
-#define NW_IJ_LOOP_BEGIN() \
-					for(int ii=0; ii<atom1->nw*GlobalV::NPOL; ii++)				\
-					{															\
-						/* the index of orbitals in this processor */			\
-						const int iw1_all = start + ii;							\
-						const int mu = pv->global2local_row(iw1_all);				\
-						if(mu<0) {continue;}									\
-						for(int jj=0; jj<atom2->nw*GlobalV::NPOL; jj++)			\
-						{														\
-							int iw2_all = start2 + jj;							\
-							const int nu = pv->global2local_col(iw2_all);			\
-							if(nu<0) {continue;}
-
-								// DO STH.
-
-#define NW_IJ_LOOP_END() \
-							++index;											\
-							++tot_index;										\
-						}/*end jj*/												\
-					}/*end ii*/
-
-					if(GlobalV::NSPIN!=4)
-					{
-						auto compute_kernel = [&](int iic, int index) {
-#ifdef __DEEPKS
-							if(GlobalV::deepks_scf)
-							{
-								GlobalC::ld.H_V_delta_k[ik][iic] += GlobalC::ld.H_V_deltaR[index] * kphase;
-							}
-#endif
-						};
-						if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
-						{
-							NW_IJ_LOOP_BEGIN()
-							compute_kernel(mu+nu*pv->nrow, index);
-							NW_IJ_LOOP_END()
-						} else {
-							NW_IJ_LOOP_BEGIN()
-							compute_kernel(mu*pv->ncol+nu, index);
-							NW_IJ_LOOP_END()
-						}
-					}
-					else
-					{
-						auto compute_kernel = [&](int iic, int index, int iw1_all, int iw2_all) {
-#ifdef __DEEPKS
-							if(GlobalV::deepks_scf)
-							{
-								if (iw1_all % 2 == iw2_all % 2)
-								{
-									GlobalC::ld.H_V_delta_k[ik][iic] += GlobalC::ld.H_V_deltaR[index] * kphase;
-								}
-							}
-#endif
-						};
-						if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
-						{
-							NW_IJ_LOOP_BEGIN()
-							compute_kernel(mu+nu*pv->nrow, index, iw1_all, iw2_all);
-							NW_IJ_LOOP_END()
-						} else {
-							NW_IJ_LOOP_BEGIN()
-							compute_kernel(mu*pv->ncol+nu, index, iw1_all, iw2_all);
-							NW_IJ_LOOP_END()
-						}
-					}
-				}
-			}// end ad
-		}// end I1
-	} // end T1
-#ifdef _OPENMP
-}
-#endif
-	assert(tot_index==this->ParaV->nnr);
-
-	ModuleBase::timer::tick("LCAO_nnr","folding_fixedH");
-	return;
-}
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
index 69d1844bd9..1f20a0e778 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
@@ -36,32 +36,31 @@ namespace hamilt
 {
 
 template <typename TK, typename TR>
-HamiltLCAO<TK, TR>::HamiltLCAO(LCAO_Matrix* LM_in, const K_Vectors& kv_in, const TwoCenterIntegrator& intor_overlap_orb)
+HamiltLCAO<TK, TR>::HamiltLCAO(const Parallel_Orbitals* paraV, const K_Vectors& kv_in, const TwoCenterIntegrator& intor_overlap_orb)
 {
     this->classname = "HamiltLCAO";
 
     this->kv = &kv_in;
 
     // Real space Hamiltonian is inited with template TR
-    this->hR = new HContainer<TR>(LM_in->ParaV);
-    this->sR = new HContainer<TR>(LM_in->ParaV);
+    this->hR = new HContainer<TR>(paraV);
+    this->sR = new HContainer<TR>(paraV);
+    this->hsk = new HS_Matrix_K<TK>(paraV);
 
-    this->getOperator() = new OverlapNew<OperatorLCAO<TK, TR>>(LM_in,
+    this->getOperator() = new OverlapNew<OperatorLCAO<TK, TR>>(this->hsk,
                                                                this->kv->kvec_d,
                                                                this->hR,
-                                                               &(this->getHk(LM_in)),
                                                                this->sR,
-                                                               &(this->getSk(LM_in)),
                                                                &GlobalC::ucell,
                                                                &GlobalC::GridD,
-                                                               &intor_overlap_orb,
-                                                               LM_in->ParaV);
+                                                               &intor_overlap_orb);
 }
 
 template <typename TK, typename TR>
 HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
                                Gint_k* GK_in,
                                LCAO_Matrix* LM_in,
+                               const Parallel_Orbitals* paraV,
                                Local_Orbital_Charge* loc_in,
                                elecstate::Potential* pot_in,
                                const K_Vectors& kv_in,
@@ -73,13 +72,9 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
     this->classname = "HamiltLCAO";
 
     // Real space Hamiltonian is inited with template TR
-    this->hR = new HContainer<TR>(LM_in->ParaV);
-    this->sR = new HContainer<TR>(LM_in->ParaV);
-
-    const std::size_t row_col_size
-        = static_cast<std::size_t>(LM_in->ParaV->get_row_size()) * LM_in->ParaV->get_col_size();
-    this->getSk(LM_in).resize(row_col_size);
-    this->getHk(LM_in).resize(row_col_size);
+    this->hR = new HContainer<TR>(paraV);
+    this->sR = new HContainer<TR>(paraV);
+    this->hsk = new HS_Matrix_K<TK>(paraV);
 
     // Effective potential term (\sum_r <psi(r)|Veff(r)|psi(r)>) is registered without template
     std::vector<std::string> pot_register_in;
@@ -121,51 +116,42 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
         this->hR->fix_gamma();
         // initial operator for Gamma_only case
         // overlap term (<psi|psi>) is indispensable
-        // in Gamma_only case, target SR is LCAO_Matrix::Sloc, which is same as SK
-        this->getOperator() = new OverlapNew<OperatorLCAO<TK, TR>>(LM_in,
+        // in Gamma_only case, target SK is this->hsk->get_sk(), the target SR is this->sR
+        this->getOperator() = new OverlapNew<OperatorLCAO<TK, TR>>(this->hsk,
                                                                    this->kv->kvec_d,
                                                                    this->hR,
-                                                                   &(this->getHk(LM_in)),
                                                                    this->sR,
-                                                                   &(this->getSk(LM_in)),
                                                                    &GlobalC::ucell,
                                                                    &GlobalC::GridD,
-                                                                   two_center_bundle.overlap_orb.get(),
-                                                                   LM_in->ParaV);
+                                                                   two_center_bundle.overlap_orb.get());
 
-        // kinetic term (<psi|T|psi>),
-        // in Gamma_only case, target HR is LCAO_Matrix::Hloc_fixed, while target HK is LCAO_Matrix::Hloc
-        // LCAO_Matrix::Hloc_fixed2 is used for storing
+        // kinetic term (<psi|T|psi>)
         if (GlobalV::T_IN_H)
         {
-            Operator<TK>* ekinetic = new EkineticNew<OperatorLCAO<TK, TR>>(LM_in,
+            Operator<TK>* ekinetic = new EkineticNew<OperatorLCAO<TK, TR>>(this->hsk,
                                                                            this->kv->kvec_d,
                                                                            this->hR,
-                                                                           &(this->getHk(LM_in)),
                                                                            &GlobalC::ucell,
                                                                            &GlobalC::GridD,
-                                                                           two_center_bundle.kinetic_orb.get(),
-                                                                           LM_in->ParaV);
+                                                                           two_center_bundle.kinetic_orb.get());
             this->getOperator()->add(ekinetic);
         }
 
         // nonlocal term (<psi|beta>D<beta|psi>)
-        // in general case, target HR is this->hR, while target HK is LCAO_Matrix::Hloc
+        // in general case, target HR is this->hR, while target HK is this->hsk->get_hk()
         if (GlobalV::VNL_IN_H)
         {
-            Operator<TK>* nonlocal = new NonlocalNew<OperatorLCAO<TK, TR>>(LM_in,
+            Operator<TK>* nonlocal = new NonlocalNew<OperatorLCAO<TK, TR>>(this->hsk,
                                                                            this->kv->kvec_d,
                                                                            this->hR,
-                                                                           &(this->getHk(LM_in)),
                                                                            &GlobalC::ucell,
                                                                            &GlobalC::GridD,
-                                                                           two_center_bundle.overlap_orb_beta.get(),
-                                                                           LM_in->ParaV);
+                                                                           two_center_bundle.overlap_orb_beta.get());
             this->getOperator()->add(nonlocal);
         }
 
         // Effective potential term (\sum_r <psi(r)|Veff(r)|psi(r)>)
-        // in general case, target HR is Gint::hRGint, while target HK is LCAO_Matrix::Hloc
+        // in general case, target HR is Gint::hRGint, while target HK is this->hsk->get_hk()
         if (GlobalV::VL_IN_H)
         {
             // only Potential is not empty, Veff and Meta are available
@@ -176,14 +162,12 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
                 // effective potential term
                 Operator<TK>* veff = new Veff<OperatorLCAO<TK, TR>>(GG_in,
                                                                     loc_in,
-                                                                    LM_in,
+                                                                    this->hsk,
                                                                     this->kv->kvec_d,
                                                                     pot_in,
                                                                     this->hR, // no explicit call yet
-                                                                    &(this->getHk(LM_in)),
                                                                     &GlobalC::ucell,
-                                                                    &GlobalC::GridD,
-                                                                    LM_in->ParaV // no explicit call yet
+                                                                    &GlobalC::GridD
                 );
                 this->getOperator()->add(veff);
             }
@@ -192,11 +176,9 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
 #ifdef __DEEPKS
         if (GlobalV::deepks_scf)
         {
-            Operator<TK>* deepks = new DeePKS<OperatorLCAO<TK, TR>>(loc_in,
-                                                                    LM_in,
+            Operator<TK>* deepks = new DeePKS<OperatorLCAO<TK, TR>>(this->hsk,
                                                                     this->kv->kvec_d,
                                                                     this->hR, // no explicit call yet
-                                                                    &(this->getHk(LM_in)),
                                                                     &GlobalC::ucell,
                                                                     &GlobalC::GridD,
                                                                     two_center_bundle.overlap_orb_alpha.get(),
@@ -212,23 +194,20 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
             Operator<TK>* dftu = nullptr;
             if (GlobalV::dft_plus_u == 2)
             {
-                dftu = new OperatorDFTU<OperatorLCAO<TK, TR>>(LM_in,
+                dftu = new OperatorDFTU<OperatorLCAO<TK, TR>>(this->hsk,
                                                               kv->kvec_d,
                                                               this->hR, // no explicit call yet
-                                                              &(this->getHk(LM_in)),
                                                               this->kv->isk);
             }
             else
             {
-                dftu = new DFTU<OperatorLCAO<TK, TR>>(LM_in,
+                dftu = new DFTU<OperatorLCAO<TK, TR>>(this->hsk,
                                                       this->kv->kvec_d,
                                                       this->hR,
-                                                      &(this->getHk(LM_in)),
                                                       GlobalC::ucell,
                                                       &GlobalC::GridD,
                                                       two_center_bundle.overlap_orb_onsite.get(),
-                                                      &GlobalC::dftu,
-                                                      *(LM_in->ParaV));
+                                                      &GlobalC::dftu);
             }
             this->getOperator()->add(dftu);
         }
@@ -238,7 +217,7 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
     {
         // Effective potential term (\sum_r <psi(r)|Veff(r)|psi(r)>)
         // Meta potential term (\sum_r <psi(r)|tau(r)|psi(r)>)
-        // in general case, target HR is Gint::pvpR_reduced, while target HK is LCAO_Matrix::Hloc2
+        // in general case, target HR is Gint::pvpR_reduced, while target HK is this->hsk->get_hk()
         if (GlobalV::VL_IN_H)
         {
             // only Potential is not empty, Veff and Meta are available
@@ -249,14 +228,12 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
                 // Veff term
                 this->getOperator() = new Veff<OperatorLCAO<TK, TR>>(GK_in,
                                                                      loc_in,
-                                                                     LM_in,
+                                                                     this->hsk,
                                                                      kv->kvec_d,
                                                                      pot_in,
                                                                      this->hR,
-                                                                     &(this->getHk(LM_in)),
                                                                      &GlobalC::ucell,
-                                                                     &GlobalC::GridD,
-                                                                     LM_in->ParaV);
+                                                                     &GlobalC::GridD);
                 // reset spin index and real space Hamiltonian matrix
                 int start_spin = -1;
                 GK_in->reset_spin(start_spin);
@@ -267,16 +244,13 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
 
         // initial operator for multi-k case
         // overlap term is indispensable
-        Operator<TK>* overlap = new OverlapNew<OperatorLCAO<TK, TR>>(LM_in,
+        Operator<TK>* overlap = new OverlapNew<OperatorLCAO<TK, TR>>(this->hsk,
                                                                      this->kv->kvec_d,
                                                                      this->hR,
-                                                                     &(this->getHk(LM_in)),
                                                                      this->sR,
-                                                                     &(this->getSk(LM_in)),
                                                                      &GlobalC::ucell,
                                                                      &GlobalC::GridD,
-                                                                     two_center_bundle.overlap_orb.get(),
-                                                                     LM_in->ParaV);
+                                                                     two_center_bundle.overlap_orb.get());
         if (this->getOperator() == nullptr)
         {
             this->getOperator() = overlap;
@@ -287,32 +261,28 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
         }
 
         // kinetic term (<psi|T|psi>),
-        // in general case, target HR is this->hR, while target HK is LCAO_Matrix::Hloc2
+        // in general case, target HR is this->hR, while target HK is this->hsk->get_hk()
         if (GlobalV::T_IN_H)
         {
-            Operator<TK>* ekinetic = new EkineticNew<OperatorLCAO<TK, TR>>(LM_in,
+            Operator<TK>* ekinetic = new EkineticNew<OperatorLCAO<TK, TR>>(this->hsk,
                                                                            this->kv->kvec_d,
                                                                            this->hR,
-                                                                           &(this->getHk(LM_in)),
                                                                            &GlobalC::ucell,
                                                                            &GlobalC::GridD,
-                                                                           two_center_bundle.kinetic_orb.get(),
-                                                                           LM_in->ParaV);
+                                                                           two_center_bundle.kinetic_orb.get());
             this->getOperator()->add(ekinetic);
         }
 
         // nonlocal term (<psi|beta>D<beta|psi>)
-        // in general case, target HR is this->hR, while target HK is LCAO_Matrix::Hloc2
+        // in general case, target HR is this->hR, while target HK is this->hsk->get_hk()
         if (GlobalV::VNL_IN_H)
         {
-            Operator<TK>* nonlocal = new NonlocalNew<OperatorLCAO<TK, TR>>(LM_in,
+            Operator<TK>* nonlocal = new NonlocalNew<OperatorLCAO<TK, TR>>(this->hsk,
                                                                            this->kv->kvec_d,
                                                                            this->hR,
-                                                                           &(this->getHk(LM_in)),
                                                                            &GlobalC::ucell,
                                                                            &GlobalC::GridD,
-                                                                           two_center_bundle.overlap_orb_beta.get(),
-                                                                           LM_in->ParaV);
+                                                                           two_center_bundle.overlap_orb_beta.get());
             // TDDFT velocity gague will calculate full non-local potential including the original one and the
             // correction on its own. So the original non-local potential term should be skipped
             if (GlobalV::ESOLVER_TYPE != "tddft" || elecstate::H_TDDFT_pw::stype != 1)
@@ -328,11 +298,9 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
 #ifdef __DEEPKS
         if (GlobalV::deepks_scf)
         {
-            Operator<TK>* deepks = new DeePKS<OperatorLCAO<TK, TR>>(loc_in,
-                                                                    LM_in,
+            Operator<TK>* deepks = new DeePKS<OperatorLCAO<TK, TR>>(this->hsk,
                                                                     this->kv->kvec_d,
                                                                     hR,
-                                                                    &(this->getHk(LM_in)),
                                                                     &GlobalC::ucell,
                                                                     &GlobalC::GridD,
                                                                     two_center_bundle.overlap_orb_alpha.get(),
@@ -345,22 +313,19 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
         if (GlobalV::ESOLVER_TYPE == "tddft" && elecstate::H_TDDFT_pw::stype == 1)
         {
             elecstate::H_TDDFT_pw::update_At();
-            Operator<TK>* td_ekinetic = new TDEkinetic<OperatorLCAO<TK, TR>>(LM_in,
+            Operator<TK>* td_ekinetic = new TDEkinetic<OperatorLCAO<TK, TR>>(this->hsk,
                                                                              this->hR,
-                                                                             &(this->getHk(LM_in)),
                                                                              this->sR,
                                                                              kv,
                                                                              &GlobalC::ucell,
                                                                              &GlobalC::GridD);
             this->getOperator()->add(td_ekinetic);
 
-            Operator<TK>* td_nonlocal = new TDNonlocal<OperatorLCAO<TK, TR>>(LM_in,
+            Operator<TK>* td_nonlocal = new TDNonlocal<OperatorLCAO<TK, TR>>(this->hsk,
                                                                              this->kv->kvec_d,
                                                                              this->hR,
-                                                                             &(this->getHk(LM_in)),
                                                                              &GlobalC::ucell,
-                                                                             &GlobalC::GridD,
-                                                                             LM_in->ParaV);
+                                                                             &GlobalC::GridD);
             this->getOperator()->add(td_nonlocal);
         }
         if (GlobalV::dft_plus_u)
@@ -368,32 +333,28 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
             Operator<TK>* dftu = nullptr;
             if (GlobalV::dft_plus_u == 2)
             {
-                dftu = new OperatorDFTU<OperatorLCAO<TK, TR>>(LM_in,
+                dftu = new OperatorDFTU<OperatorLCAO<TK, TR>>(this->hsk,
                                                               kv->kvec_d,
                                                               this->hR, // no explicit call yet
-                                                              &(this->getHk(LM_in)),
                                                               this->kv->isk);
             }
             else
             {
-                dftu = new DFTU<OperatorLCAO<TK, TR>>(LM_in,
+                dftu = new DFTU<OperatorLCAO<TK, TR>>(this->hsk,
                                                       this->kv->kvec_d,
                                                       this->hR,
-                                                      &(this->getHk(LM_in)),
                                                       GlobalC::ucell,
                                                       &GlobalC::GridD,
                                                       two_center_bundle.overlap_orb_onsite.get(),
-                                                      &GlobalC::dftu,
-                                                      *(LM_in->ParaV));
+                                                      &GlobalC::dftu);
             }
             this->getOperator()->add(dftu);
         }
         if (GlobalV::sc_mag_switch)
         {
-            Operator<TK>* sc_lambda = new OperatorScLambda<OperatorLCAO<TK, TR>>(LM_in,
+            Operator<TK>* sc_lambda = new OperatorScLambda<OperatorLCAO<TK, TR>>(this->hsk,
                                                                                  kv->kvec_d,
                                                                                  this->hR, // no explicit call yet
-                                                                                 &(this->getHk(LM_in)),
                                                                                  this->kv->isk);
             this->getOperator()->add(sc_lambda);
         }
@@ -402,9 +363,9 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
 #ifdef __EXX
     if (GlobalC::exx_info.info_global.cal_exx)
     {
-        Operator<TK>* exx = new OperatorEXX<OperatorLCAO<TK, TR>>(LM_in,
+        Operator<TK>* exx = new OperatorEXX<OperatorLCAO<TK, TR>>(this->hsk,
+                                                                  LM_in,
                                                                   this->hR,
-                                                                  &(this->getHk(LM_in)),
                                                                   *this->kv,
                                                                   LM_in->Hexxd,
                                                                   LM_in->Hexxc,
@@ -487,71 +448,22 @@ Operator<TK>*& HamiltLCAO<TK, TR>::getOperator()
 {
     return this->ops;
 }
-// getHk
-template <>
-std::vector<double>& HamiltLCAO<double, double>::getHk(LCAO_Matrix* LM)
-{
-    return LM->Hloc;
-}
-
-template <>
-std::vector<std::complex<double>>& HamiltLCAO<std::complex<double>, double>::getHk(LCAO_Matrix* LM)
-{
-    return LM->Hloc2;
-}
-template <>
-std::vector<std::complex<double>>& HamiltLCAO<std::complex<double>, std::complex<double>>::getHk(LCAO_Matrix* LM)
-{
-    return LM->Hloc2;
-}
-
-// getSk
-template <>
-std::vector<double>& HamiltLCAO<double, double>::getSk(LCAO_Matrix* LM)
-{
-    return LM->Sloc;
-}
-
-template <>
-std::vector<std::complex<double>>& HamiltLCAO<std::complex<double>, double>::getSk(LCAO_Matrix* LM)
-{
-    return LM->Sloc2;
-}
-template <>
-std::vector<std::complex<double>>& HamiltLCAO<std::complex<double>, std::complex<double>>::getSk(LCAO_Matrix* LM)
-{
-    return LM->Sloc2;
-}
-
-// getHR
-template <typename TK, typename TR>
-HContainer<TR>*& HamiltLCAO<TK, TR>::getHR()
-{
-    return this->hR;
-}
-
-// getSR
-template <typename TK, typename TR>
-HContainer<TR>*& HamiltLCAO<TK, TR>::getSR()
-{
-    return this->sR;
-}
 
 template <typename TK, typename TR>
-void HamiltLCAO<TK, TR>::updateSk(const int ik, LCAO_Matrix* LM_in, const int hk_type)
+void HamiltLCAO<TK, TR>::updateSk(const int ik, const int hk_type)
 {
     ModuleBase::TITLE("HamiltLCAO", "updateSk");
     ModuleBase::timer::tick("HamiltLCAO", "updateSk");
-    ModuleBase::GlobalFunc::ZEROS(this->getSk(LM_in).data(), this->getSk(LM_in).size());
+    ModuleBase::GlobalFunc::ZEROS(this->getSk(), this->get_size_hsk());
     if (hk_type == 1) // collumn-major matrix for SK
     {
-        const int nrow = LM_in->ParaV->get_row_size();
-        hamilt::folding_HR(*this->sR, this->getSk(LM_in).data(), this->kv->kvec_d[ik], nrow, 1);
+        const int nrow = this->hsk->get_pv()->get_row_size();
+        hamilt::folding_HR(*this->sR, this->getSk(), this->kv->kvec_d[ik], nrow, 1);
     }
     else if (hk_type == 0) // row-major matrix for SK
     {
-        const int ncol = LM_in->ParaV->get_col_size();
-        hamilt::folding_HR(*this->sR, this->getSk(LM_in).data(), this->kv->kvec_d[ik], ncol, 0);
+        const int ncol = this->hsk->get_pv()->get_col_size();
+        hamilt::folding_HR(*this->sR, this->getSk(), this->kv->kvec_d[ik], ncol, 0);
     }
     ModuleBase::timer::tick("HamiltLCAO", "updateSk");
 }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
index fa3b7f643f..e227d41cc5 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
@@ -10,7 +10,7 @@
 #include "module_hamilt_lcao/module_gint/gint_gamma.h"
 #include "module_hamilt_lcao/module_gint/gint_k.h"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
-
+#include "module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp"
 namespace hamilt
 {
 
@@ -27,6 +27,7 @@ class HamiltLCAO : public Hamilt<TK>
     HamiltLCAO(Gint_Gamma* GG_in,
                Gint_k* GK_in,
                LCAO_Matrix* LM_in,
+               const Parallel_Orbitals* paraV,
                Local_Orbital_Charge* loc_in,
                elecstate::Potential* pot_in,
                const K_Vectors& kv_in,
@@ -36,7 +37,7 @@ class HamiltLCAO : public Hamilt<TK>
     /**
      * @brief Constructor of vacuum Operators, only HR and SR will be initialed as empty HContainer
      */
-    HamiltLCAO(LCAO_Matrix* LM_in, const K_Vectors& kv_in, const TwoCenterIntegrator& intor_overlap_orb);
+    HamiltLCAO(const Parallel_Orbitals* paraV, const K_Vectors& kv_in, const TwoCenterIntegrator& intor_overlap_orb);
 
     ~HamiltLCAO()
     {
@@ -46,18 +47,20 @@ class HamiltLCAO : public Hamilt<TK>
         }
         delete this->hR;
         delete this->sR;
-    };
+        delete this->hsk;
+    }
 
     /// get pointer of Operator<TK> ops
     Operator<TK>*& getOperator();
-    /// get hk-pointer of std::vector<TK>, the return will be LM->Hloc or LM->Hloc2
-    std::vector<TK>& getHk(LCAO_Matrix* LM);
-    /// get sk-pointer of std::vector<TK>, the return will be this->sk
-    std::vector<TK>& getSk(LCAO_Matrix* LM);
+    /// get hk-pointer
+    TK* getHk() const{return this->hsk->get_hk();}
+    /// get sk-pointer
+    TK* getSk() const{return this->hsk->get_sk();}
+    int get_size_hsk() const{return this->hsk->get_size();}
     /// get HR pointer of *this->hR, which is a HContainer<TR> and contains H(R)
-    HContainer<TR>*& getHR();
+    HContainer<TR>*& getHR(){return this->hR;}
     /// get SR pointer of *this->sR, which is a HContainer<TR> and contains S(R)
-    HContainer<TR>*& getSR();
+    HContainer<TR>*& getSR(){return this->sR;}
     /// refresh the status of HR
     void refresh() override;
 
@@ -71,7 +74,7 @@ class HamiltLCAO : public Hamilt<TK>
      * @param hk_type 0: SK is row-major, 1: SK is collumn-major
      * @return void
      */
-    void updateSk(const int ik, LCAO_Matrix* LM_in, const int hk_type = 0);
+    void updateSk(const int ik, const int hk_type = 0);
 
     // core function: return H(k) and S(k) matrixs for direct solving eigenvalues.
     // not used in PW base
@@ -85,6 +88,7 @@ class HamiltLCAO : public Hamilt<TK>
     HContainer<TR>* hR = nullptr;
     HContainer<TR>* sR = nullptr;
 
+    HS_Matrix_K<TK>* hsk = nullptr;
     // special case for NSPIN=2 , data of HR should be separated into two parts
     // save them in this->hRS2;
     std::vector<TR> hRS2;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
new file mode 100644
index 0000000000..ce3d79b2d6
--- /dev/null
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
@@ -0,0 +1,32 @@
+#ifndef ABACUS_HS_MATRIX_K_HPP
+#define ABACUS_HS_MATRIX_K_HPP
+
+#include <vector>
+namespace hamilt
+{
+    template <typename TK>
+    class HS_Matrix_K
+    {
+        public:
+            HS_Matrix_K(const Parallel_Orbitals* paraV, bool no_s=false){
+                hk.resize(paraV->nloc);
+                if(!no_s) 
+                {
+                    sk.resize(paraV->nloc);
+                }
+                this->pv = paraV;
+            }
+            TK* get_hk() {return hk.data();}
+            TK* get_sk() {return sk.data();}
+            int get_size() {return hk.size();}
+            void set_zero_hk() {hk.assign(hk.size(), 0);}
+            void set_zero_sk() {sk.assign(sk.size(), 0);}
+            const Parallel_Orbitals* get_pv() const {return this->pv;}
+        private:
+            std::vector<TK> hk;
+            std::vector<TK> sk;
+            const Parallel_Orbitals* pv = nullptr;
+    };
+}
+
+#endif
\ No newline at end of file
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
index fc46a0ece3..28565ecdaf 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
@@ -15,22 +15,20 @@ namespace hamilt
 {
 
 template <typename TK, typename TR>
-DeePKS<OperatorLCAO<TK, TR>>::DeePKS(Local_Orbital_Charge* loc_in,
-                                     LCAO_Matrix* LM_in,
+DeePKS<OperatorLCAO<TK, TR>>::DeePKS(HS_Matrix_K<TK>* hsk_in,
                                      const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                      HContainer<TR>* hR_in,
-                                     std::vector<TK>* hK_in,
                                      const UnitCell* ucell_in,
                                      Grid_Driver* GridD_in,
                                      const TwoCenterIntegrator* intor_orb_alpha,
                                      const int& nks_in,
                                      elecstate::DensityMatrix<TK, double>* DM_in)
-    : loc(loc_in), nks(nks_in), ucell(ucell_in), OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in), DM(DM_in),
+    : nks(nks_in), ucell(ucell_in), OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), DM(DM_in),
       intor_orb_alpha_(intor_orb_alpha)
 {
     this->cal_type = calculation_type::lcao_deepks;
 #ifdef __DEEPKS
-    this->initialize_HR(GridD_in, LM_in->ParaV);
+    this->initialize_HR(GridD_in);
 #endif
 }
 
@@ -46,11 +44,14 @@ DeePKS<OperatorLCAO<TK, TR>>::~DeePKS()
 #ifdef __DEEPKS
 // initialize_HR()
 template <typename TK, typename TR>
-void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD, const Parallel_Orbitals* paraV)
+void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
 {
     ModuleBase::TITLE("DeePKS", "initialize_HR");
     ModuleBase::timer::tick("DeePKS", "initialize_HR");
 
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
+
     // this->H_V_delta = new HContainer<TR>(paraV);
     if (std::is_same<TK, double>::value)
     {
@@ -154,7 +155,7 @@ void DeePKS<OperatorLCAO<double, double>>::contributeHR()
     if (GlobalC::ld.get_hr_cal())
     {
         ModuleBase::timer::tick("DeePKS", "contributeHR");
-        const Parallel_Orbitals* pv = this->LM->ParaV;
+        const Parallel_Orbitals* pv = this->hsk->get_pv();
         GlobalC::ld.cal_projected_DM(this->DM, *this->ucell, GlobalC::ORB, GlobalC::GridD);
         GlobalC::ld.cal_descriptor(this->ucell->nat);
         GlobalC::ld.cal_gedm(this->ucell->nat);
@@ -246,7 +247,7 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::pre_calculate_nlm(
     const int iat0,
     std::vector<std::unordered_map<int, std::vector<double>>>& nlm_in)
 {
-    const Parallel_Orbitals* paraV = this->LM->ParaV;
+    const Parallel_Orbitals* paraV = this->hR->get_paraV();
     const int npol = this->ucell->get_npol();
     auto tau0 = ucell->get_tau(iat0);
     int T0, I0;
@@ -303,7 +304,7 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::calculate_HR()
     }
     ModuleBase::timer::tick("DeePKS", "calculate_HR");
 
-    const Parallel_Orbitals* paraV = this->H_V_delta->get_atom_pair(0).get_paraV();
+    const Parallel_Orbitals* paraV = this->H_V_delta->get_paraV();
     const int npol = this->ucell->get_npol();
 
     const LCAO_Orbitals& orb = LCAO_Orbitals::get_const_instance();
@@ -512,16 +513,16 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::contributeHk(int ik)
     TK* h_delta_k = nullptr;
     get_h_delta_k(ik, h_delta_k);
     // set SK to zero and then calculate SK for each k vector
-    ModuleBase::GlobalFunc::ZEROS(h_delta_k, this->hK->size());
+    ModuleBase::GlobalFunc::ZEROS(h_delta_k, this->hsk->get_size());
 
     if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
     {
-        const int nrow = this->LM->ParaV->get_row_size();
+        const int nrow = this->hsk->get_pv()->get_row_size();
         hamilt::folding_HR(*this->H_V_delta, h_delta_k, this->kvec_d[ik], nrow, 1);
     }
     else
     {
-        const int ncol = this->LM->ParaV->get_col_size();
+        const int ncol = this->hsk->get_pv()->get_col_size();
         hamilt::folding_HR(*this->H_V_delta, h_delta_k, this->kvec_d[ik], ncol, 0);
     }
     ModuleBase::timer::tick("DeePKS", "contributeHk");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
index af8348d4bc..b0fd1e0b46 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
@@ -30,11 +30,9 @@ template <typename TK, typename TR>
 class DeePKS<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
-    DeePKS<OperatorLCAO<TK, TR>>(Local_Orbital_Charge* loc_in,
-                                 LCAO_Matrix* LM_in,
+    DeePKS<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
                                  const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                  HContainer<TR>* hR_in,
-                                 std::vector<TK>* hK_in,
                                  const UnitCell* ucell_in,
                                  Grid_Driver* GridD_in,
                                  const TwoCenterIntegrator* intor_orb_alpha,
@@ -57,7 +55,6 @@ class DeePKS<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 #endif
 
   private:
-    Local_Orbital_Charge* loc;
 
     elecstate::DensityMatrix<TK, double>* DM;
 
@@ -75,7 +72,7 @@ class DeePKS<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the DeePKS real space Hamiltonian correction with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_HR(Grid_Driver* GridD, const Parallel_Orbitals* paraV);
+    void initialize_HR(Grid_Driver* GridD);
 
     /**
      * @brief calculate the DeePKS correction matrix with specific <I,J,R> atom-pairs
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp
index e7fbf8c0ac..2209e2b426 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp
@@ -28,7 +28,7 @@ void DFTU<OperatorLCAO<TK, TR>>::cal_force_stress(const bool cal_force,
     // begin the calculation of force and stress
     ModuleBase::timer::tick("DFTU", "cal_force_stress");
 
-    const Parallel_Orbitals* paraV = dmR_tmp[0]->get_atom_pair(0).get_paraV();
+    const Parallel_Orbitals* paraV = dmR_tmp[0]->get_paraV();
     const int npol = this->ucell->get_npol();
     std::vector<double> stress_tmp(6, 0);
     if (cal_force)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp
index f239091bcb..6d40d8ffef 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp
@@ -11,16 +11,14 @@
 #include "module_base/parallel_reduce.h"
 
 template <typename TK, typename TR>
-hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::DFTU(LCAO_Matrix* LM_in,
+hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::DFTU(HS_Matrix_K<TK>* hsk_in,
                                                  const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                                  hamilt::HContainer<TR>* hR_in,
-                                                 std::vector<TK>* hK_in,
                                                  const UnitCell& ucell_in,
                                                  Grid_Driver* GridD_in,
                                                  const TwoCenterIntegrator* intor,
-                                                 ModuleDFTU::DFTU* dftu_in,
-                                                 const Parallel_Orbitals& paraV)
-    : hamilt::OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in), intor_(intor)
+                                                 ModuleDFTU::DFTU* dftu_in)
+    : hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), intor_(intor)
 {
     this->cal_type = calculation_type::lcao_dftu;
     this->ucell = &ucell_in;
@@ -29,7 +27,7 @@ hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::DFTU(LCAO_Matrix* LM_in,
     assert(this->ucell != nullptr);
 #endif
     // initialize HR to allocate sparse Nonlocal matrix memory
-    this->initialize_HR(GridD_in, &paraV);
+    this->initialize_HR(GridD_in);
     // set nspin
     this->nspin = GlobalV::NSPIN;
 }
@@ -42,11 +40,14 @@ hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::~DFTU()
 
 // initialize_HR()
 template <typename TK, typename TR>
-void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD, const Parallel_Orbitals* paraV)
+void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
 {
     ModuleBase::TITLE("DFTU", "initialize_HR");
     ModuleBase::timer::tick("DFTU", "initialize_HR");
 
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
+
     this->adjs_all.clear();
     this->adjs_all.reserve(this->ucell->nat);
     for (int iat0 = 0; iat0 < ucell->nat; iat0++)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.h
index 64f64a6d5b..f17471ac91 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.h
@@ -26,15 +26,13 @@ template <typename TK, typename TR>
 class DFTU<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
-    DFTU<OperatorLCAO<TK, TR>>(LCAO_Matrix* lm_in,
+    DFTU<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
                                const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                hamilt::HContainer<TR>* hR_in,
-                               std::vector<TK>* hK_in,
                                const UnitCell& ucell_in,
                                Grid_Driver* gridD_in,
                                const TwoCenterIntegrator* intor,
-                               ModuleDFTU::DFTU* dftu_in,
-                               const Parallel_Orbitals& paraV);
+                               ModuleDFTU::DFTU* dftu_in);
     ~DFTU<OperatorLCAO<TK, TR>>();
 
     /**
@@ -56,8 +54,6 @@ class DFTU<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 
     hamilt::HContainer<TR>* HR = nullptr;
 
-    TK* HK_pointer = nullptr;
-
     const TwoCenterIntegrator* intor_ = nullptr;
 
     /// @brief the number of spin components, 1 for no-spin, 2 for collinear spin case and 4 for non-collinear spin case
@@ -70,7 +66,7 @@ class DFTU<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * the size of HR will not change in DFTU,
      * because I don't want to expand HR larger than Nonlocal operator caused by DFTU
      */
-    void initialize_HR(Grid_Driver* gridD_in, const Parallel_Orbitals* paraV);
+    void initialize_HR(Grid_Driver* gridD_in);
 
     /**
      * @brief calculate the <phi|alpha^I> overlap values and save them in this->nlm_tot
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp
index 6ece09f001..0702a14d77 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp
@@ -9,23 +9,22 @@
 // Constructor
 template <typename TK, typename TR>
 hamilt::EkineticNew<hamilt::OperatorLCAO<TK, TR>>::EkineticNew(
-    LCAO_Matrix* LM_in,
+    HS_Matrix_K<TK>* hsk_in,
     const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
     hamilt::HContainer<TR>* hR_in,
-    std::vector<TK>* hK_in,
     const UnitCell* ucell_in,
     Grid_Driver* GridD_in,
-    const TwoCenterIntegrator* intor,
-    const Parallel_Orbitals* paraV)
-    : hamilt::OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in), intor_(intor)
+    const TwoCenterIntegrator* intor)
+    : hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), intor_(intor)
 {
     this->cal_type = calculation_type::lcao_fixed;
     this->ucell = ucell_in;
 #ifdef __DEBUG
     assert(this->ucell != nullptr);
+    assert(this->hsk != nullptr);
 #endif
     // initialize HR to allocate sparse Ekinetic matrix memory
-    this->initialize_HR(GridD_in, paraV);
+    this->initialize_HR(GridD_in);
 }
 
 // destructor
@@ -40,12 +39,14 @@ hamilt::EkineticNew<hamilt::OperatorLCAO<TK, TR>>::~EkineticNew()
 
 // initialize_HR()
 template <typename TK, typename TR>
-void hamilt::EkineticNew<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD,
-                                                                      const Parallel_Orbitals* paraV)
+void hamilt::EkineticNew<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
 {
     ModuleBase::TITLE("EkineticNew", "initialize_HR");
     ModuleBase::timer::tick("EkineticNew", "initialize_HR");
 
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
+
     for (int iat1 = 0; iat1 < ucell->nat; iat1++)
     {
         auto tau1 = ucell->get_tau(iat1);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.h
index 35e827fc3d..2cb05b53eb 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.h
@@ -39,14 +39,12 @@ class EkineticNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     /**
      * @brief Construct a new EkineticNew object
      */
-    EkineticNew<OperatorLCAO<TK, TR>>(LCAO_Matrix* LM_in,
+    EkineticNew<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
                                       const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                       HContainer<TR>* hR_in,
-                                      std::vector<TK>* hK_in,
                                       const UnitCell* ucell_in,
                                       Grid_Driver* GridD_in,
-                                      const TwoCenterIntegrator* intor,
-                                      const Parallel_Orbitals* paraV);
+                                      const TwoCenterIntegrator* intor);
 
     /**
      * @brief Destroy the EkineticNew object
@@ -77,7 +75,7 @@ class EkineticNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the electronic kinetic matrix with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_HR(Grid_Driver* GridD_in, const Parallel_Orbitals* paraV);
+    void initialize_HR(Grid_Driver* GridD_in);
 
     /**
      * @brief calculate the electronic kinetic matrix with specific <I,J,R> atom-pairs
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.cpp
index fb2e5e4e6e..de325a998f 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.cpp
@@ -12,59 +12,4 @@ template class Meta<OperatorLCAO<std::complex<double>, double>>;
 
 template class Meta<OperatorLCAO<std::complex<double>, std::complex<double>>>;
 
-template<>
-Meta<OperatorLCAO<double, double>>::~Meta()
-{
-}
-
-template<>
-Meta<OperatorLCAO<std::complex<double>, double>>::~Meta()
-{
-}
-
-template<>
-Meta<OperatorLCAO<std::complex<double>, std::complex<double>>>::~Meta()
-{
-}
-
-//nothing to do in LCAO base for meta operator
-template<>
-void Meta<OperatorLCAO<double, double>>::contributeHR()
-{
-    return;
-}
-
-//nothing to do in LCAO base for meta operator
-template<>
-void Meta<OperatorLCAO<std::complex<double>, double>>::contributeHR()
-{
-    return;
-}
-//nothing to do in LCAO base for meta operator
-template<>
-void Meta<OperatorLCAO<std::complex<double>, std::complex<double>>>::contributeHR()
-{
-    return;
-}
-
-//nothing to do in LCAO base for meta operator
-template<>
-void Meta<OperatorLCAO<double, double>>::contributeHk(int ik)
-{
-    return;
-}
-
-//nothing to do in LCAO base for meta operator
-template<>
-void Meta<OperatorLCAO<std::complex<double>, double>>::contributeHk(int ik)
-{
-    return;
-}
-
-template<>
-void Meta<OperatorLCAO<std::complex<double>, std::complex<double>>>::contributeHk(int ik)
-{
-    return;
-}
-
 }
\ No newline at end of file
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.h
index c33429e79b..587ea036b8 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.h
@@ -23,46 +23,22 @@ template <typename TK, typename TR>
 class Meta<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
-    Meta<OperatorLCAO<TK, TR>>(Gint_k* GK_in,
-                          Local_Orbital_Charge* loc_in,
-                          LCAO_Matrix* LM_in,
+    Meta<OperatorLCAO<TK, TR>>(
+                          HS_Matrix_K<TK>* hsk_in,
                           const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                          HContainer<TR>* hR_in,
-                          std::vector<TK>* hK_in)
-        : GK(GK_in),
-          loc(loc_in),
-          OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in)
-    {
-        this->cal_type = calculation_type::lcao_gint;
-    }
-    Meta<OperatorLCAO<TK, TR>>(Gint_Gamma* GG_in,
-                          Local_Orbital_Charge* loc_in,
-                          LCAO_Matrix* LM_in,
-                          const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                          HContainer<TR>* hR_in,
-                          std::vector<TK>* hK_in)
-        : GG(GG_in),
-          loc(loc_in),
-          OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in)
+                          HContainer<TR>* hR_in)
+        : OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_gint;
     }
 
-    ~Meta<OperatorLCAO<TK, TR>>();
+    ~Meta<OperatorLCAO<TK, TR>>(){};
 
-    virtual void contributeHR() override;
+    virtual void contributeHR() override{}//do nothing now
 
-    virtual void contributeHk(int ik) override;
+    virtual void contributeHk(int ik) override{};//do nothing now
 
   private:
-    // used for k-dependent grid integration.
-    Gint_k* GK = nullptr;
-
-    // used for gamma only algorithms.
-    Gint_Gamma* GG = nullptr;
-
-    // Charge calculating method in LCAO base and contained grid base calculation: DM_R, DM, pvpR_reduced
-    Local_Orbital_Charge* loc = nullptr;
 };
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp
index bf8bd4683f..7714cea230 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp
@@ -11,15 +11,13 @@
 
 template <typename TK, typename TR>
 hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>::NonlocalNew(
-    LCAO_Matrix* LM_in,
+    HS_Matrix_K<TK>* hsk_in,
     const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
     hamilt::HContainer<TR>* hR_in,
-    std::vector<TK>* hK_in,
     const UnitCell* ucell_in,
     Grid_Driver* GridD_in,
-    const TwoCenterIntegrator* intor,
-    const Parallel_Orbitals* paraV)
-    : hamilt::OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in), intor_(intor)
+    const TwoCenterIntegrator* intor)
+    : hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), intor_(intor)
 {
     this->cal_type = calculation_type::lcao_fixed;
     this->ucell = ucell_in;
@@ -27,7 +25,7 @@ hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>::NonlocalNew(
     assert(this->ucell != nullptr);
 #endif
     // initialize HR to allocate sparse Nonlocal matrix memory
-    this->initialize_HR(GridD_in, paraV);
+    this->initialize_HR(GridD_in);
 }
 
 // destructor
@@ -42,12 +40,14 @@ hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>::~NonlocalNew()
 
 // initialize_HR()
 template <typename TK, typename TR>
-void hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD,
-                                                                      const Parallel_Orbitals* paraV)
+void hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
 {
     ModuleBase::TITLE("NonlocalNew", "initialize_HR");
     ModuleBase::timer::tick("NonlocalNew", "initialize_HR");
 
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
+
     this->adjs_all.clear();
     this->adjs_all.reserve(this->ucell->nat);
     for (int iat0 = 0; iat0 < ucell->nat; iat0++)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h
index 6dbcdd9ca3..92fdc19bde 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.h
@@ -38,14 +38,12 @@ template <typename TK, typename TR>
 class NonlocalNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
-    NonlocalNew<OperatorLCAO<TK, TR>>(LCAO_Matrix* LM_in,
+    NonlocalNew<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
                                       const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                       hamilt::HContainer<TR>* hR_in,
-                                      std::vector<TK>* hK_in,
                                       const UnitCell* ucell_in,
                                       Grid_Driver* GridD_in,
-                                      const TwoCenterIntegrator* intor,
-                                      const Parallel_Orbitals* paraV);
+                                      const TwoCenterIntegrator* intor);
     ~NonlocalNew<OperatorLCAO<TK, TR>>();
 
     /**
@@ -59,8 +57,6 @@ class NonlocalNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
   private:
     const UnitCell* ucell = nullptr;
 
-    hamilt::HContainer<TR>* HR = nullptr;
-
     hamilt::HContainer<TR>* HR_fixed = nullptr;
 
     // the following variable is introduced temporarily during LCAO refactoring
@@ -68,8 +64,6 @@ class NonlocalNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 
     bool allocated = false;
 
-    TK* HK_pointer = nullptr;
-
     bool HR_fixed_done = false;
 
     /**
@@ -77,7 +71,7 @@ class NonlocalNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the non-local pseudopotential matrix with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_HR(Grid_Driver* GridD_in, const Parallel_Orbitals* paraV);
+    void initialize_HR(Grid_Driver* GridD_in);
 
     /**
      * @brief calculate the non-local pseudopotential matrix with specific <I,J,R> atom-pairs
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.cpp
index a1a210711e..924dcf1764 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.cpp
@@ -26,12 +26,13 @@ void OperatorDFTU<OperatorLCAO<double, double>>::contributeHk(int ik)
     ModuleBase::TITLE("OperatorDFTU", "contributeHk");
     ModuleBase::timer::tick("OperatorDFTU", "contributeHk");
     // Effective potential of DFT+U is added to total Hamiltonian here; Quxin adds on 20201029
-    std::vector<double> eff_pot(this->LM->ParaV->nloc);
-    GlobalC::dftu.cal_eff_pot_mat_real(ik, &eff_pot[0], isk);
+    std::vector<double> eff_pot(this->hsk->get_pv()->nloc);
+    GlobalC::dftu.cal_eff_pot_mat_real(ik, &eff_pot[0], isk, this->hsk->get_sk());
+    double* hk = this->hsk->get_hk();
 
-    for (int irc = 0; irc < this->LM->ParaV->nloc; irc++)
+    for (int irc = 0; irc < this->hsk->get_pv()->nloc; irc++)
     {
-        this->LM->Hloc[irc] += eff_pot[irc];
+        hk[irc] += eff_pot[irc];
     }
 
     ModuleBase::timer::tick("OperatorDFTU", "contributeHk");
@@ -43,12 +44,13 @@ void OperatorDFTU<OperatorLCAO<std::complex<double>, double>>::contributeHk(int
     ModuleBase::TITLE("OperatorDFTU", "contributeHk");
     ModuleBase::timer::tick("OperatorDFTU", "contributeHk");
     // Effective potential of DFT+U is added to total Hamiltonian here; Quxin adds on 20201029
-    std::vector<std::complex<double>> eff_pot(this->LM->ParaV->nloc);
-    GlobalC::dftu.cal_eff_pot_mat_complex(ik, &eff_pot[0], isk);
+    std::vector<std::complex<double>> eff_pot(this->hsk->get_pv()->nloc);
+    GlobalC::dftu.cal_eff_pot_mat_complex(ik, &eff_pot[0], isk, this->hsk->get_sk());
+    std::complex<double>* hk = this->hsk->get_hk();
 
-    for (int irc = 0; irc < this->LM->ParaV->nloc; irc++)
+    for (int irc = 0; irc < this->hsk->get_pv()->nloc; irc++)
     {
-        this->LM->Hloc2[irc] += eff_pot[irc];
+        hk[irc] += eff_pot[irc];
     }
 
     ModuleBase::timer::tick("OperatorDFTU", "contributeHk");
@@ -60,12 +62,13 @@ void OperatorDFTU<OperatorLCAO<std::complex<double>, std::complex<double>>>::con
     ModuleBase::TITLE("OperatorDFTU", "contributeHk");
     ModuleBase::timer::tick("OperatorDFTU", "contributeHk");
     // Effective potential of DFT+U is added to total Hamiltonian here; Quxin adds on 20201029
-    std::vector<std::complex<double>> eff_pot(this->LM->ParaV->nloc);
-    GlobalC::dftu.cal_eff_pot_mat_complex(ik, &eff_pot[0], isk);
+    std::vector<std::complex<double>> eff_pot(this->hsk->get_pv()->nloc);
+    GlobalC::dftu.cal_eff_pot_mat_complex(ik, &eff_pot[0], isk, this->hsk->get_sk());
 
-    for (int irc = 0; irc < this->LM->ParaV->nloc; irc++)
+    std::complex<double>* hk = this->hsk->get_hk();
+    for (int irc = 0; irc < this->hsk->get_pv()->nloc; irc++)
     {
-        this->LM->Hloc2[irc] += eff_pot[irc];
+        hk[irc] += eff_pot[irc];
     }
 
     ModuleBase::timer::tick("OperatorDFTU", "contributeHk");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h
index e80c8285c0..de40bce4b3 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h
@@ -20,12 +20,11 @@ template <typename TK, typename TR>
 class OperatorDFTU<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
-    OperatorDFTU<OperatorLCAO<TK, TR>>(LCAO_Matrix* LM_in,
+    OperatorDFTU<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
                                   const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                   hamilt::HContainer<TR>* hR_in,
-                                  std::vector<TK>* hK_in,
                                   const std::vector<int>& isk_in)
-        : isk(isk_in), OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in)
+        : isk(isk_in), OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_dftu;
     }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.cpp
index ff1b6f9ca1..f4418840b5 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.cpp
@@ -8,18 +8,18 @@ namespace hamilt
 template<>
 void OperatorEXX<OperatorLCAO<double, double>>::add_loaded_Hexx(const int ik)
 {
-    BlasConnector::axpy(this->LM->ParaV->get_local_size(), 1.0, this->LM->Hexxd_k_load[ik].data(), 1, this->LM->Hloc.data(), 1);
+    BlasConnector::axpy(this->LM->ParaV->get_local_size(), 1.0, this->LM->Hexxd_k_load[ik].data(), 1, this->hsk->get_hk(), 1);
 }
 template<>
 void OperatorEXX<OperatorLCAO<std::complex<double>, double>>::add_loaded_Hexx(const int ik)
 {
 
-    BlasConnector::axpy(this->LM->ParaV->get_local_size(), 1.0, this->LM->Hexxc_k_load[ik].data(), 1, this->LM->Hloc2.data(), 1);
+    BlasConnector::axpy(this->LM->ParaV->get_local_size(), 1.0, this->LM->Hexxc_k_load[ik].data(), 1, this->hsk->get_hk(), 1);
 }
 template<>
 void OperatorEXX<OperatorLCAO<std::complex<double>, std::complex<double>>>::add_loaded_Hexx(const int ik)
 {
-    BlasConnector::axpy(this->LM->ParaV->get_local_size(), 1.0, this->LM->Hexxc_k_load[ik].data(), 1, this->LM->Hloc2.data(), 1);
+    BlasConnector::axpy(this->LM->ParaV->get_local_size(), 1.0, this->LM->Hexxc_k_load[ik].data(), 1, this->hsk->get_hk(), 1);
 }
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h
index 4609cb44ee..6cb1e8fde2 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h
@@ -6,6 +6,7 @@
 #include <RI/global/Tensor.h>
 #include "operator_lcao.h"
 #include "module_cell/klist.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 
 namespace hamilt
 {
@@ -25,9 +26,9 @@ class OperatorEXX<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
     using TAC = std::pair<int, std::array<int, 3>>;
 public:
-    OperatorEXX<OperatorLCAO<TK, TR>>(LCAO_Matrix* LM_in,
+    OperatorEXX<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
+        LCAO_Matrix* LM_in,
         hamilt::HContainer<TR>* hR_in,
-        std::vector<TK>* hK_in,
         const K_Vectors& kv_in,
         std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd_in = nullptr,
         std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc_in = nullptr,
@@ -53,6 +54,8 @@ class OperatorEXX<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 
       void add_loaded_Hexx(const int ik);
       const K_Vectors& kv;
+
+      LCAO_Matrix* LM = nullptr;
 };
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp
index 5f180bd145..781112ff5c 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp
@@ -13,20 +13,21 @@ namespace hamilt
 
 template <typename TK, typename TR>
 OperatorEXX<OperatorLCAO<TK, TR>>::OperatorEXX(
+	HS_Matrix_K<TK>* hsk_in,
 	LCAO_Matrix* LM_in,
 	hamilt::HContainer<TR>* hR_in,
-	std::vector<TK>* hK_in,
 	const K_Vectors& kv_in,
 	std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd_in,
 	std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc_in,
 	int* two_level_step_in,
 	const bool restart_in)
-	: OperatorLCAO<TK, TR>(LM_in, kv_in.kvec_d, hR_in, hK_in),
+	: OperatorLCAO<TK, TR>(hsk_in, kv_in.kvec_d, hR_in),
 	  kv(kv_in),
 	  Hexxd(Hexxd_in),
 	  Hexxc(Hexxc_in), 
 	  two_level_step(two_level_step_in),
-	  restart(restart_in)
+	  restart(restart_in),
+	  LM(LM_in)
 {
 	this->cal_type = calculation_type::lcao_exx;
 	if (this->restart)
@@ -102,7 +103,7 @@ void OperatorEXX<OperatorLCAO<TK, TR>>::contributeHk(int ik)
 				GlobalC::exx_info.info_global.hybrid_alpha,
 				this->Hexxd == nullptr ? *this->LM->Hexxd : *this->Hexxd,
 				*this->LM->ParaV,
-				*this->hK);
+				this->hsk->get_hk());
 		else
 			RI_2D_Comm::add_Hexx(
 				this->kv,
@@ -110,7 +111,7 @@ void OperatorEXX<OperatorLCAO<TK, TR>>::contributeHk(int ik)
 				GlobalC::exx_info.info_global.hybrid_alpha,
 				this->Hexxc == nullptr ? *this->LM->Hexxc : *this->Hexxc,
 				*this->LM->ParaV,
-				*this->hK);
+				this->hsk->get_hk());
 	}
 }
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.cpp
index d47eecf5b4..50937ff6af 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.cpp
@@ -15,16 +15,16 @@ template<>
 void OperatorLCAO<double, double>::get_hs_pointers()
 {
     ModuleBase::timer::tick("OperatorLCAO", "get_hs_pointers");
-    this->hmatrix_k = this->LM->Hloc.data();
+    this->hmatrix_k = this->hsk->get_hk();
     if ((this->new_e_iteration && ik == 0) || hsolver::HSolverLCAO<double>::out_mat_hs[0])
     {
         if (this->smatrix_k == nullptr)
         {
-            this->smatrix_k = new double[this->LM->Sloc.size()];
+            this->smatrix_k = new double[this->hsk->get_size()];
             this->allocated_smatrix = true;
         }
         const int inc = 1;
-        BlasConnector::copy(this->LM->Sloc.size(), this->LM->Sloc.data(), inc, this->smatrix_k, inc);
+        BlasConnector::copy(this->hsk->get_size(), this->hsk->get_sk(), inc, this->smatrix_k, inc);
 #ifdef __ELPA
         hsolver::DiagoElpa<double>::DecomposedState = 0;
 #endif
@@ -36,36 +36,22 @@ void OperatorLCAO<double, double>::get_hs_pointers()
 template<>
 void OperatorLCAO<std::complex<double>, double>::get_hs_pointers()
 {
-    this->hmatrix_k = this->LM->Hloc2.data();
-    this->smatrix_k = this->LM->Sloc2.data();
+    this->hmatrix_k = this->hsk->get_hk();
+    this->smatrix_k = this->hsk->get_sk();
 }
 
 template<>
 void OperatorLCAO<std::complex<double>, std::complex<double>>::get_hs_pointers()
 {
-    this->hmatrix_k = this->LM->Hloc2.data();
-    this->smatrix_k = this->LM->Sloc2.data();
+    this->hmatrix_k = this->hsk->get_hk();
+    this->smatrix_k = this->hsk->get_sk();
 }
 
-template<>
-void OperatorLCAO<double, double>::refresh_h()
-{
-    // Set the matrix 'H' to zero.
-    this->LM->zeros_HSgamma('H');
-}
-
-template<>
-void OperatorLCAO<std::complex<double>, double>::refresh_h()
-{
-    // Set the matrix 'H' to zero.
-    this->LM->zeros_HSk('H');
-}
-
-template<>
-void OperatorLCAO<std::complex<double>, std::complex<double>>::refresh_h()
+template<typename TK, typename TR>
+void OperatorLCAO<TK, TR>::refresh_h()
 {
     // Set the matrix 'H' to zero.
-    this->LM->zeros_HSk('H');
+    this->hsk->set_zero_hk();
 }
 
 template<typename TK, typename TR>
@@ -254,13 +240,13 @@ void OperatorLCAO<TK, TR>::contributeHk(int ik)
     ModuleBase::timer::tick("OperatorLCAO", "contributeHk");
     if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
     {
-        const int nrow = this->LM->ParaV->get_row_size();
-        hamilt::folding_HR(*this->hR, this->hK->data(), this->kvec_d[ik], nrow, 1);
+        const int nrow = this->hsk->get_pv()->get_row_size();
+        hamilt::folding_HR(*this->hR, this->hsk->get_hk(), this->kvec_d[ik], nrow, 1);
     }
     else
     {
-        const int ncol = this->LM->ParaV->get_col_size();
-        hamilt::folding_HR(*this->hR, this->hK->data(), this->kvec_d[ik], ncol, 0);
+        const int ncol = this->hsk->get_pv()->get_col_size();
+        hamilt::folding_HR(*this->hR, this->hsk->get_hk(), this->kvec_d[ik], ncol, 0);
     }
     ModuleBase::timer::tick("OperatorLCAO", "contributeHk");
 }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.h
index b5a7e536b9..5aece587ab 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.h
@@ -3,8 +3,8 @@
 #include "module_base/vector3.h"
 #include "module_hamilt_general/matrixblock.h"
 #include "module_hamilt_general/operator.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp"
 
 namespace hamilt
 {
@@ -14,16 +14,15 @@ class OperatorLCAO : public Operator<TK>
 {
   public:
     OperatorLCAO(
-        LCAO_Matrix* LM_in, 
+        HS_Matrix_K<TK>* hsk_in, 
         const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-        HContainer<TR>* hR_in,
-        std::vector<TK>* hK_in)
-        : LM(LM_in), kvec_d(kvec_d_in), hR(hR_in), hK(hK_in){};
+        HContainer<TR>* hR_in)
+        : hsk(hsk_in), kvec_d(kvec_d_in), hR(hR_in){}
     virtual ~OperatorLCAO()
     {
         if (this->allocated_smatrix)
             delete[] this->smatrix_k;
-    };
+    }
 
     /* Function init(k) is used for update HR and HK ,
     data pointers of HR and HK are not passed by this function, but passed by constructors of every derived classes.
@@ -55,16 +54,16 @@ class OperatorLCAO : public Operator<TK>
         this->get_hs_pointers();
 #ifdef __MPI
         hk_in = MatrixBlock<TK>{hmatrix_k,
-                               (size_t)this->LM->ParaV->nrow,
-                               (size_t)this->LM->ParaV->ncol,
-                               this->LM->ParaV->desc};
+                               (size_t)this->hsk->get_pv()->nrow,
+                               (size_t)this->hsk->get_pv()->ncol,
+                               this->hsk->get_pv()->desc};
         sk_in = MatrixBlock<TK>{smatrix_k,
-                               (size_t)this->LM->ParaV->nrow,
-                               (size_t)this->LM->ParaV->ncol,
-                               this->LM->ParaV->desc};
+                               (size_t)this->hsk->get_pv()->nrow,
+                               (size_t)this->hsk->get_pv()->ncol,
+                               this->hsk->get_pv()->desc};
 #else
-        hk_in = MatrixBlock<TK>{hmatrix_k, (size_t)this->LM->ParaV->nrow, (size_t)this->LM->ParaV->ncol, nullptr};
-        sk_in = MatrixBlock<TK>{smatrix_k, (size_t)this->LM->ParaV->nrow, (size_t)this->LM->ParaV->ncol, nullptr};
+        hk_in = MatrixBlock<TK>{hmatrix_k, (size_t)this->hsk->get_pv()->nrow, (size_t)this->hsk->get_pv()->ncol, nullptr};
+        sk_in = MatrixBlock<TK>{smatrix_k, (size_t)this->hsk->get_pv()->nrow, (size_t)this->hsk->get_pv()->ncol, nullptr};
 #endif
     }
 
@@ -88,7 +87,7 @@ class OperatorLCAO : public Operator<TK>
 
     // protected:
     //  Hamiltonian matrix which are stored in LCAO_Matrix and calculated in OperatorLCAO
-    LCAO_Matrix* LM = nullptr;
+    HS_Matrix_K<TK>* hsk = nullptr;
     const std::vector<ModuleBase::Vector3<double>>& kvec_d;
 
   protected:
@@ -97,9 +96,6 @@ class OperatorLCAO : public Operator<TK>
     // Real space Hamiltonian pointer
     hamilt::HContainer<TR>* hR = nullptr;
 
-    // vector of HK matrix for current k point in reciprocal space
-    std::vector<TK>* hK = nullptr;
-
   private:
     void get_hs_pointers();
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp
index 3d0e6b5397..4baf56edd3 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp
@@ -7,37 +7,34 @@
 #include "module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h"
 
 template <typename TK, typename TR>
-hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::OverlapNew(LCAO_Matrix* LM_in,
+hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::OverlapNew(HS_Matrix_K<TK>* hsk_in,
                                                              const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                                              hamilt::HContainer<TR>* hR_in,
-                                                             std::vector<TK>* hK_in,
                                                              hamilt::HContainer<TR>* SR_in,
-                                                             std::vector<TK>* SK_pointer_in,
                                                              const UnitCell* ucell_in,
                                                              Grid_Driver* GridD_in,
-                                                             const TwoCenterIntegrator* intor,
-                                                             const Parallel_Orbitals* paraV)
-    : hamilt::OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in), intor_(intor)
+                                                             const TwoCenterIntegrator* intor)
+    : hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in), intor_(intor)
 {
     this->cal_type = calculation_type::lcao_overlap;
     this->ucell = ucell_in;
     this->SR = SR_in;
-    this->SK_pointer = SK_pointer_in;
 #ifdef __DEBUG
     assert(this->ucell != nullptr);
     assert(this->SR != nullptr);
-    assert(this->SK_pointer != nullptr);
 #endif
     // initialize SR to allocate sparse overlap matrix memory
-    this->initialize_SR(GridD_in, paraV);
+    this->initialize_SR(GridD_in);
 }
 
 // initialize_SR()
 template <typename TK, typename TR>
-void hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::initialize_SR(Grid_Driver* GridD, const Parallel_Orbitals* paraV)
+void hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::initialize_SR(Grid_Driver* GridD)
 {
     ModuleBase::TITLE("OverlapNew", "initialize_SR");
     ModuleBase::timer::tick("OverlapNew", "initialize_SR");
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
     for (int iat1 = 0; iat1 < ucell->nat; iat1++)
     {
         auto tau1 = ucell->get_tau(iat1);
@@ -197,16 +194,16 @@ void hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::contributeHk(int ik)
     ModuleBase::TITLE("OverlapNew", "contributeHk");
     ModuleBase::timer::tick("OverlapNew", "contributeHk");
     // set SK to zero and then calculate SK for each k vector
-    ModuleBase::GlobalFunc::ZEROS(this->SK_pointer->data(), this->SK_pointer->size());
+    this->hsk->set_zero_sk();
     if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
     {
         const int nrow = this->SR->get_atom_pair(0).get_paraV()->get_row_size();
-        hamilt::folding_HR(*this->SR, this->SK_pointer->data(), this->kvec_d[ik], nrow, 1);
+        hamilt::folding_HR(*this->SR, this->hsk->get_sk(), this->kvec_d[ik], nrow, 1);
     }
     else
     {
         const int ncol = this->SR->get_atom_pair(0).get_paraV()->get_col_size();
-        hamilt::folding_HR(*this->SR, this->SK_pointer->data(), this->kvec_d[ik], ncol, 0);
+        hamilt::folding_HR(*this->SR, this->hsk->get_sk(), this->kvec_d[ik], ncol, 0);
     }
     // update kvec_d_old
     this->kvec_d_old = this->kvec_d[ik];
@@ -217,9 +214,9 @@ void hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::contributeHk(int ik)
 template <typename TK, typename TR>
 TK* hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::getSk()
 {
-    if (this->SK_pointer != nullptr)
+    if (this->hsk != nullptr)
     {
-        return this->SK_pointer->data();
+        return this->hsk->get_sk();
     }
     return nullptr;
 }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.h
index 84bfb7519a..8f6016cd21 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.h
@@ -36,16 +36,13 @@ template <typename TK, typename TR>
 class OverlapNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
-    OverlapNew<OperatorLCAO<TK, TR>>(LCAO_Matrix* LM_in,
+    OverlapNew<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
                                      const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                      hamilt::HContainer<TR>* hR_in,
-                                     std::vector<TK>* hK_in,
                                      hamilt::HContainer<TR>* SR_in,
-                                     std::vector<TK>* SK_pointer_in,
                                      const UnitCell* ucell_in,
                                      Grid_Driver* GridD_in,
-                                     const TwoCenterIntegrator* intor,
-                                     const Parallel_Orbitals* paraV);
+                                     const TwoCenterIntegrator* intor);
 
     virtual void contributeHR() override;
 
@@ -58,8 +55,6 @@ class OverlapNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 
     hamilt::HContainer<TR>* SR = nullptr;
 
-    std::vector<TK>* SK_pointer = nullptr;
-
     const TwoCenterIntegrator* intor_ = nullptr;
 
     bool SR_fixed_done = false;
@@ -69,7 +64,7 @@ class OverlapNew<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the overlap matrix with specific <I,J,R> atom-pairs
      * the size of SR will be fixed after initialization
      */
-    void initialize_SR(Grid_Driver* GridD_in, const Parallel_Orbitals* paraV);
+    void initialize_SR(Grid_Driver* GridD_in);
 
     /**
      * @brief calculate the overlap matrix with specific <I,J,R> atom-pairs
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.cpp
index cea1313e89..2aa96175dd 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.cpp
@@ -20,12 +20,13 @@ void OperatorScLambda<OperatorLCAO<std::complex<double>, std::complex<double>>>:
     ModuleBase::timer::tick("OperatorScLambda", "contributeHk");
     SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>& sc
         = SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::getScInstance();
-    std::vector<std::complex<double>> h_lambda(this->LM->ParaV->nloc);
+    std::vector<std::complex<double>> h_lambda(this->hsk->get_pv()->nloc);
     std::fill(h_lambda.begin(), h_lambda.end(), std::complex<double>(0, 0));
-    sc.cal_h_lambda(&h_lambda[0], this->LM->Sloc2, ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(), this->isk[ik]);
-    for (int irc = 0; irc < this->LM->ParaV->nloc; irc++)
+    sc.cal_h_lambda(&h_lambda[0], this->hsk->get_sk(), ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(), this->isk[ik]);
+    std::complex<double>* hk = this->hsk->get_hk();
+    for (int irc = 0; irc < this->hsk->get_pv()->nloc; irc++)
     {
-        this->LM->Hloc2[irc] += h_lambda[irc];
+        hk[irc] += h_lambda[irc];
     }
     //std::cout << "OperatorScLambda contributeHk" << std::endl;
     ModuleBase::timer::tick("OperatorScLambda", "contributeHk");
@@ -39,12 +40,13 @@ void OperatorScLambda<OperatorLCAO<std::complex<double>, double>>::contributeHk(
     ModuleBase::timer::tick("OperatorScLambda", "contributeHk");
     SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>& sc
         = SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::getScInstance();
-    std::vector<std::complex<double>> h_lambda(this->LM->ParaV->nloc);
+    std::vector<std::complex<double>> h_lambda(this->hsk->get_pv()->nloc);
     std::fill(h_lambda.begin(), h_lambda.end(), std::complex<double>(0, 0));
-    sc.cal_h_lambda(&h_lambda[0], this->LM->Sloc2, ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(), this->isk[ik]);
-    for (int irc = 0; irc < this->LM->ParaV->nloc; irc++)
+    sc.cal_h_lambda(&h_lambda[0], this->hsk->get_sk(), ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(), this->isk[ik]);
+    std::complex<double>* hk = this->hsk->get_hk();
+    for (int irc = 0; irc < this->hsk->get_pv()->nloc; irc++)
     {
-        this->LM->Hloc2[irc] += h_lambda[irc];
+        hk[irc] += h_lambda[irc];
     }
     //std::cout << "OperatorScLambda contributeHk" << std::endl;
     ModuleBase::timer::tick("OperatorScLambda", "contributeHk");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.h
index 9d196db9f0..646c0415fb 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.h
@@ -22,12 +22,11 @@ template<typename TK, typename TR>
 class OperatorScLambda<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
-    OperatorScLambda<OperatorLCAO<TK, TR>>(LCAO_Matrix* LM_in,
+    OperatorScLambda<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
                                     const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                     hamilt::HContainer<TR>* hR_in,
-                                    std::vector<TK>* hK_in,
                                     const std::vector<int>& isk_in)
-        : isk(isk_in), OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in)
+        : isk(isk_in), OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_sc_lambda;
     }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp
index 1bf02046a6..ac26806e94 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp
@@ -14,22 +14,20 @@
 namespace hamilt
 {
 template <typename TK, typename TR>
-TDEkinetic<OperatorLCAO<TK, TR>>::TDEkinetic(LCAO_Matrix* LM_in,
+TDEkinetic<OperatorLCAO<TK, TR>>::TDEkinetic(HS_Matrix_K<TK>* hsk_in,
                                              hamilt::HContainer<TR>* hR_in,
-                                             std::vector<TK>* hK_in,
                                              hamilt::HContainer<TR>* SR_in,
                                              const K_Vectors* kv_in,
                                              const UnitCell* ucell_in,
                                              Grid_Driver* GridD_in)
-    : SR(SR_in), kv(kv_in), OperatorLCAO<TK, TR>(LM_in, kv_in->kvec_d, hR_in, hK_in)
+    : SR(SR_in), kv(kv_in), OperatorLCAO<TK, TR>(hsk_in, kv_in->kvec_d, hR_in)
 {
-    this->LM = LM_in;
     this->ucell = ucell_in;
     this->cal_type = calculation_type::lcao_tddft_velocity;
     this->Grid = GridD_in;
     this->init_td();
     // initialize HR to get adjs info.
-    this->initialize_HR(Grid, this->LM->ParaV);
+    this->initialize_HR(Grid);
     if (TD_Velocity::out_mat_R == true)
     {
         out_mat_R = true;
@@ -252,7 +250,7 @@ void hamilt::TDEkinetic<hamilt::OperatorLCAO<TK, TR>>::set_HR_fixed(void* hR_tmp
     this->allocated = false;
 }
 template <typename TK, typename TR>
-void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD, const Parallel_Orbitals* paraV)
+void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
 {
     if (elecstate::H_TDDFT_pw::stype != 1)
     {
@@ -261,6 +259,9 @@ void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD, const P
     ModuleBase::TITLE("TDEkinetic", "initialize_HR");
     ModuleBase::timer::tick("TDEkinetic", "initialize_HR");
 
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
+
     this->adjs_all.clear();
     this->adjs_all.reserve(this->ucell->nat);
     for (int iat1 = 0; iat1 < ucell->nat; iat1++)
@@ -298,7 +299,7 @@ void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD, const P
     ModuleBase::timer::tick("TDEkinetic", "initialize_HR");
 }
 template <typename TK, typename TR>
-void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR_tmp(const Parallel_Orbitals* paraV)
+void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR_tmp()
 {
     if (elecstate::H_TDDFT_pw::stype != 1)
     {
@@ -307,6 +308,8 @@ void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR_tmp(const Parallel_Orbitals
     ModuleBase::TITLE("TDEkinetic", "initialize_HR_tmp");
     ModuleBase::timer::tick("TDEkinetic", "initialize_HR_tmp");
 
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
     for (int i = 0; i < this->hR->size_atom_pairs(); ++i)
     {
         hamilt::AtomPair<TR>& tmp = this->hR->get_atom_pair(i);
@@ -342,9 +345,9 @@ void TDEkinetic<OperatorLCAO<TK, TR>>::contributeHR()
         // if this Operator is the first node of the sub_chain, then hR_tmp is nullptr
         if (this->hR_tmp == nullptr)
         {
-            this->hR_tmp = new hamilt::HContainer<std::complex<double>>(this->LM->ParaV);
+            this->hR_tmp = new hamilt::HContainer<std::complex<double>>(this->hR->get_paraV());
             // allocate memory for hR_tmp use the same memory as hR
-            this->initialize_HR_tmp(this->LM->ParaV);
+            this->initialize_HR_tmp();
             this->allocated = true;
         }
         if (this->next_sub_op != nullptr)
@@ -398,12 +401,12 @@ void TDEkinetic<OperatorLCAO<std::complex<double>, double>>::contributeHk(int ik
         if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
         {
             const int nrow = paraV->get_row_size();
-            hamilt::folding_HR(*this->hR_tmp, this->hK->data(), this->kvec_d[ik], nrow, 1);
+            hamilt::folding_HR(*this->hR_tmp, this->hsk->get_hk(), this->kvec_d[ik], nrow, 1);
         }
         else
         {
             const int ncol = paraV->get_col_size();
-            hamilt::folding_HR(*this->hR_tmp, this->hK->data(), this->kvec_d[ik], ncol, 0);
+            hamilt::folding_HR(*this->hR_tmp, this->hsk->get_hk(), this->kvec_d[ik], ncol, 0);
         }
 
         ModuleBase::timer::tick("TDEkinetic", "contributeHk");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h
index 70fa1f08c5..f9baba25eb 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h
@@ -36,9 +36,8 @@ template <typename TK, typename TR>
 class TDEkinetic<OperatorLCAO<TK,TR>> : public OperatorLCAO<TK, TR>
 {
   public:
-    TDEkinetic<OperatorLCAO<TK, TR>>(LCAO_Matrix* LM_in,
+    TDEkinetic<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
                                  hamilt::HContainer<TR>* hR_in,
-                                 std::vector<TK>* hK_in,
                                  hamilt::HContainer<TR>* SR_in,
                                  const K_Vectors* kv_in,
                                  const UnitCell* ucell_in,
@@ -55,13 +54,13 @@ class TDEkinetic<OperatorLCAO<TK,TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the non-local pseudopotential matrix with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_HR(Grid_Driver* GridD, const Parallel_Orbitals* paraV);
+    void initialize_HR(Grid_Driver* GridD);
 
     /**
      * @brief initialize HR_tmp
      * Allocate the memory for HR_tmp with the same size as HR
     */
-    void initialize_HR_tmp(const Parallel_Orbitals* paraV);
+    void initialize_HR_tmp();
 
     /**
      * @brief calculate the HR local matrix of <I,J,R> atom pair
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.cpp
index 8574ffa19f..bec3cefb31 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.cpp
@@ -12,25 +12,22 @@
 #endif
 
 template <typename TK, typename TR>
-hamilt::TDNonlocal<hamilt::OperatorLCAO<TK, TR>>::TDNonlocal(LCAO_Matrix* LM_in,
+hamilt::TDNonlocal<hamilt::OperatorLCAO<TK, TR>>::TDNonlocal(HS_Matrix_K<TK>* hsk_in,
                                                              const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                                              hamilt::HContainer<TR>* hR_in,
-                                                             std::vector<TK>* hK_in,
                                                              const UnitCell* ucell_in,
-                                                             Grid_Driver* GridD_in,
-                                                             const Parallel_Orbitals* paraV)
-    : hamilt::OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in)
+                                                             Grid_Driver* GridD_in)
+    : hamilt::OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
 {
     this->cal_type = calculation_type::lcao_tddft_velocity;
     this->ucell = ucell_in;
-    this->LM = LM_in;
     this->Grid = GridD_in;
 #ifdef __DEBUG
     assert(this->ucell != nullptr);
 #endif
     // initialize HR to get adjs info.
     this->init_td();
-    this->initialize_HR(Grid, this->LM->ParaV);
+    this->initialize_HR(Grid);
 }
 
 // destructor
@@ -50,7 +47,7 @@ void hamilt::TDNonlocal<hamilt::OperatorLCAO<TK, TR>>::init_td()
 }
 // initialize_HR()
 template <typename TK, typename TR>
-void hamilt::TDNonlocal<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD, const Parallel_Orbitals* paraV)
+void hamilt::TDNonlocal<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
 {
     if (elecstate::H_TDDFT_pw::stype != 1)
     {
@@ -320,9 +317,9 @@ void hamilt::TDNonlocal<hamilt::OperatorLCAO<TK, TR>>::contributeHR()
     {
         if (this->hR_tmp == nullptr)
         {
-            this->hR_tmp = new hamilt::HContainer<std::complex<double>>(this->LM->ParaV);
+            this->hR_tmp = new hamilt::HContainer<std::complex<double>>(this->hsk->get_pv());
             // allocate memory for hR_tmp use the same memory as hR
-            this->initialize_HR_tmp(this->LM->ParaV);
+            this->initialize_HR_tmp(this->hsk->get_pv());
             this->allocated = true;
         }
         if (this->next_sub_op != nullptr)
@@ -356,13 +353,13 @@ void hamilt::TDNonlocal<hamilt::OperatorLCAO<std::complex<double>, double>>::con
         // folding inside HR to HK
         if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
         {
-            const int nrow = this->LM->ParaV->get_row_size();
-            folding_HR(*this->hR_tmp, this->hK->data(), this->kvec_d[ik], nrow, 1);
+            const int nrow = this->hsk->get_pv()->get_row_size();
+            folding_HR(*this->hR_tmp, this->hsk->get_hk(), this->kvec_d[ik], nrow, 1);
         }
         else
         {
-            const int ncol = this->LM->ParaV->get_col_size();
-            folding_HR(*this->hR_tmp, this->hK->data(), this->kvec_d[ik], ncol, 0);
+            const int ncol = this->hsk->get_pv()->get_col_size();
+            folding_HR(*this->hR_tmp, this->hsk->get_hk(), this->kvec_d[ik], ncol, 0);
         }
 
         ModuleBase::timer::tick("TDNonlocal", "contributeHk");
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h
index 47f5b538d2..b3d3bcbd1f 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h
@@ -34,13 +34,11 @@ template <typename TK, typename TR>
 class TDNonlocal<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
-    TDNonlocal<OperatorLCAO<TK, TR>>(LCAO_Matrix* LM_in,
+    TDNonlocal<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
                                      const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                                      hamilt::HContainer<TR>* hR_in,
-                                     std::vector<TK>* hK_in,
                                      const UnitCell* ucell_in,
-                                     Grid_Driver* GridD_in,
-                                     const Parallel_Orbitals* paraV);
+                                     Grid_Driver* GridD_in);
     ~TDNonlocal<OperatorLCAO<TK, TR>>();
 
     /**
@@ -63,8 +61,6 @@ class TDNonlocal<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 
     bool allocated = false;
 
-    TK* HK_pointer = nullptr;
-
     bool hR_tmp_done = false;
 
     /**
@@ -72,7 +68,7 @@ class TDNonlocal<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the non-local pseudopotential matrix with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_HR(Grid_Driver* GridD_in, const Parallel_Orbitals* paraV);
+    void initialize_HR(Grid_Driver* GridD_in);
 
     /**
      * @brief initialize HR_tmp
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_T_NL_cd.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_T_NL_cd.cpp
index bdb85cf947..45d095a7b2 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_T_NL_cd.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_T_NL_cd.cpp
@@ -136,27 +136,24 @@ TEST_F(TNLTest, testTVNLcd2cd)
     int npol = ucell.get_npol();
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(2, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
     kvec_d_in[1] = ModuleBase::Vector3<double>(0.1, 0.2, 0.3);
-    std::vector<std::complex<double>> hk(paraV->get_row_size() * paraV->get_col_size(), std::complex<double>(0.0, 0.0));
+    hamilt::HS_Matrix_K<std::complex<double>> hsk(paraV, 1);
+    hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
     std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now();
     hamilt::Operator<std::complex<double>>* op
-        = new hamilt::EkineticNew<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>>(nullptr,
+        = new hamilt::EkineticNew<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>>(&hsk,
                                                                                                     kvec_d_in,
                                                                                                     HR,
-                                                                                                    &hk,
                                                                                                     &ucell,
                                                                                                     &gd,
-                                                                                                    &intor_,
-                                                                                                    paraV);
+                                                                                                    &intor_);
     hamilt::Operator<std::complex<double>>* op1
-        = new hamilt::NonlocalNew<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>>(nullptr,
+        = new hamilt::NonlocalNew<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>>(&hsk,
                                                                                                     kvec_d_in,
                                                                                                     HR,
-                                                                                                    &hk,
                                                                                                     &ucell,
                                                                                                     &gd,
-                                                                                                    &intor_,
-                                                                                                    paraV);
+                                                                                                    &intor_);
     // merge two Operators to a chain
     op->add(op1);
     std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now();
@@ -202,6 +199,7 @@ TEST_F(TNLTest, testTVNLcd2cd)
         }
     }
     // check the value of HK of gamma point
+    auto* hk = hsk.get_hk();
     result_ref += 1.0;
     int i = 0;
     for (int irow = 0; irow < paraV->get_row_size(); ++irow)
@@ -223,7 +221,7 @@ TEST_F(TNLTest, testTVNLcd2cd)
     }
     // calculate HK for k point
     start_time = std::chrono::high_resolution_clock::now();
-    hk.assign(paraV->get_row_size() * paraV->get_col_size(), std::complex<double>(0.0, 0.0));
+    hsk.set_zero_hk();
     op->init(1);
     end_time = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> elapsed_time2
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
index 8f61b6919a..28d63c8074 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
@@ -147,7 +147,8 @@ TEST_F(DFTUTest, constructHRd2d)
     // test for nspin=1
     GlobalV::NSPIN = 1;
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(1, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
-    std::vector<double> hk(paraV->get_row_size() * paraV->get_col_size(), 0.0);
+    hamilt::HS_Matrix_K<double> hsk(paraV, 1);
+    hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
     // check some input values
     EXPECT_EQ(LCAO_Orbitals::get_const_instance().Phi[0].getRcut(), 1.0);
@@ -160,7 +161,7 @@ TEST_F(DFTUTest, constructHRd2d)
     }
     std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now();
     hamilt::DFTU<hamilt::OperatorLCAO<double, double>>
-        op(nullptr, kvec_d_in, HR, &hk, ucell, &gd, &intor_, &GlobalC::dftu, *paraV);
+        op(&hsk, kvec_d_in, HR, ucell, &gd, &intor_, &GlobalC::dftu);
     std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> elapsed_time
         = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
@@ -197,7 +198,8 @@ TEST_F(DFTUTest, constructHRd2d)
     end_time = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> elapsed_time2
         = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
-    // check the value of SK
+    // check the value of HK
+    double* hk = hsk.get_hk();
     for (int i = 0; i < paraV->get_row_size() * paraV->get_col_size(); ++i)
     {
         EXPECT_NEAR(hk[i], -10.0 * test_size, 1e-10);
@@ -213,7 +215,8 @@ TEST_F(DFTUTest, constructHRd2cd)
     // test for nspin=2
     GlobalV::NSPIN = 2;
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(2, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
-    std::vector<std::complex<double>> hk(paraV->get_row_size() * paraV->get_col_size(), std::complex<double>(0.0, 0.0));
+    hamilt::HS_Matrix_K<std::complex<double>> hsk(paraV, 1);
+    hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
     EXPECT_EQ(LCAO_Orbitals::get_const_instance().Phi[0].getRcut(), 1.0);
     // reset HR and DMR
@@ -224,7 +227,7 @@ TEST_F(DFTUTest, constructHRd2cd)
         HR->get_wrapper()[i] = 0.0;
     }
     hamilt::DFTU<hamilt::OperatorLCAO<std::complex<double>, double>>
-        op(nullptr, kvec_d_in, HR, &hk, ucell, &gd, &intor_, &GlobalC::dftu, *paraV);
+        op(&hsk, kvec_d_in, HR, ucell, &gd, &intor_, &GlobalC::dftu);
     op.contributeHR();
     // check the occupations of dftu for spin-up
     for (int iat = 0; iat < test_size; iat++)
@@ -251,6 +254,7 @@ TEST_F(DFTUTest, constructHRd2cd)
     // calculate HK for gamma point
     op.contributeHk(0);
     // check the value of HK of gamma point
+    std::complex<double>* hk = hsk.get_hk();
     for (int i = 0; i < paraV->get_row_size() * paraV->get_col_size(); ++i)
     {
         EXPECT_NEAR(hk[i].real(), -10.0 * test_size, 1e-10);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_ekineticnew.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_ekineticnew.cpp
index 2985317391..0befa32c0d 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_ekineticnew.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_ekineticnew.cpp
@@ -104,10 +104,11 @@ class EkineticNewTest : public ::testing::Test
 TEST_F(EkineticNewTest, constructHRd2d)
 {
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(1, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
-    std::vector<double> hk(paraV->get_row_size() * paraV->get_col_size(), 0.0);
+    hamilt::HS_Matrix_K<double> hsk(paraV, 1);
+    hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
     hamilt::EkineticNew<hamilt::OperatorLCAO<double, double>>
-        op(nullptr, kvec_d_in, HR, &hk, &ucell, &gd, &intor_, paraV);
+        op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
     op.contributeHR();
     // check the value of HR
     for (int iap = 0; iap < HR->size_atom_pairs(); ++iap)
@@ -123,9 +124,10 @@ TEST_F(EkineticNewTest, constructHRd2d)
             EXPECT_EQ(tmp.get_pointer(0)[i], 1.0);
         }
     }
-    // calculate SK
+    // calculate HK
     op.contributeHk(0);
-    // check the value of SK
+    // check the value of HK
+    double* hk = hsk.get_hk();
     for (int i = 0; i < paraV->get_row_size() * paraV->get_col_size(); ++i)
     {
         EXPECT_EQ(hk[i], 1.0);
@@ -152,10 +154,11 @@ TEST_F(EkineticNewTest, constructHRd2cd)
 {
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(2, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
     kvec_d_in[1] = ModuleBase::Vector3<double>(0.1, 0.2, 0.3);
-    std::vector<std::complex<double>> hk(paraV->get_row_size() * paraV->get_col_size(), std::complex<double>(0.0, 0.0));
+    hamilt::HS_Matrix_K<std::complex<double>> hsk(paraV, 1);
+    hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
     hamilt::EkineticNew<hamilt::OperatorLCAO<std::complex<double>, double>>
-        op(nullptr, kvec_d_in, HR, &hk, &ucell, &gd, &intor_, paraV);
+        op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
     op.contributeHR();
     // check the value of HR
     for (int iap = 0; iap < HR->size_atom_pairs(); ++iap)
@@ -171,16 +174,17 @@ TEST_F(EkineticNewTest, constructHRd2cd)
             EXPECT_EQ(tmp.get_pointer(0)[i], 1.0);
         }
     }
-    // calculate SK for gamma point
+    // calculate HK for gamma point
     op.contributeHk(0);
-    // check the value of SK of gamma point
+    auto* hk = hsk.get_hk();
+    // check the value of HK of gamma point
     for (int i = 0; i < paraV->get_row_size() * paraV->get_col_size(); ++i)
     {
         EXPECT_EQ(hk[i].real(), 1.0);
         EXPECT_EQ(hk[i].imag(), 0.0);
     }
     // calculate HK for k point
-    hk.assign(paraV->get_row_size() * paraV->get_col_size(), std::complex<double>(0.0, 0.0));
+    hsk.set_zero_hk();
     op.contributeHk(1);
     // check the value of HK
     for (int i = 0; i < paraV->get_row_size() * paraV->get_col_size(); ++i)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp
index 6aafc318c5..a2054a6fa2 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp
@@ -133,14 +133,15 @@ class NonlocalNewTest : public ::testing::Test
 TEST_F(NonlocalNewTest, constructHRd2d)
 {
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(1, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
-    std::vector<double> hk(paraV->get_row_size() * paraV->get_col_size(), 0.0);
+    hamilt::HS_Matrix_K<double> hsk(paraV, 1);
+    hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
     // check some input values
     EXPECT_EQ(ucell.infoNL.Beta[0].get_rcut_max(), 1.0);
     EXPECT_EQ(LCAO_Orbitals::get_const_instance().Phi[0].getRcut(), 1.0);
     std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now();
     hamilt::NonlocalNew<hamilt::OperatorLCAO<double, double>>
-        op(nullptr, kvec_d_in, HR, &hk, &ucell, &gd, &intor_, paraV);
+        op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
     std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> elapsed_time
         = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
@@ -169,7 +170,8 @@ TEST_F(NonlocalNewTest, constructHRd2d)
     end_time = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> elapsed_time2
         = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
-    // check the value of SK
+    // check the value of HK
+    double* hk = hsk.get_hk();
     for (int i = 0; i < paraV->get_row_size() * paraV->get_col_size(); ++i)
     {
         EXPECT_EQ(hk[i], 5.0 * test_size);
@@ -205,10 +207,11 @@ TEST_F(NonlocalNewTest, constructHRd2cd)
 {
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(2, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
     kvec_d_in[1] = ModuleBase::Vector3<double>(0.1, 0.2, 0.3);
-    std::vector<std::complex<double>> hk(paraV->get_row_size() * paraV->get_col_size(), std::complex<double>(0.0, 0.0));
+    hamilt::HS_Matrix_K<std::complex<double>> hsk(paraV);
+    hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
     hamilt::NonlocalNew<hamilt::OperatorLCAO<std::complex<double>, double>>
-        op(nullptr, kvec_d_in, HR, &hk, &ucell, &gd, &intor_, paraV);
+        op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
     op.contributeHR();
     // check the value of HR
     for (int iap = 0; iap < HR->size_atom_pairs(); ++iap)
@@ -224,16 +227,17 @@ TEST_F(NonlocalNewTest, constructHRd2cd)
             EXPECT_EQ(tmp.get_pointer(0)[i], 5.0 * test_size);
         }
     }
-    // calculate SK for gamma point
+    // calculate HK for gamma point
     op.contributeHk(0);
-    // check the value of SK of gamma point
+    // check the value of HK of gamma point
+    auto* hk = hsk.get_hk();
     for (int i = 0; i < paraV->get_row_size() * paraV->get_col_size(); ++i)
     {
         EXPECT_EQ(hk[i].real(), 5.0 * test_size);
         EXPECT_EQ(hk[i].imag(), 0.0);
     }
     // calculate HK for k point
-    hk.assign(paraV->get_row_size() * paraV->get_col_size(), std::complex<double>(0.0, 0.0));
+    hsk.set_zero_hk();
     op.contributeHk(1);
     // check the value of HK
     for (int i = 0; i < paraV->get_row_size() * paraV->get_col_size(); ++i)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew.cpp
index a7d637371d..298439b074 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew.cpp
@@ -104,10 +104,11 @@ class OverlapNewTest : public ::testing::Test
 TEST_F(OverlapNewTest, constructHRd2d)
 {
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(1, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
-    std::vector<double> hk(paraV->get_row_size() * paraV->get_col_size(), 0.0);
+    hamilt::HS_Matrix_K<double> hsk(paraV);
+    hsk.set_zero_sk();
     Grid_Driver gd(0, 0, 0);
     hamilt::OverlapNew<hamilt::OperatorLCAO<double, double>>
-        op(nullptr, kvec_d_in, nullptr, nullptr, SR, &hk, &ucell, &gd, &intor_, paraV);
+        op(&hsk, kvec_d_in, nullptr, SR, &ucell, &gd, &intor_);
     op.contributeHR();
     // check the value of SR
     for (int iap = 0; iap < SR->size_atom_pairs(); ++iap)
@@ -126,9 +127,10 @@ TEST_F(OverlapNewTest, constructHRd2d)
     // calculate SK
     op.contributeHk(0);
     // check the value of SK
-    for (int i = 0; i < paraV->get_row_size() * paraV->get_col_size(); ++i)
+    double* sk = hsk.get_sk();
+    for (int i = 0; i < hsk.get_size(); ++i)
     {
-        EXPECT_EQ(hk[i], 1.0);
+        EXPECT_EQ(sk[i], 1.0);
     }
 }
 
@@ -136,10 +138,11 @@ TEST_F(OverlapNewTest, constructHRd2cd)
 {
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(2, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
     kvec_d_in[1] = ModuleBase::Vector3<double>(0.1, 0.2, 0.3);
-    std::vector<std::complex<double>> hk(paraV->get_row_size() * paraV->get_col_size(), std::complex<double>(0.0, 0.0));
+    hamilt::HS_Matrix_K<std::complex<double>> hsk(paraV);
+    hsk.set_zero_sk();
     Grid_Driver gd(0, 0, 0);
     hamilt::OverlapNew<hamilt::OperatorLCAO<std::complex<double>, double>>
-        op(nullptr, kvec_d_in, nullptr, nullptr, SR, &hk, &ucell, &gd, &intor_, paraV);
+        op(&hsk, kvec_d_in, nullptr, SR, &ucell, &gd, &intor_);
     op.contributeHR();
     // check the value of SR
     for (int iap = 0; iap < SR->size_atom_pairs(); ++iap)
@@ -158,19 +161,20 @@ TEST_F(OverlapNewTest, constructHRd2cd)
     // calculate SK for gamma point
     op.contributeHk(0);
     // check the value of SK of gamma point
+    auto* sk = hsk.get_sk();
     for (int i = 0; i < paraV->get_row_size() * paraV->get_col_size(); ++i)
     {
-        EXPECT_EQ(hk[i].real(), 1.0);
-        EXPECT_EQ(hk[i].imag(), 0.0);
+        EXPECT_EQ(sk[i].real(), 1.0);
+        EXPECT_EQ(sk[i].imag(), 0.0);
     }
     // calculate SK for k point
-    hk.assign(paraV->get_row_size() * paraV->get_col_size(), std::complex<double>(0.0, 0.0));
+    hsk.set_zero_sk();
     op.contributeHk(1);
     // check the value of SK
     for (int i = 0; i < paraV->get_row_size() * paraV->get_col_size(); ++i)
     {
-        EXPECT_NEAR(hk[i].real(), -0.80901699437494723, 1e-10);
-        EXPECT_NEAR(hk[i].imag(), -0.58778525229247336, 1e-10);
+        EXPECT_NEAR(sk[i].real(), -0.80901699437494723, 1e-10);
+        EXPECT_NEAR(sk[i].imag(), -0.58778525229247336, 1e-10);
     }
 }
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew_cd.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew_cd.cpp
index 171f34c2cd..26f51500a6 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew_cd.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew_cd.cpp
@@ -104,10 +104,11 @@ TEST_F(OverlapNewTest, constructHRcd2cd)
     int npol = ucell.get_npol();
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(2, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
     kvec_d_in[1] = ModuleBase::Vector3<double>(0.1, 0.2, 0.3);
-    std::vector<std::complex<double>> hk(paraV->get_row_size() * paraV->get_col_size(), std::complex<double>(0.0, 0.0));
+    hamilt::HS_Matrix_K<std::complex<double>> hsk(paraV);
+    hsk.set_zero_sk();
     Grid_Driver gd(0, 0, 0);
     hamilt::OverlapNew<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>>
-        op(nullptr, kvec_d_in, nullptr, nullptr, SR, &hk, &ucell, &gd, &intor_, paraV);
+        op(&hsk, kvec_d_in, nullptr, SR, &ucell, &gd, &intor_);
     op.contributeHR();
     // check the value of SR
     for (int iap = 0; iap < SR->size_atom_pairs(); ++iap)
@@ -139,6 +140,7 @@ TEST_F(OverlapNewTest, constructHRcd2cd)
     // calculate SK for gamma point
     op.contributeHk(0);
     // check the value of SK of gamma point
+    auto* sk = hsk.get_sk();
     int i = 0;
     for (int irow = 0; irow < paraV->get_row_size(); ++irow)
     {
@@ -146,19 +148,19 @@ TEST_F(OverlapNewTest, constructHRcd2cd)
         {
             if (irow % npol == icol % npol)
             {
-                EXPECT_NEAR(hk[i].real(), 1.0, 1e-10);
-                EXPECT_NEAR(hk[i].imag(), 0.0, 1e-10);
+                EXPECT_NEAR(sk[i].real(), 1.0, 1e-10);
+                EXPECT_NEAR(sk[i].imag(), 0.0, 1e-10);
             }
             else
             {
-                EXPECT_NEAR(hk[i].real(), 0.0, 1e-10);
-                EXPECT_NEAR(hk[i].imag(), 0.0, 1e-10);
+                EXPECT_NEAR(sk[i].real(), 0.0, 1e-10);
+                EXPECT_NEAR(sk[i].imag(), 0.0, 1e-10);
             }
             ++i;
         }
     }
     // calculate SK for k point
-    hk.assign(paraV->get_row_size() * paraV->get_col_size(), std::complex<double>(0.0, 0.0));
+    hsk.set_zero_sk();
     op.contributeHk(1);
     // check the value of SK
     i = 0;
@@ -168,13 +170,13 @@ TEST_F(OverlapNewTest, constructHRcd2cd)
         {
             if (irow % npol == icol % npol)
             {
-                EXPECT_NEAR(hk[i].real(), -0.80901699437494723, 1e-10);
-                EXPECT_NEAR(hk[i].imag(), -0.58778525229247336, 1e-10);
+                EXPECT_NEAR(sk[i].real(), -0.80901699437494723, 1e-10);
+                EXPECT_NEAR(sk[i].imag(), -0.58778525229247336, 1e-10);
             }
             else
             {
-                EXPECT_NEAR(hk[i].real(), 0.0, 1e-10);
-                EXPECT_NEAR(hk[i].imag(), 0.0, 1e-10);
+                EXPECT_NEAR(sk[i].real(), 0.0, 1e-10);
+                EXPECT_NEAR(sk[i].imag(), 0.0, 1e-10);
             }
             ++i;
         }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_sc_lambda_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_sc_lambda_lcao.cpp
index 15efa6c7b8..d37e94efbb 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_sc_lambda_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_sc_lambda_lcao.cpp
@@ -86,17 +86,11 @@ TEST_F(ScLambdaLCAOTest, ContributeHk)
     int ncol = 2;
     paraV.set_serial(nrow, ncol);
     EXPECT_EQ(paraV.nloc, 4);
-    // set LM
-    LCAO_Matrix LM;
-    LM.ParaV = &paraV;
-    EXPECT_EQ(LM.ParaV->nloc, 4);
-    LM.Sloc2 = {
-        std::complex<double>{1.0, 0.0},
-        std::complex<double>{0.0, 0.0},
-        std::complex<double>{0.0, 0.0},
-        std::complex<double>{1.0, 0.0}
-    };
-    LM.Hloc2.resize(LM.ParaV->nloc, 0.0);
+    hamilt::HS_Matrix_K<std::complex<double>> hsk(&paraV);
+    hsk.set_zero_hk();
+    hsk.set_zero_sk();
+    hsk.get_sk()[0] = std::complex<double>{1.0, 0.0};
+    hsk.get_sk()[3] = std::complex<double>{1.0, 0.0};
     // set sc
     SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>& sc
         = SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::getScInstance();
@@ -122,10 +116,9 @@ TEST_F(ScLambdaLCAOTest, ContributeHk)
     EXPECT_TRUE(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER());
     // set sc_lambda_op
     auto sc_lambda_op
-        = hamilt::OperatorScLambda<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>>(&LM,
+        = hamilt::OperatorScLambda<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>>(&hsk,
                                                                                                      this->kvec_d,
                                                                                                      nullptr,
-                                                                                                     nullptr,
                                                                                                      isk);
     sc_lambda_op.contributeHk(0);
     std::vector<std::complex<double>> columnMajor_h_lambda = {
@@ -134,14 +127,14 @@ TEST_F(ScLambdaLCAOTest, ContributeHk)
         std::complex<double>{-1.0, -1.0},
         std::complex<double>{1.0,  0.0}
     };
-    EXPECT_DOUBLE_EQ(LM.Hloc2[0].real(), columnMajor_h_lambda[0].real());
-    EXPECT_DOUBLE_EQ(LM.Hloc2[0].imag(), columnMajor_h_lambda[0].imag());
-    EXPECT_DOUBLE_EQ(LM.Hloc2[1].real(), columnMajor_h_lambda[1].real());
-    EXPECT_DOUBLE_EQ(LM.Hloc2[1].imag(), columnMajor_h_lambda[1].imag());
-    EXPECT_DOUBLE_EQ(LM.Hloc2[2].real(), columnMajor_h_lambda[2].real());
-    EXPECT_DOUBLE_EQ(LM.Hloc2[2].imag(), columnMajor_h_lambda[2].imag());
-    EXPECT_DOUBLE_EQ(LM.Hloc2[3].real(), columnMajor_h_lambda[3].real());
-    EXPECT_DOUBLE_EQ(LM.Hloc2[3].imag(), columnMajor_h_lambda[3].imag());
+    EXPECT_DOUBLE_EQ(hsk.get_hk()[0].real(), columnMajor_h_lambda[0].real());
+    EXPECT_DOUBLE_EQ(hsk.get_hk()[0].imag(), columnMajor_h_lambda[0].imag());
+    EXPECT_DOUBLE_EQ(hsk.get_hk()[1].real(), columnMajor_h_lambda[1].real());
+    EXPECT_DOUBLE_EQ(hsk.get_hk()[1].imag(), columnMajor_h_lambda[1].imag());
+    EXPECT_DOUBLE_EQ(hsk.get_hk()[2].real(), columnMajor_h_lambda[2].real());
+    EXPECT_DOUBLE_EQ(hsk.get_hk()[2].imag(), columnMajor_h_lambda[2].imag());
+    EXPECT_DOUBLE_EQ(hsk.get_hk()[3].real(), columnMajor_h_lambda[3].real());
+    EXPECT_DOUBLE_EQ(hsk.get_hk()[3].imag(), columnMajor_h_lambda[3].imag());
 }
 
 TEST_F(ScLambdaLCAOTest, ContributeHkS2)
@@ -154,13 +147,9 @@ TEST_F(ScLambdaLCAOTest, ContributeHkS2)
     paraV.set_serial(nrow, ncol);
     EXPECT_EQ(paraV.nloc, 1);
     // set LM
-    LCAO_Matrix LM;
-    LM.ParaV = &paraV;
-    EXPECT_EQ(LM.ParaV->nloc, 1);
-    LM.Sloc2 = {
-        std::complex<double>{1.0, 0.0}
-    };
-    LM.Hloc2.resize(LM.ParaV->nloc, 0.0);
+    hamilt::HS_Matrix_K<std::complex<double>> hsk(&paraV);
+    hsk.set_zero_hk();
+    hsk.get_sk()[0] = std::complex<double>{1.0, 0.0};
     // set sc
     SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>& sc
         = SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::getScInstance();
@@ -186,17 +175,16 @@ TEST_F(ScLambdaLCAOTest, ContributeHkS2)
     EXPECT_TRUE(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER());
     // set sc_lambda_op
     auto sc_lambda_op
-        = hamilt::OperatorScLambda<hamilt::OperatorLCAO<std::complex<double>, double>>(&LM,
-                                                                                                     this->kvec_d,
-                                                                                                     nullptr,
-                                                                                                     nullptr,
-                                                                                                     isk);
+        = hamilt::OperatorScLambda<hamilt::OperatorLCAO<std::complex<double>, double>>(&hsk,
+                                                                                    this->kvec_d,
+                                                                                    nullptr,
+                                                                                    isk);
     sc_lambda_op.contributeHk(0);
     std::vector<std::complex<double>> columnMajor_h_lambda = {
         std::complex<double>{-1.0,  0.0}
     };
-    EXPECT_DOUBLE_EQ(LM.Hloc2[0].real(), columnMajor_h_lambda[0].real());
-    EXPECT_DOUBLE_EQ(LM.Hloc2[0].imag(), columnMajor_h_lambda[0].imag());
+    EXPECT_DOUBLE_EQ(hsk.get_hk()[0].real(), columnMajor_h_lambda[0].real());
+    EXPECT_DOUBLE_EQ(hsk.get_hk()[0].imag(), columnMajor_h_lambda[0].imag());
 }
 
 TEST_F(ScLambdaLCAOTest, TemplateHelpers)
@@ -205,15 +193,13 @@ TEST_F(ScLambdaLCAOTest, TemplateHelpers)
         = hamilt::OperatorScLambda<hamilt::OperatorLCAO<std::complex<double>, std::complex<double>>>(nullptr,
                                                                                                      this->kvec_d,
                                                                                                      nullptr,
-                                                                                                     nullptr,
                                                                                                      isk);
     auto sc_lambda_op1 = hamilt::OperatorScLambda<hamilt::OperatorLCAO<std::complex<double>, double>>(nullptr,
                                                                                                       this->kvec_d,
                                                                                                       nullptr,
-                                                                                                      nullptr,
                                                                                                       isk);
     auto sc_lambda_op2
-        = hamilt::OperatorScLambda<hamilt::OperatorLCAO<double, double>>(nullptr, this->kvec_d, nullptr, nullptr, isk);
+        = hamilt::OperatorScLambda<hamilt::OperatorLCAO<double, double>>(nullptr, this->kvec_d, nullptr, isk);
     EXPECT_NO_THROW(sc_lambda_op.contributeHR());
     EXPECT_NO_THROW(sc_lambda_op1.contributeHR());
     EXPECT_NO_THROW(sc_lambda_op2.contributeHR());
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/tmp_mocks.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/tmp_mocks.cpp
index db6e0a02bb..1664bc0ff2 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/tmp_mocks.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/tmp_mocks.cpp
@@ -127,7 +127,7 @@ void hamilt::OperatorLCAO<TK, TR>::contributeHk(int ik)
     else
     {
         const int ncol = this->hR->get_atom_pair(0).get_paraV()->get_col_size();
-        hamilt::folding_HR(*this->hR, this->hK->data(), this->kvec_d[ik], ncol, 0);
+        hamilt::folding_HR(*this->hR, this->hsk->get_hk(), this->kvec_d[ik], ncol, 0);
     }
 }
 template <typename TK, typename TR>
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
index c0f3f58590..39e91219a0 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
@@ -10,13 +10,14 @@ namespace hamilt
 // initialize_HR()
 template <typename TK, typename TR>
 void Veff<OperatorLCAO<TK, TR>>::initialize_HR(const UnitCell* ucell_in,
-                                        Grid_Driver* GridD,
-                                        const Parallel_Orbitals* paraV)
+                                        Grid_Driver* GridD)
 {
     ModuleBase::TITLE("Veff", "initialize_HR");
     ModuleBase::timer::tick("Veff", "initialize_HR");
 
     this->nspin = GlobalV::NSPIN;
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     for (int iat1 = 0; iat1 < ucell_in->nat; iat1++)
     {
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
index 2587fb54cc..0b639696ad 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
@@ -37,24 +37,22 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     */
     Veff<OperatorLCAO<TK, TR>>(Gint_k* GK_in,
                           Local_Orbital_Charge* loc_in,
-                          LCAO_Matrix* LM_in,
+                          HS_Matrix_K<TK>* hsk_in,
                           const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                           elecstate::Potential* pot_in,
                           hamilt::HContainer<TR>* hR_in,
-                          std::vector<TK>* hK_in,
                           const UnitCell* ucell_in,
-                          Grid_Driver* GridD_in,
-                          const Parallel_Orbitals* paraV)
+                          Grid_Driver* GridD_in)
         : GK(GK_in),
           loc(loc_in),
           pot(pot_in),
           ucell(ucell_in),
           gd(GridD_in),
-          OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in)
+          OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_gint;
 
-        this->initialize_HR(ucell_in, GridD_in, paraV);
+        this->initialize_HR(ucell_in, GridD_in);
         GK_in->initialize_pvpR(*ucell_in, GridD_in);
     }
     /**
@@ -63,20 +61,18 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     */
     Veff<OperatorLCAO<TK, TR>>(Gint_Gamma* GG_in,
                           Local_Orbital_Charge* loc_in,
-                          LCAO_Matrix* LM_in,
+                          HS_Matrix_K<TK>* hsk_in,
                           const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
                           elecstate::Potential* pot_in,
                           hamilt::HContainer<TR>* hR_in,
-                          std::vector<TK>* hK_in,
                           const UnitCell* ucell_in,
-                          Grid_Driver* GridD_in,
-                          const Parallel_Orbitals* paraV
+                          Grid_Driver* GridD_in
                           )
         : GG(GG_in), loc(loc_in), pot(pot_in),
-        OperatorLCAO<TK, TR>(LM_in, kvec_d_in, hR_in, hK_in)
+        OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_gint;
-        this->initialize_HR(ucell_in, GridD_in, paraV);
+        this->initialize_HR(ucell_in, GridD_in);
 
         GG_in->initialize_pvpR(*ucell_in, GridD_in);
     }
@@ -113,7 +109,7 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * HContainer is used to store the electronic kinetic matrix with specific <I,J,R> atom-pairs
      * the size of HR will be fixed after initialization
      */
-    void initialize_HR(const UnitCell* ucell_in, Grid_Driver* GridD_in, const Parallel_Orbitals* paraV);
+    void initialize_HR(const UnitCell* ucell_in, Grid_Driver* GridD_in);
 
 };
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
index ab64b30238..36e5fff423 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/spar_dh.cpp
@@ -60,7 +60,7 @@ void sparse_format::cal_dH(LCAO_Matrix& lm,
 
     LCAO_domain::build_Nonlocal_mu_new(lm,
                                        fsr_dh,
-                                       lm.Hloc_fixed.data(),
+                                       nullptr,
                                        true,
                                        GlobalC::ucell,
                                        GlobalC::ORB,
diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_h_lambda.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_h_lambda.cpp
index 6bf1265386..be60643477 100644
--- a/source/module_hamilt_lcao/module_deltaspin/cal_h_lambda.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/cal_h_lambda.cpp
@@ -7,7 +7,7 @@
 template <>
 void SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::cal_h_lambda(
     std::complex<double>* h_lambda,
-    const std::vector<std::complex<double>>& Sloc2,
+    const std::complex<double>* Sloc2,
     bool column_major,
     int isk)
 {
diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
index 8701fe5cd8..fb88b8eb57 100644
--- a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
@@ -19,13 +19,16 @@ ModuleBase::matrix SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>:
     ModuleBase::matrix MecMulP(this->nspin_, nlocal, true), orbMulP(this->nspin_, nlocal, true);
     for(size_t ik = 0; ik != this->kv_.get_nks(); ++ik)
     {
+        std::complex<double> *sk = nullptr;
         if (this->nspin_ == 4)
         {
-            dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt)->updateSk(ik, LM, 1);
+            dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt)->updateSk(ik, 1);
+            sk = dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt)->getSk();
         }
         else
         {
-            dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(this->p_hamilt)->updateSk(ik, LM, 1);
+            dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(this->p_hamilt)->updateSk(ik, 1);
+            sk = dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(this->p_hamilt)->getSk();
         }
         ModuleBase::ComplexMatrix mud(this->ParaV->ncol, this->ParaV->nrow, true);
 #ifdef __MPI
@@ -43,7 +46,7 @@ ModuleBase::matrix SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>:
                 &one_int,
                 &one_int,
                 this->ParaV->desc,
-                LM->Sloc2.data(),
+                sk,
                 &one_int,
                 &one_int,
                 this->ParaV->desc,
diff --git a/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h b/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h
index 382ed88686..24151fb833 100644
--- a/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h
+++ b/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h
@@ -44,7 +44,7 @@ class SpinConstrain
                elecstate::ElecState* pelec_in);
 
   /// calculate h_lambda operator for spin-constrained DFT
-  void cal_h_lambda(std::complex<double>* h_lambda, const std::vector<std::complex<double>>& Sloc2, bool column_major, int isk);
+  void cal_h_lambda(std::complex<double>* h_lambda, const std::complex<double>* Sloc2, bool column_major, int isk);
 
   void cal_MW(const int& step, LCAO_Matrix* LM, bool print = false);
 
diff --git a/source/module_hamilt_lcao/module_deltaspin/template_helpers.cpp b/source/module_hamilt_lcao/module_deltaspin/template_helpers.cpp
index 5afd7e98a9..98721bd87b 100644
--- a/source/module_hamilt_lcao/module_deltaspin/template_helpers.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/template_helpers.cpp
@@ -2,7 +2,7 @@
 
 template <>
 void SpinConstrain<double, base_device::DEVICE_CPU>::cal_h_lambda(std::complex<double>* h_lambda,
-                                                                  const std::vector<std::complex<double>>& Sloc2,
+                                                                  const std::complex<double>* Sloc2,
                                                                   bool column_major,
                                                                   int isk)
 {
diff --git a/source/module_hamilt_lcao/module_deltaspin/test/cal_h_lambda_test.cpp b/source/module_hamilt_lcao/module_deltaspin/test/cal_h_lambda_test.cpp
index 19984eb01b..f3312716b7 100644
--- a/source/module_hamilt_lcao/module_deltaspin/test/cal_h_lambda_test.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/test/cal_h_lambda_test.cpp
@@ -56,7 +56,7 @@ TEST_F(SpinConstrainTest, CalHLambda)
     sc_lambda[0][2] = 1.0;
     sc.set_sc_lambda(sc_lambda, 1);
     // column_major = true
-    sc.cal_h_lambda(&h_lambda[0], Sloc2, true, 0);
+    sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), true, 0);
     // h_lambda = - [lambda_x * sigma_x + lambda_y * sigma_y + lambda_z * sigma_z] * Sloc2
     std::vector<std::complex<double>> columnMajor_h_lambda = {
         std::complex<double>{-1.0, 0.0 },
@@ -74,7 +74,7 @@ TEST_F(SpinConstrainTest, CalHLambda)
     EXPECT_DOUBLE_EQ(h_lambda[3].imag(), columnMajor_h_lambda[3].imag());
     // column_major = false
     delete[] sc_lambda;
-    sc.cal_h_lambda(&h_lambda[0], Sloc2, false, 0);
+    sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), false, 0);
     std::vector<std::complex<double>> rowMajor_h_lambda = {
         std::complex<double>{-1.0, 0.0 },
         std::complex<double>{-1.0, -1.0},
@@ -122,14 +122,14 @@ TEST_F(SpinConstrainTest, CalHLambdaS2)
     sc_lambda[0][2] = 2.0;
     sc.set_sc_lambda(sc_lambda, 1);
     // column_major = true
-    sc.cal_h_lambda(&h_lambda[0], Sloc2, true, 0);
+    sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), true, 0);
     // h_lambda = -Sloc2[icc] * this->lambda_[iat2][2]
     std::vector<std::complex<double>> columnMajor_h_lambda = {
         std::complex<double>{-2.0, 0.0 },
     };
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), columnMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), columnMajor_h_lambda[0].imag());
-    sc.cal_h_lambda(&h_lambda[0], Sloc2, true, 1);
+    sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), true, 1);
     // h_lambda = -Sloc2[icc] * (-this->lambda_[iat2][2])
     std::vector<std::complex<double>> columnMajor_h_lambda1 = {
         std::complex<double>{2.0, 0.0 },
@@ -139,14 +139,14 @@ TEST_F(SpinConstrainTest, CalHLambdaS2)
     // column_major = false
     delete[] sc_lambda;
     // h_lambda = -Sloc2[icc] * this->lambda_[iat2][2]
-    sc.cal_h_lambda(&h_lambda[0], Sloc2, false, 0);
+    sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), false, 0);
     std::vector<std::complex<double>> rowMajor_h_lambda = {
         std::complex<double>{-2.0, 0.0 },
     };
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), rowMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), rowMajor_h_lambda[0].imag());
     // h_lambda = -Sloc2[icc] * (-this->lambda_[iat2][2])
-    sc.cal_h_lambda(&h_lambda[0], Sloc2, false, 1);
+    sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), false, 1);
     std::vector<std::complex<double>> rowMajor_h_lambda1 = {
         std::complex<double>{2.0, 0.0 },
     };
diff --git a/source/module_hamilt_lcao/module_deltaspin/test/template_helpers_test.cpp b/source/module_hamilt_lcao/module_deltaspin/test/template_helpers_test.cpp
index d6b0f627ed..993a65184a 100644
--- a/source/module_hamilt_lcao/module_deltaspin/test/template_helpers_test.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/test/template_helpers_test.cpp
@@ -34,7 +34,7 @@ TEST_F(SpinConstrainTest, TemplatHelpers)
 {
     // this is a trivial test as the double version is not used
     std::vector<std::complex<double>> Sloc2;
-    EXPECT_NO_THROW(sc.cal_h_lambda(nullptr, Sloc2, true, 0));
+    EXPECT_NO_THROW(sc.cal_h_lambda(nullptr, Sloc2.data(), true, 0));
     EXPECT_NO_THROW(sc.cal_mw_from_lambda(0));
     EXPECT_NO_THROW(sc.cal_MW_k(nullptr, std::vector<std::vector<std::complex<double>>>(0)));
     EXPECT_NO_THROW(sc.cal_MW(0, nullptr, false));
diff --git a/source/module_hamilt_lcao/module_dftu/dftu.h b/source/module_hamilt_lcao/module_dftu/dftu.h
index ee8d1ff7cc..db86ab26ad 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu.h
+++ b/source/module_hamilt_lcao/module_dftu/dftu.h
@@ -69,8 +69,8 @@ class DFTU
     // For calculating contribution to Hamiltonian matrices
     //=============================================================
   public:
-    void cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot, const std::vector<int>& isk);
-    void cal_eff_pot_mat_real(const int ik, double* eff_pot, const std::vector<int>& isk);
+    void cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot, const std::vector<int>& isk, const std::complex<double>* sk);
+    void cal_eff_pot_mat_real(const int ik, double* eff_pot, const std::vector<int>& isk, const double* sk);
     void cal_eff_pot_mat_R_double(const int ispin, double* SR, double* HR);
     void cal_eff_pot_mat_R_complex_double(const int ispin, std::complex<double>* SR, std::complex<double>* HR);
 
@@ -82,7 +82,7 @@ class DFTU
   public:
     // calculate the local occupation number matrix
     void cal_occup_m_k(const int iter, const std::vector<std::vector<std::complex<double>>>& dm_k, const K_Vectors& kv, const double& mixing_beta, hamilt::Hamilt<std::complex<double>>* p_ham);
-    void cal_occup_m_gamma(const int iter, const std::vector<std::vector<double>>& dm_gamma, const double& mixing_beta);
+    void cal_occup_m_gamma(const int iter, const std::vector<std::vector<double>>& dm_gamma, const double& mixing_beta, hamilt::Hamilt<double>* p_ham);
 
     // dftu can be calculated only after locale has been initialed
     bool initialed_locale = false;
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp b/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
index 8b58dd8344..0abaac6ef7 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
@@ -282,17 +282,17 @@ void DFTU::folding_matrix_k_new(const int ik,
     // get SR and fold to mat_k
     if(GlobalV::GAMMA_ONLY_LOCAL)
     {
-        dynamic_cast<hamilt::HamiltLCAO<double, double>*>(p_ham)->updateSk(ik, this->LM, hk_type);
+        dynamic_cast<hamilt::HamiltLCAO<double, double>*>(p_ham)->updateSk(ik, hk_type);
     }
     else
     {
         if(GlobalV::NSPIN != 4)
         {
-            dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, this->LM, hk_type);
+            dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, hk_type);
         }
         else
         {
-            dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik, this->LM, hk_type);
+            dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik, hk_type);
         }
     }
 }
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp b/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp
index 15b7f844c2..a786e97a42 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp
@@ -6,7 +6,7 @@
 namespace ModuleDFTU
 {
 
-void DFTU::cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot, const std::vector<int>& isk)
+void DFTU::cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot, const std::vector<int>& isk, const std::complex<double>* sk)
 {
     ModuleBase::TITLE("DFTU", "cal_eff_pot_mat");
     ModuleBase::timer::tick("DFTU", "cal_eff_pot_mat");
@@ -37,7 +37,7 @@ void DFTU::cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot,
             &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
             &half, 
             ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
-            this->LM->Sloc2.data(), &one_int, &one_int, this->LM->ParaV->desc,
+            sk, &one_int, &one_int, this->LM->ParaV->desc,
             &zero,
             eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
 #endif
@@ -57,7 +57,7 @@ void DFTU::cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot,
     return;
 }
 
-void DFTU::cal_eff_pot_mat_real(const int ik, double* eff_pot, const std::vector<int>& isk)
+void DFTU::cal_eff_pot_mat_real(const int ik, double* eff_pot, const std::vector<int>& isk, const double* sk)
 {
     ModuleBase::TITLE("DFTU", "cal_eff_pot_mat");
     ModuleBase::timer::tick("DFTU", "cal_eff_pot_mat");
@@ -86,7 +86,7 @@ void DFTU::cal_eff_pot_mat_real(const int ik, double* eff_pot, const std::vector
             &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
             &half, 
             ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc, 
-            this->LM->Sloc.data(), &one_int, &one_int, this->LM->ParaV->desc,
+            sk, &one_int, &one_int, this->LM->ParaV->desc,
             &beta,
             eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
 #endif
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp b/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp
index 5242dd96f3..da8d93a8b7 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp
@@ -1,6 +1,7 @@
 #include "dftu.h"
 #include "module_base/timer.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
 
 extern "C"
 {
@@ -160,7 +161,15 @@ void DFTU::cal_occup_m_k(const int iter,
     {
         // srho(mu,nu) = \sum_{iw} S(mu,iw)*dm_k(iw,nu)
         this->folding_matrix_k_new(ik, p_ham);
-        std::complex<double>* s_k_pointer = this->LM->Sloc2.data();
+        std::complex<double>* s_k_pointer = nullptr;
+        if(GlobalV::NSPIN != 4)
+        {
+            s_k_pointer = dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->getSk();
+        }
+        else
+        {
+            s_k_pointer = dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->getSk();
+        }
 
 #ifdef __MPI
         pzgemm_(&transN,
@@ -344,7 +353,7 @@ void DFTU::cal_occup_m_k(const int iter,
     return;
 }
 
-void DFTU::cal_occup_m_gamma(const int iter, const std::vector<std::vector<double>> &dm_gamma, const double& mixing_beta)
+void DFTU::cal_occup_m_gamma(const int iter, const std::vector<std::vector<double>> &dm_gamma, const double& mixing_beta, hamilt::Hamilt<double>* p_ham)
 {
     ModuleBase::TITLE("DFTU", "cal_occup_m_gamma");
     ModuleBase::timer::tick("DFTU", "cal_occup_m_gamma");
@@ -361,7 +370,7 @@ void DFTU::cal_occup_m_gamma(const int iter, const std::vector<std::vector<doubl
     for (int is = 0; is < GlobalV::NSPIN; is++)
     {
         // srho(mu,nu) = \sum_{iw} S(mu,iw)*dm_gamma(iw,nu)
-        double* s_gamma_pointer = this->LM->Sloc.data();
+        double* s_gamma_pointer = dynamic_cast<hamilt::HamiltLCAO<double, double>*>(p_ham)->getSk();
 
 #ifdef __MPI
         pdgemm_(&transN,
diff --git a/source/module_hamilt_lcao/module_hcontainer/hcontainer.h b/source/module_hamilt_lcao/module_hcontainer/hcontainer.h
index 269c4ff93f..79105ce340 100644
--- a/source/module_hamilt_lcao/module_hcontainer/hcontainer.h
+++ b/source/module_hamilt_lcao/module_hcontainer/hcontainer.h
@@ -419,6 +419,12 @@ class HContainer
         return paraV->get_global_row_size();
     }
 
+    /**
+     * @brief get parallel orbital pointer to check parallel information
+     * @return const Parallel_Orbitals* , if return is nullptr, it means HContainer is not in parallel mode
+     */
+    const Parallel_Orbitals* get_paraV() const{return paraV;}
+
   private:
     // i-j atom pairs, sorted by matrix of (atom_i, atom_j)
     std::vector<AtomPair<T>> atom_pairs;
diff --git a/source/module_io/output_sk.cpp b/source/module_io/output_sk.cpp
index 24776e4f67..96c0663579 100644
--- a/source/module_io/output_sk.cpp
+++ b/source/module_io/output_sk.cpp
@@ -18,7 +18,7 @@ double* Output_Sk<double>::get_Sk(int ik)
     {
         ModuleBase::WARNING_QUIT("Output_Sk::get_sk", "ik out of range");
     }
-    return this->LM_->Sloc.data();
+    return dynamic_cast<hamilt::HamiltLCAO<double, double>*>(this->p_hamilt_)->getSk();
 }
 
 template <>
@@ -31,13 +31,15 @@ std::complex<double>* Output_Sk<std::complex<double>>::get_Sk(int ik)
     if (this->nspin_ == 4)
     {
         dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt_)
-            ->updateSk(ik, this->LM_, 1);
+            ->updateSk(ik, 1);
+        return dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt_)->getSk();
     }
     else
     {
-        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(this->p_hamilt_)->updateSk(ik, this->LM_, 1);
+        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(this->p_hamilt_)->updateSk(ik, 1);
+        return dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(this->p_hamilt_)->getSk();
     }
-    return this->LM_->Sloc2.data();
+    return nullptr;
 }
 
 template class Output_Sk<double>;
diff --git a/source/module_io/write_Vxc.hpp b/source/module_io/write_Vxc.hpp
index d8b395e773..2b6b923566 100644
--- a/source/module_io/write_Vxc.hpp
+++ b/source/module_io/write_Vxc.hpp
@@ -234,7 +234,7 @@ namespace ModuleIO
         std::vector<hamilt::HContainer<TR>> vxcs_R_ao(nspin0, hamilt::HContainer<TR>(pv));
         for (int is = 0;is < nspin0;++is) vxcs_R_ao[is].set_zero();
         // k (size for each k-point)
-        std::vector<TK> vxc_k_ao(pv->nloc);
+        hamilt::HS_Matrix_K<TK> vxc_k_ao(pv, 1); // only hk is needed, sk is skipped
 
         // 3. allocate operators and contribute HR
         // op (corresponding to hR)
@@ -248,26 +248,24 @@ namespace ModuleIO
             vxcs_op_ao[is] = new hamilt::Veff<hamilt::OperatorLCAO<TK, TR>>(
 					gint, 
 					&loc, 
-					&lm, 
+					&vxc_k_ao, 
 					kv.kvec_d, 
 					potxc, 
 					&vxcs_R_ao[is], 
-					&vxc_k_ao, 
 					&ucell, 
-					&gd, 
-					pv);
+					&gd);
 
             vxcs_op_ao[is]->contributeHR();
         }
         std::vector<std::vector<double>> e_orb_locxc; // orbital energy (local XC)
         std::vector<std::vector<double>> e_orb_tot; // orbital energy (total)
 #ifdef __EXX
-        hamilt::OperatorEXX<hamilt::OperatorLCAO<TK, TR>> vexx_op_ao(&lm, nullptr, &vxc_k_ao, kv);
-        std::vector<TK> vexxonly_k_ao(pv->nloc);
-        hamilt::OperatorEXX<hamilt::OperatorLCAO<TK, TR>> vexxonly_op_ao(&lm, nullptr, &vexxonly_k_ao, kv);
+        hamilt::OperatorEXX<hamilt::OperatorLCAO<TK, TR>> vexx_op_ao(&vxc_k_ao, &lm, nullptr, kv);
+        hamilt::HS_Matrix_K<TK> vexxonly_k_ao(pv, 1); // only hk is needed, sk is skipped
+        hamilt::OperatorEXX<hamilt::OperatorLCAO<TK, TR>> vexxonly_op_ao(&vexxonly_k_ao, &lm, nullptr, kv);
         std::vector<std::vector<double>> e_orb_exx; // orbital energy (EXX)
 #endif
-        hamilt::OperatorDFTU<hamilt::OperatorLCAO<TK, TR>> vdftu_op_ao(&lm, kv.kvec_d, nullptr, &vxc_k_ao, kv.isk);
+        hamilt::OperatorDFTU<hamilt::OperatorLCAO<TK, TR>> vdftu_op_ao(&vxc_k_ao, kv.kvec_d, nullptr, kv.isk);
 
         //4. calculate and write the MO-matrix Exc
         Parallel_2D p2d;
@@ -279,19 +277,19 @@ namespace ModuleIO
         // ======test=======
         for (int ik = 0;ik < kv.get_nks();++ik)
         {
-            ModuleBase::GlobalFunc::ZEROS(vxc_k_ao.data(), pv->nloc);
+            vxc_k_ao.set_zero_hk();
             int is = kv.isk[ik];
             dynamic_cast<hamilt::OperatorLCAO<TK, TR>*>(vxcs_op_ao[is])->contributeHk(ik);
-            const std::vector<TK>& vlocxc_k_mo = cVc(vxc_k_ao.data(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d);
+            const std::vector<TK>& vlocxc_k_mo = cVc(vxc_k_ao.get_hk(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d);
 
 #ifdef __EXX
             if (GlobalC::exx_info.info_global.cal_exx)
             {
                 e_orb_locxc.emplace_back(orbital_energy(ik, nbands, vlocxc_k_mo, p2d));
-                ModuleBase::GlobalFunc::ZEROS(vexxonly_k_ao.data(), pv->nloc);
+                ModuleBase::GlobalFunc::ZEROS(vexxonly_k_ao.get_hk(), pv->nloc);
                 vexx_op_ao.contributeHk(ik);
                 vexxonly_op_ao.contributeHk(ik);
-                std::vector<TK> vexx_k_mo = cVc(vexxonly_k_ao.data(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d);
+                std::vector<TK> vexx_k_mo = cVc(vexxonly_k_ao.get_hk(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d);
                 e_orb_exx.emplace_back(orbital_energy(ik, nbands, vexx_k_mo, p2d));
             }
             // ======test=======
@@ -302,7 +300,7 @@ namespace ModuleIO
 			{
 				vdftu_op_ao.contributeHk(ik);
 			}
-            const std::vector<TK>& vxc_tot_k_mo = cVc(vxc_k_ao.data(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d);
+            const std::vector<TK>& vxc_tot_k_mo = cVc(vxc_k_ao.get_hk(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d);
             e_orb_tot.emplace_back(orbital_energy(ik, nbands, vxc_tot_k_mo, p2d));
             // write
             ModuleIO::save_mat(-1, vxc_tot_k_mo.data(), nbands,
diff --git a/source/module_io/write_dos_lcao.cpp b/source/module_io/write_dos_lcao.cpp
index 9203e9f969..81c30a9462 100644
--- a/source/module_io/write_dos_lcao.cpp
+++ b/source/module_io/write_dos_lcao.cpp
@@ -47,6 +47,9 @@ void ModuleIO::write_dos_lcao(const psi::Psi<double>* psi,
         nspin0 = 2;
     }
 
+    // get the date pointer of SK
+    const double* sk = dynamic_cast<const hamilt::HamiltLCAO<double, double>*>(p_ham)->getSk();
+
     // find the maximal and minimal band energy.
     double emax = ekb(0, 0);
     double emin = ekb(0, 0);
@@ -151,7 +154,7 @@ void ModuleIO::write_dos_lcao(const psi::Psi<double>* psi,
                     &GlobalV::NLOCAL,
                     &GlobalV::NLOCAL,
                     &one_float,
-                    lm.Sloc.data(),
+                    sk,
                     &one_int,
                     &one_int,
                     pv.desc,
@@ -442,16 +445,16 @@ void ModuleIO::write_dos_lcao(const psi::Psi<std::complex<double>>* psi,
             if (is == kv.isk[ik])
             {
                 // calculate SK for current k point
-                // the target matrix is LM->Sloc2 with collumn-major
+                const std::complex<double>* sk = nullptr;
                 if (GlobalV::NSPIN == 4)
                 {
-                    dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik,
-                                                                                                                   &lm,
-                                                                                                                   1);
+                    dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik, 1);
+                    sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->getSk();
                 }
                 else
                 {
-                    dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, &lm, 1);
+                    dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, 1);
+                    sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->getSk();
                 }
 
                 psi->fix_k(ik);
@@ -489,7 +492,7 @@ void ModuleIO::write_dos_lcao(const psi::Psi<std::complex<double>>* psi,
                             &GlobalV::NLOCAL,
                             &GlobalV::NLOCAL,
                             &one_float[0],
-                            lm.Sloc2.data(),
+                            sk,
                             &one_int,
                             &one_int,
                             pv.desc,
diff --git a/source/module_io/write_proj_band_lcao.cpp b/source/module_io/write_proj_band_lcao.cpp
index 0ac97a0062..32425dc130 100644
--- a/source/module_io/write_proj_band_lcao.cpp
+++ b/source/module_io/write_proj_band_lcao.cpp
@@ -21,6 +21,8 @@ void ModuleIO::write_proj_band_lcao(
     ModuleBase::timer::tick("ModuleIO", "write_proj_band_lcao");
 
     const Parallel_Orbitals* pv = lm.ParaV;
+    // get the date pointer of SK
+    const double* sk = dynamic_cast<const hamilt::HamiltLCAO<double, double>*>(p_ham)->getSk();
 
     int nspin0 = 1;
     if (GlobalV::NSPIN == 2)
@@ -61,7 +63,7 @@ void ModuleIO::write_proj_band_lcao(
                 &GlobalV::NLOCAL,
                 &GlobalV::NLOCAL,
                 &one_float,
-                lm.Sloc.data(),
+                sk,
                 &one_int,
                 &one_int,
                 pv->desc,
@@ -207,16 +209,16 @@ void ModuleIO::write_proj_band_lcao(
                 if (is == kv.isk[ik])
                 {
                     // calculate SK for current k point
-                    // the target matrix is LM->Sloc2 with collumn-major
+                    const std::complex<double>* sk = nullptr;
                     if (GlobalV::NSPIN == 4)
                     {
-                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(
-                          ik, &lm, 1);
+                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik, 1);
+                        sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->getSk();
                     }
                     else
                     {
-                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(
-                          ik, &lm, 1);
+                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, 1);
+                        sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->getSk();
                     }
 
                     // calculate Mulk
@@ -241,7 +243,7 @@ void ModuleIO::write_proj_band_lcao(
                             &GlobalV::NLOCAL,
                             &GlobalV::NLOCAL,
                             &one_float[0],
-                            lm.Sloc2.data(),
+                            sk,
                             &one_int,
                             &one_int,
                             pv->desc,
diff --git a/source/module_ri/RI_2D_Comm.h b/source/module_ri/RI_2D_Comm.h
index 1c52b41458..d4be823ddf 100644
--- a/source/module_ri/RI_2D_Comm.h
+++ b/source/module_ri/RI_2D_Comm.h
@@ -43,7 +43,7 @@ namespace RI_2D_Comm
         const double alpha,
         const std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>& Hs,
         const Parallel_Orbitals& pv,
-        std::vector<TK>& Hloc);
+        TK* hk);
 
 	template<typename Tdata>
 	extern std::vector<std::vector<Tdata>> Hexxs_to_Hk(
diff --git a/source/module_ri/RI_2D_Comm.hpp b/source/module_ri/RI_2D_Comm.hpp
index 182c47d032..893316f6fe 100644
--- a/source/module_ri/RI_2D_Comm.hpp
+++ b/source/module_ri/RI_2D_Comm.hpp
@@ -101,7 +101,7 @@ void RI_2D_Comm::add_Hexx(
 	const double alpha,
 	const std::vector<std::map<TA,std::map<TAC,RI::Tensor<Tdata>>>> &Hs,
     const Parallel_Orbitals& pv,
-    std::vector<TK>& Hloc)
+    TK* hk)
 {
 	ModuleBase::TITLE("RI_2D_Comm","add_Hexx");
 	ModuleBase::timer::tick("RI_2D_Comm", "add_Hexx");
@@ -129,7 +129,7 @@ void RI_2D_Comm::add_Hexx(
 					{
 						const int iwt1 = RI_2D_Comm::get_iwt(iat1, iw1_b, is1_b);
                         if (pv.global2local_col(iwt1) < 0)	continue;
-                        LCAO_Matrix::set_mat2d(iwt0, iwt1, RI::Global_Func::convert<TK>(H(iw0_b, iw1_b)) * RI::Global_Func::convert<TK>(frac), pv, Hloc.data());
+                        LCAO_Matrix::set_mat2d(iwt0, iwt1, RI::Global_Func::convert<TK>(H(iw0_b, iw1_b)) * RI::Global_Func::convert<TK>(frac), pv, hk);
 					}
 				}
 			}

From 1211fc31b7057facd54ed26b53c018c888cf84e2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci-lite[bot]"
 <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Date: Fri, 28 Jun 2024 11:46:00 +0000
Subject: [PATCH 02/11] [pre-commit.ci lite] apply automatic fixes

---
 source/module_esolver/esolver_ks_lcao.cpp     |   2 +-
 .../esolver_ks_lcao_tmpfunc.cpp               | 146 ++--
 source/module_hamilt_general/matrixblock.h    |   3 +-
 .../hamilt_lcaodft/FORCE_k.cpp                |   5 +-
 .../hamilt_lcaodft/LCAO_matrix.h              |   1 -
 .../hamilt_lcaodft/LCAO_nnr.cpp               | 504 +++++++-------
 .../hamilt_lcaodft/hamilt_lcao.cpp            |   7 +-
 .../hamilt_lcaodft/hamilt_lcao.h              |  27 +-
 .../hamilt_lcaodft/hs_matrix_k.hpp            |  66 +-
 .../operator_lcao/deepks_lcao.cpp             |   2 +-
 .../operator_lcao/deepks_lcao.h               |   1 -
 .../operator_lcao/dftu_lcao.cpp               |   2 +-
 .../operator_lcao/ekinetic_new.cpp            |   2 +-
 .../operator_lcao/meta_lcao.cpp               |   3 +-
 .../hamilt_lcaodft/operator_lcao/meta_lcao.h  |  13 +-
 .../operator_lcao/nonlocal_new.cpp            |   2 +-
 .../operator_lcao/op_dftu_lcao.cpp            |  13 +-
 .../operator_lcao/op_dftu_lcao.h              |   7 +-
 .../operator_lcao/op_exx_lcao.cpp             |  28 +-
 .../operator_lcao/op_exx_lcao.h               |  38 +-
 .../operator_lcao/op_exx_lcao.hpp             | 196 +++---
 .../operator_lcao/operator_lcao.cpp           | 239 ++++---
 .../operator_lcao/operator_lcao.h             |  37 +-
 .../operator_lcao/overlap_new.cpp             |   2 +-
 .../operator_lcao/sc_lambda_lcao.cpp          |  16 +-
 .../operator_lcao/sc_lambda_lcao.h            |  10 +-
 .../operator_lcao/td_ekinetic_lcao.cpp        |   4 +-
 .../operator_lcao/td_ekinetic_lcao.h          |  54 +-
 .../operator_lcao/test/test_dftu.cpp          |   3 +-
 .../operator_lcao/test/test_ekineticnew.cpp   |  11 +-
 .../operator_lcao/test/test_nonlocalnew.cpp   |  11 +-
 .../operator_lcao/test/test_overlapnew.cpp    |   3 +-
 .../test/test_sc_lambda_lcao.cpp              |  49 +-
 .../operator_lcao/veff_lcao.cpp               |  47 +-
 .../hamilt_lcaodft/operator_lcao/veff_lcao.h  |  64 +-
 .../module_deltaspin/cal_h_lambda.cpp         |  30 +-
 .../module_deltaspin/cal_mw.cpp               |  17 +-
 .../module_deltaspin/spin_constrain.h         | 140 ++--
 .../test/cal_h_lambda_test.cpp                |  70 +-
 .../test/template_helpers_test.cpp            |   6 +-
 source/module_hamilt_lcao/module_dftu/dftu.h  | 191 ++---
 .../module_dftu/dftu_folding.cpp              | 128 ++--
 .../module_dftu/dftu_hamilt.cpp               | 185 +++--
 .../module_dftu/dftu_occup.cpp                | 128 ++--
 .../module_hcontainer/hcontainer.h            |   5 +-
 source/module_io/output_sk.cpp                |   3 +-
 source/module_io/write_Vxc.hpp                | 654 ++++++++++--------
 source/module_io/write_dos_lcao.cpp           |   6 +-
 source/module_io/write_proj_band_lcao.cpp     | 221 +++---
 source/module_ri/RI_2D_Comm.h                 |  88 ++-
 source/module_ri/RI_2D_Comm.hpp               | 322 +++++----
 51 files changed, 2023 insertions(+), 1789 deletions(-)

diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp
index c00713bd21..b2e720e4a1 100644
--- a/source/module_esolver/esolver_ks_lcao.cpp
+++ b/source/module_esolver/esolver_ks_lcao.cpp
@@ -390,7 +390,7 @@ void ESolver_KS_LCAO<TK, TR>::cal_stress(ModuleBase::matrix& stress)
 //! mohan add 2024-05-11
 //------------------------------------------------------------------------------
 template <typename TK, typename TR>
-void ESolver_KS_LCAO<TK, TR>::after_all_runners(void)
+void ESolver_KS_LCAO<TK, TR>::after_all_runners()
 {
     ModuleBase::TITLE("ESolver_KS_LCAO", "after_all_runners");
     ModuleBase::timer::tick("ESolver_KS_LCAO", "after_all_runners");
diff --git a/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp b/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp
index 7a5f9fdbcd..7dc536f139 100644
--- a/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp
+++ b/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp
@@ -8,104 +8,70 @@
 namespace ModuleESolver
 {
 
-    using namespace std;
+using namespace std;
 
-    //! dftu occupation matrix for gamma only using dm(double)
-    template <>
-    void ESolver_KS_LCAO<double, double>::dftu_cal_occup_m(
-         const int& iter, 
-         const vector<vector<double>>& dm)const
-    {
-		GlobalC::dftu.cal_occup_m_gamma(
-				iter, 
-				dm, 
-				this->p_chgmix->get_mixing_beta(),
-                this->p_hamilt);
-    }
+//! dftu occupation matrix for gamma only using dm(double)
+template <>
+void ESolver_KS_LCAO<double, double>::dftu_cal_occup_m(const int& iter, const vector<vector<double>>& dm) const
+{
+    GlobalC::dftu.cal_occup_m_gamma(iter, dm, this->p_chgmix->get_mixing_beta(), this->p_hamilt);
+}
 
-    //! dftu occupation matrix for multiple k-points using dm(complex)
-    template <>
-    void ESolver_KS_LCAO<complex<double>, double>::dftu_cal_occup_m(
-       const int& iter, 
-       const vector<vector<complex<double>>>& dm)const
-    {
-        GlobalC::dftu.cal_occup_m_k(
-                 iter, 
-                 dm, 
-                 this->kv, 
-                 this->p_chgmix->get_mixing_beta(), 
-                 this->p_hamilt);
-    }
+//! dftu occupation matrix for multiple k-points using dm(complex)
+template <>
+void ESolver_KS_LCAO<complex<double>, double>::dftu_cal_occup_m(const int& iter,
+                                                                const vector<vector<complex<double>>>& dm) const
+{
+    GlobalC::dftu.cal_occup_m_k(iter, dm, this->kv, this->p_chgmix->get_mixing_beta(), this->p_hamilt);
+}
 
-    //! dftu occupation matrix
-    template <>
-    void ESolver_KS_LCAO<complex<double>, complex<double>>::dftu_cal_occup_m(
-       const int& iter, 
-       const vector<vector<complex<double>>>& dm)const
-    {
-        GlobalC::dftu.cal_occup_m_k(
-                 iter, 
-                 dm, 
-                 this->kv, 
-                 this->p_chgmix->get_mixing_beta(), 
-                 this->p_hamilt);
-    }
+//! dftu occupation matrix
+template <>
+void ESolver_KS_LCAO<complex<double>, complex<double>>::dftu_cal_occup_m(
+    const int& iter,
+    const vector<vector<complex<double>>>& dm) const
+{
+    GlobalC::dftu.cal_occup_m_k(iter, dm, this->kv, this->p_chgmix->get_mixing_beta(), this->p_hamilt);
+}
 
 #ifdef __DEEPKS
-    template<>
-    void ESolver_KS_LCAO<double, double>::dpks_cal_e_delta_band(
-       const vector<vector<double>>& dm)const
-    {
-        GlobalC::ld.cal_e_delta_band(dm);
-    }
-
-
-    template<>
-    void ESolver_KS_LCAO<complex<double>, double>::dpks_cal_e_delta_band(
-        const vector<vector<complex<double>>>& dm)const
-    {
-        GlobalC::ld.cal_e_delta_band_k(dm, this->kv.get_nks());
-    }
-
-
-    template<>
-    void ESolver_KS_LCAO<complex<double>, complex<double>>::dpks_cal_e_delta_band(
-        const vector<vector<complex<double>>>& dm)const
-    {
-        GlobalC::ld.cal_e_delta_band_k(dm, this->kv.get_nks());
-    }
-
+template <>
+void ESolver_KS_LCAO<double, double>::dpks_cal_e_delta_band(const vector<vector<double>>& dm) const
+{
+    GlobalC::ld.cal_e_delta_band(dm);
+}
 
-    template<>
-    void ESolver_KS_LCAO<double, double>::dpks_cal_projected_DM(
-        const elecstate::DensityMatrix<double, double>* dm)const
-    {
-        GlobalC::ld.cal_projected_DM(dm,
-            GlobalC::ucell,
-            GlobalC::ORB,
-            GlobalC::GridD);
-    }
+template <>
+void ESolver_KS_LCAO<complex<double>, double>::dpks_cal_e_delta_band(const vector<vector<complex<double>>>& dm) const
+{
+    GlobalC::ld.cal_e_delta_band_k(dm, this->kv.get_nks());
+}
 
+template <>
+void ESolver_KS_LCAO<complex<double>, complex<double>>::dpks_cal_e_delta_band(
+    const vector<vector<complex<double>>>& dm) const
+{
+    GlobalC::ld.cal_e_delta_band_k(dm, this->kv.get_nks());
+}
 
-    template<>
-    void ESolver_KS_LCAO<complex<double>, double>::dpks_cal_projected_DM(
-          const elecstate::DensityMatrix<complex<double>, double>* dm)const
-    {
-        GlobalC::ld.cal_projected_DM_k(dm,
-            GlobalC::ucell,
-            GlobalC::ORB,
-            GlobalC::GridD);
-    }
+template <>
+void ESolver_KS_LCAO<double, double>::dpks_cal_projected_DM(const elecstate::DensityMatrix<double, double>* dm) const
+{
+    GlobalC::ld.cal_projected_DM(dm, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD);
+}
 
+template <>
+void ESolver_KS_LCAO<complex<double>, double>::dpks_cal_projected_DM(
+    const elecstate::DensityMatrix<complex<double>, double>* dm) const
+{
+    GlobalC::ld.cal_projected_DM_k(dm, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD);
+}
 
-    template<>
-    void ESolver_KS_LCAO<complex<double>, complex<double>>::dpks_cal_projected_DM(
-          const elecstate::DensityMatrix<complex<double>, double>* dm)const
-    {
-        GlobalC::ld.cal_projected_DM_k(dm,
-            GlobalC::ucell,
-            GlobalC::ORB,
-            GlobalC::GridD);
-    }
-#endif
+template <>
+void ESolver_KS_LCAO<complex<double>, complex<double>>::dpks_cal_projected_DM(
+    const elecstate::DensityMatrix<complex<double>, double>* dm) const
+{
+    GlobalC::ld.cal_projected_DM_k(dm, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD);
 }
+#endif
+} // namespace ModuleESolver
diff --git a/source/module_hamilt_general/matrixblock.h b/source/module_hamilt_general/matrixblock.h
index 481a95b8a5..32979d2df3 100644
--- a/source/module_hamilt_general/matrixblock.h
+++ b/source/module_hamilt_general/matrixblock.h
@@ -5,7 +5,8 @@
 namespace hamilt
 {
 
-template <typename T> struct MatrixBlock
+template <typename T>
+struct MatrixBlock
 {
     /* this is a simple template block of a matrix
        would change to Eigen in the future */
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
index 56953592fd..a8d2af9dda 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
@@ -143,8 +143,7 @@ void Force_LCAO<std::complex<double>>::allocate(const Parallel_Orbitals& pv,
     {
         cal_deri = false;
 
-        ModuleBase::WARNING_QUIT("cal_syns",
-                                 "This function has been broken and will be fixed later.");
+        ModuleBase::WARNING_QUIT("cal_syns", "This function has been broken and will be fixed later.");
 
         LCAO_domain::build_ST_new(lm,
                                   fsr,
@@ -161,7 +160,7 @@ void Force_LCAO<std::complex<double>>::allocate(const Parallel_Orbitals& pv,
 
         for (int ik = 0; ik < nks; ik++)
         {
-            
+
             bool bit = false; // LiuXh, 2017-03-21
             /*ModuleIO::save_mat(0,
                                lm.Hloc2.data(),
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h
index 1ac8af26a2..d0f6b909b2 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h
@@ -34,7 +34,6 @@ class LCAO_Matrix
 #endif
 
   public:
-
     // LiuXh add 2019-07-15
     double**** Hloc_fixedR_tr;
     double**** SlocR_tr;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
index 13399b6722..6bdc290ab3 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
@@ -1,284 +1,292 @@
-#include "module_hamilt_pw/hamilt_pwdft/global.h"
-#include "module_cell/module_neighbor/sltk_grid_driver.h"
-#include "record_adj.h" //mohan add 2012-07-06
 #include "module_base/timer.h"
 #include "module_base/tool_threading.h"
 #include "module_cell/module_neighbor/sltk_grid_driver.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
+#include "record_adj.h" //mohan add 2012-07-06
 #ifdef __DEEPKS
 #include "module_hamilt_lcao/module_deepks/LCAO_deepks.h"
 #endif
 #include "module_base/libm/libm.h"
+
 #include <algorithm>
 
-// This is for cell R dependent part. 
+// This is for cell R dependent part.
 void Grid_Technique::cal_nnrg(Parallel_Orbitals* pv)
 {
-	ModuleBase::TITLE("LCAO_nnr","cal_nnrg");
-
-	this->cal_max_box_index();
-
-	this->nnrg = 0;
-
-	nlocdimg.clear();
-	nlocstartg.clear();
-	nad.clear();
-	this->nnrg_index.resize(0);
-	
-	this->nad = std::vector<int>(GlobalC::ucell.nat, 0);
-	this->nlocdimg = std::vector<int>(GlobalC::ucell.nat, 0);
-	this->nlocstartg =std::vector<int>(GlobalC::ucell.nat, 0);
-
-	ModuleBase::Vector3<double> tau1, tau2, dtau;
-	ModuleBase::Vector3<double> dtau1, dtau2, tau0;
-	for (int T1 = 0; T1 < GlobalC::ucell.ntype; ++T1)
-	{
-		Atom* atom1 = &GlobalC::ucell.atoms[T1];
-		for (int I1 = 0; I1 < atom1->na; ++I1)
-		{
-			tau1 = atom1->tau[I1];
-
-			GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
-
-			const int iat = GlobalC::ucell.itia2iat(T1,I1);
-
-			// for grid integration (on FFT box),
-			// we only need to consider <phi_i | phi_j>,
-			// which is different from non-local term,
-			// which we need to consdier <phi_i|beta_k><beta_k|phi_j>
-
-			// whether this atom is in this processor.
-			if(this->in_this_processor[iat])
-			{
-				// starting index of adjacents.
-				this->nlocstartg[iat] = this->nnrg;
-
-				// number of adjacent atoms for atom 'iat'
-				this->nad[iat] = 0;
-
-				int count = 0;
-				for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum()+1; ++ad)
-				{
-					const int T2 = GlobalC::GridD.getType(ad);
-					const int I2 = GlobalC::GridD.getNatom(ad);
-					const int iat2 = GlobalC::ucell.itia2iat(T2, I2);
-					Atom* atom2 = &GlobalC::ucell.atoms[T2]; 
-
-					// if the adjacent atom is in this processor.
-					if(this->in_this_processor[iat2])
-					{
-						tau2 = GlobalC::GridD.getAdjacentTau(ad);
-						dtau = GlobalC::GridD.getAdjacentTau(ad) - tau1;
-						double distance = dtau.norm() * GlobalC::ucell.lat0;
-						double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
-
-
-						//if(distance < rcut)
-						// mohan reset this 2013-07-02 in Princeton
-						// we should make absolutely sure that the distance is smaller than GlobalC::ORB.Phi[it].getRcut
-						// this should be consistant with LCAO_nnr::cal_nnrg function 
-						// typical example : 7 Bohr cutoff Si orbital in 14 Bohr length of cell.
-						// distance = 7.0000000000000000
-						// GlobalC::ORB.Phi[it].getRcut = 7.0000000000000008
-						if(distance < rcut - 1.0e-15)
-						{
-							//storing the indexed for nnrg
-							const int mu = pv->global2local_row(iat);
-							const int nu = pv->global2local_col(iat2);
-							this->nnrg_index.push_back(gridIntegral::gridIndex{this->nnrg, mu, nu, GlobalC::GridD.getBox(ad), atom1->nw, atom2->nw});
-							
-							const int nelement = atom1->nw * atom2->nw;
-							this->nnrg += nelement;
-							this->nlocdimg[iat] += nelement; 
-							this->nad[iat]++;
-							++count;
-						}
-					}// end iat2
-				}// end ad
-//				GlobalV::ofs_running << " iat=" << iat << " nlocstartg=" << nlocstartg[iat] << " nad=" << nad[iat] << std::endl;
-			}// end iat
-		}// end I1
-	}// end T1
-
-	if(GlobalV::OUT_LEVEL != "m") ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"nnrg",this->nnrg);
-
-	//--------------------------------------------------
-	// search again, to order each (iat2, b1, b2, b3)
-	// find_R2 is used to target DM_R.
-	// because DM_R is allocated with nnrg.
-	// So once we had dR = R2 - R1 and iat2,
-	// we need to find out the corresponding positions
-	// in DM_R
-	//--------------------------------------------------
-	if(allocate_find_R2)
-	{
-		find_R2.clear();
-		find_R2st.clear();
-		find_R2_sorted_index.clear();
-		allocate_find_R2 = false;
-	}
-	this->find_R2.resize(GlobalC::ucell.nat);
-	this->find_R2st.resize(GlobalC::ucell.nat);
-	this->find_R2_sorted_index.resize(GlobalC::ucell.nat);
-	for (int iat = 0; iat < GlobalC::ucell.nat; iat++)
-	{
-		this->find_R2_sorted_index[iat].resize(nad[iat]);
-		std::fill(this->find_R2_sorted_index[iat].begin(), this->find_R2_sorted_index[iat].end(), 0);
-		this->find_R2st[iat].resize(nad[iat]);
-		std::fill(this->find_R2st[iat].begin(), this->find_R2st[iat].end(), 0);
-		this->find_R2[iat].resize(nad[iat]);
-		std::fill(this->find_R2[iat].begin(), this->find_R2[iat].end(), 0);
-	}
-
-	allocate_find_R2 = true;
-
-	for (int T1 = 0; T1 < GlobalC::ucell.ntype; T1++)
-	{
-		for (int I1 = 0; I1 < GlobalC::ucell.atoms[T1].na; I1++)
-		{
-//			std::cout << " T1=" << T1 << " I1=" << I1 << std::endl; 
-			tau1 = GlobalC::ucell.atoms[T1].tau[I1];
-			GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
-			const int iat = GlobalC::ucell.itia2iat(T1,I1);
-
-//			std::cout << " Number of adjacent = " << GlobalC::GridD.getAdjacentNum()+1 << std::endl;
-			
-			int count=0;
-			for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum()+1; ad++)
-			{
-		//		std::cout << " ad=" << ad << std::endl;
-				const int T2 = GlobalC::GridD.getType(ad);
-				const int I2 = GlobalC::GridD.getNatom(ad);
-				const int iat2 = GlobalC::ucell.itia2iat(T2,I2);
-
-				// if this atom is in this processor.
-				if(this->in_this_processor[iat])
-				{
-					if(this->in_this_processor[iat2])
-					{
-						dtau = GlobalC::GridD.getAdjacentTau(ad) - tau1;
+    ModuleBase::TITLE("LCAO_nnr", "cal_nnrg");
+
+    this->cal_max_box_index();
+
+    this->nnrg = 0;
+
+    nlocdimg.clear();
+    nlocstartg.clear();
+    nad.clear();
+    this->nnrg_index.resize(0);
+
+    this->nad = std::vector<int>(GlobalC::ucell.nat, 0);
+    this->nlocdimg = std::vector<int>(GlobalC::ucell.nat, 0);
+    this->nlocstartg = std::vector<int>(GlobalC::ucell.nat, 0);
+
+    ModuleBase::Vector3<double> tau1, tau2, dtau;
+    ModuleBase::Vector3<double> dtau1, dtau2, tau0;
+    for (int T1 = 0; T1 < GlobalC::ucell.ntype; ++T1)
+    {
+        Atom* atom1 = &GlobalC::ucell.atoms[T1];
+        for (int I1 = 0; I1 < atom1->na; ++I1)
+        {
+            tau1 = atom1->tau[I1];
+
+            GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
+
+            const int iat = GlobalC::ucell.itia2iat(T1, I1);
+
+            // for grid integration (on FFT box),
+            // we only need to consider <phi_i | phi_j>,
+            // which is different from non-local term,
+            // which we need to consdier <phi_i|beta_k><beta_k|phi_j>
+
+            // whether this atom is in this processor.
+            if (this->in_this_processor[iat])
+            {
+                // starting index of adjacents.
+                this->nlocstartg[iat] = this->nnrg;
+
+                // number of adjacent atoms for atom 'iat'
+                this->nad[iat] = 0;
+
+                int count = 0;
+                for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum() + 1; ++ad)
+                {
+                    const int T2 = GlobalC::GridD.getType(ad);
+                    const int I2 = GlobalC::GridD.getNatom(ad);
+                    const int iat2 = GlobalC::ucell.itia2iat(T2, I2);
+                    Atom* atom2 = &GlobalC::ucell.atoms[T2];
+
+                    // if the adjacent atom is in this processor.
+                    if (this->in_this_processor[iat2])
+                    {
+                        tau2 = GlobalC::GridD.getAdjacentTau(ad);
+                        dtau = GlobalC::GridD.getAdjacentTau(ad) - tau1;
+                        double distance = dtau.norm() * GlobalC::ucell.lat0;
+                        double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
+
+                        // if(distance < rcut)
+                        //  mohan reset this 2013-07-02 in Princeton
+                        //  we should make absolutely sure that the distance is smaller than
+                        //  GlobalC::ORB.Phi[it].getRcut this should be consistant with LCAO_nnr::cal_nnrg function
+                        //  typical example : 7 Bohr cutoff Si orbital in 14 Bohr length of cell.
+                        //  distance = 7.0000000000000000
+                        //  GlobalC::ORB.Phi[it].getRcut = 7.0000000000000008
+                        if (distance < rcut - 1.0e-15)
+                        {
+                            // storing the indexed for nnrg
+                            const int mu = pv->global2local_row(iat);
+                            const int nu = pv->global2local_col(iat2);
+                            this->nnrg_index.push_back(gridIntegral::gridIndex{this->nnrg,
+                                                                               mu,
+                                                                               nu,
+                                                                               GlobalC::GridD.getBox(ad),
+                                                                               atom1->nw,
+                                                                               atom2->nw});
+
+                            const int nelement = atom1->nw * atom2->nw;
+                            this->nnrg += nelement;
+                            this->nlocdimg[iat] += nelement;
+                            this->nad[iat]++;
+                            ++count;
+                        }
+                    } // end iat2
+                }     // end ad
+                      //				GlobalV::ofs_running << " iat=" << iat << " nlocstartg=" << nlocstartg[iat] << " nad=" <<
+                      //nad[iat] << std::endl;
+            }         // end iat
+        }             // end I1
+    }                 // end T1
+
+    if (GlobalV::OUT_LEVEL != "m")
+        ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nnrg", this->nnrg);
+
+    //--------------------------------------------------
+    // search again, to order each (iat2, b1, b2, b3)
+    // find_R2 is used to target DM_R.
+    // because DM_R is allocated with nnrg.
+    // So once we had dR = R2 - R1 and iat2,
+    // we need to find out the corresponding positions
+    // in DM_R
+    //--------------------------------------------------
+    if (allocate_find_R2)
+    {
+        find_R2.clear();
+        find_R2st.clear();
+        find_R2_sorted_index.clear();
+        allocate_find_R2 = false;
+    }
+    this->find_R2.resize(GlobalC::ucell.nat);
+    this->find_R2st.resize(GlobalC::ucell.nat);
+    this->find_R2_sorted_index.resize(GlobalC::ucell.nat);
+    for (int iat = 0; iat < GlobalC::ucell.nat; iat++)
+    {
+        this->find_R2_sorted_index[iat].resize(nad[iat]);
+        std::fill(this->find_R2_sorted_index[iat].begin(), this->find_R2_sorted_index[iat].end(), 0);
+        this->find_R2st[iat].resize(nad[iat]);
+        std::fill(this->find_R2st[iat].begin(), this->find_R2st[iat].end(), 0);
+        this->find_R2[iat].resize(nad[iat]);
+        std::fill(this->find_R2[iat].begin(), this->find_R2[iat].end(), 0);
+    }
+
+    allocate_find_R2 = true;
+
+    for (int T1 = 0; T1 < GlobalC::ucell.ntype; T1++)
+    {
+        for (int I1 = 0; I1 < GlobalC::ucell.atoms[T1].na; I1++)
+        {
+            //			std::cout << " T1=" << T1 << " I1=" << I1 << std::endl;
+            tau1 = GlobalC::ucell.atoms[T1].tau[I1];
+            GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
+            const int iat = GlobalC::ucell.itia2iat(T1, I1);
+
+            //			std::cout << " Number of adjacent = " << GlobalC::GridD.getAdjacentNum()+1 << std::endl;
+
+            int count = 0;
+            for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum() + 1; ad++)
+            {
+                //		std::cout << " ad=" << ad << std::endl;
+                const int T2 = GlobalC::GridD.getType(ad);
+                const int I2 = GlobalC::GridD.getNatom(ad);
+                const int iat2 = GlobalC::ucell.itia2iat(T2, I2);
+
+                // if this atom is in this processor.
+                if (this->in_this_processor[iat])
+                {
+                    if (this->in_this_processor[iat2])
+                    {
+                        dtau = GlobalC::GridD.getAdjacentTau(ad) - tau1;
                         double distance = dtau.norm() * GlobalC::ucell.lat0;
                         double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
 
-						const int b1 = GlobalC::GridD.getBox(ad).x;
-						const int b2 = GlobalC::GridD.getBox(ad).y;
-						const int b3 = GlobalC::GridD.getBox(ad).z;
-					
-						// mohan fix bug 2011-06-26, should be '<', not '<='	
-						//			if(distance < rcut)
-
-						// mohan reset this 2013-07-02 in Princeton
-						// we should make absolutely sure that the distance is smaller than GlobalC::ORB.Phi[it].getRcut
-						// this should be consistant with LCAO_nnr::cal_nnrg function 
-						// typical example : 7 Bohr cutoff Si orbital in 14 Bohr length of cell.
-						// distance = 7.0000000000000000
-						// GlobalC::ORB.Phi[it].getRcut = 7.0000000000000008
-						if(distance < rcut - 1.0e-15)
-						{
-						//	assert( count < nad[iat] );
-							//--------------------------------------------------------------
-							// start positions of adjacent atom of 'iat'
-							// note: the first is not zero.
-							//--------------------------------------------------------------
-							find_R2[iat][count] = this->cal_RindexAtom(b1, b2, b3, iat2);
-							find_R2_sorted_index[iat][count] = count;
-
-
-							// find_R2st
-							// note: the first must be zero.
-							// find_R2st: start position of each adjacen atom.
-							if( count + 1 < nad[iat] )
-							{
-								find_R2st[iat][count+1] = find_R2st[iat][count] 
-								+ GlobalC::ucell.atoms[T1].nw * GlobalC::ucell.atoms[T2].nw; //modified by zhengdy-soc
-							}
-							++count;
-						}
-					}
-				}
-			}
- // Include the necessary header file
-
-			std::stable_sort(find_R2_sorted_index[iat].begin(), find_R2_sorted_index[iat].begin() + nad[iat],
-				[&](int pos1, int pos2){ return find_R2[iat][pos1] < find_R2[iat][pos2]; });
-		}
-	}
-
-	return;
+                        const int b1 = GlobalC::GridD.getBox(ad).x;
+                        const int b2 = GlobalC::GridD.getBox(ad).y;
+                        const int b3 = GlobalC::GridD.getBox(ad).z;
+
+                        // mohan fix bug 2011-06-26, should be '<', not '<='
+                        //			if(distance < rcut)
+
+                        // mohan reset this 2013-07-02 in Princeton
+                        // we should make absolutely sure that the distance is smaller than GlobalC::ORB.Phi[it].getRcut
+                        // this should be consistant with LCAO_nnr::cal_nnrg function
+                        // typical example : 7 Bohr cutoff Si orbital in 14 Bohr length of cell.
+                        // distance = 7.0000000000000000
+                        // GlobalC::ORB.Phi[it].getRcut = 7.0000000000000008
+                        if (distance < rcut - 1.0e-15)
+                        {
+                            //	assert( count < nad[iat] );
+                            //--------------------------------------------------------------
+                            // start positions of adjacent atom of 'iat'
+                            // note: the first is not zero.
+                            //--------------------------------------------------------------
+                            find_R2[iat][count] = this->cal_RindexAtom(b1, b2, b3, iat2);
+                            find_R2_sorted_index[iat][count] = count;
+
+                            // find_R2st
+                            // note: the first must be zero.
+                            // find_R2st: start position of each adjacen atom.
+                            if (count + 1 < nad[iat])
+                            {
+                                find_R2st[iat][count + 1]
+                                    = find_R2st[iat][count]
+                                      + GlobalC::ucell.atoms[T1].nw
+                                            * GlobalC::ucell.atoms[T2].nw; // modified by zhengdy-soc
+                            }
+                            ++count;
+                        }
+                    }
+                }
+            }
+            // Include the necessary header file
+
+            std::stable_sort(find_R2_sorted_index[iat].begin(),
+                             find_R2_sorted_index[iat].begin() + nad[iat],
+                             [&](int pos1, int pos2) { return find_R2[iat][pos1] < find_R2[iat][pos2]; });
+        }
+    }
+
+    return;
 }
 
 void Grid_Technique::cal_max_box_index(void)
 {
-	ModuleBase::TITLE("LCAO_nnr","cal_max_box_index");
-	this->maxB1 = this->maxB2 = this->maxB3 = -10000;
-	this->minB1 = this->minB2 = this->minB3 = 10000;
-	for (int T1 = 0; T1 < GlobalC::ucell.ntype; T1++)
-	{
-		for (int I1 = 0; I1 < GlobalC::ucell.atoms[T1].na; I1++)
-		{
-			ModuleBase::Vector3<double> tau1 = GlobalC::ucell.atoms[T1].tau[I1];
-			//GlobalC::GridD.Find_atom(tau1);
-			GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
-			for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum()+1; ad++)
-			{
-				this->maxB1 = std::max( GlobalC::GridD.getBox(ad).x, maxB1 ); 
-				this->maxB2 = std::max( GlobalC::GridD.getBox(ad).y, maxB2 ); 
-				this->maxB3 = std::max( GlobalC::GridD.getBox(ad).z, maxB3 ); 
-
-				this->minB1 = std::min( GlobalC::GridD.getBox(ad).x, minB1 ); 
-				this->minB2 = std::min( GlobalC::GridD.getBox(ad).y, minB2 ); 
-				this->minB3 = std::min( GlobalC::GridD.getBox(ad).z, minB3 ); 
-			}
-		}
-	}
-
-	nB1 = maxB1-minB1+1;
-	nB2 = maxB2-minB2+1;
-	nB3 = maxB3-minB3+1;
-
-	nbox = nB1 * nB2 * nB3;
-	
-	//ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"nbox",nbox);
-
-	return;
+    ModuleBase::TITLE("LCAO_nnr", "cal_max_box_index");
+    this->maxB1 = this->maxB2 = this->maxB3 = -10000;
+    this->minB1 = this->minB2 = this->minB3 = 10000;
+    for (int T1 = 0; T1 < GlobalC::ucell.ntype; T1++)
+    {
+        for (int I1 = 0; I1 < GlobalC::ucell.atoms[T1].na; I1++)
+        {
+            ModuleBase::Vector3<double> tau1 = GlobalC::ucell.atoms[T1].tau[I1];
+            // GlobalC::GridD.Find_atom(tau1);
+            GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
+            for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum() + 1; ad++)
+            {
+                this->maxB1 = std::max(GlobalC::GridD.getBox(ad).x, maxB1);
+                this->maxB2 = std::max(GlobalC::GridD.getBox(ad).y, maxB2);
+                this->maxB3 = std::max(GlobalC::GridD.getBox(ad).z, maxB3);
+
+                this->minB1 = std::min(GlobalC::GridD.getBox(ad).x, minB1);
+                this->minB2 = std::min(GlobalC::GridD.getBox(ad).y, minB2);
+                this->minB3 = std::min(GlobalC::GridD.getBox(ad).z, minB3);
+            }
+        }
+    }
+
+    nB1 = maxB1 - minB1 + 1;
+    nB2 = maxB2 - minB2 + 1;
+    nB3 = maxB3 - minB3 + 1;
+
+    nbox = nB1 * nB2 * nB3;
+
+    // ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"nbox",nbox);
+
+    return;
 }
 
-int Grid_Technique::cal_RindexAtom(const int &u1, const int &u2, const int &u3, const int &iat2) const
+int Grid_Technique::cal_RindexAtom(const int& u1, const int& u2, const int& u3, const int& iat2) const
 {
-	const int x1 = u1 - this->minB1;
-	const int x2 = u2 - this->minB2;
-	const int x3 = u3 - this->minB3;
-	
-	if(x1<0 || x2<0 || x3<0)
-	{
-		std::cout << " u1=" << u1 << " minB1=" << minB1 << std::endl;
-		std::cout << " u2=" << u2 << " minB2=" << minB2 << std::endl;
-		std::cout << " u3=" << u3 << " minB3=" << minB3 << std::endl;
-		ModuleBase::WARNING_QUIT("LCAO_nnr::cal_Rindex","x1<0 || x2<0 || x3<0 !");
-	}
-
-	assert(x1>=0);
-	assert(x2>=0);
-	assert(x3>=0);
-
-	return (iat2 + (x3 + x2 * this->nB3 + x1 * this->nB2 * this->nB3) * GlobalC::ucell.nat);
+    const int x1 = u1 - this->minB1;
+    const int x2 = u2 - this->minB2;
+    const int x3 = u3 - this->minB3;
+
+    if (x1 < 0 || x2 < 0 || x3 < 0)
+    {
+        std::cout << " u1=" << u1 << " minB1=" << minB1 << std::endl;
+        std::cout << " u2=" << u2 << " minB2=" << minB2 << std::endl;
+        std::cout << " u3=" << u3 << " minB3=" << minB3 << std::endl;
+        ModuleBase::WARNING_QUIT("LCAO_nnr::cal_Rindex", "x1<0 || x2<0 || x3<0 !");
+    }
+
+    assert(x1 >= 0);
+    assert(x2 >= 0);
+    assert(x3 >= 0);
+
+    return (iat2 + (x3 + x2 * this->nB3 + x1 * this->nB2 * this->nB3) * GlobalC::ucell.nat);
 }
 
 int Grid_Technique::binary_search_find_R2_offset(int val, int iat) const
 {
     auto findR2 = this->find_R2[iat];
-	auto findR2_index = this->find_R2_sorted_index[iat];
+    auto findR2_index = this->find_R2_sorted_index[iat];
 
-	int left = 0;
+    int left = 0;
     int right = nad[iat] - 1;
-    while(left <= right)
+    while (left <= right)
     {
         int mid = left + ((right - left) >> 1);
-		int idx = findR2_index[mid];
-        if(val == findR2[idx])
+        int idx = findR2_index[mid];
+        if (val == findR2[idx])
         {
             return idx;
         }
-        if(val < findR2[idx])
+        if (val < findR2[idx])
         {
             right = mid - 1;
         }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
index 1f20a0e778..0d4da7422e 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
@@ -36,7 +36,9 @@ namespace hamilt
 {
 
 template <typename TK, typename TR>
-HamiltLCAO<TK, TR>::HamiltLCAO(const Parallel_Orbitals* paraV, const K_Vectors& kv_in, const TwoCenterIntegrator& intor_overlap_orb)
+HamiltLCAO<TK, TR>::HamiltLCAO(const Parallel_Orbitals* paraV,
+                               const K_Vectors& kv_in,
+                               const TwoCenterIntegrator& intor_overlap_orb)
 {
     this->classname = "HamiltLCAO";
 
@@ -167,8 +169,7 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
                                                                     pot_in,
                                                                     this->hR, // no explicit call yet
                                                                     &GlobalC::ucell,
-                                                                    &GlobalC::GridD
-                );
+                                                                    &GlobalC::GridD);
                 this->getOperator()->add(veff);
             }
         }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
index e227d41cc5..8a3363f7b5 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
@@ -6,11 +6,11 @@
 #include "module_elecstate/potentials/potential_new.h"
 #include "module_hamilt_general/hamilt.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h"
 #include "module_hamilt_lcao/module_gint/gint_gamma.h"
 #include "module_hamilt_lcao/module_gint/gint_k.h"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp"
 namespace hamilt
 {
 
@@ -53,14 +53,29 @@ class HamiltLCAO : public Hamilt<TK>
     /// get pointer of Operator<TK> ops
     Operator<TK>*& getOperator();
     /// get hk-pointer
-    TK* getHk() const{return this->hsk->get_hk();}
+    TK* getHk() const
+    {
+        return this->hsk->get_hk();
+    }
     /// get sk-pointer
-    TK* getSk() const{return this->hsk->get_sk();}
-    int get_size_hsk() const{return this->hsk->get_size();}
+    TK* getSk() const
+    {
+        return this->hsk->get_sk();
+    }
+    int get_size_hsk() const
+    {
+        return this->hsk->get_size();
+    }
     /// get HR pointer of *this->hR, which is a HContainer<TR> and contains H(R)
-    HContainer<TR>*& getHR(){return this->hR;}
+    HContainer<TR>*& getHR()
+    {
+        return this->hR;
+    }
     /// get SR pointer of *this->sR, which is a HContainer<TR> and contains S(R)
-    HContainer<TR>*& getSR(){return this->sR;}
+    HContainer<TR>*& getSR()
+    {
+        return this->sR;
+    }
     /// refresh the status of HR
     void refresh() override;
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
index ce3d79b2d6..f80f55749a 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
@@ -4,29 +4,49 @@
 #include <vector>
 namespace hamilt
 {
-    template <typename TK>
-    class HS_Matrix_K
+template <typename TK>
+class HS_Matrix_K
+{
+  public:
+    HS_Matrix_K(const Parallel_Orbitals* paraV, bool no_s = false)
+    {
+        hk.resize(paraV->nloc);
+        if (!no_s)
+        {
+            sk.resize(paraV->nloc);
+        }
+        this->pv = paraV;
+    }
+    TK* get_hk()
+    {
+        return hk.data();
+    }
+    TK* get_sk()
+    {
+        return sk.data();
+    }
+    int get_size()
     {
-        public:
-            HS_Matrix_K(const Parallel_Orbitals* paraV, bool no_s=false){
-                hk.resize(paraV->nloc);
-                if(!no_s) 
-                {
-                    sk.resize(paraV->nloc);
-                }
-                this->pv = paraV;
-            }
-            TK* get_hk() {return hk.data();}
-            TK* get_sk() {return sk.data();}
-            int get_size() {return hk.size();}
-            void set_zero_hk() {hk.assign(hk.size(), 0);}
-            void set_zero_sk() {sk.assign(sk.size(), 0);}
-            const Parallel_Orbitals* get_pv() const {return this->pv;}
-        private:
-            std::vector<TK> hk;
-            std::vector<TK> sk;
-            const Parallel_Orbitals* pv = nullptr;
-    };
-}
+        return hk.size();
+    }
+    void set_zero_hk()
+    {
+        hk.assign(hk.size(), 0);
+    }
+    void set_zero_sk()
+    {
+        sk.assign(sk.size(), 0);
+    }
+    const Parallel_Orbitals* get_pv() const
+    {
+        return this->pv;
+    }
+
+  private:
+    std::vector<TK> hk;
+    std::vector<TK> sk;
+    const Parallel_Orbitals* pv = nullptr;
+};
+} // namespace hamilt
 
 #endif
\ No newline at end of file
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
index 28565ecdaf..5be9bba4cb 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
@@ -49,7 +49,7 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* Gr
     ModuleBase::TITLE("DeePKS", "initialize_HR");
     ModuleBase::timer::tick("DeePKS", "initialize_HR");
 
-    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     // this->H_V_delta = new HContainer<TR>(paraV);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
index b0fd1e0b46..57bccacf85 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
@@ -55,7 +55,6 @@ class DeePKS<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 #endif
 
   private:
-
     elecstate::DensityMatrix<TK, double>* DM;
 
     const UnitCell* ucell = nullptr;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp
index 6d40d8ffef..2186267ba0 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp
@@ -45,7 +45,7 @@ void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* Grid
     ModuleBase::TITLE("DFTU", "initialize_HR");
     ModuleBase::timer::tick("DFTU", "initialize_HR");
 
-    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     this->adjs_all.clear();
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp
index 0702a14d77..25963b13de 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp
@@ -44,7 +44,7 @@ void hamilt::EkineticNew<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Drive
     ModuleBase::TITLE("EkineticNew", "initialize_HR");
     ModuleBase::timer::tick("EkineticNew", "initialize_HR");
 
-    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     for (int iat1 = 0; iat1 < ucell->nat; iat1++)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.cpp
index de325a998f..279e679680 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.cpp
@@ -1,4 +1,5 @@
 #include "meta_lcao.h"
+
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
@@ -12,4 +13,4 @@ template class Meta<OperatorLCAO<std::complex<double>, double>>;
 
 template class Meta<OperatorLCAO<std::complex<double>, std::complex<double>>>;
 
-}
\ No newline at end of file
+} // namespace hamilt
\ No newline at end of file
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.h
index 587ea036b8..217fec4593 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.h
@@ -23,10 +23,9 @@ template <typename TK, typename TR>
 class Meta<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
-    Meta<OperatorLCAO<TK, TR>>(
-                          HS_Matrix_K<TK>* hsk_in,
-                          const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                          HContainer<TR>* hR_in)
+    Meta<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
+                               const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                               HContainer<TR>* hR_in)
         : OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_gint;
@@ -34,9 +33,11 @@ class Meta<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 
     ~Meta<OperatorLCAO<TK, TR>>(){};
 
-    virtual void contributeHR() override{}//do nothing now
+    virtual void contributeHR() override
+    {
+    } // do nothing now
 
-    virtual void contributeHk(int ik) override{};//do nothing now
+    virtual void contributeHk(int ik) override{}; // do nothing now
 
   private:
 };
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp
index 7714cea230..d2179f7621 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp
@@ -45,7 +45,7 @@ void hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Drive
     ModuleBase::TITLE("NonlocalNew", "initialize_HR");
     ModuleBase::timer::tick("NonlocalNew", "initialize_HR");
 
-    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     this->adjs_all.clear();
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.cpp
index 924dcf1764..d49d4a09bc 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.cpp
@@ -1,4 +1,5 @@
 #include "op_dftu_lcao.h"
+
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
 #include "module_hamilt_lcao/module_dftu/dftu.h"
@@ -13,14 +14,14 @@ template class OperatorDFTU<OperatorLCAO<std::complex<double>, double>>;
 
 template class OperatorDFTU<OperatorLCAO<std::complex<double>, std::complex<double>>>;
 
-template<typename TK, typename TR>
+template <typename TK, typename TR>
 void OperatorDFTU<OperatorLCAO<TK, TR>>::contributeHR()
 {
-    //no calculation of HR yet for DFTU operator
+    // no calculation of HR yet for DFTU operator
     return;
 }
 
-template<>
+template <>
 void OperatorDFTU<OperatorLCAO<double, double>>::contributeHk(int ik)
 {
     ModuleBase::TITLE("OperatorDFTU", "contributeHk");
@@ -38,7 +39,7 @@ void OperatorDFTU<OperatorLCAO<double, double>>::contributeHk(int ik)
     ModuleBase::timer::tick("OperatorDFTU", "contributeHk");
 }
 
-template<>
+template <>
 void OperatorDFTU<OperatorLCAO<std::complex<double>, double>>::contributeHk(int ik)
 {
     ModuleBase::TITLE("OperatorDFTU", "contributeHk");
@@ -56,7 +57,7 @@ void OperatorDFTU<OperatorLCAO<std::complex<double>, double>>::contributeHk(int
     ModuleBase::timer::tick("OperatorDFTU", "contributeHk");
 }
 
-template<>
+template <>
 void OperatorDFTU<OperatorLCAO<std::complex<double>, std::complex<double>>>::contributeHk(int ik)
 {
     ModuleBase::TITLE("OperatorDFTU", "contributeHk");
@@ -74,4 +75,4 @@ void OperatorDFTU<OperatorLCAO<std::complex<double>, std::complex<double>>>::con
     ModuleBase::timer::tick("OperatorDFTU", "contributeHk");
 }
 
-}
\ No newline at end of file
+} // namespace hamilt
\ No newline at end of file
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h
index de40bce4b3..8482fc5e6c 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h
@@ -21,9 +21,9 @@ class OperatorDFTU<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
     OperatorDFTU<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
-                                  const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                                  hamilt::HContainer<TR>* hR_in,
-                                  const std::vector<int>& isk_in)
+                                       const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                                       hamilt::HContainer<TR>* hR_in,
+                                       const std::vector<int>& isk_in)
         : isk(isk_in), OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_dftu;
@@ -34,7 +34,6 @@ class OperatorDFTU<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     virtual void contributeHk(int ik) override;
 
   private:
-
     bool HR_fixed_done = false;
 
     const std::vector<int>& isk;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.cpp
index f4418840b5..83140ec15d 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.cpp
@@ -1,25 +1,41 @@
 #ifdef __EXX
 #include "op_exx_lcao.h"
+
 #include "module_base/blacs_connector.h"
 
 namespace hamilt
 {
 
-template<>
+template <>
 void OperatorEXX<OperatorLCAO<double, double>>::add_loaded_Hexx(const int ik)
 {
-    BlasConnector::axpy(this->LM->ParaV->get_local_size(), 1.0, this->LM->Hexxd_k_load[ik].data(), 1, this->hsk->get_hk(), 1);
+    BlasConnector::axpy(this->LM->ParaV->get_local_size(),
+                        1.0,
+                        this->LM->Hexxd_k_load[ik].data(),
+                        1,
+                        this->hsk->get_hk(),
+                        1);
 }
-template<>
+template <>
 void OperatorEXX<OperatorLCAO<std::complex<double>, double>>::add_loaded_Hexx(const int ik)
 {
 
-    BlasConnector::axpy(this->LM->ParaV->get_local_size(), 1.0, this->LM->Hexxc_k_load[ik].data(), 1, this->hsk->get_hk(), 1);
+    BlasConnector::axpy(this->LM->ParaV->get_local_size(),
+                        1.0,
+                        this->LM->Hexxc_k_load[ik].data(),
+                        1,
+                        this->hsk->get_hk(),
+                        1);
 }
-template<>
+template <>
 void OperatorEXX<OperatorLCAO<std::complex<double>, std::complex<double>>>::add_loaded_Hexx(const int ik)
 {
-    BlasConnector::axpy(this->LM->ParaV->get_local_size(), 1.0, this->LM->Hexxc_k_load[ik].data(), 1, this->hsk->get_hk(), 1);
+    BlasConnector::axpy(this->LM->ParaV->get_local_size(),
+                        1.0,
+                        this->LM->Hexxc_k_load[ik].data(),
+                        1,
+                        this->hsk->get_hk(),
+                        1);
 }
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h
index 6cb1e8fde2..f3a3f6fb40 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h
@@ -3,10 +3,11 @@
 
 #ifdef __EXX
 
-#include <RI/global/Tensor.h>
-#include "operator_lcao.h"
 #include "module_cell/klist.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
+#include "operator_lcao.h"
+
+#include <RI/global/Tensor.h>
 
 namespace hamilt
 {
@@ -25,8 +26,10 @@ template <typename TK, typename TR>
 class OperatorEXX<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
     using TAC = std::pair<int, std::array<int, 3>>;
-public:
-    OperatorEXX<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
+
+  public:
+    OperatorEXX<OperatorLCAO<TK, TR>>(
+        HS_Matrix_K<TK>* hsk_in,
         LCAO_Matrix* LM_in,
         hamilt::HContainer<TR>* hR_in,
         const K_Vectors& kv_in,
@@ -38,24 +41,23 @@ class OperatorEXX<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     virtual void contributeHk(int ik) override;
 
   private:
+    bool HR_fixed_done = false;
 
-      bool HR_fixed_done = false;
-
-      std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd = nullptr;
-      std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc = nullptr;
+    std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd = nullptr;
+    std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc = nullptr;
 
-      /// @brief  the step of the outer loop.
-      /// nullptr: no dependence on the number of two_level_step, contributeHk will do enerything normally.
-      /// 0: the first outer loop. If restart, contributeHk will directly add Hexx to Hloc. else, do nothing.
-      /// >0: not the first outer loop. contributeHk will do enerything normally.
-      int* two_level_step = nullptr;
-      /// @brief if restart, read and save Hexx, and directly use it during the first outer loop.
-      bool restart = false;
+    /// @brief  the step of the outer loop.
+    /// nullptr: no dependence on the number of two_level_step, contributeHk will do enerything normally.
+    /// 0: the first outer loop. If restart, contributeHk will directly add Hexx to Hloc. else, do nothing.
+    /// >0: not the first outer loop. contributeHk will do enerything normally.
+    int* two_level_step = nullptr;
+    /// @brief if restart, read and save Hexx, and directly use it during the first outer loop.
+    bool restart = false;
 
-      void add_loaded_Hexx(const int ik);
-      const K_Vectors& kv;
+    void add_loaded_Hexx(const int ik);
+    const K_Vectors& kv;
 
-      LCAO_Matrix* LM = nullptr;
+    LCAO_Matrix* LM = nullptr;
 };
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp
index 781112ff5c..92db36721e 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp
@@ -3,116 +3,118 @@
 
 #ifdef __EXX
 
-#include "op_exx_lcao.h"
-#include "module_ri/RI_2D_Comm.h"
-#include "module_hamilt_pw/hamilt_pwdft/global.h"
 #include "module_hamilt_general/module_xc/xc_functional.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
+#include "module_ri/RI_2D_Comm.h"
+#include "op_exx_lcao.h"
 
 namespace hamilt
 {
 
 template <typename TK, typename TR>
 OperatorEXX<OperatorLCAO<TK, TR>>::OperatorEXX(
-	HS_Matrix_K<TK>* hsk_in,
-	LCAO_Matrix* LM_in,
-	hamilt::HContainer<TR>* hR_in,
-	const K_Vectors& kv_in,
-	std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd_in,
-	std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc_in,
-	int* two_level_step_in,
-	const bool restart_in)
-	: OperatorLCAO<TK, TR>(hsk_in, kv_in.kvec_d, hR_in),
-	  kv(kv_in),
-	  Hexxd(Hexxd_in),
-	  Hexxc(Hexxc_in), 
-	  two_level_step(two_level_step_in),
-	  restart(restart_in),
-	  LM(LM_in)
+    HS_Matrix_K<TK>* hsk_in,
+    LCAO_Matrix* LM_in,
+    hamilt::HContainer<TR>* hR_in,
+    const K_Vectors& kv_in,
+    std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd_in,
+    std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc_in,
+    int* two_level_step_in,
+    const bool restart_in)
+    : OperatorLCAO<TK, TR>(hsk_in, kv_in.kvec_d, hR_in), kv(kv_in), Hexxd(Hexxd_in), Hexxc(Hexxc_in),
+      two_level_step(two_level_step_in), restart(restart_in), LM(LM_in)
 {
-	this->cal_type = calculation_type::lcao_exx;
-	if (this->restart)
-	{///  Now only Hexx depends on DM, so we can directly read Hexx to reduce the computational cost.
-	/// If other operators depends on DM, we can also read DM and then calculate the operators to save the memory to store operator terms.
-		assert(this->two_level_step != nullptr);
-		/// read in Hexx
-		if (std::is_same<TK, double>::value)
-		{
-			this->LM->Hexxd_k_load.resize(this->kv.get_nks());
-			for (int ik = 0; ik < this->kv.get_nks(); ik++)
-			{
-				this->LM->Hexxd_k_load[ik].resize(this->LM->ParaV->get_local_size(), 0.0);
-				this->restart = GlobalC::restart.load_disk(
-					"Hexx", ik,
-					this->LM->ParaV->get_local_size(), this->LM->Hexxd_k_load[ik].data(), false);
-				if (!this->restart) break;
-			}
-		}
-		else
-		{
-			this->LM->Hexxc_k_load.resize(this->kv.get_nks());
-			for (int ik = 0; ik < this->kv.get_nks(); ik++)
-			{
-				this->LM->Hexxc_k_load[ik].resize(this->LM->ParaV->get_local_size(), 0.0);
-				this->restart = GlobalC::restart.load_disk(
-					"Hexx", ik,
-					this->LM->ParaV->get_local_size(), this->LM->Hexxc_k_load[ik].data(), false);
-				if (!this->restart) break;
-			}
-		}
-		if (!this->restart)
-			std::cout << "WARNING: Hexx not found, restart from the non-exx loop." << std::endl
-			          << "If the loaded charge density is EXX-solved, this may lead to poor convergence." << std::endl;
-		GlobalC::restart.info_load.load_H_finish = this->restart;
-	}
+    this->cal_type = calculation_type::lcao_exx;
+    if (this->restart)
+    {   ///  Now only Hexx depends on DM, so we can directly read Hexx to reduce the computational cost.
+        /// If other operators depends on DM, we can also read DM and then calculate the operators to save the memory to
+        /// store operator terms.
+        assert(this->two_level_step != nullptr);
+        /// read in Hexx
+        if (std::is_same<TK, double>::value)
+        {
+            this->LM->Hexxd_k_load.resize(this->kv.get_nks());
+            for (int ik = 0; ik < this->kv.get_nks(); ik++)
+            {
+                this->LM->Hexxd_k_load[ik].resize(this->LM->ParaV->get_local_size(), 0.0);
+                this->restart = GlobalC::restart.load_disk("Hexx",
+                                                           ik,
+                                                           this->LM->ParaV->get_local_size(),
+                                                           this->LM->Hexxd_k_load[ik].data(),
+                                                           false);
+                if (!this->restart)
+                    break;
+            }
+        }
+        else
+        {
+            this->LM->Hexxc_k_load.resize(this->kv.get_nks());
+            for (int ik = 0; ik < this->kv.get_nks(); ik++)
+            {
+                this->LM->Hexxc_k_load[ik].resize(this->LM->ParaV->get_local_size(), 0.0);
+                this->restart = GlobalC::restart.load_disk("Hexx",
+                                                           ik,
+                                                           this->LM->ParaV->get_local_size(),
+                                                           this->LM->Hexxc_k_load[ik].data(),
+                                                           false);
+                if (!this->restart)
+                    break;
+            }
+        }
+        if (!this->restart)
+            std::cout << "WARNING: Hexx not found, restart from the non-exx loop." << std::endl
+                      << "If the loaded charge density is EXX-solved, this may lead to poor convergence." << std::endl;
+        GlobalC::restart.info_load.load_H_finish = this->restart;
+    }
 }
 
-template<typename TK, typename TR>
+template <typename TK, typename TR>
 void OperatorEXX<OperatorLCAO<TK, TR>>::contributeHk(int ik)
 {
-	// Peize Lin add 2016-12-03
-	if (GlobalV::CALCULATION != "nscf" && this->two_level_step != nullptr && *this->two_level_step == 0 && !this->restart) return;  //in the non-exx loop, do nothing 
-	if (XC_Functional::get_func_type() == 4 || XC_Functional::get_func_type() == 5)
-	{
-		if (this->restart && this->two_level_step != nullptr)
-		{
-			if (*this->two_level_step == 0)
-			{
-				this->add_loaded_Hexx(ik);
-				return;
-			}
-			else // clear loaded Hexx and release memory
-			{
-				if (this->LM->Hexxd_k_load.size() > 0)
-				{
-					this->LM->Hexxd_k_load.clear();
-					this->LM->Hexxd_k_load.shrink_to_fit();
-				}
-				else if (this->LM->Hexxc_k_load.size() > 0)
-				{
-					this->LM->Hexxc_k_load.clear();
-					this->LM->Hexxc_k_load.shrink_to_fit();
-				}
-			}
-		}
-		// cal H(k) from H(R) normally
+    // Peize Lin add 2016-12-03
+    if (GlobalV::CALCULATION != "nscf" && this->two_level_step != nullptr && *this->two_level_step == 0
+        && !this->restart)
+        return; // in the non-exx loop, do nothing
+    if (XC_Functional::get_func_type() == 4 || XC_Functional::get_func_type() == 5)
+    {
+        if (this->restart && this->two_level_step != nullptr)
+        {
+            if (*this->two_level_step == 0)
+            {
+                this->add_loaded_Hexx(ik);
+                return;
+            }
+            else // clear loaded Hexx and release memory
+            {
+                if (this->LM->Hexxd_k_load.size() > 0)
+                {
+                    this->LM->Hexxd_k_load.clear();
+                    this->LM->Hexxd_k_load.shrink_to_fit();
+                }
+                else if (this->LM->Hexxc_k_load.size() > 0)
+                {
+                    this->LM->Hexxc_k_load.clear();
+                    this->LM->Hexxc_k_load.shrink_to_fit();
+                }
+            }
+        }
+        // cal H(k) from H(R) normally
 
-		if (GlobalC::exx_info.info_ri.real_number)
-			RI_2D_Comm::add_Hexx(
-				this->kv,
-				ik,
-				GlobalC::exx_info.info_global.hybrid_alpha,
-				this->Hexxd == nullptr ? *this->LM->Hexxd : *this->Hexxd,
-				*this->LM->ParaV,
-				this->hsk->get_hk());
-		else
-			RI_2D_Comm::add_Hexx(
-				this->kv,
-				ik,
-				GlobalC::exx_info.info_global.hybrid_alpha,
-				this->Hexxc == nullptr ? *this->LM->Hexxc : *this->Hexxc,
-				*this->LM->ParaV,
-				this->hsk->get_hk());
-	}
+        if (GlobalC::exx_info.info_ri.real_number)
+            RI_2D_Comm::add_Hexx(this->kv,
+                                 ik,
+                                 GlobalC::exx_info.info_global.hybrid_alpha,
+                                 this->Hexxd == nullptr ? *this->LM->Hexxd : *this->Hexxd,
+                                 *this->LM->ParaV,
+                                 this->hsk->get_hk());
+        else
+            RI_2D_Comm::add_Hexx(this->kv,
+                                 ik,
+                                 GlobalC::exx_info.info_global.hybrid_alpha,
+                                 this->Hexxc == nullptr ? *this->LM->Hexxc : *this->Hexxc,
+                                 *this->LM->ParaV,
+                                 this->hsk->get_hk());
+    }
 }
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.cpp
index 50937ff6af..3407777da6 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.cpp
@@ -1,8 +1,9 @@
 #include "operator_lcao.h"
+
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
-#include "module_hsolver/hsolver_lcao.h"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h"
+#include "module_hsolver/hsolver_lcao.h"
 
 #ifdef __ELPA
 #include "module_hsolver/diago_elpa.h"
@@ -11,7 +12,7 @@
 namespace hamilt
 {
 
-template<>
+template <>
 void OperatorLCAO<double, double>::get_hs_pointers()
 {
     ModuleBase::timer::tick("OperatorLCAO", "get_hs_pointers");
@@ -33,196 +34,188 @@ void OperatorLCAO<double, double>::get_hs_pointers()
     ModuleBase::timer::tick("OperatorLCAO", "get_hs_pointers");
 }
 
-template<>
+template <>
 void OperatorLCAO<std::complex<double>, double>::get_hs_pointers()
 {
     this->hmatrix_k = this->hsk->get_hk();
     this->smatrix_k = this->hsk->get_sk();
 }
 
-template<>
+template <>
 void OperatorLCAO<std::complex<double>, std::complex<double>>::get_hs_pointers()
 {
     this->hmatrix_k = this->hsk->get_hk();
     this->smatrix_k = this->hsk->get_sk();
 }
 
-template<typename TK, typename TR>
+template <typename TK, typename TR>
 void OperatorLCAO<TK, TR>::refresh_h()
 {
     // Set the matrix 'H' to zero.
     this->hsk->set_zero_hk();
 }
 
-template<typename TK, typename TR>
+template <typename TK, typename TR>
 void OperatorLCAO<TK, TR>::set_hr_done(bool hr_done_in)
 {
     this->hr_done = hr_done_in;
 }
 
-template<typename TK, typename TR>
+template <typename TK, typename TR>
 void OperatorLCAO<TK, TR>::init(const int ik_in)
 {
     ModuleBase::TITLE("OperatorLCAO", "init");
     ModuleBase::timer::tick("OperatorLCAO", "init");
-    if(this->is_first_node)
+    if (this->is_first_node)
     {
         // refresh HK
         this->refresh_h();
-        if(!this->hr_done)
+        if (!this->hr_done)
         {
             // refresh HR
             this->hR->set_zero();
         }
     }
-    switch(this->cal_type)
+    switch (this->cal_type)
     {
-        case calculation_type::lcao_overlap:
-        {
-            //cal_type=lcao_overlap refer to overlap matrix operators, which are only rely on stucture, and not changed during SCF
+    case calculation_type::lcao_overlap: {
+        // cal_type=lcao_overlap refer to overlap matrix operators, which are only rely on stucture, and not changed
+        // during SCF
 
-            if(!this->hr_done)
-            {
-                //update SR first
-                //in cal_type=lcao_overlap, SR should be updated by each sub-chain nodes
-                OperatorLCAO<TK, TR>* last = this;
-                while(last != nullptr)
-                {
-                    last->contributeHR();
-                    last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
-                }
-            }
-
-            //update SK next
-            //in cal_type=lcao_overlap, SK should be update here
-            this->contributeHk(ik_in);
-
-            break;
-        }
-        case calculation_type::lcao_fixed:
+        if (!this->hr_done)
         {
-            //cal_type=lcao_fixed refer to fixed matrix operators, which are only rely on stucture, and not changed during SCF
-
-            //update HR first
-            if(!this->hr_done)
+            // update SR first
+            // in cal_type=lcao_overlap, SR should be updated by each sub-chain nodes
+            OperatorLCAO<TK, TR>* last = this;
+            while (last != nullptr)
             {
-                //in cal_type=lcao_fixed, HR should be updated by each sub-chain nodes
-                OperatorLCAO<TK, TR>* last = this;
-                while(last != nullptr)
-                {
-                    last->contributeHR();
-                    last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
-                }
+                last->contributeHR();
+                last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
             }
-
-            //update HK next
-            //in cal_type=lcao_fixed, HK will update in the last node with OperatorLCAO::contributeHk()
-
-            break;
         }
-        case calculation_type::lcao_gint:
-        {
-            //cal_type=lcao_gint refer to grid integral operators, which are relied on stucture and potential based on real space grids
-            //and should be updated each SCF steps
 
-            if(!this->hr_done)
-            {
-                OperatorLCAO<TK, TR>* last = this;
-                while(last != nullptr)
-                {
-                    //update HR first
-                    //in cal_type=lcao_gint, HR should be updated by every sub-node.
-                    last->contributeHR();
+        // update SK next
+        // in cal_type=lcao_overlap, SK should be update here
+        this->contributeHk(ik_in);
 
-                    //update HK next
-                    //in cal_type=lcao_gint, HK will update in the last node with OperatorLCAO::contributeHk()
-                    last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
-                }
-            }
+        break;
+    }
+    case calculation_type::lcao_fixed: {
+        // cal_type=lcao_fixed refer to fixed matrix operators, which are only rely on stucture, and not changed during
+        // SCF
 
-            break;
-        }
-#ifdef __DEEPKS
-        case calculation_type::lcao_deepks:
+        // update HR first
+        if (!this->hr_done)
         {
-            //update HR first
-            if(!this->hr_done)
+            // in cal_type=lcao_fixed, HR should be updated by each sub-chain nodes
+            OperatorLCAO<TK, TR>* last = this;
+            while (last != nullptr)
             {
-                //in cal_type=lcao_deepks, HR should be updated
-                this->contributeHR();
+                last->contributeHR();
+                last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
             }
+        }
 
-            //update H_V_delta_k next
-            this->contributeHk(ik_in);
+        // update HK next
+        // in cal_type=lcao_fixed, HK will update in the last node with OperatorLCAO::contributeHk()
 
-            break;
+        break;
+    }
+    case calculation_type::lcao_gint: {
+        // cal_type=lcao_gint refer to grid integral operators, which are relied on stucture and potential based on real
+        // space grids and should be updated each SCF steps
 
-        }
-#endif
-        case calculation_type::lcao_dftu:
+        if (!this->hr_done)
         {
-            //only HK should be updated when cal_type=lcao_dftu
-            //in cal_type=lcao_dftu, HK only need to update from one node
-            if(!this->hr_done)
+            OperatorLCAO<TK, TR>* last = this;
+            while (last != nullptr)
             {
-                //in cal_type=lcao_deepks, HR should be updated
-                this->contributeHR();
+                // update HR first
+                // in cal_type=lcao_gint, HR should be updated by every sub-node.
+                last->contributeHR();
+
+                // update HK next
+                // in cal_type=lcao_gint, HK will update in the last node with OperatorLCAO::contributeHk()
+                last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
             }
-            break;
         }
-        case calculation_type::lcao_sc_lambda:
+
+        break;
+    }
+#ifdef __DEEPKS
+    case calculation_type::lcao_deepks: {
+        // update HR first
+        if (!this->hr_done)
         {
-            //update HK only
-            //in cal_type=lcao_sc_mag, HK only need to be updated
-            this->contributeHk(ik_in);
-            break;
+            // in cal_type=lcao_deepks, HR should be updated
+            this->contributeHR();
         }
-        case calculation_type::lcao_exx:
+
+        // update H_V_delta_k next
+        this->contributeHk(ik_in);
+
+        break;
+    }
+#endif
+    case calculation_type::lcao_dftu: {
+        // only HK should be updated when cal_type=lcao_dftu
+        // in cal_type=lcao_dftu, HK only need to update from one node
+        if (!this->hr_done)
         {
-            //update HR first
-            //in cal_type=lcao_exx, HR should be updated by most priority sub-chain nodes
+            // in cal_type=lcao_deepks, HR should be updated
             this->contributeHR();
+        }
+        break;
+    }
+    case calculation_type::lcao_sc_lambda: {
+        // update HK only
+        // in cal_type=lcao_sc_mag, HK only need to be updated
+        this->contributeHk(ik_in);
+        break;
+    }
+    case calculation_type::lcao_exx: {
+        // update HR first
+        // in cal_type=lcao_exx, HR should be updated by most priority sub-chain nodes
+        this->contributeHR();
 
-            //update HK next
-            //in cal_type=lcao_exx, HK only need to update from one node
-            this->contributeHk(ik_in);
+        // update HK next
+        // in cal_type=lcao_exx, HK only need to update from one node
+        this->contributeHk(ik_in);
 
-            break;
-        }
-        case calculation_type::lcao_tddft_velocity:
+        break;
+    }
+    case calculation_type::lcao_tddft_velocity: {
+        if (!this->hr_done)
         {
-            if(!this->hr_done)
+            // in cal_type=lcao_fixed, HR should be updated by each sub-chain nodes
+            OperatorLCAO<TK, TR>* last = this;
+            while (last != nullptr)
             {
-                //in cal_type=lcao_fixed, HR should be updated by each sub-chain nodes
-                OperatorLCAO<TK, TR>* last = this;
-                while(last != nullptr)
-                {
-                    last->contributeHR();
-                    last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
-                }
+                last->contributeHR();
+                last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
             }
-            this->contributeHk(ik_in);
-
-            break;
-        }
-        default:
-        {
-            ModuleBase::WARNING_QUIT("OperatorLCAO::init", "unknown cal_type");
-            break;
         }
+        this->contributeHk(ik_in);
+
+        break;
+    }
+    default: {
+        ModuleBase::WARNING_QUIT("OperatorLCAO::init", "unknown cal_type");
+        break;
     }
-    if(this->next_op != nullptr)
-    {//it is not the last node, loop next init() function
+    }
+    if (this->next_op != nullptr)
+    { // it is not the last node, loop next init() function
         // pass HR status to next node and than set HR status of this node to done
-        if(!this->hr_done)
+        if (!this->hr_done)
         {
             dynamic_cast<OperatorLCAO<TK, TR>*>(this->next_op)->hr_done = this->hr_done;
         }
         // call init() function of next node
         this->next_op->init(ik_in);
     }
-    else 
-    {//it is the last node, update HK with the current total HR
+    else
+    { // it is the last node, update HK with the current total HR
         OperatorLCAO<TK, TR>::contributeHk(ik_in);
     }
 
@@ -233,12 +226,12 @@ void OperatorLCAO<TK, TR>::init(const int ik_in)
 }
 
 // contributeHk()
-template<typename TK, typename TR>
+template <typename TK, typename TR>
 void OperatorLCAO<TK, TR>::contributeHk(int ik)
 {
     ModuleBase::TITLE("OperatorLCAO", "contributeHk");
     ModuleBase::timer::tick("OperatorLCAO", "contributeHk");
-    if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
+    if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
     {
         const int nrow = this->hsk->get_pv()->get_row_size();
         hamilt::folding_HR(*this->hR, this->hsk->get_hk(), this->kvec_d[ik], nrow, 1);
@@ -254,4 +247,4 @@ void OperatorLCAO<TK, TR>::contributeHk(int ik)
 template class OperatorLCAO<double, double>;
 template class OperatorLCAO<std::complex<double>, double>;
 template class OperatorLCAO<std::complex<double>, std::complex<double>>;
-}  // namespace hamilt
+} // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.h
index 5aece587ab..024dd02fe5 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.h
@@ -3,8 +3,8 @@
 #include "module_base/vector3.h"
 #include "module_hamilt_general/matrixblock.h"
 #include "module_hamilt_general/operator.h"
-#include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp"
+#include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
 
 namespace hamilt
 {
@@ -13,11 +13,12 @@ template <typename TK, typename TR>
 class OperatorLCAO : public Operator<TK>
 {
   public:
-    OperatorLCAO(
-        HS_Matrix_K<TK>* hsk_in, 
-        const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-        HContainer<TR>* hR_in)
-        : hsk(hsk_in), kvec_d(kvec_d_in), hR(hR_in){}
+    OperatorLCAO(HS_Matrix_K<TK>* hsk_in,
+                 const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                 HContainer<TR>* hR_in)
+        : hsk(hsk_in), kvec_d(kvec_d_in), hR(hR_in)
+    {
+    }
     virtual ~OperatorLCAO()
     {
         if (this->allocated_smatrix)
@@ -54,16 +55,18 @@ class OperatorLCAO : public Operator<TK>
         this->get_hs_pointers();
 #ifdef __MPI
         hk_in = MatrixBlock<TK>{hmatrix_k,
-                               (size_t)this->hsk->get_pv()->nrow,
-                               (size_t)this->hsk->get_pv()->ncol,
-                               this->hsk->get_pv()->desc};
+                                (size_t)this->hsk->get_pv()->nrow,
+                                (size_t)this->hsk->get_pv()->ncol,
+                                this->hsk->get_pv()->desc};
         sk_in = MatrixBlock<TK>{smatrix_k,
-                               (size_t)this->hsk->get_pv()->nrow,
-                               (size_t)this->hsk->get_pv()->ncol,
-                               this->hsk->get_pv()->desc};
+                                (size_t)this->hsk->get_pv()->nrow,
+                                (size_t)this->hsk->get_pv()->ncol,
+                                this->hsk->get_pv()->desc};
 #else
-        hk_in = MatrixBlock<TK>{hmatrix_k, (size_t)this->hsk->get_pv()->nrow, (size_t)this->hsk->get_pv()->ncol, nullptr};
-        sk_in = MatrixBlock<TK>{smatrix_k, (size_t)this->hsk->get_pv()->nrow, (size_t)this->hsk->get_pv()->ncol, nullptr};
+        hk_in
+            = MatrixBlock<TK>{hmatrix_k, (size_t)this->hsk->get_pv()->nrow, (size_t)this->hsk->get_pv()->ncol, nullptr};
+        sk_in
+            = MatrixBlock<TK>{smatrix_k, (size_t)this->hsk->get_pv()->nrow, (size_t)this->hsk->get_pv()->ncol, nullptr};
 #endif
     }
 
@@ -74,7 +77,7 @@ class OperatorLCAO : public Operator<TK>
     /**
      * @brief set_HR_fixed() is used for pass HR_fixed matrix to the next node in sub-chain table
      * not used in base class, only be override in fixed Hamiltonian Operators (e.g. Ekinetic and Nonlocal)
-    */
+     */
     virtual void set_HR_fixed(void*)
     {
         return;
@@ -82,7 +85,7 @@ class OperatorLCAO : public Operator<TK>
 
     /**
      * @brief reset hr_done status
-    */
+     */
     void set_hr_done(bool hr_done_in);
 
     // protected:
@@ -106,7 +109,7 @@ class OperatorLCAO : public Operator<TK>
 
     // only used for Gamma_only case
     bool allocated_smatrix = false;
-    
+
     // if HR is calculated
     bool hr_done = false;
 };
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp
index 4baf56edd3..b66477f213 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp
@@ -33,7 +33,7 @@ void hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::initialize_SR(Grid_Driver
 {
     ModuleBase::TITLE("OverlapNew", "initialize_SR");
     ModuleBase::timer::tick("OverlapNew", "initialize_SR");
-    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
     for (int iat1 = 0; iat1 < ucell->nat; iat1++)
     {
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.cpp
index 2aa96175dd..b1e87e4db3 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.cpp
@@ -1,5 +1,7 @@
 #include "sc_lambda_lcao.h"
+
 #include "module_hamilt_lcao/module_deltaspin/spin_constrain.h"
+
 #include <algorithm>
 
 namespace hamilt
@@ -22,13 +24,16 @@ void OperatorScLambda<OperatorLCAO<std::complex<double>, std::complex<double>>>:
         = SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::getScInstance();
     std::vector<std::complex<double>> h_lambda(this->hsk->get_pv()->nloc);
     std::fill(h_lambda.begin(), h_lambda.end(), std::complex<double>(0, 0));
-    sc.cal_h_lambda(&h_lambda[0], this->hsk->get_sk(), ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(), this->isk[ik]);
+    sc.cal_h_lambda(&h_lambda[0],
+                    this->hsk->get_sk(),
+                    ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(),
+                    this->isk[ik]);
     std::complex<double>* hk = this->hsk->get_hk();
     for (int irc = 0; irc < this->hsk->get_pv()->nloc; irc++)
     {
         hk[irc] += h_lambda[irc];
     }
-    //std::cout << "OperatorScLambda contributeHk" << std::endl;
+    // std::cout << "OperatorScLambda contributeHk" << std::endl;
     ModuleBase::timer::tick("OperatorScLambda", "contributeHk");
 }
 
@@ -42,13 +47,16 @@ void OperatorScLambda<OperatorLCAO<std::complex<double>, double>>::contributeHk(
         = SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::getScInstance();
     std::vector<std::complex<double>> h_lambda(this->hsk->get_pv()->nloc);
     std::fill(h_lambda.begin(), h_lambda.end(), std::complex<double>(0, 0));
-    sc.cal_h_lambda(&h_lambda[0], this->hsk->get_sk(), ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(), this->isk[ik]);
+    sc.cal_h_lambda(&h_lambda[0],
+                    this->hsk->get_sk(),
+                    ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(),
+                    this->isk[ik]);
     std::complex<double>* hk = this->hsk->get_hk();
     for (int irc = 0; irc < this->hsk->get_pv()->nloc; irc++)
     {
         hk[irc] += h_lambda[irc];
     }
-    //std::cout << "OperatorScLambda contributeHk" << std::endl;
+    // std::cout << "OperatorScLambda contributeHk" << std::endl;
     ModuleBase::timer::tick("OperatorScLambda", "contributeHk");
 }
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.h
index 646c0415fb..2d46c4a824 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.h
@@ -18,14 +18,14 @@ class OperatorScLambda : public T
 
 #endif
 
-template<typename TK, typename TR>
+template <typename TK, typename TR>
 class OperatorScLambda<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
     OperatorScLambda<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
-                                    const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                                    hamilt::HContainer<TR>* hR_in,
-                                    const std::vector<int>& isk_in)
+                                           const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                                           hamilt::HContainer<TR>* hR_in,
+                                           const std::vector<int>& isk_in)
         : isk(isk_in), OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_sc_lambda;
@@ -34,8 +34,8 @@ class OperatorScLambda<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     virtual void contributeHR() override;
 
     virtual void contributeHk(int ik) override;
-  private:
 
+  private:
     const std::vector<int>& isk;
 };
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp
index ac26806e94..0053990c63 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp
@@ -259,7 +259,7 @@ void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
     ModuleBase::TITLE("TDEkinetic", "initialize_HR");
     ModuleBase::timer::tick("TDEkinetic", "initialize_HR");
 
-    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     this->adjs_all.clear();
@@ -308,7 +308,7 @@ void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR_tmp()
     ModuleBase::TITLE("TDEkinetic", "initialize_HR_tmp");
     ModuleBase::timer::tick("TDEkinetic", "initialize_HR_tmp");
 
-    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
     for (int i = 0; i < this->hR->size_atom_pairs(); ++i)
     {
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h
index f9baba25eb..b142267034 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h
@@ -1,15 +1,14 @@
 #ifndef TDEKINETIC_H
 #define TDEKINETIC_H
 #include "module_base/timer.h"
-#include "operator_lcao.h"
+#include "module_basis/module_ao/ORB_gaunt_table.h"
+#include "module_basis/module_ao/ORB_table_phi.h"
 #include "module_cell/klist.h"
 #include "module_cell/module_neighbor/sltk_grid_driver.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/center2_orb-orb11.h"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
-#include "module_basis/module_ao/ORB_table_phi.h"
-#include "module_basis/module_ao/ORB_gaunt_table.h"
 #include "module_hamilt_lcao/module_tddft/td_velocity.h"
-
+#include "operator_lcao.h"
 
 namespace hamilt
 {
@@ -33,15 +32,15 @@ class TDEkinetic : public T
 /// - TR: data type of real space Hamiltonian
 
 template <typename TK, typename TR>
-class TDEkinetic<OperatorLCAO<TK,TR>> : public OperatorLCAO<TK, TR>
+class TDEkinetic<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
     TDEkinetic<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
-                                 hamilt::HContainer<TR>* hR_in,
-                                 hamilt::HContainer<TR>* SR_in,
-                                 const K_Vectors* kv_in,
-                                 const UnitCell* ucell_in,
-                                 Grid_Driver* GridD_in);
+                                     hamilt::HContainer<TR>* hR_in,
+                                     hamilt::HContainer<TR>* SR_in,
+                                     const K_Vectors* kv_in,
+                                     const UnitCell* ucell_in,
+                                     Grid_Driver* GridD_in);
     ~TDEkinetic();
 
     virtual void contributeHR() override;
@@ -59,13 +58,18 @@ class TDEkinetic<OperatorLCAO<TK,TR>> : public OperatorLCAO<TK, TR>
     /**
      * @brief initialize HR_tmp
      * Allocate the memory for HR_tmp with the same size as HR
-    */
+     */
     void initialize_HR_tmp();
 
     /**
      * @brief calculate the HR local matrix of <I,J,R> atom pair
      */
-    void cal_HR_IJR(const int& iat1,const int& iat2,const Parallel_Orbitals* paraV,const ModuleBase::Vector3<double>& dtau,std::complex<double>* data_pointer,TR* s_pointer);
+    void cal_HR_IJR(const int& iat1,
+                    const int& iat2,
+                    const Parallel_Orbitals* paraV,
+                    const ModuleBase::Vector3<double>& dtau,
+                    std::complex<double>* data_pointer,
+                    TR* s_pointer);
 
     /**
      * @brief calculate the ekinetic matrix correction term in tddft with specific <I,J,R> atom-pairs
@@ -73,15 +77,16 @@ class TDEkinetic<OperatorLCAO<TK,TR>> : public OperatorLCAO<TK, TR>
      * loop the atom-pairs in HR and calculate the ekinetic matrix
      */
     void calculate_HR(void);
-    virtual void set_HR_fixed(void*)override;
+    virtual void set_HR_fixed(void*) override;
 
     TD_Velocity td_velocity;
 
   private:
     const UnitCell* ucell = nullptr;
-    
+
     HContainer<TR>* SR = nullptr;
-    /// @brief Store real space hamiltonian. TD term should include imaginary part, thus it has to be complex type. Only shared between TD operators.
+    /// @brief Store real space hamiltonian. TD term should include imaginary part, thus it has to be complex type. Only
+    /// shared between TD operators.
     HContainer<std::complex<double>>* hR_tmp = nullptr;
     Grid_Driver* Grid = nullptr;
 
@@ -92,16 +97,17 @@ class TDEkinetic<OperatorLCAO<TK,TR>> : public OperatorLCAO<TK, TR>
     void td_ekinetic_grad(std::complex<double>* Hloc, int nnr, ModuleBase::Vector3<double> grad_overlap);
 
     ORB_table_phi MOT;
-	  ORB_gaunt_table MGT;
-    
+    ORB_gaunt_table MGT;
+
     /// @brief Store the two center integrals outcome <𝝓_𝝁𝟎 |𝛁| 𝝓_𝝂𝑹> for td_ekinetic term
-    std::map<size_t,                                // TA
-      std::map<size_t,                            // TB
-        std::map<int,                           // LA
-          std::map<size_t,                    // NA
-            std::map<int,                   // LB
-              std::map<size_t,            // NB
-                Center2_Orb::Orb11>>>>>> center2_orb11_s;
+    std::map<size_t,                                              // TA
+             std::map<size_t,                                     // TB
+                      std::map<int,                               // LA
+                               std::map<size_t,                   // NA
+                                        std::map<int,             // LB
+                                                 std::map<size_t, // NB
+                                                          Center2_Orb::Orb11>>>>>>
+        center2_orb11_s;
 
     /// @brief Store the vector potential for td_ekinetic term
     ModuleBase::Vector3<double> cart_At;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
index 28d63c8074..17de4e01d5 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
@@ -160,8 +160,7 @@ TEST_F(DFTUTest, constructHRd2d)
         HR->get_wrapper()[i] = 0.0;
     }
     std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now();
-    hamilt::DFTU<hamilt::OperatorLCAO<double, double>>
-        op(&hsk, kvec_d_in, HR, ucell, &gd, &intor_, &GlobalC::dftu);
+    hamilt::DFTU<hamilt::OperatorLCAO<double, double>> op(&hsk, kvec_d_in, HR, ucell, &gd, &intor_, &GlobalC::dftu);
     std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> elapsed_time
         = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_ekineticnew.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_ekineticnew.cpp
index 0befa32c0d..10d20528c6 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_ekineticnew.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_ekineticnew.cpp
@@ -107,8 +107,7 @@ TEST_F(EkineticNewTest, constructHRd2d)
     hamilt::HS_Matrix_K<double> hsk(paraV, 1);
     hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
-    hamilt::EkineticNew<hamilt::OperatorLCAO<double, double>>
-        op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
+    hamilt::EkineticNew<hamilt::OperatorLCAO<double, double>> op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
     op.contributeHR();
     // check the value of HR
     for (int iap = 0; iap < HR->size_atom_pairs(); ++iap)
@@ -157,8 +156,12 @@ TEST_F(EkineticNewTest, constructHRd2cd)
     hamilt::HS_Matrix_K<std::complex<double>> hsk(paraV, 1);
     hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
-    hamilt::EkineticNew<hamilt::OperatorLCAO<std::complex<double>, double>>
-        op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
+    hamilt::EkineticNew<hamilt::OperatorLCAO<std::complex<double>, double>> op(&hsk,
+                                                                               kvec_d_in,
+                                                                               HR,
+                                                                               &ucell,
+                                                                               &gd,
+                                                                               &intor_);
     op.contributeHR();
     // check the value of HR
     for (int iap = 0; iap < HR->size_atom_pairs(); ++iap)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp
index a2054a6fa2..a95e517129 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp
@@ -140,8 +140,7 @@ TEST_F(NonlocalNewTest, constructHRd2d)
     EXPECT_EQ(ucell.infoNL.Beta[0].get_rcut_max(), 1.0);
     EXPECT_EQ(LCAO_Orbitals::get_const_instance().Phi[0].getRcut(), 1.0);
     std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now();
-    hamilt::NonlocalNew<hamilt::OperatorLCAO<double, double>>
-        op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
+    hamilt::NonlocalNew<hamilt::OperatorLCAO<double, double>> op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
     std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> elapsed_time
         = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
@@ -210,8 +209,12 @@ TEST_F(NonlocalNewTest, constructHRd2cd)
     hamilt::HS_Matrix_K<std::complex<double>> hsk(paraV);
     hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
-    hamilt::NonlocalNew<hamilt::OperatorLCAO<std::complex<double>, double>>
-        op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
+    hamilt::NonlocalNew<hamilt::OperatorLCAO<std::complex<double>, double>> op(&hsk,
+                                                                               kvec_d_in,
+                                                                               HR,
+                                                                               &ucell,
+                                                                               &gd,
+                                                                               &intor_);
     op.contributeHR();
     // check the value of HR
     for (int iap = 0; iap < HR->size_atom_pairs(); ++iap)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew.cpp
index 298439b074..4b2457a001 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew.cpp
@@ -107,8 +107,7 @@ TEST_F(OverlapNewTest, constructHRd2d)
     hamilt::HS_Matrix_K<double> hsk(paraV);
     hsk.set_zero_sk();
     Grid_Driver gd(0, 0, 0);
-    hamilt::OverlapNew<hamilt::OperatorLCAO<double, double>>
-        op(&hsk, kvec_d_in, nullptr, SR, &ucell, &gd, &intor_);
+    hamilt::OverlapNew<hamilt::OperatorLCAO<double, double>> op(&hsk, kvec_d_in, nullptr, SR, &ucell, &gd, &intor_);
     op.contributeHR();
     // check the value of SR
     for (int iap = 0; iap < SR->size_atom_pairs(); ++iap)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_sc_lambda_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_sc_lambda_lcao.cpp
index d37e94efbb..e876255ee1 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_sc_lambda_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_sc_lambda_lcao.cpp
@@ -1,12 +1,12 @@
-#include <algorithm>
-#include <random>
-
 #include "../sc_lambda_lcao.h"
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
 #include "module_cell/klist.h"
 #include "module_hamilt_lcao/module_deltaspin/spin_constrain.h"
 
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <algorithm>
+#include <random>
+
 // mockes
 K_Vectors::K_Vectors()
 {
@@ -96,12 +96,8 @@ TEST_F(ScLambdaLCAOTest, ContributeHk)
         = SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::getScInstance();
     sc.set_ParaV(&paraV);
     EXPECT_EQ(sc.ParaV->nloc, 4);
-    std::map<int, int> atomCounts = {
-        {0, 1}
-    };
-    std::map<int, int> orbitalCounts = {
-        {0, 1}
-    };
+    std::map<int, int> atomCounts = {{0, 1}};
+    std::map<int, int> orbitalCounts = {{0, 1}};
     sc.set_atomCounts(atomCounts);
     sc.set_orbitalCounts(orbitalCounts);
     sc.set_nspin(4);
@@ -121,12 +117,10 @@ TEST_F(ScLambdaLCAOTest, ContributeHk)
                                                                                                      nullptr,
                                                                                                      isk);
     sc_lambda_op.contributeHk(0);
-    std::vector<std::complex<double>> columnMajor_h_lambda = {
-        std::complex<double>{-1.0, 0.0},
-        std::complex<double>{-1.0, 1.0},
-        std::complex<double>{-1.0, -1.0},
-        std::complex<double>{1.0,  0.0}
-    };
+    std::vector<std::complex<double>> columnMajor_h_lambda = {std::complex<double>{-1.0, 0.0},
+                                                              std::complex<double>{-1.0, 1.0},
+                                                              std::complex<double>{-1.0, -1.0},
+                                                              std::complex<double>{1.0, 0.0}};
     EXPECT_DOUBLE_EQ(hsk.get_hk()[0].real(), columnMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(hsk.get_hk()[0].imag(), columnMajor_h_lambda[0].imag());
     EXPECT_DOUBLE_EQ(hsk.get_hk()[1].real(), columnMajor_h_lambda[1].real());
@@ -155,12 +149,8 @@ TEST_F(ScLambdaLCAOTest, ContributeHkS2)
         = SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::getScInstance();
     sc.set_ParaV(&paraV);
     EXPECT_EQ(sc.ParaV->nloc, 1);
-    std::map<int, int> atomCounts = {
-        {0, 1}
-    };
-    std::map<int, int> orbitalCounts = {
-        {0, 1}
-    };
+    std::map<int, int> atomCounts = {{0, 1}};
+    std::map<int, int> orbitalCounts = {{0, 1}};
     sc.set_atomCounts(atomCounts);
     sc.set_orbitalCounts(orbitalCounts);
     sc.set_nspin(2);
@@ -174,15 +164,12 @@ TEST_F(ScLambdaLCAOTest, ContributeHkS2)
     GlobalV::KS_SOLVER = "genelpa";
     EXPECT_TRUE(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER());
     // set sc_lambda_op
-    auto sc_lambda_op
-        = hamilt::OperatorScLambda<hamilt::OperatorLCAO<std::complex<double>, double>>(&hsk,
-                                                                                    this->kvec_d,
-                                                                                    nullptr,
-                                                                                    isk);
+    auto sc_lambda_op = hamilt::OperatorScLambda<hamilt::OperatorLCAO<std::complex<double>, double>>(&hsk,
+                                                                                                     this->kvec_d,
+                                                                                                     nullptr,
+                                                                                                     isk);
     sc_lambda_op.contributeHk(0);
-    std::vector<std::complex<double>> columnMajor_h_lambda = {
-        std::complex<double>{-1.0,  0.0}
-    };
+    std::vector<std::complex<double>> columnMajor_h_lambda = {std::complex<double>{-1.0, 0.0}};
     EXPECT_DOUBLE_EQ(hsk.get_hk()[0].real(), columnMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(hsk.get_hk()[0].imag(), columnMajor_h_lambda[0].imag());
 }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
index 39e91219a0..309c878a2c 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
@@ -1,22 +1,21 @@
 #include "veff_lcao.h"
+
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
-#include "module_hamilt_general/module_xc/xc_functional.h"
 #include "module_cell/unitcell.h"
+#include "module_hamilt_general/module_xc/xc_functional.h"
 namespace hamilt
 {
 
-
 // initialize_HR()
 template <typename TK, typename TR>
-void Veff<OperatorLCAO<TK, TR>>::initialize_HR(const UnitCell* ucell_in,
-                                        Grid_Driver* GridD)
+void Veff<OperatorLCAO<TK, TR>>::initialize_HR(const UnitCell* ucell_in, Grid_Driver* GridD)
 {
     ModuleBase::TITLE("Veff", "initialize_HR");
     ModuleBase::timer::tick("Veff", "initialize_HR");
 
     this->nspin = GlobalV::NSPIN;
-    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     for (int iat1 = 0; iat1 < ucell_in->nat; iat1++)
@@ -39,8 +38,8 @@ void Veff<OperatorLCAO<TK, TR>>::initialize_HR(const UnitCell* ucell_in,
             const ModuleBase::Vector3<int>& R_index2 = adjs.box[ad1];
             // choose the real adjacent atoms
             const LCAO_Orbitals& orb = LCAO_Orbitals::get_const_instance();
-            // Note: the distance of atoms should less than the cutoff radius, 
-            // When equal, the theoretical value of matrix element is zero, 
+            // Note: the distance of atoms should less than the cutoff radius,
+            // When equal, the theoretical value of matrix element is zero,
             // but the calculated value is not zero due to the numerical error, which would lead to result changes.
             if (ucell_in->cal_dtau(iat1, iat2, R_index2).norm() * ucell_in->lat0
                 < orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut())
@@ -56,9 +55,7 @@ void Veff<OperatorLCAO<TK, TR>>::initialize_HR(const UnitCell* ucell_in,
     ModuleBase::timer::tick("Veff", "initialize_HR");
 }
 
-
-
-template<typename TK, typename TR>
+template <typename TK, typename TR>
 void Veff<OperatorLCAO<TK, TR>>::contributeHR()
 {
     ModuleBase::TITLE("Veff", "contributeHR");
@@ -79,7 +76,7 @@ void Veff<OperatorLCAO<TK, TR>>::contributeHR()
 
     // if you change the place of the following code,
     // rememeber to delete the #include
-    if(XC_Functional::get_func_type()==3 || XC_Functional::get_func_type()==5)
+    if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
     {
         Gint_inout inout(vr_eff1, vofk_eff1, 0, Gint_Tools::job_type::vlocal_meta);
         this->GK->cal_gint(&inout);
@@ -98,12 +95,12 @@ void Veff<OperatorLCAO<TK, TR>>::contributeHR()
         for (int is = 1; is < 4; is++)
         {
             vr_eff1 = this->pot->get_effective_v(is);
-            if(XC_Functional::get_func_type()==3 || XC_Functional::get_func_type()==5)
+            if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
             {
                 vofk_eff1 = this->pot->get_effective_vofk(is);
             }
-            
-            if(XC_Functional::get_func_type()==3 || XC_Functional::get_func_type()==5)
+
+            if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
             {
                 Gint_inout inout(vr_eff1, vofk_eff1, is, Gint_Tools::job_type::vlocal_meta);
                 this->GK->cal_gint(&inout);
@@ -115,16 +112,17 @@ void Veff<OperatorLCAO<TK, TR>>::contributeHR()
             }
         }
     }
-    this->GK->transfer_pvpR(this->hR,this->ucell,this->gd);
+    this->GK->transfer_pvpR(this->hR, this->ucell, this->gd);
 
-    if(this->nspin == 2) this->current_spin = 1 - this->current_spin;
+    if (this->nspin == 2)
+        this->current_spin = 1 - this->current_spin;
 
     ModuleBase::timer::tick("Veff", "contributeHR");
     return;
 }
 
 // special case of gamma-only
-template<>
+template <>
 void Veff<OperatorLCAO<double, double>>::contributeHR(void)
 {
     ModuleBase::TITLE("Veff", "contributeHR");
@@ -142,29 +140,30 @@ void Veff<OperatorLCAO<double, double>>::contributeHR(void)
     // and diagonalize the H matrix (T+Vl+Vnl).
     //--------------------------------------------
 
-    if(XC_Functional::get_func_type()==3 || XC_Functional::get_func_type()==5)
+    if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
     {
         Gint_inout inout(vr_eff1, vofk_eff1, Gint_Tools::job_type::vlocal_meta);
-        this->GG->cal_vlocal(&inout,  this->new_e_iteration);
+        this->GG->cal_vlocal(&inout, this->new_e_iteration);
     }
     else
     {
         Gint_inout inout(vr_eff1, Gint_Tools::job_type::vlocal);
-        this->GG->cal_vlocal(&inout,  this->new_e_iteration);
+        this->GG->cal_vlocal(&inout, this->new_e_iteration);
     }
-    this->GG->transfer_pvpR(this->hR,this->ucell);
+    this->GG->transfer_pvpR(this->hR, this->ucell);
 
     this->new_e_iteration = false;
 
-    if(this->nspin == 2) this->current_spin = 1 - this->current_spin;
+    if (this->nspin == 2)
+        this->current_spin = 1 - this->current_spin;
 
     ModuleBase::timer::tick("Veff", "contributeHR");
 }
 
-// definition of class template should in the end of file to avoid compiling warning 
+// definition of class template should in the end of file to avoid compiling warning
 template class Veff<OperatorLCAO<double, double>>;
 
 template class Veff<OperatorLCAO<std::complex<double>, double>>;
 
 template class Veff<OperatorLCAO<std::complex<double>, std::complex<double>>>;
-}
+} // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
index 0b639696ad..aadf9c65fa 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
@@ -1,13 +1,13 @@
 #ifndef VEFFLCAO_H
 #define VEFFLCAO_H
 #include "module_base/timer.h"
+#include "module_cell/module_neighbor/sltk_grid_driver.h"
+#include "module_cell/unitcell.h"
 #include "module_elecstate/potentials/potential_new.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h"
 #include "module_hamilt_lcao/module_gint/gint_gamma.h"
 #include "module_hamilt_lcao/module_gint/gint_k.h"
 #include "operator_lcao.h"
-#include "module_cell/module_neighbor/sltk_grid_driver.h"
-#include "module_cell/unitcell.h"
 
 namespace hamilt
 {
@@ -25,8 +25,8 @@ class Veff : public T
 /// @brief Effective potential class, used for calculating Hamiltonian with grid integration tools
 /// If user want to separate the contribution of V_{eff} into V_{H} and V_{XC} and V_{local pseudopotential} and so on,
 /// the user can separate the Potential class into different parts, and construct different Veff class for each part.
-/// @tparam TK 
-/// @tparam TR 
+/// @tparam TK
+/// @tparam TR
 template <typename TK, typename TR>
 class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
@@ -34,21 +34,17 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     /**
      * @brief Construct a new Veff object for multi-kpoint calculation
      * @param GK_in: the pointer of Gint_k object, used for grid integration
-    */
+     */
     Veff<OperatorLCAO<TK, TR>>(Gint_k* GK_in,
-                          Local_Orbital_Charge* loc_in,
-                          HS_Matrix_K<TK>* hsk_in,
-                          const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                          elecstate::Potential* pot_in,
-                          hamilt::HContainer<TR>* hR_in,
-                          const UnitCell* ucell_in,
-                          Grid_Driver* GridD_in)
-        : GK(GK_in),
-          loc(loc_in),
-          pot(pot_in),
-          ucell(ucell_in),
-          gd(GridD_in),
-          OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
+                               Local_Orbital_Charge* loc_in,
+                               HS_Matrix_K<TK>* hsk_in,
+                               const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                               elecstate::Potential* pot_in,
+                               hamilt::HContainer<TR>* hR_in,
+                               const UnitCell* ucell_in,
+                               Grid_Driver* GridD_in)
+        : GK(GK_in), loc(loc_in), pot(pot_in), ucell(ucell_in),
+          gd(GridD_in), OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_gint;
 
@@ -58,18 +54,16 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     /**
      * @brief Construct a new Veff object for Gamma-only calculation
      * @param GG_in: the pointer of Gint_Gamma object, used for grid integration
-    */
+     */
     Veff<OperatorLCAO<TK, TR>>(Gint_Gamma* GG_in,
-                          Local_Orbital_Charge* loc_in,
-                          HS_Matrix_K<TK>* hsk_in,
-                          const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                          elecstate::Potential* pot_in,
-                          hamilt::HContainer<TR>* hR_in,
-                          const UnitCell* ucell_in,
-                          Grid_Driver* GridD_in
-                          )
-        : GG(GG_in), loc(loc_in), pot(pot_in),
-        OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
+                               Local_Orbital_Charge* loc_in,
+                               HS_Matrix_K<TK>* hsk_in,
+                               const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                               elecstate::Potential* pot_in,
+                               hamilt::HContainer<TR>* hR_in,
+                               const UnitCell* ucell_in,
+                               Grid_Driver* GridD_in)
+        : GG(GG_in), loc(loc_in), pot(pot_in), OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_gint;
         this->initialize_HR(ucell_in, GridD_in);
@@ -82,13 +76,14 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     /**
      * @brief contributeHR() is used to calculate the HR matrix
      * <phi_{\mu, 0}|V_{eff}|phi_{\nu, R}>
-     * the contribution of V_{eff} is calculated by the contribution of V_{H} and V_{XC} and V_{local pseudopotential} and so on.
-     * grid integration is used to calculate the contribution Hamiltonian of effective potential
+     * the contribution of V_{eff} is calculated by the contribution of V_{H} and V_{XC} and V_{local pseudopotential}
+     * and so on. grid integration is used to calculate the contribution Hamiltonian of effective potential
      */
     virtual void contributeHR() override;
-  
-  const UnitCell* ucell;
-  Grid_Driver* gd;
+
+    const UnitCell* ucell;
+    Grid_Driver* gd;
+
   private:
     // used for k-dependent grid integration.
     Gint_k* GK = nullptr;
@@ -110,7 +105,6 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * the size of HR will be fixed after initialization
      */
     void initialize_HR(const UnitCell* ucell_in, Grid_Driver* GridD_in);
-
 };
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_h_lambda.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_h_lambda.cpp
index be60643477..9330cdd6ea 100644
--- a/source/module_hamilt_lcao/module_deltaspin/cal_h_lambda.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/cal_h_lambda.cpp
@@ -1,20 +1,20 @@
-#include "spin_constrain.h"
+#include "module_base/global_function.h"
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
-#include "module_base/global_function.h"
+#include "spin_constrain.h"
+
 #include <algorithm>
 
 template <>
-void SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::cal_h_lambda(
-    std::complex<double>* h_lambda,
-    const std::complex<double>* Sloc2,
-    bool column_major,
-    int isk)
+void SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::cal_h_lambda(std::complex<double>* h_lambda,
+                                                                                const std::complex<double>* Sloc2,
+                                                                                bool column_major,
+                                                                                int isk)
 {
-    ModuleBase::TITLE("SpinConstrain","cal_h_lambda");
+    ModuleBase::TITLE("SpinConstrain", "cal_h_lambda");
     ModuleBase::timer::tick("SpinConstrain", "cal_h_lambda");
     const Parallel_Orbitals* pv = this->ParaV;
-    for (const auto& sc_elem1 : this->get_atomCounts())
+    for (const auto& sc_elem1: this->get_atomCounts())
     {
         int it1 = sc_elem1.first;
         int nat_it1 = sc_elem1.second;
@@ -22,12 +22,13 @@ void SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::cal_h_lambda(
         for (int ia1 = 0; ia1 < nat_it1; ia1++)
         {
             int iat1 = this->get_iat(it1, ia1);
-            for (int iw1 = 0; iw1 < nw_it1*this->npol_; iw1++)
+            for (int iw1 = 0; iw1 < nw_it1 * this->npol_; iw1++)
             {
                 int iwt1 = this->get_iwt(it1, ia1, iw1);
                 const int mu = pv->global2local_row(iwt1);
-                if (mu < 0) continue;
-                for (const auto& sc_elem2 : this->get_atomCounts())
+                if (mu < 0)
+                    continue;
+                for (const auto& sc_elem2: this->get_atomCounts())
                 {
                     int it2 = sc_elem2.first;
                     int nat_it2 = sc_elem2.second;
@@ -35,11 +36,12 @@ void SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::cal_h_lambda(
                     for (int ia2 = 0; ia2 < nat_it2; ia2++)
                     {
                         int iat2 = this->get_iat(it2, ia2);
-                        for (int iw2 = 0; iw2 < nw_it2*this->npol_; iw2++)
+                        for (int iw2 = 0; iw2 < nw_it2 * this->npol_; iw2++)
                         {
                             int iwt2 = this->get_iwt(it2, ia2, iw2);
                             const int nu = pv->global2local_col(iwt2);
-                            if (nu < 0) continue;
+                            if (nu < 0)
+                                continue;
                             int icc;
                             ModuleBase::Vector3<double> lambda = (this->lambda_[iat1] + this->lambda_[iat2]) / 2.0;
                             if (column_major)
diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
index fb88b8eb57..262a967219 100644
--- a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
@@ -1,5 +1,3 @@
-#include <iostream>
-
 #include "module_base/matrix.h"
 #include "module_base/name_angular.h"
 #include "module_base/scalapack_connector.h"
@@ -8,6 +6,8 @@
 #include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
 #include "spin_constrain.h"
 
+#include <iostream>
+
 template <>
 ModuleBase::matrix SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::cal_MW_k(
     LCAO_Matrix* LM,
@@ -17,12 +17,13 @@ ModuleBase::matrix SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>:
     int nw = this->get_nw();
     const int nlocal = (this->nspin_ == 4) ? nw / 2 : nw;
     ModuleBase::matrix MecMulP(this->nspin_, nlocal, true), orbMulP(this->nspin_, nlocal, true);
-    for(size_t ik = 0; ik != this->kv_.get_nks(); ++ik)
+    for (size_t ik = 0; ik != this->kv_.get_nks(); ++ik)
     {
-        std::complex<double> *sk = nullptr;
+        std::complex<double>* sk = nullptr;
         if (this->nspin_ == 4)
         {
-            dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt)->updateSk(ik, 1);
+            dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt)
+                ->updateSk(ik, 1);
             sk = dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt)->getSk();
         }
         else
@@ -35,7 +36,7 @@ ModuleBase::matrix SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>:
         const char T_char = 'T';
         const char N_char = 'N';
         const int one_int = 1;
-        const std::complex<double> one_float = {1.0, 0.0}, zero_float = {0.0, 0.0};        
+        const std::complex<double> one_float = {1.0, 0.0}, zero_float = {0.0, 0.0};
         pzgemm_(&N_char,
                 &T_char,
                 &nw,
@@ -59,8 +60,8 @@ ModuleBase::matrix SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>:
 #endif
     }
 #ifdef __MPI
-    MPI_Allreduce(MecMulP.c, orbMulP.c, this->nspin_*nlocal, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-#endif 
+    MPI_Allreduce(MecMulP.c, orbMulP.c, this->nspin_ * nlocal, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+#endif
 
     return orbMulP;
 }
diff --git a/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h b/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h
index 24151fb833..f2a89a7f86 100644
--- a/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h
+++ b/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h
@@ -1,9 +1,6 @@
 #ifndef SPIN_CONSTRAIN_H
 #define SPIN_CONSTRAIN_H
 
-#include <map>
-#include <vector>
-
 #include "module_base/constants.h"
 #include "module_base/tool_quit.h"
 #include "module_base/tool_title.h"
@@ -14,90 +11,93 @@
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 #include "module_hsolver/hsolver.h"
 
+#include <map>
+#include <vector>
+
 struct ScAtomData;
 
 template <typename FPTYPE, typename Device = base_device::DEVICE_CPU>
 class SpinConstrain
 {
-public:
+  public:
     /**
      * pubic interface for spin-constrained DFT
-    */
+     */
     /// initialize spin-constrained DFT
-  void init_sc(double sc_thr_in,
-               int nsc_in,
-               int nsc_min_in,
-               double alpha_trial_in,
-               double sccut_in,
-               bool decay_grad_switch_in,
-               const UnitCell& ucell,
-               std::string sc_file,
-               int NPOL,
-               Parallel_Orbitals* ParaV_in,
-               int nspin_in,
-               K_Vectors kv_in,
-               std::string KS_SOLVER_in,
-               LCAO_Matrix* LM_in,
-               hsolver::HSolver<FPTYPE, Device>* phsol_in,
-               hamilt::Hamilt<FPTYPE, Device>* p_hamilt_in,
-               psi::Psi<FPTYPE>* psi_in,
-               elecstate::ElecState* pelec_in);
+    void init_sc(double sc_thr_in,
+                 int nsc_in,
+                 int nsc_min_in,
+                 double alpha_trial_in,
+                 double sccut_in,
+                 bool decay_grad_switch_in,
+                 const UnitCell& ucell,
+                 std::string sc_file,
+                 int NPOL,
+                 Parallel_Orbitals* ParaV_in,
+                 int nspin_in,
+                 K_Vectors kv_in,
+                 std::string KS_SOLVER_in,
+                 LCAO_Matrix* LM_in,
+                 hsolver::HSolver<FPTYPE, Device>* phsol_in,
+                 hamilt::Hamilt<FPTYPE, Device>* p_hamilt_in,
+                 psi::Psi<FPTYPE>* psi_in,
+                 elecstate::ElecState* pelec_in);
 
-  /// calculate h_lambda operator for spin-constrained DFT
-  void cal_h_lambda(std::complex<double>* h_lambda, const std::complex<double>* Sloc2, bool column_major, int isk);
+    /// calculate h_lambda operator for spin-constrained DFT
+    void cal_h_lambda(std::complex<double>* h_lambda, const std::complex<double>* Sloc2, bool column_major, int isk);
 
-  void cal_MW(const int& step, LCAO_Matrix* LM, bool print = false);
+    void cal_MW(const int& step, LCAO_Matrix* LM, bool print = false);
 
-  ModuleBase::matrix cal_MW_k(LCAO_Matrix* LM, const std::vector<std::vector<std::complex<double>>>& dm);
+    ModuleBase::matrix cal_MW_k(LCAO_Matrix* LM, const std::vector<std::vector<std::complex<double>>>& dm);
 
-  void cal_mw_from_lambda(int i_step);
+    void cal_mw_from_lambda(int i_step);
 
-  double cal_escon();
+    double cal_escon();
 
-  double get_escon();
+    double get_escon();
 
-  std::vector<std::vector<std::vector<double>>> convert(const ModuleBase::matrix& orbMulP);
+    std::vector<std::vector<std::vector<double>>> convert(const ModuleBase::matrix& orbMulP);
 
-  void run_lambda_loop(int outer_step);
+    void run_lambda_loop(int outer_step);
 
-  /// lambda loop helper functions
-  bool check_rms_stop(int outer_step, int i_step, double rms_error, double duration, double total_duration);
+    /// lambda loop helper functions
+    bool check_rms_stop(int outer_step, int i_step, double rms_error, double duration, double total_duration);
 
-  /// apply restriction
-  void check_restriction(const std::vector<ModuleBase::Vector3<double>>& search, double& alpha_trial);
+    /// apply restriction
+    void check_restriction(const std::vector<ModuleBase::Vector3<double>>& search, double& alpha_trial);
 
-  /// check gradient decay
-  bool check_gradient_decay(std::vector<ModuleBase::Vector3<double>> new_spin,
-                            std::vector<ModuleBase::Vector3<double>> old_spin,
-                            std::vector<ModuleBase::Vector3<double>> new_delta_lambda,
-                            std::vector<ModuleBase::Vector3<double>> old_delta_lambda,
-                            bool print = false);
-  /// @brief  calculate alpha_opt
-  double cal_alpha_opt(std::vector<ModuleBase::Vector3<double>> spin,
-                       std::vector<ModuleBase::Vector3<double>> spin_plus,
-                       const double alpha_trial);
-  /// print header info
-  void print_header();
-  /// print termination message
-  void print_termination();
+    /// check gradient decay
+    bool check_gradient_decay(std::vector<ModuleBase::Vector3<double>> new_spin,
+                              std::vector<ModuleBase::Vector3<double>> old_spin,
+                              std::vector<ModuleBase::Vector3<double>> new_delta_lambda,
+                              std::vector<ModuleBase::Vector3<double>> old_delta_lambda,
+                              bool print = false);
+    /// @brief  calculate alpha_opt
+    double cal_alpha_opt(std::vector<ModuleBase::Vector3<double>> spin,
+                         std::vector<ModuleBase::Vector3<double>> spin_plus,
+                         const double alpha_trial);
+    /// print header info
+    void print_header();
+    /// print termination message
+    void print_termination();
 
-  /// calculate mw from AorbMulP matrix
-  void calculate_MW(const std::vector<std::vector<std::vector<double>>>& AorbMulP);
+    /// calculate mw from AorbMulP matrix
+    void calculate_MW(const std::vector<std::vector<std::vector<double>>>& AorbMulP);
 
-  /// print mi
-  void print_Mi(bool print = false);
+    /// print mi
+    void print_Mi(bool print = false);
 
-  /// print magnetic force, defined as \frac{\delta{L}}/{\delta{Mi}} = -lambda[iat])
-  void print_Mag_Force();
+    /// print magnetic force, defined as \frac{\delta{L}}/{\delta{Mi}} = -lambda[iat])
+    void print_Mag_Force();
 
-  /// collect_mw from matrix multiplication result
-  void collect_MW(ModuleBase::matrix& MecMulP, const ModuleBase::ComplexMatrix& mud, int nw, int isk);
+    /// collect_mw from matrix multiplication result
+    void collect_MW(ModuleBase::matrix& MecMulP, const ModuleBase::ComplexMatrix& mud, int nw, int isk);
 
-public:
+  public:
     /**
      * important outter class pointers used in spin-constrained DFT
-    */
-    Parallel_Orbitals *ParaV = nullptr;
+     */
+    Parallel_Orbitals* ParaV = nullptr;
     hsolver::HSolver<FPTYPE, Device>* phsol = nullptr;
     hamilt::Hamilt<FPTYPE, Device>* p_hamilt = nullptr;
     psi::Psi<FPTYPE>* psi = nullptr;
@@ -110,7 +110,7 @@ class SpinConstrain
   public:
     /**
      * pubic methods for setting and getting spin-constrained DFT parameters
-    */
+     */
     /// Public method to access the Singleton instance
     static SpinConstrain& getScInstance();
     /// Delete copy and move constructors and assign operators
@@ -215,10 +215,10 @@ class SpinConstrain
     void bcast_ScData(std::string sc_file, int nat, int ntype);
 
   private:
-    SpinConstrain(){};                               // Private constructor
-    ~SpinConstrain(){};                              // Destructor
-    SpinConstrain& operator=(SpinConstrain const&) = delete;  // Copy assign
-    SpinConstrain& operator=(SpinConstrain &&) = delete;      // Move assign
+    SpinConstrain(){};                                       // Private constructor
+    ~SpinConstrain(){};                                      // Destructor
+    SpinConstrain& operator=(SpinConstrain const&) = delete; // Copy assign
+    SpinConstrain& operator=(SpinConstrain&&) = delete;      // Move assign
     std::map<int, std::vector<ScAtomData>> ScData;
     std::map<int, double> ScDecayGrad; // in unit of uB^2/eV
     std::vector<double> decay_grad_;   // in unit of uB^2/Ry
@@ -227,7 +227,7 @@ class SpinConstrain
     std::map<int, std::map<int, int>> lnchiCounts;
     std::vector<ModuleBase::Vector3<double>> lambda_; // in unit of Ry/uB in code, but in unit of meV/uB in input file
     std::vector<ModuleBase::Vector3<double>> target_mag_; // in unit of uB
-    std::vector<ModuleBase::Vector3<double>> Mi_; // in unit of uB
+    std::vector<ModuleBase::Vector3<double>> Mi_;         // in unit of uB
     double escon_ = 0.0;
     int nspin_ = 0;
     int npol_ = 1;
@@ -240,15 +240,15 @@ class SpinConstrain
     double sc_thr_; // in unit of uB
     std::vector<ModuleBase::Vector3<int>> constrain_;
     bool debug = false;
-    double alpha_trial_; // in unit of Ry/uB^2 = 0.01 eV/uB^2
+    double alpha_trial_;      // in unit of Ry/uB^2 = 0.01 eV/uB^2
     double restrict_current_; // in unit of Ry/uB = 3 eV/uB
 };
 
-
 /**
  * @brief struct for storing parameters of non-collinear spin-constrained DFT
  */
-struct ScAtomData {
+struct ScAtomData
+{
     int index;
     std::vector<double> lambda;
     std::vector<double> target_mag;
diff --git a/source/module_hamilt_lcao/module_deltaspin/test/cal_h_lambda_test.cpp b/source/module_hamilt_lcao/module_deltaspin/test/cal_h_lambda_test.cpp
index f3312716b7..8a340dc10f 100644
--- a/source/module_hamilt_lcao/module_deltaspin/test/cal_h_lambda_test.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/test/cal_h_lambda_test.cpp
@@ -1,10 +1,14 @@
 #include "../spin_constrain.h"
+#include "module_cell/klist.h"
+
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
-
-#include "module_cell/klist.h"
-K_Vectors::K_Vectors(){}
-K_Vectors::~K_Vectors(){}
+K_Vectors::K_Vectors()
+{
+}
+K_Vectors::~K_Vectors()
+{
+}
 
 /************************************************
  *  unit test of the function of cal_h_lambda
@@ -33,23 +37,17 @@ TEST_F(SpinConstrainTest, CalHLambda)
     paraV.set_serial(nrow, ncol);
     sc.set_ParaV(&paraV);
     EXPECT_EQ(sc.ParaV->nloc, 4);
-    std::map<int, int> atomCounts = {
-        {0, 1}
-    };
-    std::map<int, int> orbitalCounts = {
-        {0, 1}
-    };
+    std::map<int, int> atomCounts = {{0, 1}};
+    std::map<int, int> orbitalCounts = {{0, 1}};
     sc.set_atomCounts(atomCounts);
     sc.set_orbitalCounts(orbitalCounts);
     sc.set_npol(2);
     std::vector<std::complex<double>> h_lambda(sc.ParaV->nloc);
     std::fill(h_lambda.begin(), h_lambda.end(), std::complex<double>(0, 0));
-    std::vector<std::complex<double>> Sloc2 = {
-        std::complex<double>{1.0, 0.0},
-        std::complex<double>{0.0, 0.0},
-        std::complex<double>{0.0, 0.0},
-        std::complex<double>{1.0, 0.0}
-    };
+    std::vector<std::complex<double>> Sloc2 = {std::complex<double>{1.0, 0.0},
+                                               std::complex<double>{0.0, 0.0},
+                                               std::complex<double>{0.0, 0.0},
+                                               std::complex<double>{1.0, 0.0}};
     ModuleBase::Vector3<double>* sc_lambda = new ModuleBase::Vector3<double>[1];
     sc_lambda[0][0] = 1.0;
     sc_lambda[0][1] = 1.0;
@@ -58,12 +56,10 @@ TEST_F(SpinConstrainTest, CalHLambda)
     // column_major = true
     sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), true, 0);
     // h_lambda = - [lambda_x * sigma_x + lambda_y * sigma_y + lambda_z * sigma_z] * Sloc2
-    std::vector<std::complex<double>> columnMajor_h_lambda = {
-        std::complex<double>{-1.0, 0.0 },
-        std::complex<double>{-1.0, 1.0 },
-        std::complex<double>{-1.0, -1.0},
-        std::complex<double>{1.0,  0.0 }
-    };
+    std::vector<std::complex<double>> columnMajor_h_lambda = {std::complex<double>{-1.0, 0.0},
+                                                              std::complex<double>{-1.0, 1.0},
+                                                              std::complex<double>{-1.0, -1.0},
+                                                              std::complex<double>{1.0, 0.0}};
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), columnMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), columnMajor_h_lambda[0].imag());
     EXPECT_DOUBLE_EQ(h_lambda[1].real(), columnMajor_h_lambda[1].real());
@@ -75,12 +71,10 @@ TEST_F(SpinConstrainTest, CalHLambda)
     // column_major = false
     delete[] sc_lambda;
     sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), false, 0);
-    std::vector<std::complex<double>> rowMajor_h_lambda = {
-        std::complex<double>{-1.0, 0.0 },
-        std::complex<double>{-1.0, -1.0},
-        std::complex<double>{-1.0, 1.0 },
-        std::complex<double>{1.0,  0.0 }
-    };
+    std::vector<std::complex<double>> rowMajor_h_lambda = {std::complex<double>{-1.0, 0.0},
+                                                           std::complex<double>{-1.0, -1.0},
+                                                           std::complex<double>{-1.0, 1.0},
+                                                           std::complex<double>{1.0, 0.0}};
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), rowMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), rowMajor_h_lambda[0].imag());
     EXPECT_DOUBLE_EQ(h_lambda[1].real(), rowMajor_h_lambda[1].real());
@@ -102,20 +96,14 @@ TEST_F(SpinConstrainTest, CalHLambdaS2)
     paraV.set_serial(nrow, ncol);
     sc.set_ParaV(&paraV);
     EXPECT_EQ(sc.ParaV->nloc, 1);
-    std::map<int, int> atomCounts = {
-        {0, 1}
-    };
-    std::map<int, int> orbitalCounts = {
-        {0, 1}
-    };
+    std::map<int, int> atomCounts = {{0, 1}};
+    std::map<int, int> orbitalCounts = {{0, 1}};
     sc.set_atomCounts(atomCounts);
     sc.set_orbitalCounts(orbitalCounts);
     sc.set_npol(1);
     std::vector<std::complex<double>> h_lambda(sc.ParaV->nloc);
     std::fill(h_lambda.begin(), h_lambda.end(), std::complex<double>(0, 0));
-    std::vector<std::complex<double>> Sloc2 = {
-        std::complex<double>{1.0, 0.0}
-    };
+    std::vector<std::complex<double>> Sloc2 = {std::complex<double>{1.0, 0.0}};
     ModuleBase::Vector3<double>* sc_lambda = new ModuleBase::Vector3<double>[1];
     sc_lambda[0][0] = 0.0;
     sc_lambda[0][1] = 0.0;
@@ -125,14 +113,14 @@ TEST_F(SpinConstrainTest, CalHLambdaS2)
     sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), true, 0);
     // h_lambda = -Sloc2[icc] * this->lambda_[iat2][2]
     std::vector<std::complex<double>> columnMajor_h_lambda = {
-        std::complex<double>{-2.0, 0.0 },
+        std::complex<double>{-2.0, 0.0},
     };
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), columnMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), columnMajor_h_lambda[0].imag());
     sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), true, 1);
     // h_lambda = -Sloc2[icc] * (-this->lambda_[iat2][2])
     std::vector<std::complex<double>> columnMajor_h_lambda1 = {
-        std::complex<double>{2.0, 0.0 },
+        std::complex<double>{2.0, 0.0},
     };
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), columnMajor_h_lambda1[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), columnMajor_h_lambda1[0].imag());
@@ -141,14 +129,14 @@ TEST_F(SpinConstrainTest, CalHLambdaS2)
     // h_lambda = -Sloc2[icc] * this->lambda_[iat2][2]
     sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), false, 0);
     std::vector<std::complex<double>> rowMajor_h_lambda = {
-        std::complex<double>{-2.0, 0.0 },
+        std::complex<double>{-2.0, 0.0},
     };
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), rowMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), rowMajor_h_lambda[0].imag());
     // h_lambda = -Sloc2[icc] * (-this->lambda_[iat2][2])
     sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), false, 1);
     std::vector<std::complex<double>> rowMajor_h_lambda1 = {
-        std::complex<double>{2.0, 0.0 },
+        std::complex<double>{2.0, 0.0},
     };
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), rowMajor_h_lambda1[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), rowMajor_h_lambda1[0].imag());
diff --git a/source/module_hamilt_lcao/module_deltaspin/test/template_helpers_test.cpp b/source/module_hamilt_lcao/module_deltaspin/test/template_helpers_test.cpp
index 993a65184a..1d6f409cb4 100644
--- a/source/module_hamilt_lcao/module_deltaspin/test/template_helpers_test.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/test/template_helpers_test.cpp
@@ -1,9 +1,9 @@
-#include <algorithm>
-#include <string>
-
 #include "../spin_constrain.h"
+
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
+#include <algorithm>
+#include <string>
 
 /************************************************
  *  unit test of functions in template_helpers.cpp
diff --git a/source/module_hamilt_lcao/module_dftu/dftu.h b/source/module_hamilt_lcao/module_dftu/dftu.h
index db86ab26ad..376ce30714 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu.h
+++ b/source/module_hamilt_lcao/module_dftu/dftu.h
@@ -5,16 +5,16 @@
 #ifndef DFTU_H
 #define DFTU_H
 
+#include "module_basis/module_ao/parallel_orbitals.h"
 #include "module_cell/klist.h"
 #include "module_cell/unitcell.h"
-#include "module_basis/module_ao/parallel_orbitals.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
-#include "module_elecstate/module_charge/charge_mixing.h"
-#include "module_hamilt_general/hamilt.h"
 #include "module_elecstate/elecstate.h"
-#include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
+#include "module_elecstate/module_charge/charge_mixing.h"
 #include "module_elecstate/module_dm/density_matrix.h"
+#include "module_hamilt_general/hamilt.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/force_stress_arrays.h" // mohan add 2024-06-15
+#include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
 
 #include <string>
 
@@ -29,7 +29,7 @@ class DFTU
 {
 
   public:
-    DFTU(); // constructor
+    DFTU();  // constructor
     ~DFTU(); // deconstructor
 
     //=============================================================
@@ -44,22 +44,25 @@ class DFTU
 
     // calculate the energy correction
     void cal_energy_correction(const int istep);
-    double get_energy(){return EU;}
+    double get_energy()
+    {
+        return EU;
+    }
     void uramping_update(); // update U by uramping
-    bool u_converged(); // check if U is converged
+    bool u_converged();     // check if U is converged
 
-    double* U; // U (Hubbard parameter U)
+    double* U;              // U (Hubbard parameter U)
     std::vector<double> U0; // U0 (target Hubbard parameter U0)
-    int* orbital_corr; //
-    double uramping; // increase U by uramping, default is -1.0
-    int omc; // occupation matrix control
-    int mixing_dftu; //whether to mix locale
+    int* orbital_corr;      //
+    double uramping;        // increase U by uramping, default is -1.0
+    int omc;                // occupation matrix control
+    int mixing_dftu;        // whether to mix locale
 
     double EU; //+U energy
   private:
     LCAO_Matrix* LM;
     int cal_type = 3; // 1:dftu_tpye=1, dc=1; 2:dftu_type=1, dc=2; 3:dftu_tpye=2, dc=1; 4:dftu_tpye=2, dc=2;
-    
+
     // transform between iwt index and it, ia, L, N and m index
     std::vector<std::vector<std::vector<std::vector<std::vector<int>>>>>
         iatlnmipol2iwt; // iatlnm2iwt[iat][l][n][m][ipol]
@@ -69,7 +72,10 @@ class DFTU
     // For calculating contribution to Hamiltonian matrices
     //=============================================================
   public:
-    void cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot, const std::vector<int>& isk, const std::complex<double>* sk);
+    void cal_eff_pot_mat_complex(const int ik,
+                                 std::complex<double>* eff_pot,
+                                 const std::vector<int>& isk,
+                                 const std::complex<double>* sk);
     void cal_eff_pot_mat_real(const int ik, double* eff_pot, const std::vector<int>& isk, const double* sk);
     void cal_eff_pot_mat_R_double(const int ispin, double* SR, double* HR);
     void cal_eff_pot_mat_R_complex_double(const int ispin, std::complex<double>* SR, std::complex<double>* HR);
@@ -81,8 +87,15 @@ class DFTU
     //=============================================================
   public:
     // calculate the local occupation number matrix
-    void cal_occup_m_k(const int iter, const std::vector<std::vector<std::complex<double>>>& dm_k, const K_Vectors& kv, const double& mixing_beta, hamilt::Hamilt<std::complex<double>>* p_ham);
-    void cal_occup_m_gamma(const int iter, const std::vector<std::vector<double>>& dm_gamma, const double& mixing_beta, hamilt::Hamilt<double>* p_ham);
+    void cal_occup_m_k(const int iter,
+                       const std::vector<std::vector<std::complex<double>>>& dm_k,
+                       const K_Vectors& kv,
+                       const double& mixing_beta,
+                       hamilt::Hamilt<std::complex<double>>* p_ham);
+    void cal_occup_m_gamma(const int iter,
+                           const std::vector<std::vector<double>>& dm_gamma,
+                           const double& mixing_beta,
+                           hamilt::Hamilt<double>* p_ham);
 
     // dftu can be calculated only after locale has been initialed
     bool initialed_locale = false;
@@ -92,13 +105,14 @@ class DFTU
     void zero_locale();
     void mix_locale(const double& mixing_beta);
 
-public:
+  public:
     // local occupancy matrix of the correlated subspace
     // locale: the out put local occupation number matrix of correlated electrons in the current electronic step
     // locale_save: the input local occupation number matrix of correlated electrons in the current electronic step
     std::vector<std::vector<std::vector<std::vector<ModuleBase::matrix>>>> locale; // locale[iat][l][n][spin](m1,m2)
-    std::vector<std::vector<std::vector<std::vector<ModuleBase::matrix>>>> locale_save; // locale_save[iat][l][n][spin](m1,m2)
-private:
+    std::vector<std::vector<std::vector<std::vector<ModuleBase::matrix>>>>
+        locale_save; // locale_save[iat][l][n][spin](m1,m2)
+  private:
     //=============================================================
     // In dftu_tools.cpp
     // For calculating onsite potential, which is used
@@ -122,89 +136,80 @@ class DFTU
     // Subroutines for folding S and dS matrix
     //=============================================================
 
-	void fold_dSR_gamma(
-			const UnitCell &ucell,
-			const Parallel_Orbitals &pv,
-			Grid_Driver* gd,
-			double* dsloc_x,
-			double* dsloc_y,
-			double* dsloc_z,
-			double* dh_r,
-			const int dim1, 
-			const int dim2, 
-			double* dSR_gamma);
+    void fold_dSR_gamma(const UnitCell& ucell,
+                        const Parallel_Orbitals& pv,
+                        Grid_Driver* gd,
+                        double* dsloc_x,
+                        double* dsloc_y,
+                        double* dsloc_z,
+                        double* dh_r,
+                        const int dim1,
+                        const int dim2,
+                        double* dSR_gamma);
 
     // dim = 0 : S, for Hamiltonian
     // dim = 1-3 : dS, for force
     // dim = 4-6 : dS * dR, for stress
 
-    void folding_matrix_k(
-        ForceStressArrays &fsr,
-        const Parallel_Orbitals &pv,
-		const int ik, 
-		const int dim1, 
-		const int dim2, 
-		std::complex<double>* mat_k, 
-		const std::vector<ModuleBase::Vector3<double>> &kvec_d);
-
+    void folding_matrix_k(ForceStressArrays& fsr,
+                          const Parallel_Orbitals& pv,
+                          const int ik,
+                          const int dim1,
+                          const int dim2,
+                          std::complex<double>* mat_k,
+                          const std::vector<ModuleBase::Vector3<double>>& kvec_d);
 
     /**
      * @brief new function of folding_S_matrix
      * only for Hamiltonian now, for force and stress will be developed later
      * use HContainer as input and output in mat_k
-    */
-   void folding_matrix_k_new(const int ik, hamilt::Hamilt<std::complex<double>>* p_ham);
+     */
+    void folding_matrix_k_new(const int ik, hamilt::Hamilt<std::complex<double>>* p_ham);
 
     //=============================================================
     // In dftu_force.cpp
     // For calculating force and stress fomr DFT+U
     //=============================================================
   public:
-
-   void force_stress(const elecstate::ElecState* pelec,
-		   LCAO_Matrix& lm,
-		   const Parallel_Orbitals& pv,
-		   ForceStressArrays& fsr,
-		   ModuleBase::matrix& force_dftu,
-		   ModuleBase::matrix& stress_dftu,
-		   const K_Vectors& kv);
+    void force_stress(const elecstate::ElecState* pelec,
+                      LCAO_Matrix& lm,
+                      const Parallel_Orbitals& pv,
+                      ForceStressArrays& fsr,
+                      ModuleBase::matrix& force_dftu,
+                      ModuleBase::matrix& stress_dftu,
+                      const K_Vectors& kv);
 
   private:
-
-   void cal_force_k(
-		   ForceStressArrays &fsr,
-		   const Parallel_Orbitals &pv,
-		   const int ik,
-		   const std::complex<double>* rho_VU,
-		   ModuleBase::matrix& force_dftu,
-		   const std::vector<ModuleBase::Vector3<double>>& kvec_d);
-
-    void cal_stress_k(
-			ForceStressArrays &fsr,
-			const Parallel_Orbitals &pv,
-			const int ik,
-			const std::complex<double>* rho_VU,
-			ModuleBase::matrix& stress_dftu,
-			const std::vector<ModuleBase::Vector3<double>>& kvec_d);
-
-	void cal_force_gamma(
-			const double* rho_VU, 
-			const Parallel_Orbitals &pv,
-			double* dsloc_x,
-			double* dsloc_y,
-			double* dsloc_z,
-			ModuleBase::matrix& force_dftu);
-
-	void cal_stress_gamma(
-			const UnitCell &ucell,
-			const Parallel_Orbitals &pv,
-			Grid_Driver* gd,
-			double* dsloc_x,
-			double* dsloc_y,
-			double* dsloc_z,
-			double* dh_r,
-			const double* rho_VU, 
-			ModuleBase::matrix& stress_dftu);
+    void cal_force_k(ForceStressArrays& fsr,
+                     const Parallel_Orbitals& pv,
+                     const int ik,
+                     const std::complex<double>* rho_VU,
+                     ModuleBase::matrix& force_dftu,
+                     const std::vector<ModuleBase::Vector3<double>>& kvec_d);
+
+    void cal_stress_k(ForceStressArrays& fsr,
+                      const Parallel_Orbitals& pv,
+                      const int ik,
+                      const std::complex<double>* rho_VU,
+                      ModuleBase::matrix& stress_dftu,
+                      const std::vector<ModuleBase::Vector3<double>>& kvec_d);
+
+    void cal_force_gamma(const double* rho_VU,
+                         const Parallel_Orbitals& pv,
+                         double* dsloc_x,
+                         double* dsloc_y,
+                         double* dsloc_z,
+                         ModuleBase::matrix& force_dftu);
+
+    void cal_stress_gamma(const UnitCell& ucell,
+                          const Parallel_Orbitals& pv,
+                          Grid_Driver* gd,
+                          double* dsloc_x,
+                          double* dsloc_y,
+                          double* dsloc_z,
+                          double* dh_r,
+                          const double* rho_VU,
+                          ModuleBase::matrix& stress_dftu);
 
     //=============================================================
     // In dftu_io.cpp
@@ -214,7 +219,7 @@ class DFTU
     void output();
 
   private:
-    void write_occup_m(std::ofstream& ofs, bool diag=false);
+    void write_occup_m(std::ofstream& ofs, bool diag = false);
     void read_occup_m(const std::string& fn);
     void local_occup_bcast();
 
@@ -228,10 +233,10 @@ class DFTU
     void cal_slater_UJ(double** rho, const int& nrxx);
 
   private:
-    double lambda; // the parameter in Yukawa potential
+    double lambda;                                                 // the parameter in Yukawa potential
     std::vector<std::vector<std::vector<std::vector<double>>>> Fk; // slater integral:Fk[T][L][N][k]
-    std::vector<std::vector<std::vector<double>>> U_Yukawa; // U_Yukawa[T][L][N]
-    std::vector<std::vector<std::vector<double>>> J_Yukawa; // J_Yukawa[T][L][N]
+    std::vector<std::vector<std::vector<double>>> U_Yukawa;        // U_Yukawa[T][L][N]
+    std::vector<std::vector<std::vector<double>>> J_Yukawa;        // J_Yukawa[T][L][N]
 
     void cal_slater_Fk(const int L, const int T); // L:angular momnet, T:atom type
     void cal_yukawa_lambda(double** rho, const int& nrxx);
@@ -244,15 +249,15 @@ class DFTU
      * @brief get the density matrix of target spin
      * nspin = 1 and 4 : ispin should be 0
      * nspin = 2 : ispin should be 0/1
-    */
+     */
     const hamilt::HContainer<double>* get_dmr(int ispin) const;
     /**
      * @brief set the density matrix for DFT+U calculation
      * if the density matrix is not set or set to nullptr, the DFT+U calculation will not be performed
-    */
+     */
     void set_dmr(const elecstate::DensityMatrix<double, double>* dm_in_dftu_d);
     void set_dmr(const elecstate::DensityMatrix<std::complex<double>, double>* dm_in_dftu_cd);
-  
+
   private:
     const elecstate::DensityMatrix<double, double>* dm_in_dftu_d = nullptr;
     const elecstate::DensityMatrix<std::complex<double>, double>* dm_in_dftu_cd = nullptr;
@@ -262,6 +267,6 @@ class DFTU
 
 namespace GlobalC
 {
-	extern ModuleDFTU::DFTU dftu;
+extern ModuleDFTU::DFTU dftu;
 }
 #endif
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp b/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
index 0abaac6ef7..70c3800089 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
@@ -1,43 +1,42 @@
 #include "dftu.h"
 #include "module_base/timer.h"
-#include "module_hamilt_pw/hamilt_pwdft/global.h"
 #include "module_cell/module_neighbor/sltk_grid_driver.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
 
 namespace ModuleDFTU
 {
 
-void DFTU::fold_dSR_gamma(
-    const UnitCell &ucell,
-    const Parallel_Orbitals &pv,
-    Grid_Driver* gd,
-    double* dsloc_x,
-    double* dsloc_y,
-    double* dsloc_z,
-    double* dh_r,
-    const int dim1, 
-    const int dim2, 
-    double* dSR_gamma)
+void DFTU::fold_dSR_gamma(const UnitCell& ucell,
+                          const Parallel_Orbitals& pv,
+                          Grid_Driver* gd,
+                          double* dsloc_x,
+                          double* dsloc_y,
+                          double* dsloc_z,
+                          double* dh_r,
+                          const int dim1,
+                          const int dim2,
+                          double* dSR_gamma)
 {
     ModuleBase::TITLE("DFTU", "fold_dSR_gamma");
 
     ModuleBase::GlobalFunc::ZEROS(dSR_gamma, pv.nloc);
 
     double* dS_ptr = nullptr;
-	if(dim1 == 0) 
-	{
-		dS_ptr = dsloc_x;
-	}
-	else if(dim1 == 1) 
-	{
-		dS_ptr = dsloc_y;
-	}
-	else if (dim1 == 2) 
-	{
-		dS_ptr = dsloc_z;
-	}
+    if (dim1 == 0)
+    {
+        dS_ptr = dsloc_x;
+    }
+    else if (dim1 == 1)
+    {
+        dS_ptr = dsloc_y;
+    }
+    else if (dim1 == 2)
+    {
+        dS_ptr = dsloc_z;
+    }
 
     int nnr = 0;
     ModuleBase::Vector3<double> tau1, tau2, dtau;
@@ -62,10 +61,10 @@ void DFTU::fold_dSR_gamma(
                 double distance = dtau.norm() * ucell.lat0;
                 double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
                 bool adj = false;
-				if (distance < rcut)
-				{
-					adj = true;
-				}
+                if (distance < rcut)
+                {
+                    adj = true;
+                }
                 else if (distance >= rcut)
                 {
                     for (int ad0 = 0; ad0 < gd->getAdjacentNum() + 1; ++ad0)
@@ -96,51 +95,53 @@ void DFTU::fold_dSR_gamma(
                         const int jj0 = jj / GlobalV::NPOL;
                         const int iw1_all = start1 + jj0;
                         const int mu = pv.global2local_row(iw1_all);
-						if (mu < 0) 
-						{
-							continue;
-						}
+                        if (mu < 0)
+                        {
+                            continue;
+                        }
 
                         for (int kk = 0; kk < atom2->nw * GlobalV::NPOL; ++kk)
                         {
                             const int kk0 = kk / GlobalV::NPOL;
                             const int iw2_all = start2 + kk0;
                             const int nu = pv.global2local_col(iw2_all);
-							if (nu < 0) 
-							{
-								continue;
-							}
+                            if (nu < 0)
+                            {
+                                continue;
+                            }
 
                             dSR_gamma[nu * pv.nrow + mu] += dS_ptr[nnr] * dh_r[nnr * 3 + dim2];
 
                             ++nnr;
                         } // kk
-                    } // jj
-                } // adj
-            } // ad
-        } // I1
-    } // T1
+                    }     // jj
+                }         // adj
+            }             // ad
+        }                 // I1
+    }                     // T1
 
     return;
 }
 
-void DFTU::folding_matrix_k(
-        ForceStressArrays &fsr,
-        const Parallel_Orbitals &pv,
-		const int ik, 
-		const int dim1, 
-		const int dim2, 
-		std::complex<double>* mat_k, 
-		const std::vector<ModuleBase::Vector3<double>> &kvec_d)
+void DFTU::folding_matrix_k(ForceStressArrays& fsr,
+                            const Parallel_Orbitals& pv,
+                            const int ik,
+                            const int dim1,
+                            const int dim2,
+                            std::complex<double>* mat_k,
+                            const std::vector<ModuleBase::Vector3<double>>& kvec_d)
 {
     ModuleBase::TITLE("DFTU", "folding_matrix_k");
     ModuleBase::timer::tick("DFTU", "folding_matrix_k");
     ModuleBase::GlobalFunc::ZEROS(mat_k, pv.nloc);
 
     double* mat_ptr;
-    if      (dim1 == 1 || dim1 == 4) mat_ptr = fsr.DSloc_Rx;
-    else if (dim1 == 2 || dim1 == 5) mat_ptr = fsr.DSloc_Ry;
-    else if (dim1 == 3 || dim1 == 6) mat_ptr = fsr.DSloc_Rz;
+    if (dim1 == 1 || dim1 == 4)
+        mat_ptr = fsr.DSloc_Rx;
+    else if (dim1 == 2 || dim1 == 5)
+        mat_ptr = fsr.DSloc_Ry;
+    else if (dim1 == 3 || dim1 == 6)
+        mat_ptr = fsr.DSloc_Rz;
 
     int nnr = 0;
     ModuleBase::Vector3<double> dtau;
@@ -227,13 +228,15 @@ void DFTU::folding_matrix_k(
                         // the index of orbitals in this processor
                         const int iw1_all = start1 + ii;
                         const int mu = pv.global2local_row(iw1_all);
-                        if (mu < 0) continue;
+                        if (mu < 0)
+                            continue;
 
                         for (int jj = 0; jj < atom2->nw * GlobalV::NPOL; jj++)
                         {
                             int iw2_all = start2 + jj;
                             const int nu = pv.global2local_col(iw2_all);
-                            if (nu < 0) continue;
+                            if (nu < 0)
+                                continue;
 
                             int iic;
                             if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
@@ -256,19 +259,18 @@ void DFTU::folding_matrix_k(
 
                             ++nnr;
                         } // kk
-                    } // jj
-                } // adj
+                    }     // jj
+                }         // adj
 
             } // ad
-        } // I1
-    } // T1
+        }     // I1
+    }         // T1
     ModuleBase::timer::tick("DFTU", "folding_matrix_k");
 
     return;
 }
 
-void DFTU::folding_matrix_k_new(const int ik,
-    hamilt::Hamilt<std::complex<double>>* p_ham)
+void DFTU::folding_matrix_k_new(const int ik, hamilt::Hamilt<std::complex<double>>* p_ham)
 {
     ModuleBase::TITLE("DFTU", "folding_matrix_k_new");
     ModuleBase::timer::tick("DFTU", "folding_matrix_k_new");
@@ -280,13 +282,13 @@ void DFTU::folding_matrix_k_new(const int ik,
     }
 
     // get SR and fold to mat_k
-    if(GlobalV::GAMMA_ONLY_LOCAL)
+    if (GlobalV::GAMMA_ONLY_LOCAL)
     {
         dynamic_cast<hamilt::HamiltLCAO<double, double>*>(p_ham)->updateSk(ik, hk_type);
     }
     else
     {
-        if(GlobalV::NSPIN != 4)
+        if (GlobalV::NSPIN != 4)
         {
             dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, hk_type);
         }
@@ -297,6 +299,4 @@ void DFTU::folding_matrix_k_new(const int ik,
     }
 }
 
-    
-
 } // namespace ModuleDFTU
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp b/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp
index a786e97a42..abfc612954 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp
@@ -6,7 +6,10 @@
 namespace ModuleDFTU
 {
 
-void DFTU::cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot, const std::vector<int>& isk, const std::complex<double>* sk)
+void DFTU::cal_eff_pot_mat_complex(const int ik,
+                                   std::complex<double>* eff_pot,
+                                   const std::vector<int>& isk,
+                                   const std::complex<double>* sk)
 {
     ModuleBase::TITLE("DFTU", "cal_eff_pot_mat");
     ModuleBase::timer::tick("DFTU", "cal_eff_pot_mat");
@@ -33,24 +36,43 @@ void DFTU::cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot,
     this->cal_VU_pot_mat_complex(spin, true, &VU[0]);
 
 #ifdef __MPI
-	pzgemm_(&transN, &transN,
-            &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
-            &half, 
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
-            sk, &one_int, &one_int, this->LM->ParaV->desc,
+    pzgemm_(&transN,
+            &transN,
+            &GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &half,
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU),
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
+            sk,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
             &zero,
-            eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
+            eff_pot,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc);
 #endif
 
     for (int irc = 0; irc < this->LM->ParaV->nloc; irc++)
         VU[irc] = eff_pot[irc];
 
 #ifdef __MPI
-  	pztranc_(&GlobalV::NLOCAL, &GlobalV::NLOCAL, 
-            &one, 
-            &VU[0], &one_int, &one_int, this->LM->ParaV->desc, 
-            &one, 
-            eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
+    pztranc_(&GlobalV::NLOCAL,
+             &GlobalV::NLOCAL,
+             &one,
+             &VU[0],
+             &one_int,
+             &one_int,
+             this->LM->ParaV->desc,
+             &one,
+             eff_pot,
+             &one_int,
+             &one_int,
+             this->LM->ParaV->desc);
 #endif
 
     ModuleBase::timer::tick("DFTU", "cal_eff_pot_mat");
@@ -82,24 +104,43 @@ void DFTU::cal_eff_pot_mat_real(const int ik, double* eff_pot, const std::vector
     this->cal_VU_pot_mat_real(spin, 1, &VU[0]);
 
 #ifdef __MPI
-	pdgemm_(&transN, &transN,
-            &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
-            &half, 
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc, 
-            sk, &one_int, &one_int, this->LM->ParaV->desc,
+    pdgemm_(&transN,
+            &transN,
+            &GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &half,
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU),
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
+            sk,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
             &beta,
-            eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
+            eff_pot,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc);
 #endif
 
     for (int irc = 0; irc < this->LM->ParaV->nloc; irc++)
         VU[irc] = eff_pot[irc];
 
 #ifdef __MPI
-	pdtran_(&GlobalV::NLOCAL, &GlobalV::NLOCAL, 
-            &one, 
-            &VU[0], &one_int, &one_int, this->LM->ParaV->desc, 
-            &one, 
-            eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
+    pdtran_(&GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &one,
+            &VU[0],
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
+            &one,
+            eff_pot,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc);
 #endif
 
     ModuleBase::timer::tick("DFTU", "cal_eff_pot_mat");
@@ -116,21 +157,45 @@ void DFTU::cal_eff_pot_mat_R_double(const int ispin, double* SR, double* HR)
     this->cal_VU_pot_mat_real(ispin, 1, &VU[0]);
 
 #ifdef __MPI
-    pdgemm_(&transN, &transN,
-            &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
-            &half, 
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc, 
-            SR, &one_int, &one_int, this->LM->ParaV->desc,
+    pdgemm_(&transN,
+            &transN,
+            &GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &half,
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU),
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
+            SR,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
             &beta,
-            HR, &one_int, &one_int, this->LM->ParaV->desc);
+            HR,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc);
 
-    pdgemm_(&transN, &transN,
-            &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
-            &half, 
-            SR, &one_int, &one_int, this->LM->ParaV->desc, 
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
+    pdgemm_(&transN,
+            &transN,
+            &GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &half,
+            SR,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU),
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
             &one,
-            HR, &one_int, &one_int, this->LM->ParaV->desc);
+            HR,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc);
 #endif
 
     return;
@@ -146,24 +211,48 @@ void DFTU::cal_eff_pot_mat_R_complex_double(const int ispin, std::complex<double
     this->cal_VU_pot_mat_complex(ispin, 1, &VU[0]);
 
 #ifdef __MPI
-    pzgemm_(&transN, &transN,
-            &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
-            &half, 
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
-            SR, &one_int, &one_int, this->LM->ParaV->desc,
+    pzgemm_(&transN,
+            &transN,
+            &GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &half,
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU),
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
+            SR,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
             &zero,
-            HR, &one_int, &one_int, this->LM->ParaV->desc);
+            HR,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc);
 
-    pzgemm_(&transN, &transN,
-            &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
-            &half, 
-            SR, &one_int, &one_int, this->LM->ParaV->desc, 
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
+    pzgemm_(&transN,
+            &transN,
+            &GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &GlobalV::NLOCAL,
+            &half,
+            SR,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU),
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc,
             &one,
-            HR, &one_int, &one_int, this->LM->ParaV->desc);
+            HR,
+            &one_int,
+            &one_int,
+            this->LM->ParaV->desc);
 #endif
 
     return;
 }
 
-}
\ No newline at end of file
+} // namespace ModuleDFTU
\ No newline at end of file
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp b/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp
index da8d93a8b7..358b3e3029 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp
@@ -1,29 +1,51 @@
 #include "dftu.h"
 #include "module_base/timer.h"
-#include "module_hamilt_pw/hamilt_pwdft/global.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
 
 extern "C"
 {
-  //I'm not sure what's happenig here, but the interface in scalapack_connecter.h
-  //does not seem to work, so I'll use this one here
-  void pzgemm_(
-		const char *transa, const char *transb,
-		const int *M, const int *N, const int *K,
-		const std::complex<double> *alpha,
-		const std::complex<double> *A, const int *IA, const int *JA, const int *DESCA,
-		const std::complex<double> *B, const int *IB, const int *JB, const int *DESCB,
-		const std::complex<double> *beta,
-		std::complex<double> *C, const int *IC, const int *JC, const int *DESCC);
-  
-  void pdgemm_(
-		const char *transa, const char *transb,
-		const int *M, const int *N, const int *K,
-		const double *alpha,
-		const double *A, const int *IA, const int *JA, const int *DESCA,
-		const double *B, const int *IB, const int *JB, const int *DESCB,
-		const double *beta,
-		double *C, const int *IC, const int *JC, const int *DESCC);
+    // I'm not sure what's happenig here, but the interface in scalapack_connecter.h
+    // does not seem to work, so I'll use this one here
+    void pzgemm_(const char* transa,
+                 const char* transb,
+                 const int* M,
+                 const int* N,
+                 const int* K,
+                 const std::complex<double>* alpha,
+                 const std::complex<double>* A,
+                 const int* IA,
+                 const int* JA,
+                 const int* DESCA,
+                 const std::complex<double>* B,
+                 const int* IB,
+                 const int* JB,
+                 const int* DESCB,
+                 const std::complex<double>* beta,
+                 std::complex<double>* C,
+                 const int* IC,
+                 const int* JC,
+                 const int* DESCC);
+
+    void pdgemm_(const char* transa,
+                 const char* transb,
+                 const int* M,
+                 const int* N,
+                 const int* K,
+                 const double* alpha,
+                 const double* A,
+                 const int* IA,
+                 const int* JA,
+                 const int* DESCA,
+                 const double* B,
+                 const int* IB,
+                 const int* JB,
+                 const int* DESCB,
+                 const double* beta,
+                 double* C,
+                 const int* IC,
+                 const int* JC,
+                 const int* DESCC);
 }
 
 namespace ModuleDFTU
@@ -71,7 +93,8 @@ void DFTU::zero_locale()
 
     for (int T = 0; T < GlobalC::ucell.ntype; T++)
     {
-        if (orbital_corr[T] == -1) continue;
+        if (orbital_corr[T] == -1)
+            continue;
 
         for (int I = 0; I < GlobalC::ucell.atoms[T].na; I++)
         {
@@ -123,12 +146,12 @@ void DFTU::mix_locale(const double& mixing_beta)
                 {
                     if (GlobalV::NSPIN == 4)
                     {
-                        locale[iat][l][n][0] = locale[iat][l][n][0]*beta + locale_save[iat][l][n][0]*(1.0-beta);
+                        locale[iat][l][n][0] = locale[iat][l][n][0] * beta + locale_save[iat][l][n][0] * (1.0 - beta);
                     }
                     else if (GlobalV::NSPIN == 1 || GlobalV::NSPIN == 2)
                     {
-                        locale[iat][l][n][0] = locale[iat][l][n][0] * beta + locale_save[iat][l][n][0] * (1.0-beta);
-                        locale[iat][l][n][1] = locale[iat][l][n][1] * beta + locale_save[iat][l][n][1] * (1.0-beta);
+                        locale[iat][l][n][0] = locale[iat][l][n][0] * beta + locale_save[iat][l][n][0] * (1.0 - beta);
+                        locale[iat][l][n][1] = locale[iat][l][n][1] * beta + locale_save[iat][l][n][1] * (1.0 - beta);
                     }
                 }
             }
@@ -137,11 +160,11 @@ void DFTU::mix_locale(const double& mixing_beta)
     ModuleBase::timer::tick("DFTU", "mix_locale");
 }
 
-void DFTU::cal_occup_m_k(const int iter, 
-                        const std::vector<std::vector<std::complex<double>>>& dm_k,
-                        const K_Vectors& kv,
-                        const double& mixing_beta,
-                        hamilt::Hamilt<std::complex<double>>* p_ham)
+void DFTU::cal_occup_m_k(const int iter,
+                         const std::vector<std::vector<std::complex<double>>>& dm_k,
+                         const K_Vectors& kv,
+                         const double& mixing_beta,
+                         hamilt::Hamilt<std::complex<double>>* p_ham)
 {
     ModuleBase::TITLE("DFTU", "cal_occup_m_k");
     ModuleBase::timer::tick("DFTU", "cal_occup_m_k");
@@ -153,7 +176,7 @@ void DFTU::cal_occup_m_k(const int iter,
     // call SCALAPACK routine to calculate the product of the S and density matrix
     const char transN = 'N', transT = 'T';
     const int one_int = 1;
-    const std::complex<double> beta(0.0,0.0), alpha(1.0,0.0);
+    const std::complex<double> beta(0.0, 0.0), alpha(1.0, 0.0);
 
     std::vector<std::complex<double>> srho(this->LM->ParaV->nloc);
 
@@ -162,7 +185,7 @@ void DFTU::cal_occup_m_k(const int iter,
         // srho(mu,nu) = \sum_{iw} S(mu,iw)*dm_k(iw,nu)
         this->folding_matrix_k_new(ik, p_ham);
         std::complex<double>* s_k_pointer = nullptr;
-        if(GlobalV::NSPIN != 4)
+        if (GlobalV::NSPIN != 4)
         {
             s_k_pointer = dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->getSk();
         }
@@ -183,7 +206,7 @@ void DFTU::cal_occup_m_k(const int iter,
                 &one_int,
                 this->LM->ParaV->desc,
                 dm_k[ik].data(),
-                //dm_k[ik].c,
+                // dm_k[ik].c,
                 &one_int,
                 &one_int,
                 this->LM->ParaV->desc,
@@ -250,14 +273,14 @@ void DFTU::cal_occup_m_k(const int iter,
                                             locale[iat][l][n][spin](m0_all, m1_all)
                                                 += (std::conj(srho[irc_prime])).real() / 4.0;
                                     } // ipol1
-                                } // m1
-                            } // ipol0
-                        } // m0
-                    } // end n
-                } // end l
-            } // end ia
-        } // end it
-    } // ik
+                                }     // m1
+                            }         // ipol0
+                        }             // m0
+                    }                 // end n
+                }                     // end l
+            }                         // end ia
+        }                             // end it
+    }                                 // ik
 
     for (int it = 0; it < GlobalC::ucell.ntype; it++)
     {
@@ -339,11 +362,11 @@ void DFTU::cal_occup_m_k(const int iter,
                         exit(0);
                     }
                 } // end n
-            } // end l
-        } // end ia
-    } // end it
+            }     // end l
+        }         // end ia
+    }             // end it
 
-    if(mixing_dftu && initialed_locale)
+    if (mixing_dftu && initialed_locale)
     {
         this->mix_locale(mixing_beta);
     }
@@ -353,7 +376,10 @@ void DFTU::cal_occup_m_k(const int iter,
     return;
 }
 
-void DFTU::cal_occup_m_gamma(const int iter, const std::vector<std::vector<double>> &dm_gamma, const double& mixing_beta, hamilt::Hamilt<double>* p_ham)
+void DFTU::cal_occup_m_gamma(const int iter,
+                             const std::vector<std::vector<double>>& dm_gamma,
+                             const double& mixing_beta,
+                             hamilt::Hamilt<double>* p_ham)
 {
     ModuleBase::TITLE("DFTU", "cal_occup_m_gamma");
     ModuleBase::timer::tick("DFTU", "cal_occup_m_gamma");
@@ -384,7 +410,7 @@ void DFTU::cal_occup_m_gamma(const int iter, const std::vector<std::vector<doubl
                 &one_int,
                 this->LM->ParaV->desc,
                 dm_gamma[is].data(),
-                //dm_gamma[is].c,
+                // dm_gamma[is].c,
                 &one_int,
                 &one_int,
                 this->LM->ParaV->desc,
@@ -486,12 +512,12 @@ void DFTU::cal_occup_m_gamma(const int iter, const std::vector<std::vector<doubl
                         }
 
                     } // end for(n)
-                } // L
-            } // ia
-        } // it
-    } // is
+                }     // L
+            }         // ia
+        }             // it
+    }                 // is
 
-    if(mixing_dftu && initialed_locale)
+    if (mixing_dftu && initialed_locale)
     {
         this->mix_locale(mixing_beta);
     }
diff --git a/source/module_hamilt_lcao/module_hcontainer/hcontainer.h b/source/module_hamilt_lcao/module_hcontainer/hcontainer.h
index 79105ce340..9ac7ad048e 100644
--- a/source/module_hamilt_lcao/module_hcontainer/hcontainer.h
+++ b/source/module_hamilt_lcao/module_hcontainer/hcontainer.h
@@ -423,7 +423,10 @@ class HContainer
      * @brief get parallel orbital pointer to check parallel information
      * @return const Parallel_Orbitals* , if return is nullptr, it means HContainer is not in parallel mode
      */
-    const Parallel_Orbitals* get_paraV() const{return paraV;}
+    const Parallel_Orbitals* get_paraV() const
+    {
+        return paraV;
+    }
 
   private:
     // i-j atom pairs, sorted by matrix of (atom_i, atom_j)
diff --git a/source/module_io/output_sk.cpp b/source/module_io/output_sk.cpp
index 96c0663579..ea3da14e31 100644
--- a/source/module_io/output_sk.cpp
+++ b/source/module_io/output_sk.cpp
@@ -30,8 +30,7 @@ std::complex<double>* Output_Sk<std::complex<double>>::get_Sk(int ik)
     }
     if (this->nspin_ == 4)
     {
-        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt_)
-            ->updateSk(ik, 1);
+        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt_)->updateSk(ik, 1);
         return dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt_)->getSk();
     }
     else
diff --git a/source/module_io/write_Vxc.hpp b/source/module_io/write_Vxc.hpp
index 2b6b923566..9f0c1dbfb9 100644
--- a/source/module_io/write_Vxc.hpp
+++ b/source/module_io/write_Vxc.hpp
@@ -1,21 +1,24 @@
 #pragma once
-#include "module_psi/psi.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h"
-#include "module_base/scalapack_connector.h"
 #include "module_base/parallel_reduce.h"
+#include "module_base/scalapack_connector.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h"
+#include "module_psi/psi.h"
 
 #ifndef TGINT_H
 #define TGINT_H
-template <typename T> struct TGint;
+template <typename T>
+struct TGint;
 
 template <>
-struct TGint<double> {
+struct TGint<double>
+{
     using type = Gint_Gamma;
 };
 
 template <>
-struct TGint<std::complex<double>> {
+struct TGint<std::complex<double>>
+{
     using type = Gint_k;
 };
 #endif
@@ -23,338 +26,415 @@ struct TGint<std::complex<double>> {
 namespace ModuleIO
 {
 
-    inline void gint_vl(
-			Gint_Gamma& gg, 
-			Gint_inout& io, 
-			LCAO_Matrix& lm) 
-	{ 
-		gg.cal_vlocal(&io, false); 
-	};
+inline void gint_vl(Gint_Gamma& gg, Gint_inout& io, LCAO_Matrix& lm)
+{
+    gg.cal_vlocal(&io, false);
+};
 
-	inline void gint_vl(
-			Gint_k& gk, 
-			Gint_inout& io, 
-			LCAO_Matrix& lm, 
-			ModuleBase::matrix& wg) 
-	{ 
-		gk.cal_gint(&io); 
-	};
+inline void gint_vl(Gint_k& gk, Gint_inout& io, LCAO_Matrix& lm, ModuleBase::matrix& wg)
+{
+    gk.cal_gint(&io);
+};
 
-    void set_para2d_MO(const Parallel_Orbitals& pv, const int nbands, Parallel_2D& p2d)
-    {
-        std::ofstream ofs;
+void set_para2d_MO(const Parallel_Orbitals& pv, const int nbands, Parallel_2D& p2d)
+{
+    std::ofstream ofs;
 #ifdef __MPI
-        p2d.set(nbands, nbands, pv.nb, pv.comm_2D, pv.blacs_ctxt);
+    p2d.set(nbands, nbands, pv.nb, pv.comm_2D, pv.blacs_ctxt);
 #else
-        p2d.set_serial(nbands, nbands);
+    p2d.set_serial(nbands, nbands);
 #endif
-    }
+}
 
-	std::vector<std::complex<double>> cVc(
-			std::complex<double>* V, 
-			std::complex<double>* c, 
-			int nbasis, 
-			int nbands, 
-			const Parallel_Orbitals& pv, 
-			const Parallel_2D& p2d)
-	{
-        std::vector<std::complex<double>> Vc(pv.nloc_wfc, 0.0);
-        char transa = 'N';
-        char transb = 'N';
-        const std::complex<double> alpha(1.0, 0.0);
-        const std::complex<double> beta(0.0, 0.0);
+std::vector<std::complex<double>> cVc(std::complex<double>* V,
+                                      std::complex<double>* c,
+                                      int nbasis,
+                                      int nbands,
+                                      const Parallel_Orbitals& pv,
+                                      const Parallel_2D& p2d)
+{
+    std::vector<std::complex<double>> Vc(pv.nloc_wfc, 0.0);
+    char transa = 'N';
+    char transb = 'N';
+    const std::complex<double> alpha(1.0, 0.0);
+    const std::complex<double> beta(0.0, 0.0);
 #ifdef __MPI
-        const int i1 = 1;
-        pzgemm_(&transa, &transb, &nbasis, &nbands, &nbasis, &alpha, V, &i1, 
-        &i1, pv.desc, c, &i1, &i1, pv.desc_wfc, &beta, Vc.data(), &i1, &i1, pv.desc_wfc);
+    const int i1 = 1;
+    pzgemm_(&transa,
+            &transb,
+            &nbasis,
+            &nbands,
+            &nbasis,
+            &alpha,
+            V,
+            &i1,
+            &i1,
+            pv.desc,
+            c,
+            &i1,
+            &i1,
+            pv.desc_wfc,
+            &beta,
+            Vc.data(),
+            &i1,
+            &i1,
+            pv.desc_wfc);
 #else
-        zgemm_(&transa, &transb, &nbasis, &nbands, &nbasis, &alpha, V, &nbasis, 
-        c, &nbasis, &beta, Vc.data(), &nbasis);
+    zgemm_(&transa, &transb, &nbasis, &nbands, &nbasis, &alpha, V, &nbasis, c, &nbasis, &beta, Vc.data(), &nbasis);
 #endif
-        std::vector<std::complex<double>> cVc(p2d.nloc, 0.0);
-        transa = 'C';
+    std::vector<std::complex<double>> cVc(p2d.nloc, 0.0);
+    transa = 'C';
 #ifdef __MPI
-        pzgemm_(&transa, &transb, &nbands, &nbands, &nbasis, &alpha, c, &i1, &i1, 
-        pv.desc_wfc, Vc.data(), &i1, &i1, pv.desc_wfc, &beta, cVc.data(), &i1, &i1, p2d.desc);
+    pzgemm_(&transa,
+            &transb,
+            &nbands,
+            &nbands,
+            &nbasis,
+            &alpha,
+            c,
+            &i1,
+            &i1,
+            pv.desc_wfc,
+            Vc.data(),
+            &i1,
+            &i1,
+            pv.desc_wfc,
+            &beta,
+            cVc.data(),
+            &i1,
+            &i1,
+            p2d.desc);
 #else
-        zgemm_(&transa, &transb, &nbands, &nbands, &nbasis, &alpha, c, &nbasis, 
-        Vc.data(), &nbasis, &beta, cVc.data(), &nbasis);
+    zgemm_(&transa,
+           &transb,
+           &nbands,
+           &nbands,
+           &nbasis,
+           &alpha,
+           c,
+           &nbasis,
+           Vc.data(),
+           &nbasis,
+           &beta,
+           cVc.data(),
+           &nbasis);
 #endif
-        return cVc;
-    }
+    return cVc;
+}
 
-	std::vector<double> cVc(
-			double* V, 
-			double* c, 
-			int nbasis, 
-			int nbands, 
-			const Parallel_Orbitals& pv, 
-			const Parallel_2D& p2d)
-    {
-        std::vector<double> Vc(pv.nloc_wfc, 0.0);
-        char transa = 'N';
-        char transb = 'N';
-        const double alpha = 1.0;
-        const double beta = 0.0;
+std::vector<double> cVc(double* V,
+                        double* c,
+                        int nbasis,
+                        int nbands,
+                        const Parallel_Orbitals& pv,
+                        const Parallel_2D& p2d)
+{
+    std::vector<double> Vc(pv.nloc_wfc, 0.0);
+    char transa = 'N';
+    char transb = 'N';
+    const double alpha = 1.0;
+    const double beta = 0.0;
 #ifdef __MPI
-        const int i1 = 1;
-        pdgemm_(&transa, &transb, &nbasis, &nbands, &nbasis, &alpha, V, &i1, 
-        &i1, pv.desc, c, &i1, &i1, pv.desc_wfc, &beta, Vc.data(), &i1, &i1, pv.desc_wfc);
+    const int i1 = 1;
+    pdgemm_(&transa,
+            &transb,
+            &nbasis,
+            &nbands,
+            &nbasis,
+            &alpha,
+            V,
+            &i1,
+            &i1,
+            pv.desc,
+            c,
+            &i1,
+            &i1,
+            pv.desc_wfc,
+            &beta,
+            Vc.data(),
+            &i1,
+            &i1,
+            pv.desc_wfc);
 #else
-        dgemm_(&transa, &transb, &nbasis, &nbands, &nbasis, &alpha, V, &nbasis, 
-        c, &nbasis, &beta, Vc.data(), &nbasis);
+    dgemm_(&transa, &transb, &nbasis, &nbands, &nbasis, &alpha, V, &nbasis, c, &nbasis, &beta, Vc.data(), &nbasis);
 #endif
-        std::vector<double> cVc(p2d.nloc, 0.0);
-        transa = 'T';
+    std::vector<double> cVc(p2d.nloc, 0.0);
+    transa = 'T';
 #ifdef __MPI
-        pdgemm_(&transa, &transb, &nbands, &nbands, &nbasis, &alpha, c, &i1, &i1, 
-        pv.desc_wfc, Vc.data(), &i1, &i1, pv.desc_wfc, &beta, cVc.data(), &i1, &i1, p2d.desc);
+    pdgemm_(&transa,
+            &transb,
+            &nbands,
+            &nbands,
+            &nbasis,
+            &alpha,
+            c,
+            &i1,
+            &i1,
+            pv.desc_wfc,
+            Vc.data(),
+            &i1,
+            &i1,
+            pv.desc_wfc,
+            &beta,
+            cVc.data(),
+            &i1,
+            &i1,
+            p2d.desc);
 #else
-        dgemm_(&transa, &transb, &nbands, &nbands, &nbasis, &alpha, c, 
-        &nbasis, Vc.data(), &nbasis, &beta, cVc.data(), &nbasis);
+    dgemm_(&transa,
+           &transb,
+           &nbands,
+           &nbands,
+           &nbasis,
+           &alpha,
+           c,
+           &nbasis,
+           Vc.data(),
+           &nbasis,
+           &beta,
+           cVc.data(),
+           &nbasis);
 #endif
-        return cVc;
-    }
+    return cVc;
+}
 
-    inline double get_real(const std::complex<double>& c) { return c.real(); }
+inline double get_real(const std::complex<double>& c)
+{
+    return c.real();
+}
 
-    inline double get_real(const double& d) { return d; }
+inline double get_real(const double& d)
+{
+    return d;
+}
 
-    template<typename T>
-	double all_band_energy(
-				const int ik, 
-				const std::vector<T>& mat_mo, 
-				const Parallel_2D& p2d, 
-				const ModuleBase::matrix& wg)
+template <typename T>
+double all_band_energy(const int ik, const std::vector<T>& mat_mo, const Parallel_2D& p2d, const ModuleBase::matrix& wg)
+{
+    double e = 0.0;
+    for (int i = 0; i < p2d.get_row_size(); ++i)
     {
-        double e = 0.0;
-        for (int i = 0;i < p2d.get_row_size();++i)
-		{
-			for (int j = 0;j < p2d.get_col_size();++j)
-			{
-				if (p2d.local2global_row(i) == p2d.local2global_col(j))
-				{
-					e += get_real(mat_mo[j * p2d.get_row_size() + i]) * wg(ik, p2d.local2global_row(i));
-				}
-			}
-		}
-        Parallel_Reduce::reduce_all(e);
-        return e;
+        for (int j = 0; j < p2d.get_col_size(); ++j)
+        {
+            if (p2d.local2global_row(i) == p2d.local2global_col(j))
+            {
+                e += get_real(mat_mo[j * p2d.get_row_size() + i]) * wg(ik, p2d.local2global_row(i));
+            }
+        }
     }
+    Parallel_Reduce::reduce_all(e);
+    return e;
+}
 
-    template<typename T>
-    std::vector<double> orbital_energy(
-        const int ik,
-        const int nbands,
-        const std::vector<T>& mat_mo,
-        const Parallel_2D& p2d)
-    {
+template <typename T>
+std::vector<double> orbital_energy(const int ik, const int nbands, const std::vector<T>& mat_mo, const Parallel_2D& p2d)
+{
 #ifdef __DEBUG
-        assert(nbands >= 0);
+    assert(nbands >= 0);
 #endif
-        std::vector<double> e(nbands, 0.0);
-        for (int i = 0;i < nbands;++i)
+    std::vector<double> e(nbands, 0.0);
+    for (int i = 0; i < nbands; ++i)
+    {
+        if (p2d.in_this_processor(i, i))
         {
-            if (p2d.in_this_processor(i, i))
-            {
-                const int index = p2d.global2local_col(i) * p2d.get_row_size() + p2d.global2local_row(i);
-                e[i] = get_real(mat_mo[index]);
-            }
+            const int index = p2d.global2local_col(i) * p2d.get_row_size() + p2d.global2local_row(i);
+            e[i] = get_real(mat_mo[index]);
         }
-        Parallel_Reduce::reduce_all(e.data(), nbands);
-        return e;
     }
+    Parallel_Reduce::reduce_all(e.data(), nbands);
+    return e;
+}
 
-    // mohan update 2024-04-01
-    template <typename T>
-	void set_gint_pointer(
-				Gint_Gamma &gint_gamma,
-                Gint_k &gint_k, 
-				typename TGint<T>::type*& gint);
+// mohan update 2024-04-01
+template <typename T>
+void set_gint_pointer(Gint_Gamma& gint_gamma, Gint_k& gint_k, typename TGint<T>::type*& gint);
 
-    // mohan update 2024-04-01
-	template <>
-	void set_gint_pointer<double>(
-				Gint_Gamma &gint_gamma,
-                Gint_k &gint_k, 
-				typename TGint<double>::type*& gint)
-	{
-        gint = &gint_gamma;
-    }
+// mohan update 2024-04-01
+template <>
+void set_gint_pointer<double>(Gint_Gamma& gint_gamma, Gint_k& gint_k, typename TGint<double>::type*& gint)
+{
+    gint = &gint_gamma;
+}
 
-    // mohan update 2024-04-01
-    template <>
-    void set_gint_pointer<std::complex<double>>(
-				Gint_Gamma &gint_gamma,
-                Gint_k &gint_k, 
-			    typename TGint<std::complex<double>>::type*& gint)
-    {
-        gint = &gint_k;
-    }
+// mohan update 2024-04-01
+template <>
+void set_gint_pointer<std::complex<double>>(Gint_Gamma& gint_gamma,
+                                            Gint_k& gint_k,
+                                            typename TGint<std::complex<double>>::type*& gint)
+{
+    gint = &gint_k;
+}
 
-    /// @brief  write the Vxc matrix in KS orbital representation, usefull for GW calculation
-    /// including terms: local/semi-local XC, EXX, DFTU
-    template <typename TK, typename TR>
-    void write_Vxc(
-			int nspin, 
-			int nbasis, 
-			int drank, 
-			const psi::Psi<TK>& psi, 
-			const UnitCell& ucell, 
-			Structure_Factor& sf,
-			const ModulePW::PW_Basis& rho_basis, 
-			const ModulePW::PW_Basis& rhod_basis, 
-			const ModuleBase::matrix& vloc,
-			const Charge& chg, 
-            Gint_Gamma &gint_gamma, // mohan add 2024-04-01
-            Gint_k &gint_k, // mohan add 2024-04-01
-			LCAO_Matrix& lm, 
-			Local_Orbital_Charge& loc,
-			const K_Vectors& kv, 
-			const ModuleBase::matrix& wg, 
-			Grid_Driver& gd)
-    {
-        ModuleBase::TITLE("ModuleIO", "write_Vxc");
-        const Parallel_Orbitals* pv = lm.ParaV;
-        int nbands = wg.nc;
-        // 1. real-space xc potential
-        // ModuleBase::matrix vr_xc(nspin, chg.nrxx);
-        double etxc = 0.0;
-        double vtxc = 0.0;
-        // elecstate::PotXC* potxc(&rho_basis, &etxc, vtxc, nullptr);
-        // potxc.cal_v_eff(&chg, &ucell, vr_xc);
-        elecstate::Potential* potxc = new elecstate::Potential(&rhod_basis, &rho_basis, &ucell, &vloc, &sf, &etxc, &vtxc);
-        std::vector<std::string> compnents_list = { "xc" };
-        potxc->pot_register(compnents_list);
-        potxc->update_from_charge(&chg, &ucell);
+/// @brief  write the Vxc matrix in KS orbital representation, usefull for GW calculation
+/// including terms: local/semi-local XC, EXX, DFTU
+template <typename TK, typename TR>
+void write_Vxc(int nspin,
+               int nbasis,
+               int drank,
+               const psi::Psi<TK>& psi,
+               const UnitCell& ucell,
+               Structure_Factor& sf,
+               const ModulePW::PW_Basis& rho_basis,
+               const ModulePW::PW_Basis& rhod_basis,
+               const ModuleBase::matrix& vloc,
+               const Charge& chg,
+               Gint_Gamma& gint_gamma, // mohan add 2024-04-01
+               Gint_k& gint_k,         // mohan add 2024-04-01
+               LCAO_Matrix& lm,
+               Local_Orbital_Charge& loc,
+               const K_Vectors& kv,
+               const ModuleBase::matrix& wg,
+               Grid_Driver& gd)
+{
+    ModuleBase::TITLE("ModuleIO", "write_Vxc");
+    const Parallel_Orbitals* pv = lm.ParaV;
+    int nbands = wg.nc;
+    // 1. real-space xc potential
+    // ModuleBase::matrix vr_xc(nspin, chg.nrxx);
+    double etxc = 0.0;
+    double vtxc = 0.0;
+    // elecstate::PotXC* potxc(&rho_basis, &etxc, vtxc, nullptr);
+    // potxc.cal_v_eff(&chg, &ucell, vr_xc);
+    elecstate::Potential* potxc = new elecstate::Potential(&rhod_basis, &rho_basis, &ucell, &vloc, &sf, &etxc, &vtxc);
+    std::vector<std::string> compnents_list = {"xc"};
+    potxc->pot_register(compnents_list);
+    potxc->update_from_charge(&chg, &ucell);
 
-        // 2. allocate AO-matrix 
-        // R (the number of hR: 1 for nspin=1, 4; 2 for nspin=2)
-        int nspin0 = (nspin == 2) ? 2 : 1;
-        std::vector<hamilt::HContainer<TR>> vxcs_R_ao(nspin0, hamilt::HContainer<TR>(pv));
-        for (int is = 0;is < nspin0;++is) vxcs_R_ao[is].set_zero();
-        // k (size for each k-point)
-        hamilt::HS_Matrix_K<TK> vxc_k_ao(pv, 1); // only hk is needed, sk is skipped
+    // 2. allocate AO-matrix
+    // R (the number of hR: 1 for nspin=1, 4; 2 for nspin=2)
+    int nspin0 = (nspin == 2) ? 2 : 1;
+    std::vector<hamilt::HContainer<TR>> vxcs_R_ao(nspin0, hamilt::HContainer<TR>(pv));
+    for (int is = 0; is < nspin0; ++is)
+        vxcs_R_ao[is].set_zero();
+    // k (size for each k-point)
+    hamilt::HS_Matrix_K<TK> vxc_k_ao(pv, 1); // only hk is needed, sk is skipped
 
-        // 3. allocate operators and contribute HR
-        // op (corresponding to hR)
-        typename TGint<TK>::type* gint = nullptr;
+    // 3. allocate operators and contribute HR
+    // op (corresponding to hR)
+    typename TGint<TK>::type* gint = nullptr;
 
-        set_gint_pointer<TK>(gint_gamma, gint_k, gint);
+    set_gint_pointer<TK>(gint_gamma, gint_k, gint);
 
-        std::vector<hamilt::Veff<hamilt::OperatorLCAO<TK, TR>>*> vxcs_op_ao(nspin0);
-        for (int is = 0;is < nspin0;++is)
-        {
-            vxcs_op_ao[is] = new hamilt::Veff<hamilt::OperatorLCAO<TK, TR>>(
-					gint, 
-					&loc, 
-					&vxc_k_ao, 
-					kv.kvec_d, 
-					potxc, 
-					&vxcs_R_ao[is], 
-					&ucell, 
-					&gd);
+    std::vector<hamilt::Veff<hamilt::OperatorLCAO<TK, TR>>*> vxcs_op_ao(nspin0);
+    for (int is = 0; is < nspin0; ++is)
+    {
+        vxcs_op_ao[is] = new hamilt::Veff<hamilt::OperatorLCAO<TK, TR>>(gint,
+                                                                        &loc,
+                                                                        &vxc_k_ao,
+                                                                        kv.kvec_d,
+                                                                        potxc,
+                                                                        &vxcs_R_ao[is],
+                                                                        &ucell,
+                                                                        &gd);
 
-            vxcs_op_ao[is]->contributeHR();
-        }
-        std::vector<std::vector<double>> e_orb_locxc; // orbital energy (local XC)
-        std::vector<std::vector<double>> e_orb_tot; // orbital energy (total)
+        vxcs_op_ao[is]->contributeHR();
+    }
+    std::vector<std::vector<double>> e_orb_locxc; // orbital energy (local XC)
+    std::vector<std::vector<double>> e_orb_tot;   // orbital energy (total)
 #ifdef __EXX
-        hamilt::OperatorEXX<hamilt::OperatorLCAO<TK, TR>> vexx_op_ao(&vxc_k_ao, &lm, nullptr, kv);
-        hamilt::HS_Matrix_K<TK> vexxonly_k_ao(pv, 1); // only hk is needed, sk is skipped
-        hamilt::OperatorEXX<hamilt::OperatorLCAO<TK, TR>> vexxonly_op_ao(&vexxonly_k_ao, &lm, nullptr, kv);
-        std::vector<std::vector<double>> e_orb_exx; // orbital energy (EXX)
+    hamilt::OperatorEXX<hamilt::OperatorLCAO<TK, TR>> vexx_op_ao(&vxc_k_ao, &lm, nullptr, kv);
+    hamilt::HS_Matrix_K<TK> vexxonly_k_ao(pv, 1); // only hk is needed, sk is skipped
+    hamilt::OperatorEXX<hamilt::OperatorLCAO<TK, TR>> vexxonly_op_ao(&vexxonly_k_ao, &lm, nullptr, kv);
+    std::vector<std::vector<double>> e_orb_exx; // orbital energy (EXX)
 #endif
-        hamilt::OperatorDFTU<hamilt::OperatorLCAO<TK, TR>> vdftu_op_ao(&vxc_k_ao, kv.kvec_d, nullptr, kv.isk);
+    hamilt::OperatorDFTU<hamilt::OperatorLCAO<TK, TR>> vdftu_op_ao(&vxc_k_ao, kv.kvec_d, nullptr, kv.isk);
 
-        //4. calculate and write the MO-matrix Exc
-        Parallel_2D p2d;
-        set_para2d_MO(*pv, nbands, p2d);
+    // 4. calculate and write the MO-matrix Exc
+    Parallel_2D p2d;
+    set_para2d_MO(*pv, nbands, p2d);
 
-        // ======test=======
-        // double total_energy = 0.0;
-        // double exx_energy = 0.0;
-        // ======test=======
-        for (int ik = 0;ik < kv.get_nks();++ik)
-        {
-            vxc_k_ao.set_zero_hk();
-            int is = kv.isk[ik];
-            dynamic_cast<hamilt::OperatorLCAO<TK, TR>*>(vxcs_op_ao[is])->contributeHk(ik);
-            const std::vector<TK>& vlocxc_k_mo = cVc(vxc_k_ao.get_hk(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d);
+    // ======test=======
+    // double total_energy = 0.0;
+    // double exx_energy = 0.0;
+    // ======test=======
+    for (int ik = 0; ik < kv.get_nks(); ++ik)
+    {
+        vxc_k_ao.set_zero_hk();
+        int is = kv.isk[ik];
+        dynamic_cast<hamilt::OperatorLCAO<TK, TR>*>(vxcs_op_ao[is])->contributeHk(ik);
+        const std::vector<TK>& vlocxc_k_mo = cVc(vxc_k_ao.get_hk(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d);
 
 #ifdef __EXX
-            if (GlobalC::exx_info.info_global.cal_exx)
-            {
-                e_orb_locxc.emplace_back(orbital_energy(ik, nbands, vlocxc_k_mo, p2d));
-                ModuleBase::GlobalFunc::ZEROS(vexxonly_k_ao.get_hk(), pv->nloc);
-                vexx_op_ao.contributeHk(ik);
-                vexxonly_op_ao.contributeHk(ik);
-                std::vector<TK> vexx_k_mo = cVc(vexxonly_k_ao.get_hk(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d);
-                e_orb_exx.emplace_back(orbital_energy(ik, nbands, vexx_k_mo, p2d));
-            }
-            // ======test=======
-            // exx_energy += all_band_energy(ik, vexx_k_mo, p2d, wg);
-            // ======test=======
-#endif
-			if (GlobalV::dft_plus_u) 
-			{
-				vdftu_op_ao.contributeHk(ik);
-			}
-            const std::vector<TK>& vxc_tot_k_mo = cVc(vxc_k_ao.get_hk(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d);
-            e_orb_tot.emplace_back(orbital_energy(ik, nbands, vxc_tot_k_mo, p2d));
-            // write
-            ModuleIO::save_mat(-1, vxc_tot_k_mo.data(), nbands,
-                false/*binary*/, GlobalV::out_ndigits, true/*triangle*/, false/*append*/,
-                "Vxc", "k-" + std::to_string(ik), p2d, drank);
-            // ======test=======
-            // total_energy += all_band_energy(ik, vxc_tot_k_mo, p2d, wg);
-            // ======test=======
+        if (GlobalC::exx_info.info_global.cal_exx)
+        {
+            e_orb_locxc.emplace_back(orbital_energy(ik, nbands, vlocxc_k_mo, p2d));
+            ModuleBase::GlobalFunc::ZEROS(vexxonly_k_ao.get_hk(), pv->nloc);
+            vexx_op_ao.contributeHk(ik);
+            vexxonly_op_ao.contributeHk(ik);
+            std::vector<TK> vexx_k_mo = cVc(vexxonly_k_ao.get_hk(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d);
+            e_orb_exx.emplace_back(orbital_energy(ik, nbands, vexx_k_mo, p2d));
         }
         // ======test=======
-        // total_energy -= 0.5 * exx_energy;
-        // std::cout << "total energy: " << total_energy << std::endl;
-        // std::cout << "etxc: " << etxc << std::endl;
-        // std::cout << "vtxc_cal: " << total_energy - 0.5 * exx_energy << std::endl;
-        // std::cout << "vtxc_ref: " << vtxc << std::endl;
-        // std::cout << "exx_energy: " << 0.5 * exx_energy << std::endl;
+        // exx_energy += all_band_energy(ik, vexx_k_mo, p2d, wg);
         // ======test=======
-        delete potxc;
-		for (int is = 0;is < nspin0;++is) 
-		{
-			delete vxcs_op_ao[is];
+#endif
+        if (GlobalV::dft_plus_u)
+        {
+            vdftu_op_ao.contributeHk(ik);
         }
-        // write the orbital energy for xc and exx in LibRPA format
-        auto write_orb_energy = [&kv, &nspin0, &nbands](const std::vector<std::vector<double>>& e_orb,
-            const std::string& label, const bool app = false)
-            {
-                assert(e_orb.size() == kv.get_nks());
-                const int nk = kv.get_nks() / nspin0;
-                std::ofstream ofs;
-                ofs.open(GlobalV::global_out_dir + "vxc_" + (label == "" ? "out" : label + "_out"),
-                    app ? std::ios::app : std::ios::out);
-                ofs << nk << "\n" << nspin0 << "\n" << nbands << "\n";
-                ofs << std::scientific << std::setprecision(16);
-                for (int ik = 0;ik < nk;++ik)
-                {
-                    for (int is = 0;is < nspin0;++is)
-                    {
-                        for (auto e : e_orb[is * nk + ik])
-                        {   // Hartree and eV
-                            ofs << e / 2. << "\t" << e * ModuleBase::Ry_to_eV << "\n";
-                        }
-                    }
-                }
-            };
-        if (GlobalV::MY_RANK == 0)
+        const std::vector<TK>& vxc_tot_k_mo = cVc(vxc_k_ao.get_hk(), &psi(ik, 0, 0), nbasis, nbands, *pv, p2d);
+        e_orb_tot.emplace_back(orbital_energy(ik, nbands, vxc_tot_k_mo, p2d));
+        // write
+        ModuleIO::save_mat(-1,
+                           vxc_tot_k_mo.data(),
+                           nbands,
+                           false /*binary*/,
+                           GlobalV::out_ndigits,
+                           true /*triangle*/,
+                           false /*append*/,
+                           "Vxc",
+                           "k-" + std::to_string(ik),
+                           p2d,
+                           drank);
+        // ======test=======
+        // total_energy += all_band_energy(ik, vxc_tot_k_mo, p2d, wg);
+        // ======test=======
+    }
+    // ======test=======
+    // total_energy -= 0.5 * exx_energy;
+    // std::cout << "total energy: " << total_energy << std::endl;
+    // std::cout << "etxc: " << etxc << std::endl;
+    // std::cout << "vtxc_cal: " << total_energy - 0.5 * exx_energy << std::endl;
+    // std::cout << "vtxc_ref: " << vtxc << std::endl;
+    // std::cout << "exx_energy: " << 0.5 * exx_energy << std::endl;
+    // ======test=======
+    delete potxc;
+    for (int is = 0; is < nspin0; ++is)
+    {
+        delete vxcs_op_ao[is];
+    }
+    // write the orbital energy for xc and exx in LibRPA format
+    auto write_orb_energy = [&kv, &nspin0, &nbands](const std::vector<std::vector<double>>& e_orb,
+                                                    const std::string& label,
+                                                    const bool app = false) {
+        assert(e_orb.size() == kv.get_nks());
+        const int nk = kv.get_nks() / nspin0;
+        std::ofstream ofs;
+        ofs.open(GlobalV::global_out_dir + "vxc_" + (label == "" ? "out" : label + "_out"),
+                 app ? std::ios::app : std::ios::out);
+        ofs << nk << "\n" << nspin0 << "\n" << nbands << "\n";
+        ofs << std::scientific << std::setprecision(16);
+        for (int ik = 0; ik < nk; ++ik)
         {
-            write_orb_energy(e_orb_tot, "");
-#ifdef __EXX
-            if (GlobalC::exx_info.info_global.cal_exx)
+            for (int is = 0; is < nspin0; ++is)
             {
-                write_orb_energy(e_orb_locxc, "local");
-                write_orb_energy(e_orb_exx, "exx");
+                for (auto e: e_orb[is * nk + ik])
+                { // Hartree and eV
+                    ofs << e / 2. << "\t" << e * ModuleBase::Ry_to_eV << "\n";
+                }
             }
-#endif
         }
+    };
+    if (GlobalV::MY_RANK == 0)
+    {
+        write_orb_energy(e_orb_tot, "");
+#ifdef __EXX
+        if (GlobalC::exx_info.info_global.cal_exx)
+        {
+            write_orb_energy(e_orb_locxc, "local");
+            write_orb_energy(e_orb_exx, "exx");
+        }
+#endif
     }
 }
+} // namespace ModuleIO
diff --git a/source/module_io/write_dos_lcao.cpp b/source/module_io/write_dos_lcao.cpp
index 81c30a9462..a8b2400142 100644
--- a/source/module_io/write_dos_lcao.cpp
+++ b/source/module_io/write_dos_lcao.cpp
@@ -448,8 +448,10 @@ void ModuleIO::write_dos_lcao(const psi::Psi<std::complex<double>>* psi,
                 const std::complex<double>* sk = nullptr;
                 if (GlobalV::NSPIN == 4)
                 {
-                    dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik, 1);
-                    sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->getSk();
+                    dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik,
+                                                                                                                   1);
+                    sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)
+                             ->getSk();
                 }
                 else
                 {
diff --git a/source/module_io/write_proj_band_lcao.cpp b/source/module_io/write_proj_band_lcao.cpp
index 32425dc130..44a386a526 100644
--- a/source/module_io/write_proj_band_lcao.cpp
+++ b/source/module_io/write_proj_band_lcao.cpp
@@ -5,17 +5,16 @@
 #include "module_base/scalapack_connector.h"
 #include "module_base/timer.h"
 #include "module_cell/module_neighbor/sltk_atom_arrange.h"
-#include "write_orb_info.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
+#include "write_orb_info.h"
 
-template<>
-void ModuleIO::write_proj_band_lcao(
-    const psi::Psi<double>* psi,
-    LCAO_Matrix& lm,
-    const elecstate::ElecState* pelec,
-    const K_Vectors& kv,
-    const UnitCell &ucell,
-    hamilt::Hamilt<double>* p_ham)
+template <>
+void ModuleIO::write_proj_band_lcao(const psi::Psi<double>* psi,
+                                    LCAO_Matrix& lm,
+                                    const elecstate::ElecState* pelec,
+                                    const K_Vectors& kv,
+                                    const UnitCell& ucell,
+                                    hamilt::Hamilt<double>* p_ham)
 {
     ModuleBase::TITLE("ModuleIO", "write_proj_band_lcao");
     ModuleBase::timer::tick("ModuleIO", "write_proj_band_lcao");
@@ -43,7 +42,6 @@ void ModuleIO::write_proj_band_lcao(
     weightk.create(nspin0, GlobalV::NBANDS * GlobalV::NLOCAL, true);
     weight.create(nspin0, GlobalV::NBANDS * GlobalV::NLOCAL, true);
 
-
     for (int is = 0; is < nspin0; is++)
     {
         std::vector<ModuleBase::matrix> Mulk;
@@ -60,24 +58,24 @@ void ModuleIO::write_proj_band_lcao(
 #ifdef __MPI
             const char T_char = 'T';
             pdgemv_(&T_char,
-                &GlobalV::NLOCAL,
-                &GlobalV::NLOCAL,
-                &one_float,
-                sk,
-                &one_int,
-                &one_int,
-                pv->desc,
-                psi->get_pointer(),
-                &one_int,
-                &NB,
-                pv->desc,
-                &one_int,
-                &zero_float,
-                Mulk[0].c,
-                &one_int,
-                &NB,
-                pv->desc,
-                &one_int);
+                    &GlobalV::NLOCAL,
+                    &GlobalV::NLOCAL,
+                    &one_float,
+                    sk,
+                    &one_int,
+                    &one_int,
+                    pv->desc,
+                    psi->get_pointer(),
+                    &one_int,
+                    &NB,
+                    pv->desc,
+                    &one_int,
+                    &zero_float,
+                    Mulk[0].c,
+                    &one_int,
+                    &NB,
+                    pv->desc,
+                    &one_int);
 #endif
             for (int j = 0; j < GlobalV::NLOCAL; ++j)
             {
@@ -146,14 +144,14 @@ void ModuleIO::write_proj_band_lcao(
                             int w0 = w - s0;
                             out << std::setw(13)
                                 << weight(is, ib * GlobalV::NLOCAL + s0 + 2 * w0)
-                                + weight(is, ib * GlobalV::NLOCAL + s0 + 2 * w0 + 1);
+                                       + weight(is, ib * GlobalV::NLOCAL + s0 + 2 * w0 + 1);
                         }
                     }
                     out << std::endl;
                     out << "</data>" << std::endl;
                     out << "</orbital>" << std::endl;
                 } // j
-            } // i
+            }     // i
 
             out << "</pband>" << std::endl;
             out.close();
@@ -165,14 +163,13 @@ void ModuleIO::write_proj_band_lcao(
     return;
 }
 
-template<>
-void ModuleIO::write_proj_band_lcao(
-    const psi::Psi<std::complex<double>>* psi,
-    LCAO_Matrix& lm,
-    const elecstate::ElecState* pelec,
-    const K_Vectors& kv,
-    const UnitCell& ucell,
-    hamilt::Hamilt<std::complex<double>>* p_ham)
+template <>
+void ModuleIO::write_proj_band_lcao(const psi::Psi<std::complex<double>>* psi,
+                                    LCAO_Matrix& lm,
+                                    const elecstate::ElecState* pelec,
+                                    const K_Vectors& kv,
+                                    const UnitCell& ucell,
+                                    hamilt::Hamilt<std::complex<double>>* p_ham)
 {
     ModuleBase::TITLE("ModuleIO", "write_proj_band_lcao");
     ModuleBase::timer::tick("ModuleIO", "write_proj_band_lcao");
@@ -200,46 +197,48 @@ void ModuleIO::write_proj_band_lcao(
 
     for (int is = 0; is < nspin0; is++)
     {
-            std::vector<ModuleBase::ComplexMatrix> Mulk;
-            Mulk.resize(1);
-            Mulk[0].create(pv->ncol, pv->nrow);
+        std::vector<ModuleBase::ComplexMatrix> Mulk;
+        Mulk.resize(1);
+        Mulk[0].create(pv->ncol, pv->nrow);
 
-            for (int ik = 0; ik < kv.get_nks(); ik++)
+        for (int ik = 0; ik < kv.get_nks(); ik++)
+        {
+            if (is == kv.isk[ik])
             {
-                if (is == kv.isk[ik])
+                // calculate SK for current k point
+                const std::complex<double>* sk = nullptr;
+                if (GlobalV::NSPIN == 4)
                 {
-                    // calculate SK for current k point
-                    const std::complex<double>* sk = nullptr;
-                    if (GlobalV::NSPIN == 4)
-                    {
-                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik, 1);
-                        sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->getSk();
-                    }
-                    else
-                    {
-                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, 1);
-                        sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->getSk();
-                    }
+                    dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik,
+                                                                                                                   1);
+                    sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)
+                             ->getSk();
+                }
+                else
+                {
+                    dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, 1);
+                    sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->getSk();
+                }
 
-                    // calculate Mulk
-                    psi->fix_k(ik);
-                    psi::Psi<std::complex<double>> Dwfc(psi[0], 1);
-                    std::complex<double>* p_dwfc = Dwfc.get_pointer();
-                    for (int index = 0; index < Dwfc.size(); ++index)
-                    {
-                        p_dwfc[index] = conj(p_dwfc[index]);
-                    }
+                // calculate Mulk
+                psi->fix_k(ik);
+                psi::Psi<std::complex<double>> Dwfc(psi[0], 1);
+                std::complex<double>* p_dwfc = Dwfc.get_pointer();
+                for (int index = 0; index < Dwfc.size(); ++index)
+                {
+                    p_dwfc[index] = conj(p_dwfc[index]);
+                }
 
-                    for (int i = 0; i < GlobalV::NBANDS; ++i)
-                    {
-                        const int NB = i + 1;
+                for (int i = 0; i < GlobalV::NBANDS; ++i)
+                {
+                    const int NB = i + 1;
 
-                        const double one_float[2] = { 1.0, 0.0 };
-                        const double zero_float[2] = { 0.0, 0.0 };
-                        const int one_int = 1;
-                        const char T_char = 'T';
+                    const double one_float[2] = {1.0, 0.0};
+                    const double zero_float[2] = {0.0, 0.0};
+                    const int one_int = 1;
+                    const char T_char = 'T';
 #ifdef __MPI
-                        pzgemv_(&T_char,
+                    pzgemv_(&T_char,
                             &GlobalV::NLOCAL,
                             &GlobalV::NLOCAL,
                             &one_float[0],
@@ -259,23 +258,23 @@ void ModuleIO::write_proj_band_lcao(
                             pv->desc,
                             &one_int);
 #endif
-                        for (int j = 0; j < GlobalV::NLOCAL; ++j)
-                        {
+                    for (int j = 0; j < GlobalV::NLOCAL; ++j)
+                    {
 
-                            if (pv->in_this_processor(j, i))
-                            {
+                        if (pv->in_this_processor(j, i))
+                        {
 
-                                const int ir = pv->global2local_row(j);
-                                const int ic = pv->global2local_col(i);
+                            const int ir = pv->global2local_row(j);
+                            const int ic = pv->global2local_col(i);
 
-                                weightk(ik, i * GlobalV::NLOCAL + j) = Mulk[0](ic, ir) * psi[0](ic, ir);
-                            }
+                            weightk(ik, i * GlobalV::NLOCAL + j) = Mulk[0](ic, ir) * psi[0](ic, ir);
                         }
+                    }
 
-                    } // ib
+                } // ib
 
-                } // if
-            } // ik
+            } // if
+        }     // ik
 #ifdef __MPI
         MPI_Reduce(weightk.real().c, weight.c, NUM, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
 #endif
@@ -301,12 +300,12 @@ void ModuleIO::write_proj_band_lcao(
             out << "<band_structure nkpoints=\"" << nks << "\" nbands=\"" << GlobalV::NBANDS << "\" units=\"eV\">"
                 << std::endl;
 
-			for (int ik = 0; ik < nks; ik++)
-			{
-				for (int ib = 0; ib < GlobalV::NBANDS; ib++)
-					out << " " << (pelec->ekb(ik + is * nks, ib)) * ModuleBase::Ry_to_eV;
-				out << std::endl;
-			}
+            for (int ik = 0; ik < nks; ik++)
+            {
+                for (int ib = 0; ib < GlobalV::NBANDS; ib++)
+                    out << " " << (pelec->ekb(ik + is * nks, ib)) * ModuleBase::Ry_to_eV;
+                out << std::endl;
+            }
             out << "</band_structure>" << std::endl;
 
             for (int i = 0; i < ucell.nat; i++)
@@ -332,32 +331,32 @@ void ModuleIO::write_proj_band_lcao(
                     out << std::setw(2) << "z=\"" << std::setw(40) << N1 + 1 << "\"" << std::endl;
                     out << ">" << std::endl;
                     out << "<data>" << std::endl;
-					for (int ik = 0; ik < nks; ik++)
-					{
-						for (int ib = 0; ib < GlobalV::NBANDS; ib++)
-						{
-							if (GlobalV::NSPIN == 1)
-							{
-								out << std::setw(13) << weight(ik, ib * GlobalV::NLOCAL + w);
-							}
-							else if (GlobalV::NSPIN == 2)
-							{
-								out << std::setw(13) << weight(ik + nks * is, ib * GlobalV::NLOCAL + w);
-							}
-							else if (GlobalV::NSPIN == 4)
-							{
-								int w0 = w - s0;
-								out << std::setw(13)
-									<< weight(ik, ib * GlobalV::NLOCAL + s0 + 2 * w0)
-									+ weight(ik, ib * GlobalV::NLOCAL + s0 + 2 * w0 + 1);
-							}
-						}
-						out << std::endl;
-					}
+                    for (int ik = 0; ik < nks; ik++)
+                    {
+                        for (int ib = 0; ib < GlobalV::NBANDS; ib++)
+                        {
+                            if (GlobalV::NSPIN == 1)
+                            {
+                                out << std::setw(13) << weight(ik, ib * GlobalV::NLOCAL + w);
+                            }
+                            else if (GlobalV::NSPIN == 2)
+                            {
+                                out << std::setw(13) << weight(ik + nks * is, ib * GlobalV::NLOCAL + w);
+                            }
+                            else if (GlobalV::NSPIN == 4)
+                            {
+                                int w0 = w - s0;
+                                out << std::setw(13)
+                                    << weight(ik, ib * GlobalV::NLOCAL + s0 + 2 * w0)
+                                           + weight(ik, ib * GlobalV::NLOCAL + s0 + 2 * w0 + 1);
+                            }
+                        }
+                        out << std::endl;
+                    }
                     out << "</data>" << std::endl;
                     out << "</orbital>" << std::endl;
                 } // j
-            } // i
+            }     // i
 
             out << "</pband>" << std::endl;
             out.close();
diff --git a/source/module_ri/RI_2D_Comm.h b/source/module_ri/RI_2D_Comm.h
index d4be823ddf..768c9171d6 100644
--- a/source/module_ri/RI_2D_Comm.h
+++ b/source/module_ri/RI_2D_Comm.h
@@ -7,65 +7,63 @@
 #define RI_2D_COMM_H
 
 #include "module_basis/module_ao/parallel_orbitals.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 #include "module_cell/klist.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 
 #include <RI/global/Tensor.h>
-
 #include <array>
-#include <vector>
-#include <tuple>
+#include <deque>
 #include <map>
 #include <set>
-#include <deque>
+#include <tuple>
+#include <vector>
 
 namespace RI_2D_Comm
 {
-	using TA = int;
-	using Tcell = int;
-	static const size_t Ndim = 3;
-	using TC = std::array<Tcell,Ndim>;
-	using TAC = std::pair<TA,TC>;
+using TA = int;
+using Tcell = int;
+static const size_t Ndim = 3;
+using TC = std::array<Tcell, Ndim>;
+using TAC = std::pair<TA, TC>;
 
-//public:
-	template<typename Tdata, typename Tmatrix>
-	extern std::vector<std::map<TA,std::map<TAC,RI::Tensor<Tdata>>>>
-	split_m2D_ktoR(const K_Vectors &kv, const std::vector<const Tmatrix*> &mks_2D, const Parallel_Orbitals &pv);
+// public:
+template <typename Tdata, typename Tmatrix>
+extern std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> split_m2D_ktoR(
+    const K_Vectors& kv,
+    const std::vector<const Tmatrix*>& mks_2D,
+    const Parallel_Orbitals& pv);
 
-	// judge[is] = {s0, s1}
-	extern std::vector<std::tuple<std::set<TA>, std::set<TA>>>
-	get_2D_judge(const Parallel_Orbitals &pv);
+// judge[is] = {s0, s1}
+extern std::vector<std::tuple<std::set<TA>, std::set<TA>>> get_2D_judge(const Parallel_Orbitals& pv);
 
-    template<typename Tdata, typename TK>
-    extern void add_Hexx(
-        const K_Vectors& kv,
-        const int ik,
-        const double alpha,
-        const std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>& Hs,
-        const Parallel_Orbitals& pv,
-        TK* hk);
+template <typename Tdata, typename TK>
+extern void add_Hexx(const K_Vectors& kv,
+                     const int ik,
+                     const double alpha,
+                     const std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>& Hs,
+                     const Parallel_Orbitals& pv,
+                     TK* hk);
 
-	template<typename Tdata>
-	extern std::vector<std::vector<Tdata>> Hexxs_to_Hk(
-			const K_Vectors &kv,
-			const Parallel_Orbitals &pv, 
-			const std::vector< std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> &Hexxs,
-			const int ik);
-	template<typename Tdata>
-	std::vector<std::vector<Tdata>> pulay_mixing(
-		const Parallel_Orbitals &pv,
-		std::deque<std::vector<std::vector<Tdata>>> &Hk_seq,
-		const std::vector<std::vector<Tdata>> &Hk_new,
-		const double mixing_beta,
-		const std::string mixing_mode);
+template <typename Tdata>
+extern std::vector<std::vector<Tdata>> Hexxs_to_Hk(
+    const K_Vectors& kv,
+    const Parallel_Orbitals& pv,
+    const std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>& Hexxs,
+    const int ik);
+template <typename Tdata>
+std::vector<std::vector<Tdata>> pulay_mixing(const Parallel_Orbitals& pv,
+                                             std::deque<std::vector<std::vector<Tdata>>>& Hk_seq,
+                                             const std::vector<std::vector<Tdata>>& Hk_new,
+                                             const double mixing_beta,
+                                             const std::string mixing_mode);
 
-//private:
-	extern std::vector<int> get_ik_list(const K_Vectors &kv, const int is_k);
-	extern inline std::tuple<int,int,int> get_iat_iw_is_block(const int iwt);
-	extern inline int get_is_block(const int is_k, const int is_row_b, const int is_col_b);
-	extern inline std::tuple<int,int> split_is_block(const int is_b);
-	extern inline int get_iwt(const int iat, const int iw_b, const int is_b);
-}
+// private:
+extern std::vector<int> get_ik_list(const K_Vectors& kv, const int is_k);
+extern inline std::tuple<int, int, int> get_iat_iw_is_block(const int iwt);
+extern inline int get_is_block(const int is_k, const int is_row_b, const int is_col_b);
+extern inline std::tuple<int, int> split_is_block(const int is_b);
+extern inline int get_iwt(const int iat, const int iw_b, const int is_b);
+} // namespace RI_2D_Comm
 
 #include "RI_2D_Comm.hpp"
 
diff --git a/source/module_ri/RI_2D_Comm.hpp b/source/module_ri/RI_2D_Comm.hpp
index 893316f6fe..4c82e9f22a 100644
--- a/source/module_ri/RI_2D_Comm.hpp
+++ b/source/module_ri/RI_2D_Comm.hpp
@@ -8,195 +8,213 @@
 
 #include "RI_2D_Comm.h"
 #include "RI_Util.h"
-#include "module_hamilt_pw/hamilt_pwdft/global.h"
-#include "module_base/tool_title.h"
 #include "module_base/timer.h"
+#include "module_base/tool_title.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
 
 #include <RI/global/Global_Func-2.h>
-
 #include <cmath>
-#include <string>
 #include <stdexcept>
+#include <string>
 
-inline RI::Tensor<double> tensor_conj(const RI::Tensor<double>& t) { return t; }
+inline RI::Tensor<double> tensor_conj(const RI::Tensor<double>& t)
+{
+    return t;
+}
 inline RI::Tensor<std::complex<double>> tensor_conj(const RI::Tensor<std::complex<double>>& t)
 {
     RI::Tensor<std::complex<double>> r(t.shape);
-    for (int i = 0;i < t.data->size();++i)(*r.data)[i] = std::conj((*t.data)[i]);
+    for (int i = 0; i < t.data->size(); ++i)
+        (*r.data)[i] = std::conj((*t.data)[i]);
     return r;
 }
-template<typename Tdata, typename Tmatrix>
-auto RI_2D_Comm::split_m2D_ktoR(const K_Vectors &kv, const std::vector<const Tmatrix*> &mks_2D, const Parallel_Orbitals &pv)
--> std::vector<std::map<TA,std::map<TAC,RI::Tensor<Tdata>>>>
+template <typename Tdata, typename Tmatrix>
+auto RI_2D_Comm::split_m2D_ktoR(const K_Vectors& kv,
+                                const std::vector<const Tmatrix*>& mks_2D,
+                                const Parallel_Orbitals& pv)
+    -> std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>
 {
-	ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR");
-	ModuleBase::timer::tick("RI_2D_Comm", "split_m2D_ktoR");
-
-	const TC period = RI_Util::get_Born_vonKarmen_period(kv);
-	const std::map<int,int> nspin_k = {{1,1}, {2,2}, {4,1}};
-	const double SPIN_multiple = std::map<int,double>{{1,0.5}, {2,1}, {4,1}}.at(GlobalV::NSPIN);							// why?
-
-	std::vector<std::map<TA,std::map<TAC,RI::Tensor<Tdata>>>> mRs_a2D(GlobalV::NSPIN);
-	for(int is_k=0; is_k<nspin_k.at(GlobalV::NSPIN); ++is_k)
-	{
-		const std::vector<int> ik_list = RI_2D_Comm::get_ik_list(kv, is_k);
-		for(const TC &cell : RI_Util::get_Born_von_Karmen_cells(period))
-		{
-			RI::Tensor<Tdata> mR_2D;
-			for(const int ik : ik_list)
-			{
+    ModuleBase::TITLE("RI_2D_Comm", "split_m2D_ktoR");
+    ModuleBase::timer::tick("RI_2D_Comm", "split_m2D_ktoR");
+
+    const TC period = RI_Util::get_Born_vonKarmen_period(kv);
+    const std::map<int, int> nspin_k = {{1, 1}, {2, 2}, {4, 1}};
+    const double SPIN_multiple = std::map<int, double>{{1, 0.5}, {2, 1}, {4, 1}}.at(GlobalV::NSPIN); // why?
+
+    std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> mRs_a2D(GlobalV::NSPIN);
+    for (int is_k = 0; is_k < nspin_k.at(GlobalV::NSPIN); ++is_k)
+    {
+        const std::vector<int> ik_list = RI_2D_Comm::get_ik_list(kv, is_k);
+        for (const TC& cell: RI_Util::get_Born_von_Karmen_cells(period))
+        {
+            RI::Tensor<Tdata> mR_2D;
+            for (const int ik: ik_list)
+            {
                 using Tdata_m = typename Tmatrix::value_type;
-                RI::Tensor<Tdata_m> mk_2D = RI_Util::Vector_to_Tensor<Tdata_m>(*mks_2D[ik], pv.get_col_size(), pv.get_row_size());
-				const Tdata_m frac = SPIN_multiple
-					* RI::Global_Func::convert<Tdata_m>( std::exp(
-                        -ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell) * GlobalC::ucell.latvec))));
+                RI::Tensor<Tdata_m> mk_2D
+                    = RI_Util::Vector_to_Tensor<Tdata_m>(*mks_2D[ik], pv.get_col_size(), pv.get_row_size());
+                const Tdata_m frac
+                    = SPIN_multiple
+                      * RI::Global_Func::convert<Tdata_m>(
+                          std::exp(-ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT
+                                   * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell) * GlobalC::ucell.latvec))));
                 auto set_mR_2D = [&mR_2D](auto&& mk_frac) {
                     if (mR_2D.empty())
                         mR_2D = RI::Global_Func::convert<Tdata>(mk_frac);
                     else
                         mR_2D = mR_2D + RI::Global_Func::convert<Tdata>(mk_frac);
-                    };
+                };
                 if (static_cast<int>(std::round(SPIN_multiple * kv.wk[ik] * kv.get_nkstot_full())) == 2)
                     set_mR_2D(mk_2D * (frac * 0.5) + tensor_conj(mk_2D * (frac * 0.5)));
-                else set_mR_2D(mk_2D * frac);
-			}
-
-			for(int iwt0_2D=0; iwt0_2D!=mR_2D.shape[0]; ++iwt0_2D)
-			{
-				const int iwt0 =
-					ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER()
-                    ? pv.local2global_col(iwt0_2D)
-                    : pv.local2global_row(iwt0_2D);
-				int iat0, iw0_b, is0_b;
-				std::tie(iat0,iw0_b,is0_b) = RI_2D_Comm::get_iat_iw_is_block(iwt0);
-				const int it0 = GlobalC::ucell.iat2it[iat0];
-				for(int iwt1_2D=0; iwt1_2D!=mR_2D.shape[1]; ++iwt1_2D)
-				{
-					const int iwt1 =
-						ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER()
-                        ? pv.local2global_row(iwt1_2D)
-                        : pv.local2global_col(iwt1_2D);
-					int iat1, iw1_b, is1_b;
-					std::tie(iat1,iw1_b,is1_b) = RI_2D_Comm::get_iat_iw_is_block(iwt1);
-					const int it1 = GlobalC::ucell.iat2it[iat1];
-
-					const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b);
-					RI::Tensor<Tdata> &mR_a2D = mRs_a2D[is_b][iat0][{iat1,cell}];
-					if(mR_a2D.empty())
-						mR_a2D = RI::Tensor<Tdata>({static_cast<size_t>(GlobalC::ucell.atoms[it0].nw), static_cast<size_t>(GlobalC::ucell.atoms[it1].nw)});
-					mR_a2D(iw0_b,iw1_b) = mR_2D(iwt0_2D, iwt1_2D);
-				}
-			}
-		}
-	}
-	ModuleBase::timer::tick("RI_2D_Comm", "split_m2D_ktoR");
-	return mRs_a2D;
+                else
+                    set_mR_2D(mk_2D * frac);
+            }
+
+            for (int iwt0_2D = 0; iwt0_2D != mR_2D.shape[0]; ++iwt0_2D)
+            {
+                const int iwt0 = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER() ? pv.local2global_col(iwt0_2D)
+                                                                                     : pv.local2global_row(iwt0_2D);
+                int iat0, iw0_b, is0_b;
+                std::tie(iat0, iw0_b, is0_b) = RI_2D_Comm::get_iat_iw_is_block(iwt0);
+                const int it0 = GlobalC::ucell.iat2it[iat0];
+                for (int iwt1_2D = 0; iwt1_2D != mR_2D.shape[1]; ++iwt1_2D)
+                {
+                    const int iwt1 = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER() ? pv.local2global_row(iwt1_2D)
+                                                                                         : pv.local2global_col(iwt1_2D);
+                    int iat1, iw1_b, is1_b;
+                    std::tie(iat1, iw1_b, is1_b) = RI_2D_Comm::get_iat_iw_is_block(iwt1);
+                    const int it1 = GlobalC::ucell.iat2it[iat1];
+
+                    const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b);
+                    RI::Tensor<Tdata>& mR_a2D = mRs_a2D[is_b][iat0][{iat1, cell}];
+                    if (mR_a2D.empty())
+                        mR_a2D = RI::Tensor<Tdata>({static_cast<size_t>(GlobalC::ucell.atoms[it0].nw),
+                                                    static_cast<size_t>(GlobalC::ucell.atoms[it1].nw)});
+                    mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D);
+                }
+            }
+        }
+    }
+    ModuleBase::timer::tick("RI_2D_Comm", "split_m2D_ktoR");
+    return mRs_a2D;
 }
 
-
-template<typename Tdata, typename TK>
-void RI_2D_Comm::add_Hexx(
-	const K_Vectors &kv,
-	const int ik,
-	const double alpha,
-	const std::vector<std::map<TA,std::map<TAC,RI::Tensor<Tdata>>>> &Hs,
-    const Parallel_Orbitals& pv,
-    TK* hk)
+template <typename Tdata, typename TK>
+void RI_2D_Comm::add_Hexx(const K_Vectors& kv,
+                          const int ik,
+                          const double alpha,
+                          const std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>& Hs,
+                          const Parallel_Orbitals& pv,
+                          TK* hk)
 {
-	ModuleBase::TITLE("RI_2D_Comm","add_Hexx");
-	ModuleBase::timer::tick("RI_2D_Comm", "add_Hexx");
-
-	const std::map<int, std::vector<int>> is_list = {{1,{0}}, {2,{kv.isk[ik]}}, {4,{0,1,2,3}}};
-	for(const int is_b : is_list.at(GlobalV::NSPIN))
-	{
-		int is0_b, is1_b;
-		std::tie(is0_b,is1_b) = RI_2D_Comm::split_is_block(is_b);
-		for(const auto &Hs_tmpA : Hs[is_b])
-		{
-			const TA &iat0 = Hs_tmpA.first;
-			for(const auto &Hs_tmpB : Hs_tmpA.second)
-			{
-				const TA &iat1 = Hs_tmpB.first.first;
-				const TC &cell1 = Hs_tmpB.first.second;
-				const std::complex<double> frac = alpha
-					* std::exp( ModuleBase::TWO_PI*ModuleBase::IMAG_UNIT * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell1)*GlobalC::ucell.latvec)) );
-				const RI::Tensor<Tdata> &H = Hs_tmpB.second;
-				for(size_t iw0_b=0; iw0_b<H.shape[0]; ++iw0_b)
-				{
-					const int iwt0 = RI_2D_Comm::get_iwt(iat0, iw0_b, is0_b);
-                    if (pv.global2local_row(iwt0) < 0)	continue;
-					for(size_t iw1_b=0; iw1_b<H.shape[1]; ++iw1_b)
-					{
-						const int iwt1 = RI_2D_Comm::get_iwt(iat1, iw1_b, is1_b);
-                        if (pv.global2local_col(iwt1) < 0)	continue;
-                        LCAO_Matrix::set_mat2d(iwt0, iwt1, RI::Global_Func::convert<TK>(H(iw0_b, iw1_b)) * RI::Global_Func::convert<TK>(frac), pv, hk);
-					}
-				}
-			}
-		}
-	}
-	ModuleBase::timer::tick("RI_2D_Comm", "add_Hexx");
+    ModuleBase::TITLE("RI_2D_Comm", "add_Hexx");
+    ModuleBase::timer::tick("RI_2D_Comm", "add_Hexx");
+
+    const std::map<int, std::vector<int>> is_list = {{1, {0}}, {2, {kv.isk[ik]}}, {4, {0, 1, 2, 3}}};
+    for (const int is_b: is_list.at(GlobalV::NSPIN))
+    {
+        int is0_b, is1_b;
+        std::tie(is0_b, is1_b) = RI_2D_Comm::split_is_block(is_b);
+        for (const auto& Hs_tmpA: Hs[is_b])
+        {
+            const TA& iat0 = Hs_tmpA.first;
+            for (const auto& Hs_tmpB: Hs_tmpA.second)
+            {
+                const TA& iat1 = Hs_tmpB.first.first;
+                const TC& cell1 = Hs_tmpB.first.second;
+                const std::complex<double> frac
+                    = alpha
+                      * std::exp(ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT
+                                 * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell1) * GlobalC::ucell.latvec)));
+                const RI::Tensor<Tdata>& H = Hs_tmpB.second;
+                for (size_t iw0_b = 0; iw0_b < H.shape[0]; ++iw0_b)
+                {
+                    const int iwt0 = RI_2D_Comm::get_iwt(iat0, iw0_b, is0_b);
+                    if (pv.global2local_row(iwt0) < 0)
+                        continue;
+                    for (size_t iw1_b = 0; iw1_b < H.shape[1]; ++iw1_b)
+                    {
+                        const int iwt1 = RI_2D_Comm::get_iwt(iat1, iw1_b, is1_b);
+                        if (pv.global2local_col(iwt1) < 0)
+                            continue;
+                        LCAO_Matrix::set_mat2d(iwt0,
+                                               iwt1,
+                                               RI::Global_Func::convert<TK>(H(iw0_b, iw1_b))
+                                                   * RI::Global_Func::convert<TK>(frac),
+                                               pv,
+                                               hk);
+                    }
+                }
+            }
+        }
+    }
+    ModuleBase::timer::tick("RI_2D_Comm", "add_Hexx");
 }
 
-std::tuple<int,int,int>
-RI_2D_Comm::get_iat_iw_is_block(const int iwt)
+std::tuple<int, int, int> RI_2D_Comm::get_iat_iw_is_block(const int iwt)
 {
-	const int iat = GlobalC::ucell.iwt2iat[iwt];
-	const int iw = GlobalC::ucell.iwt2iw[iwt];
-	switch(GlobalV::NSPIN)
-	{
-		case 1: case 2:
-			return std::make_tuple(iat, iw, 0);
-		case 4:
-			return std::make_tuple(iat, iw/2, iw%2);
-		default:
-			throw std::invalid_argument(std::string(__FILE__)+" line "+std::to_string(__LINE__));
-	}
+    const int iat = GlobalC::ucell.iwt2iat[iwt];
+    const int iw = GlobalC::ucell.iwt2iw[iwt];
+    switch (GlobalV::NSPIN)
+    {
+    case 1:
+    case 2:
+        return std::make_tuple(iat, iw, 0);
+    case 4:
+        return std::make_tuple(iat, iw / 2, iw % 2);
+    default:
+        throw std::invalid_argument(std::string(__FILE__) + " line " + std::to_string(__LINE__));
+    }
 }
 
 int RI_2D_Comm::get_is_block(const int is_k, const int is_row_b, const int is_col_b)
 {
-	switch(GlobalV::NSPIN)
-	{
-		case 1:		return 0;
-		case 2:		return is_k;
-		case 4:		return is_row_b*2+is_col_b;
-		default:	throw std::invalid_argument(std::string(__FILE__)+" line "+std::to_string(__LINE__));
-	}
+    switch (GlobalV::NSPIN)
+    {
+    case 1:
+        return 0;
+    case 2:
+        return is_k;
+    case 4:
+        return is_row_b * 2 + is_col_b;
+    default:
+        throw std::invalid_argument(std::string(__FILE__) + " line " + std::to_string(__LINE__));
+    }
 }
 
-std::tuple<int,int>
-RI_2D_Comm::split_is_block(const int is_b)
+std::tuple<int, int> RI_2D_Comm::split_is_block(const int is_b)
 {
-	switch(GlobalV::NSPIN)
-	{
-		case 1:	case 2:
-			return std::make_tuple(0, 0);
-		case 4:
-			return std::make_tuple(is_b/2, is_b%2);
-		default:
-			throw std::invalid_argument(std::string(__FILE__)+" line "+std::to_string(__LINE__));
-	}
+    switch (GlobalV::NSPIN)
+    {
+    case 1:
+    case 2:
+        return std::make_tuple(0, 0);
+    case 4:
+        return std::make_tuple(is_b / 2, is_b % 2);
+    default:
+        throw std::invalid_argument(std::string(__FILE__) + " line " + std::to_string(__LINE__));
+    }
 }
 
-
-
 int RI_2D_Comm::get_iwt(const int iat, const int iw_b, const int is_b)
 {
-	const int it = GlobalC::ucell.iat2it[iat];
-	const int ia = GlobalC::ucell.iat2ia[iat];
-	int iw=-1;
-	switch(GlobalV::NSPIN)
-	{
-		case 1: case 2:
-			iw = iw_b;			break;
-		case 4:
-			iw = iw_b*2+is_b;	break;
-		default:
-			throw std::invalid_argument(std::string(__FILE__)+" line "+std::to_string(__LINE__));
-	}
-	const int iwt = GlobalC::ucell.itiaiw2iwt(it,ia,iw);
-	return iwt;
+    const int it = GlobalC::ucell.iat2it[iat];
+    const int ia = GlobalC::ucell.iat2ia[iat];
+    int iw = -1;
+    switch (GlobalV::NSPIN)
+    {
+    case 1:
+    case 2:
+        iw = iw_b;
+        break;
+    case 4:
+        iw = iw_b * 2 + is_b;
+        break;
+    default:
+        throw std::invalid_argument(std::string(__FILE__) + " line " + std::to_string(__LINE__));
+    }
+    const int iwt = GlobalC::ucell.itiaiw2iwt(it, ia, iw);
+    return iwt;
 }
 
 #endif
\ No newline at end of file

From 3608cc694c99ef643bdae5cda2fe7d4dd7fd0479 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci-lite[bot]"
 <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Date: Sat, 29 Jun 2024 12:36:48 +0000
Subject: [PATCH 03/11] [pre-commit.ci lite] apply automatic fixes

---
 source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp  | 10 +++++-----
 .../hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp       |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
index 6bdc290ab3..35c3648674 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
@@ -98,11 +98,11 @@ void Grid_Technique::cal_nnrg(Parallel_Orbitals* pv)
                         }
                     } // end iat2
                 }     // end ad
-                      //				GlobalV::ofs_running << " iat=" << iat << " nlocstartg=" << nlocstartg[iat] << " nad=" <<
-                      //nad[iat] << std::endl;
-            }         // end iat
-        }             // end I1
-    }                 // end T1
+                      //				GlobalV::ofs_running << " iat=" << iat << " nlocstartg=" << nlocstartg[iat] << "
+                  //nad=" << nad[iat] << std::endl;
+            } // end iat
+        }     // end I1
+    }         // end T1
 
     if (GlobalV::OUT_LEVEL != "m")
         ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nnrg", this->nnrg);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp
index 92db36721e..2af239b6b7 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp
@@ -26,7 +26,7 @@ OperatorEXX<OperatorLCAO<TK, TR>>::OperatorEXX(
 {
     this->cal_type = calculation_type::lcao_exx;
     if (this->restart)
-    {   ///  Now only Hexx depends on DM, so we can directly read Hexx to reduce the computational cost.
+    { ///  Now only Hexx depends on DM, so we can directly read Hexx to reduce the computational cost.
         /// If other operators depends on DM, we can also read DM and then calculate the operators to save the memory to
         /// store operator terms.
         assert(this->two_level_step != nullptr);

From 8c6a7e2c4ee96f62c9d405b3ee81e7c10baa02f4 Mon Sep 17 00:00:00 2001
From: dyzheng <zhengdy@bjaisi.com>
Date: Mon, 1 Jul 2024 11:03:39 +0800
Subject: [PATCH 04/11] Fix: compiling error in GNU and CUDA

---
 source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp | 1 +
 source/module_hsolver/diago_cusolver.cpp                 | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
index f80f55749a..1ccd72c871 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
@@ -2,6 +2,7 @@
 #define ABACUS_HS_MATRIX_K_HPP
 
 #include <vector>
+#include "module_basis/module_ao/parallel_orbitals.h"
 namespace hamilt
 {
 template <typename TK>
diff --git a/source/module_hsolver/diago_cusolver.cpp b/source/module_hsolver/diago_cusolver.cpp
index 517fad4d0c..6fcf8d6fc8 100644
--- a/source/module_hsolver/diago_cusolver.cpp
+++ b/source/module_hsolver/diago_cusolver.cpp
@@ -70,7 +70,7 @@ namespace hsolver
                              matg& mat_g)
     {
         auto a = mat_l.p;
-        int* desca = mat_l.desc;
+        const int* desca = mat_l.desc;
         int ctxt = desca[1];
         int nrows = desca[2];
         int ncols = desca[3];

From 639acad67f80c16ae469c07f24f990893628a9d5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci-lite[bot]"
 <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Date: Mon, 1 Jul 2024 04:08:26 +0000
Subject: [PATCH 05/11] [pre-commit.ci lite] apply automatic fixes

---
 .../hamilt_lcaodft/LCAO_nnr.cpp               |   8 +-
 .../hamilt_lcaodft/hs_matrix_k.hpp            |   3 +-
 source/module_hsolver/diago_cusolver.cpp      | 325 +++++++++---------
 3 files changed, 167 insertions(+), 169 deletions(-)

diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
index 35c3648674..58cd99ab3e 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
@@ -99,10 +99,10 @@ void Grid_Technique::cal_nnrg(Parallel_Orbitals* pv)
                     } // end iat2
                 }     // end ad
                       //				GlobalV::ofs_running << " iat=" << iat << " nlocstartg=" << nlocstartg[iat] << "
-                  //nad=" << nad[iat] << std::endl;
-            } // end iat
-        }     // end I1
-    }         // end T1
+                      // nad=" << nad[iat] << std::endl;
+            }         // end iat
+        }             // end I1
+    }                 // end T1
 
     if (GlobalV::OUT_LEVEL != "m")
         ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nnrg", this->nnrg);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
index 1ccd72c871..f3cd56e9ed 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
@@ -1,8 +1,9 @@
 #ifndef ABACUS_HS_MATRIX_K_HPP
 #define ABACUS_HS_MATRIX_K_HPP
 
-#include <vector>
 #include "module_basis/module_ao/parallel_orbitals.h"
+
+#include <vector>
 namespace hamilt
 {
 template <typename TK>
diff --git a/source/module_hsolver/diago_cusolver.cpp b/source/module_hsolver/diago_cusolver.cpp
index 6fcf8d6fc8..2ea77a1c1b 100644
--- a/source/module_hsolver/diago_cusolver.cpp
+++ b/source/module_hsolver/diago_cusolver.cpp
@@ -1,207 +1,204 @@
 #include "diago_cusolver.h"
-#include "module_base/global_variable.h"
-#include "module_base/timer.h"
+
 #include "module_base/blacs_connector.h"
+#include "module_base/global_variable.h"
 #include "module_base/scalapack_connector.h"
+#include "module_base/timer.h"
 
+#include <memory>
 #include <type_traits>
 #include <vector>
-#include <memory>
 
 using complex = std::complex<double>;
 
 // Namespace for the diagonalization solver
 namespace hsolver
 {
-    // this struct is used for collecting matrices from all processes to root process
-    template <typename T> struct Matrix_g
-    {
-        std::shared_ptr<T> p;
-        size_t row;
-        size_t col;
-        std::shared_ptr<int> desc;
-    };
-
-    // Initialize the DecomposedState variable for real and complex numbers
-    template <typename T>
-    int DiagoCusolver<T>::DecomposedState = 0;
-
-    template <typename T>
-    DiagoCusolver<T>::DiagoCusolver(const Parallel_Orbitals* ParaV)
-    {
-        this->ParaV = ParaV;
-    }
+// this struct is used for collecting matrices from all processes to root process
+template <typename T>
+struct Matrix_g
+{
+    std::shared_ptr<T> p;
+    size_t row;
+    size_t col;
+    std::shared_ptr<int> desc;
+};
+
+// Initialize the DecomposedState variable for real and complex numbers
+template <typename T>
+int DiagoCusolver<T>::DecomposedState = 0;
+
+template <typename T>
+DiagoCusolver<T>::DiagoCusolver(const Parallel_Orbitals* ParaV)
+{
+    this->ParaV = ParaV;
+}
+
+template <typename T>
+DiagoCusolver<T>::~DiagoCusolver()
+{
+}
+
+// Wrapper for pdgemr2d and pzgemr2d
+// static inline void Cpxgemr2d(
+//    const int M, const int N,
+//    double *a, const int ia, const int ja, const int *desca,
+//    double *b, const int ib, const int jb, const int *descb,
+//    const int blacs_ctxt)
+//{
+//    pdgemr2d_(&M, &N,
+//              a, &ia, &ja, desca,
+//              b, &ib, &jb, descb,
+//              &blacs_ctxt);
+//}
+//
+// static inline void Cpxgemr2d(
+//    const int M, const int N,
+//    complex *a, const int ia, const int ja, const int *desca,
+//    complex *b, const int ib, const int jb, const int *descb,
+//    const int blacs_ctxt)
+//{
+//    pzgemr2d_(&M, &N,
+//              a, &ia, &ja, desca,
+//              b, &ib, &jb, descb,
+//              &blacs_ctxt);
+//}
+
+// Use Cpxgemr2d to collect matrices from all processes to root process
+template <typename mat, typename matg>
+static void gatherMatrix(const int myid, const int root_proc, const mat& mat_l, matg& mat_g)
+{
+    auto a = mat_l.p;
+    const int* desca = mat_l.desc;
+    int ctxt = desca[1];
+    int nrows = desca[2];
+    int ncols = desca[3];
 
-    template <typename T>
-    DiagoCusolver<T>::~DiagoCusolver()
+    if (myid == root_proc)
     {
+        mat_g.p.reset(new typename std::remove_reference<decltype(*a)>::type[nrows * ncols]);
     }
-    
-    // Wrapper for pdgemr2d and pzgemr2d
-    //static inline void Cpxgemr2d(
-    //    const int M, const int N,
-    //    double *a, const int ia, const int ja, const int *desca,
-    //    double *b, const int ib, const int jb, const int *descb,
-    //    const int blacs_ctxt) 
-    //{
-    //    pdgemr2d_(&M, &N,
-    //              a, &ia, &ja, desca,
-    //              b, &ib, &jb, descb,
-    //              &blacs_ctxt);
-    //}
-//
-    //static inline void Cpxgemr2d(
-    //    const int M, const int N,
-    //    complex *a, const int ia, const int ja, const int *desca,
-    //    complex *b, const int ib, const int jb, const int *descb,
-    //    const int blacs_ctxt) 
-    //{
-    //    pzgemr2d_(&M, &N,
-    //              a, &ia, &ja, desca,
-    //              b, &ib, &jb, descb,
-    //              &blacs_ctxt);
-    //}
-
-    // Use Cpxgemr2d to collect matrices from all processes to root process
-    template <typename mat, typename matg>
-    static void gatherMatrix(const int myid,
-                             const int root_proc,
-                             const mat& mat_l,
-                             matg& mat_g)
+    else
     {
-        auto a = mat_l.p;
-        const int* desca = mat_l.desc;
-        int ctxt = desca[1];
-        int nrows = desca[2];
-        int ncols = desca[3];
-
-        if (myid == root_proc)
-            {
-                mat_g.p.reset(new typename std::remove_reference<decltype(*a)>::type[nrows * ncols]);
-            }
-        else
-            {
-                mat_g.p.reset(new typename std::remove_reference<decltype(*a)>::type[1]);
-            }
+        mat_g.p.reset(new typename std::remove_reference<decltype(*a)>::type[1]);
+    }
 
-        // Set descb, which has all elements in the only block in the root process
-        mat_g.desc.reset(new int[9]{1, ctxt, nrows, ncols, nrows, ncols, 0, 0, nrows});
+    // Set descb, which has all elements in the only block in the root process
+    mat_g.desc.reset(new int[9]{1, ctxt, nrows, ncols, nrows, ncols, 0, 0, nrows});
 
-        mat_g.row = nrows;
-        mat_g.col = ncols;
+    mat_g.row = nrows;
+    mat_g.col = ncols;
 
-        Cpxgemr2d(nrows, ncols, a, 1, 1, desca, mat_g.p.get(), 1, 1, mat_g.desc.get(), ctxt);
-    }
+    Cpxgemr2d(nrows, ncols, a, 1, 1, desca, mat_g.p.get(), 1, 1, mat_g.desc.get(), ctxt);
+}
 
-    // Convert the Psi to a 2D block storage format
-    template <typename T>
-    static void distributePsi(const int* desc_psi, T* psi, T* psi_g)
-    {
-        int ctxt = desc_psi[1];
-        int nrows = desc_psi[2];
-        int ncols = desc_psi[3];
-        int rsrc = desc_psi[6];
-        int csrc = desc_psi[7];
+// Convert the Psi to a 2D block storage format
+template <typename T>
+static void distributePsi(const int* desc_psi, T* psi, T* psi_g)
+{
+    int ctxt = desc_psi[1];
+    int nrows = desc_psi[2];
+    int ncols = desc_psi[3];
+    int rsrc = desc_psi[6];
+    int csrc = desc_psi[7];
 
-        int descg[9] = {1, ctxt, nrows, ncols, nrows, ncols, rsrc, csrc, nrows};
-        int descl[9];
+    int descg[9] = {1, ctxt, nrows, ncols, nrows, ncols, rsrc, csrc, nrows};
+    int descl[9];
 
-        std::copy(desc_psi, desc_psi + 9, descl);
+    std::copy(desc_psi, desc_psi + 9, descl);
 
-        Cpxgemr2d(nrows, ncols, psi_g, 1, 1, descg, psi, 1, 1, descl, ctxt);
-    }
+    Cpxgemr2d(nrows, ncols, psi_g, 1, 1, descg, psi, 1, 1, descl, ctxt);
+}
 
-    // Diagonalization function
-    template <typename T>
-    void DiagoCusolver<T>::diag(hamilt::Hamilt<T>* phm_in,
-                                psi::Psi<T>& psi,
-                                Real* eigenvalue_in)
-    {
-        // Output the title for the current operation
-        ModuleBase::TITLE("DiagoCusolver", "diag");
+// Diagonalization function
+template <typename T>
+void DiagoCusolver<T>::diag(hamilt::Hamilt<T>* phm_in, psi::Psi<T>& psi, Real* eigenvalue_in)
+{
+    // Output the title for the current operation
+    ModuleBase::TITLE("DiagoCusolver", "diag");
 
-        // Create matrices for the Hamiltonian and overlap
-        hamilt::MatrixBlock<T> h_mat;
-        hamilt::MatrixBlock<T> s_mat;
-        phm_in->matrix(h_mat, s_mat);
+    // Create matrices for the Hamiltonian and overlap
+    hamilt::MatrixBlock<T> h_mat;
+    hamilt::MatrixBlock<T> s_mat;
+    phm_in->matrix(h_mat, s_mat);
 
 #ifdef __MPI
-        // global matrix
-        Matrix_g<T> h_mat_g;
-        Matrix_g<T> s_mat_g;
-
-        // get the context and process information
-        int ctxt = ParaV->blacs_ctxt;
-        int nprows = 0;
-        int npcols = 0;
-        int myprow = 0;
-        int mypcol = 0;
-        Cblacs_gridinfo(ctxt, &nprows, &npcols, &myprow, &mypcol);
-        const int num_procs = nprows * npcols;
-        const int myid = Cblacs_pnum(ctxt, myprow, mypcol);
-        const int root_proc = Cblacs_pnum(ctxt, ParaV->desc[6], ParaV->desc[7]);
+    // global matrix
+    Matrix_g<T> h_mat_g;
+    Matrix_g<T> s_mat_g;
+
+    // get the context and process information
+    int ctxt = ParaV->blacs_ctxt;
+    int nprows = 0;
+    int npcols = 0;
+    int myprow = 0;
+    int mypcol = 0;
+    Cblacs_gridinfo(ctxt, &nprows, &npcols, &myprow, &mypcol);
+    const int num_procs = nprows * npcols;
+    const int myid = Cblacs_pnum(ctxt, myprow, mypcol);
+    const int root_proc = Cblacs_pnum(ctxt, ParaV->desc[6], ParaV->desc[7]);
 #endif
 
-        // Allocate memory for eigenvalues
-        std::vector<double> eigen(GlobalV::NLOCAL, 0.0);
+    // Allocate memory for eigenvalues
+    std::vector<double> eigen(GlobalV::NLOCAL, 0.0);
 
-        // Start the timer for the cusolver operation
-        ModuleBase::timer::tick("DiagoCusolver", "cusolver");
+    // Start the timer for the cusolver operation
+    ModuleBase::timer::tick("DiagoCusolver", "cusolver");
 
 #ifdef __MPI
-        if(num_procs > 1)
-        {
-            // gather matrices from processes to root process
-            gatherMatrix(myid, root_proc, h_mat, h_mat_g);
-            gatherMatrix(myid, root_proc, s_mat, s_mat_g);
-        }
+    if (num_procs > 1)
+    {
+        // gather matrices from processes to root process
+        gatherMatrix(myid, root_proc, h_mat, h_mat_g);
+        gatherMatrix(myid, root_proc, s_mat, s_mat_g);
+    }
 #endif
 
-        // Call the dense diagonalization routine
+    // Call the dense diagonalization routine
 #ifdef __MPI
-        if(num_procs > 1)
+    if (num_procs > 1)
+    {
+        MPI_Barrier(MPI_COMM_WORLD);
+        // global psi for distribute
+        int psi_len = myid == root_proc ? h_mat_g.row * h_mat_g.col : 1;
+        std::vector<T> psi_g(psi_len);
+        if (myid == root_proc)
         {
-            MPI_Barrier(MPI_COMM_WORLD);
-            // global psi for distribute
-            int psi_len = myid == root_proc ? h_mat_g.row * h_mat_g.col : 1;
-            std::vector<T> psi_g(psi_len);
-            if (myid == root_proc)
-            {
-                this->dc.Dngvd(h_mat_g.col, h_mat_g.row, h_mat_g.p.get(), s_mat_g.p.get(), eigen.data(), psi_g.data());
-            }
-
-            MPI_Barrier(MPI_COMM_WORLD);
-            
-            // broadcast eigenvalues to all processes
-            MPI_Bcast(eigen.data(), GlobalV::NBANDS, MPI_DOUBLE, root_proc, MPI_COMM_WORLD);
-
-            // distribute psi to all processes
-            distributePsi(this->ParaV->desc_wfc, psi.get_pointer(), psi_g.data());
-        }
-        else
-        {   
-            // Be careful that h_mat.row * h_mat.col != psi.get_nbands() * psi.get_nbasis() under multi-k situation
-            std::vector<T> eigenvectors(h_mat.row * h_mat.col);
-            this->dc.Dngvd(h_mat.row, h_mat.col, h_mat.p, s_mat.p, eigen.data(), eigenvectors.data());
-            const int size = psi.get_nbands() * psi.get_nbasis();
-            BlasConnector::copy(size, eigenvectors.data(), 1, psi.get_pointer(), 1);
+            this->dc.Dngvd(h_mat_g.col, h_mat_g.row, h_mat_g.p.get(), s_mat_g.p.get(), eigen.data(), psi_g.data());
         }
+
+        MPI_Barrier(MPI_COMM_WORLD);
+
+        // broadcast eigenvalues to all processes
+        MPI_Bcast(eigen.data(), GlobalV::NBANDS, MPI_DOUBLE, root_proc, MPI_COMM_WORLD);
+
+        // distribute psi to all processes
+        distributePsi(this->ParaV->desc_wfc, psi.get_pointer(), psi_g.data());
+    }
+    else
+    {
+        // Be careful that h_mat.row * h_mat.col != psi.get_nbands() * psi.get_nbasis() under multi-k situation
+        std::vector<T> eigenvectors(h_mat.row * h_mat.col);
+        this->dc.Dngvd(h_mat.row, h_mat.col, h_mat.p, s_mat.p, eigen.data(), eigenvectors.data());
+        const int size = psi.get_nbands() * psi.get_nbasis();
+        BlasConnector::copy(size, eigenvectors.data(), 1, psi.get_pointer(), 1);
+    }
 #else
-            std::vector<T> eigenvectors(h_mat.row * h_mat.col);
-            this->dc.Dngvd(h_mat.row, h_mat.col, h_mat.p, s_mat.p, eigen.data(), eigenvectors.data());
-            const int size = psi.get_nbands() * psi.get_nbasis();
-            BlasConnector::copy(size, eigenvectors.data(), 1, psi.get_pointer(), 1);
+    std::vector<T> eigenvectors(h_mat.row * h_mat.col);
+    this->dc.Dngvd(h_mat.row, h_mat.col, h_mat.p, s_mat.p, eigen.data(), eigenvectors.data());
+    const int size = psi.get_nbands() * psi.get_nbasis();
+    BlasConnector::copy(size, eigenvectors.data(), 1, psi.get_pointer(), 1);
 #endif
-        // Stop the timer for the cusolver operation
-        ModuleBase::timer::tick("DiagoCusolver", "cusolver");
+    // Stop the timer for the cusolver operation
+    ModuleBase::timer::tick("DiagoCusolver", "cusolver");
 
-        // Copy the eigenvalues to the output arrays
-        const int inc = 1;
-        BlasConnector::copy(GlobalV::NBANDS, eigen.data(), inc, eigenvalue_in, inc);
-    }
+    // Copy the eigenvalues to the output arrays
+    const int inc = 1;
+    BlasConnector::copy(GlobalV::NBANDS, eigen.data(), inc, eigenvalue_in, inc);
+}
 
-    // Explicit instantiation of the DiagoCusolver class for real and complex numbers
-    template class DiagoCusolver<double>;
-    template class DiagoCusolver<complex>;
+// Explicit instantiation of the DiagoCusolver class for real and complex numbers
+template class DiagoCusolver<double>;
+template class DiagoCusolver<complex>;
 
 } // namespace hsolver

From 1d4110b459fca291b0c11979ac2b993d7a2d8de5 Mon Sep 17 00:00:00 2001
From: dyzheng <zhengdy@bjaisi.com>
Date: Mon, 1 Jul 2024 14:03:25 +0800
Subject: [PATCH 06/11] Fix: compiling error in CUDA

---
 source/module_hsolver/diago_cusolver.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/module_hsolver/diago_cusolver.cpp b/source/module_hsolver/diago_cusolver.cpp
index 6fcf8d6fc8..1a39ce7e65 100644
--- a/source/module_hsolver/diago_cusolver.cpp
+++ b/source/module_hsolver/diago_cusolver.cpp
@@ -90,7 +90,7 @@ namespace hsolver
         mat_g.row = nrows;
         mat_g.col = ncols;
 
-        Cpxgemr2d(nrows, ncols, a, 1, 1, desca, mat_g.p.get(), 1, 1, mat_g.desc.get(), ctxt);
+        Cpxgemr2d(nrows, ncols, a, 1, 1, const_cast<int*>(desca), mat_g.p.get(), 1, 1, mat_g.desc.get(), ctxt);
     }
 
     // Convert the Psi to a 2D block storage format

From cda444e394dbd3c18eccbf25c3f825d70e8fec6a Mon Sep 17 00:00:00 2001
From: dyzheng <zhengdy@bjaisi.com>
Date: Mon, 1 Jul 2024 17:56:26 +0800
Subject: [PATCH 07/11] Fix: bug in Overlap<OperatorLCAO>

---
 .../hamilt_lcaodft/operator_lcao/overlap_new.cpp                | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp
index b66477f213..02737a7c2e 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/overlap_new.cpp
@@ -33,7 +33,7 @@ void hamilt::OverlapNew<hamilt::OperatorLCAO<TK, TR>>::initialize_SR(Grid_Driver
 {
     ModuleBase::TITLE("OverlapNew", "initialize_SR");
     ModuleBase::timer::tick("OverlapNew", "initialize_SR");
-    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
+    auto* paraV = this->SR->get_paraV(); // get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
     for (int iat1 = 0; iat1 < ucell->nat; iat1++)
     {

From b00b1532a658dd1bdda5fbe932eb30ccf5e0c506 Mon Sep 17 00:00:00 2001
From: dyzheng <zhengdy@bjaisi.com>
Date: Thu, 4 Jul 2024 22:37:55 +0800
Subject: [PATCH 08/11] Revert "[pre-commit.ci lite] apply automatic fixes"

This reverts commit 1211fc31b7057facd54ed26b53c018c888cf84e2.
---
 source/module_esolver/esolver_ks_lcao.cpp     |   2 +-
 .../esolver_ks_lcao_tmpfunc.cpp               | 146 +++--
 source/module_hamilt_general/matrixblock.h    |   3 +-
 .../hamilt_lcaodft/FORCE_k.cpp                |   5 +-
 .../hamilt_lcaodft/LCAO_matrix.h              |   1 +
 .../hamilt_lcaodft/LCAO_nnr.cpp               | 504 +++++++++---------
 .../hamilt_lcaodft/hamilt_lcao.cpp            |   7 +-
 .../hamilt_lcaodft/hamilt_lcao.h              |  27 +-
 .../hamilt_lcaodft/hs_matrix_k.hpp            |  66 +--
 .../operator_lcao/deepks_lcao.cpp             |   2 +-
 .../operator_lcao/deepks_lcao.h               |   1 +
 .../operator_lcao/dftu_lcao.cpp               |   2 +-
 .../operator_lcao/ekinetic_new.cpp            |   2 +-
 .../operator_lcao/meta_lcao.cpp               |   3 +-
 .../hamilt_lcaodft/operator_lcao/meta_lcao.h  |  13 +-
 .../operator_lcao/nonlocal_new.cpp            |   2 +-
 .../operator_lcao/op_dftu_lcao.cpp            |  13 +-
 .../operator_lcao/op_dftu_lcao.h              |   7 +-
 .../operator_lcao/op_exx_lcao.cpp             |  28 +-
 .../operator_lcao/op_exx_lcao.h               |  38 +-
 .../operator_lcao/op_exx_lcao.hpp             | 119 +++--
 .../operator_lcao/operator_lcao.cpp           | 239 +++++----
 .../operator_lcao/operator_lcao.h             |  37 +-
 .../operator_lcao/sc_lambda_lcao.cpp          |  16 +-
 .../operator_lcao/sc_lambda_lcao.h            |  10 +-
 .../operator_lcao/td_ekinetic_lcao.cpp        |   4 +-
 .../operator_lcao/td_ekinetic_lcao.h          |  44 +-
 .../operator_lcao/test/test_dftu.cpp          |   3 +-
 .../operator_lcao/test/test_ekineticnew.cpp   |  11 +-
 .../operator_lcao/test/test_nonlocalnew.cpp   |  11 +-
 .../operator_lcao/test/test_overlapnew.cpp    |   3 +-
 .../test/test_sc_lambda_lcao.cpp              |  49 +-
 .../operator_lcao/veff_lcao.cpp               |  47 +-
 .../hamilt_lcaodft/operator_lcao/veff_lcao.h  |  24 +-
 .../module_deltaspin/cal_h_lambda.cpp         |  30 +-
 .../module_deltaspin/cal_mw.cpp               |  17 +-
 .../module_deltaspin/spin_constrain.h         | 140 ++---
 .../test/cal_h_lambda_test.cpp                |  70 ++-
 .../test/template_helpers_test.cpp            |   6 +-
 source/module_hamilt_lcao/module_dftu/dftu.h  | 191 ++++---
 .../module_dftu/dftu_folding.cpp              | 128 ++---
 .../module_dftu/dftu_hamilt.cpp               | 185 ++-----
 .../module_dftu/dftu_occup.cpp                | 128 ++---
 .../module_hcontainer/hcontainer.h            |   5 +-
 source/module_io/output_sk.cpp                |   3 +-
 source/module_io/write_dos_lcao.cpp           |   6 +-
 source/module_io/write_proj_band_lcao.cpp     | 221 ++++----
 source/module_ri/RI_2D_Comm.h                 |  88 +--
 source/module_ri/RI_2D_Comm.hpp               | 322 ++++++-----
 49 files changed, 1438 insertions(+), 1591 deletions(-)

diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp
index 15b52729ed..5a7266a5d1 100644
--- a/source/module_esolver/esolver_ks_lcao.cpp
+++ b/source/module_esolver/esolver_ks_lcao.cpp
@@ -390,7 +390,7 @@ void ESolver_KS_LCAO<TK, TR>::cal_stress(ModuleBase::matrix& stress)
 //! mohan add 2024-05-11
 //------------------------------------------------------------------------------
 template <typename TK, typename TR>
-void ESolver_KS_LCAO<TK, TR>::after_all_runners()
+void ESolver_KS_LCAO<TK, TR>::after_all_runners(void)
 {
     ModuleBase::TITLE("ESolver_KS_LCAO", "after_all_runners");
     ModuleBase::timer::tick("ESolver_KS_LCAO", "after_all_runners");
diff --git a/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp b/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp
index 7dc536f139..7a5f9fdbcd 100644
--- a/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp
+++ b/source/module_esolver/esolver_ks_lcao_tmpfunc.cpp
@@ -8,70 +8,104 @@
 namespace ModuleESolver
 {
 
-using namespace std;
+    using namespace std;
 
-//! dftu occupation matrix for gamma only using dm(double)
-template <>
-void ESolver_KS_LCAO<double, double>::dftu_cal_occup_m(const int& iter, const vector<vector<double>>& dm) const
-{
-    GlobalC::dftu.cal_occup_m_gamma(iter, dm, this->p_chgmix->get_mixing_beta(), this->p_hamilt);
-}
+    //! dftu occupation matrix for gamma only using dm(double)
+    template <>
+    void ESolver_KS_LCAO<double, double>::dftu_cal_occup_m(
+         const int& iter, 
+         const vector<vector<double>>& dm)const
+    {
+		GlobalC::dftu.cal_occup_m_gamma(
+				iter, 
+				dm, 
+				this->p_chgmix->get_mixing_beta(),
+                this->p_hamilt);
+    }
 
-//! dftu occupation matrix for multiple k-points using dm(complex)
-template <>
-void ESolver_KS_LCAO<complex<double>, double>::dftu_cal_occup_m(const int& iter,
-                                                                const vector<vector<complex<double>>>& dm) const
-{
-    GlobalC::dftu.cal_occup_m_k(iter, dm, this->kv, this->p_chgmix->get_mixing_beta(), this->p_hamilt);
-}
+    //! dftu occupation matrix for multiple k-points using dm(complex)
+    template <>
+    void ESolver_KS_LCAO<complex<double>, double>::dftu_cal_occup_m(
+       const int& iter, 
+       const vector<vector<complex<double>>>& dm)const
+    {
+        GlobalC::dftu.cal_occup_m_k(
+                 iter, 
+                 dm, 
+                 this->kv, 
+                 this->p_chgmix->get_mixing_beta(), 
+                 this->p_hamilt);
+    }
 
-//! dftu occupation matrix
-template <>
-void ESolver_KS_LCAO<complex<double>, complex<double>>::dftu_cal_occup_m(
-    const int& iter,
-    const vector<vector<complex<double>>>& dm) const
-{
-    GlobalC::dftu.cal_occup_m_k(iter, dm, this->kv, this->p_chgmix->get_mixing_beta(), this->p_hamilt);
-}
+    //! dftu occupation matrix
+    template <>
+    void ESolver_KS_LCAO<complex<double>, complex<double>>::dftu_cal_occup_m(
+       const int& iter, 
+       const vector<vector<complex<double>>>& dm)const
+    {
+        GlobalC::dftu.cal_occup_m_k(
+                 iter, 
+                 dm, 
+                 this->kv, 
+                 this->p_chgmix->get_mixing_beta(), 
+                 this->p_hamilt);
+    }
 
 #ifdef __DEEPKS
-template <>
-void ESolver_KS_LCAO<double, double>::dpks_cal_e_delta_band(const vector<vector<double>>& dm) const
-{
-    GlobalC::ld.cal_e_delta_band(dm);
-}
+    template<>
+    void ESolver_KS_LCAO<double, double>::dpks_cal_e_delta_band(
+       const vector<vector<double>>& dm)const
+    {
+        GlobalC::ld.cal_e_delta_band(dm);
+    }
 
-template <>
-void ESolver_KS_LCAO<complex<double>, double>::dpks_cal_e_delta_band(const vector<vector<complex<double>>>& dm) const
-{
-    GlobalC::ld.cal_e_delta_band_k(dm, this->kv.get_nks());
-}
 
-template <>
-void ESolver_KS_LCAO<complex<double>, complex<double>>::dpks_cal_e_delta_band(
-    const vector<vector<complex<double>>>& dm) const
-{
-    GlobalC::ld.cal_e_delta_band_k(dm, this->kv.get_nks());
-}
+    template<>
+    void ESolver_KS_LCAO<complex<double>, double>::dpks_cal_e_delta_band(
+        const vector<vector<complex<double>>>& dm)const
+    {
+        GlobalC::ld.cal_e_delta_band_k(dm, this->kv.get_nks());
+    }
 
-template <>
-void ESolver_KS_LCAO<double, double>::dpks_cal_projected_DM(const elecstate::DensityMatrix<double, double>* dm) const
-{
-    GlobalC::ld.cal_projected_DM(dm, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD);
-}
 
-template <>
-void ESolver_KS_LCAO<complex<double>, double>::dpks_cal_projected_DM(
-    const elecstate::DensityMatrix<complex<double>, double>* dm) const
-{
-    GlobalC::ld.cal_projected_DM_k(dm, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD);
-}
+    template<>
+    void ESolver_KS_LCAO<complex<double>, complex<double>>::dpks_cal_e_delta_band(
+        const vector<vector<complex<double>>>& dm)const
+    {
+        GlobalC::ld.cal_e_delta_band_k(dm, this->kv.get_nks());
+    }
 
-template <>
-void ESolver_KS_LCAO<complex<double>, complex<double>>::dpks_cal_projected_DM(
-    const elecstate::DensityMatrix<complex<double>, double>* dm) const
-{
-    GlobalC::ld.cal_projected_DM_k(dm, GlobalC::ucell, GlobalC::ORB, GlobalC::GridD);
-}
+
+    template<>
+    void ESolver_KS_LCAO<double, double>::dpks_cal_projected_DM(
+        const elecstate::DensityMatrix<double, double>* dm)const
+    {
+        GlobalC::ld.cal_projected_DM(dm,
+            GlobalC::ucell,
+            GlobalC::ORB,
+            GlobalC::GridD);
+    }
+
+
+    template<>
+    void ESolver_KS_LCAO<complex<double>, double>::dpks_cal_projected_DM(
+          const elecstate::DensityMatrix<complex<double>, double>* dm)const
+    {
+        GlobalC::ld.cal_projected_DM_k(dm,
+            GlobalC::ucell,
+            GlobalC::ORB,
+            GlobalC::GridD);
+    }
+
+
+    template<>
+    void ESolver_KS_LCAO<complex<double>, complex<double>>::dpks_cal_projected_DM(
+          const elecstate::DensityMatrix<complex<double>, double>* dm)const
+    {
+        GlobalC::ld.cal_projected_DM_k(dm,
+            GlobalC::ucell,
+            GlobalC::ORB,
+            GlobalC::GridD);
+    }
 #endif
-} // namespace ModuleESolver
+}
diff --git a/source/module_hamilt_general/matrixblock.h b/source/module_hamilt_general/matrixblock.h
index 32979d2df3..481a95b8a5 100644
--- a/source/module_hamilt_general/matrixblock.h
+++ b/source/module_hamilt_general/matrixblock.h
@@ -5,8 +5,7 @@
 namespace hamilt
 {
 
-template <typename T>
-struct MatrixBlock
+template <typename T> struct MatrixBlock
 {
     /* this is a simple template block of a matrix
        would change to Eigen in the future */
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
index a8d2af9dda..56953592fd 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_k.cpp
@@ -143,7 +143,8 @@ void Force_LCAO<std::complex<double>>::allocate(const Parallel_Orbitals& pv,
     {
         cal_deri = false;
 
-        ModuleBase::WARNING_QUIT("cal_syns", "This function has been broken and will be fixed later.");
+        ModuleBase::WARNING_QUIT("cal_syns",
+                                 "This function has been broken and will be fixed later.");
 
         LCAO_domain::build_ST_new(lm,
                                   fsr,
@@ -160,7 +161,7 @@ void Force_LCAO<std::complex<double>>::allocate(const Parallel_Orbitals& pv,
 
         for (int ik = 0; ik < nks; ik++)
         {
-
+            
             bool bit = false; // LiuXh, 2017-03-21
             /*ModuleIO::save_mat(0,
                                lm.Hloc2.data(),
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h
index d512e7ca8d..e0bf0397ca 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h
@@ -34,6 +34,7 @@ class LCAO_Matrix
 #endif
 
   public:
+
     // LiuXh add 2019-07-15
     double**** Hloc_fixedR_tr;
     double**** SlocR_tr;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
index 58cd99ab3e..13399b6722 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/LCAO_nnr.cpp
@@ -1,292 +1,284 @@
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
+#include "module_cell/module_neighbor/sltk_grid_driver.h"
+#include "record_adj.h" //mohan add 2012-07-06
 #include "module_base/timer.h"
 #include "module_base/tool_threading.h"
 #include "module_cell/module_neighbor/sltk_grid_driver.h"
-#include "module_hamilt_pw/hamilt_pwdft/global.h"
-#include "record_adj.h" //mohan add 2012-07-06
 #ifdef __DEEPKS
 #include "module_hamilt_lcao/module_deepks/LCAO_deepks.h"
 #endif
 #include "module_base/libm/libm.h"
-
 #include <algorithm>
 
-// This is for cell R dependent part.
+// This is for cell R dependent part. 
 void Grid_Technique::cal_nnrg(Parallel_Orbitals* pv)
 {
-    ModuleBase::TITLE("LCAO_nnr", "cal_nnrg");
-
-    this->cal_max_box_index();
-
-    this->nnrg = 0;
-
-    nlocdimg.clear();
-    nlocstartg.clear();
-    nad.clear();
-    this->nnrg_index.resize(0);
-
-    this->nad = std::vector<int>(GlobalC::ucell.nat, 0);
-    this->nlocdimg = std::vector<int>(GlobalC::ucell.nat, 0);
-    this->nlocstartg = std::vector<int>(GlobalC::ucell.nat, 0);
-
-    ModuleBase::Vector3<double> tau1, tau2, dtau;
-    ModuleBase::Vector3<double> dtau1, dtau2, tau0;
-    for (int T1 = 0; T1 < GlobalC::ucell.ntype; ++T1)
-    {
-        Atom* atom1 = &GlobalC::ucell.atoms[T1];
-        for (int I1 = 0; I1 < atom1->na; ++I1)
-        {
-            tau1 = atom1->tau[I1];
-
-            GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
-
-            const int iat = GlobalC::ucell.itia2iat(T1, I1);
-
-            // for grid integration (on FFT box),
-            // we only need to consider <phi_i | phi_j>,
-            // which is different from non-local term,
-            // which we need to consdier <phi_i|beta_k><beta_k|phi_j>
-
-            // whether this atom is in this processor.
-            if (this->in_this_processor[iat])
-            {
-                // starting index of adjacents.
-                this->nlocstartg[iat] = this->nnrg;
-
-                // number of adjacent atoms for atom 'iat'
-                this->nad[iat] = 0;
-
-                int count = 0;
-                for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum() + 1; ++ad)
-                {
-                    const int T2 = GlobalC::GridD.getType(ad);
-                    const int I2 = GlobalC::GridD.getNatom(ad);
-                    const int iat2 = GlobalC::ucell.itia2iat(T2, I2);
-                    Atom* atom2 = &GlobalC::ucell.atoms[T2];
-
-                    // if the adjacent atom is in this processor.
-                    if (this->in_this_processor[iat2])
-                    {
-                        tau2 = GlobalC::GridD.getAdjacentTau(ad);
-                        dtau = GlobalC::GridD.getAdjacentTau(ad) - tau1;
-                        double distance = dtau.norm() * GlobalC::ucell.lat0;
-                        double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
-
-                        // if(distance < rcut)
-                        //  mohan reset this 2013-07-02 in Princeton
-                        //  we should make absolutely sure that the distance is smaller than
-                        //  GlobalC::ORB.Phi[it].getRcut this should be consistant with LCAO_nnr::cal_nnrg function
-                        //  typical example : 7 Bohr cutoff Si orbital in 14 Bohr length of cell.
-                        //  distance = 7.0000000000000000
-                        //  GlobalC::ORB.Phi[it].getRcut = 7.0000000000000008
-                        if (distance < rcut - 1.0e-15)
-                        {
-                            // storing the indexed for nnrg
-                            const int mu = pv->global2local_row(iat);
-                            const int nu = pv->global2local_col(iat2);
-                            this->nnrg_index.push_back(gridIntegral::gridIndex{this->nnrg,
-                                                                               mu,
-                                                                               nu,
-                                                                               GlobalC::GridD.getBox(ad),
-                                                                               atom1->nw,
-                                                                               atom2->nw});
-
-                            const int nelement = atom1->nw * atom2->nw;
-                            this->nnrg += nelement;
-                            this->nlocdimg[iat] += nelement;
-                            this->nad[iat]++;
-                            ++count;
-                        }
-                    } // end iat2
-                }     // end ad
-                      //				GlobalV::ofs_running << " iat=" << iat << " nlocstartg=" << nlocstartg[iat] << "
-                      // nad=" << nad[iat] << std::endl;
-            }         // end iat
-        }             // end I1
-    }                 // end T1
-
-    if (GlobalV::OUT_LEVEL != "m")
-        ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running, "nnrg", this->nnrg);
-
-    //--------------------------------------------------
-    // search again, to order each (iat2, b1, b2, b3)
-    // find_R2 is used to target DM_R.
-    // because DM_R is allocated with nnrg.
-    // So once we had dR = R2 - R1 and iat2,
-    // we need to find out the corresponding positions
-    // in DM_R
-    //--------------------------------------------------
-    if (allocate_find_R2)
-    {
-        find_R2.clear();
-        find_R2st.clear();
-        find_R2_sorted_index.clear();
-        allocate_find_R2 = false;
-    }
-    this->find_R2.resize(GlobalC::ucell.nat);
-    this->find_R2st.resize(GlobalC::ucell.nat);
-    this->find_R2_sorted_index.resize(GlobalC::ucell.nat);
-    for (int iat = 0; iat < GlobalC::ucell.nat; iat++)
-    {
-        this->find_R2_sorted_index[iat].resize(nad[iat]);
-        std::fill(this->find_R2_sorted_index[iat].begin(), this->find_R2_sorted_index[iat].end(), 0);
-        this->find_R2st[iat].resize(nad[iat]);
-        std::fill(this->find_R2st[iat].begin(), this->find_R2st[iat].end(), 0);
-        this->find_R2[iat].resize(nad[iat]);
-        std::fill(this->find_R2[iat].begin(), this->find_R2[iat].end(), 0);
-    }
-
-    allocate_find_R2 = true;
-
-    for (int T1 = 0; T1 < GlobalC::ucell.ntype; T1++)
-    {
-        for (int I1 = 0; I1 < GlobalC::ucell.atoms[T1].na; I1++)
-        {
-            //			std::cout << " T1=" << T1 << " I1=" << I1 << std::endl;
-            tau1 = GlobalC::ucell.atoms[T1].tau[I1];
-            GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
-            const int iat = GlobalC::ucell.itia2iat(T1, I1);
-
-            //			std::cout << " Number of adjacent = " << GlobalC::GridD.getAdjacentNum()+1 << std::endl;
-
-            int count = 0;
-            for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum() + 1; ad++)
-            {
-                //		std::cout << " ad=" << ad << std::endl;
-                const int T2 = GlobalC::GridD.getType(ad);
-                const int I2 = GlobalC::GridD.getNatom(ad);
-                const int iat2 = GlobalC::ucell.itia2iat(T2, I2);
-
-                // if this atom is in this processor.
-                if (this->in_this_processor[iat])
-                {
-                    if (this->in_this_processor[iat2])
-                    {
-                        dtau = GlobalC::GridD.getAdjacentTau(ad) - tau1;
+	ModuleBase::TITLE("LCAO_nnr","cal_nnrg");
+
+	this->cal_max_box_index();
+
+	this->nnrg = 0;
+
+	nlocdimg.clear();
+	nlocstartg.clear();
+	nad.clear();
+	this->nnrg_index.resize(0);
+	
+	this->nad = std::vector<int>(GlobalC::ucell.nat, 0);
+	this->nlocdimg = std::vector<int>(GlobalC::ucell.nat, 0);
+	this->nlocstartg =std::vector<int>(GlobalC::ucell.nat, 0);
+
+	ModuleBase::Vector3<double> tau1, tau2, dtau;
+	ModuleBase::Vector3<double> dtau1, dtau2, tau0;
+	for (int T1 = 0; T1 < GlobalC::ucell.ntype; ++T1)
+	{
+		Atom* atom1 = &GlobalC::ucell.atoms[T1];
+		for (int I1 = 0; I1 < atom1->na; ++I1)
+		{
+			tau1 = atom1->tau[I1];
+
+			GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
+
+			const int iat = GlobalC::ucell.itia2iat(T1,I1);
+
+			// for grid integration (on FFT box),
+			// we only need to consider <phi_i | phi_j>,
+			// which is different from non-local term,
+			// which we need to consdier <phi_i|beta_k><beta_k|phi_j>
+
+			// whether this atom is in this processor.
+			if(this->in_this_processor[iat])
+			{
+				// starting index of adjacents.
+				this->nlocstartg[iat] = this->nnrg;
+
+				// number of adjacent atoms for atom 'iat'
+				this->nad[iat] = 0;
+
+				int count = 0;
+				for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum()+1; ++ad)
+				{
+					const int T2 = GlobalC::GridD.getType(ad);
+					const int I2 = GlobalC::GridD.getNatom(ad);
+					const int iat2 = GlobalC::ucell.itia2iat(T2, I2);
+					Atom* atom2 = &GlobalC::ucell.atoms[T2]; 
+
+					// if the adjacent atom is in this processor.
+					if(this->in_this_processor[iat2])
+					{
+						tau2 = GlobalC::GridD.getAdjacentTau(ad);
+						dtau = GlobalC::GridD.getAdjacentTau(ad) - tau1;
+						double distance = dtau.norm() * GlobalC::ucell.lat0;
+						double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
+
+
+						//if(distance < rcut)
+						// mohan reset this 2013-07-02 in Princeton
+						// we should make absolutely sure that the distance is smaller than GlobalC::ORB.Phi[it].getRcut
+						// this should be consistant with LCAO_nnr::cal_nnrg function 
+						// typical example : 7 Bohr cutoff Si orbital in 14 Bohr length of cell.
+						// distance = 7.0000000000000000
+						// GlobalC::ORB.Phi[it].getRcut = 7.0000000000000008
+						if(distance < rcut - 1.0e-15)
+						{
+							//storing the indexed for nnrg
+							const int mu = pv->global2local_row(iat);
+							const int nu = pv->global2local_col(iat2);
+							this->nnrg_index.push_back(gridIntegral::gridIndex{this->nnrg, mu, nu, GlobalC::GridD.getBox(ad), atom1->nw, atom2->nw});
+							
+							const int nelement = atom1->nw * atom2->nw;
+							this->nnrg += nelement;
+							this->nlocdimg[iat] += nelement; 
+							this->nad[iat]++;
+							++count;
+						}
+					}// end iat2
+				}// end ad
+//				GlobalV::ofs_running << " iat=" << iat << " nlocstartg=" << nlocstartg[iat] << " nad=" << nad[iat] << std::endl;
+			}// end iat
+		}// end I1
+	}// end T1
+
+	if(GlobalV::OUT_LEVEL != "m") ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"nnrg",this->nnrg);
+
+	//--------------------------------------------------
+	// search again, to order each (iat2, b1, b2, b3)
+	// find_R2 is used to target DM_R.
+	// because DM_R is allocated with nnrg.
+	// So once we had dR = R2 - R1 and iat2,
+	// we need to find out the corresponding positions
+	// in DM_R
+	//--------------------------------------------------
+	if(allocate_find_R2)
+	{
+		find_R2.clear();
+		find_R2st.clear();
+		find_R2_sorted_index.clear();
+		allocate_find_R2 = false;
+	}
+	this->find_R2.resize(GlobalC::ucell.nat);
+	this->find_R2st.resize(GlobalC::ucell.nat);
+	this->find_R2_sorted_index.resize(GlobalC::ucell.nat);
+	for (int iat = 0; iat < GlobalC::ucell.nat; iat++)
+	{
+		this->find_R2_sorted_index[iat].resize(nad[iat]);
+		std::fill(this->find_R2_sorted_index[iat].begin(), this->find_R2_sorted_index[iat].end(), 0);
+		this->find_R2st[iat].resize(nad[iat]);
+		std::fill(this->find_R2st[iat].begin(), this->find_R2st[iat].end(), 0);
+		this->find_R2[iat].resize(nad[iat]);
+		std::fill(this->find_R2[iat].begin(), this->find_R2[iat].end(), 0);
+	}
+
+	allocate_find_R2 = true;
+
+	for (int T1 = 0; T1 < GlobalC::ucell.ntype; T1++)
+	{
+		for (int I1 = 0; I1 < GlobalC::ucell.atoms[T1].na; I1++)
+		{
+//			std::cout << " T1=" << T1 << " I1=" << I1 << std::endl; 
+			tau1 = GlobalC::ucell.atoms[T1].tau[I1];
+			GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
+			const int iat = GlobalC::ucell.itia2iat(T1,I1);
+
+//			std::cout << " Number of adjacent = " << GlobalC::GridD.getAdjacentNum()+1 << std::endl;
+			
+			int count=0;
+			for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum()+1; ad++)
+			{
+		//		std::cout << " ad=" << ad << std::endl;
+				const int T2 = GlobalC::GridD.getType(ad);
+				const int I2 = GlobalC::GridD.getNatom(ad);
+				const int iat2 = GlobalC::ucell.itia2iat(T2,I2);
+
+				// if this atom is in this processor.
+				if(this->in_this_processor[iat])
+				{
+					if(this->in_this_processor[iat2])
+					{
+						dtau = GlobalC::GridD.getAdjacentTau(ad) - tau1;
                         double distance = dtau.norm() * GlobalC::ucell.lat0;
                         double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
 
-                        const int b1 = GlobalC::GridD.getBox(ad).x;
-                        const int b2 = GlobalC::GridD.getBox(ad).y;
-                        const int b3 = GlobalC::GridD.getBox(ad).z;
-
-                        // mohan fix bug 2011-06-26, should be '<', not '<='
-                        //			if(distance < rcut)
-
-                        // mohan reset this 2013-07-02 in Princeton
-                        // we should make absolutely sure that the distance is smaller than GlobalC::ORB.Phi[it].getRcut
-                        // this should be consistant with LCAO_nnr::cal_nnrg function
-                        // typical example : 7 Bohr cutoff Si orbital in 14 Bohr length of cell.
-                        // distance = 7.0000000000000000
-                        // GlobalC::ORB.Phi[it].getRcut = 7.0000000000000008
-                        if (distance < rcut - 1.0e-15)
-                        {
-                            //	assert( count < nad[iat] );
-                            //--------------------------------------------------------------
-                            // start positions of adjacent atom of 'iat'
-                            // note: the first is not zero.
-                            //--------------------------------------------------------------
-                            find_R2[iat][count] = this->cal_RindexAtom(b1, b2, b3, iat2);
-                            find_R2_sorted_index[iat][count] = count;
-
-                            // find_R2st
-                            // note: the first must be zero.
-                            // find_R2st: start position of each adjacen atom.
-                            if (count + 1 < nad[iat])
-                            {
-                                find_R2st[iat][count + 1]
-                                    = find_R2st[iat][count]
-                                      + GlobalC::ucell.atoms[T1].nw
-                                            * GlobalC::ucell.atoms[T2].nw; // modified by zhengdy-soc
-                            }
-                            ++count;
-                        }
-                    }
-                }
-            }
-            // Include the necessary header file
-
-            std::stable_sort(find_R2_sorted_index[iat].begin(),
-                             find_R2_sorted_index[iat].begin() + nad[iat],
-                             [&](int pos1, int pos2) { return find_R2[iat][pos1] < find_R2[iat][pos2]; });
-        }
-    }
-
-    return;
+						const int b1 = GlobalC::GridD.getBox(ad).x;
+						const int b2 = GlobalC::GridD.getBox(ad).y;
+						const int b3 = GlobalC::GridD.getBox(ad).z;
+					
+						// mohan fix bug 2011-06-26, should be '<', not '<='	
+						//			if(distance < rcut)
+
+						// mohan reset this 2013-07-02 in Princeton
+						// we should make absolutely sure that the distance is smaller than GlobalC::ORB.Phi[it].getRcut
+						// this should be consistant with LCAO_nnr::cal_nnrg function 
+						// typical example : 7 Bohr cutoff Si orbital in 14 Bohr length of cell.
+						// distance = 7.0000000000000000
+						// GlobalC::ORB.Phi[it].getRcut = 7.0000000000000008
+						if(distance < rcut - 1.0e-15)
+						{
+						//	assert( count < nad[iat] );
+							//--------------------------------------------------------------
+							// start positions of adjacent atom of 'iat'
+							// note: the first is not zero.
+							//--------------------------------------------------------------
+							find_R2[iat][count] = this->cal_RindexAtom(b1, b2, b3, iat2);
+							find_R2_sorted_index[iat][count] = count;
+
+
+							// find_R2st
+							// note: the first must be zero.
+							// find_R2st: start position of each adjacen atom.
+							if( count + 1 < nad[iat] )
+							{
+								find_R2st[iat][count+1] = find_R2st[iat][count] 
+								+ GlobalC::ucell.atoms[T1].nw * GlobalC::ucell.atoms[T2].nw; //modified by zhengdy-soc
+							}
+							++count;
+						}
+					}
+				}
+			}
+ // Include the necessary header file
+
+			std::stable_sort(find_R2_sorted_index[iat].begin(), find_R2_sorted_index[iat].begin() + nad[iat],
+				[&](int pos1, int pos2){ return find_R2[iat][pos1] < find_R2[iat][pos2]; });
+		}
+	}
+
+	return;
 }
 
 void Grid_Technique::cal_max_box_index(void)
 {
-    ModuleBase::TITLE("LCAO_nnr", "cal_max_box_index");
-    this->maxB1 = this->maxB2 = this->maxB3 = -10000;
-    this->minB1 = this->minB2 = this->minB3 = 10000;
-    for (int T1 = 0; T1 < GlobalC::ucell.ntype; T1++)
-    {
-        for (int I1 = 0; I1 < GlobalC::ucell.atoms[T1].na; I1++)
-        {
-            ModuleBase::Vector3<double> tau1 = GlobalC::ucell.atoms[T1].tau[I1];
-            // GlobalC::GridD.Find_atom(tau1);
-            GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
-            for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum() + 1; ad++)
-            {
-                this->maxB1 = std::max(GlobalC::GridD.getBox(ad).x, maxB1);
-                this->maxB2 = std::max(GlobalC::GridD.getBox(ad).y, maxB2);
-                this->maxB3 = std::max(GlobalC::GridD.getBox(ad).z, maxB3);
-
-                this->minB1 = std::min(GlobalC::GridD.getBox(ad).x, minB1);
-                this->minB2 = std::min(GlobalC::GridD.getBox(ad).y, minB2);
-                this->minB3 = std::min(GlobalC::GridD.getBox(ad).z, minB3);
-            }
-        }
-    }
-
-    nB1 = maxB1 - minB1 + 1;
-    nB2 = maxB2 - minB2 + 1;
-    nB3 = maxB3 - minB3 + 1;
-
-    nbox = nB1 * nB2 * nB3;
-
-    // ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"nbox",nbox);
-
-    return;
+	ModuleBase::TITLE("LCAO_nnr","cal_max_box_index");
+	this->maxB1 = this->maxB2 = this->maxB3 = -10000;
+	this->minB1 = this->minB2 = this->minB3 = 10000;
+	for (int T1 = 0; T1 < GlobalC::ucell.ntype; T1++)
+	{
+		for (int I1 = 0; I1 < GlobalC::ucell.atoms[T1].na; I1++)
+		{
+			ModuleBase::Vector3<double> tau1 = GlobalC::ucell.atoms[T1].tau[I1];
+			//GlobalC::GridD.Find_atom(tau1);
+			GlobalC::GridD.Find_atom(GlobalC::ucell, tau1, T1, I1);
+			for (int ad = 0; ad < GlobalC::GridD.getAdjacentNum()+1; ad++)
+			{
+				this->maxB1 = std::max( GlobalC::GridD.getBox(ad).x, maxB1 ); 
+				this->maxB2 = std::max( GlobalC::GridD.getBox(ad).y, maxB2 ); 
+				this->maxB3 = std::max( GlobalC::GridD.getBox(ad).z, maxB3 ); 
+
+				this->minB1 = std::min( GlobalC::GridD.getBox(ad).x, minB1 ); 
+				this->minB2 = std::min( GlobalC::GridD.getBox(ad).y, minB2 ); 
+				this->minB3 = std::min( GlobalC::GridD.getBox(ad).z, minB3 ); 
+			}
+		}
+	}
+
+	nB1 = maxB1-minB1+1;
+	nB2 = maxB2-minB2+1;
+	nB3 = maxB3-minB3+1;
+
+	nbox = nB1 * nB2 * nB3;
+	
+	//ModuleBase::GlobalFunc::OUT(GlobalV::ofs_running,"nbox",nbox);
+
+	return;
 }
 
-int Grid_Technique::cal_RindexAtom(const int& u1, const int& u2, const int& u3, const int& iat2) const
+int Grid_Technique::cal_RindexAtom(const int &u1, const int &u2, const int &u3, const int &iat2) const
 {
-    const int x1 = u1 - this->minB1;
-    const int x2 = u2 - this->minB2;
-    const int x3 = u3 - this->minB3;
-
-    if (x1 < 0 || x2 < 0 || x3 < 0)
-    {
-        std::cout << " u1=" << u1 << " minB1=" << minB1 << std::endl;
-        std::cout << " u2=" << u2 << " minB2=" << minB2 << std::endl;
-        std::cout << " u3=" << u3 << " minB3=" << minB3 << std::endl;
-        ModuleBase::WARNING_QUIT("LCAO_nnr::cal_Rindex", "x1<0 || x2<0 || x3<0 !");
-    }
-
-    assert(x1 >= 0);
-    assert(x2 >= 0);
-    assert(x3 >= 0);
-
-    return (iat2 + (x3 + x2 * this->nB3 + x1 * this->nB2 * this->nB3) * GlobalC::ucell.nat);
+	const int x1 = u1 - this->minB1;
+	const int x2 = u2 - this->minB2;
+	const int x3 = u3 - this->minB3;
+	
+	if(x1<0 || x2<0 || x3<0)
+	{
+		std::cout << " u1=" << u1 << " minB1=" << minB1 << std::endl;
+		std::cout << " u2=" << u2 << " minB2=" << minB2 << std::endl;
+		std::cout << " u3=" << u3 << " minB3=" << minB3 << std::endl;
+		ModuleBase::WARNING_QUIT("LCAO_nnr::cal_Rindex","x1<0 || x2<0 || x3<0 !");
+	}
+
+	assert(x1>=0);
+	assert(x2>=0);
+	assert(x3>=0);
+
+	return (iat2 + (x3 + x2 * this->nB3 + x1 * this->nB2 * this->nB3) * GlobalC::ucell.nat);
 }
 
 int Grid_Technique::binary_search_find_R2_offset(int val, int iat) const
 {
     auto findR2 = this->find_R2[iat];
-    auto findR2_index = this->find_R2_sorted_index[iat];
+	auto findR2_index = this->find_R2_sorted_index[iat];
 
-    int left = 0;
+	int left = 0;
     int right = nad[iat] - 1;
-    while (left <= right)
+    while(left <= right)
     {
         int mid = left + ((right - left) >> 1);
-        int idx = findR2_index[mid];
-        if (val == findR2[idx])
+		int idx = findR2_index[mid];
+        if(val == findR2[idx])
         {
             return idx;
         }
-        if (val < findR2[idx])
+        if(val < findR2[idx])
         {
             right = mid - 1;
         }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
index e7bc28325e..711c878657 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.cpp
@@ -36,9 +36,7 @@ namespace hamilt
 {
 
 template <typename TK, typename TR>
-HamiltLCAO<TK, TR>::HamiltLCAO(const Parallel_Orbitals* paraV,
-                               const K_Vectors& kv_in,
-                               const TwoCenterIntegrator& intor_overlap_orb)
+HamiltLCAO<TK, TR>::HamiltLCAO(const Parallel_Orbitals* paraV, const K_Vectors& kv_in, const TwoCenterIntegrator& intor_overlap_orb)
 {
     this->classname = "HamiltLCAO";
 
@@ -168,7 +166,8 @@ HamiltLCAO<TK, TR>::HamiltLCAO(Gint_Gamma* GG_in,
                                                                     pot_in,
                                                                     this->hR, // no explicit call yet
                                                                     &GlobalC::ucell,
-                                                                    &GlobalC::GridD);
+                                                                    &GlobalC::GridD
+                );
                 this->getOperator()->add(veff);
             }
         }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
index 8a3363f7b5..e227d41cc5 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h
@@ -6,11 +6,11 @@
 #include "module_elecstate/potentials/potential_new.h"
 #include "module_hamilt_general/hamilt.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp"
 #include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.h"
 #include "module_hamilt_lcao/module_gint/gint_gamma.h"
 #include "module_hamilt_lcao/module_gint/gint_k.h"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp"
 namespace hamilt
 {
 
@@ -53,29 +53,14 @@ class HamiltLCAO : public Hamilt<TK>
     /// get pointer of Operator<TK> ops
     Operator<TK>*& getOperator();
     /// get hk-pointer
-    TK* getHk() const
-    {
-        return this->hsk->get_hk();
-    }
+    TK* getHk() const{return this->hsk->get_hk();}
     /// get sk-pointer
-    TK* getSk() const
-    {
-        return this->hsk->get_sk();
-    }
-    int get_size_hsk() const
-    {
-        return this->hsk->get_size();
-    }
+    TK* getSk() const{return this->hsk->get_sk();}
+    int get_size_hsk() const{return this->hsk->get_size();}
     /// get HR pointer of *this->hR, which is a HContainer<TR> and contains H(R)
-    HContainer<TR>*& getHR()
-    {
-        return this->hR;
-    }
+    HContainer<TR>*& getHR(){return this->hR;}
     /// get SR pointer of *this->sR, which is a HContainer<TR> and contains S(R)
-    HContainer<TR>*& getSR()
-    {
-        return this->sR;
-    }
+    HContainer<TR>*& getSR(){return this->sR;}
     /// refresh the status of HR
     void refresh() override;
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
index f3cd56e9ed..495af32841 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp
@@ -6,49 +6,29 @@
 #include <vector>
 namespace hamilt
 {
-template <typename TK>
-class HS_Matrix_K
-{
-  public:
-    HS_Matrix_K(const Parallel_Orbitals* paraV, bool no_s = false)
-    {
-        hk.resize(paraV->nloc);
-        if (!no_s)
-        {
-            sk.resize(paraV->nloc);
-        }
-        this->pv = paraV;
-    }
-    TK* get_hk()
-    {
-        return hk.data();
-    }
-    TK* get_sk()
-    {
-        return sk.data();
-    }
-    int get_size()
+    template <typename TK>
+    class HS_Matrix_K
     {
-        return hk.size();
-    }
-    void set_zero_hk()
-    {
-        hk.assign(hk.size(), 0);
-    }
-    void set_zero_sk()
-    {
-        sk.assign(sk.size(), 0);
-    }
-    const Parallel_Orbitals* get_pv() const
-    {
-        return this->pv;
-    }
-
-  private:
-    std::vector<TK> hk;
-    std::vector<TK> sk;
-    const Parallel_Orbitals* pv = nullptr;
-};
-} // namespace hamilt
+        public:
+            HS_Matrix_K(const Parallel_Orbitals* paraV, bool no_s=false){
+                hk.resize(paraV->nloc);
+                if(!no_s) 
+                {
+                    sk.resize(paraV->nloc);
+                }
+                this->pv = paraV;
+            }
+            TK* get_hk() {return hk.data();}
+            TK* get_sk() {return sk.data();}
+            int get_size() {return hk.size();}
+            void set_zero_hk() {hk.assign(hk.size(), 0);}
+            void set_zero_sk() {sk.assign(sk.size(), 0);}
+            const Parallel_Orbitals* get_pv() const {return this->pv;}
+        private:
+            std::vector<TK> hk;
+            std::vector<TK> sk;
+            const Parallel_Orbitals* pv = nullptr;
+    };
+}
 
 #endif
\ No newline at end of file
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
index 5be9bba4cb..28565ecdaf 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.cpp
@@ -49,7 +49,7 @@ void hamilt::DeePKS<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* Gr
     ModuleBase::TITLE("DeePKS", "initialize_HR");
     ModuleBase::timer::tick("DeePKS", "initialize_HR");
 
-    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     // this->H_V_delta = new HContainer<TR>(paraV);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
index 57bccacf85..b0fd1e0b46 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/deepks_lcao.h
@@ -55,6 +55,7 @@ class DeePKS<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 #endif
 
   private:
+
     elecstate::DensityMatrix<TK, double>* DM;
 
     const UnitCell* ucell = nullptr;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp
index 2186267ba0..6d40d8ffef 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_lcao.cpp
@@ -45,7 +45,7 @@ void hamilt::DFTU<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* Grid
     ModuleBase::TITLE("DFTU", "initialize_HR");
     ModuleBase::timer::tick("DFTU", "initialize_HR");
 
-    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     this->adjs_all.clear();
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp
index 25963b13de..0702a14d77 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/ekinetic_new.cpp
@@ -44,7 +44,7 @@ void hamilt::EkineticNew<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Drive
     ModuleBase::TITLE("EkineticNew", "initialize_HR");
     ModuleBase::timer::tick("EkineticNew", "initialize_HR");
 
-    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     for (int iat1 = 0; iat1 < ucell->nat; iat1++)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.cpp
index 279e679680..de325a998f 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.cpp
@@ -1,5 +1,4 @@
 #include "meta_lcao.h"
-
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
@@ -13,4 +12,4 @@ template class Meta<OperatorLCAO<std::complex<double>, double>>;
 
 template class Meta<OperatorLCAO<std::complex<double>, std::complex<double>>>;
 
-} // namespace hamilt
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.h
index 217fec4593..587ea036b8 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/meta_lcao.h
@@ -23,9 +23,10 @@ template <typename TK, typename TR>
 class Meta<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
-    Meta<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
-                               const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                               HContainer<TR>* hR_in)
+    Meta<OperatorLCAO<TK, TR>>(
+                          HS_Matrix_K<TK>* hsk_in,
+                          const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                          HContainer<TR>* hR_in)
         : OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_gint;
@@ -33,11 +34,9 @@ class Meta<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 
     ~Meta<OperatorLCAO<TK, TR>>(){};
 
-    virtual void contributeHR() override
-    {
-    } // do nothing now
+    virtual void contributeHR() override{}//do nothing now
 
-    virtual void contributeHk(int ik) override{}; // do nothing now
+    virtual void contributeHk(int ik) override{};//do nothing now
 
   private:
 };
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp
index d2179f7621..7714cea230 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/nonlocal_new.cpp
@@ -45,7 +45,7 @@ void hamilt::NonlocalNew<hamilt::OperatorLCAO<TK, TR>>::initialize_HR(Grid_Drive
     ModuleBase::TITLE("NonlocalNew", "initialize_HR");
     ModuleBase::timer::tick("NonlocalNew", "initialize_HR");
 
-    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     this->adjs_all.clear();
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.cpp
index d49d4a09bc..924dcf1764 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.cpp
@@ -1,5 +1,4 @@
 #include "op_dftu_lcao.h"
-
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
 #include "module_hamilt_lcao/module_dftu/dftu.h"
@@ -14,14 +13,14 @@ template class OperatorDFTU<OperatorLCAO<std::complex<double>, double>>;
 
 template class OperatorDFTU<OperatorLCAO<std::complex<double>, std::complex<double>>>;
 
-template <typename TK, typename TR>
+template<typename TK, typename TR>
 void OperatorDFTU<OperatorLCAO<TK, TR>>::contributeHR()
 {
-    // no calculation of HR yet for DFTU operator
+    //no calculation of HR yet for DFTU operator
     return;
 }
 
-template <>
+template<>
 void OperatorDFTU<OperatorLCAO<double, double>>::contributeHk(int ik)
 {
     ModuleBase::TITLE("OperatorDFTU", "contributeHk");
@@ -39,7 +38,7 @@ void OperatorDFTU<OperatorLCAO<double, double>>::contributeHk(int ik)
     ModuleBase::timer::tick("OperatorDFTU", "contributeHk");
 }
 
-template <>
+template<>
 void OperatorDFTU<OperatorLCAO<std::complex<double>, double>>::contributeHk(int ik)
 {
     ModuleBase::TITLE("OperatorDFTU", "contributeHk");
@@ -57,7 +56,7 @@ void OperatorDFTU<OperatorLCAO<std::complex<double>, double>>::contributeHk(int
     ModuleBase::timer::tick("OperatorDFTU", "contributeHk");
 }
 
-template <>
+template<>
 void OperatorDFTU<OperatorLCAO<std::complex<double>, std::complex<double>>>::contributeHk(int ik)
 {
     ModuleBase::TITLE("OperatorDFTU", "contributeHk");
@@ -75,4 +74,4 @@ void OperatorDFTU<OperatorLCAO<std::complex<double>, std::complex<double>>>::con
     ModuleBase::timer::tick("OperatorDFTU", "contributeHk");
 }
 
-} // namespace hamilt
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h
index 8482fc5e6c..de40bce4b3 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_dftu_lcao.h
@@ -21,9 +21,9 @@ class OperatorDFTU<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
     OperatorDFTU<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
-                                       const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                                       hamilt::HContainer<TR>* hR_in,
-                                       const std::vector<int>& isk_in)
+                                  const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                                  hamilt::HContainer<TR>* hR_in,
+                                  const std::vector<int>& isk_in)
         : isk(isk_in), OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_dftu;
@@ -34,6 +34,7 @@ class OperatorDFTU<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     virtual void contributeHk(int ik) override;
 
   private:
+
     bool HR_fixed_done = false;
 
     const std::vector<int>& isk;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.cpp
index 83140ec15d..f4418840b5 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.cpp
@@ -1,41 +1,25 @@
 #ifdef __EXX
 #include "op_exx_lcao.h"
-
 #include "module_base/blacs_connector.h"
 
 namespace hamilt
 {
 
-template <>
+template<>
 void OperatorEXX<OperatorLCAO<double, double>>::add_loaded_Hexx(const int ik)
 {
-    BlasConnector::axpy(this->LM->ParaV->get_local_size(),
-                        1.0,
-                        this->LM->Hexxd_k_load[ik].data(),
-                        1,
-                        this->hsk->get_hk(),
-                        1);
+    BlasConnector::axpy(this->LM->ParaV->get_local_size(), 1.0, this->LM->Hexxd_k_load[ik].data(), 1, this->hsk->get_hk(), 1);
 }
-template <>
+template<>
 void OperatorEXX<OperatorLCAO<std::complex<double>, double>>::add_loaded_Hexx(const int ik)
 {
 
-    BlasConnector::axpy(this->LM->ParaV->get_local_size(),
-                        1.0,
-                        this->LM->Hexxc_k_load[ik].data(),
-                        1,
-                        this->hsk->get_hk(),
-                        1);
+    BlasConnector::axpy(this->LM->ParaV->get_local_size(), 1.0, this->LM->Hexxc_k_load[ik].data(), 1, this->hsk->get_hk(), 1);
 }
-template <>
+template<>
 void OperatorEXX<OperatorLCAO<std::complex<double>, std::complex<double>>>::add_loaded_Hexx(const int ik)
 {
-    BlasConnector::axpy(this->LM->ParaV->get_local_size(),
-                        1.0,
-                        this->LM->Hexxc_k_load[ik].data(),
-                        1,
-                        this->hsk->get_hk(),
-                        1);
+    BlasConnector::axpy(this->LM->ParaV->get_local_size(), 1.0, this->LM->Hexxc_k_load[ik].data(), 1, this->hsk->get_hk(), 1);
 }
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h
index f3a3f6fb40..6cb1e8fde2 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.h
@@ -3,11 +3,10 @@
 
 #ifdef __EXX
 
+#include <RI/global/Tensor.h>
+#include "operator_lcao.h"
 #include "module_cell/klist.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
-#include "operator_lcao.h"
-
-#include <RI/global/Tensor.h>
 
 namespace hamilt
 {
@@ -26,10 +25,8 @@ template <typename TK, typename TR>
 class OperatorEXX<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
     using TAC = std::pair<int, std::array<int, 3>>;
-
-  public:
-    OperatorEXX<OperatorLCAO<TK, TR>>(
-        HS_Matrix_K<TK>* hsk_in,
+public:
+    OperatorEXX<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
         LCAO_Matrix* LM_in,
         hamilt::HContainer<TR>* hR_in,
         const K_Vectors& kv_in,
@@ -41,23 +38,24 @@ class OperatorEXX<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     virtual void contributeHk(int ik) override;
 
   private:
-    bool HR_fixed_done = false;
 
-    std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd = nullptr;
-    std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc = nullptr;
+      bool HR_fixed_done = false;
+
+      std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd = nullptr;
+      std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc = nullptr;
 
-    /// @brief  the step of the outer loop.
-    /// nullptr: no dependence on the number of two_level_step, contributeHk will do enerything normally.
-    /// 0: the first outer loop. If restart, contributeHk will directly add Hexx to Hloc. else, do nothing.
-    /// >0: not the first outer loop. contributeHk will do enerything normally.
-    int* two_level_step = nullptr;
-    /// @brief if restart, read and save Hexx, and directly use it during the first outer loop.
-    bool restart = false;
+      /// @brief  the step of the outer loop.
+      /// nullptr: no dependence on the number of two_level_step, contributeHk will do enerything normally.
+      /// 0: the first outer loop. If restart, contributeHk will directly add Hexx to Hloc. else, do nothing.
+      /// >0: not the first outer loop. contributeHk will do enerything normally.
+      int* two_level_step = nullptr;
+      /// @brief if restart, read and save Hexx, and directly use it during the first outer loop.
+      bool restart = false;
 
-    void add_loaded_Hexx(const int ik);
-    const K_Vectors& kv;
+      void add_loaded_Hexx(const int ik);
+      const K_Vectors& kv;
 
-    LCAO_Matrix* LM = nullptr;
+      LCAO_Matrix* LM = nullptr;
 };
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp
index 2af239b6b7..4158abcaf5 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/op_exx_lcao.hpp
@@ -3,26 +3,31 @@
 
 #ifdef __EXX
 
-#include "module_hamilt_general/module_xc/xc_functional.h"
-#include "module_hamilt_pw/hamilt_pwdft/global.h"
-#include "module_ri/RI_2D_Comm.h"
 #include "op_exx_lcao.h"
+#include "module_ri/RI_2D_Comm.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
+#include "module_hamilt_general/module_xc/xc_functional.h"
 
 namespace hamilt
 {
 
 template <typename TK, typename TR>
 OperatorEXX<OperatorLCAO<TK, TR>>::OperatorEXX(
-    HS_Matrix_K<TK>* hsk_in,
-    LCAO_Matrix* LM_in,
-    hamilt::HContainer<TR>* hR_in,
-    const K_Vectors& kv_in,
-    std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd_in,
-    std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc_in,
-    int* two_level_step_in,
-    const bool restart_in)
-    : OperatorLCAO<TK, TR>(hsk_in, kv_in.kvec_d, hR_in), kv(kv_in), Hexxd(Hexxd_in), Hexxc(Hexxc_in),
-      two_level_step(two_level_step_in), restart(restart_in), LM(LM_in)
+	HS_Matrix_K<TK>* hsk_in,
+	LCAO_Matrix* LM_in,
+	hamilt::HContainer<TR>* hR_in,
+	const K_Vectors& kv_in,
+	std::vector<std::map<int, std::map<TAC, RI::Tensor<double>>>>* Hexxd_in,
+	std::vector<std::map<int, std::map<TAC, RI::Tensor<std::complex<double>>>>>* Hexxc_in,
+	int* two_level_step_in,
+	const bool restart_in)
+	: OperatorLCAO<TK, TR>(hsk_in, kv_in.kvec_d, hR_in),
+	  kv(kv_in),
+	  Hexxd(Hexxd_in),
+	  Hexxc(Hexxc_in), 
+	  two_level_step(two_level_step_in),
+	  restart(restart_in),
+	  LM(LM_in)
 {
     this->cal_type = calculation_type::lcao_exx;
     if (this->restart)
@@ -68,53 +73,53 @@ OperatorEXX<OperatorLCAO<TK, TR>>::OperatorEXX(
     }
 }
 
-template <typename TK, typename TR>
+template<typename TK, typename TR>
 void OperatorEXX<OperatorLCAO<TK, TR>>::contributeHk(int ik)
 {
-    // Peize Lin add 2016-12-03
-    if (GlobalV::CALCULATION != "nscf" && this->two_level_step != nullptr && *this->two_level_step == 0
-        && !this->restart)
-        return; // in the non-exx loop, do nothing
-    if (XC_Functional::get_func_type() == 4 || XC_Functional::get_func_type() == 5)
-    {
-        if (this->restart && this->two_level_step != nullptr)
-        {
-            if (*this->two_level_step == 0)
-            {
-                this->add_loaded_Hexx(ik);
-                return;
-            }
-            else // clear loaded Hexx and release memory
-            {
-                if (this->LM->Hexxd_k_load.size() > 0)
-                {
-                    this->LM->Hexxd_k_load.clear();
-                    this->LM->Hexxd_k_load.shrink_to_fit();
-                }
-                else if (this->LM->Hexxc_k_load.size() > 0)
-                {
-                    this->LM->Hexxc_k_load.clear();
-                    this->LM->Hexxc_k_load.shrink_to_fit();
-                }
-            }
-        }
-        // cal H(k) from H(R) normally
+	// Peize Lin add 2016-12-03
+	if (GlobalV::CALCULATION != "nscf" && this->two_level_step != nullptr && *this->two_level_step == 0 && !this->restart) return;  //in the non-exx loop, do nothing 
+	if (XC_Functional::get_func_type() == 4 || XC_Functional::get_func_type() == 5)
+	{
+		if (this->restart && this->two_level_step != nullptr)
+		{
+			if (*this->two_level_step == 0)
+			{
+				this->add_loaded_Hexx(ik);
+				return;
+			}
+			else // clear loaded Hexx and release memory
+			{
+				if (this->LM->Hexxd_k_load.size() > 0)
+				{
+					this->LM->Hexxd_k_load.clear();
+					this->LM->Hexxd_k_load.shrink_to_fit();
+				}
+				else if (this->LM->Hexxc_k_load.size() > 0)
+				{
+					this->LM->Hexxc_k_load.clear();
+					this->LM->Hexxc_k_load.shrink_to_fit();
+				}
+			}
+		}
+		// cal H(k) from H(R) normally
 
-        if (GlobalC::exx_info.info_ri.real_number)
-            RI_2D_Comm::add_Hexx(this->kv,
-                                 ik,
-                                 GlobalC::exx_info.info_global.hybrid_alpha,
-                                 this->Hexxd == nullptr ? *this->LM->Hexxd : *this->Hexxd,
-                                 *this->LM->ParaV,
-                                 this->hsk->get_hk());
-        else
-            RI_2D_Comm::add_Hexx(this->kv,
-                                 ik,
-                                 GlobalC::exx_info.info_global.hybrid_alpha,
-                                 this->Hexxc == nullptr ? *this->LM->Hexxc : *this->Hexxc,
-                                 *this->LM->ParaV,
-                                 this->hsk->get_hk());
-    }
+		if (GlobalC::exx_info.info_ri.real_number)
+			RI_2D_Comm::add_Hexx(
+				this->kv,
+				ik,
+				GlobalC::exx_info.info_global.hybrid_alpha,
+				this->Hexxd == nullptr ? *this->LM->Hexxd : *this->Hexxd,
+				*this->LM->ParaV,
+				this->hsk->get_hk());
+		else
+			RI_2D_Comm::add_Hexx(
+				this->kv,
+				ik,
+				GlobalC::exx_info.info_global.hybrid_alpha,
+				this->Hexxc == nullptr ? *this->LM->Hexxc : *this->Hexxc,
+				*this->LM->ParaV,
+				this->hsk->get_hk());
+	}
 }
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.cpp
index 3407777da6..50937ff6af 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.cpp
@@ -1,9 +1,8 @@
 #include "operator_lcao.h"
-
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
-#include "module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h"
 #include "module_hsolver/hsolver_lcao.h"
+#include "module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h"
 
 #ifdef __ELPA
 #include "module_hsolver/diago_elpa.h"
@@ -12,7 +11,7 @@
 namespace hamilt
 {
 
-template <>
+template<>
 void OperatorLCAO<double, double>::get_hs_pointers()
 {
     ModuleBase::timer::tick("OperatorLCAO", "get_hs_pointers");
@@ -34,188 +33,196 @@ void OperatorLCAO<double, double>::get_hs_pointers()
     ModuleBase::timer::tick("OperatorLCAO", "get_hs_pointers");
 }
 
-template <>
+template<>
 void OperatorLCAO<std::complex<double>, double>::get_hs_pointers()
 {
     this->hmatrix_k = this->hsk->get_hk();
     this->smatrix_k = this->hsk->get_sk();
 }
 
-template <>
+template<>
 void OperatorLCAO<std::complex<double>, std::complex<double>>::get_hs_pointers()
 {
     this->hmatrix_k = this->hsk->get_hk();
     this->smatrix_k = this->hsk->get_sk();
 }
 
-template <typename TK, typename TR>
+template<typename TK, typename TR>
 void OperatorLCAO<TK, TR>::refresh_h()
 {
     // Set the matrix 'H' to zero.
     this->hsk->set_zero_hk();
 }
 
-template <typename TK, typename TR>
+template<typename TK, typename TR>
 void OperatorLCAO<TK, TR>::set_hr_done(bool hr_done_in)
 {
     this->hr_done = hr_done_in;
 }
 
-template <typename TK, typename TR>
+template<typename TK, typename TR>
 void OperatorLCAO<TK, TR>::init(const int ik_in)
 {
     ModuleBase::TITLE("OperatorLCAO", "init");
     ModuleBase::timer::tick("OperatorLCAO", "init");
-    if (this->is_first_node)
+    if(this->is_first_node)
     {
         // refresh HK
         this->refresh_h();
-        if (!this->hr_done)
+        if(!this->hr_done)
         {
             // refresh HR
             this->hR->set_zero();
         }
     }
-    switch (this->cal_type)
+    switch(this->cal_type)
     {
-    case calculation_type::lcao_overlap: {
-        // cal_type=lcao_overlap refer to overlap matrix operators, which are only rely on stucture, and not changed
-        // during SCF
-
-        if (!this->hr_done)
+        case calculation_type::lcao_overlap:
         {
-            // update SR first
-            // in cal_type=lcao_overlap, SR should be updated by each sub-chain nodes
-            OperatorLCAO<TK, TR>* last = this;
-            while (last != nullptr)
+            //cal_type=lcao_overlap refer to overlap matrix operators, which are only rely on stucture, and not changed during SCF
+
+            if(!this->hr_done)
             {
-                last->contributeHR();
-                last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
+                //update SR first
+                //in cal_type=lcao_overlap, SR should be updated by each sub-chain nodes
+                OperatorLCAO<TK, TR>* last = this;
+                while(last != nullptr)
+                {
+                    last->contributeHR();
+                    last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
+                }
             }
-        }
 
-        // update SK next
-        // in cal_type=lcao_overlap, SK should be update here
-        this->contributeHk(ik_in);
+            //update SK next
+            //in cal_type=lcao_overlap, SK should be update here
+            this->contributeHk(ik_in);
 
-        break;
-    }
-    case calculation_type::lcao_fixed: {
-        // cal_type=lcao_fixed refer to fixed matrix operators, which are only rely on stucture, and not changed during
-        // SCF
-
-        // update HR first
-        if (!this->hr_done)
+            break;
+        }
+        case calculation_type::lcao_fixed:
         {
-            // in cal_type=lcao_fixed, HR should be updated by each sub-chain nodes
-            OperatorLCAO<TK, TR>* last = this;
-            while (last != nullptr)
+            //cal_type=lcao_fixed refer to fixed matrix operators, which are only rely on stucture, and not changed during SCF
+
+            //update HR first
+            if(!this->hr_done)
             {
-                last->contributeHR();
-                last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
+                //in cal_type=lcao_fixed, HR should be updated by each sub-chain nodes
+                OperatorLCAO<TK, TR>* last = this;
+                while(last != nullptr)
+                {
+                    last->contributeHR();
+                    last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
+                }
             }
-        }
 
-        // update HK next
-        // in cal_type=lcao_fixed, HK will update in the last node with OperatorLCAO::contributeHk()
+            //update HK next
+            //in cal_type=lcao_fixed, HK will update in the last node with OperatorLCAO::contributeHk()
 
-        break;
-    }
-    case calculation_type::lcao_gint: {
-        // cal_type=lcao_gint refer to grid integral operators, which are relied on stucture and potential based on real
-        // space grids and should be updated each SCF steps
-
-        if (!this->hr_done)
+            break;
+        }
+        case calculation_type::lcao_gint:
         {
-            OperatorLCAO<TK, TR>* last = this;
-            while (last != nullptr)
+            //cal_type=lcao_gint refer to grid integral operators, which are relied on stucture and potential based on real space grids
+            //and should be updated each SCF steps
+
+            if(!this->hr_done)
             {
-                // update HR first
-                // in cal_type=lcao_gint, HR should be updated by every sub-node.
-                last->contributeHR();
+                OperatorLCAO<TK, TR>* last = this;
+                while(last != nullptr)
+                {
+                    //update HR first
+                    //in cal_type=lcao_gint, HR should be updated by every sub-node.
+                    last->contributeHR();
 
-                // update HK next
-                // in cal_type=lcao_gint, HK will update in the last node with OperatorLCAO::contributeHk()
-                last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
+                    //update HK next
+                    //in cal_type=lcao_gint, HK will update in the last node with OperatorLCAO::contributeHk()
+                    last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
+                }
             }
-        }
 
-        break;
-    }
+            break;
+        }
 #ifdef __DEEPKS
-    case calculation_type::lcao_deepks: {
-        // update HR first
-        if (!this->hr_done)
+        case calculation_type::lcao_deepks:
         {
-            // in cal_type=lcao_deepks, HR should be updated
-            this->contributeHR();
-        }
+            //update HR first
+            if(!this->hr_done)
+            {
+                //in cal_type=lcao_deepks, HR should be updated
+                this->contributeHR();
+            }
 
-        // update H_V_delta_k next
-        this->contributeHk(ik_in);
+            //update H_V_delta_k next
+            this->contributeHk(ik_in);
 
-        break;
-    }
+            break;
+
+        }
 #endif
-    case calculation_type::lcao_dftu: {
-        // only HK should be updated when cal_type=lcao_dftu
-        // in cal_type=lcao_dftu, HK only need to update from one node
-        if (!this->hr_done)
+        case calculation_type::lcao_dftu:
         {
-            // in cal_type=lcao_deepks, HR should be updated
-            this->contributeHR();
+            //only HK should be updated when cal_type=lcao_dftu
+            //in cal_type=lcao_dftu, HK only need to update from one node
+            if(!this->hr_done)
+            {
+                //in cal_type=lcao_deepks, HR should be updated
+                this->contributeHR();
+            }
+            break;
         }
-        break;
-    }
-    case calculation_type::lcao_sc_lambda: {
-        // update HK only
-        // in cal_type=lcao_sc_mag, HK only need to be updated
-        this->contributeHk(ik_in);
-        break;
-    }
-    case calculation_type::lcao_exx: {
-        // update HR first
-        // in cal_type=lcao_exx, HR should be updated by most priority sub-chain nodes
-        this->contributeHR();
+        case calculation_type::lcao_sc_lambda:
+        {
+            //update HK only
+            //in cal_type=lcao_sc_mag, HK only need to be updated
+            this->contributeHk(ik_in);
+            break;
+        }
+        case calculation_type::lcao_exx:
+        {
+            //update HR first
+            //in cal_type=lcao_exx, HR should be updated by most priority sub-chain nodes
+            this->contributeHR();
 
-        // update HK next
-        // in cal_type=lcao_exx, HK only need to update from one node
-        this->contributeHk(ik_in);
+            //update HK next
+            //in cal_type=lcao_exx, HK only need to update from one node
+            this->contributeHk(ik_in);
 
-        break;
-    }
-    case calculation_type::lcao_tddft_velocity: {
-        if (!this->hr_done)
+            break;
+        }
+        case calculation_type::lcao_tddft_velocity:
         {
-            // in cal_type=lcao_fixed, HR should be updated by each sub-chain nodes
-            OperatorLCAO<TK, TR>* last = this;
-            while (last != nullptr)
+            if(!this->hr_done)
             {
-                last->contributeHR();
-                last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
+                //in cal_type=lcao_fixed, HR should be updated by each sub-chain nodes
+                OperatorLCAO<TK, TR>* last = this;
+                while(last != nullptr)
+                {
+                    last->contributeHR();
+                    last = dynamic_cast<OperatorLCAO<TK, TR>*>(last->next_sub_op);
+                }
             }
-        }
-        this->contributeHk(ik_in);
+            this->contributeHk(ik_in);
 
-        break;
-    }
-    default: {
-        ModuleBase::WARNING_QUIT("OperatorLCAO::init", "unknown cal_type");
-        break;
-    }
+            break;
+        }
+        default:
+        {
+            ModuleBase::WARNING_QUIT("OperatorLCAO::init", "unknown cal_type");
+            break;
+        }
     }
-    if (this->next_op != nullptr)
-    { // it is not the last node, loop next init() function
+    if(this->next_op != nullptr)
+    {//it is not the last node, loop next init() function
         // pass HR status to next node and than set HR status of this node to done
-        if (!this->hr_done)
+        if(!this->hr_done)
         {
             dynamic_cast<OperatorLCAO<TK, TR>*>(this->next_op)->hr_done = this->hr_done;
         }
         // call init() function of next node
         this->next_op->init(ik_in);
     }
-    else
-    { // it is the last node, update HK with the current total HR
+    else 
+    {//it is the last node, update HK with the current total HR
         OperatorLCAO<TK, TR>::contributeHk(ik_in);
     }
 
@@ -226,12 +233,12 @@ void OperatorLCAO<TK, TR>::init(const int ik_in)
 }
 
 // contributeHk()
-template <typename TK, typename TR>
+template<typename TK, typename TR>
 void OperatorLCAO<TK, TR>::contributeHk(int ik)
 {
     ModuleBase::TITLE("OperatorLCAO", "contributeHk");
     ModuleBase::timer::tick("OperatorLCAO", "contributeHk");
-    if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
+    if(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
     {
         const int nrow = this->hsk->get_pv()->get_row_size();
         hamilt::folding_HR(*this->hR, this->hsk->get_hk(), this->kvec_d[ik], nrow, 1);
@@ -247,4 +254,4 @@ void OperatorLCAO<TK, TR>::contributeHk(int ik)
 template class OperatorLCAO<double, double>;
 template class OperatorLCAO<std::complex<double>, double>;
 template class OperatorLCAO<std::complex<double>, std::complex<double>>;
-} // namespace hamilt
+}  // namespace hamilt
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.h
index 024dd02fe5..5aece587ab 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/operator_lcao.h
@@ -3,8 +3,8 @@
 #include "module_base/vector3.h"
 #include "module_hamilt_general/matrixblock.h"
 #include "module_hamilt_general/operator.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/hs_matrix_k.hpp"
 
 namespace hamilt
 {
@@ -13,12 +13,11 @@ template <typename TK, typename TR>
 class OperatorLCAO : public Operator<TK>
 {
   public:
-    OperatorLCAO(HS_Matrix_K<TK>* hsk_in,
-                 const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                 HContainer<TR>* hR_in)
-        : hsk(hsk_in), kvec_d(kvec_d_in), hR(hR_in)
-    {
-    }
+    OperatorLCAO(
+        HS_Matrix_K<TK>* hsk_in, 
+        const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+        HContainer<TR>* hR_in)
+        : hsk(hsk_in), kvec_d(kvec_d_in), hR(hR_in){}
     virtual ~OperatorLCAO()
     {
         if (this->allocated_smatrix)
@@ -55,18 +54,16 @@ class OperatorLCAO : public Operator<TK>
         this->get_hs_pointers();
 #ifdef __MPI
         hk_in = MatrixBlock<TK>{hmatrix_k,
-                                (size_t)this->hsk->get_pv()->nrow,
-                                (size_t)this->hsk->get_pv()->ncol,
-                                this->hsk->get_pv()->desc};
+                               (size_t)this->hsk->get_pv()->nrow,
+                               (size_t)this->hsk->get_pv()->ncol,
+                               this->hsk->get_pv()->desc};
         sk_in = MatrixBlock<TK>{smatrix_k,
-                                (size_t)this->hsk->get_pv()->nrow,
-                                (size_t)this->hsk->get_pv()->ncol,
-                                this->hsk->get_pv()->desc};
+                               (size_t)this->hsk->get_pv()->nrow,
+                               (size_t)this->hsk->get_pv()->ncol,
+                               this->hsk->get_pv()->desc};
 #else
-        hk_in
-            = MatrixBlock<TK>{hmatrix_k, (size_t)this->hsk->get_pv()->nrow, (size_t)this->hsk->get_pv()->ncol, nullptr};
-        sk_in
-            = MatrixBlock<TK>{smatrix_k, (size_t)this->hsk->get_pv()->nrow, (size_t)this->hsk->get_pv()->ncol, nullptr};
+        hk_in = MatrixBlock<TK>{hmatrix_k, (size_t)this->hsk->get_pv()->nrow, (size_t)this->hsk->get_pv()->ncol, nullptr};
+        sk_in = MatrixBlock<TK>{smatrix_k, (size_t)this->hsk->get_pv()->nrow, (size_t)this->hsk->get_pv()->ncol, nullptr};
 #endif
     }
 
@@ -77,7 +74,7 @@ class OperatorLCAO : public Operator<TK>
     /**
      * @brief set_HR_fixed() is used for pass HR_fixed matrix to the next node in sub-chain table
      * not used in base class, only be override in fixed Hamiltonian Operators (e.g. Ekinetic and Nonlocal)
-     */
+    */
     virtual void set_HR_fixed(void*)
     {
         return;
@@ -85,7 +82,7 @@ class OperatorLCAO : public Operator<TK>
 
     /**
      * @brief reset hr_done status
-     */
+    */
     void set_hr_done(bool hr_done_in);
 
     // protected:
@@ -109,7 +106,7 @@ class OperatorLCAO : public Operator<TK>
 
     // only used for Gamma_only case
     bool allocated_smatrix = false;
-
+    
     // if HR is calculated
     bool hr_done = false;
 };
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.cpp
index b1e87e4db3..2aa96175dd 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.cpp
@@ -1,7 +1,5 @@
 #include "sc_lambda_lcao.h"
-
 #include "module_hamilt_lcao/module_deltaspin/spin_constrain.h"
-
 #include <algorithm>
 
 namespace hamilt
@@ -24,16 +22,13 @@ void OperatorScLambda<OperatorLCAO<std::complex<double>, std::complex<double>>>:
         = SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::getScInstance();
     std::vector<std::complex<double>> h_lambda(this->hsk->get_pv()->nloc);
     std::fill(h_lambda.begin(), h_lambda.end(), std::complex<double>(0, 0));
-    sc.cal_h_lambda(&h_lambda[0],
-                    this->hsk->get_sk(),
-                    ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(),
-                    this->isk[ik]);
+    sc.cal_h_lambda(&h_lambda[0], this->hsk->get_sk(), ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(), this->isk[ik]);
     std::complex<double>* hk = this->hsk->get_hk();
     for (int irc = 0; irc < this->hsk->get_pv()->nloc; irc++)
     {
         hk[irc] += h_lambda[irc];
     }
-    // std::cout << "OperatorScLambda contributeHk" << std::endl;
+    //std::cout << "OperatorScLambda contributeHk" << std::endl;
     ModuleBase::timer::tick("OperatorScLambda", "contributeHk");
 }
 
@@ -47,16 +42,13 @@ void OperatorScLambda<OperatorLCAO<std::complex<double>, double>>::contributeHk(
         = SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::getScInstance();
     std::vector<std::complex<double>> h_lambda(this->hsk->get_pv()->nloc);
     std::fill(h_lambda.begin(), h_lambda.end(), std::complex<double>(0, 0));
-    sc.cal_h_lambda(&h_lambda[0],
-                    this->hsk->get_sk(),
-                    ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(),
-                    this->isk[ik]);
+    sc.cal_h_lambda(&h_lambda[0], this->hsk->get_sk(), ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER(), this->isk[ik]);
     std::complex<double>* hk = this->hsk->get_hk();
     for (int irc = 0; irc < this->hsk->get_pv()->nloc; irc++)
     {
         hk[irc] += h_lambda[irc];
     }
-    // std::cout << "OperatorScLambda contributeHk" << std::endl;
+    //std::cout << "OperatorScLambda contributeHk" << std::endl;
     ModuleBase::timer::tick("OperatorScLambda", "contributeHk");
 }
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.h
index 2d46c4a824..646c0415fb 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/sc_lambda_lcao.h
@@ -18,14 +18,14 @@ class OperatorScLambda : public T
 
 #endif
 
-template <typename TK, typename TR>
+template<typename TK, typename TR>
 class OperatorScLambda<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
   public:
     OperatorScLambda<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
-                                           const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
-                                           hamilt::HContainer<TR>* hR_in,
-                                           const std::vector<int>& isk_in)
+                                    const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
+                                    hamilt::HContainer<TR>* hR_in,
+                                    const std::vector<int>& isk_in)
         : isk(isk_in), OperatorLCAO<TK, TR>(hsk_in, kvec_d_in, hR_in)
     {
         this->cal_type = calculation_type::lcao_sc_lambda;
@@ -34,8 +34,8 @@ class OperatorScLambda<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     virtual void contributeHR() override;
 
     virtual void contributeHk(int ik) override;
-
   private:
+
     const std::vector<int>& isk;
 };
 
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp
index b25cb9dd77..344537c952 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.cpp
@@ -266,7 +266,7 @@ void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR(Grid_Driver* GridD)
     ModuleBase::TITLE("TDEkinetic", "initialize_HR");
     ModuleBase::timer::tick("TDEkinetic", "initialize_HR");
 
-    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     this->adjs_all.clear();
@@ -315,7 +315,7 @@ void TDEkinetic<OperatorLCAO<TK, TR>>::initialize_HR_tmp()
     ModuleBase::TITLE("TDEkinetic", "initialize_HR_tmp");
     ModuleBase::timer::tick("TDEkinetic", "initialize_HR_tmp");
 
-    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
     for (int i = 0; i < this->hR->size_atom_pairs(); ++i)
     {
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h
index 835323c5dd..2de5273caa 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_ekinetic_lcao.h
@@ -10,6 +10,7 @@
 #include "module_hamilt_lcao/module_tddft/td_velocity.h"
 #include "operator_lcao.h"
 
+
 namespace hamilt
 {
 
@@ -32,15 +33,15 @@ class TDEkinetic : public T
 /// - TR: data type of real space Hamiltonian
 
 template <typename TK, typename TR>
-class TDEkinetic<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
+class TDEkinetic<OperatorLCAO<TK,TR>> : public OperatorLCAO<TK, TR>
 {
   public:
     TDEkinetic<OperatorLCAO<TK, TR>>(HS_Matrix_K<TK>* hsk_in,
-                                     hamilt::HContainer<TR>* hR_in,
-                                     hamilt::HContainer<TR>* SR_in,
-                                     const K_Vectors* kv_in,
-                                     const UnitCell* ucell_in,
-                                     Grid_Driver* GridD_in);
+                                 hamilt::HContainer<TR>* hR_in,
+                                 hamilt::HContainer<TR>* SR_in,
+                                 const K_Vectors* kv_in,
+                                 const UnitCell* ucell_in,
+                                 Grid_Driver* GridD_in);
     ~TDEkinetic();
 
     virtual void contributeHR() override;
@@ -58,18 +59,13 @@ class TDEkinetic<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     /**
      * @brief initialize HR_tmp
      * Allocate the memory for HR_tmp with the same size as HR
-     */
+    */
     void initialize_HR_tmp();
 
     /**
      * @brief calculate the HR local matrix of <I,J,R> atom pair
      */
-    void cal_HR_IJR(const int& iat1,
-                    const int& iat2,
-                    const Parallel_Orbitals* paraV,
-                    const ModuleBase::Vector3<double>& dtau,
-                    std::complex<double>* data_pointer,
-                    TR* s_pointer);
+    void cal_HR_IJR(const int& iat1,const int& iat2,const Parallel_Orbitals* paraV,const ModuleBase::Vector3<double>& dtau,std::complex<double>* data_pointer,TR* s_pointer);
 
     /**
      * @brief calculate the ekinetic matrix correction term in tddft with specific <I,J,R> atom-pairs
@@ -77,16 +73,15 @@ class TDEkinetic<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * loop the atom-pairs in HR and calculate the ekinetic matrix
      */
     void calculate_HR(void);
-    virtual void set_HR_fixed(void*) override;
+    virtual void set_HR_fixed(void*)override;
 
     TD_Velocity td_velocity;
 
   private:
     const UnitCell* ucell = nullptr;
-
+    
     HContainer<TR>* SR = nullptr;
-    /// @brief Store real space hamiltonian. TD term should include imaginary part, thus it has to be complex type. Only
-    /// shared between TD operators.
+    /// @brief Store real space hamiltonian. TD term should include imaginary part, thus it has to be complex type. Only shared between TD operators.
     HContainer<std::complex<double>>* hR_tmp = nullptr;
     Grid_Driver* Grid = nullptr;
 
@@ -100,14 +95,13 @@ class TDEkinetic<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     ORB_gaunt_table MGT;
 
     /// @brief Store the two center integrals outcome <𝝓_𝝁𝟎 |𝛁| 𝝓_𝝂𝑹> for td_ekinetic term
-    std::map<size_t,                                              // TA
-             std::map<size_t,                                     // TB
-                      std::map<int,                               // LA
-                               std::map<size_t,                   // NA
-                                        std::map<int,             // LB
-                                                 std::map<size_t, // NB
-                                                          Center2_Orb::Orb11>>>>>>
-        center2_orb11_s;
+    std::map<size_t,                                // TA
+      std::map<size_t,                            // TB
+        std::map<int,                           // LA
+          std::map<size_t,                    // NA
+            std::map<int,                   // LB
+              std::map<size_t,            // NB
+                Center2_Orb::Orb11>>>>>> center2_orb11_s;
 
     /// @brief Store the vector potential for td_ekinetic term
     ModuleBase::Vector3<double> cart_At;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
index 17de4e01d5..28d63c8074 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_dftu.cpp
@@ -160,7 +160,8 @@ TEST_F(DFTUTest, constructHRd2d)
         HR->get_wrapper()[i] = 0.0;
     }
     std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now();
-    hamilt::DFTU<hamilt::OperatorLCAO<double, double>> op(&hsk, kvec_d_in, HR, ucell, &gd, &intor_, &GlobalC::dftu);
+    hamilt::DFTU<hamilt::OperatorLCAO<double, double>>
+        op(&hsk, kvec_d_in, HR, ucell, &gd, &intor_, &GlobalC::dftu);
     std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> elapsed_time
         = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_ekineticnew.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_ekineticnew.cpp
index 10d20528c6..0befa32c0d 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_ekineticnew.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_ekineticnew.cpp
@@ -107,7 +107,8 @@ TEST_F(EkineticNewTest, constructHRd2d)
     hamilt::HS_Matrix_K<double> hsk(paraV, 1);
     hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
-    hamilt::EkineticNew<hamilt::OperatorLCAO<double, double>> op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
+    hamilt::EkineticNew<hamilt::OperatorLCAO<double, double>>
+        op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
     op.contributeHR();
     // check the value of HR
     for (int iap = 0; iap < HR->size_atom_pairs(); ++iap)
@@ -156,12 +157,8 @@ TEST_F(EkineticNewTest, constructHRd2cd)
     hamilt::HS_Matrix_K<std::complex<double>> hsk(paraV, 1);
     hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
-    hamilt::EkineticNew<hamilt::OperatorLCAO<std::complex<double>, double>> op(&hsk,
-                                                                               kvec_d_in,
-                                                                               HR,
-                                                                               &ucell,
-                                                                               &gd,
-                                                                               &intor_);
+    hamilt::EkineticNew<hamilt::OperatorLCAO<std::complex<double>, double>>
+        op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
     op.contributeHR();
     // check the value of HR
     for (int iap = 0; iap < HR->size_atom_pairs(); ++iap)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp
index a95e517129..a2054a6fa2 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp
@@ -140,7 +140,8 @@ TEST_F(NonlocalNewTest, constructHRd2d)
     EXPECT_EQ(ucell.infoNL.Beta[0].get_rcut_max(), 1.0);
     EXPECT_EQ(LCAO_Orbitals::get_const_instance().Phi[0].getRcut(), 1.0);
     std::chrono::high_resolution_clock::time_point start_time = std::chrono::high_resolution_clock::now();
-    hamilt::NonlocalNew<hamilt::OperatorLCAO<double, double>> op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
+    hamilt::NonlocalNew<hamilt::OperatorLCAO<double, double>>
+        op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
     std::chrono::high_resolution_clock::time_point end_time = std::chrono::high_resolution_clock::now();
     std::chrono::duration<double> elapsed_time
         = std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time);
@@ -209,12 +210,8 @@ TEST_F(NonlocalNewTest, constructHRd2cd)
     hamilt::HS_Matrix_K<std::complex<double>> hsk(paraV);
     hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
-    hamilt::NonlocalNew<hamilt::OperatorLCAO<std::complex<double>, double>> op(&hsk,
-                                                                               kvec_d_in,
-                                                                               HR,
-                                                                               &ucell,
-                                                                               &gd,
-                                                                               &intor_);
+    hamilt::NonlocalNew<hamilt::OperatorLCAO<std::complex<double>, double>>
+        op(&hsk, kvec_d_in, HR, &ucell, &gd, &intor_);
     op.contributeHR();
     // check the value of HR
     for (int iap = 0; iap < HR->size_atom_pairs(); ++iap)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew.cpp
index 4b2457a001..298439b074 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_overlapnew.cpp
@@ -107,7 +107,8 @@ TEST_F(OverlapNewTest, constructHRd2d)
     hamilt::HS_Matrix_K<double> hsk(paraV);
     hsk.set_zero_sk();
     Grid_Driver gd(0, 0, 0);
-    hamilt::OverlapNew<hamilt::OperatorLCAO<double, double>> op(&hsk, kvec_d_in, nullptr, SR, &ucell, &gd, &intor_);
+    hamilt::OverlapNew<hamilt::OperatorLCAO<double, double>>
+        op(&hsk, kvec_d_in, nullptr, SR, &ucell, &gd, &intor_);
     op.contributeHR();
     // check the value of SR
     for (int iap = 0; iap < SR->size_atom_pairs(); ++iap)
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_sc_lambda_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_sc_lambda_lcao.cpp
index e876255ee1..d37e94efbb 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_sc_lambda_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_sc_lambda_lcao.cpp
@@ -1,11 +1,11 @@
-#include "../sc_lambda_lcao.h"
-#include "module_cell/klist.h"
-#include "module_hamilt_lcao/module_deltaspin/spin_constrain.h"
+#include <algorithm>
+#include <random>
 
+#include "../sc_lambda_lcao.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
-#include <algorithm>
-#include <random>
+#include "module_cell/klist.h"
+#include "module_hamilt_lcao/module_deltaspin/spin_constrain.h"
 
 // mockes
 K_Vectors::K_Vectors()
@@ -96,8 +96,12 @@ TEST_F(ScLambdaLCAOTest, ContributeHk)
         = SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::getScInstance();
     sc.set_ParaV(&paraV);
     EXPECT_EQ(sc.ParaV->nloc, 4);
-    std::map<int, int> atomCounts = {{0, 1}};
-    std::map<int, int> orbitalCounts = {{0, 1}};
+    std::map<int, int> atomCounts = {
+        {0, 1}
+    };
+    std::map<int, int> orbitalCounts = {
+        {0, 1}
+    };
     sc.set_atomCounts(atomCounts);
     sc.set_orbitalCounts(orbitalCounts);
     sc.set_nspin(4);
@@ -117,10 +121,12 @@ TEST_F(ScLambdaLCAOTest, ContributeHk)
                                                                                                      nullptr,
                                                                                                      isk);
     sc_lambda_op.contributeHk(0);
-    std::vector<std::complex<double>> columnMajor_h_lambda = {std::complex<double>{-1.0, 0.0},
-                                                              std::complex<double>{-1.0, 1.0},
-                                                              std::complex<double>{-1.0, -1.0},
-                                                              std::complex<double>{1.0, 0.0}};
+    std::vector<std::complex<double>> columnMajor_h_lambda = {
+        std::complex<double>{-1.0, 0.0},
+        std::complex<double>{-1.0, 1.0},
+        std::complex<double>{-1.0, -1.0},
+        std::complex<double>{1.0,  0.0}
+    };
     EXPECT_DOUBLE_EQ(hsk.get_hk()[0].real(), columnMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(hsk.get_hk()[0].imag(), columnMajor_h_lambda[0].imag());
     EXPECT_DOUBLE_EQ(hsk.get_hk()[1].real(), columnMajor_h_lambda[1].real());
@@ -149,8 +155,12 @@ TEST_F(ScLambdaLCAOTest, ContributeHkS2)
         = SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::getScInstance();
     sc.set_ParaV(&paraV);
     EXPECT_EQ(sc.ParaV->nloc, 1);
-    std::map<int, int> atomCounts = {{0, 1}};
-    std::map<int, int> orbitalCounts = {{0, 1}};
+    std::map<int, int> atomCounts = {
+        {0, 1}
+    };
+    std::map<int, int> orbitalCounts = {
+        {0, 1}
+    };
     sc.set_atomCounts(atomCounts);
     sc.set_orbitalCounts(orbitalCounts);
     sc.set_nspin(2);
@@ -164,12 +174,15 @@ TEST_F(ScLambdaLCAOTest, ContributeHkS2)
     GlobalV::KS_SOLVER = "genelpa";
     EXPECT_TRUE(ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER());
     // set sc_lambda_op
-    auto sc_lambda_op = hamilt::OperatorScLambda<hamilt::OperatorLCAO<std::complex<double>, double>>(&hsk,
-                                                                                                     this->kvec_d,
-                                                                                                     nullptr,
-                                                                                                     isk);
+    auto sc_lambda_op
+        = hamilt::OperatorScLambda<hamilt::OperatorLCAO<std::complex<double>, double>>(&hsk,
+                                                                                    this->kvec_d,
+                                                                                    nullptr,
+                                                                                    isk);
     sc_lambda_op.contributeHk(0);
-    std::vector<std::complex<double>> columnMajor_h_lambda = {std::complex<double>{-1.0, 0.0}};
+    std::vector<std::complex<double>> columnMajor_h_lambda = {
+        std::complex<double>{-1.0,  0.0}
+    };
     EXPECT_DOUBLE_EQ(hsk.get_hk()[0].real(), columnMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(hsk.get_hk()[0].imag(), columnMajor_h_lambda[0].imag());
 }
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
index 309c878a2c..39e91219a0 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.cpp
@@ -1,21 +1,22 @@
 #include "veff_lcao.h"
-
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
-#include "module_cell/unitcell.h"
 #include "module_hamilt_general/module_xc/xc_functional.h"
+#include "module_cell/unitcell.h"
 namespace hamilt
 {
 
+
 // initialize_HR()
 template <typename TK, typename TR>
-void Veff<OperatorLCAO<TK, TR>>::initialize_HR(const UnitCell* ucell_in, Grid_Driver* GridD)
+void Veff<OperatorLCAO<TK, TR>>::initialize_HR(const UnitCell* ucell_in,
+                                        Grid_Driver* GridD)
 {
     ModuleBase::TITLE("Veff", "initialize_HR");
     ModuleBase::timer::tick("Veff", "initialize_HR");
 
     this->nspin = GlobalV::NSPIN;
-    auto* paraV = this->hR->get_paraV(); // get parallel orbitals from HR
+    auto* paraV = this->hR->get_paraV();// get parallel orbitals from HR
     // TODO: if paraV is nullptr, AtomPair can not use paraV for constructor, I will repair it in the future.
 
     for (int iat1 = 0; iat1 < ucell_in->nat; iat1++)
@@ -38,8 +39,8 @@ void Veff<OperatorLCAO<TK, TR>>::initialize_HR(const UnitCell* ucell_in, Grid_Dr
             const ModuleBase::Vector3<int>& R_index2 = adjs.box[ad1];
             // choose the real adjacent atoms
             const LCAO_Orbitals& orb = LCAO_Orbitals::get_const_instance();
-            // Note: the distance of atoms should less than the cutoff radius,
-            // When equal, the theoretical value of matrix element is zero,
+            // Note: the distance of atoms should less than the cutoff radius, 
+            // When equal, the theoretical value of matrix element is zero, 
             // but the calculated value is not zero due to the numerical error, which would lead to result changes.
             if (ucell_in->cal_dtau(iat1, iat2, R_index2).norm() * ucell_in->lat0
                 < orb.Phi[T1].getRcut() + orb.Phi[T2].getRcut())
@@ -55,7 +56,9 @@ void Veff<OperatorLCAO<TK, TR>>::initialize_HR(const UnitCell* ucell_in, Grid_Dr
     ModuleBase::timer::tick("Veff", "initialize_HR");
 }
 
-template <typename TK, typename TR>
+
+
+template<typename TK, typename TR>
 void Veff<OperatorLCAO<TK, TR>>::contributeHR()
 {
     ModuleBase::TITLE("Veff", "contributeHR");
@@ -76,7 +79,7 @@ void Veff<OperatorLCAO<TK, TR>>::contributeHR()
 
     // if you change the place of the following code,
     // rememeber to delete the #include
-    if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
+    if(XC_Functional::get_func_type()==3 || XC_Functional::get_func_type()==5)
     {
         Gint_inout inout(vr_eff1, vofk_eff1, 0, Gint_Tools::job_type::vlocal_meta);
         this->GK->cal_gint(&inout);
@@ -95,12 +98,12 @@ void Veff<OperatorLCAO<TK, TR>>::contributeHR()
         for (int is = 1; is < 4; is++)
         {
             vr_eff1 = this->pot->get_effective_v(is);
-            if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
+            if(XC_Functional::get_func_type()==3 || XC_Functional::get_func_type()==5)
             {
                 vofk_eff1 = this->pot->get_effective_vofk(is);
             }
-
-            if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
+            
+            if(XC_Functional::get_func_type()==3 || XC_Functional::get_func_type()==5)
             {
                 Gint_inout inout(vr_eff1, vofk_eff1, is, Gint_Tools::job_type::vlocal_meta);
                 this->GK->cal_gint(&inout);
@@ -112,17 +115,16 @@ void Veff<OperatorLCAO<TK, TR>>::contributeHR()
             }
         }
     }
-    this->GK->transfer_pvpR(this->hR, this->ucell, this->gd);
+    this->GK->transfer_pvpR(this->hR,this->ucell,this->gd);
 
-    if (this->nspin == 2)
-        this->current_spin = 1 - this->current_spin;
+    if(this->nspin == 2) this->current_spin = 1 - this->current_spin;
 
     ModuleBase::timer::tick("Veff", "contributeHR");
     return;
 }
 
 // special case of gamma-only
-template <>
+template<>
 void Veff<OperatorLCAO<double, double>>::contributeHR(void)
 {
     ModuleBase::TITLE("Veff", "contributeHR");
@@ -140,30 +142,29 @@ void Veff<OperatorLCAO<double, double>>::contributeHR(void)
     // and diagonalize the H matrix (T+Vl+Vnl).
     //--------------------------------------------
 
-    if (XC_Functional::get_func_type() == 3 || XC_Functional::get_func_type() == 5)
+    if(XC_Functional::get_func_type()==3 || XC_Functional::get_func_type()==5)
     {
         Gint_inout inout(vr_eff1, vofk_eff1, Gint_Tools::job_type::vlocal_meta);
-        this->GG->cal_vlocal(&inout, this->new_e_iteration);
+        this->GG->cal_vlocal(&inout,  this->new_e_iteration);
     }
     else
     {
         Gint_inout inout(vr_eff1, Gint_Tools::job_type::vlocal);
-        this->GG->cal_vlocal(&inout, this->new_e_iteration);
+        this->GG->cal_vlocal(&inout,  this->new_e_iteration);
     }
-    this->GG->transfer_pvpR(this->hR, this->ucell);
+    this->GG->transfer_pvpR(this->hR,this->ucell);
 
     this->new_e_iteration = false;
 
-    if (this->nspin == 2)
-        this->current_spin = 1 - this->current_spin;
+    if(this->nspin == 2) this->current_spin = 1 - this->current_spin;
 
     ModuleBase::timer::tick("Veff", "contributeHR");
 }
 
-// definition of class template should in the end of file to avoid compiling warning
+// definition of class template should in the end of file to avoid compiling warning 
 template class Veff<OperatorLCAO<double, double>>;
 
 template class Veff<OperatorLCAO<std::complex<double>, double>>;
 
 template class Veff<OperatorLCAO<std::complex<double>, std::complex<double>>>;
-} // namespace hamilt
+}
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
index cf1e229d02..9c97e1020f 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/veff_lcao.h
@@ -1,12 +1,12 @@
 #ifndef VEFFLCAO_H
 #define VEFFLCAO_H
 #include "module_base/timer.h"
-#include "module_cell/module_neighbor/sltk_grid_driver.h"
-#include "module_cell/unitcell.h"
 #include "module_elecstate/potentials/potential_new.h"
 #include "module_hamilt_lcao/module_gint/gint_gamma.h"
 #include "module_hamilt_lcao/module_gint/gint_k.h"
 #include "operator_lcao.h"
+#include "module_cell/module_neighbor/sltk_grid_driver.h"
+#include "module_cell/unitcell.h"
 
 namespace hamilt
 {
@@ -24,8 +24,8 @@ class Veff : public T
 /// @brief Effective potential class, used for calculating Hamiltonian with grid integration tools
 /// If user want to separate the contribution of V_{eff} into V_{H} and V_{XC} and V_{local pseudopotential} and so on,
 /// the user can separate the Potential class into different parts, and construct different Veff class for each part.
-/// @tparam TK
-/// @tparam TR
+/// @tparam TK 
+/// @tparam TR 
 template <typename TK, typename TR>
 class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
 {
@@ -33,7 +33,7 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     /**
      * @brief Construct a new Veff object for multi-kpoint calculation
      * @param GK_in: the pointer of Gint_k object, used for grid integration
-     */
+    */
     Veff<OperatorLCAO<TK, TR>>(Gint_k* GK_in,
                                HS_Matrix_K<TK>* hsk_in,
                                const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
@@ -52,7 +52,7 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     /**
      * @brief Construct a new Veff object for Gamma-only calculation
      * @param GG_in: the pointer of Gint_Gamma object, used for grid integration
-     */
+    */
     Veff<OperatorLCAO<TK, TR>>(Gint_Gamma* GG_in,
                                HS_Matrix_K<TK>* hsk_in,
                                const std::vector<ModuleBase::Vector3<double>>& kvec_d_in,
@@ -73,14 +73,13 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
     /**
      * @brief contributeHR() is used to calculate the HR matrix
      * <phi_{\mu, 0}|V_{eff}|phi_{\nu, R}>
-     * the contribution of V_{eff} is calculated by the contribution of V_{H} and V_{XC} and V_{local pseudopotential}
-     * and so on. grid integration is used to calculate the contribution Hamiltonian of effective potential
+     * the contribution of V_{eff} is calculated by the contribution of V_{H} and V_{XC} and V_{local pseudopotential} and so on.
+     * grid integration is used to calculate the contribution Hamiltonian of effective potential
      */
     virtual void contributeHR() override;
-
-    const UnitCell* ucell;
-    Grid_Driver* gd;
-
+  
+  const UnitCell* ucell;
+  Grid_Driver* gd;
   private:
     // used for k-dependent grid integration.
     Gint_k* GK = nullptr;
@@ -101,6 +100,7 @@ class Veff<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      * the size of HR will be fixed after initialization
      */
     void initialize_HR(const UnitCell* ucell_in, Grid_Driver* GridD_in);
+
 };
 
 } // namespace hamilt
diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_h_lambda.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_h_lambda.cpp
index 9330cdd6ea..be60643477 100644
--- a/source/module_hamilt_lcao/module_deltaspin/cal_h_lambda.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/cal_h_lambda.cpp
@@ -1,20 +1,20 @@
-#include "module_base/global_function.h"
+#include "spin_constrain.h"
 #include "module_base/timer.h"
 #include "module_base/tool_title.h"
-#include "spin_constrain.h"
-
+#include "module_base/global_function.h"
 #include <algorithm>
 
 template <>
-void SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::cal_h_lambda(std::complex<double>* h_lambda,
-                                                                                const std::complex<double>* Sloc2,
-                                                                                bool column_major,
-                                                                                int isk)
+void SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::cal_h_lambda(
+    std::complex<double>* h_lambda,
+    const std::complex<double>* Sloc2,
+    bool column_major,
+    int isk)
 {
-    ModuleBase::TITLE("SpinConstrain", "cal_h_lambda");
+    ModuleBase::TITLE("SpinConstrain","cal_h_lambda");
     ModuleBase::timer::tick("SpinConstrain", "cal_h_lambda");
     const Parallel_Orbitals* pv = this->ParaV;
-    for (const auto& sc_elem1: this->get_atomCounts())
+    for (const auto& sc_elem1 : this->get_atomCounts())
     {
         int it1 = sc_elem1.first;
         int nat_it1 = sc_elem1.second;
@@ -22,13 +22,12 @@ void SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::cal_h_lambda(
         for (int ia1 = 0; ia1 < nat_it1; ia1++)
         {
             int iat1 = this->get_iat(it1, ia1);
-            for (int iw1 = 0; iw1 < nw_it1 * this->npol_; iw1++)
+            for (int iw1 = 0; iw1 < nw_it1*this->npol_; iw1++)
             {
                 int iwt1 = this->get_iwt(it1, ia1, iw1);
                 const int mu = pv->global2local_row(iwt1);
-                if (mu < 0)
-                    continue;
-                for (const auto& sc_elem2: this->get_atomCounts())
+                if (mu < 0) continue;
+                for (const auto& sc_elem2 : this->get_atomCounts())
                 {
                     int it2 = sc_elem2.first;
                     int nat_it2 = sc_elem2.second;
@@ -36,12 +35,11 @@ void SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::cal_h_lambda(
                     for (int ia2 = 0; ia2 < nat_it2; ia2++)
                     {
                         int iat2 = this->get_iat(it2, ia2);
-                        for (int iw2 = 0; iw2 < nw_it2 * this->npol_; iw2++)
+                        for (int iw2 = 0; iw2 < nw_it2*this->npol_; iw2++)
                         {
                             int iwt2 = this->get_iwt(it2, ia2, iw2);
                             const int nu = pv->global2local_col(iwt2);
-                            if (nu < 0)
-                                continue;
+                            if (nu < 0) continue;
                             int icc;
                             ModuleBase::Vector3<double> lambda = (this->lambda_[iat1] + this->lambda_[iat2]) / 2.0;
                             if (column_major)
diff --git a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
index 262a967219..fb88b8eb57 100644
--- a/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/cal_mw.cpp
@@ -1,3 +1,5 @@
+#include <iostream>
+
 #include "module_base/matrix.h"
 #include "module_base/name_angular.h"
 #include "module_base/scalapack_connector.h"
@@ -6,8 +8,6 @@
 #include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
 #include "spin_constrain.h"
 
-#include <iostream>
-
 template <>
 ModuleBase::matrix SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>::cal_MW_k(
     LCAO_Matrix* LM,
@@ -17,13 +17,12 @@ ModuleBase::matrix SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>:
     int nw = this->get_nw();
     const int nlocal = (this->nspin_ == 4) ? nw / 2 : nw;
     ModuleBase::matrix MecMulP(this->nspin_, nlocal, true), orbMulP(this->nspin_, nlocal, true);
-    for (size_t ik = 0; ik != this->kv_.get_nks(); ++ik)
+    for(size_t ik = 0; ik != this->kv_.get_nks(); ++ik)
     {
-        std::complex<double>* sk = nullptr;
+        std::complex<double> *sk = nullptr;
         if (this->nspin_ == 4)
         {
-            dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt)
-                ->updateSk(ik, 1);
+            dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt)->updateSk(ik, 1);
             sk = dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt)->getSk();
         }
         else
@@ -36,7 +35,7 @@ ModuleBase::matrix SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>:
         const char T_char = 'T';
         const char N_char = 'N';
         const int one_int = 1;
-        const std::complex<double> one_float = {1.0, 0.0}, zero_float = {0.0, 0.0};
+        const std::complex<double> one_float = {1.0, 0.0}, zero_float = {0.0, 0.0};        
         pzgemm_(&N_char,
                 &T_char,
                 &nw,
@@ -60,8 +59,8 @@ ModuleBase::matrix SpinConstrain<std::complex<double>, base_device::DEVICE_CPU>:
 #endif
     }
 #ifdef __MPI
-    MPI_Allreduce(MecMulP.c, orbMulP.c, this->nspin_ * nlocal, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
-#endif
+    MPI_Allreduce(MecMulP.c, orbMulP.c, this->nspin_*nlocal, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
+#endif 
 
     return orbMulP;
 }
diff --git a/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h b/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h
index f2a89a7f86..24151fb833 100644
--- a/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h
+++ b/source/module_hamilt_lcao/module_deltaspin/spin_constrain.h
@@ -1,6 +1,9 @@
 #ifndef SPIN_CONSTRAIN_H
 #define SPIN_CONSTRAIN_H
 
+#include <map>
+#include <vector>
+
 #include "module_base/constants.h"
 #include "module_base/tool_quit.h"
 #include "module_base/tool_title.h"
@@ -11,93 +14,90 @@
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 #include "module_hsolver/hsolver.h"
 
-#include <map>
-#include <vector>
-
 struct ScAtomData;
 
 template <typename FPTYPE, typename Device = base_device::DEVICE_CPU>
 class SpinConstrain
 {
-  public:
+public:
     /**
      * pubic interface for spin-constrained DFT
-     */
+    */
     /// initialize spin-constrained DFT
-    void init_sc(double sc_thr_in,
-                 int nsc_in,
-                 int nsc_min_in,
-                 double alpha_trial_in,
-                 double sccut_in,
-                 bool decay_grad_switch_in,
-                 const UnitCell& ucell,
-                 std::string sc_file,
-                 int NPOL,
-                 Parallel_Orbitals* ParaV_in,
-                 int nspin_in,
-                 K_Vectors kv_in,
-                 std::string KS_SOLVER_in,
-                 LCAO_Matrix* LM_in,
-                 hsolver::HSolver<FPTYPE, Device>* phsol_in,
-                 hamilt::Hamilt<FPTYPE, Device>* p_hamilt_in,
-                 psi::Psi<FPTYPE>* psi_in,
-                 elecstate::ElecState* pelec_in);
+  void init_sc(double sc_thr_in,
+               int nsc_in,
+               int nsc_min_in,
+               double alpha_trial_in,
+               double sccut_in,
+               bool decay_grad_switch_in,
+               const UnitCell& ucell,
+               std::string sc_file,
+               int NPOL,
+               Parallel_Orbitals* ParaV_in,
+               int nspin_in,
+               K_Vectors kv_in,
+               std::string KS_SOLVER_in,
+               LCAO_Matrix* LM_in,
+               hsolver::HSolver<FPTYPE, Device>* phsol_in,
+               hamilt::Hamilt<FPTYPE, Device>* p_hamilt_in,
+               psi::Psi<FPTYPE>* psi_in,
+               elecstate::ElecState* pelec_in);
 
-    /// calculate h_lambda operator for spin-constrained DFT
-    void cal_h_lambda(std::complex<double>* h_lambda, const std::complex<double>* Sloc2, bool column_major, int isk);
+  /// calculate h_lambda operator for spin-constrained DFT
+  void cal_h_lambda(std::complex<double>* h_lambda, const std::complex<double>* Sloc2, bool column_major, int isk);
 
-    void cal_MW(const int& step, LCAO_Matrix* LM, bool print = false);
+  void cal_MW(const int& step, LCAO_Matrix* LM, bool print = false);
 
-    ModuleBase::matrix cal_MW_k(LCAO_Matrix* LM, const std::vector<std::vector<std::complex<double>>>& dm);
+  ModuleBase::matrix cal_MW_k(LCAO_Matrix* LM, const std::vector<std::vector<std::complex<double>>>& dm);
 
-    void cal_mw_from_lambda(int i_step);
+  void cal_mw_from_lambda(int i_step);
 
-    double cal_escon();
+  double cal_escon();
 
-    double get_escon();
+  double get_escon();
 
-    std::vector<std::vector<std::vector<double>>> convert(const ModuleBase::matrix& orbMulP);
+  std::vector<std::vector<std::vector<double>>> convert(const ModuleBase::matrix& orbMulP);
 
-    void run_lambda_loop(int outer_step);
+  void run_lambda_loop(int outer_step);
 
-    /// lambda loop helper functions
-    bool check_rms_stop(int outer_step, int i_step, double rms_error, double duration, double total_duration);
+  /// lambda loop helper functions
+  bool check_rms_stop(int outer_step, int i_step, double rms_error, double duration, double total_duration);
 
-    /// apply restriction
-    void check_restriction(const std::vector<ModuleBase::Vector3<double>>& search, double& alpha_trial);
+  /// apply restriction
+  void check_restriction(const std::vector<ModuleBase::Vector3<double>>& search, double& alpha_trial);
 
-    /// check gradient decay
-    bool check_gradient_decay(std::vector<ModuleBase::Vector3<double>> new_spin,
-                              std::vector<ModuleBase::Vector3<double>> old_spin,
-                              std::vector<ModuleBase::Vector3<double>> new_delta_lambda,
-                              std::vector<ModuleBase::Vector3<double>> old_delta_lambda,
-                              bool print = false);
-    /// @brief  calculate alpha_opt
-    double cal_alpha_opt(std::vector<ModuleBase::Vector3<double>> spin,
-                         std::vector<ModuleBase::Vector3<double>> spin_plus,
-                         const double alpha_trial);
-    /// print header info
-    void print_header();
-    /// print termination message
-    void print_termination();
+  /// check gradient decay
+  bool check_gradient_decay(std::vector<ModuleBase::Vector3<double>> new_spin,
+                            std::vector<ModuleBase::Vector3<double>> old_spin,
+                            std::vector<ModuleBase::Vector3<double>> new_delta_lambda,
+                            std::vector<ModuleBase::Vector3<double>> old_delta_lambda,
+                            bool print = false);
+  /// @brief  calculate alpha_opt
+  double cal_alpha_opt(std::vector<ModuleBase::Vector3<double>> spin,
+                       std::vector<ModuleBase::Vector3<double>> spin_plus,
+                       const double alpha_trial);
+  /// print header info
+  void print_header();
+  /// print termination message
+  void print_termination();
 
-    /// calculate mw from AorbMulP matrix
-    void calculate_MW(const std::vector<std::vector<std::vector<double>>>& AorbMulP);
+  /// calculate mw from AorbMulP matrix
+  void calculate_MW(const std::vector<std::vector<std::vector<double>>>& AorbMulP);
 
-    /// print mi
-    void print_Mi(bool print = false);
+  /// print mi
+  void print_Mi(bool print = false);
 
-    /// print magnetic force, defined as \frac{\delta{L}}/{\delta{Mi}} = -lambda[iat])
-    void print_Mag_Force();
+  /// print magnetic force, defined as \frac{\delta{L}}/{\delta{Mi}} = -lambda[iat])
+  void print_Mag_Force();
 
-    /// collect_mw from matrix multiplication result
-    void collect_MW(ModuleBase::matrix& MecMulP, const ModuleBase::ComplexMatrix& mud, int nw, int isk);
+  /// collect_mw from matrix multiplication result
+  void collect_MW(ModuleBase::matrix& MecMulP, const ModuleBase::ComplexMatrix& mud, int nw, int isk);
 
-  public:
+public:
     /**
      * important outter class pointers used in spin-constrained DFT
-     */
-    Parallel_Orbitals* ParaV = nullptr;
+    */
+    Parallel_Orbitals *ParaV = nullptr;
     hsolver::HSolver<FPTYPE, Device>* phsol = nullptr;
     hamilt::Hamilt<FPTYPE, Device>* p_hamilt = nullptr;
     psi::Psi<FPTYPE>* psi = nullptr;
@@ -110,7 +110,7 @@ class SpinConstrain
   public:
     /**
      * pubic methods for setting and getting spin-constrained DFT parameters
-     */
+    */
     /// Public method to access the Singleton instance
     static SpinConstrain& getScInstance();
     /// Delete copy and move constructors and assign operators
@@ -215,10 +215,10 @@ class SpinConstrain
     void bcast_ScData(std::string sc_file, int nat, int ntype);
 
   private:
-    SpinConstrain(){};                                       // Private constructor
-    ~SpinConstrain(){};                                      // Destructor
-    SpinConstrain& operator=(SpinConstrain const&) = delete; // Copy assign
-    SpinConstrain& operator=(SpinConstrain&&) = delete;      // Move assign
+    SpinConstrain(){};                               // Private constructor
+    ~SpinConstrain(){};                              // Destructor
+    SpinConstrain& operator=(SpinConstrain const&) = delete;  // Copy assign
+    SpinConstrain& operator=(SpinConstrain &&) = delete;      // Move assign
     std::map<int, std::vector<ScAtomData>> ScData;
     std::map<int, double> ScDecayGrad; // in unit of uB^2/eV
     std::vector<double> decay_grad_;   // in unit of uB^2/Ry
@@ -227,7 +227,7 @@ class SpinConstrain
     std::map<int, std::map<int, int>> lnchiCounts;
     std::vector<ModuleBase::Vector3<double>> lambda_; // in unit of Ry/uB in code, but in unit of meV/uB in input file
     std::vector<ModuleBase::Vector3<double>> target_mag_; // in unit of uB
-    std::vector<ModuleBase::Vector3<double>> Mi_;         // in unit of uB
+    std::vector<ModuleBase::Vector3<double>> Mi_; // in unit of uB
     double escon_ = 0.0;
     int nspin_ = 0;
     int npol_ = 1;
@@ -240,15 +240,15 @@ class SpinConstrain
     double sc_thr_; // in unit of uB
     std::vector<ModuleBase::Vector3<int>> constrain_;
     bool debug = false;
-    double alpha_trial_;      // in unit of Ry/uB^2 = 0.01 eV/uB^2
+    double alpha_trial_; // in unit of Ry/uB^2 = 0.01 eV/uB^2
     double restrict_current_; // in unit of Ry/uB = 3 eV/uB
 };
 
+
 /**
  * @brief struct for storing parameters of non-collinear spin-constrained DFT
  */
-struct ScAtomData
-{
+struct ScAtomData {
     int index;
     std::vector<double> lambda;
     std::vector<double> target_mag;
diff --git a/source/module_hamilt_lcao/module_deltaspin/test/cal_h_lambda_test.cpp b/source/module_hamilt_lcao/module_deltaspin/test/cal_h_lambda_test.cpp
index 8a340dc10f..f3312716b7 100644
--- a/source/module_hamilt_lcao/module_deltaspin/test/cal_h_lambda_test.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/test/cal_h_lambda_test.cpp
@@ -1,14 +1,10 @@
 #include "../spin_constrain.h"
-#include "module_cell/klist.h"
-
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
-K_Vectors::K_Vectors()
-{
-}
-K_Vectors::~K_Vectors()
-{
-}
+
+#include "module_cell/klist.h"
+K_Vectors::K_Vectors(){}
+K_Vectors::~K_Vectors(){}
 
 /************************************************
  *  unit test of the function of cal_h_lambda
@@ -37,17 +33,23 @@ TEST_F(SpinConstrainTest, CalHLambda)
     paraV.set_serial(nrow, ncol);
     sc.set_ParaV(&paraV);
     EXPECT_EQ(sc.ParaV->nloc, 4);
-    std::map<int, int> atomCounts = {{0, 1}};
-    std::map<int, int> orbitalCounts = {{0, 1}};
+    std::map<int, int> atomCounts = {
+        {0, 1}
+    };
+    std::map<int, int> orbitalCounts = {
+        {0, 1}
+    };
     sc.set_atomCounts(atomCounts);
     sc.set_orbitalCounts(orbitalCounts);
     sc.set_npol(2);
     std::vector<std::complex<double>> h_lambda(sc.ParaV->nloc);
     std::fill(h_lambda.begin(), h_lambda.end(), std::complex<double>(0, 0));
-    std::vector<std::complex<double>> Sloc2 = {std::complex<double>{1.0, 0.0},
-                                               std::complex<double>{0.0, 0.0},
-                                               std::complex<double>{0.0, 0.0},
-                                               std::complex<double>{1.0, 0.0}};
+    std::vector<std::complex<double>> Sloc2 = {
+        std::complex<double>{1.0, 0.0},
+        std::complex<double>{0.0, 0.0},
+        std::complex<double>{0.0, 0.0},
+        std::complex<double>{1.0, 0.0}
+    };
     ModuleBase::Vector3<double>* sc_lambda = new ModuleBase::Vector3<double>[1];
     sc_lambda[0][0] = 1.0;
     sc_lambda[0][1] = 1.0;
@@ -56,10 +58,12 @@ TEST_F(SpinConstrainTest, CalHLambda)
     // column_major = true
     sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), true, 0);
     // h_lambda = - [lambda_x * sigma_x + lambda_y * sigma_y + lambda_z * sigma_z] * Sloc2
-    std::vector<std::complex<double>> columnMajor_h_lambda = {std::complex<double>{-1.0, 0.0},
-                                                              std::complex<double>{-1.0, 1.0},
-                                                              std::complex<double>{-1.0, -1.0},
-                                                              std::complex<double>{1.0, 0.0}};
+    std::vector<std::complex<double>> columnMajor_h_lambda = {
+        std::complex<double>{-1.0, 0.0 },
+        std::complex<double>{-1.0, 1.0 },
+        std::complex<double>{-1.0, -1.0},
+        std::complex<double>{1.0,  0.0 }
+    };
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), columnMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), columnMajor_h_lambda[0].imag());
     EXPECT_DOUBLE_EQ(h_lambda[1].real(), columnMajor_h_lambda[1].real());
@@ -71,10 +75,12 @@ TEST_F(SpinConstrainTest, CalHLambda)
     // column_major = false
     delete[] sc_lambda;
     sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), false, 0);
-    std::vector<std::complex<double>> rowMajor_h_lambda = {std::complex<double>{-1.0, 0.0},
-                                                           std::complex<double>{-1.0, -1.0},
-                                                           std::complex<double>{-1.0, 1.0},
-                                                           std::complex<double>{1.0, 0.0}};
+    std::vector<std::complex<double>> rowMajor_h_lambda = {
+        std::complex<double>{-1.0, 0.0 },
+        std::complex<double>{-1.0, -1.0},
+        std::complex<double>{-1.0, 1.0 },
+        std::complex<double>{1.0,  0.0 }
+    };
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), rowMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), rowMajor_h_lambda[0].imag());
     EXPECT_DOUBLE_EQ(h_lambda[1].real(), rowMajor_h_lambda[1].real());
@@ -96,14 +102,20 @@ TEST_F(SpinConstrainTest, CalHLambdaS2)
     paraV.set_serial(nrow, ncol);
     sc.set_ParaV(&paraV);
     EXPECT_EQ(sc.ParaV->nloc, 1);
-    std::map<int, int> atomCounts = {{0, 1}};
-    std::map<int, int> orbitalCounts = {{0, 1}};
+    std::map<int, int> atomCounts = {
+        {0, 1}
+    };
+    std::map<int, int> orbitalCounts = {
+        {0, 1}
+    };
     sc.set_atomCounts(atomCounts);
     sc.set_orbitalCounts(orbitalCounts);
     sc.set_npol(1);
     std::vector<std::complex<double>> h_lambda(sc.ParaV->nloc);
     std::fill(h_lambda.begin(), h_lambda.end(), std::complex<double>(0, 0));
-    std::vector<std::complex<double>> Sloc2 = {std::complex<double>{1.0, 0.0}};
+    std::vector<std::complex<double>> Sloc2 = {
+        std::complex<double>{1.0, 0.0}
+    };
     ModuleBase::Vector3<double>* sc_lambda = new ModuleBase::Vector3<double>[1];
     sc_lambda[0][0] = 0.0;
     sc_lambda[0][1] = 0.0;
@@ -113,14 +125,14 @@ TEST_F(SpinConstrainTest, CalHLambdaS2)
     sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), true, 0);
     // h_lambda = -Sloc2[icc] * this->lambda_[iat2][2]
     std::vector<std::complex<double>> columnMajor_h_lambda = {
-        std::complex<double>{-2.0, 0.0},
+        std::complex<double>{-2.0, 0.0 },
     };
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), columnMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), columnMajor_h_lambda[0].imag());
     sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), true, 1);
     // h_lambda = -Sloc2[icc] * (-this->lambda_[iat2][2])
     std::vector<std::complex<double>> columnMajor_h_lambda1 = {
-        std::complex<double>{2.0, 0.0},
+        std::complex<double>{2.0, 0.0 },
     };
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), columnMajor_h_lambda1[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), columnMajor_h_lambda1[0].imag());
@@ -129,14 +141,14 @@ TEST_F(SpinConstrainTest, CalHLambdaS2)
     // h_lambda = -Sloc2[icc] * this->lambda_[iat2][2]
     sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), false, 0);
     std::vector<std::complex<double>> rowMajor_h_lambda = {
-        std::complex<double>{-2.0, 0.0},
+        std::complex<double>{-2.0, 0.0 },
     };
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), rowMajor_h_lambda[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), rowMajor_h_lambda[0].imag());
     // h_lambda = -Sloc2[icc] * (-this->lambda_[iat2][2])
     sc.cal_h_lambda(&h_lambda[0], Sloc2.data(), false, 1);
     std::vector<std::complex<double>> rowMajor_h_lambda1 = {
-        std::complex<double>{2.0, 0.0},
+        std::complex<double>{2.0, 0.0 },
     };
     EXPECT_DOUBLE_EQ(h_lambda[0].real(), rowMajor_h_lambda1[0].real());
     EXPECT_DOUBLE_EQ(h_lambda[0].imag(), rowMajor_h_lambda1[0].imag());
diff --git a/source/module_hamilt_lcao/module_deltaspin/test/template_helpers_test.cpp b/source/module_hamilt_lcao/module_deltaspin/test/template_helpers_test.cpp
index 1d6f409cb4..993a65184a 100644
--- a/source/module_hamilt_lcao/module_deltaspin/test/template_helpers_test.cpp
+++ b/source/module_hamilt_lcao/module_deltaspin/test/template_helpers_test.cpp
@@ -1,9 +1,9 @@
-#include "../spin_constrain.h"
+#include <algorithm>
+#include <string>
 
+#include "../spin_constrain.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
-#include <algorithm>
-#include <string>
 
 /************************************************
  *  unit test of functions in template_helpers.cpp
diff --git a/source/module_hamilt_lcao/module_dftu/dftu.h b/source/module_hamilt_lcao/module_dftu/dftu.h
index 376ce30714..db86ab26ad 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu.h
+++ b/source/module_hamilt_lcao/module_dftu/dftu.h
@@ -5,16 +5,16 @@
 #ifndef DFTU_H
 #define DFTU_H
 
-#include "module_basis/module_ao/parallel_orbitals.h"
 #include "module_cell/klist.h"
 #include "module_cell/unitcell.h"
-#include "module_elecstate/elecstate.h"
+#include "module_basis/module_ao/parallel_orbitals.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 #include "module_elecstate/module_charge/charge_mixing.h"
-#include "module_elecstate/module_dm/density_matrix.h"
 #include "module_hamilt_general/hamilt.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/force_stress_arrays.h" // mohan add 2024-06-15
+#include "module_elecstate/elecstate.h"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
+#include "module_elecstate/module_dm/density_matrix.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/force_stress_arrays.h" // mohan add 2024-06-15
 
 #include <string>
 
@@ -29,7 +29,7 @@ class DFTU
 {
 
   public:
-    DFTU();  // constructor
+    DFTU(); // constructor
     ~DFTU(); // deconstructor
 
     //=============================================================
@@ -44,25 +44,22 @@ class DFTU
 
     // calculate the energy correction
     void cal_energy_correction(const int istep);
-    double get_energy()
-    {
-        return EU;
-    }
+    double get_energy(){return EU;}
     void uramping_update(); // update U by uramping
-    bool u_converged();     // check if U is converged
+    bool u_converged(); // check if U is converged
 
-    double* U;              // U (Hubbard parameter U)
+    double* U; // U (Hubbard parameter U)
     std::vector<double> U0; // U0 (target Hubbard parameter U0)
-    int* orbital_corr;      //
-    double uramping;        // increase U by uramping, default is -1.0
-    int omc;                // occupation matrix control
-    int mixing_dftu;        // whether to mix locale
+    int* orbital_corr; //
+    double uramping; // increase U by uramping, default is -1.0
+    int omc; // occupation matrix control
+    int mixing_dftu; //whether to mix locale
 
     double EU; //+U energy
   private:
     LCAO_Matrix* LM;
     int cal_type = 3; // 1:dftu_tpye=1, dc=1; 2:dftu_type=1, dc=2; 3:dftu_tpye=2, dc=1; 4:dftu_tpye=2, dc=2;
-
+    
     // transform between iwt index and it, ia, L, N and m index
     std::vector<std::vector<std::vector<std::vector<std::vector<int>>>>>
         iatlnmipol2iwt; // iatlnm2iwt[iat][l][n][m][ipol]
@@ -72,10 +69,7 @@ class DFTU
     // For calculating contribution to Hamiltonian matrices
     //=============================================================
   public:
-    void cal_eff_pot_mat_complex(const int ik,
-                                 std::complex<double>* eff_pot,
-                                 const std::vector<int>& isk,
-                                 const std::complex<double>* sk);
+    void cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot, const std::vector<int>& isk, const std::complex<double>* sk);
     void cal_eff_pot_mat_real(const int ik, double* eff_pot, const std::vector<int>& isk, const double* sk);
     void cal_eff_pot_mat_R_double(const int ispin, double* SR, double* HR);
     void cal_eff_pot_mat_R_complex_double(const int ispin, std::complex<double>* SR, std::complex<double>* HR);
@@ -87,15 +81,8 @@ class DFTU
     //=============================================================
   public:
     // calculate the local occupation number matrix
-    void cal_occup_m_k(const int iter,
-                       const std::vector<std::vector<std::complex<double>>>& dm_k,
-                       const K_Vectors& kv,
-                       const double& mixing_beta,
-                       hamilt::Hamilt<std::complex<double>>* p_ham);
-    void cal_occup_m_gamma(const int iter,
-                           const std::vector<std::vector<double>>& dm_gamma,
-                           const double& mixing_beta,
-                           hamilt::Hamilt<double>* p_ham);
+    void cal_occup_m_k(const int iter, const std::vector<std::vector<std::complex<double>>>& dm_k, const K_Vectors& kv, const double& mixing_beta, hamilt::Hamilt<std::complex<double>>* p_ham);
+    void cal_occup_m_gamma(const int iter, const std::vector<std::vector<double>>& dm_gamma, const double& mixing_beta, hamilt::Hamilt<double>* p_ham);
 
     // dftu can be calculated only after locale has been initialed
     bool initialed_locale = false;
@@ -105,14 +92,13 @@ class DFTU
     void zero_locale();
     void mix_locale(const double& mixing_beta);
 
-  public:
+public:
     // local occupancy matrix of the correlated subspace
     // locale: the out put local occupation number matrix of correlated electrons in the current electronic step
     // locale_save: the input local occupation number matrix of correlated electrons in the current electronic step
     std::vector<std::vector<std::vector<std::vector<ModuleBase::matrix>>>> locale; // locale[iat][l][n][spin](m1,m2)
-    std::vector<std::vector<std::vector<std::vector<ModuleBase::matrix>>>>
-        locale_save; // locale_save[iat][l][n][spin](m1,m2)
-  private:
+    std::vector<std::vector<std::vector<std::vector<ModuleBase::matrix>>>> locale_save; // locale_save[iat][l][n][spin](m1,m2)
+private:
     //=============================================================
     // In dftu_tools.cpp
     // For calculating onsite potential, which is used
@@ -136,80 +122,89 @@ class DFTU
     // Subroutines for folding S and dS matrix
     //=============================================================
 
-    void fold_dSR_gamma(const UnitCell& ucell,
-                        const Parallel_Orbitals& pv,
-                        Grid_Driver* gd,
-                        double* dsloc_x,
-                        double* dsloc_y,
-                        double* dsloc_z,
-                        double* dh_r,
-                        const int dim1,
-                        const int dim2,
-                        double* dSR_gamma);
+	void fold_dSR_gamma(
+			const UnitCell &ucell,
+			const Parallel_Orbitals &pv,
+			Grid_Driver* gd,
+			double* dsloc_x,
+			double* dsloc_y,
+			double* dsloc_z,
+			double* dh_r,
+			const int dim1, 
+			const int dim2, 
+			double* dSR_gamma);
 
     // dim = 0 : S, for Hamiltonian
     // dim = 1-3 : dS, for force
     // dim = 4-6 : dS * dR, for stress
 
-    void folding_matrix_k(ForceStressArrays& fsr,
-                          const Parallel_Orbitals& pv,
-                          const int ik,
-                          const int dim1,
-                          const int dim2,
-                          std::complex<double>* mat_k,
-                          const std::vector<ModuleBase::Vector3<double>>& kvec_d);
+    void folding_matrix_k(
+        ForceStressArrays &fsr,
+        const Parallel_Orbitals &pv,
+		const int ik, 
+		const int dim1, 
+		const int dim2, 
+		std::complex<double>* mat_k, 
+		const std::vector<ModuleBase::Vector3<double>> &kvec_d);
+
 
     /**
      * @brief new function of folding_S_matrix
      * only for Hamiltonian now, for force and stress will be developed later
      * use HContainer as input and output in mat_k
-     */
-    void folding_matrix_k_new(const int ik, hamilt::Hamilt<std::complex<double>>* p_ham);
+    */
+   void folding_matrix_k_new(const int ik, hamilt::Hamilt<std::complex<double>>* p_ham);
 
     //=============================================================
     // In dftu_force.cpp
     // For calculating force and stress fomr DFT+U
     //=============================================================
   public:
-    void force_stress(const elecstate::ElecState* pelec,
-                      LCAO_Matrix& lm,
-                      const Parallel_Orbitals& pv,
-                      ForceStressArrays& fsr,
-                      ModuleBase::matrix& force_dftu,
-                      ModuleBase::matrix& stress_dftu,
-                      const K_Vectors& kv);
+
+   void force_stress(const elecstate::ElecState* pelec,
+		   LCAO_Matrix& lm,
+		   const Parallel_Orbitals& pv,
+		   ForceStressArrays& fsr,
+		   ModuleBase::matrix& force_dftu,
+		   ModuleBase::matrix& stress_dftu,
+		   const K_Vectors& kv);
 
   private:
-    void cal_force_k(ForceStressArrays& fsr,
-                     const Parallel_Orbitals& pv,
-                     const int ik,
-                     const std::complex<double>* rho_VU,
-                     ModuleBase::matrix& force_dftu,
-                     const std::vector<ModuleBase::Vector3<double>>& kvec_d);
-
-    void cal_stress_k(ForceStressArrays& fsr,
-                      const Parallel_Orbitals& pv,
-                      const int ik,
-                      const std::complex<double>* rho_VU,
-                      ModuleBase::matrix& stress_dftu,
-                      const std::vector<ModuleBase::Vector3<double>>& kvec_d);
-
-    void cal_force_gamma(const double* rho_VU,
-                         const Parallel_Orbitals& pv,
-                         double* dsloc_x,
-                         double* dsloc_y,
-                         double* dsloc_z,
-                         ModuleBase::matrix& force_dftu);
-
-    void cal_stress_gamma(const UnitCell& ucell,
-                          const Parallel_Orbitals& pv,
-                          Grid_Driver* gd,
-                          double* dsloc_x,
-                          double* dsloc_y,
-                          double* dsloc_z,
-                          double* dh_r,
-                          const double* rho_VU,
-                          ModuleBase::matrix& stress_dftu);
+
+   void cal_force_k(
+		   ForceStressArrays &fsr,
+		   const Parallel_Orbitals &pv,
+		   const int ik,
+		   const std::complex<double>* rho_VU,
+		   ModuleBase::matrix& force_dftu,
+		   const std::vector<ModuleBase::Vector3<double>>& kvec_d);
+
+    void cal_stress_k(
+			ForceStressArrays &fsr,
+			const Parallel_Orbitals &pv,
+			const int ik,
+			const std::complex<double>* rho_VU,
+			ModuleBase::matrix& stress_dftu,
+			const std::vector<ModuleBase::Vector3<double>>& kvec_d);
+
+	void cal_force_gamma(
+			const double* rho_VU, 
+			const Parallel_Orbitals &pv,
+			double* dsloc_x,
+			double* dsloc_y,
+			double* dsloc_z,
+			ModuleBase::matrix& force_dftu);
+
+	void cal_stress_gamma(
+			const UnitCell &ucell,
+			const Parallel_Orbitals &pv,
+			Grid_Driver* gd,
+			double* dsloc_x,
+			double* dsloc_y,
+			double* dsloc_z,
+			double* dh_r,
+			const double* rho_VU, 
+			ModuleBase::matrix& stress_dftu);
 
     //=============================================================
     // In dftu_io.cpp
@@ -219,7 +214,7 @@ class DFTU
     void output();
 
   private:
-    void write_occup_m(std::ofstream& ofs, bool diag = false);
+    void write_occup_m(std::ofstream& ofs, bool diag=false);
     void read_occup_m(const std::string& fn);
     void local_occup_bcast();
 
@@ -233,10 +228,10 @@ class DFTU
     void cal_slater_UJ(double** rho, const int& nrxx);
 
   private:
-    double lambda;                                                 // the parameter in Yukawa potential
+    double lambda; // the parameter in Yukawa potential
     std::vector<std::vector<std::vector<std::vector<double>>>> Fk; // slater integral:Fk[T][L][N][k]
-    std::vector<std::vector<std::vector<double>>> U_Yukawa;        // U_Yukawa[T][L][N]
-    std::vector<std::vector<std::vector<double>>> J_Yukawa;        // J_Yukawa[T][L][N]
+    std::vector<std::vector<std::vector<double>>> U_Yukawa; // U_Yukawa[T][L][N]
+    std::vector<std::vector<std::vector<double>>> J_Yukawa; // J_Yukawa[T][L][N]
 
     void cal_slater_Fk(const int L, const int T); // L:angular momnet, T:atom type
     void cal_yukawa_lambda(double** rho, const int& nrxx);
@@ -249,15 +244,15 @@ class DFTU
      * @brief get the density matrix of target spin
      * nspin = 1 and 4 : ispin should be 0
      * nspin = 2 : ispin should be 0/1
-     */
+    */
     const hamilt::HContainer<double>* get_dmr(int ispin) const;
     /**
      * @brief set the density matrix for DFT+U calculation
      * if the density matrix is not set or set to nullptr, the DFT+U calculation will not be performed
-     */
+    */
     void set_dmr(const elecstate::DensityMatrix<double, double>* dm_in_dftu_d);
     void set_dmr(const elecstate::DensityMatrix<std::complex<double>, double>* dm_in_dftu_cd);
-
+  
   private:
     const elecstate::DensityMatrix<double, double>* dm_in_dftu_d = nullptr;
     const elecstate::DensityMatrix<std::complex<double>, double>* dm_in_dftu_cd = nullptr;
@@ -267,6 +262,6 @@ class DFTU
 
 namespace GlobalC
 {
-extern ModuleDFTU::DFTU dftu;
+	extern ModuleDFTU::DFTU dftu;
 }
 #endif
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp b/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
index 70c3800089..0abaac6ef7 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_folding.cpp
@@ -1,42 +1,43 @@
 #include "dftu.h"
 #include "module_base/timer.h"
+#include "module_hamilt_pw/hamilt_pwdft/global.h"
 #include "module_cell/module_neighbor/sltk_grid_driver.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer.h"
 #include "module_hamilt_lcao/module_hcontainer/hcontainer_funcs.h"
-#include "module_hamilt_pw/hamilt_pwdft/global.h"
 
 namespace ModuleDFTU
 {
 
-void DFTU::fold_dSR_gamma(const UnitCell& ucell,
-                          const Parallel_Orbitals& pv,
-                          Grid_Driver* gd,
-                          double* dsloc_x,
-                          double* dsloc_y,
-                          double* dsloc_z,
-                          double* dh_r,
-                          const int dim1,
-                          const int dim2,
-                          double* dSR_gamma)
+void DFTU::fold_dSR_gamma(
+    const UnitCell &ucell,
+    const Parallel_Orbitals &pv,
+    Grid_Driver* gd,
+    double* dsloc_x,
+    double* dsloc_y,
+    double* dsloc_z,
+    double* dh_r,
+    const int dim1, 
+    const int dim2, 
+    double* dSR_gamma)
 {
     ModuleBase::TITLE("DFTU", "fold_dSR_gamma");
 
     ModuleBase::GlobalFunc::ZEROS(dSR_gamma, pv.nloc);
 
     double* dS_ptr = nullptr;
-    if (dim1 == 0)
-    {
-        dS_ptr = dsloc_x;
-    }
-    else if (dim1 == 1)
-    {
-        dS_ptr = dsloc_y;
-    }
-    else if (dim1 == 2)
-    {
-        dS_ptr = dsloc_z;
-    }
+	if(dim1 == 0) 
+	{
+		dS_ptr = dsloc_x;
+	}
+	else if(dim1 == 1) 
+	{
+		dS_ptr = dsloc_y;
+	}
+	else if (dim1 == 2) 
+	{
+		dS_ptr = dsloc_z;
+	}
 
     int nnr = 0;
     ModuleBase::Vector3<double> tau1, tau2, dtau;
@@ -61,10 +62,10 @@ void DFTU::fold_dSR_gamma(const UnitCell& ucell,
                 double distance = dtau.norm() * ucell.lat0;
                 double rcut = GlobalC::ORB.Phi[T1].getRcut() + GlobalC::ORB.Phi[T2].getRcut();
                 bool adj = false;
-                if (distance < rcut)
-                {
-                    adj = true;
-                }
+				if (distance < rcut)
+				{
+					adj = true;
+				}
                 else if (distance >= rcut)
                 {
                     for (int ad0 = 0; ad0 < gd->getAdjacentNum() + 1; ++ad0)
@@ -95,53 +96,51 @@ void DFTU::fold_dSR_gamma(const UnitCell& ucell,
                         const int jj0 = jj / GlobalV::NPOL;
                         const int iw1_all = start1 + jj0;
                         const int mu = pv.global2local_row(iw1_all);
-                        if (mu < 0)
-                        {
-                            continue;
-                        }
+						if (mu < 0) 
+						{
+							continue;
+						}
 
                         for (int kk = 0; kk < atom2->nw * GlobalV::NPOL; ++kk)
                         {
                             const int kk0 = kk / GlobalV::NPOL;
                             const int iw2_all = start2 + kk0;
                             const int nu = pv.global2local_col(iw2_all);
-                            if (nu < 0)
-                            {
-                                continue;
-                            }
+							if (nu < 0) 
+							{
+								continue;
+							}
 
                             dSR_gamma[nu * pv.nrow + mu] += dS_ptr[nnr] * dh_r[nnr * 3 + dim2];
 
                             ++nnr;
                         } // kk
-                    }     // jj
-                }         // adj
-            }             // ad
-        }                 // I1
-    }                     // T1
+                    } // jj
+                } // adj
+            } // ad
+        } // I1
+    } // T1
 
     return;
 }
 
-void DFTU::folding_matrix_k(ForceStressArrays& fsr,
-                            const Parallel_Orbitals& pv,
-                            const int ik,
-                            const int dim1,
-                            const int dim2,
-                            std::complex<double>* mat_k,
-                            const std::vector<ModuleBase::Vector3<double>>& kvec_d)
+void DFTU::folding_matrix_k(
+        ForceStressArrays &fsr,
+        const Parallel_Orbitals &pv,
+		const int ik, 
+		const int dim1, 
+		const int dim2, 
+		std::complex<double>* mat_k, 
+		const std::vector<ModuleBase::Vector3<double>> &kvec_d)
 {
     ModuleBase::TITLE("DFTU", "folding_matrix_k");
     ModuleBase::timer::tick("DFTU", "folding_matrix_k");
     ModuleBase::GlobalFunc::ZEROS(mat_k, pv.nloc);
 
     double* mat_ptr;
-    if (dim1 == 1 || dim1 == 4)
-        mat_ptr = fsr.DSloc_Rx;
-    else if (dim1 == 2 || dim1 == 5)
-        mat_ptr = fsr.DSloc_Ry;
-    else if (dim1 == 3 || dim1 == 6)
-        mat_ptr = fsr.DSloc_Rz;
+    if      (dim1 == 1 || dim1 == 4) mat_ptr = fsr.DSloc_Rx;
+    else if (dim1 == 2 || dim1 == 5) mat_ptr = fsr.DSloc_Ry;
+    else if (dim1 == 3 || dim1 == 6) mat_ptr = fsr.DSloc_Rz;
 
     int nnr = 0;
     ModuleBase::Vector3<double> dtau;
@@ -228,15 +227,13 @@ void DFTU::folding_matrix_k(ForceStressArrays& fsr,
                         // the index of orbitals in this processor
                         const int iw1_all = start1 + ii;
                         const int mu = pv.global2local_row(iw1_all);
-                        if (mu < 0)
-                            continue;
+                        if (mu < 0) continue;
 
                         for (int jj = 0; jj < atom2->nw * GlobalV::NPOL; jj++)
                         {
                             int iw2_all = start2 + jj;
                             const int nu = pv.global2local_col(iw2_all);
-                            if (nu < 0)
-                                continue;
+                            if (nu < 0) continue;
 
                             int iic;
                             if (ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER())
@@ -259,18 +256,19 @@ void DFTU::folding_matrix_k(ForceStressArrays& fsr,
 
                             ++nnr;
                         } // kk
-                    }     // jj
-                }         // adj
+                    } // jj
+                } // adj
 
             } // ad
-        }     // I1
-    }         // T1
+        } // I1
+    } // T1
     ModuleBase::timer::tick("DFTU", "folding_matrix_k");
 
     return;
 }
 
-void DFTU::folding_matrix_k_new(const int ik, hamilt::Hamilt<std::complex<double>>* p_ham)
+void DFTU::folding_matrix_k_new(const int ik,
+    hamilt::Hamilt<std::complex<double>>* p_ham)
 {
     ModuleBase::TITLE("DFTU", "folding_matrix_k_new");
     ModuleBase::timer::tick("DFTU", "folding_matrix_k_new");
@@ -282,13 +280,13 @@ void DFTU::folding_matrix_k_new(const int ik, hamilt::Hamilt<std::complex<double
     }
 
     // get SR and fold to mat_k
-    if (GlobalV::GAMMA_ONLY_LOCAL)
+    if(GlobalV::GAMMA_ONLY_LOCAL)
     {
         dynamic_cast<hamilt::HamiltLCAO<double, double>*>(p_ham)->updateSk(ik, hk_type);
     }
     else
     {
-        if (GlobalV::NSPIN != 4)
+        if(GlobalV::NSPIN != 4)
         {
             dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, hk_type);
         }
@@ -299,4 +297,6 @@ void DFTU::folding_matrix_k_new(const int ik, hamilt::Hamilt<std::complex<double
     }
 }
 
+    
+
 } // namespace ModuleDFTU
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp b/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp
index abfc612954..a786e97a42 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp
@@ -6,10 +6,7 @@
 namespace ModuleDFTU
 {
 
-void DFTU::cal_eff_pot_mat_complex(const int ik,
-                                   std::complex<double>* eff_pot,
-                                   const std::vector<int>& isk,
-                                   const std::complex<double>* sk)
+void DFTU::cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot, const std::vector<int>& isk, const std::complex<double>* sk)
 {
     ModuleBase::TITLE("DFTU", "cal_eff_pot_mat");
     ModuleBase::timer::tick("DFTU", "cal_eff_pot_mat");
@@ -36,43 +33,24 @@ void DFTU::cal_eff_pot_mat_complex(const int ik,
     this->cal_VU_pot_mat_complex(spin, true, &VU[0]);
 
 #ifdef __MPI
-    pzgemm_(&transN,
-            &transN,
-            &GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &half,
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU),
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
-            sk,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
+	pzgemm_(&transN, &transN,
+            &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
+            &half, 
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
+            sk, &one_int, &one_int, this->LM->ParaV->desc,
             &zero,
-            eff_pot,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc);
+            eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
 #endif
 
     for (int irc = 0; irc < this->LM->ParaV->nloc; irc++)
         VU[irc] = eff_pot[irc];
 
 #ifdef __MPI
-    pztranc_(&GlobalV::NLOCAL,
-             &GlobalV::NLOCAL,
-             &one,
-             &VU[0],
-             &one_int,
-             &one_int,
-             this->LM->ParaV->desc,
-             &one,
-             eff_pot,
-             &one_int,
-             &one_int,
-             this->LM->ParaV->desc);
+  	pztranc_(&GlobalV::NLOCAL, &GlobalV::NLOCAL, 
+            &one, 
+            &VU[0], &one_int, &one_int, this->LM->ParaV->desc, 
+            &one, 
+            eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
 #endif
 
     ModuleBase::timer::tick("DFTU", "cal_eff_pot_mat");
@@ -104,43 +82,24 @@ void DFTU::cal_eff_pot_mat_real(const int ik, double* eff_pot, const std::vector
     this->cal_VU_pot_mat_real(spin, 1, &VU[0]);
 
 #ifdef __MPI
-    pdgemm_(&transN,
-            &transN,
-            &GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &half,
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU),
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
-            sk,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
+	pdgemm_(&transN, &transN,
+            &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
+            &half, 
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc, 
+            sk, &one_int, &one_int, this->LM->ParaV->desc,
             &beta,
-            eff_pot,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc);
+            eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
 #endif
 
     for (int irc = 0; irc < this->LM->ParaV->nloc; irc++)
         VU[irc] = eff_pot[irc];
 
 #ifdef __MPI
-    pdtran_(&GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &one,
-            &VU[0],
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
-            &one,
-            eff_pot,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc);
+	pdtran_(&GlobalV::NLOCAL, &GlobalV::NLOCAL, 
+            &one, 
+            &VU[0], &one_int, &one_int, this->LM->ParaV->desc, 
+            &one, 
+            eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
 #endif
 
     ModuleBase::timer::tick("DFTU", "cal_eff_pot_mat");
@@ -157,45 +116,21 @@ void DFTU::cal_eff_pot_mat_R_double(const int ispin, double* SR, double* HR)
     this->cal_VU_pot_mat_real(ispin, 1, &VU[0]);
 
 #ifdef __MPI
-    pdgemm_(&transN,
-            &transN,
-            &GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &half,
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU),
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
-            SR,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
+    pdgemm_(&transN, &transN,
+            &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
+            &half, 
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc, 
+            SR, &one_int, &one_int, this->LM->ParaV->desc,
             &beta,
-            HR,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc);
+            HR, &one_int, &one_int, this->LM->ParaV->desc);
 
-    pdgemm_(&transN,
-            &transN,
-            &GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &half,
-            SR,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU),
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
+    pdgemm_(&transN, &transN,
+            &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
+            &half, 
+            SR, &one_int, &one_int, this->LM->ParaV->desc, 
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
             &one,
-            HR,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc);
+            HR, &one_int, &one_int, this->LM->ParaV->desc);
 #endif
 
     return;
@@ -211,48 +146,24 @@ void DFTU::cal_eff_pot_mat_R_complex_double(const int ispin, std::complex<double
     this->cal_VU_pot_mat_complex(ispin, 1, &VU[0]);
 
 #ifdef __MPI
-    pzgemm_(&transN,
-            &transN,
-            &GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &half,
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU),
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
-            SR,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
+    pzgemm_(&transN, &transN,
+            &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
+            &half, 
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
+            SR, &one_int, &one_int, this->LM->ParaV->desc,
             &zero,
-            HR,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc);
+            HR, &one_int, &one_int, this->LM->ParaV->desc);
 
-    pzgemm_(&transN,
-            &transN,
-            &GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &GlobalV::NLOCAL,
-            &half,
-            SR,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU),
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc,
+    pzgemm_(&transN, &transN,
+            &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
+            &half, 
+            SR, &one_int, &one_int, this->LM->ParaV->desc, 
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
             &one,
-            HR,
-            &one_int,
-            &one_int,
-            this->LM->ParaV->desc);
+            HR, &one_int, &one_int, this->LM->ParaV->desc);
 #endif
 
     return;
 }
 
-} // namespace ModuleDFTU
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp b/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp
index 358b3e3029..da8d93a8b7 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp
@@ -1,51 +1,29 @@
 #include "dftu.h"
 #include "module_base/timer.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
 
 extern "C"
 {
-    // I'm not sure what's happenig here, but the interface in scalapack_connecter.h
-    // does not seem to work, so I'll use this one here
-    void pzgemm_(const char* transa,
-                 const char* transb,
-                 const int* M,
-                 const int* N,
-                 const int* K,
-                 const std::complex<double>* alpha,
-                 const std::complex<double>* A,
-                 const int* IA,
-                 const int* JA,
-                 const int* DESCA,
-                 const std::complex<double>* B,
-                 const int* IB,
-                 const int* JB,
-                 const int* DESCB,
-                 const std::complex<double>* beta,
-                 std::complex<double>* C,
-                 const int* IC,
-                 const int* JC,
-                 const int* DESCC);
-
-    void pdgemm_(const char* transa,
-                 const char* transb,
-                 const int* M,
-                 const int* N,
-                 const int* K,
-                 const double* alpha,
-                 const double* A,
-                 const int* IA,
-                 const int* JA,
-                 const int* DESCA,
-                 const double* B,
-                 const int* IB,
-                 const int* JB,
-                 const int* DESCB,
-                 const double* beta,
-                 double* C,
-                 const int* IC,
-                 const int* JC,
-                 const int* DESCC);
+  //I'm not sure what's happenig here, but the interface in scalapack_connecter.h
+  //does not seem to work, so I'll use this one here
+  void pzgemm_(
+		const char *transa, const char *transb,
+		const int *M, const int *N, const int *K,
+		const std::complex<double> *alpha,
+		const std::complex<double> *A, const int *IA, const int *JA, const int *DESCA,
+		const std::complex<double> *B, const int *IB, const int *JB, const int *DESCB,
+		const std::complex<double> *beta,
+		std::complex<double> *C, const int *IC, const int *JC, const int *DESCC);
+  
+  void pdgemm_(
+		const char *transa, const char *transb,
+		const int *M, const int *N, const int *K,
+		const double *alpha,
+		const double *A, const int *IA, const int *JA, const int *DESCA,
+		const double *B, const int *IB, const int *JB, const int *DESCB,
+		const double *beta,
+		double *C, const int *IC, const int *JC, const int *DESCC);
 }
 
 namespace ModuleDFTU
@@ -93,8 +71,7 @@ void DFTU::zero_locale()
 
     for (int T = 0; T < GlobalC::ucell.ntype; T++)
     {
-        if (orbital_corr[T] == -1)
-            continue;
+        if (orbital_corr[T] == -1) continue;
 
         for (int I = 0; I < GlobalC::ucell.atoms[T].na; I++)
         {
@@ -146,12 +123,12 @@ void DFTU::mix_locale(const double& mixing_beta)
                 {
                     if (GlobalV::NSPIN == 4)
                     {
-                        locale[iat][l][n][0] = locale[iat][l][n][0] * beta + locale_save[iat][l][n][0] * (1.0 - beta);
+                        locale[iat][l][n][0] = locale[iat][l][n][0]*beta + locale_save[iat][l][n][0]*(1.0-beta);
                     }
                     else if (GlobalV::NSPIN == 1 || GlobalV::NSPIN == 2)
                     {
-                        locale[iat][l][n][0] = locale[iat][l][n][0] * beta + locale_save[iat][l][n][0] * (1.0 - beta);
-                        locale[iat][l][n][1] = locale[iat][l][n][1] * beta + locale_save[iat][l][n][1] * (1.0 - beta);
+                        locale[iat][l][n][0] = locale[iat][l][n][0] * beta + locale_save[iat][l][n][0] * (1.0-beta);
+                        locale[iat][l][n][1] = locale[iat][l][n][1] * beta + locale_save[iat][l][n][1] * (1.0-beta);
                     }
                 }
             }
@@ -160,11 +137,11 @@ void DFTU::mix_locale(const double& mixing_beta)
     ModuleBase::timer::tick("DFTU", "mix_locale");
 }
 
-void DFTU::cal_occup_m_k(const int iter,
-                         const std::vector<std::vector<std::complex<double>>>& dm_k,
-                         const K_Vectors& kv,
-                         const double& mixing_beta,
-                         hamilt::Hamilt<std::complex<double>>* p_ham)
+void DFTU::cal_occup_m_k(const int iter, 
+                        const std::vector<std::vector<std::complex<double>>>& dm_k,
+                        const K_Vectors& kv,
+                        const double& mixing_beta,
+                        hamilt::Hamilt<std::complex<double>>* p_ham)
 {
     ModuleBase::TITLE("DFTU", "cal_occup_m_k");
     ModuleBase::timer::tick("DFTU", "cal_occup_m_k");
@@ -176,7 +153,7 @@ void DFTU::cal_occup_m_k(const int iter,
     // call SCALAPACK routine to calculate the product of the S and density matrix
     const char transN = 'N', transT = 'T';
     const int one_int = 1;
-    const std::complex<double> beta(0.0, 0.0), alpha(1.0, 0.0);
+    const std::complex<double> beta(0.0,0.0), alpha(1.0,0.0);
 
     std::vector<std::complex<double>> srho(this->LM->ParaV->nloc);
 
@@ -185,7 +162,7 @@ void DFTU::cal_occup_m_k(const int iter,
         // srho(mu,nu) = \sum_{iw} S(mu,iw)*dm_k(iw,nu)
         this->folding_matrix_k_new(ik, p_ham);
         std::complex<double>* s_k_pointer = nullptr;
-        if (GlobalV::NSPIN != 4)
+        if(GlobalV::NSPIN != 4)
         {
             s_k_pointer = dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->getSk();
         }
@@ -206,7 +183,7 @@ void DFTU::cal_occup_m_k(const int iter,
                 &one_int,
                 this->LM->ParaV->desc,
                 dm_k[ik].data(),
-                // dm_k[ik].c,
+                //dm_k[ik].c,
                 &one_int,
                 &one_int,
                 this->LM->ParaV->desc,
@@ -273,14 +250,14 @@ void DFTU::cal_occup_m_k(const int iter,
                                             locale[iat][l][n][spin](m0_all, m1_all)
                                                 += (std::conj(srho[irc_prime])).real() / 4.0;
                                     } // ipol1
-                                }     // m1
-                            }         // ipol0
-                        }             // m0
-                    }                 // end n
-                }                     // end l
-            }                         // end ia
-        }                             // end it
-    }                                 // ik
+                                } // m1
+                            } // ipol0
+                        } // m0
+                    } // end n
+                } // end l
+            } // end ia
+        } // end it
+    } // ik
 
     for (int it = 0; it < GlobalC::ucell.ntype; it++)
     {
@@ -362,11 +339,11 @@ void DFTU::cal_occup_m_k(const int iter,
                         exit(0);
                     }
                 } // end n
-            }     // end l
-        }         // end ia
-    }             // end it
+            } // end l
+        } // end ia
+    } // end it
 
-    if (mixing_dftu && initialed_locale)
+    if(mixing_dftu && initialed_locale)
     {
         this->mix_locale(mixing_beta);
     }
@@ -376,10 +353,7 @@ void DFTU::cal_occup_m_k(const int iter,
     return;
 }
 
-void DFTU::cal_occup_m_gamma(const int iter,
-                             const std::vector<std::vector<double>>& dm_gamma,
-                             const double& mixing_beta,
-                             hamilt::Hamilt<double>* p_ham)
+void DFTU::cal_occup_m_gamma(const int iter, const std::vector<std::vector<double>> &dm_gamma, const double& mixing_beta, hamilt::Hamilt<double>* p_ham)
 {
     ModuleBase::TITLE("DFTU", "cal_occup_m_gamma");
     ModuleBase::timer::tick("DFTU", "cal_occup_m_gamma");
@@ -410,7 +384,7 @@ void DFTU::cal_occup_m_gamma(const int iter,
                 &one_int,
                 this->LM->ParaV->desc,
                 dm_gamma[is].data(),
-                // dm_gamma[is].c,
+                //dm_gamma[is].c,
                 &one_int,
                 &one_int,
                 this->LM->ParaV->desc,
@@ -512,12 +486,12 @@ void DFTU::cal_occup_m_gamma(const int iter,
                         }
 
                     } // end for(n)
-                }     // L
-            }         // ia
-        }             // it
-    }                 // is
+                } // L
+            } // ia
+        } // it
+    } // is
 
-    if (mixing_dftu && initialed_locale)
+    if(mixing_dftu && initialed_locale)
     {
         this->mix_locale(mixing_beta);
     }
diff --git a/source/module_hamilt_lcao/module_hcontainer/hcontainer.h b/source/module_hamilt_lcao/module_hcontainer/hcontainer.h
index 9ac7ad048e..79105ce340 100644
--- a/source/module_hamilt_lcao/module_hcontainer/hcontainer.h
+++ b/source/module_hamilt_lcao/module_hcontainer/hcontainer.h
@@ -423,10 +423,7 @@ class HContainer
      * @brief get parallel orbital pointer to check parallel information
      * @return const Parallel_Orbitals* , if return is nullptr, it means HContainer is not in parallel mode
      */
-    const Parallel_Orbitals* get_paraV() const
-    {
-        return paraV;
-    }
+    const Parallel_Orbitals* get_paraV() const{return paraV;}
 
   private:
     // i-j atom pairs, sorted by matrix of (atom_i, atom_j)
diff --git a/source/module_io/output_sk.cpp b/source/module_io/output_sk.cpp
index ea3da14e31..96c0663579 100644
--- a/source/module_io/output_sk.cpp
+++ b/source/module_io/output_sk.cpp
@@ -30,7 +30,8 @@ std::complex<double>* Output_Sk<std::complex<double>>::get_Sk(int ik)
     }
     if (this->nspin_ == 4)
     {
-        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt_)->updateSk(ik, 1);
+        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt_)
+            ->updateSk(ik, 1);
         return dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(this->p_hamilt_)->getSk();
     }
     else
diff --git a/source/module_io/write_dos_lcao.cpp b/source/module_io/write_dos_lcao.cpp
index a8b2400142..81c30a9462 100644
--- a/source/module_io/write_dos_lcao.cpp
+++ b/source/module_io/write_dos_lcao.cpp
@@ -448,10 +448,8 @@ void ModuleIO::write_dos_lcao(const psi::Psi<std::complex<double>>* psi,
                 const std::complex<double>* sk = nullptr;
                 if (GlobalV::NSPIN == 4)
                 {
-                    dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik,
-                                                                                                                   1);
-                    sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)
-                             ->getSk();
+                    dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik, 1);
+                    sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->getSk();
                 }
                 else
                 {
diff --git a/source/module_io/write_proj_band_lcao.cpp b/source/module_io/write_proj_band_lcao.cpp
index 44a386a526..32425dc130 100644
--- a/source/module_io/write_proj_band_lcao.cpp
+++ b/source/module_io/write_proj_band_lcao.cpp
@@ -5,16 +5,17 @@
 #include "module_base/scalapack_connector.h"
 #include "module_base/timer.h"
 #include "module_cell/module_neighbor/sltk_atom_arrange.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
 #include "write_orb_info.h"
+#include "module_hamilt_lcao/hamilt_lcaodft/hamilt_lcao.h"
 
-template <>
-void ModuleIO::write_proj_band_lcao(const psi::Psi<double>* psi,
-                                    LCAO_Matrix& lm,
-                                    const elecstate::ElecState* pelec,
-                                    const K_Vectors& kv,
-                                    const UnitCell& ucell,
-                                    hamilt::Hamilt<double>* p_ham)
+template<>
+void ModuleIO::write_proj_band_lcao(
+    const psi::Psi<double>* psi,
+    LCAO_Matrix& lm,
+    const elecstate::ElecState* pelec,
+    const K_Vectors& kv,
+    const UnitCell &ucell,
+    hamilt::Hamilt<double>* p_ham)
 {
     ModuleBase::TITLE("ModuleIO", "write_proj_band_lcao");
     ModuleBase::timer::tick("ModuleIO", "write_proj_band_lcao");
@@ -42,6 +43,7 @@ void ModuleIO::write_proj_band_lcao(const psi::Psi<double>* psi,
     weightk.create(nspin0, GlobalV::NBANDS * GlobalV::NLOCAL, true);
     weight.create(nspin0, GlobalV::NBANDS * GlobalV::NLOCAL, true);
 
+
     for (int is = 0; is < nspin0; is++)
     {
         std::vector<ModuleBase::matrix> Mulk;
@@ -58,24 +60,24 @@ void ModuleIO::write_proj_band_lcao(const psi::Psi<double>* psi,
 #ifdef __MPI
             const char T_char = 'T';
             pdgemv_(&T_char,
-                    &GlobalV::NLOCAL,
-                    &GlobalV::NLOCAL,
-                    &one_float,
-                    sk,
-                    &one_int,
-                    &one_int,
-                    pv->desc,
-                    psi->get_pointer(),
-                    &one_int,
-                    &NB,
-                    pv->desc,
-                    &one_int,
-                    &zero_float,
-                    Mulk[0].c,
-                    &one_int,
-                    &NB,
-                    pv->desc,
-                    &one_int);
+                &GlobalV::NLOCAL,
+                &GlobalV::NLOCAL,
+                &one_float,
+                sk,
+                &one_int,
+                &one_int,
+                pv->desc,
+                psi->get_pointer(),
+                &one_int,
+                &NB,
+                pv->desc,
+                &one_int,
+                &zero_float,
+                Mulk[0].c,
+                &one_int,
+                &NB,
+                pv->desc,
+                &one_int);
 #endif
             for (int j = 0; j < GlobalV::NLOCAL; ++j)
             {
@@ -144,14 +146,14 @@ void ModuleIO::write_proj_band_lcao(const psi::Psi<double>* psi,
                             int w0 = w - s0;
                             out << std::setw(13)
                                 << weight(is, ib * GlobalV::NLOCAL + s0 + 2 * w0)
-                                       + weight(is, ib * GlobalV::NLOCAL + s0 + 2 * w0 + 1);
+                                + weight(is, ib * GlobalV::NLOCAL + s0 + 2 * w0 + 1);
                         }
                     }
                     out << std::endl;
                     out << "</data>" << std::endl;
                     out << "</orbital>" << std::endl;
                 } // j
-            }     // i
+            } // i
 
             out << "</pband>" << std::endl;
             out.close();
@@ -163,13 +165,14 @@ void ModuleIO::write_proj_band_lcao(const psi::Psi<double>* psi,
     return;
 }
 
-template <>
-void ModuleIO::write_proj_band_lcao(const psi::Psi<std::complex<double>>* psi,
-                                    LCAO_Matrix& lm,
-                                    const elecstate::ElecState* pelec,
-                                    const K_Vectors& kv,
-                                    const UnitCell& ucell,
-                                    hamilt::Hamilt<std::complex<double>>* p_ham)
+template<>
+void ModuleIO::write_proj_band_lcao(
+    const psi::Psi<std::complex<double>>* psi,
+    LCAO_Matrix& lm,
+    const elecstate::ElecState* pelec,
+    const K_Vectors& kv,
+    const UnitCell& ucell,
+    hamilt::Hamilt<std::complex<double>>* p_ham)
 {
     ModuleBase::TITLE("ModuleIO", "write_proj_band_lcao");
     ModuleBase::timer::tick("ModuleIO", "write_proj_band_lcao");
@@ -197,48 +200,46 @@ void ModuleIO::write_proj_band_lcao(const psi::Psi<std::complex<double>>* psi,
 
     for (int is = 0; is < nspin0; is++)
     {
-        std::vector<ModuleBase::ComplexMatrix> Mulk;
-        Mulk.resize(1);
-        Mulk[0].create(pv->ncol, pv->nrow);
+            std::vector<ModuleBase::ComplexMatrix> Mulk;
+            Mulk.resize(1);
+            Mulk[0].create(pv->ncol, pv->nrow);
 
-        for (int ik = 0; ik < kv.get_nks(); ik++)
-        {
-            if (is == kv.isk[ik])
+            for (int ik = 0; ik < kv.get_nks(); ik++)
             {
-                // calculate SK for current k point
-                const std::complex<double>* sk = nullptr;
-                if (GlobalV::NSPIN == 4)
-                {
-                    dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik,
-                                                                                                                   1);
-                    sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)
-                             ->getSk();
-                }
-                else
+                if (is == kv.isk[ik])
                 {
-                    dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, 1);
-                    sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->getSk();
-                }
+                    // calculate SK for current k point
+                    const std::complex<double>* sk = nullptr;
+                    if (GlobalV::NSPIN == 4)
+                    {
+                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->updateSk(ik, 1);
+                        sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, std::complex<double>>*>(p_ham)->getSk();
+                    }
+                    else
+                    {
+                        dynamic_cast<hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->updateSk(ik, 1);
+                        sk = dynamic_cast<const hamilt::HamiltLCAO<std::complex<double>, double>*>(p_ham)->getSk();
+                    }
 
-                // calculate Mulk
-                psi->fix_k(ik);
-                psi::Psi<std::complex<double>> Dwfc(psi[0], 1);
-                std::complex<double>* p_dwfc = Dwfc.get_pointer();
-                for (int index = 0; index < Dwfc.size(); ++index)
-                {
-                    p_dwfc[index] = conj(p_dwfc[index]);
-                }
+                    // calculate Mulk
+                    psi->fix_k(ik);
+                    psi::Psi<std::complex<double>> Dwfc(psi[0], 1);
+                    std::complex<double>* p_dwfc = Dwfc.get_pointer();
+                    for (int index = 0; index < Dwfc.size(); ++index)
+                    {
+                        p_dwfc[index] = conj(p_dwfc[index]);
+                    }
 
-                for (int i = 0; i < GlobalV::NBANDS; ++i)
-                {
-                    const int NB = i + 1;
+                    for (int i = 0; i < GlobalV::NBANDS; ++i)
+                    {
+                        const int NB = i + 1;
 
-                    const double one_float[2] = {1.0, 0.0};
-                    const double zero_float[2] = {0.0, 0.0};
-                    const int one_int = 1;
-                    const char T_char = 'T';
+                        const double one_float[2] = { 1.0, 0.0 };
+                        const double zero_float[2] = { 0.0, 0.0 };
+                        const int one_int = 1;
+                        const char T_char = 'T';
 #ifdef __MPI
-                    pzgemv_(&T_char,
+                        pzgemv_(&T_char,
                             &GlobalV::NLOCAL,
                             &GlobalV::NLOCAL,
                             &one_float[0],
@@ -258,23 +259,23 @@ void ModuleIO::write_proj_band_lcao(const psi::Psi<std::complex<double>>* psi,
                             pv->desc,
                             &one_int);
 #endif
-                    for (int j = 0; j < GlobalV::NLOCAL; ++j)
-                    {
-
-                        if (pv->in_this_processor(j, i))
+                        for (int j = 0; j < GlobalV::NLOCAL; ++j)
                         {
 
-                            const int ir = pv->global2local_row(j);
-                            const int ic = pv->global2local_col(i);
+                            if (pv->in_this_processor(j, i))
+                            {
 
-                            weightk(ik, i * GlobalV::NLOCAL + j) = Mulk[0](ic, ir) * psi[0](ic, ir);
+                                const int ir = pv->global2local_row(j);
+                                const int ic = pv->global2local_col(i);
+
+                                weightk(ik, i * GlobalV::NLOCAL + j) = Mulk[0](ic, ir) * psi[0](ic, ir);
+                            }
                         }
-                    }
 
-                } // ib
+                    } // ib
 
-            } // if
-        }     // ik
+                } // if
+            } // ik
 #ifdef __MPI
         MPI_Reduce(weightk.real().c, weight.c, NUM, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
 #endif
@@ -300,12 +301,12 @@ void ModuleIO::write_proj_band_lcao(const psi::Psi<std::complex<double>>* psi,
             out << "<band_structure nkpoints=\"" << nks << "\" nbands=\"" << GlobalV::NBANDS << "\" units=\"eV\">"
                 << std::endl;
 
-            for (int ik = 0; ik < nks; ik++)
-            {
-                for (int ib = 0; ib < GlobalV::NBANDS; ib++)
-                    out << " " << (pelec->ekb(ik + is * nks, ib)) * ModuleBase::Ry_to_eV;
-                out << std::endl;
-            }
+			for (int ik = 0; ik < nks; ik++)
+			{
+				for (int ib = 0; ib < GlobalV::NBANDS; ib++)
+					out << " " << (pelec->ekb(ik + is * nks, ib)) * ModuleBase::Ry_to_eV;
+				out << std::endl;
+			}
             out << "</band_structure>" << std::endl;
 
             for (int i = 0; i < ucell.nat; i++)
@@ -331,32 +332,32 @@ void ModuleIO::write_proj_band_lcao(const psi::Psi<std::complex<double>>* psi,
                     out << std::setw(2) << "z=\"" << std::setw(40) << N1 + 1 << "\"" << std::endl;
                     out << ">" << std::endl;
                     out << "<data>" << std::endl;
-                    for (int ik = 0; ik < nks; ik++)
-                    {
-                        for (int ib = 0; ib < GlobalV::NBANDS; ib++)
-                        {
-                            if (GlobalV::NSPIN == 1)
-                            {
-                                out << std::setw(13) << weight(ik, ib * GlobalV::NLOCAL + w);
-                            }
-                            else if (GlobalV::NSPIN == 2)
-                            {
-                                out << std::setw(13) << weight(ik + nks * is, ib * GlobalV::NLOCAL + w);
-                            }
-                            else if (GlobalV::NSPIN == 4)
-                            {
-                                int w0 = w - s0;
-                                out << std::setw(13)
-                                    << weight(ik, ib * GlobalV::NLOCAL + s0 + 2 * w0)
-                                           + weight(ik, ib * GlobalV::NLOCAL + s0 + 2 * w0 + 1);
-                            }
-                        }
-                        out << std::endl;
-                    }
+					for (int ik = 0; ik < nks; ik++)
+					{
+						for (int ib = 0; ib < GlobalV::NBANDS; ib++)
+						{
+							if (GlobalV::NSPIN == 1)
+							{
+								out << std::setw(13) << weight(ik, ib * GlobalV::NLOCAL + w);
+							}
+							else if (GlobalV::NSPIN == 2)
+							{
+								out << std::setw(13) << weight(ik + nks * is, ib * GlobalV::NLOCAL + w);
+							}
+							else if (GlobalV::NSPIN == 4)
+							{
+								int w0 = w - s0;
+								out << std::setw(13)
+									<< weight(ik, ib * GlobalV::NLOCAL + s0 + 2 * w0)
+									+ weight(ik, ib * GlobalV::NLOCAL + s0 + 2 * w0 + 1);
+							}
+						}
+						out << std::endl;
+					}
                     out << "</data>" << std::endl;
                     out << "</orbital>" << std::endl;
                 } // j
-            }     // i
+            } // i
 
             out << "</pband>" << std::endl;
             out.close();
diff --git a/source/module_ri/RI_2D_Comm.h b/source/module_ri/RI_2D_Comm.h
index 768c9171d6..d4be823ddf 100644
--- a/source/module_ri/RI_2D_Comm.h
+++ b/source/module_ri/RI_2D_Comm.h
@@ -7,63 +7,65 @@
 #define RI_2D_COMM_H
 
 #include "module_basis/module_ao/parallel_orbitals.h"
-#include "module_cell/klist.h"
 #include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
+#include "module_cell/klist.h"
 
 #include <RI/global/Tensor.h>
+
 #include <array>
-#include <deque>
+#include <vector>
+#include <tuple>
 #include <map>
 #include <set>
-#include <tuple>
-#include <vector>
+#include <deque>
 
 namespace RI_2D_Comm
 {
-using TA = int;
-using Tcell = int;
-static const size_t Ndim = 3;
-using TC = std::array<Tcell, Ndim>;
-using TAC = std::pair<TA, TC>;
+	using TA = int;
+	using Tcell = int;
+	static const size_t Ndim = 3;
+	using TC = std::array<Tcell,Ndim>;
+	using TAC = std::pair<TA,TC>;
 
-// public:
-template <typename Tdata, typename Tmatrix>
-extern std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> split_m2D_ktoR(
-    const K_Vectors& kv,
-    const std::vector<const Tmatrix*>& mks_2D,
-    const Parallel_Orbitals& pv);
+//public:
+	template<typename Tdata, typename Tmatrix>
+	extern std::vector<std::map<TA,std::map<TAC,RI::Tensor<Tdata>>>>
+	split_m2D_ktoR(const K_Vectors &kv, const std::vector<const Tmatrix*> &mks_2D, const Parallel_Orbitals &pv);
 
-// judge[is] = {s0, s1}
-extern std::vector<std::tuple<std::set<TA>, std::set<TA>>> get_2D_judge(const Parallel_Orbitals& pv);
+	// judge[is] = {s0, s1}
+	extern std::vector<std::tuple<std::set<TA>, std::set<TA>>>
+	get_2D_judge(const Parallel_Orbitals &pv);
 
-template <typename Tdata, typename TK>
-extern void add_Hexx(const K_Vectors& kv,
-                     const int ik,
-                     const double alpha,
-                     const std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>& Hs,
-                     const Parallel_Orbitals& pv,
-                     TK* hk);
+    template<typename Tdata, typename TK>
+    extern void add_Hexx(
+        const K_Vectors& kv,
+        const int ik,
+        const double alpha,
+        const std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>& Hs,
+        const Parallel_Orbitals& pv,
+        TK* hk);
 
-template <typename Tdata>
-extern std::vector<std::vector<Tdata>> Hexxs_to_Hk(
-    const K_Vectors& kv,
-    const Parallel_Orbitals& pv,
-    const std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>& Hexxs,
-    const int ik);
-template <typename Tdata>
-std::vector<std::vector<Tdata>> pulay_mixing(const Parallel_Orbitals& pv,
-                                             std::deque<std::vector<std::vector<Tdata>>>& Hk_seq,
-                                             const std::vector<std::vector<Tdata>>& Hk_new,
-                                             const double mixing_beta,
-                                             const std::string mixing_mode);
+	template<typename Tdata>
+	extern std::vector<std::vector<Tdata>> Hexxs_to_Hk(
+			const K_Vectors &kv,
+			const Parallel_Orbitals &pv, 
+			const std::vector< std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> &Hexxs,
+			const int ik);
+	template<typename Tdata>
+	std::vector<std::vector<Tdata>> pulay_mixing(
+		const Parallel_Orbitals &pv,
+		std::deque<std::vector<std::vector<Tdata>>> &Hk_seq,
+		const std::vector<std::vector<Tdata>> &Hk_new,
+		const double mixing_beta,
+		const std::string mixing_mode);
 
-// private:
-extern std::vector<int> get_ik_list(const K_Vectors& kv, const int is_k);
-extern inline std::tuple<int, int, int> get_iat_iw_is_block(const int iwt);
-extern inline int get_is_block(const int is_k, const int is_row_b, const int is_col_b);
-extern inline std::tuple<int, int> split_is_block(const int is_b);
-extern inline int get_iwt(const int iat, const int iw_b, const int is_b);
-} // namespace RI_2D_Comm
+//private:
+	extern std::vector<int> get_ik_list(const K_Vectors &kv, const int is_k);
+	extern inline std::tuple<int,int,int> get_iat_iw_is_block(const int iwt);
+	extern inline int get_is_block(const int is_k, const int is_row_b, const int is_col_b);
+	extern inline std::tuple<int,int> split_is_block(const int is_b);
+	extern inline int get_iwt(const int iat, const int iw_b, const int is_b);
+}
 
 #include "RI_2D_Comm.hpp"
 
diff --git a/source/module_ri/RI_2D_Comm.hpp b/source/module_ri/RI_2D_Comm.hpp
index 4c82e9f22a..893316f6fe 100644
--- a/source/module_ri/RI_2D_Comm.hpp
+++ b/source/module_ri/RI_2D_Comm.hpp
@@ -8,213 +8,195 @@
 
 #include "RI_2D_Comm.h"
 #include "RI_Util.h"
-#include "module_base/timer.h"
-#include "module_base/tool_title.h"
 #include "module_hamilt_pw/hamilt_pwdft/global.h"
+#include "module_base/tool_title.h"
+#include "module_base/timer.h"
 
 #include <RI/global/Global_Func-2.h>
+
 #include <cmath>
-#include <stdexcept>
 #include <string>
+#include <stdexcept>
 
-inline RI::Tensor<double> tensor_conj(const RI::Tensor<double>& t)
-{
-    return t;
-}
+inline RI::Tensor<double> tensor_conj(const RI::Tensor<double>& t) { return t; }
 inline RI::Tensor<std::complex<double>> tensor_conj(const RI::Tensor<std::complex<double>>& t)
 {
     RI::Tensor<std::complex<double>> r(t.shape);
-    for (int i = 0; i < t.data->size(); ++i)
-        (*r.data)[i] = std::conj((*t.data)[i]);
+    for (int i = 0;i < t.data->size();++i)(*r.data)[i] = std::conj((*t.data)[i]);
     return r;
 }
-template <typename Tdata, typename Tmatrix>
-auto RI_2D_Comm::split_m2D_ktoR(const K_Vectors& kv,
-                                const std::vector<const Tmatrix*>& mks_2D,
-                                const Parallel_Orbitals& pv)
-    -> std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>
+template<typename Tdata, typename Tmatrix>
+auto RI_2D_Comm::split_m2D_ktoR(const K_Vectors &kv, const std::vector<const Tmatrix*> &mks_2D, const Parallel_Orbitals &pv)
+-> std::vector<std::map<TA,std::map<TAC,RI::Tensor<Tdata>>>>
 {
-    ModuleBase::TITLE("RI_2D_Comm", "split_m2D_ktoR");
-    ModuleBase::timer::tick("RI_2D_Comm", "split_m2D_ktoR");
-
-    const TC period = RI_Util::get_Born_vonKarmen_period(kv);
-    const std::map<int, int> nspin_k = {{1, 1}, {2, 2}, {4, 1}};
-    const double SPIN_multiple = std::map<int, double>{{1, 0.5}, {2, 1}, {4, 1}}.at(GlobalV::NSPIN); // why?
-
-    std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>> mRs_a2D(GlobalV::NSPIN);
-    for (int is_k = 0; is_k < nspin_k.at(GlobalV::NSPIN); ++is_k)
-    {
-        const std::vector<int> ik_list = RI_2D_Comm::get_ik_list(kv, is_k);
-        for (const TC& cell: RI_Util::get_Born_von_Karmen_cells(period))
-        {
-            RI::Tensor<Tdata> mR_2D;
-            for (const int ik: ik_list)
-            {
+	ModuleBase::TITLE("RI_2D_Comm","split_m2D_ktoR");
+	ModuleBase::timer::tick("RI_2D_Comm", "split_m2D_ktoR");
+
+	const TC period = RI_Util::get_Born_vonKarmen_period(kv);
+	const std::map<int,int> nspin_k = {{1,1}, {2,2}, {4,1}};
+	const double SPIN_multiple = std::map<int,double>{{1,0.5}, {2,1}, {4,1}}.at(GlobalV::NSPIN);							// why?
+
+	std::vector<std::map<TA,std::map<TAC,RI::Tensor<Tdata>>>> mRs_a2D(GlobalV::NSPIN);
+	for(int is_k=0; is_k<nspin_k.at(GlobalV::NSPIN); ++is_k)
+	{
+		const std::vector<int> ik_list = RI_2D_Comm::get_ik_list(kv, is_k);
+		for(const TC &cell : RI_Util::get_Born_von_Karmen_cells(period))
+		{
+			RI::Tensor<Tdata> mR_2D;
+			for(const int ik : ik_list)
+			{
                 using Tdata_m = typename Tmatrix::value_type;
-                RI::Tensor<Tdata_m> mk_2D
-                    = RI_Util::Vector_to_Tensor<Tdata_m>(*mks_2D[ik], pv.get_col_size(), pv.get_row_size());
-                const Tdata_m frac
-                    = SPIN_multiple
-                      * RI::Global_Func::convert<Tdata_m>(
-                          std::exp(-ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT
-                                   * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell) * GlobalC::ucell.latvec))));
+                RI::Tensor<Tdata_m> mk_2D = RI_Util::Vector_to_Tensor<Tdata_m>(*mks_2D[ik], pv.get_col_size(), pv.get_row_size());
+				const Tdata_m frac = SPIN_multiple
+					* RI::Global_Func::convert<Tdata_m>( std::exp(
+                        -ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell) * GlobalC::ucell.latvec))));
                 auto set_mR_2D = [&mR_2D](auto&& mk_frac) {
                     if (mR_2D.empty())
                         mR_2D = RI::Global_Func::convert<Tdata>(mk_frac);
                     else
                         mR_2D = mR_2D + RI::Global_Func::convert<Tdata>(mk_frac);
-                };
+                    };
                 if (static_cast<int>(std::round(SPIN_multiple * kv.wk[ik] * kv.get_nkstot_full())) == 2)
                     set_mR_2D(mk_2D * (frac * 0.5) + tensor_conj(mk_2D * (frac * 0.5)));
-                else
-                    set_mR_2D(mk_2D * frac);
-            }
-
-            for (int iwt0_2D = 0; iwt0_2D != mR_2D.shape[0]; ++iwt0_2D)
-            {
-                const int iwt0 = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER() ? pv.local2global_col(iwt0_2D)
-                                                                                     : pv.local2global_row(iwt0_2D);
-                int iat0, iw0_b, is0_b;
-                std::tie(iat0, iw0_b, is0_b) = RI_2D_Comm::get_iat_iw_is_block(iwt0);
-                const int it0 = GlobalC::ucell.iat2it[iat0];
-                for (int iwt1_2D = 0; iwt1_2D != mR_2D.shape[1]; ++iwt1_2D)
-                {
-                    const int iwt1 = ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER() ? pv.local2global_row(iwt1_2D)
-                                                                                         : pv.local2global_col(iwt1_2D);
-                    int iat1, iw1_b, is1_b;
-                    std::tie(iat1, iw1_b, is1_b) = RI_2D_Comm::get_iat_iw_is_block(iwt1);
-                    const int it1 = GlobalC::ucell.iat2it[iat1];
-
-                    const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b);
-                    RI::Tensor<Tdata>& mR_a2D = mRs_a2D[is_b][iat0][{iat1, cell}];
-                    if (mR_a2D.empty())
-                        mR_a2D = RI::Tensor<Tdata>({static_cast<size_t>(GlobalC::ucell.atoms[it0].nw),
-                                                    static_cast<size_t>(GlobalC::ucell.atoms[it1].nw)});
-                    mR_a2D(iw0_b, iw1_b) = mR_2D(iwt0_2D, iwt1_2D);
-                }
-            }
-        }
-    }
-    ModuleBase::timer::tick("RI_2D_Comm", "split_m2D_ktoR");
-    return mRs_a2D;
+                else set_mR_2D(mk_2D * frac);
+			}
+
+			for(int iwt0_2D=0; iwt0_2D!=mR_2D.shape[0]; ++iwt0_2D)
+			{
+				const int iwt0 =
+					ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER()
+                    ? pv.local2global_col(iwt0_2D)
+                    : pv.local2global_row(iwt0_2D);
+				int iat0, iw0_b, is0_b;
+				std::tie(iat0,iw0_b,is0_b) = RI_2D_Comm::get_iat_iw_is_block(iwt0);
+				const int it0 = GlobalC::ucell.iat2it[iat0];
+				for(int iwt1_2D=0; iwt1_2D!=mR_2D.shape[1]; ++iwt1_2D)
+				{
+					const int iwt1 =
+						ModuleBase::GlobalFunc::IS_COLUMN_MAJOR_KS_SOLVER()
+                        ? pv.local2global_row(iwt1_2D)
+                        : pv.local2global_col(iwt1_2D);
+					int iat1, iw1_b, is1_b;
+					std::tie(iat1,iw1_b,is1_b) = RI_2D_Comm::get_iat_iw_is_block(iwt1);
+					const int it1 = GlobalC::ucell.iat2it[iat1];
+
+					const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b);
+					RI::Tensor<Tdata> &mR_a2D = mRs_a2D[is_b][iat0][{iat1,cell}];
+					if(mR_a2D.empty())
+						mR_a2D = RI::Tensor<Tdata>({static_cast<size_t>(GlobalC::ucell.atoms[it0].nw), static_cast<size_t>(GlobalC::ucell.atoms[it1].nw)});
+					mR_a2D(iw0_b,iw1_b) = mR_2D(iwt0_2D, iwt1_2D);
+				}
+			}
+		}
+	}
+	ModuleBase::timer::tick("RI_2D_Comm", "split_m2D_ktoR");
+	return mRs_a2D;
 }
 
-template <typename Tdata, typename TK>
-void RI_2D_Comm::add_Hexx(const K_Vectors& kv,
-                          const int ik,
-                          const double alpha,
-                          const std::vector<std::map<TA, std::map<TAC, RI::Tensor<Tdata>>>>& Hs,
-                          const Parallel_Orbitals& pv,
-                          TK* hk)
+
+template<typename Tdata, typename TK>
+void RI_2D_Comm::add_Hexx(
+	const K_Vectors &kv,
+	const int ik,
+	const double alpha,
+	const std::vector<std::map<TA,std::map<TAC,RI::Tensor<Tdata>>>> &Hs,
+    const Parallel_Orbitals& pv,
+    TK* hk)
 {
-    ModuleBase::TITLE("RI_2D_Comm", "add_Hexx");
-    ModuleBase::timer::tick("RI_2D_Comm", "add_Hexx");
-
-    const std::map<int, std::vector<int>> is_list = {{1, {0}}, {2, {kv.isk[ik]}}, {4, {0, 1, 2, 3}}};
-    for (const int is_b: is_list.at(GlobalV::NSPIN))
-    {
-        int is0_b, is1_b;
-        std::tie(is0_b, is1_b) = RI_2D_Comm::split_is_block(is_b);
-        for (const auto& Hs_tmpA: Hs[is_b])
-        {
-            const TA& iat0 = Hs_tmpA.first;
-            for (const auto& Hs_tmpB: Hs_tmpA.second)
-            {
-                const TA& iat1 = Hs_tmpB.first.first;
-                const TC& cell1 = Hs_tmpB.first.second;
-                const std::complex<double> frac
-                    = alpha
-                      * std::exp(ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT
-                                 * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell1) * GlobalC::ucell.latvec)));
-                const RI::Tensor<Tdata>& H = Hs_tmpB.second;
-                for (size_t iw0_b = 0; iw0_b < H.shape[0]; ++iw0_b)
-                {
-                    const int iwt0 = RI_2D_Comm::get_iwt(iat0, iw0_b, is0_b);
-                    if (pv.global2local_row(iwt0) < 0)
-                        continue;
-                    for (size_t iw1_b = 0; iw1_b < H.shape[1]; ++iw1_b)
-                    {
-                        const int iwt1 = RI_2D_Comm::get_iwt(iat1, iw1_b, is1_b);
-                        if (pv.global2local_col(iwt1) < 0)
-                            continue;
-                        LCAO_Matrix::set_mat2d(iwt0,
-                                               iwt1,
-                                               RI::Global_Func::convert<TK>(H(iw0_b, iw1_b))
-                                                   * RI::Global_Func::convert<TK>(frac),
-                                               pv,
-                                               hk);
-                    }
-                }
-            }
-        }
-    }
-    ModuleBase::timer::tick("RI_2D_Comm", "add_Hexx");
+	ModuleBase::TITLE("RI_2D_Comm","add_Hexx");
+	ModuleBase::timer::tick("RI_2D_Comm", "add_Hexx");
+
+	const std::map<int, std::vector<int>> is_list = {{1,{0}}, {2,{kv.isk[ik]}}, {4,{0,1,2,3}}};
+	for(const int is_b : is_list.at(GlobalV::NSPIN))
+	{
+		int is0_b, is1_b;
+		std::tie(is0_b,is1_b) = RI_2D_Comm::split_is_block(is_b);
+		for(const auto &Hs_tmpA : Hs[is_b])
+		{
+			const TA &iat0 = Hs_tmpA.first;
+			for(const auto &Hs_tmpB : Hs_tmpA.second)
+			{
+				const TA &iat1 = Hs_tmpB.first.first;
+				const TC &cell1 = Hs_tmpB.first.second;
+				const std::complex<double> frac = alpha
+					* std::exp( ModuleBase::TWO_PI*ModuleBase::IMAG_UNIT * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell1)*GlobalC::ucell.latvec)) );
+				const RI::Tensor<Tdata> &H = Hs_tmpB.second;
+				for(size_t iw0_b=0; iw0_b<H.shape[0]; ++iw0_b)
+				{
+					const int iwt0 = RI_2D_Comm::get_iwt(iat0, iw0_b, is0_b);
+                    if (pv.global2local_row(iwt0) < 0)	continue;
+					for(size_t iw1_b=0; iw1_b<H.shape[1]; ++iw1_b)
+					{
+						const int iwt1 = RI_2D_Comm::get_iwt(iat1, iw1_b, is1_b);
+                        if (pv.global2local_col(iwt1) < 0)	continue;
+                        LCAO_Matrix::set_mat2d(iwt0, iwt1, RI::Global_Func::convert<TK>(H(iw0_b, iw1_b)) * RI::Global_Func::convert<TK>(frac), pv, hk);
+					}
+				}
+			}
+		}
+	}
+	ModuleBase::timer::tick("RI_2D_Comm", "add_Hexx");
 }
 
-std::tuple<int, int, int> RI_2D_Comm::get_iat_iw_is_block(const int iwt)
+std::tuple<int,int,int>
+RI_2D_Comm::get_iat_iw_is_block(const int iwt)
 {
-    const int iat = GlobalC::ucell.iwt2iat[iwt];
-    const int iw = GlobalC::ucell.iwt2iw[iwt];
-    switch (GlobalV::NSPIN)
-    {
-    case 1:
-    case 2:
-        return std::make_tuple(iat, iw, 0);
-    case 4:
-        return std::make_tuple(iat, iw / 2, iw % 2);
-    default:
-        throw std::invalid_argument(std::string(__FILE__) + " line " + std::to_string(__LINE__));
-    }
+	const int iat = GlobalC::ucell.iwt2iat[iwt];
+	const int iw = GlobalC::ucell.iwt2iw[iwt];
+	switch(GlobalV::NSPIN)
+	{
+		case 1: case 2:
+			return std::make_tuple(iat, iw, 0);
+		case 4:
+			return std::make_tuple(iat, iw/2, iw%2);
+		default:
+			throw std::invalid_argument(std::string(__FILE__)+" line "+std::to_string(__LINE__));
+	}
 }
 
 int RI_2D_Comm::get_is_block(const int is_k, const int is_row_b, const int is_col_b)
 {
-    switch (GlobalV::NSPIN)
-    {
-    case 1:
-        return 0;
-    case 2:
-        return is_k;
-    case 4:
-        return is_row_b * 2 + is_col_b;
-    default:
-        throw std::invalid_argument(std::string(__FILE__) + " line " + std::to_string(__LINE__));
-    }
+	switch(GlobalV::NSPIN)
+	{
+		case 1:		return 0;
+		case 2:		return is_k;
+		case 4:		return is_row_b*2+is_col_b;
+		default:	throw std::invalid_argument(std::string(__FILE__)+" line "+std::to_string(__LINE__));
+	}
 }
 
-std::tuple<int, int> RI_2D_Comm::split_is_block(const int is_b)
+std::tuple<int,int>
+RI_2D_Comm::split_is_block(const int is_b)
 {
-    switch (GlobalV::NSPIN)
-    {
-    case 1:
-    case 2:
-        return std::make_tuple(0, 0);
-    case 4:
-        return std::make_tuple(is_b / 2, is_b % 2);
-    default:
-        throw std::invalid_argument(std::string(__FILE__) + " line " + std::to_string(__LINE__));
-    }
+	switch(GlobalV::NSPIN)
+	{
+		case 1:	case 2:
+			return std::make_tuple(0, 0);
+		case 4:
+			return std::make_tuple(is_b/2, is_b%2);
+		default:
+			throw std::invalid_argument(std::string(__FILE__)+" line "+std::to_string(__LINE__));
+	}
 }
 
+
+
 int RI_2D_Comm::get_iwt(const int iat, const int iw_b, const int is_b)
 {
-    const int it = GlobalC::ucell.iat2it[iat];
-    const int ia = GlobalC::ucell.iat2ia[iat];
-    int iw = -1;
-    switch (GlobalV::NSPIN)
-    {
-    case 1:
-    case 2:
-        iw = iw_b;
-        break;
-    case 4:
-        iw = iw_b * 2 + is_b;
-        break;
-    default:
-        throw std::invalid_argument(std::string(__FILE__) + " line " + std::to_string(__LINE__));
-    }
-    const int iwt = GlobalC::ucell.itiaiw2iwt(it, ia, iw);
-    return iwt;
+	const int it = GlobalC::ucell.iat2it[iat];
+	const int ia = GlobalC::ucell.iat2ia[iat];
+	int iw=-1;
+	switch(GlobalV::NSPIN)
+	{
+		case 1: case 2:
+			iw = iw_b;			break;
+		case 4:
+			iw = iw_b*2+is_b;	break;
+		default:
+			throw std::invalid_argument(std::string(__FILE__)+" line "+std::to_string(__LINE__));
+	}
+	const int iwt = GlobalC::ucell.itiaiw2iwt(it,ia,iw);
+	return iwt;
 }
 
 #endif
\ No newline at end of file

From d72d89d98859756434aeda02004f6d62b897c691 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci-lite[bot]"
 <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Date: Thu, 4 Jul 2024 15:40:53 +0000
Subject: [PATCH 09/11] [pre-commit.ci lite] apply automatic fixes

---
 .../module_esolver/esolver_ks_lcao_elec.cpp   |  6 +--
 .../operator_lcao/dftu_force_stress.hpp       | 22 ++++++----
 source/module_ri/RI_2D_Comm.hpp               | 44 +++++++++++++------
 3 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp
index 175699c38b..2226b591d8 100644
--- a/source/module_esolver/esolver_ks_lcao_elec.cpp
+++ b/source/module_esolver/esolver_ks_lcao_elec.cpp
@@ -592,12 +592,12 @@ void ESolver_KS_LCAO<std::complex<double>, std::complex<double>>::get_S(void) {
 }
 
 template <typename TK, typename TR>
-void ESolver_KS_LCAO<TK, TR>::nscf(void) {
+void ESolver_KS_LCAO<TK, TR>::nscf() {
     ModuleBase::TITLE("ESolver_KS_LCAO", "nscf");
 
     std::cout << " NON-SELF CONSISTENT CALCULATIONS" << std::endl;
 
-    time_t time_start = std::time(NULL);
+    time_t time_start = std::time(nullptr);
 
 #ifdef __EXX
 #ifdef __MPI
@@ -630,7 +630,7 @@ void ESolver_KS_LCAO<TK, TR>::nscf(void) {
                                  "HSolver has not been initialed!");
     }
 
-    time_t time_finish = std::time(NULL);
+    time_t time_finish = std::time(nullptr);
     ModuleBase::GlobalFunc::OUT_TIME("cal_bands", time_start, time_finish);
 
     GlobalV::ofs_running << " end of band structure calculation " << std::endl;
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp
index 2209e2b426..e30bf5a8dc 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp
@@ -19,14 +19,15 @@ void DFTU<OperatorLCAO<TK, TR>>::cal_force_stress(const bool cal_force,
     // try to get the density matrix, if the density matrix is empty, skip the calculation and return
     const hamilt::HContainer<double>* dmR_tmp[this->nspin];
     dmR_tmp[0] = this->dftu->get_dmr(0);
-    if (this->nspin == 2)
+    if (this->nspin == 2) {
         dmR_tmp[1] = this->dftu->get_dmr(1);
+    }
     if (dmR_tmp[0]->size_atom_pairs() == 0)
     {
         return;
     }
     // begin the calculation of force and stress
-    ModuleBase::timer::tick("DFTU", "cal_force_stress");
+    timex::tick("DFTU", "cal_force_stress");
 
     const Parallel_Orbitals* paraV = dmR_tmp[0]->get_paraV();
     const int npol = this->ucell->get_npol();
@@ -45,8 +46,9 @@ void DFTU<OperatorLCAO<TK, TR>>::cal_force_stress(const bool cal_force,
         int T0, I0;
         ucell->iat2iait(iat0, &I0, &T0);
         const int target_L = this->dftu->orbital_corr[T0];
-        if (target_L == -1)
+        if (target_L == -1) {
             continue;
+        }
         const int tlp1 = 2 * target_L + 1;
         AdjacentAtomInfo& adjs = this->adjs_all[atom_index++];
 
@@ -161,7 +163,7 @@ void DFTU<OperatorLCAO<TK, TR>>::cal_force_stress(const bool cal_force,
                 if (tmp[0] != nullptr)
                 {
                     // calculate force
-                    if (cal_force)
+                    if (cal_force) {
                         this->cal_force_IJR(iat1,
                                             iat2,
                                             paraV,
@@ -172,9 +174,10 @@ void DFTU<OperatorLCAO<TK, TR>>::cal_force_stress(const bool cal_force,
                                             this->nspin,
                                             force_tmp1,
                                             force_tmp2);
+                    }
 
                     // calculate stress
-                    if (cal_stress)
+                    if (cal_stress) {
                         this->cal_stress_IJR(iat1,
                                              iat2,
                                              paraV,
@@ -186,6 +189,7 @@ void DFTU<OperatorLCAO<TK, TR>>::cal_force_stress(const bool cal_force,
                                              dis1,
                                              dis2,
                                              stress_tmp.data());
+                    }
                 }
             }
         }
@@ -223,7 +227,7 @@ void DFTU<OperatorLCAO<TK, TR>>::cal_force_stress(const bool cal_force,
         stress.c[3] = stress.c[1]; // stress(1,0)
     }
 
-    ModuleBase::timer::tick("DFTU", "cal_force_stress");
+    timex::tick("DFTU", "cal_force_stress");
 }
 
 template <typename TK, typename TR>
@@ -251,8 +255,9 @@ void DFTU<OperatorLCAO<TK, TR>>::cal_force_IJR(const int& iat1,
     const int m_size2 = m_size * m_size;
     // step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4
     std::vector<int> step_trace(npol, 0);
-    if (npol == 2)
+    if (npol == 2) {
         step_trace[1] = col_indexes.size() + 1;
+    }
     double tmp[3];
     // calculate the local matrix
     for (int is = 0; is < nspin; is++)
@@ -319,8 +324,9 @@ void DFTU<OperatorLCAO<TK, TR>>::cal_stress_IJR(const int& iat1,
     const int m_size2 = m_size * m_size;
     // step_trace = 0 for NSPIN=1,2; ={0, 1, local_col, local_col+1} for NSPIN=4
     std::vector<int> step_trace(npol, 0);
-    if (npol == 2)
+    if (npol == 2) {
         step_trace[1] = col_indexes.size() + 1;
+    }
     // calculate the local matrix
     for (int is = 0; is < nspin; is++)
     {
diff --git a/source/module_ri/RI_2D_Comm.hpp b/source/module_ri/RI_2D_Comm.hpp
index 893316f6fe..af77cb9508 100644
--- a/source/module_ri/RI_2D_Comm.hpp
+++ b/source/module_ri/RI_2D_Comm.hpp
@@ -22,7 +22,9 @@ inline RI::Tensor<double> tensor_conj(const RI::Tensor<double>& t) { return t; }
 inline RI::Tensor<std::complex<double>> tensor_conj(const RI::Tensor<std::complex<double>>& t)
 {
     RI::Tensor<std::complex<double>> r(t.shape);
-    for (int i = 0;i < t.data->size();++i)(*r.data)[i] = std::conj((*t.data)[i]);
+    for (int i = 0; i < t.data->size(); ++i) {
+        (*r.data)[i] = std::conj((*t.data)[i]);
+    }
     return r;
 }
 template<typename Tdata, typename Tmatrix>
@@ -51,15 +53,21 @@ auto RI_2D_Comm::split_m2D_ktoR(const K_Vectors &kv, const std::vector<const Tma
 					* RI::Global_Func::convert<Tdata_m>( std::exp(
                         -ModuleBase::TWO_PI * ModuleBase::IMAG_UNIT * (kv.kvec_c[ik] * (RI_Util::array3_to_Vector3(cell) * GlobalC::ucell.latvec))));
                 auto set_mR_2D = [&mR_2D](auto&& mk_frac) {
-                    if (mR_2D.empty())
+                    if (mR_2D.empty()) {
                         mR_2D = RI::Global_Func::convert<Tdata>(mk_frac);
-                    else
-                        mR_2D = mR_2D + RI::Global_Func::convert<Tdata>(mk_frac);
-                    };
-                if (static_cast<int>(std::round(SPIN_multiple * kv.wk[ik] * kv.get_nkstot_full())) == 2)
+                    } else {
+                        mR_2D
+                            = mR_2D + RI::Global_Func::convert<Tdata>(mk_frac);
+                    }
+                };
+                if (static_cast<int>(std::round(SPIN_multiple * kv.wk[ik]
+                                                * kv.get_nkstot_full()))
+                    == 2) {
                     set_mR_2D(mk_2D * (frac * 0.5) + tensor_conj(mk_2D * (frac * 0.5)));
-                else set_mR_2D(mk_2D * frac);
-			}
+                } else {
+                    set_mR_2D(mk_2D * frac);
+                }
+            }
 
 			for(int iwt0_2D=0; iwt0_2D!=mR_2D.shape[0]; ++iwt0_2D)
 			{
@@ -82,9 +90,13 @@ auto RI_2D_Comm::split_m2D_ktoR(const K_Vectors &kv, const std::vector<const Tma
 
 					const int is_b = RI_2D_Comm::get_is_block(is_k, is0_b, is1_b);
 					RI::Tensor<Tdata> &mR_a2D = mRs_a2D[is_b][iat0][{iat1,cell}];
-					if(mR_a2D.empty())
-						mR_a2D = RI::Tensor<Tdata>({static_cast<size_t>(GlobalC::ucell.atoms[it0].nw), static_cast<size_t>(GlobalC::ucell.atoms[it1].nw)});
-					mR_a2D(iw0_b,iw1_b) = mR_2D(iwt0_2D, iwt1_2D);
+                    if (mR_a2D.empty()) {
+                        mR_a2D = RI::Tensor<Tdata>(
+                            {static_cast<size_t>(GlobalC::ucell.atoms[it0].nw),
+                             static_cast<size_t>(
+                                 GlobalC::ucell.atoms[it1].nw)});
+                    }
+                    mR_a2D(iw0_b,iw1_b) = mR_2D(iwt0_2D, iwt1_2D);
 				}
 			}
 		}
@@ -124,11 +136,15 @@ void RI_2D_Comm::add_Hexx(
 				for(size_t iw0_b=0; iw0_b<H.shape[0]; ++iw0_b)
 				{
 					const int iwt0 = RI_2D_Comm::get_iwt(iat0, iw0_b, is0_b);
-                    if (pv.global2local_row(iwt0) < 0)	continue;
-					for(size_t iw1_b=0; iw1_b<H.shape[1]; ++iw1_b)
+                    if (pv.global2local_row(iwt0) < 0) {
+                        continue;
+                    }
+                    for(size_t iw1_b=0; iw1_b<H.shape[1]; ++iw1_b)
 					{
 						const int iwt1 = RI_2D_Comm::get_iwt(iat1, iw1_b, is1_b);
-                        if (pv.global2local_col(iwt1) < 0)	continue;
+                        if (pv.global2local_col(iwt1) < 0) {
+                            continue;
+                        }
                         LCAO_Matrix::set_mat2d(iwt0, iwt1, RI::Global_Func::convert<TK>(H(iw0_b, iw1_b)) * RI::Global_Func::convert<TK>(frac), pv, hk);
 					}
 				}

From c2d22b380cd1995624a77d121741fc385a98bae3 Mon Sep 17 00:00:00 2001
From: dyzheng <zhengdy@bjaisi.com>
Date: Fri, 5 Jul 2024 11:01:01 +0800
Subject: [PATCH 10/11] Refactor: remove LM in DFTU

---
 source/module_esolver/esolver_ks_lcao.cpp     |  2 +-
 .../hamilt_lcaodft/FORCE_STRESS.cpp           |  1 -
 .../operator_lcao/dftu_force_stress.hpp       |  5 +-
 .../module_hamilt_lcao/module_dftu/dftu.cpp   |  4 +-
 source/module_hamilt_lcao/module_dftu/dftu.h  |  6 +-
 .../module_dftu/dftu_force.cpp                | 13 ++--
 .../module_dftu/dftu_hamilt.cpp               | 60 +++++++++----------
 .../module_dftu/dftu_occup.cpp                | 40 ++++++-------
 .../module_dftu/dftu_tools.cpp                | 16 ++---
 9 files changed, 71 insertions(+), 76 deletions(-)

diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp
index 7fccd4b5e4..bbed3c6da3 100644
--- a/source/module_esolver/esolver_ks_lcao.cpp
+++ b/source/module_esolver/esolver_ks_lcao.cpp
@@ -212,7 +212,7 @@ void ESolver_KS_LCAO<TK, TR>::before_all_runners(Input& inp, UnitCell& ucell) {
 
     // 8) initialize DFT+U
     if (GlobalV::dft_plus_u) {
-        GlobalC::dftu.init(ucell, this->LM, this->kv.get_nks());
+        GlobalC::dftu.init(ucell, this->LM.ParaV, this->kv.get_nks());
     }
 
     // 9) initialize ppcell
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
index fc5133470c..5aec438f66 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/FORCE_STRESS.cpp
@@ -240,7 +240,6 @@ void Force_Stress_LCAO<T>::getForceStress(const bool isforce,
         if (GlobalV::dft_plus_u == 2)
         {
             GlobalC::dftu.force_stress(pelec,
-                                       lm,
                                        pv,
                                        fsr, // mohan 2024-06-16
                                        force_dftu,
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp
index e30bf5a8dc..4d1c1bec2b 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/dftu_force_stress.hpp
@@ -1,6 +1,7 @@
 #pragma once
 #include "dftu_lcao.h"
 #include "module_base/parallel_reduce.h"
+#include "module_base/timer.h"
 
 namespace hamilt
 {
@@ -27,7 +28,7 @@ void DFTU<OperatorLCAO<TK, TR>>::cal_force_stress(const bool cal_force,
         return;
     }
     // begin the calculation of force and stress
-    timex::tick("DFTU", "cal_force_stress");
+    ModuleBase::timer::tick("DFTU", "cal_force_stress");
 
     const Parallel_Orbitals* paraV = dmR_tmp[0]->get_paraV();
     const int npol = this->ucell->get_npol();
@@ -227,7 +228,7 @@ void DFTU<OperatorLCAO<TK, TR>>::cal_force_stress(const bool cal_force,
         stress.c[3] = stress.c[1]; // stress(1,0)
     }
 
-    timex::tick("DFTU", "cal_force_stress");
+    ModuleBase::timer::tick("DFTU", "cal_force_stress");
 }
 
 template <typename TK, typename TR>
diff --git a/source/module_hamilt_lcao/module_dftu/dftu.cpp b/source/module_hamilt_lcao/module_dftu/dftu.cpp
index 14be04406e..282b9fba32 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu.cpp
@@ -36,7 +36,7 @@ DFTU::~DFTU()
 }
 
 void DFTU::init(UnitCell& cell, // unitcell class
-                LCAO_Matrix& lm,
+                const Parallel_Orbitals* pv,
                 const int& nks)
 {
     ModuleBase::TITLE("DFTU", "init");
@@ -46,7 +46,7 @@ void DFTU::init(UnitCell& cell, // unitcell class
     exit(0);
 #endif
 
-    this->LM = &lm;
+    this->paraV = pv;
 
     // needs reconstructions in future
     // global parameters, need to be removed in future
diff --git a/source/module_hamilt_lcao/module_dftu/dftu.h b/source/module_hamilt_lcao/module_dftu/dftu.h
index db86ab26ad..c673b36eb9 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu.h
+++ b/source/module_hamilt_lcao/module_dftu/dftu.h
@@ -8,7 +8,6 @@
 #include "module_cell/klist.h"
 #include "module_cell/unitcell.h"
 #include "module_basis/module_ao/parallel_orbitals.h"
-#include "module_hamilt_lcao/hamilt_lcaodft/LCAO_matrix.h"
 #include "module_elecstate/module_charge/charge_mixing.h"
 #include "module_hamilt_general/hamilt.h"
 #include "module_elecstate/elecstate.h"
@@ -39,7 +38,7 @@ class DFTU
   public:
     // allocate relevant data strcutures
     void init(UnitCell& cell, // unitcell class
-              LCAO_Matrix& lm,
+              const Parallel_Orbitals* pv,
               const int& nks);
 
     // calculate the energy correction
@@ -57,7 +56,7 @@ class DFTU
 
     double EU; //+U energy
   private:
-    LCAO_Matrix* LM;
+    const Parallel_Orbitals* paraV = nullptr;
     int cal_type = 3; // 1:dftu_tpye=1, dc=1; 2:dftu_type=1, dc=2; 3:dftu_tpye=2, dc=1; 4:dftu_tpye=2, dc=2;
     
     // transform between iwt index and it, ia, L, N and m index
@@ -162,7 +161,6 @@ class DFTU
   public:
 
    void force_stress(const elecstate::ElecState* pelec,
-		   LCAO_Matrix& lm,
 		   const Parallel_Orbitals& pv,
 		   ForceStressArrays& fsr,
 		   ModuleBase::matrix& force_dftu,
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_force.cpp b/source/module_hamilt_lcao/module_dftu/dftu_force.cpp
index bc73bf878d..4354f3b9ac 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_force.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_force.cpp
@@ -72,7 +72,6 @@ namespace ModuleDFTU
 {
 
 void DFTU::force_stress(const elecstate::ElecState* pelec,
-                        LCAO_Matrix& lm,
                         const Parallel_Orbitals& pv,
                         ForceStressArrays& fsr, // mohan add 2024-06-16
                         ModuleBase::matrix& force_dftu,
@@ -84,8 +83,6 @@ void DFTU::force_stress(const elecstate::ElecState* pelec,
 
     const int nlocal = GlobalV::NLOCAL;
 
-    this->LM = &lm;
-
     if (GlobalV::CAL_FORCE)
     {
         force_dftu.zero_out();
@@ -643,14 +640,14 @@ void DFTU::cal_stress_gamma(const UnitCell& ucell,
                     pv.desc);
 #endif
 
-            for (int ir = 0; ir < this->LM->ParaV->nrow; ir++)
+            for (int ir = 0; ir < this->paraV->nrow; ir++)
             {
-                const int iwt1 = this->LM->ParaV->local2global_row(ir);
+                const int iwt1 = this->paraV->local2global_row(ir);
 
-                for (int ic = 0; ic < this->LM->ParaV->ncol; ic++)
+                for (int ic = 0; ic < this->paraV->ncol; ic++)
                 {
-                    const int iwt2 = this->LM->ParaV->local2global_col(ic);
-                    const int irc = ic * this->LM->ParaV->nrow + ir;
+                    const int iwt2 = this->paraV->local2global_col(ic);
+                    const int irc = ic * this->paraV->nrow + ir;
 
                     if (iwt1 == iwt2)
                         stress_dftu(dim1, dim2) += 2.0 * dm_VU_sover[irc];
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp b/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp
index a786e97a42..717dcb853c 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_hamilt.cpp
@@ -18,7 +18,7 @@ void DFTU::cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot,
 
     int spin = isk[ik];
 
-    ModuleBase::GlobalFunc::ZEROS(eff_pot, this->LM->ParaV->nloc);
+    ModuleBase::GlobalFunc::ZEROS(eff_pot, this->paraV->nloc);
 
     //=============================================================
     //   PART2: call pblas to calculate effective potential matrix
@@ -29,28 +29,28 @@ void DFTU::cal_eff_pot_mat_complex(const int ik, std::complex<double>* eff_pot,
     const std::complex<double> half = 0.5;
     const std::complex<double> zero = 0.0;
 
-    std::vector<std::complex<double>> VU(this->LM->ParaV->nloc);
+    std::vector<std::complex<double>> VU(this->paraV->nloc);
     this->cal_VU_pot_mat_complex(spin, true, &VU[0]);
 
 #ifdef __MPI
 	pzgemm_(&transN, &transN,
             &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
             &half, 
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
-            sk, &one_int, &one_int, this->LM->ParaV->desc,
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->paraV->desc,
+            sk, &one_int, &one_int, this->paraV->desc,
             &zero,
-            eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
+            eff_pot, &one_int, &one_int, this->paraV->desc);
 #endif
 
-    for (int irc = 0; irc < this->LM->ParaV->nloc; irc++)
+    for (int irc = 0; irc < this->paraV->nloc; irc++)
         VU[irc] = eff_pot[irc];
 
 #ifdef __MPI
   	pztranc_(&GlobalV::NLOCAL, &GlobalV::NLOCAL, 
             &one, 
-            &VU[0], &one_int, &one_int, this->LM->ParaV->desc, 
+            &VU[0], &one_int, &one_int, this->paraV->desc, 
             &one, 
-            eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
+            eff_pot, &one_int, &one_int, this->paraV->desc);
 #endif
 
     ModuleBase::timer::tick("DFTU", "cal_eff_pot_mat");
@@ -69,7 +69,7 @@ void DFTU::cal_eff_pot_mat_real(const int ik, double* eff_pot, const std::vector
 
     int spin = isk[ik];
 
-    ModuleBase::GlobalFunc::ZEROS(eff_pot, this->LM->ParaV->nloc);
+    ModuleBase::GlobalFunc::ZEROS(eff_pot, this->paraV->nloc);
 
     //=============================================================
     //   PART2: call pblas to calculate effective potential matrix
@@ -78,28 +78,28 @@ void DFTU::cal_eff_pot_mat_real(const int ik, double* eff_pot, const std::vector
     int one_int = 1;
     double alpha = 1.0, beta = 0.0, half = 0.5, one = 1.0;
 
-    std::vector<double> VU(this->LM->ParaV->nloc);
+    std::vector<double> VU(this->paraV->nloc);
     this->cal_VU_pot_mat_real(spin, 1, &VU[0]);
 
 #ifdef __MPI
 	pdgemm_(&transN, &transN,
             &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
             &half, 
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc, 
-            sk, &one_int, &one_int, this->LM->ParaV->desc,
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->paraV->desc, 
+            sk, &one_int, &one_int, this->paraV->desc,
             &beta,
-            eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
+            eff_pot, &one_int, &one_int, this->paraV->desc);
 #endif
 
-    for (int irc = 0; irc < this->LM->ParaV->nloc; irc++)
+    for (int irc = 0; irc < this->paraV->nloc; irc++)
         VU[irc] = eff_pot[irc];
 
 #ifdef __MPI
 	pdtran_(&GlobalV::NLOCAL, &GlobalV::NLOCAL, 
             &one, 
-            &VU[0], &one_int, &one_int, this->LM->ParaV->desc, 
+            &VU[0], &one_int, &one_int, const_cast<int*>(this->paraV->desc), 
             &one, 
-            eff_pot, &one_int, &one_int, this->LM->ParaV->desc);
+            eff_pot, &one_int, &one_int, const_cast<int*>(this->paraV->desc));
 #endif
 
     ModuleBase::timer::tick("DFTU", "cal_eff_pot_mat");
@@ -112,25 +112,25 @@ void DFTU::cal_eff_pot_mat_R_double(const int ispin, double* SR, double* HR)
     const int one_int = 1;
     const double alpha = 1.0, beta = 0.0, one = 1.0, half = 0.5;
 
-    std::vector<double> VU(this->LM->ParaV->nloc);
+    std::vector<double> VU(this->paraV->nloc);
     this->cal_VU_pot_mat_real(ispin, 1, &VU[0]);
 
 #ifdef __MPI
     pdgemm_(&transN, &transN,
             &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
             &half, 
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc, 
-            SR, &one_int, &one_int, this->LM->ParaV->desc,
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->paraV->desc, 
+            SR, &one_int, &one_int, this->paraV->desc,
             &beta,
-            HR, &one_int, &one_int, this->LM->ParaV->desc);
+            HR, &one_int, &one_int, this->paraV->desc);
 
     pdgemm_(&transN, &transN,
             &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
             &half, 
-            SR, &one_int, &one_int, this->LM->ParaV->desc, 
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
+            SR, &one_int, &one_int, this->paraV->desc, 
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->paraV->desc,
             &one,
-            HR, &one_int, &one_int, this->LM->ParaV->desc);
+            HR, &one_int, &one_int, this->paraV->desc);
 #endif
 
     return;
@@ -142,25 +142,25 @@ void DFTU::cal_eff_pot_mat_R_complex_double(const int ispin, std::complex<double
     const int one_int = 1;
     const std::complex<double> zero = 0.0, one = 1.0, half = 0.5;
 
-    std::vector<std::complex<double>> VU(this->LM->ParaV->nloc);
+    std::vector<std::complex<double>> VU(this->paraV->nloc);
     this->cal_VU_pot_mat_complex(ispin, 1, &VU[0]);
 
 #ifdef __MPI
     pzgemm_(&transN, &transN,
             &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
             &half, 
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
-            SR, &one_int, &one_int, this->LM->ParaV->desc,
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->paraV->desc,
+            SR, &one_int, &one_int, this->paraV->desc,
             &zero,
-            HR, &one_int, &one_int, this->LM->ParaV->desc);
+            HR, &one_int, &one_int, this->paraV->desc);
 
     pzgemm_(&transN, &transN,
             &GlobalV::NLOCAL, &GlobalV::NLOCAL, &GlobalV::NLOCAL,
             &half, 
-            SR, &one_int, &one_int, this->LM->ParaV->desc, 
-            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->LM->ParaV->desc,
+            SR, &one_int, &one_int, this->paraV->desc, 
+            ModuleBase::GlobalFunc::VECTOR_TO_PTR(VU), &one_int, &one_int, this->paraV->desc,
             &one,
-            HR, &one_int, &one_int, this->LM->ParaV->desc);
+            HR, &one_int, &one_int, this->paraV->desc);
 #endif
 
     return;
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp b/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp
index da8d93a8b7..ae710d93dd 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_occup.cpp
@@ -155,7 +155,7 @@ void DFTU::cal_occup_m_k(const int iter,
     const int one_int = 1;
     const std::complex<double> beta(0.0,0.0), alpha(1.0,0.0);
 
-    std::vector<std::complex<double>> srho(this->LM->ParaV->nloc);
+    std::vector<std::complex<double>> srho(this->paraV->nloc);
 
     for (int ik = 0; ik < kv.get_nks(); ik++)
     {
@@ -181,17 +181,17 @@ void DFTU::cal_occup_m_k(const int iter,
                 s_k_pointer,
                 &one_int,
                 &one_int,
-                this->LM->ParaV->desc,
+                this->paraV->desc,
                 dm_k[ik].data(),
                 //dm_k[ik].c,
                 &one_int,
                 &one_int,
-                this->LM->ParaV->desc,
+                this->paraV->desc,
                 &beta,
                 &srho[0],
                 &one_int,
                 &one_int,
-                this->LM->ParaV->desc);
+                this->paraV->desc);
 #endif
 
         const int spin = kv.isk[ik];
@@ -226,19 +226,19 @@ void DFTU::cal_occup_m_k(const int iter,
                             for (int ipol0 = 0; ipol0 < GlobalV::NPOL; ipol0++)
                             {
                                 const int iwt0 = this->iatlnmipol2iwt[iat][l][n][m0][ipol0];
-                                const int mu = this->LM->ParaV->global2local_row(iwt0);
-                                const int mu_prime = this->LM->ParaV->global2local_col(iwt0);
+                                const int mu = this->paraV->global2local_row(iwt0);
+                                const int mu_prime = this->paraV->global2local_col(iwt0);
 
                                 for (int m1 = 0; m1 < 2 * l + 1; m1++)
                                 {
                                     for (int ipol1 = 0; ipol1 < GlobalV::NPOL; ipol1++)
                                     {
                                         const int iwt1 = this->iatlnmipol2iwt[iat][l][n][m1][ipol1];
-                                        const int nu = this->LM->ParaV->global2local_col(iwt1);
-                                        const int nu_prime = this->LM->ParaV->global2local_row(iwt1);
+                                        const int nu = this->paraV->global2local_col(iwt1);
+                                        const int nu_prime = this->paraV->global2local_row(iwt1);
 
-                                        const int irc = nu * this->LM->ParaV->nrow + mu;
-                                        const int irc_prime = mu_prime * this->LM->ParaV->nrow + nu_prime;
+                                        const int irc = nu * this->paraV->nrow + mu;
+                                        const int irc_prime = mu_prime * this->paraV->nrow + nu_prime;
 
                                         const int m0_all = m0 + ipol0 * (2 * l + 1);
                                         const int m1_all = m1 + ipol1 * (2 * l + 1);
@@ -366,7 +366,7 @@ void DFTU::cal_occup_m_gamma(const int iter, const std::vector<std::vector<doubl
     const int one_int = 1;
     const double alpha = 1.0, beta = 0.0;
 
-    std::vector<double> srho(this->LM->ParaV->nloc);
+    std::vector<double> srho(this->paraV->nloc);
     for (int is = 0; is < GlobalV::NSPIN; is++)
     {
         // srho(mu,nu) = \sum_{iw} S(mu,iw)*dm_gamma(iw,nu)
@@ -382,17 +382,17 @@ void DFTU::cal_occup_m_gamma(const int iter, const std::vector<std::vector<doubl
                 s_gamma_pointer,
                 &one_int,
                 &one_int,
-                this->LM->ParaV->desc,
+                this->paraV->desc,
                 dm_gamma[is].data(),
                 //dm_gamma[is].c,
                 &one_int,
                 &one_int,
-                this->LM->ParaV->desc,
+                this->paraV->desc,
                 &beta,
                 &srho[0],
                 &one_int,
                 &one_int,
-                this->LM->ParaV->desc);
+                this->paraV->desc);
 #endif
 
         for (int it = 0; it < GlobalC::ucell.ntype; it++)
@@ -422,19 +422,19 @@ void DFTU::cal_occup_m_gamma(const int iter, const std::vector<std::vector<doubl
                             for (int ipol0 = 0; ipol0 < GlobalV::NPOL; ipol0++)
                             {
                                 const int iwt0 = this->iatlnmipol2iwt[iat][l][n][m0][ipol0];
-                                const int mu = this->LM->ParaV->global2local_row(iwt0);
-                                const int mu_prime = this->LM->ParaV->global2local_col(iwt0);
+                                const int mu = this->paraV->global2local_row(iwt0);
+                                const int mu_prime = this->paraV->global2local_col(iwt0);
 
                                 for (int m1 = 0; m1 < 2 * l + 1; m1++)
                                 {
                                     for (int ipol1 = 0; ipol1 < GlobalV::NPOL; ipol1++)
                                     {
                                         const int iwt1 = this->iatlnmipol2iwt[iat][l][n][m1][ipol1];
-                                        const int nu = this->LM->ParaV->global2local_col(iwt1);
-                                        const int nu_prime = this->LM->ParaV->global2local_row(iwt1);
+                                        const int nu = this->paraV->global2local_col(iwt1);
+                                        const int nu_prime = this->paraV->global2local_row(iwt1);
 
-                                        const int irc = nu * this->LM->ParaV->nrow + mu;
-                                        const int irc_prime = mu_prime * this->LM->ParaV->nrow + nu_prime;
+                                        const int irc = nu * this->paraV->nrow + mu;
+                                        const int irc_prime = mu_prime * this->paraV->nrow + nu_prime;
 
                                         if ((nu >= 0) && (mu >= 0))
                                         {
diff --git a/source/module_hamilt_lcao/module_dftu/dftu_tools.cpp b/source/module_hamilt_lcao/module_dftu/dftu_tools.cpp
index c48cfd925b..9efbdb224f 100644
--- a/source/module_hamilt_lcao/module_dftu/dftu_tools.cpp
+++ b/source/module_hamilt_lcao/module_dftu/dftu_tools.cpp
@@ -8,7 +8,7 @@ namespace ModuleDFTU
 void DFTU::cal_VU_pot_mat_complex(const int spin, const bool newlocale, std::complex<double>* VU)
 {
     ModuleBase::TITLE("DFTU", "cal_VU_pot_mat_complex");
-    ModuleBase::GlobalFunc::ZEROS(VU, this->LM->ParaV->nloc);
+    ModuleBase::GlobalFunc::ZEROS(VU, this->paraV->nloc);
 
     for (int it = 0; it < GlobalC::ucell.ntype; ++it)
     {
@@ -31,7 +31,7 @@ void DFTU::cal_VU_pot_mat_complex(const int spin, const bool newlocale, std::com
                     {
                         for (int ipol1 = 0; ipol1 < GlobalV::NPOL; ipol1++)
                         {
-                            const int mu = this->LM->ParaV->global2local_row(this->iatlnmipol2iwt[iat][L][n][m1][ipol1]);
+                            const int mu = this->paraV->global2local_row(this->iatlnmipol2iwt[iat][L][n][m1][ipol1]);
                             if (mu < 0)
                                 continue;
 
@@ -40,7 +40,7 @@ void DFTU::cal_VU_pot_mat_complex(const int spin, const bool newlocale, std::com
                                 for (int ipol2 = 0; ipol2 < GlobalV::NPOL; ipol2++)
                                 {
                                     const int nu
-                                        = this->LM->ParaV->global2local_col(this->iatlnmipol2iwt[iat][L][n][m2][ipol2]);
+                                        = this->paraV->global2local_col(this->iatlnmipol2iwt[iat][L][n][m2][ipol2]);
                                     if (nu < 0)
                                         continue;
 
@@ -48,7 +48,7 @@ void DFTU::cal_VU_pot_mat_complex(const int spin, const bool newlocale, std::com
                                     int m2_all = m2 + (2 * L + 1) * ipol2;
 
                                     double val = get_onebody_eff_pot(it, iat, L, n, spin, m1_all, m2_all, newlocale);
-                                    VU[nu * this->LM->ParaV->nrow + mu] = std::complex<double>(val, 0.0);
+                                    VU[nu * this->paraV->nrow + mu] = std::complex<double>(val, 0.0);
                                 } // ipol2
                             } // m2
                         } // ipol1
@@ -64,7 +64,7 @@ void DFTU::cal_VU_pot_mat_complex(const int spin, const bool newlocale, std::com
 void DFTU::cal_VU_pot_mat_real(const int spin, const bool newlocale, double* VU)
 {
     ModuleBase::TITLE("DFTU", "cal_VU_pot_mat_real");
-    ModuleBase::GlobalFunc::ZEROS(VU, this->LM->ParaV->nloc);
+    ModuleBase::GlobalFunc::ZEROS(VU, this->paraV->nloc);
 
     for (int it = 0; it < GlobalC::ucell.ntype; ++it)
     {
@@ -87,7 +87,7 @@ void DFTU::cal_VU_pot_mat_real(const int spin, const bool newlocale, double* VU)
                     {
                         for (int ipol1 = 0; ipol1 < GlobalV::NPOL; ipol1++)
                         {
-                            const int mu = this->LM->ParaV->global2local_row(this->iatlnmipol2iwt[iat][L][n][m1][ipol1]);
+                            const int mu = this->paraV->global2local_row(this->iatlnmipol2iwt[iat][L][n][m1][ipol1]);
                             if (mu < 0)
                                 continue;
                             for (int m2 = 0; m2 < 2 * L + 1; m2++)
@@ -95,14 +95,14 @@ void DFTU::cal_VU_pot_mat_real(const int spin, const bool newlocale, double* VU)
                                 for (int ipol2 = 0; ipol2 < GlobalV::NPOL; ipol2++)
                                 {
                                     const int nu
-                                        = this->LM->ParaV->global2local_col(this->iatlnmipol2iwt[iat][L][n][m2][ipol2]);
+                                        = this->paraV->global2local_col(this->iatlnmipol2iwt[iat][L][n][m2][ipol2]);
                                     if (nu < 0)
                                         continue;
 
                                     int m1_all = m1 + (2 * L + 1) * ipol1;
                                     int m2_all = m2 + (2 * L + 1) * ipol2;
 
-                                    VU[nu * this->LM->ParaV->nrow + mu]
+                                    VU[nu * this->paraV->nrow + mu]
                                         = this->get_onebody_eff_pot(it, iat, L, n, spin, m1_all, m2_all, newlocale);
 
                                 } // ipol2

From db5ad4692d012adc6f23096cb451c9ebcca531e0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci-lite[bot]"
 <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Date: Fri, 5 Jul 2024 04:29:30 +0000
Subject: [PATCH 11/11] [pre-commit.ci lite] apply automatic fixes

---
 .../hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h             | 2 +-
 .../hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h
index 41a2fde405..1fbcd48029 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/td_nonlocal_lcao.h
@@ -76,7 +76,7 @@ class TDNonlocal<OperatorLCAO<TK, TR>> : public OperatorLCAO<TK, TR>
      */
     void initialize_HR_tmp(const Parallel_Orbitals* paraV);
     /// @brief init vector potential for td_nonlocal term
-    void init_td(void);
+    void init_td();
     /**
      * @brief calculate the non-local pseudopotential matrix with specific <I,J,R> atom-pairs
      * nearest neighbor atoms don't need to be calculated again
diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp
index a2054a6fa2..beb5925915 100644
--- a/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp
+++ b/source/module_hamilt_lcao/hamilt_lcaodft/operator_lcao/test/test_nonlocalnew.cpp
@@ -133,7 +133,7 @@ class NonlocalNewTest : public ::testing::Test
 TEST_F(NonlocalNewTest, constructHRd2d)
 {
     std::vector<ModuleBase::Vector3<double>> kvec_d_in(1, ModuleBase::Vector3<double>(0.0, 0.0, 0.0));
-    hamilt::HS_Matrix_K<double> hsk(paraV, 1);
+    hamilt::HS_Matrix_K<double> hsk(paraV, true);
     hsk.set_zero_hk();
     Grid_Driver gd(0, 0, 0);
     // check some input values