From 7acb202d4dbfdba9415be058cc4d716375291f15 Mon Sep 17 00:00:00 2001
From: jinzx10 <jzx016@hotmail.com>
Date: Fri, 12 Jul 2024 02:41:55 +0800
Subject: [PATCH 1/7] remove comm_2D from Parallel_2D

---
 source/module_base/blacs_connector.h          | 77 +++++++++++++++++++
 source/module_basis/module_ao/parallel_2d.cpp | 23 +-----
 source/module_basis/module_ao/parallel_2d.h   | 15 ++--
 .../module_ao/parallel_orbitals.cpp           |  1 -
 .../module_ao/test/parallel_2d_test.cpp       |  4 +-
 .../module_ao/test/parallel_orbitals_test.cpp |  2 +-
 source/module_esolver/esolver_ks_lcao.cpp     |  2 +-
 .../module_tddft/bandenergy.cpp               | 11 +--
 .../module_tddft/norm_psi.cpp                 |  8 +-
 .../module_tddft/propagator.cpp               |  9 +--
 .../module_tddft/test/bandenergy_test.cpp     |  4 -
 .../module_tddft/test/norm_psi_test.cpp       |  4 -
 .../module_tddft/test/propagator_test2.cpp    |  6 +-
 .../module_tddft/test/propagator_test3.cpp    |  6 +-
 .../module_hsolver/test/diago_pexsi_test.cpp  |  1 -
 source/module_io/io_dmk.cpp                   | 12 +--
 source/module_io/istate_envelope.cpp          | 41 ++++------
 source/module_io/read_wfc_lcao.cpp            |  6 +-
 source/module_io/read_wfc_nao.cpp             |  4 +-
 source/module_io/write_Vxc.hpp                |  2 +-
 source/module_io/write_dmr.cpp                |  4 +-
 source/module_io/write_wfc_nao.cpp            |  4 +-
 22 files changed, 133 insertions(+), 113 deletions(-)
diff --git a/source/module_base/blacs_connector.h b/source/module_base/blacs_connector.h
index 2d3baa4d5a..3bcc43811a 100644
--- a/source/module_base/blacs_connector.h
+++ b/source/module_base/blacs_connector.h
@@ -29,6 +29,8 @@
 #ifndef BLACS_CONNECTOR_H
 #define BLACS_CONNECTOR_H
 
+#include <complex>
+
 extern "C"
 {
 	void Cblacs_pinfo(int *myid, int *nprocs);
@@ -41,13 +43,88 @@ extern "C"
     int Cblacs_pnum(int icontxt, int prow, int pcol);
     void Cblacs_pcoord(int icontxt, int pnum, int *prow, int *pcol);
 	void Cblacs_exit(int icontxt);
+
+    // broadcast (send/recv)
+    void Cigebs2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda);
+    void Cigebr2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda, int rsrc, int csrc);
+
+    void Csgebs2d(int ConTxt, char *scope, char *top, int m, int n, float *A, int lda);
+    void Csgebr2d(int ConTxt, char *scope, char *top, int m, int n, float *A, int lda, int rsrc, int csrc);
+
+    void Cdgebs2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda);
+    void Cdgebr2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int rsrc, int csrc);
+
+    void Ccgebs2d(int ConTxt, char *scope, char *top, int m, int n, std::complex<float> *A, int lda);
+    void Ccgebr2d(int ConTxt, char *scope, char *top, int m, int n, std::complex<float> *A, int lda, int rsrc, int csrc);
+
+    void Czgebs2d(int ConTxt, char *scope, char *top, int m, int n, std::complex<double> *A, int lda);
+    void Czgebr2d(int ConTxt, char *scope, char *top, int m, int n, std::complex<double> *A, int lda, int rsrc, int csrc);
 }
 
+// Unified interface for BLACS broadcast-send: dispatches on the element type T
+// to the matching routine C{i,s,d,c,z}gebs2d; every other argument is forwarded
+// unchanged. Unsupported element types are rejected at compile time.
+// NOTE(review): std::is_same needs <type_traits>; confirm it is included here.
+template <typename T>
+void Cxgebs2d(int ConTxt, char *scope, char *top, int m, int n, T *A, int lda)
+{
+    constexpr bool is_i = std::is_same<T, int>::value;
+    constexpr bool is_s = std::is_same<T, float>::value;
+    constexpr bool is_d = std::is_same<T, double>::value;
+    constexpr bool is_c = std::is_same<T, std::complex<float>>::value;
+    constexpr bool is_z = std::is_same<T, std::complex<double>>::value;
+    static_assert(is_i || is_s || is_d || is_c || is_z, "Type not supported");
+
+    // All branches are compiled for every T, hence the pointer casts; in the
+    // branch that is actually taken the cast target is T* itself.
+    if (is_i) {
+        Cigebs2d(ConTxt, scope, top, m, n, reinterpret_cast<int*>(A), lda);
+    } else if (is_s) {
+        Csgebs2d(ConTxt, scope, top, m, n, reinterpret_cast<float*>(A), lda);
+    } else if (is_d) {
+        Cdgebs2d(ConTxt, scope, top, m, n, reinterpret_cast<double*>(A), lda);
+    } else if (is_c) {
+        Ccgebs2d(ConTxt, scope, top, m, n, reinterpret_cast<std::complex<float>*>(A), lda);
+    } else { // std::complex<double>, the only type left after the static_assert
+        Czgebs2d(ConTxt, scope, top, m, n, reinterpret_cast<std::complex<double>*>(A), lda);
+    }
+}
+
+// Unified interface for BLACS broadcast-receive: dispatches on the element type
+// T to the matching routine C{i,s,d,c,z}gebr2d; every other argument (rsrc and
+// csrc included) is forwarded unchanged. Unsupported types fail to compile.
+template <typename T>
+void Cxgebr2d(int ConTxt, char *scope, char *top, int m, int n, T *A, int lda, int rsrc, int csrc)
+{
+    constexpr bool is_i = std::is_same<T, int>::value;
+    constexpr bool is_s = std::is_same<T, float>::value;
+    constexpr bool is_d = std::is_same<T, double>::value;
+    constexpr bool is_c = std::is_same<T, std::complex<float>>::value;
+    constexpr bool is_z = std::is_same<T, std::complex<double>>::value;
+    static_assert(is_i || is_s || is_d || is_c || is_z, "Type not supported");
+
+    // All branches are compiled for every T, hence the pointer casts; in the
+    // branch that is actually taken the cast target is T* itself.
+    if (is_i) {
+        Cigebr2d(ConTxt, scope, top, m, n, reinterpret_cast<int*>(A), lda, rsrc, csrc);
+    } else if (is_s) {
+        Csgebr2d(ConTxt, scope, top, m, n, reinterpret_cast<float*>(A), lda, rsrc, csrc);
+    } else if (is_d) {
+        Cdgebr2d(ConTxt, scope, top, m, n, reinterpret_cast<double*>(A), lda, rsrc, csrc);
+    } else if (is_c) {
+        Ccgebr2d(ConTxt, scope, top, m, n, reinterpret_cast<std::complex<float>*>(A), lda, rsrc, csrc);
+    } else { // std::complex<double>, the only type left after the static_assert
+        Czgebr2d(ConTxt, scope, top, m, n, reinterpret_cast<std::complex<double>*>(A), lda, rsrc, csrc);
+    }
+}
+
+
 #ifdef __MPI
 #include <mpi.h>
 extern "C"
 {
     int Csys2blacs_handle(MPI_Comm SysCtxt);
+    MPI_Comm Cblacs2sys_handle(int BlacsCtxt);
 }
 #endif // __MPI
 
diff --git a/source/module_basis/module_ao/parallel_2d.cpp b/source/module_basis/module_ao/parallel_2d.cpp
index 3a225eec86..bc46b9d3c2 100644
--- a/source/module_basis/module_ao/parallel_2d.cpp
+++ b/source/module_basis/module_ao/parallel_2d.cpp
@@ -1,6 +1,5 @@
 #include "parallel_2d.h"
 
-#include "module_base/blacs_connector.h"
 #include "module_base/scalapack_connector.h"
 
 #include <cassert>
@@ -47,23 +46,11 @@ void Parallel_2D::_init_proc_grid(const MPI_Comm comm, const bool mode)
         std::swap(dim0, dim1);
     }
 
-    // create a 2D Cartesian MPI communicator (row-major by default)
-    int period[2] = {1, 1};
-    int dim[2] = {dim0, dim1};
-    const int reorder = 0;
-    MPI_Cart_create(comm, 2, dim, period, reorder, &comm_2D);
-    MPI_Cart_get(comm_2D, 2, dim, period, coord);
-
     // initialize the BLACS grid accordingly
-    blacs_ctxt = Csys2blacs_handle(comm_2D);
+    blacs_ctxt = Csys2blacs_handle(comm);
     char order = 'R'; // row-major
     Cblacs_gridinit(&blacs_ctxt, &order, dim0, dim1);
-
-    // TODO Currently MPI and BLACS are made to have the same Cartesian grid.
-    // In theory, however, BLACS would split any given communicator to create
-    // new ones for its own purpose when initializing the process grid, so it
-    // might be unnecessary to create an MPI communicator with Cartesian topology.
-    // ***This needs to be verified***
+    Cblacs_gridinfo(blacs_ctxt, &dim0, &dim1, &coord[0], &coord[1]);
 }
 
 void Parallel_2D::_set_dist_info(const int mg, const int ng, const int nb)
@@ -105,9 +92,8 @@ int Parallel_2D::init(const int mg, const int ng, const int nb, const MPI_Comm c
     return nrow == 0 || ncol == 0;
 }
 
-int Parallel_2D::set(const int mg, const int ng, const int nb, const MPI_Comm comm_2D, const int blacs_ctxt)
+int Parallel_2D::set(const int mg, const int ng, const int nb, const int blacs_ctxt)
 {
-    this->comm_2D = comm_2D;
     this->blacs_ctxt = blacs_ctxt;
     Cblacs_gridinfo(blacs_ctxt, &dim0, &dim1, &coord[0], &coord[1]);
     _set_dist_info(mg, ng, nb);
@@ -124,7 +110,7 @@ void Parallel_2D::set_serial(const int mg, const int ng)
     coord[0] = coord[1] = 0;
     nrow = mg;
     ncol = ng;
-    nloc = nrow * ncol;
+    nloc = static_cast<int64_t>(nrow) * ncol;
     local2global_row_.resize(nrow);
     local2global_col_.resize(ncol);
     std::iota(local2global_row_.begin(), local2global_row_.end(), 0);
@@ -132,7 +118,6 @@ void Parallel_2D::set_serial(const int mg, const int ng)
     global2local_row_ = local2global_row_;
     global2local_col_ = local2global_col_;
 #ifdef __MPI
-    comm_2D = MPI_COMM_NULL;
     blacs_ctxt = -1;
 #endif
 }
diff --git a/source/module_basis/module_ao/parallel_2d.h b/source/module_basis/module_ao/parallel_2d.h
index 50e62c0804..f49caefd29 100644
--- a/source/module_basis/module_ao/parallel_2d.h
+++ b/source/module_basis/module_ao/parallel_2d.h
@@ -4,9 +4,7 @@
 #include <cstdint>
 #include <vector>
 
-#ifdef __MPI
-#include <mpi.h>
-#endif
+#include "module_base/blacs_connector.h"
 
 /// @brief  This class packs the basic information of
 /// 2D-block-cyclic parallel distribution of an arbitrary matrix.
@@ -87,13 +85,12 @@ class Parallel_2D
 
     /**
      * @brief Set up the info of a block-cyclic distribution using given
-     * MPI communicator and BLACS context.
+     * BLACS context.
      *
      */
     int set(const int mg,
             const int ng,
             const int nb, // square block is assumed
-            const MPI_Comm comm_2D,
             const int blacs_ctxt);
 
     /// BLACS context
@@ -102,8 +99,7 @@ class Parallel_2D
     /// ScaLAPACK descriptor
     int desc[9] = {};
 
-    /// 2D Cartesian MPI communicator
-    MPI_Comm comm_2D = MPI_COMM_NULL;
+    MPI_Comm comm() const { return Cblacs2sys_handle(blacs_ctxt); } // NOTE(review): blacs_ctxt is rewritten by Cblacs_gridinit and is -1 after set_serial; confirm Cblacs2sys_handle accepts such a value (it may expect the handle from Csys2blacs_handle) -- TODO verify
 #endif
 
     void set_serial(const int mg, const int ng);
@@ -116,6 +112,9 @@ class Parallel_2D
     int nrow = 0;
     int ncol = 0;
     int64_t nloc = 0;
+    // NOTE: ScaLAPACK descriptors use int type for the number of rows and columns of
+    // both the global and local matrices, so nrow & ncol have to be int type. Their
+    // product, however, can exceed the range of int type.
 
     /// block size
     int nb = 1;
@@ -124,7 +123,7 @@ class Parallel_2D
     int dim0 = 0;
     int dim1 = 0;
 
-    /// process coordinate in the MPI Cartesian grid
+    /// process coordinate in the BLACS grid
     int coord[2] = {-1, -1};
 
   protected:
diff --git a/source/module_basis/module_ao/parallel_orbitals.cpp b/source/module_basis/module_ao/parallel_orbitals.cpp
index 98c76a7065..b4d7868e0f 100644
--- a/source/module_basis/module_ao/parallel_orbitals.cpp
+++ b/source/module_basis/module_ao/parallel_orbitals.cpp
@@ -209,7 +209,6 @@ void Parallel_Orbitals::set_desc_wfc_Eij(const int& nbasis, const int& nbands, c
 {
     ModuleBase::TITLE("Parallel_2D", "set_desc_wfc_Eij");
 #ifdef __DEBUG
-    assert(this->comm_2D != MPI_COMM_NULL);
     assert(nbasis > 0 && nbands > 0 && lld > 0);
     assert(this->nb > 0 && this->dim0 > 0 && this->dim1 > 0);
 #endif
diff --git a/source/module_basis/module_ao/test/parallel_2d_test.cpp b/source/module_basis/module_ao/test/parallel_2d_test.cpp
index 206231398a..715a11115a 100644
--- a/source/module_basis/module_ao/test/parallel_2d_test.cpp
+++ b/source/module_basis/module_ao/test/parallel_2d_test.cpp
@@ -67,7 +67,7 @@ TEST_F(test_para2d, Divide2D)
                     EXPECT_LE(p2d.dim0, p2d.dim1);
 
                 // 2. MPI 2d communicator
-                EXPECT_NE(p2d.comm_2D, MPI_COMM_NULL);
+                //EXPECT_NE(p2d.comm_2D, MPI_COMM_NULL);
 
                 // 3. local2global and local sizes
                 int lr = p2d.get_row_size();
@@ -124,7 +124,7 @@ TEST_F(test_para2d, DescReuseCtxt)
         p1.init(sizes[0].first, sizes[0].second, nb, MPI_COMM_WORLD);
 
         Parallel_2D p2; // use 2 different sizes, but they can share the same ctxt
-        p2.set(sizes[1].first, sizes[1].second, nb, p1.comm_2D, p1.blacs_ctxt);
+        p2.set(sizes[1].first, sizes[1].second, nb, p1.blacs_ctxt);
 
         EXPECT_EQ(p1.desc[1], p2.desc[1]);
 
diff --git a/source/module_basis/module_ao/test/parallel_orbitals_test.cpp b/source/module_basis/module_ao/test/parallel_orbitals_test.cpp
index 06bd6f010b..fe09d9fca6 100644
--- a/source/module_basis/module_ao/test/parallel_orbitals_test.cpp
+++ b/source/module_basis/module_ao/test/parallel_orbitals_test.cpp
@@ -60,7 +60,7 @@ TEST_F(TestParaO, Divide2D)
                 else EXPECT_LE(po.dim0, po.dim1);
 
                 //2. comm_2D
-                EXPECT_NE(po.comm_2D, MPI_COMM_NULL);
+                //EXPECT_NE(po.comm_2D, MPI_COMM_NULL);
 
                 //3. local2global and local sizes
                 int lr = po.get_row_size();
diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp
index 601b77030b..de65c1edb7 100644
--- a/source/module_esolver/esolver_ks_lcao.cpp
+++ b/source/module_esolver/esolver_ks_lcao.cpp
@@ -573,7 +573,7 @@ void ESolver_KS_LCAO<TK, TR>::init_basis_lcao(Input& inp, UnitCell& ucell)
     try_nb += ParaV.set_nloc_wfc_Eij(GlobalV::NBANDS, GlobalV::ofs_running, GlobalV::ofs_warning);
     if (try_nb != 0)
     {
-        ParaV.set(GlobalV::NLOCAL, GlobalV::NLOCAL, 1, ParaV.comm_2D, ParaV.blacs_ctxt);
+        ParaV.set(GlobalV::NLOCAL, GlobalV::NLOCAL, 1, ParaV.blacs_ctxt);
         try_nb = ParaV.set_nloc_wfc_Eij(GlobalV::NBANDS, GlobalV::ofs_running, GlobalV::ofs_warning);
     }
 
diff --git a/source/module_hamilt_lcao/module_tddft/bandenergy.cpp b/source/module_hamilt_lcao/module_tddft/bandenergy.cpp
index b1ead443ba..37212a6606 100644
--- a/source/module_hamilt_lcao/module_tddft/bandenergy.cpp
+++ b/source/module_hamilt_lcao/module_tddft/bandenergy.cpp
@@ -103,9 +103,7 @@ void compute_ekb(const Parallel_Orbitals* pv,
     }
 
     int info;
-    int myid;
     int naroc[2];
-    MPI_Comm_rank(pv->comm_2D, &myid);
 
     double* Eii = new double[nband];
     ModuleBase::GlobalFunc::ZEROS(Eii, nband);
@@ -113,10 +111,7 @@ void compute_ekb(const Parallel_Orbitals* pv,
     {
         for (int ipcol = 0; ipcol < pv->dim1; ++ipcol)
         {
-            const int coord[2] = {iprow, ipcol};
-            int src_rank;
-            info = MPI_Cart_rank(pv->comm_2D, coord, &src_rank);
-            if (myid == src_rank)
+            if (iprow == pv->coord[0] && ipcol == pv->coord[1])
             {
                 naroc[0] = pv->nrow;
                 naroc[1] = pv->ncol;
@@ -139,7 +134,7 @@ void compute_ekb(const Parallel_Orbitals* pv,
             }
         } // loop ipcol
     }     // loop iprow
-    info = MPI_Allreduce(Eii, ekb, nband, MPI_DOUBLE, MPI_SUM, pv->comm_2D);
+    info = MPI_Allreduce(Eii, ekb, nband, MPI_DOUBLE, MPI_SUM, pv->comm());
 
     delete[] tmp1;
     delete[] Eij;
@@ -148,4 +143,4 @@ void compute_ekb(const Parallel_Orbitals* pv,
 
 #endif
 
-} // namespace module_tddft
\ No newline at end of file
+} // namespace module_tddft
diff --git a/source/module_hamilt_lcao/module_tddft/norm_psi.cpp b/source/module_hamilt_lcao/module_tddft/norm_psi.cpp
index a9e960fce9..9d708e13c7 100644
--- a/source/module_hamilt_lcao/module_tddft/norm_psi.cpp
+++ b/source/module_hamilt_lcao/module_tddft/norm_psi.cpp
@@ -92,19 +92,13 @@ void norm_psi(const Parallel_Orbitals* pv,
         GlobalV::ofs_running << std::endl;
     }
 
-    int info;
-    int myid;
-    MPI_Comm_rank(pv->comm_2D, &myid);
     int naroc[2]; // maximum number of row or column
 
     for (int iprow = 0; iprow < pv->dim0; ++iprow)
     {
         for (int ipcol = 0; ipcol < pv->dim1; ++ipcol)
         {
-            const int coord[2] = {iprow, ipcol};
-            int src_rank;
-            info = MPI_Cart_rank(pv->comm_2D, coord, &src_rank);
-            if (myid == src_rank)
+            if (iprow == pv->coord[0] && ipcol == pv->coord[1])
             {
                 naroc[0] = pv->nrow;
                 naroc[1] = pv->ncol;
diff --git a/source/module_hamilt_lcao/module_tddft/propagator.cpp b/source/module_hamilt_lcao/module_tddft/propagator.cpp
index 44f102f7c5..57601c4aaa 100644
--- a/source/module_hamilt_lcao/module_tddft/propagator.cpp
+++ b/source/module_hamilt_lcao/module_tddft/propagator.cpp
@@ -312,18 +312,13 @@ void Propagator::compute_propagator_taylor(const int nlocal,
 
     // set rank0
     int info;
-    int myid;
-    MPI_Comm_rank(this->ParaV->comm_2D, &myid);
     int naroc[2]; // maximum number of row or column
 
     for (int iprow = 0; iprow < this->ParaV->dim0; ++iprow)
     {
         for (int ipcol = 0; ipcol < this->ParaV->dim1; ++ipcol)
         {
-            const int coord[2] = {iprow, ipcol};
-            int src_rank;
-            info = MPI_Cart_rank(this->ParaV->comm_2D, coord, &src_rank);
-            if (myid == src_rank)
+            if (iprow == ParaV->coord[0] && ipcol == ParaV->coord[1])
             {
                 naroc[0] = this->ParaV->nrow;
                 naroc[1] = this->ParaV->ncol;
@@ -611,4 +606,4 @@ void Propagator::compute_propagator_etrs(const int nlocal,
 }
 
 #endif
-} // namespace module_tddft
\ No newline at end of file
+} // namespace module_tddft
diff --git a/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp b/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp
index 2c164f50d0..e89cbe91c5 100644
--- a/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp
+++ b/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp
@@ -47,13 +47,9 @@ TEST(BandEnergyTest, testBandEnergy)
     pv->nb = 1;
 
     int dim[2];
-    int period[2] = {1, 1};
-    int reorder = 0;
     dim[0] = nprow;
     dim[1] = npcol;
 
-    MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &pv->comm_2D);
-
     // Initialize input matrices
     int info;
     int mb = 1, nb = 1, lda = nband, ldc = nlocal;
diff --git a/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp b/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp
index d92817af90..d5a9ebe175 100644
--- a/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp
+++ b/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp
@@ -44,13 +44,9 @@ TEST(NormPsiTest, testNormPsi)
     pv->nb = 1;
 
     int dim[2];
-    int period[2] = {1, 1};
-    int reorder = 0;
     dim[0] = nprow;
     dim[1] = npcol;
 
-    MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &pv->comm_2D);
-
     // Initialize input matrices
     int info;
     int mb = 1, nb = 1, lda = nband, ldc = nlocal;
diff --git a/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp b/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp
index e015e17f45..cce7e5d4aa 100644
--- a/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp
+++ b/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp
@@ -36,13 +36,9 @@ TEST(PropagatorTest, testPropagatorTaylor)
     pv->nb = 1;
 
     int dim[2];
-    int period[2] = {1, 1};
-    int reorder = 0;
     dim[0] = nprow;
     dim[1] = npcol;
 
-    MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &pv->comm_2D);
-
     INPUT.mdp.md_dt = 4;
 
     // Initialize input matrices
@@ -116,4 +112,4 @@ TEST(PropagatorTest, testPropagatorTaylor)
     delete[] U_operator;
     delete[] Htmp;
     delete[] Stmp;
-}
\ No newline at end of file
+}
diff --git a/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp b/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp
index 4023b518fe..0926fc0018 100644
--- a/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp
+++ b/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp
@@ -37,13 +37,9 @@ TEST(PropagatorTest, testPropagatorETRS)
     pv->nb = 1;
 
     int dim[2];
-    int period[2] = {1, 1};
-    int reorder = 0;
     dim[0] = nprow;
     dim[1] = npcol;
 
-    MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &pv->comm_2D);
-
     INPUT.mdp.md_dt = 4;
 
     // Initialize input matrices
@@ -121,4 +117,4 @@ TEST(PropagatorTest, testPropagatorETRS)
     delete[] Htmp;
     delete[] Stmp;
     delete[] Hlaststep;
-}
\ No newline at end of file
+}
diff --git a/source/module_hsolver/test/diago_pexsi_test.cpp b/source/module_hsolver/test/diago_pexsi_test.cpp
index 32468be546..693e84f9bd 100644
--- a/source/module_hsolver/test/diago_pexsi_test.cpp
+++ b/source/module_hsolver/test/diago_pexsi_test.cpp
@@ -144,7 +144,6 @@ class PexsiPrepare
         po.nrow = hmtest.nrow;
         po.nb = nb2d;
         po.blacs_ctxt = icontxt;
-        po.comm_2D = MPI_COMM_WORLD;
         po.dim0 = nprows;
         po.dim1 = npcols;
 
diff --git a/source/module_io/io_dmk.cpp b/source/module_io/io_dmk.cpp
index 23776ac3d4..b40128a929 100644
--- a/source/module_io/io_dmk.cpp
+++ b/source/module_io/io_dmk.cpp
@@ -133,7 +133,7 @@ bool ModuleIO::read_dmk(const int nspin,
 
     int my_rank = 0;
 #ifdef __MPI
-    MPI_Comm_rank(pv.comm_2D, &my_rank);
+    MPI_Comm_rank(pv.comm(), &my_rank);
 #endif
 
     int nlocal = pv.get_global_row_size();
@@ -216,7 +216,7 @@ bool ModuleIO::read_dmk(const int nspin,
     }     // rank0
 
 #ifdef __MPI
-    MPI_Bcast(&read_success, 1, MPI_C_BOOL, 0, pv.comm_2D);
+    MPI_Bcast(&read_success, 1, MPI_C_BOOL, 0, pv.comm());
 #endif
 
     if (read_success) {
@@ -225,7 +225,7 @@ bool ModuleIO::read_dmk(const int nspin,
         dmk.resize(nspin * nk,
                    std::vector<T>(pv.get_row_size() * pv.get_col_size()));
         Parallel_2D pv_glb;
-        pv_glb.set(nlocal, nlocal, nlocal, pv.comm_2D, pv.blacs_ctxt);
+        pv_glb.set(nlocal, nlocal, nlocal, pv.blacs_ctxt);
         for (int ik = 0; ik < nspin * nk; ik++) {
             Cpxgemr2d(nlocal,
                       nlocal,
@@ -258,7 +258,7 @@ void ModuleIO::write_dmk(const std::vector<std::vector<T>>& dmk,
 
     int my_rank = 0;
 #ifdef __MPI
-    MPI_Comm_rank(pv.comm_2D, &my_rank);
+    MPI_Comm_rank(pv.comm(), &my_rank);
 #endif
 
     bool gamma_only = std::is_same<double, T>::value;
@@ -279,7 +279,7 @@ void ModuleIO::write_dmk(const std::vector<std::vector<T>>& dmk,
             // gather dmk[ik] to dmk_global
             std::vector<T> dmk_global(my_rank == 0 ? nlocal * nlocal : 0);
 #ifdef __MPI
-            pv_glb.set(nlocal, nlocal, nlocal, pv.comm_2D, pv.blacs_ctxt);
+            pv_glb.set(nlocal, nlocal, nlocal, pv.blacs_ctxt);
             Cpxgemr2d(nlocal,
                       nlocal,
                       const_cast<T*>(dmk[ik + nk * ispin].data()),
@@ -365,4 +365,4 @@ template void ModuleIO::write_dmk<std::complex<double>>(
     const int precision,
     const std::vector<double>& efs,
     const UnitCell* ucell,
-    const Parallel_2D& pv);
\ No newline at end of file
+    const Parallel_2D& pv);
diff --git a/source/module_io/istate_envelope.cpp b/source/module_io/istate_envelope.cpp
index 801b8e4db0..4081e60345 100644
--- a/source/module_io/istate_envelope.cpp
+++ b/source/module_io/istate_envelope.cpp
@@ -584,49 +584,42 @@ void IState_Envelope::wfc_2d_to_grid(const T* lowf_2d,
     // MPI and memory related
     const int mem_stride = 1;
     int mpi_info = 0;
-    auto mpi_dtype = std::is_same<T, double>::value ? MPI_DOUBLE : MPI_DOUBLE_COMPLEX;
 
     // get the rank of the current process
     int rank = 0;
-    MPI_Comm_rank(pv.comm_2D, &rank);
+    MPI_Comm_rank(pv.comm(), &rank);
 
-    // calculate the maximum number of nlocal over all processes in pv.comm_2D range
+    // calculate the maximum number of nlocal over all processes in pv.comm() range
     long buf_size;
-    mpi_info = MPI_Reduce(&pv.nloc_wfc, &buf_size, 1, MPI_LONG, MPI_MAX, 0, pv.comm_2D);
-    mpi_info = MPI_Bcast(&buf_size, 1, MPI_LONG, 0, pv.comm_2D); // get and then broadcast
+    mpi_info = MPI_Reduce(&pv.nloc_wfc, &buf_size, 1, MPI_LONG, MPI_MAX, 0, pv.comm());
+    mpi_info = MPI_Bcast(&buf_size, 1, MPI_LONG, 0, pv.comm()); // get and then broadcast
     std::vector<T> lowf_block(buf_size);
 
     // this quantity seems to have the value returned by function numroc_ in ScaLAPACK?
     int naroc[2];
 
+    // for BLACS broadcast
+    char scope = 'A';
+    char top = ' ';
+
     // loop over all processors
     for (int iprow = 0; iprow < pv.dim0; ++iprow)
     {
         for (int ipcol = 0; ipcol < pv.dim1; ++ipcol)
         {
-            // get the rank of the processor at the given coordinate
-            int rank_at_coord;
-            const int mpi_cart_coord[2] = {iprow, ipcol};
-            mpi_info = MPI_Cart_rank(pv.comm_2D, mpi_cart_coord, &rank_at_coord); // get the MPI rank
-
-            // keep in mind present function is concurrently called by all processors, thus
-            // the following code block will only be executed once for each processor, which means
-            // for each processor, get its MPI rank and MPI coord, then assign the naroc[0] and naroc[1]
-            // with the value which should have been calculated automatically by ScaLAPACK function
-            // numroc_.
-            if (rank == rank_at_coord)
+            if (iprow == pv.coord[0] && ipcol == pv.coord[1])
             {
                 BlasConnector::copy(pv.nloc_wfc, lowf_2d, mem_stride, lowf_block.data(), mem_stride);
                 naroc[0] = pv.nrow;
                 naroc[1] = pv.ncol_bands;
+                Cxgebs2d(pv.blacs_ctxt, &scope, &top, 2, 1, naroc, 2);
+                Cxgebs2d(pv.blacs_ctxt, &scope, &top, buf_size, 1, lowf_block.data(), buf_size);
+            }
+            else
+            {
+                Cxgebr2d(pv.blacs_ctxt, &scope, &top, 2, 1, naroc, 2, iprow, ipcol);
+                Cxgebr2d(pv.blacs_ctxt, &scope, &top, buf_size, 1, lowf_block.data(), buf_size, iprow, ipcol);
             }
-
-            // broadcast the number of row and column
-            mpi_info = MPI_Bcast(naroc, 2, MPI_INT, rank_at_coord, pv.comm_2D);
-
-            // broadcast the data, this means the data owned by one processor is broadcast
-            // to all other processors in the communicator.
-            mpi_info = MPI_Bcast(lowf_block.data(), buf_size, mpi_dtype, rank_at_coord, pv.comm_2D);
 
             // then use it to set the wfc_grid.
             mpi_info = this->set_wfc_grid(naroc,
@@ -666,4 +659,4 @@ int IState_Envelope::localIndex(int globalindex, int nblk, int nprocs, int& mypr
 {
     myproc = int((globalindex % (nblk * nprocs)) / nblk);
     return int(globalindex / (nblk * nprocs)) * nblk + globalindex % nblk;
-}
\ No newline at end of file
+}
diff --git a/source/module_io/read_wfc_lcao.cpp b/source/module_io/read_wfc_lcao.cpp
index 20766203de..c7652a4fd5 100644
--- a/source/module_io/read_wfc_lcao.cpp
+++ b/source/module_io/read_wfc_lcao.cpp
@@ -258,7 +258,7 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi
     const std::string flowf_prefix = gamma_only ? "WFC_GAMMA" : "WFC_NAO_K";
     // MPI-related variables init
     int iproc;
-    MPI_Comm_rank(p2d.comm_2D, &iproc);
+    MPI_Comm_rank(p2d.comm(), &iproc);
     // then start
     int nbands_ = -1, nbasis_ = -1;
     for (int ik = 0; ik < nks; ik++)
@@ -289,12 +289,12 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi
             wk.push_back(wk_);
             kvec_c.push_back(kvec);
         }
-        MPI_Barrier(p2d.comm_2D); // wait for finishing the reading task
+        MPI_Barrier(p2d.comm()); // wait for finishing the reading task
         // scatter the lowf_glb to lowf_loc
         Parallel_2D p2d_glb;
         Parallel_Common::bcast_int(nbands);
         Parallel_Common::bcast_int(nbasis);
-        p2d_glb.init(nbasis, nbands, std::max(nbasis, nbands), p2d.comm_2D); // in the same comm world
+        p2d_glb.init(nbasis, nbands, std::max(nbasis, nbands), p2d.comm()); // in the same comm world
         lowf_loc_k.resize(p2d.nrow * p2d.ncol);
         Cpxgemr2d(nbasis,
                   nbands,
diff --git a/source/module_io/read_wfc_nao.cpp b/source/module_io/read_wfc_nao.cpp
index f399713f6c..6f2edc0f21 100644
--- a/source/module_io/read_wfc_nao.cpp
+++ b/source/module_io/read_wfc_nao.cpp
@@ -46,7 +46,7 @@ bool ModuleIO::read_wfc_nao(
     psid.resize(nk, nbands_local, nlocal_local);
 
 #ifdef __MPI
-    MPI_Comm_rank(ParaV.comm_2D, &myrank);
+    MPI_Comm_rank(ParaV.comm(), &myrank);
 #endif   
 
     // lambda function to read one file
@@ -144,7 +144,7 @@ bool ModuleIO::read_wfc_nao(
         psid.fix_k(ik);
 #ifdef __MPI
         Parallel_2D pv_glb;
-        pv_glb.set(nlocal, nbands, std::max(nlocal, nbands), ParaV.comm_2D, ParaV.blacs_ctxt);
+        pv_glb.set(nlocal, nbands, std::max(nlocal, nbands), ParaV.blacs_ctxt);
         Cpxgemr2d(nlocal,
                   nbands,
                   ctot.data(),
diff --git a/source/module_io/write_Vxc.hpp b/source/module_io/write_Vxc.hpp
index 1a530c8dfd..d690d53909 100644
--- a/source/module_io/write_Vxc.hpp
+++ b/source/module_io/write_Vxc.hpp
@@ -40,7 +40,7 @@ void set_para2d_MO(const Parallel_Orbitals& pv, const int nbands, Parallel_2D& p
 {
     std::ofstream ofs;
 #ifdef __MPI
-    p2d.set(nbands, nbands, pv.nb, pv.comm_2D, pv.blacs_ctxt);
+    p2d.set(nbands, nbands, pv.nb, pv.blacs_ctxt);
 #else
     p2d.set_serial(nbands, nbands);
 #endif
diff --git a/source/module_io/write_dmr.cpp b/source/module_io/write_dmr.cpp
index c4b764dc5f..bfd954a493 100644
--- a/source/module_io/write_dmr.cpp
+++ b/source/module_io/write_dmr.cpp
@@ -70,7 +70,7 @@ void write_dmr(const std::vector<hamilt::HContainer<double>*> dmr,
             // gather the parallel matrix to serial matrix
 #ifdef __MPI
             Parallel_Orbitals serialV;
-            serialV.init(nbasis, nbasis, nbasis, paraV.comm_2D);
+            serialV.init(nbasis, nbasis, nbasis, paraV.comm());
             serialV.set_serial(nbasis, nbasis);
             serialV.set_atomic_trace(GlobalC::ucell.get_iat2iwt(), GlobalC::ucell.nat, nbasis);
             hamilt::HContainer<double> dm_serial(&serialV);
@@ -91,4 +91,4 @@ void write_dmr(const std::vector<hamilt::HContainer<double>*> dmr,
     }
 }
 
-} // namespace ModuleIO
\ No newline at end of file
+} // namespace ModuleIO
diff --git a/source/module_io/write_wfc_nao.cpp b/source/module_io/write_wfc_nao.cpp
index d342bad39c..f342f9b237 100644
--- a/source/module_io/write_wfc_nao.cpp
+++ b/source/module_io/write_wfc_nao.cpp
@@ -222,7 +222,7 @@ void write_wfc_nao(const int out_type,
     // If using MPI, the nbasis and nbands in psi is the value on local rank, 
     // so get nlocal and nbands from pv->desc_wfc[2] and pv->desc_wfc[3]
 #ifdef __MPI
-    MPI_Comm_rank(pv.comm_2D, &myid);
+    MPI_Comm_rank(pv.comm(), &myid);
     nlocal = pv.desc_wfc[2];
     nbands = pv.desc_wfc[3];
 #else
@@ -241,7 +241,7 @@ void write_wfc_nao(const int out_type,
     {
         psi.fix_k(ik);
 #ifdef __MPI        
-        pv_glb.set(nlocal, nbands, blk_glb, pv.comm_2D, pv.blacs_ctxt);   
+        pv_glb.set(nlocal, nbands, blk_glb, pv.blacs_ctxt);   
         Cpxgemr2d(nlocal,
                   nbands,
                   psi.get_pointer(),

From 9cd1697fb4cf9a9c9ee671b92986ab374faaf276 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci-lite[bot]"
 <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Date: Thu, 11 Jul 2024 19:24:51 +0000
Subject: [PATCH 2/7] [pre-commit.ci lite] apply automatic fixes

---
 .../module_ao/test/parallel_2d_test.cpp        | 17 +++++++++++------
 .../module_tddft/propagator.cpp                | 18 ++++++++++++------
 source/module_io/read_wfc_lcao.cpp             | 12 ++++++++----
 3 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/source/module_basis/module_ao/test/parallel_2d_test.cpp b/source/module_basis/module_ao/test/parallel_2d_test.cpp
index 715a11115a..5073571bcc 100644
--- a/source/module_basis/module_ao/test/parallel_2d_test.cpp
+++ b/source/module_basis/module_ao/test/parallel_2d_test.cpp
@@ -61,10 +61,11 @@ TEST_F(test_para2d, Divide2D)
 
                 // 1. dim0 and dim1
                 EXPECT_EQ(p2d.dim0 * p2d.dim1, dsize);
-                if (mode)
+                if (mode) {
                     EXPECT_LE(p2d.dim1, p2d.dim0);
-                else
+                } else {
                     EXPECT_LE(p2d.dim0, p2d.dim1);
+}
 
                 // 2. MPI 2d communicator
                 //EXPECT_NE(p2d.comm_2D, MPI_COMM_NULL);
@@ -96,18 +97,22 @@ TEST_F(test_para2d, Divide2D)
                 auto sum_array = [&p2d](const int& gr, const int& gc) -> std::pair<int, int> {
                     int sum_row = 0;
                     int sum_col = 0;
-                    for (int i = 0; i < gr; ++i)
+                    for (int i = 0; i < gr; ++i) {
                         sum_row += p2d.global2local_row(i);
-                    for (int i = 0; i < gc; ++i)
+}
+                    for (int i = 0; i < gc; ++i) {
                         sum_col += p2d.global2local_col(i);
+}
                     return {sum_row, sum_col};
                 };
                 std::pair<int, int> sumrc = sum_array(gr, gc);
                 EXPECT_EQ(std::get<0>(sumrc), lr * (lr - 1) / 2 - (gr - lr));
                 EXPECT_EQ(std::get<1>(sumrc), lc * (lc - 1) / 2 - (gc - lc));
-                for (int i = 0; i < lr; ++i)
-                    for (int j = 0; j < lc; ++j)
+                for (int i = 0; i < lr; ++i) {
+                    for (int j = 0; j < lc; ++j) {
                         EXPECT_TRUE(p2d.in_this_processor(p2d.local2global_row(i), p2d.local2global_col(j)));
+}
+}
 
                 EXPECT_EQ(p2d.get_global_row_size(), gr);
                 EXPECT_EQ(p2d.get_global_col_size(), gc);
diff --git a/source/module_hamilt_lcao/module_tddft/propagator.cpp b/source/module_hamilt_lcao/module_tddft/propagator.cpp
index 57601c4aaa..8ac07aa2a6 100644
--- a/source/module_hamilt_lcao/module_tddft/propagator.cpp
+++ b/source/module_hamilt_lcao/module_tddft/propagator.cpp
@@ -241,10 +241,12 @@ void Propagator::compute_propagator_cn2(const int nlocal,
                 double aa, bb;
                 aa = U_operator[i * this->ParaV->ncol + j].real();
                 bb = U_operator[i * this->ParaV->ncol + j].imag();
-                if (std::abs(aa) < 1e-8)
+                if (std::abs(aa) < 1e-8) {
                     aa = 0.0;
-                if (std::abs(bb) < 1e-8)
+}
+                if (std::abs(bb) < 1e-8) {
                     bb = 0.0;
+}
                 GlobalV::ofs_running << aa << "+" << bb << "i ";
             }
             GlobalV::ofs_running << std::endl;
@@ -325,13 +327,15 @@ void Propagator::compute_propagator_taylor(const int nlocal,
                 for (int j = 0; j < naroc[1]; ++j)
                 {
                     int igcol = globalIndex(j, this->ParaV->nb, this->ParaV->dim1, ipcol);
-                    if (igcol >= nlocal)
+                    if (igcol >= nlocal) {
                         continue;
+}
                     for (int i = 0; i < naroc[0]; ++i)
                     {
                         int igrow = globalIndex(i, this->ParaV->nb, this->ParaV->dim0, iprow);
-                        if (igrow >= nlocal)
+                        if (igrow >= nlocal) {
                             continue;
+}
                         if (igcol == igrow)
                         {
                             rank0[j * naroc[0] + i] = {1.0, 0.0};
@@ -552,10 +556,12 @@ void Propagator::compute_propagator_taylor(const int nlocal,
                 double aa, bb;
                 aa = U_operator[i * this->ParaV->ncol + j].real();
                 bb = U_operator[i * this->ParaV->ncol + j].imag();
-                if (std::abs(aa) < 1e-8)
+                if (std::abs(aa) < 1e-8) {
                     aa = 0.0;
-                if (std::abs(bb) < 1e-8)
+}
+                if (std::abs(bb) < 1e-8) {
                     bb = 0.0;
+}
                 GlobalV::ofs_running << aa << "+" << bb << "i ";
             }
             GlobalV::ofs_running << std::endl;
diff --git a/source/module_io/read_wfc_lcao.cpp b/source/module_io/read_wfc_lcao.cpp
index c7652a4fd5..6767069e5a 100644
--- a/source/module_io/read_wfc_lcao.cpp
+++ b/source/module_io/read_wfc_lcao.cpp
@@ -25,8 +25,9 @@ void ModuleIO::read_abacus_lowf(const std::string& flowf,
 {
     // assert the T must be double or float
     std::ifstream ifs(flowf.c_str());
-    if (!ifs)
+    if (!ifs) {
         ModuleBase::WARNING_QUIT("read_abacus_lowf", "open file failed: " + flowf);
+}
     // will use line-by-line parse
     std::string line;
     bool read_kvec = false;
@@ -138,8 +139,9 @@ void ModuleIO::read_abacus_lowf(const std::string& flowf,
                                 double& wk)
 {
     std::ifstream ifs(flowf.c_str());
-    if (!ifs)
+    if (!ifs) {
         ModuleBase::WARNING_QUIT("read_abacus_lowf", "open file failed: " + flowf);
+}
     // will use line-by-line parse
     std::string line;
     bool read_kvec = false;
@@ -266,8 +268,9 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi
         // check existence of file
         const std::string flowf = out_dir + "/" + flowf_prefix + std::to_string(ik + 1) + ".txt";
         std::ifstream ifs(flowf);
-        if (!ifs)
+        if (!ifs) {
             ModuleBase::WARNING_QUIT("restart_from_file", "open file failed: " + flowf);
+}
 
         std::vector<T> lowf_glb;
         std::vector<T> lowf_loc_k;
@@ -400,8 +403,9 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi
         // check existence of file
         const std::string flowf = out_dir + "/" + flowf_prefix + std::to_string(ik + 1) + ".txt";
         const std::ifstream ifs(flowf);
-        if (!ifs)
+        if (!ifs) {
             ModuleBase::WARNING_QUIT("restart_from_file", "open file failed: " + flowf);
+}
 
         std::vector<T> lowf_;
         std::vector<double> ekb_;

From cecc8b3d6bc5aeadf36eda5c57191c3777c548ef Mon Sep 17 00:00:00 2001
From: jinzx10 <jzx016@hotmail.com>
Date: Fri, 12 Jul 2024 16:00:43 +0800
Subject: [PATCH 3/7] fix comm()

---
 source/module_basis/module_ao/parallel_2d.cpp          | 10 ++++++++++
 source/module_basis/module_ao/parallel_2d.h            |  2 +-
 .../module_tddft/test/CMakeLists.txt                   |  2 +-
 .../module_tddft/test/bandenergy_test.cpp              |  8 ++------
 .../module_tddft/test/norm_psi_test.cpp                |  2 ++
 .../module_tddft/test/propagator_test1.cpp             |  1 +
 .../module_tddft/test/propagator_test2.cpp             |  2 ++
 .../module_tddft/test/propagator_test3.cpp             |  2 ++
 8 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/source/module_basis/module_ao/parallel_2d.cpp b/source/module_basis/module_ao/parallel_2d.cpp
index bc46b9d3c2..3bd669ee55 100644
--- a/source/module_basis/module_ao/parallel_2d.cpp
+++ b/source/module_basis/module_ao/parallel_2d.cpp
@@ -29,6 +29,16 @@ int Parallel_2D::get_global_col_size() const
 }
 
 #ifdef __MPI
+/// MPI communicator underlying blacs_ctxt; MPI_COMM_NULL when blacs_ctxt is negative (no BLACS context set).
+MPI_Comm Parallel_2D::comm() const
+{
+    if (blacs_ctxt < 0) { return MPI_COMM_NULL; } // blacs_get must not be called with an invalid context
+    int sys_ctxt = -1;
+    // blacs_get with "what" = 10 gives the system-context index of blacs_ctxt; blacs2sys_handle maps it to the MPI_Comm.
+    Cblacs_get(blacs_ctxt, 10, &sys_ctxt);
+    return Cblacs2sys_handle(sys_ctxt);
+}
+
 void Parallel_2D::_init_proc_grid(const MPI_Comm comm, const bool mode)
 {
     // determine the number of rows and columns of the process grid
diff --git a/source/module_basis/module_ao/parallel_2d.h b/source/module_basis/module_ao/parallel_2d.h
index f49caefd29..4fa62385b4 100644
--- a/source/module_basis/module_ao/parallel_2d.h
+++ b/source/module_basis/module_ao/parallel_2d.h
@@ -99,7 +99,7 @@ class Parallel_2D
     /// ScaLAPACK descriptor
     int desc[9] = {};
 
-    MPI_Comm comm() const { return Cblacs2sys_handle(blacs_ctxt); }
+    MPI_Comm comm() const;
 #endif
 
     void set_serial(const int mg, const int ng);
diff --git a/source/module_hamilt_lcao/module_tddft/test/CMakeLists.txt b/source/module_hamilt_lcao/module_tddft/test/CMakeLists.txt
index d37a98d217..e7c7bb96e9 100644
--- a/source/module_hamilt_lcao/module_tddft/test/CMakeLists.txt
+++ b/source/module_hamilt_lcao/module_tddft/test/CMakeLists.txt
@@ -13,7 +13,7 @@ AddTest(
 AddTest(
   TARGET tddft_bandenergy_test
   LIBS ${math_libs} base device tddft_test_lib  
-  SOURCES bandenergy_test.cpp ../bandenergy.cpp 
+  SOURCES bandenergy_test.cpp ../bandenergy.cpp ../../../module_basis/module_ao/parallel_2d.cpp ../../../module_basis/module_ao/parallel_orbitals.cpp
 )
 
 AddTest(
diff --git a/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp b/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp
index e89cbe91c5..853b75be76 100644
--- a/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp
+++ b/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp
@@ -20,12 +20,6 @@
 
 #define doublethreshold 1e-8
 double module_tddft::Evolve_elec::td_print_eij = -1;
-Parallel_Orbitals::Parallel_Orbitals()
-{
-}
-Parallel_Orbitals::~Parallel_Orbitals()
-{
-}
 
 TEST(BandEnergyTest, testBandEnergy)
 {
@@ -45,6 +39,8 @@ TEST(BandEnergyTest, testBandEnergy)
     pv->dim0 = 1;
     pv->dim1 = 1;
     pv->nb = 1;
+    pv->blacs_ctxt = 0;
+    pv->coord[0] = pv->coord[1] = 0;
 
     int dim[2];
     dim[0] = nprow;
diff --git a/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp b/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp
index d5a9ebe175..6c78937803 100644
--- a/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp
+++ b/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp
@@ -42,6 +42,8 @@ TEST(NormPsiTest, testNormPsi)
     pv->dim0 = 1;
     pv->dim1 = 1;
     pv->nb = 1;
+    pv->blacs_ctxt = 0;
+    pv->coord[0] = pv->coord[1] = 0;
 
     int dim[2];
     dim[0] = nprow;
diff --git a/source/module_hamilt_lcao/module_tddft/test/propagator_test1.cpp b/source/module_hamilt_lcao/module_tddft/test/propagator_test1.cpp
index 415becbe70..c087612617 100644
--- a/source/module_hamilt_lcao/module_tddft/test/propagator_test1.cpp
+++ b/source/module_hamilt_lcao/module_tddft/test/propagator_test1.cpp
@@ -37,6 +37,7 @@ TEST(PropagatorTest, testPropagatorCN)
     pv = new Parallel_Orbitals();
     pv->nloc = nlocal * nlocal;
     pv->ncol = nlocal;
+    pv->coord[0] = pv->coord[1] = 0;
     INPUT.mdp.md_dt = 4;
 
     // Initialize input matrices
diff --git a/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp b/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp
index cce7e5d4aa..60bee490f7 100644
--- a/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp
+++ b/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp
@@ -34,6 +34,8 @@ TEST(PropagatorTest, testPropagatorTaylor)
     pv->dim0 = 1;
     pv->dim1 = 1;
     pv->nb = 1;
+    pv->blacs_ctxt = 0;
+    pv->coord[0] = pv->coord[1] = 0;
 
     int dim[2];
     dim[0] = nprow;
diff --git a/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp b/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp
index 0926fc0018..d42b08c379 100644
--- a/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp
+++ b/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp
@@ -35,6 +35,8 @@ TEST(PropagatorTest, testPropagatorETRS)
     pv->dim0 = 1;
     pv->dim1 = 1;
     pv->nb = 1;
+    pv->blacs_ctxt = 0;
+    pv->coord[0] = pv->coord[1] = 0;
 
     int dim[2];
     dim[0] = nprow;

From a0248fc6df8299a164385b99840eb2bef25656c1 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci-lite[bot]"
 <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Date: Fri, 12 Jul 2024 09:20:53 +0000
Subject: [PATCH 4/7] [pre-commit.ci lite] apply automatic fixes

---
 source/module_io/istate_envelope.cpp | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/source/module_io/istate_envelope.cpp b/source/module_io/istate_envelope.cpp
index 4081e60345..81384d200f 100644
--- a/source/module_io/istate_envelope.cpp
+++ b/source/module_io/istate_envelope.cpp
@@ -155,8 +155,9 @@ void IState_Envelope::begin(const psi::Psi<double>* psid,
     for (int is = 0; is < nspin; ++is)
     {
         wfc_gamma_grid[is] = new double*[nbands];
-        for (int ib = 0; ib < nbands; ++ib)
+        for (int ib = 0; ib < nbands; ++ib) {
             wfc_gamma_grid[is][ib] = new double[gg.gridt->lgd];
+}
     }
 
     const double mem_size = sizeof(double) * double(gg.gridt->lgd) * double(nbands) * double(nspin) / 1024.0 / 1024.0;
@@ -219,8 +220,9 @@ void IState_Envelope::begin(const psi::Psi<double>* psid,
                     &(GlobalC::ucell),
                     3);
 
-                if (out_wfc_pw || out_wfc_r) // only for gamma_only now
+                if (out_wfc_pw || out_wfc_r) { // only for gamma_only now
                     this->set_pw_wfc(wfcpw, 0, ib, nspin, pes_->charge->rho_save, pw_wfc_g);
+}
             }
         }
     }
@@ -240,8 +242,9 @@ void IState_Envelope::begin(const psi::Psi<double>* psid,
 
     for (int is = 0; is < nspin; ++is)
     {
-        for (int ib = 0; ib < nbands; ++ib)
+        for (int ib = 0; ib < nbands; ++ib) {
             delete[] wfc_gamma_grid[is][ib];
+}
         delete[] wfc_gamma_grid[is];
     }
     return;
@@ -482,8 +485,9 @@ void IState_Envelope::begin(const psi::Psi<std::complex<double>>* psi,
 
     for (int ik = 0; ik < nks; ++ik)
     {
-        for (int ib = 0; ib < nbands; ++ib)
+        for (int ib = 0; ib < nbands; ++ib) {
             delete[] wfc_k_grid[ik][ib];
+}
         delete[] wfc_k_grid[ik];
     }
 
@@ -498,15 +502,18 @@ void IState_Envelope::set_pw_wfc(const ModulePW::PW_Basis_K* wfcpw,
                                  const double* const* const rho,
                                  psi::Psi<std::complex<double>>& wfc_g)
 {
-    if (ib == 0) // once is enough
+    if (ib == 0) { // once is enough
         ModuleBase::TITLE("IState_Envelope", "set_pw_wfc");
+}
 
     std::vector<std::complex<double>> Porter(wfcpw->nrxx);
     // here I refer to v_hartree, but I don't know how to deal with NSPIN=4
     const int nspin0 = (nspin == 2) ? 2 : 1;
-    for (int is = 0; is < nspin0; is++)
-        for (int ir = 0; ir < wfcpw->nrxx; ir++)
+    for (int is = 0; is < nspin0; is++) {
+        for (int ir = 0; ir < wfcpw->nrxx; ir++) {
             Porter[ir] += std::complex<double>(rho[is][ir], 0.0);
+}
+}
 
     // call FFT
     wfcpw->real2recip(Porter.data(), &wfc_g(ib, 0), ik);

From 0ada30b6220f158c88eb62c27dd79d5a68032a0a Mon Sep 17 00:00:00 2001
From: jinzx10 <jzx016@hotmail.com>
Date: Fri, 12 Jul 2024 17:45:07 +0800
Subject: [PATCH 5/7] remove comm_2D in lr

---
 source/module_lr/AX/AX_parallel.cpp            | 18 +++++++++---------
 source/module_lr/AX/test/AX_test.cpp           | 10 +++++-----
 .../module_lr/dm_trans/dm_trans_parallel.cpp   | 16 ++++++++--------
 .../module_lr/dm_trans/test/dm_trans_test.cpp  |  6 +++---
 source/module_lr/esolver_lrtd_lcao.cpp         | 10 ++++------
 source/module_lr/utils/lr_util.cpp             |  7 +++----
 source/module_lr/utils/lr_util.h               |  5 ++---
 source/module_lr/utils/lr_util.hpp             |  2 +-
 8 files changed, 35 insertions(+), 39 deletions(-)

diff --git a/source/module_lr/AX/AX_parallel.cpp b/source/module_lr/AX/AX_parallel.cpp
index 7c563e3d9b..3881a5ee09 100644
--- a/source/module_lr/AX/AX_parallel.cpp
+++ b/source/module_lr/AX/AX_parallel.cpp
@@ -22,18 +22,18 @@ namespace LR
         const bool add_on)
     {
         ModuleBase::TITLE("hamilt_lrtd", "cal_AX_pblas");
-        assert(pmat.comm_2D == pc.comm_2D);
+        assert(pmat.comm() == pc.comm());
         assert(pmat.blacs_ctxt == pc.blacs_ctxt);
 
-        if (pX.comm_2D != pmat.comm_2D || pX.blacs_ctxt != pmat.blacs_ctxt)
-            LR_Util::setup_2d_division(pX, pmat.get_block_size(), nvirt, nocc, pmat.comm_2D, pmat.blacs_ctxt);
+        if (pX.comm() != pmat.comm() || pX.blacs_ctxt != pmat.blacs_ctxt)
+            LR_Util::setup_2d_division(pX, pmat.get_block_size(), nvirt, nocc, pmat.blacs_ctxt);
         else assert(pX.get_local_size() > 0 && AX_istate.get_nbasis() == pX.get_local_size());
 
         int nks = c.get_nk();
         assert(V_istate.size() == nks);
 
         Parallel_2D pVc;        // for intermediate Vc
-        LR_Util::setup_2d_division(pVc, pmat.get_block_size(), naos, nocc, pmat.comm_2D, pmat.blacs_ctxt);
+        LR_Util::setup_2d_division(pVc, pmat.get_block_size(), naos, nocc, pmat.blacs_ctxt);
         for (int isk = 0;isk < nks;++isk)
         {
             AX_istate.fix_k(isk);
@@ -79,18 +79,18 @@ namespace LR
         const bool add_on)
     {
         ModuleBase::TITLE("hamilt_lrtd", "cal_AX_plas");
-        assert(pmat.comm_2D == pc.comm_2D);
+        assert(pmat.comm() == pc.comm());
         assert(pmat.blacs_ctxt == pc.blacs_ctxt);
 
-        if (pX.comm_2D != pmat.comm_2D || pX.blacs_ctxt != pmat.blacs_ctxt)
-            LR_Util::setup_2d_division(pX, pmat.get_block_size(), nvirt, nocc, pmat.comm_2D, pmat.blacs_ctxt);
+        if (pX.comm() != pmat.comm() || pX.blacs_ctxt != pmat.blacs_ctxt)
+            LR_Util::setup_2d_division(pX, pmat.get_block_size(), nvirt, nocc, pmat.blacs_ctxt);
         else assert(pX.get_local_size() > 0 && AX_istate.get_nbasis() == pX.get_local_size());
 
         int nks = c.get_nk();
         assert(V_istate.size() == nks);
 
         Parallel_2D pVc;        // for intermediate Vc
-        LR_Util::setup_2d_division(pVc, pmat.get_block_size(), naos, nocc, pmat.comm_2D, pmat.blacs_ctxt);
+        LR_Util::setup_2d_division(pVc, pmat.get_block_size(), naos, nocc, pmat.blacs_ctxt);
         for (int isk = 0;isk < nks;++isk)
         {
             AX_istate.fix_k(isk);
@@ -122,4 +122,4 @@ namespace LR
         }
     }
 }
-#endif
\ No newline at end of file
+#endif
diff --git a/source/module_lr/AX/test/AX_test.cpp b/source/module_lr/AX/test/AX_test.cpp
index 06ca272b97..65a459a1d8 100644
--- a/source/module_lr/AX/test/AX_test.cpp
+++ b/source/module_lr/AX/test/AX_test.cpp
@@ -117,10 +117,10 @@ TEST_F(AXTest, DoubleParallel)
         LR_Util::setup_2d_division(pV, s.nb, s.naos, s.naos);
         std::vector<container::Tensor> V(s.nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { pV.get_col_size(), pV.get_row_size() }));
         Parallel_2D pc;
-        LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, pV.comm_2D, pV.blacs_ctxt);
+        LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, pV.blacs_ctxt);
         psi::Psi<double> c(s.nks, pc.get_col_size(), pc.get_row_size());
         Parallel_2D px;
-        LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc, pV.comm_2D, pV.blacs_ctxt);
+        LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc, pV.blacs_ctxt);
 
         EXPECT_EQ(pV.dim0, pc.dim0);
         EXPECT_EQ(pV.dim1, pc.dim1);
@@ -178,10 +178,10 @@ TEST_F(AXTest, ComplexParallel)
         LR_Util::setup_2d_division(pV, s.nb, s.naos, s.naos);
         std::vector<container::Tensor> V(s.nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { pV.get_col_size(), pV.get_row_size() }));
         Parallel_2D pc;
-        LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, pV.comm_2D, pV.blacs_ctxt);
+        LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, pV.blacs_ctxt);
         psi::Psi<std::complex<double>> c(s.nks, pc.get_col_size(), pc.get_row_size());
         Parallel_2D px;
-        LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc, pV.comm_2D, pV.blacs_ctxt);
+        LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc, pV.blacs_ctxt);
 
         psi::Psi<std::complex<double>> AX_pblas_loc(s.nks, nstate, px.get_local_size());
         psi::Psi<std::complex<double>> AX_gather(s.nks, nstate, s.nocc * s.nvirt, nullptr, false);
@@ -236,4 +236,4 @@ int main(int argc, char** argv)
     int result = RUN_ALL_TESTS();
     MPI_Finalize();
     return result;
-}
\ No newline at end of file
+}
diff --git a/source/module_lr/dm_trans/dm_trans_parallel.cpp b/source/module_lr/dm_trans/dm_trans_parallel.cpp
index 432ae65dda..ae857692d6 100644
--- a/source/module_lr/dm_trans/dm_trans_parallel.cpp
+++ b/source/module_lr/dm_trans/dm_trans_parallel.cpp
@@ -22,11 +22,11 @@ std::vector<container::Tensor> cal_dm_trans_pblas(const psi::Psi<double>& X_ista
                                                   const int nspin)
 {
     ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_pblas");
-    assert(px.comm_2D == pc.comm_2D);
+    assert(px.comm() == pc.comm());
     assert(px.blacs_ctxt == pc.blacs_ctxt);
 
-    if (pmat.comm_2D != px.comm_2D || pmat.blacs_ctxt != px.blacs_ctxt)
-        LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.comm_2D, px.blacs_ctxt);
+    if (pmat.comm() != px.comm() || pmat.blacs_ctxt != px.blacs_ctxt)
+        LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.blacs_ctxt);
     else
         assert(pmat.get_local_size() > 0);
 
@@ -49,7 +49,7 @@ std::vector<container::Tensor> cal_dm_trans_pblas(const psi::Psi<double>& X_ista
 
         // 1. [X*C_occ^T]^T=C_occ*X^T
         Parallel_2D pXc; // nvirt*naos
-        LR_Util::setup_2d_division(pXc, px.get_block_size(), naos, nvirt, px.comm_2D, px.blacs_ctxt);
+        LR_Util::setup_2d_division(pXc, px.get_block_size(), naos, nvirt, px.blacs_ctxt);
         container::Tensor Xc(DAT::DT_DOUBLE,
                              DEV::CpuDevice,
                              {pXc.get_col_size(), pXc.get_row_size()}); // row is "inside"(memory contiguity) for pblas
@@ -110,11 +110,11 @@ std::vector<container::Tensor> cal_dm_trans_pblas(const psi::Psi<std::complex<do
                                                   const int nspin)
 {
     ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_pblas");
-    assert(px.comm_2D == pc.comm_2D);
+    assert(px.comm() == pc.comm());
     assert(px.blacs_ctxt == pc.blacs_ctxt);
 
-    if (pmat.comm_2D != px.comm_2D || pmat.blacs_ctxt != px.blacs_ctxt)
-        LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.comm_2D, px.blacs_ctxt);
+    if (pmat.comm() != px.comm() || pmat.blacs_ctxt != px.blacs_ctxt)
+        LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.blacs_ctxt);
     else
         assert(pmat.get_local_size() > 0);
 
@@ -157,7 +157,7 @@ std::vector<container::Tensor> cal_dm_trans_pblas(const psi::Psi<std::complex<do
         char transa = 'N';
         char transb = 'C';
         Parallel_2D pXc;
-        LR_Util::setup_2d_division(pXc, px.get_block_size(), nvirt, naos, px.comm_2D, px.blacs_ctxt);
+        LR_Util::setup_2d_division(pXc, px.get_block_size(), nvirt, naos, px.blacs_ctxt);
         container::Tensor Xc(DAT::DT_COMPLEX_DOUBLE,
                              DEV::CpuDevice,
                              {pXc.get_col_size(), pXc.get_row_size()}); // row is "inside"(memory contiguity) for pblas
diff --git a/source/module_lr/dm_trans/test/dm_trans_test.cpp b/source/module_lr/dm_trans/test/dm_trans_test.cpp
index 668d8268bf..4e0a034f20 100644
--- a/source/module_lr/dm_trans/test/dm_trans_test.cpp
+++ b/source/module_lr/dm_trans/test/dm_trans_test.cpp
@@ -106,7 +106,7 @@ TEST_F(DMTransTest, DoubleParallel)
         LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc);
         psi::Psi<double> X(s.nks, nstate, px.get_local_size(), nullptr, false);
         Parallel_2D pc;
-        LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px.comm_2D, px.blacs_ctxt);
+        LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px.blacs_ctxt);
         psi::Psi<double> c(s.nks, pc.get_col_size(), pc.get_row_size());
         Parallel_2D pmat;
 
@@ -170,7 +170,7 @@ TEST_F(DMTransTest, ComplexParallel)
         LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc);
         psi::Psi<std::complex<double>> X(s.nks, nstate, px.get_local_size(), nullptr, false);
         Parallel_2D pc;
-        LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px.comm_2D, px.blacs_ctxt);
+        LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px.blacs_ctxt);
         psi::Psi<std::complex<double>> c(s.nks, pc.get_col_size(), pc.get_row_size());
         Parallel_2D pmat;
 
@@ -229,4 +229,4 @@ int main(int argc, char** argv)
     int result = RUN_ALL_TESTS();
     MPI_Finalize();
     return result;
-}
\ No newline at end of file
+}
diff --git a/source/module_lr/esolver_lrtd_lcao.cpp b/source/module_lr/esolver_lrtd_lcao.cpp
index 7f43274fef..cf0bb0ce78 100644
--- a/source/module_lr/esolver_lrtd_lcao.cpp
+++ b/source/module_lr/esolver_lrtd_lcao.cpp
@@ -135,8 +135,7 @@ LR::ESolver_LR<T, TR>::ESolver_LR(ModuleESolver::ESolver_KS_LCAO<T, TR>&& ks_sol
     this->eig_ks = std::move(ks_sol.pelec->ekb);
 
     this->set_dimension();
-    LR_Util::setup_2d_division(this->paraC_, 1, this->nbasis, this->nocc + this->nvirt,
-        this->paraMat_.comm_2D, this->paraMat_.blacs_ctxt);
+    LR_Util::setup_2d_division(this->paraC_, 1, this->nbasis, this->nocc + this->nvirt, this->paraMat_.blacs_ctxt);
 
     //grid integration
     this->gt_ = std::move(ks_sol.GridT);
@@ -220,8 +219,7 @@ LR::ESolver_LR<T, TR>::ESolver_LR(const Input_para& inp, Input& inp_tmp, UnitCel
     this->read_ks_wfc();
 
     this->set_dimension();
-    LR_Util::setup_2d_division(this->paraC_, 1, this->nbasis, this->nocc + this->nvirt,
-        paraMat_.comm_2D, paraMat_.blacs_ctxt);
+    LR_Util::setup_2d_division(this->paraC_, 1, this->nbasis, this->nocc + this->nvirt, paraMat_.blacs_ctxt);
 
     //allocate 2-particle state and setup 2d division
     this->nstates = inp.lr_nstates;
@@ -383,7 +381,7 @@ void LR::ESolver_LR<T, TR>::setup_eigenvectors_X()
 {
     ModuleBase::TITLE("ESolver_LR", "setup_eigenvectors_X");
     // setup ParaX
-    LR_Util::setup_2d_division(this->paraX_, 1, this->nvirt, this->nocc, this->paraC_.comm_2D, this->paraC_.blacs_ctxt);//nvirt - row, nocc - col 
+    LR_Util::setup_2d_division(this->paraX_, 1, this->nvirt, this->nocc, this->paraC_.blacs_ctxt);//nvirt - row, nocc - col 
     // if spectrum-only, read the LR-eigenstates from file and return
     if (this->input.lr_solver == "spectrum")
     {
@@ -506,4 +504,4 @@ void LR::ESolver_LR<T, TR>::read_ks_chg(Charge& chg_gs)
     }
 }
 template class LR::ESolver_LR<double, double>;
-template class LR::ESolver_LR<std::complex<double>, double>;
\ No newline at end of file
+template class LR::ESolver_LR<std::complex<double>, double>;
diff --git a/source/module_lr/utils/lr_util.cpp b/source/module_lr/utils/lr_util.cpp
index 951b38b1c2..7c1927e4ba 100644
--- a/source/module_lr/utils/lr_util.cpp
+++ b/source/module_lr/utils/lr_util.cpp
@@ -183,11 +183,10 @@ namespace LR_Util
 
 #ifdef __MPI
     // for the other matrices in the commutator other than the first one
-    void setup_2d_division(Parallel_2D& pv, int nb, int gr, int gc,
-        const MPI_Comm& comm_2D_in, const int& blacs_ctxt_in)
+    void setup_2d_division(Parallel_2D& pv, int nb, int gr, int gc, const int& blacs_ctxt_in)
     {
         ModuleBase::TITLE("LR_Util", "setup_2d_division");
-        pv.set(gr, gc, nb, comm_2D_in, blacs_ctxt_in);
+        pv.set(gr, gc, nb, blacs_ctxt_in);
     }
 #endif
 
@@ -248,4 +247,4 @@ namespace LR_Util
         }
     }
 #endif
-}
\ No newline at end of file
+}
diff --git a/source/module_lr/utils/lr_util.h b/source/module_lr/utils/lr_util.h
index 9bd90d4949..147fce62ce 100644
--- a/source/module_lr/utils/lr_util.h
+++ b/source/module_lr/utils/lr_util.h
@@ -108,8 +108,7 @@ namespace LR_Util
 
 #ifdef __MPI
     // pack the process to setup 2d divion reusing blacs_ctxt of an existing 2d-matrix
-    void setup_2d_division(Parallel_2D& pv, int nb, int gr, int gc,
-        const MPI_Comm& comm_2D_in, const int& blacs_ctxt_in);
+    void setup_2d_division(Parallel_2D& pv, int nb, int gr, int gc, const int& blacs_ctxt_in);
     /// @brief  gather 2d matrix to full matrix
     /// the defination of row and col is consistent with setup_2d_division
     template <typename T>
@@ -121,4 +120,4 @@ namespace LR_Util
     void diag_lapack(const int& n, double* mat, double* eig);
     void diag_lapack(const int& n, std::complex<double>* mat, double* eig);
 }
-#include "lr_util.hpp"
\ No newline at end of file
+#include "lr_util.hpp"
diff --git a/source/module_lr/utils/lr_util.hpp b/source/module_lr/utils/lr_util.hpp
index a6a1295c9b..0e2b29e44c 100644
--- a/source/module_lr/utils/lr_util.hpp
+++ b/source/module_lr/utils/lr_util.hpp
@@ -169,7 +169,7 @@ namespace LR_Util
                     fullmat[pv.local2global_col(j) * global_nrow + pv.local2global_row(i)] = submat[j * pv.get_row_size() + i];
 
         //reduce to root
-        MPI_Allreduce(MPI_IN_PLACE, fullmat, global_nrow * global_ncol, get_mpi_datatype(), MPI_SUM, pv.comm_2D);
+        MPI_Allreduce(MPI_IN_PLACE, fullmat, global_nrow * global_ncol, get_mpi_datatype(), MPI_SUM, pv.comm());
     };
 #endif
 

From 9a15801cec4c4b5c8dd2db568c5f6ac178c4627f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci-lite[bot]"
 <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
Date: Fri, 12 Jul 2024 12:28:10 +0000
Subject: [PATCH 6/7] [pre-commit.ci lite] apply automatic fixes

---
 .../module_tddft/norm_psi.cpp                 | 24 +++++---
 source/module_lr/AX/test/AX_test.cpp          | 26 +++++---
 .../module_lr/dm_trans/dm_trans_parallel.cpp  | 10 ++--
 source/module_lr/utils/lr_util.cpp            | 60 ++++++++++++-------
 4 files changed, 79 insertions(+), 41 deletions(-)

diff --git a/source/module_hamilt_lcao/module_tddft/norm_psi.cpp b/source/module_hamilt_lcao/module_tddft/norm_psi.cpp
index 9d708e13c7..cf3698b3ee 100644
--- a/source/module_hamilt_lcao/module_tddft/norm_psi.cpp
+++ b/source/module_hamilt_lcao/module_tddft/norm_psi.cpp
@@ -81,10 +81,12 @@ void norm_psi(const Parallel_Orbitals* pv,
                 double aa, bb;
                 aa = Cij[i * pv->ncol + j].real();
                 bb = Cij[i * pv->ncol + j].imag();
-                if (std::abs(aa) < 1e-8)
+                if (std::abs(aa) < 1e-8) {
                     aa = 0.0;
-                if (std::abs(bb) < 1e-8)
+}
+                if (std::abs(bb) < 1e-8) {
                     bb = 0.0;
+}
                 GlobalV::ofs_running << aa << "+" << bb << "i ";
             }
             GlobalV::ofs_running << std::endl;
@@ -105,13 +107,15 @@ void norm_psi(const Parallel_Orbitals* pv,
                 for (int j = 0; j < naroc[1]; ++j)
                 {
                     int igcol = globalIndex(j, pv->nb, pv->dim1, ipcol);
-                    if (igcol >= nband)
+                    if (igcol >= nband) {
                         continue;
+}
                     for (int i = 0; i < naroc[0]; ++i)
                     {
                         int igrow = globalIndex(i, pv->nb, pv->dim0, iprow);
-                        if (igrow >= nband)
+                        if (igrow >= nband) {
                             continue;
+}
                         if (igcol == igrow)
                         {
                             Cij[j * naroc[0] + i] = {1.0 / sqrt(Cij[j * naroc[0] + i].real()), 0.0};
@@ -169,10 +173,12 @@ void norm_psi(const Parallel_Orbitals* pv,
                 double aa, bb;
                 aa = psi_k[i * pv->ncol + j].real();
                 bb = psi_k[i * pv->ncol + j].imag();
-                if (std::abs(aa) < 1e-8)
+                if (std::abs(aa) < 1e-8) {
                     aa = 0.0;
-                if (std::abs(bb) < 1e-8)
+}
+                if (std::abs(bb) < 1e-8) {
                     bb = 0.0;
+}
                 GlobalV::ofs_running << aa << "+" << bb << "i ";
             }
             GlobalV::ofs_running << std::endl;
@@ -186,10 +192,12 @@ void norm_psi(const Parallel_Orbitals* pv,
                 double aa, bb;
                 aa = tmp1[i * pv->ncol + j].real();
                 bb = tmp1[i * pv->ncol + j].imag();
-                if (std::abs(aa) < 1e-8)
+                if (std::abs(aa) < 1e-8) {
                     aa = 0.0;
-                if (std::abs(bb) < 1e-8)
+}
+                if (std::abs(bb) < 1e-8) {
                     bb = 0.0;
+}
                 GlobalV::ofs_running << aa << "+" << bb << "i ";
             }
             GlobalV::ofs_running << std::endl;
diff --git a/source/module_lr/AX/test/AX_test.cpp b/source/module_lr/AX/test/AX_test.cpp
index 65a459a1d8..92ed30f7e9 100644
--- a/source/module_lr/AX/test/AX_test.cpp
+++ b/source/module_lr/AX/test/AX_test.cpp
@@ -41,12 +41,18 @@ class AXTest : public testing::Test
     }
 #endif
 
-    void set_ones(double* data, int size) { for (int i = 0;i < size;++i) data[i] = 1.0; };
-    void set_int(double* data, int size) { for (int i = 0;i < size;++i) data[i] = static_cast<double>(i + 1); };
-    void set_int(std::complex<double>* data, int size) { for (int i = 0;i < size;++i) data[i] = std::complex<double>(i + 1, -i - 1); };
-    void set_rand(double* data, int size) { for (int i = 0;i < size;++i) data[i] = double(rand()) / double(RAND_MAX) * 10.0 - 5.0; };
-    void set_rand(std::complex<double>* data, int size) { for (int i = 0;i < size;++i) data[i] = std::complex<double>(rand(), rand()) / double(RAND_MAX) * 10.0 - 5.0; };
-    void check_eq(double* data1, double* data2, int size) { for (int i = 0;i < size;++i) EXPECT_NEAR(data1[i], data2[i], 1e-10); };
+    void set_ones(double* data, int size) { for (int i = 0;i < size;++i) { data[i] = 1.0; 
+}};
+    void set_int(double* data, int size) { for (int i = 0;i < size;++i) { data[i] = static_cast<double>(i + 1); 
+}};
+    void set_int(std::complex<double>* data, int size) { for (int i = 0;i < size;++i) { data[i] = std::complex<double>(i + 1, -i - 1); 
+}};
+    void set_rand(double* data, int size) { for (int i = 0;i < size;++i) { data[i] = double(rand()) / double(RAND_MAX) * 10.0 - 5.0; 
+}};
+    void set_rand(std::complex<double>* data, int size) { for (int i = 0;i < size;++i) { data[i] = std::complex<double>(rand(), rand()) / double(RAND_MAX) * 10.0 - 5.0; 
+}};
+    void check_eq(double* data1, double* data2, int size) { for (int i = 0;i < size;++i) { EXPECT_NEAR(data1[i], data2[i], 1e-10); 
+}};
     void check_eq(std::complex<double>* data1, std::complex<double>* data2, int size)
     {
         for (int i = 0;i < size;++i)
@@ -70,7 +76,8 @@ TEST_F(AXTest, DoubleSerial)
             psi::Psi<double> c(s.nks, s.nocc + s.nvirt, s.naos);
             std::vector<container::Tensor> V(s.nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { s.naos, s.naos }));
             set_rand(c.get_pointer(), size_c);
-            for (auto& v : V)set_rand(v.data<double>(), size_v);
+            for (auto& v : V) {set_rand(v.data<double>(), size_v);
+}
             AX_for.fix_b(istate);
             AX_blas.fix_b(istate);
             LR::cal_AX_forloop_serial(V, c, s.nocc, s.nvirt, AX_for);
@@ -95,7 +102,8 @@ TEST_F(AXTest, ComplexSerial)
             psi::Psi<std::complex<double>> c(s.nks, s.nocc + s.nvirt, s.naos);
             std::vector<container::Tensor> V(s.nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { s.naos, s.naos }));
             set_rand(c.get_pointer(), size_c);
-            for (auto& v : V)set_rand(v.data<std::complex<double>>(), size_v);
+            for (auto& v : V) {set_rand(v.data<std::complex<double>>(), size_v);
+}
             AX_for.fix_b(istate);
             AX_blas.fix_b(istate);
             LR::cal_AX_forloop_serial(V, c, s.nocc, s.nvirt, AX_for);
@@ -230,7 +238,7 @@ TEST_F(AXTest, ComplexParallel)
 
 int main(int argc, char** argv)
 {
-    srand(time(NULL));  // for random number generator
+    srand(time(nullptr));  // for random number generator
     MPI_Init(&argc, &argv);
     testing::InitGoogleTest(&argc, argv);
     int result = RUN_ALL_TESTS();
diff --git a/source/module_lr/dm_trans/dm_trans_parallel.cpp b/source/module_lr/dm_trans/dm_trans_parallel.cpp
index ae857692d6..b5de36a5a3 100644
--- a/source/module_lr/dm_trans/dm_trans_parallel.cpp
+++ b/source/module_lr/dm_trans/dm_trans_parallel.cpp
@@ -25,10 +25,11 @@ std::vector<container::Tensor> cal_dm_trans_pblas(const psi::Psi<double>& X_ista
     assert(px.comm() == pc.comm());
     assert(px.blacs_ctxt == pc.blacs_ctxt);
 
-    if (pmat.comm() != px.comm() || pmat.blacs_ctxt != px.blacs_ctxt)
+    if (pmat.comm() != px.comm() || pmat.blacs_ctxt != px.blacs_ctxt) {
         LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.blacs_ctxt);
-    else
+    } else {
         assert(pmat.get_local_size() > 0);
+}
 
     int nks = c.get_nk();
     assert(nks == X_istate.get_nk());
@@ -113,10 +114,11 @@ std::vector<container::Tensor> cal_dm_trans_pblas(const psi::Psi<std::complex<do
     assert(px.comm() == pc.comm());
     assert(px.blacs_ctxt == pc.blacs_ctxt);
 
-    if (pmat.comm() != px.comm() || pmat.blacs_ctxt != px.blacs_ctxt)
+    if (pmat.comm() != px.comm() || pmat.blacs_ctxt != px.blacs_ctxt) {
         LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.blacs_ctxt);
-    else
+    } else {
         assert(pmat.get_local_size() > 0);
+}
 
     int nks = c.get_nk();
     assert(nks == X_istate.get_nk());
diff --git a/source/module_lr/utils/lr_util.cpp b/source/module_lr/utils/lr_util.cpp
index 7c1927e4ba..83491eb343 100644
--- a/source/module_lr/utils/lr_util.cpp
+++ b/source/module_lr/utils/lr_util.cpp
@@ -62,7 +62,8 @@ namespace LR_Util
     template<>
     void matsym<double>(const double* in, const int n, const Parallel_2D& pmat, double* out)
     {
-        for (int i = 0;i < pmat.get_local_size();++i)out[i] = in[i];
+        for (int i = 0;i < pmat.get_local_size();++i) {out[i] = in[i];
+}
         const double alpha = 0.5, beta = 0.5;
         const int i1 = 1;
         pdtran_(&n, &n, &alpha, in, &i1, &i1, pmat.desc, &beta, out, &i1, &i1, pmat.desc);
@@ -71,7 +72,8 @@ namespace LR_Util
     void matsym<double>(double* inout, const int n, const Parallel_2D& pmat)
     {
         std::vector<double> tmp(n * n);
-        for (int i = 0;i < pmat.get_local_size();++i)tmp[i] = inout[i];
+        for (int i = 0;i < pmat.get_local_size();++i) {tmp[i] = inout[i];
+}
         const double alpha = 0.5, beta = 0.5;
         const int i1 = 1;
         pdtran_(&n, &n, &alpha, tmp.data(), &i1, &i1, pmat.desc, &beta, inout, &i1, &i1, pmat.desc);
@@ -79,7 +81,8 @@ namespace LR_Util
     template<>
     void matsym<std::complex<double>>(const std::complex<double>* in, const int n, const Parallel_2D& pmat, std::complex<double>* out)
     {
-        for (int i = 0;i < pmat.get_local_size();++i)out[i] = in[i];
+        for (int i = 0;i < pmat.get_local_size();++i) {out[i] = in[i];
+}
         const std::complex<double> alpha(0.5, 0.0), beta(0.5, 0.0);
         const int i1 = 1;
         pztranc_(&n, &n, &alpha, in, &i1, &i1, pmat.desc, &beta, out, &i1, &i1, pmat.desc);
@@ -88,7 +91,8 @@ namespace LR_Util
     void matsym<std::complex<double>>(std::complex<double>* inout, const int n, const Parallel_2D& pmat)
     {
         std::vector<std::complex<double>> tmp(n * n);
-        for (int i = 0;i < pmat.get_local_size();++i)tmp[i] = inout[i];
+        for (int i = 0;i < pmat.get_local_size();++i) {tmp[i] = inout[i];
+}
         const std::complex<double> alpha(0.5, 0.0), beta(0.5, 0.0);
         const int i1 = 1;
         pztranc_(&n, &n, &alpha, tmp.data(), &i1, &i1, pmat.desc, &beta, inout, &i1, &i1, pmat.desc);
@@ -97,49 +101,57 @@ namespace LR_Util
     container::Tensor mat2ten_double(ModuleBase::matrix& m)
     {
         container::Tensor t(DAT::DT_DOUBLE, DEV::CpuDevice, { m.nr, m.nc });
-        for (int i = 0;i < t.NumElements();++i)t.data<double>()[i] = m.c[i];
+        for (int i = 0;i < t.NumElements();++i) {t.data<double>()[i] = m.c[i];
+}
         return t;
     }
     std::vector<container::Tensor> mat2ten_double(std::vector<ModuleBase::matrix>& m)
     {
         std::vector<container::Tensor> t;
-        for (int i = 0;i < m.size();++i) t.push_back(mat2ten_double(m[i]));
+        for (int i = 0;i < m.size();++i) { t.push_back(mat2ten_double(m[i]));
+}
         return t;
     }
     ModuleBase::matrix ten2mat_double(container::Tensor& t)
     {
         ModuleBase::matrix m(t.shape().dims()[0], t.shape().dims()[1]);
-        for (int i = 0;i < t.NumElements();++i)m.c[i] = t.data<double>()[i];
+        for (int i = 0;i < t.NumElements();++i) {m.c[i] = t.data<double>()[i];
+}
         return m;
     }
     std::vector<ModuleBase::matrix> ten2mat_double(std::vector<container::Tensor>& t)
     {
         std::vector<ModuleBase::matrix> m;
-        for (int i = 0;i < t.size();++i) m.push_back(ten2mat_double(t[i]));
+        for (int i = 0;i < t.size();++i) { m.push_back(ten2mat_double(t[i]));
+}
         return m;
     }
     container::Tensor mat2ten_complex(ModuleBase::ComplexMatrix& m)
     {
         container::Tensor t(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { m.nr, m.nc });
-        for (int i = 0;i < t.NumElements();++i)t.data<std::complex<double>>()[i] = m.c[i];
+        for (int i = 0;i < t.NumElements();++i) {t.data<std::complex<double>>()[i] = m.c[i];
+}
         return t;
     }
     std::vector<container::Tensor> mat2ten_complex(std::vector<ModuleBase::ComplexMatrix>& m)
     {
         std::vector<container::Tensor> t;
-        for (int i = 0;i < m.size();++i) t.push_back(mat2ten_complex(m[i]));
+        for (int i = 0;i < m.size();++i) { t.push_back(mat2ten_complex(m[i]));
+}
         return t;
     }
     ModuleBase::ComplexMatrix ten2mat_complex(container::Tensor& t)
     {
         ModuleBase::ComplexMatrix m(t.shape().dims()[0], t.shape().dims()[1]);
-        for (int i = 0;i < t.NumElements();++i)m.c[i] = t.data<std::complex<double>>()[i];
+        for (int i = 0;i < t.NumElements();++i) {m.c[i] = t.data<std::complex<double>>()[i];
+}
         return m;
     }
     std::vector<ModuleBase::ComplexMatrix> ten2mat_complex(std::vector<container::Tensor>& t)
     {
         std::vector<ModuleBase::ComplexMatrix> m;
-        for (int i = 0;i < t.size();++i) m.push_back(ten2mat_complex(t[i]));
+        for (int i = 0;i < t.size();++i) { m.push_back(ten2mat_complex(t[i]));
+}
         return m;
     }
 
@@ -147,26 +159,30 @@ namespace LR_Util
     {
         assert(v.size() == nr * nc);
         ModuleBase::matrix m(nr, nc, false);
-        for (int i = 0;i < v.size();++i) m.c[i] = v[i];
+        for (int i = 0;i < v.size();++i) { m.c[i] = v[i];
+}
         return m;
     }
     ModuleBase::ComplexMatrix vec2mat(const std::vector<std::complex<double>>& v, const int nr, const int nc)
     {
         assert(v.size() == nr * nc);
         ModuleBase::ComplexMatrix m(nr, nc, false);
-        for (int i = 0;i < v.size();++i) m.c[i] = v[i];
+        for (int i = 0;i < v.size();++i) { m.c[i] = v[i];
+}
         return m;
     }
     std::vector<ModuleBase::matrix> vec2mat(const std::vector<std::vector<double>>& v, const int nr, const int nc)
     {
         std::vector<ModuleBase::matrix> m(v.size());
-        for (int i = 0;i < v.size();++i) m[i] = vec2mat(v[i], nr, nc);
+        for (int i = 0;i < v.size();++i) { m[i] = vec2mat(v[i], nr, nc);
+}
         return m;
     }
     std::vector<ModuleBase::ComplexMatrix> vec2mat(const std::vector<std::vector<std::complex<double>>>& v, const int nr, const int nc)
     {
         std::vector<ModuleBase::ComplexMatrix> m(v.size());
-        for (int i = 0;i < v.size();++i) m[i] = vec2mat(v[i], nr, nc);
+        for (int i = 0;i < v.size();++i) { m[i] = vec2mat(v[i], nr, nc);
+}
         return m;
     }
 
@@ -201,7 +217,8 @@ namespace LR_Util
         const int lwork = work_tmp;
         double* work2 = new double[lwork];
         dsyev_(&jobz, &uplo, &n, mat, &n, eig, work2, &lwork, &info);
-        if (info) std::cout << "ERROR: Lapack solver, info=" << info << std::endl;
+        if (info) { std::cout << "ERROR: Lapack solver, info=" << info << std::endl;
+}
         delete[] work2;
     }
 
@@ -214,7 +231,8 @@ namespace LR_Util
         int info = 0;
         char jobz = 'V', uplo = 'U';
         zheev_(&jobz, &uplo, &n, mat, &n, eig, work2, &lwork, rwork, &info);
-        if (info) std::cout << "ERROR: Lapack solver, info=" << info << std::endl;
+        if (info) { std::cout << "ERROR: Lapack solver, info=" << info << std::endl;
+}
         delete[] rwork;
         delete[] work2;
     }
@@ -239,11 +257,13 @@ namespace LR_Util
         rho_basis.real2recip(rhor, rhog.data());
         for (int i = 0;i < 3;++i)
         {
-            for (int ig = 0; ig < rho_basis.npw; ig++)
+            for (int ig = 0; ig < rho_basis.npw; ig++) {
                 rhog[ig] *= pow(rho_basis.gcar[ig][i], 2);
+}
             rho_basis.recip2real(rhog.data(), tmp_rhor.data());
-            for (int ir = 0; ir < rho_basis.nrxx; ir++)
+            for (int ir = 0; ir < rho_basis.nrxx; ir++) {
                 lapn[ir] -= tmp_rhor[ir] * tpiba2;
+}
         }
     }
 #endif

From 251c4e236a47fe60fdcb4d5af50681e84ae11ded Mon Sep 17 00:00:00 2001
From: jinzx10 <jzx016@hotmail.com>
Date: Fri, 12 Jul 2024 20:55:11 +0800
Subject: [PATCH 7/7] fix comm() of uninitialized state

---
 source/module_basis/module_ao/parallel_2d.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/source/module_basis/module_ao/parallel_2d.cpp b/source/module_basis/module_ao/parallel_2d.cpp
index 3bd669ee55..30aceda761 100644
--- a/source/module_basis/module_ao/parallel_2d.cpp
+++ b/source/module_basis/module_ao/parallel_2d.cpp
@@ -31,7 +31,13 @@ int Parallel_2D::get_global_col_size() const
 #ifdef __MPI
 MPI_Comm Parallel_2D::comm() const
 {
-    int sys_ctxt = -1;
+    // it is an error to call blacs_get with an invalid BLACS context
+    if (blacs_ctxt < 0)
+    {
+        return MPI_COMM_NULL;
+    }
+
+    int sys_ctxt = 0;
     Cblacs_get(blacs_ctxt, 10, &sys_ctxt);
     // blacs_get with "what" = 10 takes a BLACS context and returns the index
     // of the associated system context (MPI communicator) that can be used by