From 7acb202d4dbfdba9415be058cc4d716375291f15 Mon Sep 17 00:00:00 2001 From: jinzx10 Date: Fri, 12 Jul 2024 02:41:55 +0800 Subject: [PATCH 1/7] remove comm_2D from Parallel_2D --- source/module_base/blacs_connector.h | 77 +++++++++++++++++++ source/module_basis/module_ao/parallel_2d.cpp | 23 +----- source/module_basis/module_ao/parallel_2d.h | 15 ++-- .../module_ao/parallel_orbitals.cpp | 1 - .../module_ao/test/parallel_2d_test.cpp | 4 +- .../module_ao/test/parallel_orbitals_test.cpp | 2 +- source/module_esolver/esolver_ks_lcao.cpp | 2 +- .../module_tddft/bandenergy.cpp | 11 +-- .../module_tddft/norm_psi.cpp | 8 +- .../module_tddft/propagator.cpp | 9 +-- .../module_tddft/test/bandenergy_test.cpp | 4 - .../module_tddft/test/norm_psi_test.cpp | 4 - .../module_tddft/test/propagator_test2.cpp | 6 +- .../module_tddft/test/propagator_test3.cpp | 6 +- .../module_hsolver/test/diago_pexsi_test.cpp | 1 - source/module_io/io_dmk.cpp | 12 +-- source/module_io/istate_envelope.cpp | 41 ++++------ source/module_io/read_wfc_lcao.cpp | 6 +- source/module_io/read_wfc_nao.cpp | 4 +- source/module_io/write_Vxc.hpp | 2 +- source/module_io/write_dmr.cpp | 4 +- source/module_io/write_wfc_nao.cpp | 4 +- 22 files changed, 133 insertions(+), 113 deletions(-) diff --git a/source/module_base/blacs_connector.h b/source/module_base/blacs_connector.h index 2d3baa4d5a..3bcc43811a 100644 --- a/source/module_base/blacs_connector.h +++ b/source/module_base/blacs_connector.h @@ -29,6 +29,8 @@ #ifndef BLACS_CONNECTOR_H #define BLACS_CONNECTOR_H +#include + extern "C" { void Cblacs_pinfo(int *myid, int *nprocs); @@ -41,13 +43,88 @@ extern "C" int Cblacs_pnum(int icontxt, int prow, int pcol); void Cblacs_pcoord(int icontxt, int pnum, int *prow, int *pcol); void Cblacs_exit(int icontxt); + + // broadcast (send/recv) + void Cigebs2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda); + void Cigebr2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda, int rsrc, int csrc); + + void Csgebs2d(int ConTxt, char *scope, char *top, int m, int n, float *A, int lda); + void Csgebr2d(int ConTxt, char *scope, char *top, int m, int n, float *A, int lda, int rsrc, int csrc); + + void Cdgebs2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda); + void Cdgebr2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int rsrc, int csrc); + + void Ccgebs2d(int ConTxt, char *scope, char *top, int m, int n, std::complex *A, int lda); + void Ccgebr2d(int ConTxt, char *scope, char *top, int m, int n, std::complex *A, int lda, int rsrc, int csrc); + + void Czgebs2d(int ConTxt, char *scope, char *top, int m, int n, std::complex *A, int lda); + void Czgebr2d(int ConTxt, char *scope, char *top, int m, int n, std::complex *A, int lda, int rsrc, int csrc); } +// unified interface for broadcast +template +void Cxgebs2d(int ConTxt, char *scope, char *top, int m, int n, T *A, int lda) +{ + static_assert( + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same>::value || + std::is_same>::value, + "Type not supported"); + + if (std::is_same::value) { + Cigebs2d(ConTxt, scope, top, m, n, reinterpret_cast(A), lda); + } + if (std::is_same::value) { + Csgebs2d(ConTxt, scope, top, m, n, reinterpret_cast(A), lda); + } + if (std::is_same::value) { + Cdgebs2d(ConTxt, scope, top, m, n, reinterpret_cast(A), lda); + } + if (std::is_same>::value) { + Ccgebs2d(ConTxt, scope, top, m, n, reinterpret_cast*>(A), lda); + } + if (std::is_same>::value) { + Czgebs2d(ConTxt, scope, top, m, n, reinterpret_cast*>(A), lda); + } +} + +template +void Cxgebr2d(int ConTxt, char *scope, char *top, int m, int n, T *A, int lda, int rsrc, int csrc) +{ + static_assert( + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same>::value || + std::is_same>::value, + "Type not supported"); + + if (std::is_same::value) { + Cigebr2d(ConTxt, scope, top, m, n, reinterpret_cast(A), lda, rsrc, csrc); + } + if (std::is_same::value) { + Csgebr2d(ConTxt, scope, top, m, n, reinterpret_cast(A), lda, rsrc, csrc); + } + if (std::is_same::value) { + Cdgebr2d(ConTxt, scope, top, m, n, reinterpret_cast(A), lda, rsrc, csrc); + } + if (std::is_same>::value) { + Ccgebr2d(ConTxt, scope, top, m, n, reinterpret_cast*>(A), lda, rsrc, csrc); + } + if (std::is_same>::value) { + Czgebr2d(ConTxt, scope, top, m, n, reinterpret_cast*>(A), lda, rsrc, csrc); + } +} + + #ifdef __MPI #include extern "C" { int Csys2blacs_handle(MPI_Comm SysCtxt); + MPI_Comm Cblacs2sys_handle(int BlacsCtxt); } #endif // __MPI diff --git a/source/module_basis/module_ao/parallel_2d.cpp b/source/module_basis/module_ao/parallel_2d.cpp index 3a225eec86..bc46b9d3c2 100644 --- a/source/module_basis/module_ao/parallel_2d.cpp +++ b/source/module_basis/module_ao/parallel_2d.cpp @@ -1,6 +1,5 @@ #include "parallel_2d.h" -#include "module_base/blacs_connector.h" #include "module_base/scalapack_connector.h" #include @@ -47,23 +46,11 @@ void Parallel_2D::_init_proc_grid(const MPI_Comm comm, const bool mode) std::swap(dim0, dim1); } - // create a 2D Cartesian MPI communicator (row-major by default) - int period[2] = {1, 1}; - int dim[2] = {dim0, dim1}; - const int reorder = 0; - MPI_Cart_create(comm, 2, dim, period, reorder, &comm_2D); - MPI_Cart_get(comm_2D, 2, dim, period, coord); - // initialize the BLACS grid accordingly - blacs_ctxt = Csys2blacs_handle(comm_2D); + blacs_ctxt = Csys2blacs_handle(comm); char order = 'R'; // row-major Cblacs_gridinit(&blacs_ctxt, &order, dim0, dim1); - - // TODO Currently MPI and BLACS are made to have the same Cartesian grid. - // In theory, however, BLACS would split any given communicator to create - // new ones for its own purpose when initializing the process grid, so it - // might be unnecessary to create an MPI communicator with Cartesian topology. - // ***This needs to be verified*** + Cblacs_gridinfo(blacs_ctxt, &dim0, &dim1, &coord[0], &coord[1]); } void Parallel_2D::_set_dist_info(const int mg, const int ng, const int nb) @@ -105,9 +92,8 @@ int Parallel_2D::init(const int mg, const int ng, const int nb, const MPI_Comm c return nrow == 0 || ncol == 0; } -int Parallel_2D::set(const int mg, const int ng, const int nb, const MPI_Comm comm_2D, const int blacs_ctxt) +int Parallel_2D::set(const int mg, const int ng, const int nb, const int blacs_ctxt) { - this->comm_2D = comm_2D; this->blacs_ctxt = blacs_ctxt; Cblacs_gridinfo(blacs_ctxt, &dim0, &dim1, &coord[0], &coord[1]); _set_dist_info(mg, ng, nb); @@ -124,7 +110,7 @@ void Parallel_2D::set_serial(const int mg, const int ng) coord[0] = coord[1] = 0; nrow = mg; ncol = ng; - nloc = nrow * ncol; + nloc = static_cast(nrow) * ncol; local2global_row_.resize(nrow); local2global_col_.resize(ncol); std::iota(local2global_row_.begin(), local2global_row_.end(), 0); @@ -132,7 +118,6 @@ void Parallel_2D::set_serial(const int mg, const int ng) global2local_row_ = local2global_row_; global2local_col_ = local2global_col_; #ifdef __MPI - comm_2D = MPI_COMM_NULL; blacs_ctxt = -1; #endif } diff --git a/source/module_basis/module_ao/parallel_2d.h b/source/module_basis/module_ao/parallel_2d.h index 50e62c0804..f49caefd29 100644 --- a/source/module_basis/module_ao/parallel_2d.h +++ b/source/module_basis/module_ao/parallel_2d.h @@ -4,9 +4,7 @@ #include #include -#ifdef __MPI -#include -#endif +#include "module_base/blacs_connector.h" /// @brief This class packs the basic information of /// 2D-block-cyclic parallel distribution of an arbitrary matrix. @@ -87,13 +85,12 @@ class Parallel_2D /** * @brief Set up the info of a block-cyclic distribution using given - * MPI communicator and BLACS context. + * BLACS context. * */ int set(const int mg, const int ng, const int nb, // square block is assumed - const MPI_Comm comm_2D, const int blacs_ctxt); /// BLACS context @@ -102,8 +99,7 @@ class Parallel_2D /// ScaLAPACK descriptor int desc[9] = {}; - /// 2D Cartesian MPI communicator - MPI_Comm comm_2D = MPI_COMM_NULL; + MPI_Comm comm() const { return Cblacs2sys_handle(blacs_ctxt); } #endif void set_serial(const int mg, const int ng); @@ -116,6 +112,9 @@ class Parallel_2D int nrow = 0; int ncol = 0; int64_t nloc = 0; + // NOTE: ScaLAPACK descriptors use int type for the number of rows and columns of + // both the global and local matrices, so nrow & ncol have to be int type. Their + // product, however, can exceed the range of int type. /// block size int nb = 1; @@ -124,7 +123,7 @@ class Parallel_2D int dim0 = 0; int dim1 = 0; - /// process coordinate in the MPI Cartesian grid + /// process coordinate in the BLACS grid int coord[2] = {-1, -1}; protected: diff --git a/source/module_basis/module_ao/parallel_orbitals.cpp b/source/module_basis/module_ao/parallel_orbitals.cpp index 98c76a7065..b4d7868e0f 100644 --- a/source/module_basis/module_ao/parallel_orbitals.cpp +++ b/source/module_basis/module_ao/parallel_orbitals.cpp @@ -209,7 +209,6 @@ void Parallel_Orbitals::set_desc_wfc_Eij(const int& nbasis, const int& nbands, c { ModuleBase::TITLE("Parallel_2D", "set_desc_wfc_Eij"); #ifdef __DEBUG - assert(this->comm_2D != MPI_COMM_NULL); assert(nbasis > 0 && nbands > 0 && lld > 0); assert(this->nb > 0 && this->dim0 > 0 && this->dim1 > 0); #endif diff --git a/source/module_basis/module_ao/test/parallel_2d_test.cpp b/source/module_basis/module_ao/test/parallel_2d_test.cpp index 206231398a..715a11115a 100644 --- a/source/module_basis/module_ao/test/parallel_2d_test.cpp +++ b/source/module_basis/module_ao/test/parallel_2d_test.cpp @@ -67,7 +67,7 @@ TEST_F(test_para2d, Divide2D) EXPECT_LE(p2d.dim0, p2d.dim1); // 2. MPI 2d communicator - EXPECT_NE(p2d.comm_2D, MPI_COMM_NULL); + //EXPECT_NE(p2d.comm_2D, MPI_COMM_NULL); // 3. local2global and local sizes int lr = p2d.get_row_size(); @@ -124,7 +124,7 @@ TEST_F(test_para2d, DescReuseCtxt) p1.init(sizes[0].first, sizes[0].second, nb, MPI_COMM_WORLD); Parallel_2D p2; // use 2 different sizes, but they can share the same ctxt - p2.set(sizes[1].first, sizes[1].second, nb, p1.comm_2D, p1.blacs_ctxt); + p2.set(sizes[1].first, sizes[1].second, nb, p1.blacs_ctxt); EXPECT_EQ(p1.desc[1], p2.desc[1]); diff --git a/source/module_basis/module_ao/test/parallel_orbitals_test.cpp b/source/module_basis/module_ao/test/parallel_orbitals_test.cpp index 06bd6f010b..fe09d9fca6 100644 --- a/source/module_basis/module_ao/test/parallel_orbitals_test.cpp +++ b/source/module_basis/module_ao/test/parallel_orbitals_test.cpp @@ -60,7 +60,7 @@ TEST_F(TestParaO, Divide2D) else EXPECT_LE(po.dim0, po.dim1); //2. comm_2D - EXPECT_NE(po.comm_2D, MPI_COMM_NULL); + //EXPECT_NE(po.comm_2D, MPI_COMM_NULL); //3. local2global and local sizes int lr = po.get_row_size(); diff --git a/source/module_esolver/esolver_ks_lcao.cpp b/source/module_esolver/esolver_ks_lcao.cpp index 601b77030b..de65c1edb7 100644 --- a/source/module_esolver/esolver_ks_lcao.cpp +++ b/source/module_esolver/esolver_ks_lcao.cpp @@ -573,7 +573,7 @@ void ESolver_KS_LCAO::init_basis_lcao(Input& inp, UnitCell& ucell) try_nb += ParaV.set_nloc_wfc_Eij(GlobalV::NBANDS, GlobalV::ofs_running, GlobalV::ofs_warning); if (try_nb != 0) { - ParaV.set(GlobalV::NLOCAL, GlobalV::NLOCAL, 1, ParaV.comm_2D, ParaV.blacs_ctxt); + ParaV.set(GlobalV::NLOCAL, GlobalV::NLOCAL, 1, ParaV.blacs_ctxt); try_nb = ParaV.set_nloc_wfc_Eij(GlobalV::NBANDS, GlobalV::ofs_running, GlobalV::ofs_warning); } diff --git a/source/module_hamilt_lcao/module_tddft/bandenergy.cpp b/source/module_hamilt_lcao/module_tddft/bandenergy.cpp index b1ead443ba..37212a6606 100644 --- a/source/module_hamilt_lcao/module_tddft/bandenergy.cpp +++ b/source/module_hamilt_lcao/module_tddft/bandenergy.cpp @@ -103,9 +103,7 @@ void compute_ekb(const Parallel_Orbitals* pv, } int info; - int myid; int naroc[2]; - MPI_Comm_rank(pv->comm_2D, &myid); double* Eii = new double[nband]; ModuleBase::GlobalFunc::ZEROS(Eii, nband); @@ -113,10 +111,7 @@ void compute_ekb(const Parallel_Orbitals* pv, { for (int ipcol = 0; ipcol < pv->dim1; ++ipcol) { - const int coord[2] = {iprow, ipcol}; - int src_rank; - info = MPI_Cart_rank(pv->comm_2D, coord, &src_rank); - if (myid == src_rank) + if (iprow == pv->coord[0] && ipcol == pv->coord[1]) { naroc[0] = pv->nrow; naroc[1] = pv->ncol; @@ -139,7 +134,7 @@ void compute_ekb(const Parallel_Orbitals* pv, } } // loop ipcol } // loop iprow - info = MPI_Allreduce(Eii, ekb, nband, MPI_DOUBLE, MPI_SUM, pv->comm_2D); + info = MPI_Allreduce(Eii, ekb, nband, MPI_DOUBLE, MPI_SUM, pv->comm()); delete[] tmp1; delete[] Eij; @@ -148,4 +143,4 @@ void compute_ekb(const Parallel_Orbitals* pv, #endif -} // namespace module_tddft \ No newline at end of file +} // namespace module_tddft diff --git a/source/module_hamilt_lcao/module_tddft/norm_psi.cpp b/source/module_hamilt_lcao/module_tddft/norm_psi.cpp index a9e960fce9..9d708e13c7 100644 --- a/source/module_hamilt_lcao/module_tddft/norm_psi.cpp +++ b/source/module_hamilt_lcao/module_tddft/norm_psi.cpp @@ -92,19 +92,13 @@ void norm_psi(const Parallel_Orbitals* pv, GlobalV::ofs_running << std::endl; } - int info; - int myid; - MPI_Comm_rank(pv->comm_2D, &myid); int naroc[2]; // maximum number of row or column for (int iprow = 0; iprow < pv->dim0; ++iprow) { for (int ipcol = 0; ipcol < pv->dim1; ++ipcol) { - const int coord[2] = {iprow, ipcol}; - int src_rank; - info = MPI_Cart_rank(pv->comm_2D, coord, &src_rank); - if (myid == src_rank) + if (iprow == pv->coord[0] && ipcol == pv->coord[1]) { naroc[0] = pv->nrow; naroc[1] = pv->ncol; diff --git a/source/module_hamilt_lcao/module_tddft/propagator.cpp b/source/module_hamilt_lcao/module_tddft/propagator.cpp index 44f102f7c5..57601c4aaa 100644 --- a/source/module_hamilt_lcao/module_tddft/propagator.cpp +++ b/source/module_hamilt_lcao/module_tddft/propagator.cpp @@ -312,18 +312,13 @@ void Propagator::compute_propagator_taylor(const int nlocal, // set rank0 int info; - int myid; - MPI_Comm_rank(this->ParaV->comm_2D, &myid); int naroc[2]; // maximum number of row or column for (int iprow = 0; iprow < this->ParaV->dim0; ++iprow) { for (int ipcol = 0; ipcol < this->ParaV->dim1; ++ipcol) { - const int coord[2] = {iprow, ipcol}; - int src_rank; - info = MPI_Cart_rank(this->ParaV->comm_2D, coord, &src_rank); - if (myid == src_rank) + if (iprow == ParaV->coord[0] && ipcol == ParaV->coord[1]) { naroc[0] = this->ParaV->nrow; naroc[1] = this->ParaV->ncol; @@ -611,4 +606,4 @@ void Propagator::compute_propagator_etrs(const int nlocal, } #endif -} // namespace module_tddft \ No newline at end of file +} // namespace module_tddft diff --git a/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp b/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp index 2c164f50d0..e89cbe91c5 100644 --- a/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp +++ b/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp @@ -47,13 +47,9 @@ TEST(BandEnergyTest, testBandEnergy) pv->nb = 1; int dim[2]; - int period[2] = {1, 1}; - int reorder = 0; dim[0] = nprow; dim[1] = npcol; - MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &pv->comm_2D); - // Initialize input matrices int info; int mb = 1, nb = 1, lda = nband, ldc = nlocal; diff --git a/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp b/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp index d92817af90..d5a9ebe175 100644 --- a/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp +++ b/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp @@ -44,13 +44,9 @@ TEST(NormPsiTest, testNormPsi) pv->nb = 1; int dim[2]; - int period[2] = {1, 1}; - int reorder = 0; dim[0] = nprow; dim[1] = npcol; - MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &pv->comm_2D); - // Initialize input matrices int info; int mb = 1, nb = 1, lda = nband, ldc = nlocal; diff --git a/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp b/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp index e015e17f45..cce7e5d4aa 100644 --- a/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp +++ b/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp @@ -36,13 +36,9 @@ TEST(PropagatorTest, testPropagatorTaylor) pv->nb = 1; int dim[2]; - int period[2] = {1, 1}; - int reorder = 0; dim[0] = nprow; dim[1] = npcol; - MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &pv->comm_2D); - INPUT.mdp.md_dt = 4; // Initialize input matrices @@ -116,4 +112,4 @@ TEST(PropagatorTest, testPropagatorTaylor) delete[] U_operator; delete[] Htmp; delete[] Stmp; -} \ No newline at end of file +} diff --git a/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp b/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp index 4023b518fe..0926fc0018 100644 --- a/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp +++ b/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp @@ -37,13 +37,9 @@ TEST(PropagatorTest, testPropagatorETRS) pv->nb = 1; int dim[2]; - int period[2] = {1, 1}; - int reorder = 0; dim[0] = nprow; dim[1] = npcol; - MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &pv->comm_2D); - INPUT.mdp.md_dt = 4; // Initialize input matrices @@ -121,4 +117,4 @@ TEST(PropagatorTest, testPropagatorETRS) delete[] Htmp; delete[] Stmp; delete[] Hlaststep; -} \ No newline at end of file +} diff --git a/source/module_hsolver/test/diago_pexsi_test.cpp b/source/module_hsolver/test/diago_pexsi_test.cpp index 32468be546..693e84f9bd 100644 --- a/source/module_hsolver/test/diago_pexsi_test.cpp +++ b/source/module_hsolver/test/diago_pexsi_test.cpp @@ -144,7 +144,6 @@ class PexsiPrepare po.nrow = hmtest.nrow; po.nb = nb2d; po.blacs_ctxt = icontxt; - po.comm_2D = MPI_COMM_WORLD; po.dim0 = nprows; po.dim1 = npcols; diff --git a/source/module_io/io_dmk.cpp b/source/module_io/io_dmk.cpp index 23776ac3d4..b40128a929 100644 --- a/source/module_io/io_dmk.cpp +++ b/source/module_io/io_dmk.cpp @@ -133,7 +133,7 @@ bool ModuleIO::read_dmk(const int nspin, int my_rank = 0; #ifdef __MPI - MPI_Comm_rank(pv.comm_2D, &my_rank); + MPI_Comm_rank(pv.comm(), &my_rank); #endif int nlocal = pv.get_global_row_size(); @@ -216,7 +216,7 @@ bool ModuleIO::read_dmk(const int nspin, } // rank0 #ifdef __MPI - MPI_Bcast(&read_success, 1, MPI_C_BOOL, 0, pv.comm_2D); + MPI_Bcast(&read_success, 1, MPI_C_BOOL, 0, pv.comm()); #endif if (read_success) { @@ -225,7 +225,7 @@ bool ModuleIO::read_dmk(const int nspin, dmk.resize(nspin * nk, std::vector(pv.get_row_size() * pv.get_col_size())); Parallel_2D pv_glb; - pv_glb.set(nlocal, nlocal, nlocal, pv.comm_2D, pv.blacs_ctxt); + pv_glb.set(nlocal, nlocal, nlocal, pv.blacs_ctxt); for (int ik = 0; ik < nspin * nk; ik++) { Cpxgemr2d(nlocal, nlocal, @@ -258,7 +258,7 @@ void ModuleIO::write_dmk(const std::vector>& dmk, int my_rank = 0; #ifdef __MPI - MPI_Comm_rank(pv.comm_2D, &my_rank); + MPI_Comm_rank(pv.comm(), &my_rank); #endif bool gamma_only = std::is_same::value; @@ -279,7 +279,7 @@ void ModuleIO::write_dmk(const std::vector>& dmk, // gather dmk[ik] to dmk_global std::vector dmk_global(my_rank == 0 ? nlocal * nlocal : 0); #ifdef __MPI - pv_glb.set(nlocal, nlocal, nlocal, pv.comm_2D, pv.blacs_ctxt); + pv_glb.set(nlocal, nlocal, nlocal, pv.blacs_ctxt); Cpxgemr2d(nlocal, nlocal, const_cast(dmk[ik + nk * ispin].data()), @@ -365,4 +365,4 @@ template void ModuleIO::write_dmk>( const int precision, const std::vector& efs, const UnitCell* ucell, - const Parallel_2D& pv); \ No newline at end of file + const Parallel_2D& pv); diff --git a/source/module_io/istate_envelope.cpp b/source/module_io/istate_envelope.cpp index 801b8e4db0..4081e60345 100644 --- a/source/module_io/istate_envelope.cpp +++ b/source/module_io/istate_envelope.cpp @@ -584,49 +584,42 @@ void IState_Envelope::wfc_2d_to_grid(const T* lowf_2d, // MPI and memory related const int mem_stride = 1; int mpi_info = 0; - auto mpi_dtype = std::is_same::value ? MPI_DOUBLE : MPI_DOUBLE_COMPLEX; // get the rank of the current process int rank = 0; - MPI_Comm_rank(pv.comm_2D, &rank); + MPI_Comm_rank(pv.comm(), &rank); - // calculate the maximum number of nlocal over all processes in pv.comm_2D range + // calculate the maximum number of nlocal over all processes in pv.comm() range long buf_size; - mpi_info = MPI_Reduce(&pv.nloc_wfc, &buf_size, 1, MPI_LONG, MPI_MAX, 0, pv.comm_2D); - mpi_info = MPI_Bcast(&buf_size, 1, MPI_LONG, 0, pv.comm_2D); // get and then broadcast + mpi_info = MPI_Reduce(&pv.nloc_wfc, &buf_size, 1, MPI_LONG, MPI_MAX, 0, pv.comm()); + mpi_info = MPI_Bcast(&buf_size, 1, MPI_LONG, 0, pv.comm()); // get and then broadcast std::vector lowf_block(buf_size); // this quantity seems to have the value returned by function numroc_ in ScaLAPACK? int naroc[2]; + // for BLACS broadcast + char scope = 'A'; + char top = ' '; + // loop over all processors for (int iprow = 0; iprow < pv.dim0; ++iprow) { for (int ipcol = 0; ipcol < pv.dim1; ++ipcol) { - // get the rank of the processor at the given coordinate - int rank_at_coord; - const int mpi_cart_coord[2] = {iprow, ipcol}; - mpi_info = MPI_Cart_rank(pv.comm_2D, mpi_cart_coord, &rank_at_coord); // get the MPI rank - - // keep in mind present function is concurrently called by all processors, thus - // the following code block will only be executed once for each processor, which means - // for each processor, get its MPI rank and MPI coord, then assign the naroc[0] and naroc[1] - // with the value which should have been calculated automatically by ScaLAPACK function - // numroc_. - if (rank == rank_at_coord) + if (iprow == pv.coord[0] && ipcol == pv.coord[1]) { BlasConnector::copy(pv.nloc_wfc, lowf_2d, mem_stride, lowf_block.data(), mem_stride); naroc[0] = pv.nrow; naroc[1] = pv.ncol_bands; + Cxgebs2d(pv.blacs_ctxt, &scope, &top, 2, 1, naroc, 2); + Cxgebs2d(pv.blacs_ctxt, &scope, &top, buf_size, 1, lowf_block.data(), buf_size); + } + else + { + Cxgebr2d(pv.blacs_ctxt, &scope, &top, 2, 1, naroc, 2, iprow, ipcol); + Cxgebr2d(pv.blacs_ctxt, &scope, &top, buf_size, 1, lowf_block.data(), buf_size, iprow, ipcol); } - - // broadcast the number of row and column - mpi_info = MPI_Bcast(naroc, 2, MPI_INT, rank_at_coord, pv.comm_2D); - - // broadcast the data, this means the data owned by one processor is broadcast - // to all other processors in the communicator. - mpi_info = MPI_Bcast(lowf_block.data(), buf_size, mpi_dtype, rank_at_coord, pv.comm_2D); // then use it to set the wfc_grid. mpi_info = this->set_wfc_grid(naroc, @@ -666,4 +659,4 @@ int IState_Envelope::localIndex(int globalindex, int nblk, int nprocs, int& mypr { myproc = int((globalindex % (nblk * nprocs)) / nblk); return int(globalindex / (nblk * nprocs)) * nblk + globalindex % nblk; -} \ No newline at end of file +} diff --git a/source/module_io/read_wfc_lcao.cpp b/source/module_io/read_wfc_lcao.cpp index 20766203de..c7652a4fd5 100644 --- a/source/module_io/read_wfc_lcao.cpp +++ b/source/module_io/read_wfc_lcao.cpp @@ -258,7 +258,7 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi const std::string flowf_prefix = gamma_only ? "WFC_GAMMA" : "WFC_NAO_K"; // MPI-related variables init int iproc; - MPI_Comm_rank(p2d.comm_2D, &iproc); + MPI_Comm_rank(p2d.comm(), &iproc); // then start int nbands_ = -1, nbasis_ = -1; for (int ik = 0; ik < nks; ik++) @@ -289,12 +289,12 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi wk.push_back(wk_); kvec_c.push_back(kvec); } - MPI_Barrier(p2d.comm_2D); // wait for finishing the reading task + MPI_Barrier(p2d.comm()); // wait for finishing the reading task // scatter the lowf_glb to lowf_loc Parallel_2D p2d_glb; Parallel_Common::bcast_int(nbands); Parallel_Common::bcast_int(nbasis); - p2d_glb.init(nbasis, nbands, std::max(nbasis, nbands), p2d.comm_2D); // in the same comm world + p2d_glb.init(nbasis, nbands, std::max(nbasis, nbands), p2d.comm()); // in the same comm world lowf_loc_k.resize(p2d.nrow * p2d.ncol); Cpxgemr2d(nbasis, nbands, diff --git a/source/module_io/read_wfc_nao.cpp b/source/module_io/read_wfc_nao.cpp index f399713f6c..6f2edc0f21 100644 --- a/source/module_io/read_wfc_nao.cpp +++ b/source/module_io/read_wfc_nao.cpp @@ -46,7 +46,7 @@ bool ModuleIO::read_wfc_nao( psid.resize(nk, nbands_local, nlocal_local); #ifdef __MPI - MPI_Comm_rank(ParaV.comm_2D, &myrank); + MPI_Comm_rank(ParaV.comm(), &myrank); #endif // lambda function to read one file @@ -144,7 +144,7 @@ bool ModuleIO::read_wfc_nao( psid.fix_k(ik); #ifdef __MPI Parallel_2D pv_glb; - pv_glb.set(nlocal, nbands, std::max(nlocal, nbands), ParaV.comm_2D, ParaV.blacs_ctxt); + pv_glb.set(nlocal, nbands, std::max(nlocal, nbands), ParaV.blacs_ctxt); Cpxgemr2d(nlocal, nbands, ctot.data(), diff --git a/source/module_io/write_Vxc.hpp b/source/module_io/write_Vxc.hpp index 1a530c8dfd..d690d53909 100644 --- a/source/module_io/write_Vxc.hpp +++ b/source/module_io/write_Vxc.hpp @@ -40,7 +40,7 @@ void set_para2d_MO(const Parallel_Orbitals& pv, const int nbands, Parallel_2D& p { std::ofstream ofs; #ifdef __MPI - p2d.set(nbands, nbands, pv.nb, pv.comm_2D, pv.blacs_ctxt); + p2d.set(nbands, nbands, pv.nb, pv.blacs_ctxt); #else p2d.set_serial(nbands, nbands); #endif diff --git a/source/module_io/write_dmr.cpp b/source/module_io/write_dmr.cpp index c4b764dc5f..bfd954a493 100644 --- a/source/module_io/write_dmr.cpp +++ b/source/module_io/write_dmr.cpp @@ -70,7 +70,7 @@ void write_dmr(const std::vector*> dmr, // gather the parallel matrix to serial matrix #ifdef __MPI Parallel_Orbitals serialV; - serialV.init(nbasis, nbasis, nbasis, paraV.comm_2D); + serialV.init(nbasis, nbasis, nbasis, paraV.comm()); serialV.set_serial(nbasis, nbasis); serialV.set_atomic_trace(GlobalC::ucell.get_iat2iwt(), GlobalC::ucell.nat, nbasis); hamilt::HContainer dm_serial(&serialV); @@ -91,4 +91,4 @@ void write_dmr(const std::vector*> dmr, } } -} // namespace ModuleIO \ No newline at end of file +} // namespace ModuleIO diff --git a/source/module_io/write_wfc_nao.cpp b/source/module_io/write_wfc_nao.cpp index d342bad39c..f342f9b237 100644 --- a/source/module_io/write_wfc_nao.cpp +++ b/source/module_io/write_wfc_nao.cpp @@ -222,7 +222,7 @@ void write_wfc_nao(const int out_type, // If using MPI, the nbasis and nbands in psi is the value on local rank, // so get nlocal and nbands from pv->desc_wfc[2] and pv->desc_wfc[3] #ifdef __MPI - MPI_Comm_rank(pv.comm_2D, &myid); + MPI_Comm_rank(pv.comm(), &myid); nlocal = pv.desc_wfc[2]; nbands = pv.desc_wfc[3]; #else @@ -241,7 +241,7 @@ void write_wfc_nao(const int out_type, { psi.fix_k(ik); #ifdef __MPI - pv_glb.set(nlocal, nbands, blk_glb, pv.comm_2D, pv.blacs_ctxt); + pv_glb.set(nlocal, nbands, blk_glb, pv.blacs_ctxt); Cpxgemr2d(nlocal, nbands, psi.get_pointer(), From 9cd1697fb4cf9a9c9ee671b92986ab374faaf276 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Thu, 11 Jul 2024 19:24:51 +0000 Subject: [PATCH 2/7] [pre-commit.ci lite] apply automatic fixes --- .../module_ao/test/parallel_2d_test.cpp | 17 +++++++++++------ .../module_tddft/propagator.cpp | 18 ++++++++++++------ source/module_io/read_wfc_lcao.cpp | 12 ++++++++---- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/source/module_basis/module_ao/test/parallel_2d_test.cpp b/source/module_basis/module_ao/test/parallel_2d_test.cpp index 715a11115a..5073571bcc 100644 --- a/source/module_basis/module_ao/test/parallel_2d_test.cpp +++ b/source/module_basis/module_ao/test/parallel_2d_test.cpp @@ -61,10 +61,11 @@ TEST_F(test_para2d, Divide2D) // 1. dim0 and dim1 EXPECT_EQ(p2d.dim0 * p2d.dim1, dsize); - if (mode) + if (mode) { EXPECT_LE(p2d.dim1, p2d.dim0); - else + } else { EXPECT_LE(p2d.dim0, p2d.dim1); +} // 2. MPI 2d communicator //EXPECT_NE(p2d.comm_2D, MPI_COMM_NULL); @@ -96,18 +97,22 @@ TEST_F(test_para2d, Divide2D) auto sum_array = [&p2d](const int& gr, const int& gc) -> std::pair { int sum_row = 0; int sum_col = 0; - for (int i = 0; i < gr; ++i) + for (int i = 0; i < gr; ++i) { sum_row += p2d.global2local_row(i); - for (int i = 0; i < gc; ++i) +} + for (int i = 0; i < gc; ++i) { sum_col += p2d.global2local_col(i); +} return {sum_row, sum_col}; }; std::pair sumrc = sum_array(gr, gc); EXPECT_EQ(std::get<0>(sumrc), lr * (lr - 1) / 2 - (gr - lr)); EXPECT_EQ(std::get<1>(sumrc), lc * (lc - 1) / 2 - (gc - lc)); - for (int i = 0; i < lr; ++i) - for (int j = 0; j < lc; ++j) + for (int i = 0; i < lr; ++i) { + for (int j = 0; j < lc; ++j) { EXPECT_TRUE(p2d.in_this_processor(p2d.local2global_row(i), p2d.local2global_col(j))); +} +} EXPECT_EQ(p2d.get_global_row_size(), gr); EXPECT_EQ(p2d.get_global_col_size(), gc); diff --git a/source/module_hamilt_lcao/module_tddft/propagator.cpp b/source/module_hamilt_lcao/module_tddft/propagator.cpp index 57601c4aaa..8ac07aa2a6 100644 --- a/source/module_hamilt_lcao/module_tddft/propagator.cpp +++ b/source/module_hamilt_lcao/module_tddft/propagator.cpp @@ -241,10 +241,12 @@ void Propagator::compute_propagator_cn2(const int nlocal, double aa, bb; aa = U_operator[i * this->ParaV->ncol + j].real(); bb = U_operator[i * this->ParaV->ncol + j].imag(); - if (std::abs(aa) < 1e-8) + if (std::abs(aa) < 1e-8) { aa = 0.0; - if (std::abs(bb) < 1e-8) +} + if (std::abs(bb) < 1e-8) { bb = 0.0; +} GlobalV::ofs_running << aa << "+" << bb << "i "; } GlobalV::ofs_running << std::endl; @@ -325,13 +327,15 @@ void Propagator::compute_propagator_taylor(const int nlocal, for (int j = 0; j < naroc[1]; ++j) { int igcol = globalIndex(j, this->ParaV->nb, this->ParaV->dim1, ipcol); - if (igcol >= nlocal) + if (igcol >= nlocal) { continue; +} for (int i = 0; i < naroc[0]; ++i) { int igrow = globalIndex(i, this->ParaV->nb, this->ParaV->dim0, iprow); - if (igrow >= nlocal) + if (igrow >= nlocal) { continue; +} if (igcol == igrow) { rank0[j * naroc[0] + i] = {1.0, 0.0}; @@ -552,10 +556,12 @@ void Propagator::compute_propagator_taylor(const int nlocal, double aa, bb; aa = U_operator[i * this->ParaV->ncol + j].real(); bb = U_operator[i * this->ParaV->ncol + j].imag(); - if (std::abs(aa) < 1e-8) + if (std::abs(aa) < 1e-8) { aa = 0.0; - if (std::abs(bb) < 1e-8) +} + if (std::abs(bb) < 1e-8) { bb = 0.0; +} GlobalV::ofs_running << aa << "+" << bb << "i "; } GlobalV::ofs_running << std::endl; diff --git a/source/module_io/read_wfc_lcao.cpp b/source/module_io/read_wfc_lcao.cpp index c7652a4fd5..6767069e5a 100644 --- a/source/module_io/read_wfc_lcao.cpp +++ b/source/module_io/read_wfc_lcao.cpp @@ -25,8 +25,9 @@ void ModuleIO::read_abacus_lowf(const std::string& flowf, { // assert the T must be double or float std::ifstream ifs(flowf.c_str()); - if (!ifs) + if (!ifs) { ModuleBase::WARNING_QUIT("read_abacus_lowf", "open file failed: " + flowf); +} // will use line-by-line parse std::string line; bool read_kvec = false; @@ -138,8 +139,9 @@ void ModuleIO::read_abacus_lowf(const std::string& flowf, double& wk) { std::ifstream ifs(flowf.c_str()); - if (!ifs) + if (!ifs) { ModuleBase::WARNING_QUIT("read_abacus_lowf", "open file failed: " + flowf); +} // will use line-by-line parse std::string line; bool read_kvec = false; @@ -266,8 +268,9 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi // check existence of file const std::string flowf = out_dir + "/" + flowf_prefix + std::to_string(ik + 1) + ".txt"; std::ifstream ifs(flowf); - if (!ifs) + if (!ifs) { ModuleBase::WARNING_QUIT("restart_from_file", "open file failed: " + flowf); +} std::vector lowf_glb; std::vector lowf_loc_k; @@ -400,8 +403,9 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi // check existence of file const std::string flowf = out_dir + "/" + flowf_prefix + std::to_string(ik + 1) + ".txt"; const std::ifstream ifs(flowf); - if (!ifs) + if (!ifs) { ModuleBase::WARNING_QUIT("restart_from_file", "open file failed: " + flowf); +} std::vector lowf_; std::vector ekb_; From cecc8b3d6bc5aeadf36eda5c57191c3777c548ef Mon Sep 17 00:00:00 2001 From: jinzx10 Date: Fri, 12 Jul 2024 16:00:43 +0800 Subject: [PATCH 3/7] fix comm() --- source/module_basis/module_ao/parallel_2d.cpp | 10 ++++++++++ source/module_basis/module_ao/parallel_2d.h | 2 +- .../module_tddft/test/CMakeLists.txt | 2 +- .../module_tddft/test/bandenergy_test.cpp | 8 ++------ .../module_tddft/test/norm_psi_test.cpp | 2 ++ .../module_tddft/test/propagator_test1.cpp | 1 + .../module_tddft/test/propagator_test2.cpp | 2 ++ .../module_tddft/test/propagator_test3.cpp | 2 ++ 8 files changed, 21 insertions(+), 8 deletions(-) diff --git a/source/module_basis/module_ao/parallel_2d.cpp b/source/module_basis/module_ao/parallel_2d.cpp index bc46b9d3c2..3bd669ee55 100644 --- a/source/module_basis/module_ao/parallel_2d.cpp +++ b/source/module_basis/module_ao/parallel_2d.cpp @@ -29,6 +29,16 @@ int Parallel_2D::get_global_col_size() const } #ifdef __MPI +MPI_Comm Parallel_2D::comm() const +{ + int sys_ctxt = -1; + Cblacs_get(blacs_ctxt, 10, &sys_ctxt); + // blacs_get with "what" = 10 takes a BLACS context and returns the index + // of the associated system context (MPI communicator) that can be used by + // blacs2sys_handle to get the MPI communicator. + return Cblacs2sys_handle(sys_ctxt); +} + void Parallel_2D::_init_proc_grid(const MPI_Comm comm, const bool mode) { // determine the number of rows and columns of the process grid diff --git a/source/module_basis/module_ao/parallel_2d.h b/source/module_basis/module_ao/parallel_2d.h index f49caefd29..4fa62385b4 100644 --- a/source/module_basis/module_ao/parallel_2d.h +++ b/source/module_basis/module_ao/parallel_2d.h @@ -99,7 +99,7 @@ class Parallel_2D /// ScaLAPACK descriptor int desc[9] = {}; - MPI_Comm comm() const { return Cblacs2sys_handle(blacs_ctxt); } + MPI_Comm comm() const; #endif void set_serial(const int mg, const int ng); diff --git a/source/module_hamilt_lcao/module_tddft/test/CMakeLists.txt b/source/module_hamilt_lcao/module_tddft/test/CMakeLists.txt index d37a98d217..e7c7bb96e9 100644 --- a/source/module_hamilt_lcao/module_tddft/test/CMakeLists.txt +++ b/source/module_hamilt_lcao/module_tddft/test/CMakeLists.txt @@ -13,7 +13,7 @@ AddTest( AddTest( TARGET tddft_bandenergy_test LIBS ${math_libs} base device tddft_test_lib - SOURCES bandenergy_test.cpp ../bandenergy.cpp + SOURCES bandenergy_test.cpp ../bandenergy.cpp ../../../module_basis/module_ao/parallel_2d.cpp ../../../module_basis/module_ao/parallel_orbitals.cpp ) AddTest( diff --git a/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp b/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp index e89cbe91c5..853b75be76 100644 --- a/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp +++ b/source/module_hamilt_lcao/module_tddft/test/bandenergy_test.cpp @@ -20,12 +20,6 @@ #define doublethreshold 1e-8 double module_tddft::Evolve_elec::td_print_eij = -1; -Parallel_Orbitals::Parallel_Orbitals() -{ -} -Parallel_Orbitals::~Parallel_Orbitals() -{ -} TEST(BandEnergyTest, testBandEnergy) { @@ -45,6 +39,8 @@ TEST(BandEnergyTest, testBandEnergy) pv->dim0 = 1; pv->dim1 = 1; pv->nb = 1; + pv->blacs_ctxt = 0; + pv->coord[0] = pv->coord[1] = 0; int dim[2]; dim[0] = nprow; diff --git a/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp b/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp index d5a9ebe175..6c78937803 100644 --- a/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp +++ b/source/module_hamilt_lcao/module_tddft/test/norm_psi_test.cpp @@ -42,6 +42,8 @@ TEST(NormPsiTest, testNormPsi) pv->dim0 = 1; pv->dim1 = 1; pv->nb = 1; + pv->blacs_ctxt = 0; + pv->coord[0] = pv->coord[1] = 0; int dim[2]; dim[0] = nprow; diff --git a/source/module_hamilt_lcao/module_tddft/test/propagator_test1.cpp b/source/module_hamilt_lcao/module_tddft/test/propagator_test1.cpp index 415becbe70..c087612617 100644 --- a/source/module_hamilt_lcao/module_tddft/test/propagator_test1.cpp +++ b/source/module_hamilt_lcao/module_tddft/test/propagator_test1.cpp @@ -37,6 +37,7 @@ TEST(PropagatorTest, testPropagatorCN) pv = new Parallel_Orbitals(); pv->nloc = nlocal * nlocal; pv->ncol = nlocal; + pv->coord[0] = pv->coord[1] = 0; INPUT.mdp.md_dt = 4; // Initialize input matrices diff --git a/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp b/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp index cce7e5d4aa..60bee490f7 100644 --- a/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp +++ b/source/module_hamilt_lcao/module_tddft/test/propagator_test2.cpp @@ -34,6 +34,8 @@ TEST(PropagatorTest, testPropagatorTaylor) pv->dim0 = 1; pv->dim1 = 1; pv->nb = 1; + pv->blacs_ctxt = 0; + pv->coord[0] = pv->coord[1] = 0; int dim[2]; dim[0] = nprow; diff --git a/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp b/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp index 0926fc0018..d42b08c379 100644 --- a/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp +++ b/source/module_hamilt_lcao/module_tddft/test/propagator_test3.cpp @@ -35,6 +35,8 @@ TEST(PropagatorTest, testPropagatorETRS) pv->dim0 = 1; pv->dim1 = 1; pv->nb = 1; + pv->blacs_ctxt = 0; + pv->coord[0] = pv->coord[1] = 0; int dim[2]; dim[0] = nprow; From a0248fc6df8299a164385b99840eb2bef25656c1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Fri, 12 Jul 2024 09:20:53 +0000 Subject: [PATCH 4/7] [pre-commit.ci lite] apply automatic fixes --- source/module_io/istate_envelope.cpp | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/source/module_io/istate_envelope.cpp b/source/module_io/istate_envelope.cpp index 4081e60345..81384d200f 100644 --- a/source/module_io/istate_envelope.cpp +++ b/source/module_io/istate_envelope.cpp @@ -155,8 +155,9 @@ void IState_Envelope::begin(const psi::Psi* psid, for (int is = 0; is < nspin; ++is) { wfc_gamma_grid[is] = new double*[nbands]; - for (int ib = 0; ib < nbands; ++ib) + for (int ib = 0; ib < nbands; ++ib) { wfc_gamma_grid[is][ib] = new double[gg.gridt->lgd]; +} } const double mem_size = sizeof(double) * double(gg.gridt->lgd) * double(nbands) * double(nspin) / 1024.0 / 1024.0; @@ -219,8 +220,9 @@ void IState_Envelope::begin(const psi::Psi* psid, &(GlobalC::ucell), 3); - if (out_wfc_pw || out_wfc_r) // only for gamma_only now + if (out_wfc_pw || out_wfc_r) { // only for gamma_only now this->set_pw_wfc(wfcpw, 0, ib, nspin, pes_->charge->rho_save, pw_wfc_g); +} } } } @@ -240,8 +242,9 @@ void IState_Envelope::begin(const psi::Psi* psid, for (int is = 0; is < nspin; ++is) { - for (int ib = 0; ib < nbands; ++ib) + for (int ib = 0; ib < nbands; ++ib) { delete[] wfc_gamma_grid[is][ib]; +} delete[] wfc_gamma_grid[is]; } return; @@ -482,8 +485,9 @@ void IState_Envelope::begin(const psi::Psi>* psi, for (int ik = 0; ik < nks; ++ik) { - for (int ib = 0; ib < nbands; ++ib) + for (int ib = 0; ib < nbands; ++ib) { delete[] wfc_k_grid[ik][ib]; +} delete[] wfc_k_grid[ik]; } @@ -498,15 +502,18 @@ void IState_Envelope::set_pw_wfc(const ModulePW::PW_Basis_K* wfcpw, const double* const* const rho, psi::Psi>& wfc_g) { - if (ib == 0) // once is enough + if (ib == 0) { // once is enough ModuleBase::TITLE("IState_Envelope", "set_pw_wfc"); +} std::vector> Porter(wfcpw->nrxx); // here I refer to v_hartree, but I don't know how to deal with NSPIN=4 const int nspin0 = (nspin == 2) ? 2 : 1; - for (int is = 0; is < nspin0; is++) - for (int ir = 0; ir < wfcpw->nrxx; ir++) + for (int is = 0; is < nspin0; is++) { + for (int ir = 0; ir < wfcpw->nrxx; ir++) { Porter[ir] += std::complex(rho[is][ir], 0.0); +} +} // call FFT wfcpw->real2recip(Porter.data(), &wfc_g(ib, 0), ik); From 0ada30b6220f158c88eb62c27dd79d5a68032a0a Mon Sep 17 00:00:00 2001 From: jinzx10 Date: Fri, 12 Jul 2024 17:45:07 +0800 Subject: [PATCH 5/7] remove comm_2D in lr --- source/module_lr/AX/AX_parallel.cpp | 18 +++++++++--------- source/module_lr/AX/test/AX_test.cpp | 10 +++++----- .../module_lr/dm_trans/dm_trans_parallel.cpp | 16 ++++++++-------- .../module_lr/dm_trans/test/dm_trans_test.cpp | 6 +++--- source/module_lr/esolver_lrtd_lcao.cpp | 10 ++++------ source/module_lr/utils/lr_util.cpp | 7 +++---- source/module_lr/utils/lr_util.h | 5 ++--- source/module_lr/utils/lr_util.hpp | 2 +- 8 files changed, 35 insertions(+), 39 deletions(-) diff --git a/source/module_lr/AX/AX_parallel.cpp b/source/module_lr/AX/AX_parallel.cpp index 7c563e3d9b..3881a5ee09 100644 --- a/source/module_lr/AX/AX_parallel.cpp +++ b/source/module_lr/AX/AX_parallel.cpp @@ -22,18 +22,18 @@ namespace LR const bool add_on) { ModuleBase::TITLE("hamilt_lrtd", "cal_AX_pblas"); - assert(pmat.comm_2D == pc.comm_2D); + assert(pmat.comm() == pc.comm()); assert(pmat.blacs_ctxt == pc.blacs_ctxt); - if (pX.comm_2D != pmat.comm_2D || pX.blacs_ctxt != pmat.blacs_ctxt) - LR_Util::setup_2d_division(pX, pmat.get_block_size(), nvirt, nocc, pmat.comm_2D, pmat.blacs_ctxt); + if (pX.comm() != pmat.comm() || pX.blacs_ctxt != pmat.blacs_ctxt) + LR_Util::setup_2d_division(pX, pmat.get_block_size(), nvirt, nocc, pmat.blacs_ctxt); else assert(pX.get_local_size() > 0 && AX_istate.get_nbasis() == pX.get_local_size()); int nks = c.get_nk(); assert(V_istate.size() == nks); Parallel_2D pVc; // for intermediate Vc - LR_Util::setup_2d_division(pVc, pmat.get_block_size(), naos, nocc, pmat.comm_2D, pmat.blacs_ctxt); + LR_Util::setup_2d_division(pVc, pmat.get_block_size(), naos, nocc, pmat.blacs_ctxt); for (int isk = 0;isk < nks;++isk) { AX_istate.fix_k(isk); @@ -79,18 +79,18 @@ namespace LR const bool add_on) { ModuleBase::TITLE("hamilt_lrtd", "cal_AX_plas"); - assert(pmat.comm_2D == pc.comm_2D); + assert(pmat.comm() == pc.comm()); assert(pmat.blacs_ctxt == pc.blacs_ctxt); - if (pX.comm_2D != pmat.comm_2D || pX.blacs_ctxt != pmat.blacs_ctxt) - LR_Util::setup_2d_division(pX, pmat.get_block_size(), nvirt, nocc, pmat.comm_2D, pmat.blacs_ctxt); + if (pX.comm() != pmat.comm() || pX.blacs_ctxt != pmat.blacs_ctxt) + LR_Util::setup_2d_division(pX, pmat.get_block_size(), nvirt, nocc, pmat.blacs_ctxt); else assert(pX.get_local_size() > 0 && AX_istate.get_nbasis() == pX.get_local_size()); int nks = c.get_nk(); assert(V_istate.size() == nks); Parallel_2D pVc; // for intermediate Vc - LR_Util::setup_2d_division(pVc, pmat.get_block_size(), naos, nocc, pmat.comm_2D, pmat.blacs_ctxt); + LR_Util::setup_2d_division(pVc, pmat.get_block_size(), naos, nocc, pmat.blacs_ctxt); for (int isk = 0;isk < nks;++isk) { AX_istate.fix_k(isk); @@ -122,4 +122,4 @@ namespace LR } } } -#endif \ No newline at end of file +#endif diff --git a/source/module_lr/AX/test/AX_test.cpp b/source/module_lr/AX/test/AX_test.cpp index 06ca272b97..65a459a1d8 100644 --- a/source/module_lr/AX/test/AX_test.cpp +++ b/source/module_lr/AX/test/AX_test.cpp @@ -117,10 +117,10 @@ TEST_F(AXTest, DoubleParallel) LR_Util::setup_2d_division(pV, s.nb, s.naos, s.naos); std::vector V(s.nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { pV.get_col_size(), pV.get_row_size() })); Parallel_2D pc; - LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, pV.comm_2D, pV.blacs_ctxt); + LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, pV.blacs_ctxt); psi::Psi c(s.nks, pc.get_col_size(), pc.get_row_size()); Parallel_2D px; - LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc, pV.comm_2D, pV.blacs_ctxt); + LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc, pV.blacs_ctxt); EXPECT_EQ(pV.dim0, pc.dim0); EXPECT_EQ(pV.dim1, pc.dim1); @@ -178,10 +178,10 @@ TEST_F(AXTest, ComplexParallel) LR_Util::setup_2d_division(pV, s.nb, s.naos, s.naos); std::vector V(s.nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { pV.get_col_size(), pV.get_row_size() })); Parallel_2D pc; - LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, pV.comm_2D, pV.blacs_ctxt); + LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, pV.blacs_ctxt); psi::Psi> c(s.nks, pc.get_col_size(), pc.get_row_size()); Parallel_2D px; - LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc, pV.comm_2D, pV.blacs_ctxt); + LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc, pV.blacs_ctxt); psi::Psi> AX_pblas_loc(s.nks, nstate, px.get_local_size()); psi::Psi> AX_gather(s.nks, nstate, s.nocc * s.nvirt, nullptr, false); @@ -236,4 +236,4 @@ int main(int argc, char** argv) int result = RUN_ALL_TESTS(); MPI_Finalize(); return result; -} \ No newline at end of file +} diff --git a/source/module_lr/dm_trans/dm_trans_parallel.cpp b/source/module_lr/dm_trans/dm_trans_parallel.cpp index 432ae65dda..ae857692d6 100644 --- a/source/module_lr/dm_trans/dm_trans_parallel.cpp +++ b/source/module_lr/dm_trans/dm_trans_parallel.cpp @@ -22,11 +22,11 @@ std::vector cal_dm_trans_pblas(const psi::Psi& X_ista const int nspin) { ModuleBase::TITLE("hamilt_lrtd", "cal_dm_trans_pblas"); - assert(px.comm_2D == pc.comm_2D); + assert(px.comm() == pc.comm()); assert(px.blacs_ctxt == pc.blacs_ctxt); - if (pmat.comm_2D != px.comm_2D || pmat.blacs_ctxt != px.blacs_ctxt) - LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.comm_2D, px.blacs_ctxt); + if (pmat.comm() != px.comm() || pmat.blacs_ctxt != px.blacs_ctxt) + LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.blacs_ctxt); else assert(pmat.get_local_size() > 0); @@ -49,7 +49,7 @@ std::vector cal_dm_trans_pblas(const psi::Psi& X_ista // 1. [X*C_occ^T]^T=C_occ*X^T Parallel_2D pXc; // nvirt*naos - LR_Util::setup_2d_division(pXc, px.get_block_size(), naos, nvirt, px.comm_2D, px.blacs_ctxt); + LR_Util::setup_2d_division(pXc, px.get_block_size(), naos, nvirt, px.blacs_ctxt); container::Tensor Xc(DAT::DT_DOUBLE, DEV::CpuDevice, {pXc.get_col_size(), pXc.get_row_size()}); // row is "inside"(memory contiguity) for pblas @@ -110,11 +110,11 @@ std::vector cal_dm_trans_pblas(const psi::Psi 0); @@ -157,7 +157,7 @@ std::vector cal_dm_trans_pblas(const psi::Psi X(s.nks, nstate, px.get_local_size(), nullptr, false); Parallel_2D pc; - LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px.comm_2D, px.blacs_ctxt); + LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px.blacs_ctxt); psi::Psi c(s.nks, pc.get_col_size(), pc.get_row_size()); Parallel_2D pmat; @@ -170,7 +170,7 @@ TEST_F(DMTransTest, ComplexParallel) LR_Util::setup_2d_division(px, s.nb, s.nvirt, s.nocc); psi::Psi> X(s.nks, nstate, px.get_local_size(), nullptr, false); Parallel_2D pc; - LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px.comm_2D, px.blacs_ctxt); + LR_Util::setup_2d_division(pc, s.nb, s.naos, s.nocc + s.nvirt, px.blacs_ctxt); psi::Psi> c(s.nks, pc.get_col_size(), pc.get_row_size()); Parallel_2D pmat; @@ -229,4 +229,4 @@ int main(int argc, char** argv) int result = RUN_ALL_TESTS(); MPI_Finalize(); return result; -} \ No newline at end of file +} diff --git a/source/module_lr/esolver_lrtd_lcao.cpp b/source/module_lr/esolver_lrtd_lcao.cpp index 7f43274fef..cf0bb0ce78 100644 --- a/source/module_lr/esolver_lrtd_lcao.cpp +++ b/source/module_lr/esolver_lrtd_lcao.cpp @@ -135,8 +135,7 @@ LR::ESolver_LR::ESolver_LR(ModuleESolver::ESolver_KS_LCAO&& ks_sol this->eig_ks = std::move(ks_sol.pelec->ekb); this->set_dimension(); - LR_Util::setup_2d_division(this->paraC_, 1, this->nbasis, this->nocc + this->nvirt, - this->paraMat_.comm_2D, this->paraMat_.blacs_ctxt); + LR_Util::setup_2d_division(this->paraC_, 1, this->nbasis, this->nocc + this->nvirt, this->paraMat_.blacs_ctxt); //grid integration this->gt_ = std::move(ks_sol.GridT); @@ -220,8 +219,7 @@ LR::ESolver_LR::ESolver_LR(const Input_para& inp, Input& inp_tmp, UnitCel this->read_ks_wfc(); this->set_dimension(); - LR_Util::setup_2d_division(this->paraC_, 1, this->nbasis, this->nocc + this->nvirt, - paraMat_.comm_2D, paraMat_.blacs_ctxt); + LR_Util::setup_2d_division(this->paraC_, 1, this->nbasis, this->nocc + this->nvirt, paraMat_.blacs_ctxt); //allocate 2-particle state and setup 2d division this->nstates = inp.lr_nstates; @@ -383,7 +381,7 @@ void LR::ESolver_LR::setup_eigenvectors_X() { ModuleBase::TITLE("ESolver_LR", "setup_eigenvectors_X"); // setup ParaX - LR_Util::setup_2d_division(this->paraX_, 1, this->nvirt, this->nocc, this->paraC_.comm_2D, this->paraC_.blacs_ctxt);//nvirt - row, nocc - col + LR_Util::setup_2d_division(this->paraX_, 1, this->nvirt, this->nocc, this->paraC_.blacs_ctxt);//nvirt - row, nocc - col // if spectrum-only, read the LR-eigenstates from file and return if (this->input.lr_solver == "spectrum") { @@ -506,4 +504,4 @@ void LR::ESolver_LR::read_ks_chg(Charge& chg_gs) } } template class LR::ESolver_LR; -template class LR::ESolver_LR, double>; \ No newline at end of file +template class LR::ESolver_LR, double>; diff --git a/source/module_lr/utils/lr_util.cpp b/source/module_lr/utils/lr_util.cpp index 951b38b1c2..7c1927e4ba 100644 --- a/source/module_lr/utils/lr_util.cpp +++ b/source/module_lr/utils/lr_util.cpp @@ -183,11 +183,10 @@ namespace LR_Util #ifdef __MPI // for the other matrices in the commutator other than the first one - void setup_2d_division(Parallel_2D& pv, int nb, int gr, int gc, - const MPI_Comm& comm_2D_in, const int& blacs_ctxt_in) + void setup_2d_division(Parallel_2D& pv, int nb, int gr, int gc, const int& blacs_ctxt_in) { ModuleBase::TITLE("LR_Util", "setup_2d_division"); - pv.set(gr, gc, nb, comm_2D_in, blacs_ctxt_in); + pv.set(gr, gc, nb, blacs_ctxt_in); } #endif @@ -248,4 +247,4 @@ namespace LR_Util } } #endif -} \ No newline at end of file +} diff --git a/source/module_lr/utils/lr_util.h b/source/module_lr/utils/lr_util.h index 9bd90d4949..147fce62ce 100644 --- a/source/module_lr/utils/lr_util.h +++ b/source/module_lr/utils/lr_util.h @@ -108,8 +108,7 @@ namespace LR_Util #ifdef __MPI // pack the process to setup 2d divion reusing blacs_ctxt of an existing 2d-matrix - void setup_2d_division(Parallel_2D& pv, int nb, int gr, int gc, - const MPI_Comm& comm_2D_in, const int& blacs_ctxt_in); + void setup_2d_division(Parallel_2D& pv, int nb, int gr, int gc, const int& blacs_ctxt_in); /// @brief gather 2d matrix to full matrix /// the defination of row and col is consistent with setup_2d_division template @@ -121,4 +120,4 @@ namespace LR_Util void diag_lapack(const int& n, double* mat, double* eig); void diag_lapack(const int& n, std::complex* mat, double* eig); } -#include "lr_util.hpp" \ No newline at end of file +#include "lr_util.hpp" diff --git a/source/module_lr/utils/lr_util.hpp b/source/module_lr/utils/lr_util.hpp index a6a1295c9b..0e2b29e44c 100644 --- a/source/module_lr/utils/lr_util.hpp +++ b/source/module_lr/utils/lr_util.hpp @@ -169,7 +169,7 @@ namespace LR_Util fullmat[pv.local2global_col(j) * global_nrow + pv.local2global_row(i)] = submat[j * pv.get_row_size() + i]; //reduce to root - MPI_Allreduce(MPI_IN_PLACE, fullmat, global_nrow * global_ncol, get_mpi_datatype(), MPI_SUM, pv.comm_2D); + MPI_Allreduce(MPI_IN_PLACE, fullmat, global_nrow * global_ncol, get_mpi_datatype(), MPI_SUM, pv.comm()); }; #endif From 9a15801cec4c4b5c8dd2db568c5f6ac178c4627f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci-lite[bot]" <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com> Date: Fri, 12 Jul 2024 12:28:10 +0000 Subject: [PATCH 6/7] [pre-commit.ci lite] apply automatic fixes --- .../module_tddft/norm_psi.cpp | 24 +++++--- source/module_lr/AX/test/AX_test.cpp | 26 +++++--- .../module_lr/dm_trans/dm_trans_parallel.cpp | 10 ++-- source/module_lr/utils/lr_util.cpp | 60 ++++++++++++------- 4 files changed, 79 insertions(+), 41 deletions(-) diff --git a/source/module_hamilt_lcao/module_tddft/norm_psi.cpp b/source/module_hamilt_lcao/module_tddft/norm_psi.cpp index 9d708e13c7..cf3698b3ee 100644 --- a/source/module_hamilt_lcao/module_tddft/norm_psi.cpp +++ b/source/module_hamilt_lcao/module_tddft/norm_psi.cpp @@ -81,10 +81,12 @@ void norm_psi(const Parallel_Orbitals* pv, double aa, bb; aa = Cij[i * pv->ncol + j].real(); bb = Cij[i * pv->ncol + j].imag(); - if (std::abs(aa) < 1e-8) + if (std::abs(aa) < 1e-8) { aa = 0.0; - if (std::abs(bb) < 1e-8) +} + if (std::abs(bb) < 1e-8) { bb = 0.0; +} GlobalV::ofs_running << aa << "+" << bb << "i "; } GlobalV::ofs_running << std::endl; @@ -105,13 +107,15 @@ void norm_psi(const Parallel_Orbitals* pv, for (int j = 0; j < naroc[1]; ++j) { int igcol = globalIndex(j, pv->nb, pv->dim1, ipcol); - if (igcol >= nband) + if (igcol >= nband) { continue; +} for (int i = 0; i < naroc[0]; ++i) { int igrow = globalIndex(i, pv->nb, pv->dim0, iprow); - if (igrow >= nband) + if (igrow >= nband) { continue; +} if (igcol == igrow) { Cij[j * naroc[0] + i] = {1.0 / sqrt(Cij[j * naroc[0] + i].real()), 0.0}; @@ -169,10 +173,12 @@ void norm_psi(const Parallel_Orbitals* pv, double aa, bb; aa = psi_k[i * pv->ncol + j].real(); bb = psi_k[i * pv->ncol + j].imag(); - if (std::abs(aa) < 1e-8) + if (std::abs(aa) < 1e-8) { aa = 0.0; - if (std::abs(bb) < 1e-8) +} + if (std::abs(bb) < 1e-8) { bb = 0.0; +} GlobalV::ofs_running << aa << "+" << bb << "i "; } GlobalV::ofs_running << std::endl; @@ -186,10 +192,12 @@ void norm_psi(const Parallel_Orbitals* pv, double aa, bb; aa = tmp1[i * pv->ncol + j].real(); bb = tmp1[i * pv->ncol + j].imag(); - if (std::abs(aa) < 1e-8) + if (std::abs(aa) < 1e-8) { aa = 0.0; - if (std::abs(bb) < 1e-8) +} + if (std::abs(bb) < 1e-8) { bb = 0.0; +} GlobalV::ofs_running << aa << "+" << bb << "i "; } GlobalV::ofs_running << std::endl; diff --git a/source/module_lr/AX/test/AX_test.cpp b/source/module_lr/AX/test/AX_test.cpp index 65a459a1d8..92ed30f7e9 100644 --- a/source/module_lr/AX/test/AX_test.cpp +++ b/source/module_lr/AX/test/AX_test.cpp @@ -41,12 +41,18 @@ class AXTest : public testing::Test } #endif - void set_ones(double* data, int size) { for (int i = 0;i < size;++i) data[i] = 1.0; }; - void set_int(double* data, int size) { for (int i = 0;i < size;++i) data[i] = static_cast(i + 1); }; - void set_int(std::complex* data, int size) { for (int i = 0;i < size;++i) data[i] = std::complex(i + 1, -i - 1); }; - void set_rand(double* data, int size) { for (int i = 0;i < size;++i) data[i] = double(rand()) / double(RAND_MAX) * 10.0 - 5.0; }; - void set_rand(std::complex* data, int size) { for (int i = 0;i < size;++i) data[i] = std::complex(rand(), rand()) / double(RAND_MAX) * 10.0 - 5.0; }; - void check_eq(double* data1, double* data2, int size) { for (int i = 0;i < size;++i) EXPECT_NEAR(data1[i], data2[i], 1e-10); }; + void set_ones(double* data, int size) { for (int i = 0;i < size;++i) { data[i] = 1.0; +}}; + void set_int(double* data, int size) { for (int i = 0;i < size;++i) { data[i] = static_cast(i + 1); +}}; + void set_int(std::complex* data, int size) { for (int i = 0;i < size;++i) { data[i] = std::complex(i + 1, -i - 1); +}}; + void set_rand(double* data, int size) { for (int i = 0;i < size;++i) { data[i] = double(rand()) / double(RAND_MAX) * 10.0 - 5.0; +}}; + void set_rand(std::complex* data, int size) { for (int i = 0;i < size;++i) { data[i] = std::complex(rand(), rand()) / double(RAND_MAX) * 10.0 - 5.0; +}}; + void check_eq(double* data1, double* data2, int size) { for (int i = 0;i < size;++i) { EXPECT_NEAR(data1[i], data2[i], 1e-10); +}}; void check_eq(std::complex* data1, std::complex* data2, int size) { for (int i = 0;i < size;++i) @@ -70,7 +76,8 @@ TEST_F(AXTest, DoubleSerial) psi::Psi c(s.nks, s.nocc + s.nvirt, s.naos); std::vector V(s.nks, container::Tensor(DAT::DT_DOUBLE, DEV::CpuDevice, { s.naos, s.naos })); set_rand(c.get_pointer(), size_c); - for (auto& v : V)set_rand(v.data(), size_v); + for (auto& v : V) {set_rand(v.data(), size_v); +} AX_for.fix_b(istate); AX_blas.fix_b(istate); LR::cal_AX_forloop_serial(V, c, s.nocc, s.nvirt, AX_for); @@ -95,7 +102,8 @@ TEST_F(AXTest, ComplexSerial) psi::Psi> c(s.nks, s.nocc + s.nvirt, s.naos); std::vector V(s.nks, container::Tensor(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { s.naos, s.naos })); set_rand(c.get_pointer(), size_c); - for (auto& v : V)set_rand(v.data>(), size_v); + for (auto& v : V) {set_rand(v.data>(), size_v); +} AX_for.fix_b(istate); AX_blas.fix_b(istate); LR::cal_AX_forloop_serial(V, c, s.nocc, s.nvirt, AX_for); @@ -230,7 +238,7 @@ TEST_F(AXTest, ComplexParallel) int main(int argc, char** argv) { - srand(time(NULL)); // for random number generator + srand(time(nullptr)); // for random number generator MPI_Init(&argc, &argv); testing::InitGoogleTest(&argc, argv); int result = RUN_ALL_TESTS(); diff --git a/source/module_lr/dm_trans/dm_trans_parallel.cpp b/source/module_lr/dm_trans/dm_trans_parallel.cpp index ae857692d6..b5de36a5a3 100644 --- a/source/module_lr/dm_trans/dm_trans_parallel.cpp +++ b/source/module_lr/dm_trans/dm_trans_parallel.cpp @@ -25,10 +25,11 @@ std::vector cal_dm_trans_pblas(const psi::Psi& X_ista assert(px.comm() == pc.comm()); assert(px.blacs_ctxt == pc.blacs_ctxt); - if (pmat.comm() != px.comm() || pmat.blacs_ctxt != px.blacs_ctxt) + if (pmat.comm() != px.comm() || pmat.blacs_ctxt != px.blacs_ctxt) { LR_Util::setup_2d_division(pmat, px.get_block_size(), naos, naos, px.blacs_ctxt); - else + } else { assert(pmat.get_local_size() > 0); +} int nks = c.get_nk(); assert(nks == X_istate.get_nk()); @@ -113,10 +114,11 @@ std::vector cal_dm_trans_pblas(const psi::Psi 0); +} int nks = c.get_nk(); assert(nks == X_istate.get_nk()); diff --git a/source/module_lr/utils/lr_util.cpp b/source/module_lr/utils/lr_util.cpp index 7c1927e4ba..83491eb343 100644 --- a/source/module_lr/utils/lr_util.cpp +++ b/source/module_lr/utils/lr_util.cpp @@ -62,7 +62,8 @@ namespace LR_Util template<> void matsym(const double* in, const int n, const Parallel_2D& pmat, double* out) { - for (int i = 0;i < pmat.get_local_size();++i)out[i] = in[i]; + for (int i = 0;i < pmat.get_local_size();++i) {out[i] = in[i]; +} const double alpha = 0.5, beta = 0.5; const int i1 = 1; pdtran_(&n, &n, &alpha, in, &i1, &i1, pmat.desc, &beta, out, &i1, &i1, pmat.desc); @@ -71,7 +72,8 @@ namespace LR_Util void matsym(double* inout, const int n, const Parallel_2D& pmat) { std::vector tmp(n * n); - for (int i = 0;i < pmat.get_local_size();++i)tmp[i] = inout[i]; + for (int i = 0;i < pmat.get_local_size();++i) {tmp[i] = inout[i]; +} const double alpha = 0.5, beta = 0.5; const int i1 = 1; pdtran_(&n, &n, &alpha, tmp.data(), &i1, &i1, pmat.desc, &beta, inout, &i1, &i1, pmat.desc); @@ -79,7 +81,8 @@ namespace LR_Util template<> void matsym>(const std::complex* in, const int n, const Parallel_2D& pmat, std::complex* out) { - for (int i = 0;i < pmat.get_local_size();++i)out[i] = in[i]; + for (int i = 0;i < pmat.get_local_size();++i) {out[i] = in[i]; +} const std::complex alpha(0.5, 0.0), beta(0.5, 0.0); const int i1 = 1; pztranc_(&n, &n, &alpha, in, &i1, &i1, pmat.desc, &beta, out, &i1, &i1, pmat.desc); @@ -88,7 +91,8 @@ namespace LR_Util void matsym>(std::complex* inout, const int n, const Parallel_2D& pmat) { std::vector> tmp(n * n); - for (int i = 0;i < pmat.get_local_size();++i)tmp[i] = inout[i]; + for (int i = 0;i < pmat.get_local_size();++i) {tmp[i] = inout[i]; +} const std::complex alpha(0.5, 0.0), beta(0.5, 0.0); const int i1 = 1; pztranc_(&n, &n, &alpha, tmp.data(), &i1, &i1, pmat.desc, &beta, inout, &i1, &i1, pmat.desc); @@ -97,49 +101,57 @@ namespace LR_Util container::Tensor mat2ten_double(ModuleBase::matrix& m) { container::Tensor t(DAT::DT_DOUBLE, DEV::CpuDevice, { m.nr, m.nc }); - for (int i = 0;i < t.NumElements();++i)t.data()[i] = m.c[i]; + for (int i = 0;i < t.NumElements();++i) {t.data()[i] = m.c[i]; +} return t; } std::vector mat2ten_double(std::vector& m) { std::vector t; - for (int i = 0;i < m.size();++i) t.push_back(mat2ten_double(m[i])); + for (int i = 0;i < m.size();++i) { t.push_back(mat2ten_double(m[i])); +} return t; } ModuleBase::matrix ten2mat_double(container::Tensor& t) { ModuleBase::matrix m(t.shape().dims()[0], t.shape().dims()[1]); - for (int i = 0;i < t.NumElements();++i)m.c[i] = t.data()[i]; + for (int i = 0;i < t.NumElements();++i) {m.c[i] = t.data()[i]; +} return m; } std::vector ten2mat_double(std::vector& t) { std::vector m; - for (int i = 0;i < t.size();++i) m.push_back(ten2mat_double(t[i])); + for (int i = 0;i < t.size();++i) { m.push_back(ten2mat_double(t[i])); +} return m; } container::Tensor mat2ten_complex(ModuleBase::ComplexMatrix& m) { container::Tensor t(DAT::DT_COMPLEX_DOUBLE, DEV::CpuDevice, { m.nr, m.nc }); - for (int i = 0;i < t.NumElements();++i)t.data>()[i] = m.c[i]; + for (int i = 0;i < t.NumElements();++i) {t.data>()[i] = m.c[i]; +} return t; } std::vector mat2ten_complex(std::vector& m) { std::vector t; - for (int i = 0;i < m.size();++i) t.push_back(mat2ten_complex(m[i])); + for (int i = 0;i < m.size();++i) { t.push_back(mat2ten_complex(m[i])); +} return t; } ModuleBase::ComplexMatrix ten2mat_complex(container::Tensor& t) { ModuleBase::ComplexMatrix m(t.shape().dims()[0], t.shape().dims()[1]); - for (int i = 0;i < t.NumElements();++i)m.c[i] = t.data>()[i]; + for (int i = 0;i < t.NumElements();++i) {m.c[i] = t.data>()[i]; +} return m; } std::vector ten2mat_complex(std::vector& t) { std::vector m; - for (int i = 0;i < t.size();++i) m.push_back(ten2mat_complex(t[i])); + for (int i = 0;i < t.size();++i) { m.push_back(ten2mat_complex(t[i])); +} return m; } @@ -147,26 +159,30 @@ namespace LR_Util { assert(v.size() == nr * nc); ModuleBase::matrix m(nr, nc, false); - for (int i = 0;i < v.size();++i) m.c[i] = v[i]; + for (int i = 0;i < v.size();++i) { m.c[i] = v[i]; +} return m; } ModuleBase::ComplexMatrix vec2mat(const std::vector>& v, const int nr, const int nc) { assert(v.size() == nr * nc); ModuleBase::ComplexMatrix m(nr, nc, false); - for (int i = 0;i < v.size();++i) m.c[i] = v[i]; + for (int i = 0;i < v.size();++i) { m.c[i] = v[i]; +} return m; } std::vector vec2mat(const std::vector>& v, const int nr, const int nc) { std::vector m(v.size()); - for (int i = 0;i < v.size();++i) m[i] = vec2mat(v[i], nr, nc); + for (int i = 0;i < v.size();++i) { m[i] = vec2mat(v[i], nr, nc); +} return m; } std::vector vec2mat(const std::vector>>& v, const int nr, const int nc) { std::vector m(v.size()); - for (int i = 0;i < v.size();++i) m[i] = vec2mat(v[i], nr, nc); + for (int i = 0;i < v.size();++i) { m[i] = vec2mat(v[i], nr, nc); +} return m; } @@ -201,7 +217,8 @@ namespace LR_Util const int lwork = work_tmp; double* work2 = new double[lwork]; dsyev_(&jobz, &uplo, &n, mat, &n, eig, work2, &lwork, &info); - if (info) std::cout << "ERROR: Lapack solver, info=" << info << std::endl; + if (info) { std::cout << "ERROR: Lapack solver, info=" << info << std::endl; +} delete[] work2; } @@ -214,7 +231,8 @@ namespace LR_Util int info = 0; char jobz = 'V', uplo = 'U'; zheev_(&jobz, &uplo, &n, mat, &n, eig, work2, &lwork, rwork, &info); - if (info) std::cout << "ERROR: Lapack solver, info=" << info << std::endl; + if (info) { std::cout << "ERROR: Lapack solver, info=" << info << std::endl; +} delete[] rwork; delete[] work2; } @@ -239,11 +257,13 @@ namespace LR_Util rho_basis.real2recip(rhor, rhog.data()); for (int i = 0;i < 3;++i) { - for (int ig = 0; ig < rho_basis.npw; ig++) + for (int ig = 0; ig < rho_basis.npw; ig++) { rhog[ig] *= pow(rho_basis.gcar[ig][i], 2); +} rho_basis.recip2real(rhog.data(), tmp_rhor.data()); - for (int ir = 0; ir < rho_basis.nrxx; ir++) + for (int ir = 0; ir < rho_basis.nrxx; ir++) { lapn[ir] -= tmp_rhor[ir] * tpiba2; +} } } #endif From 251c4e236a47fe60fdcb4d5af50681e84ae11ded Mon Sep 17 00:00:00 2001 From: jinzx10 Date: Fri, 12 Jul 2024 20:55:11 +0800 Subject: [PATCH 7/7] fix comm() of uninitialized state --- source/module_basis/module_ao/parallel_2d.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/source/module_basis/module_ao/parallel_2d.cpp b/source/module_basis/module_ao/parallel_2d.cpp index 3bd669ee55..30aceda761 100644 --- a/source/module_basis/module_ao/parallel_2d.cpp +++ b/source/module_basis/module_ao/parallel_2d.cpp @@ -31,7 +31,13 @@ int Parallel_2D::get_global_col_size() const #ifdef __MPI MPI_Comm Parallel_2D::comm() const { - int sys_ctxt = -1; + // it is an error to call blacs_get with an invalid BLACS context + if (blacs_ctxt < 0) + { + return MPI_COMM_NULL; + } + + int sys_ctxt = 0; Cblacs_get(blacs_ctxt, 10, &sys_ctxt); // blacs_get with "what" = 10 takes a BLACS context and returns the index // of the associated system context (MPI communicator) that can be used by