From 6e82ca4a485a56540a7b3ec062f2b7d0d02f49c5 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 25 Jul 2024 04:32:55 -0400 Subject: [PATCH] Reapply "debug" This reverts commit e26482833aa16c4c4d2d6bf07fc701dcdf4e0204. --- .github/workflows/test_cc.yml | 2 +- source/op/pt/comm.cc | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml index ebbfc4d960..05344f6994 100644 --- a/.github/workflows/test_cc.yml +++ b/.github/workflows/test_cc.yml @@ -53,7 +53,7 @@ jobs: env: DP_BUILD_TESTING: 1 if: ${{ !matrix.check_memleak }} - - run: pytest --cov=deepmd source/lmp/tests + - run: pytest -s --cov=deepmd source/lmp/tests env: OMP_NUM_THREADS: 1 TF_INTRA_OP_PARALLELISM_THREADS: 1 diff --git a/source/op/pt/comm.cc b/source/op/pt/comm.cc index a25dfbd542..e886ab245a 100644 --- a/source/op/pt/comm.cc +++ b/source/op/pt/comm.cc @@ -82,6 +82,7 @@ class Border : public torch::autograd::Function { int nghost = nghost_tensor.item(); int ntotal = nlocal + nghost; torch::Tensor recv_g1_tensor = g1; + printf("nlocal: %d, nghost: %d, lineno: %d\n", nlocal, nghost, __LINE__); #ifdef USE_MPI int mpi_init = 0; @@ -120,6 +121,9 @@ class Border : public torch::autograd::Function { for (int iswap = 0; iswap < nswap; ++iswap) { int nrecv = recvnum[iswap]; int nsend = sendnum[iswap]; + printf("me: %d, iswap: %d, nrecv: %d, nsend: %d, lineno: %d\n", me, iswap, + nrecv, nsend, __LINE__); + torch::Tensor isendlist = torch::from_blob(sendlist[iswap], {nsend}, int32_options) .to(recv_g1_tensor.device()); @@ -214,6 +218,7 @@ class Border : public torch::autograd::Function { MPI_Comm_size(world, &world_size); MPI_Datatype mpi_type = get_mpi_type(); MPI_Request request; + printf("world_size: %d, rank: %d, lineno: %d\n", world_size, me, __LINE__); #if defined(GOOGLE_CUDA) || defined(TENSORFLOW_USE_ROCM) if (world_size != 1) { int version, subversion; @@ -265,6 +270,8 @@ class Border : public torch::autograd::Function { for (int iswap = nswap - 1; iswap >= 0; --iswap) { int nrecv = recvnum[iswap]; int nsend = sendnum[iswap]; + printf("me: %d, iswap: %d, nrecv: %d, nsend: %d, lineno: %d\n", me, iswap, + nrecv, nsend, __LINE__); torch::Tensor irecvlist; if (nrecv) {