From a64aa3dc8f3c51b97de158f32bea6ef5851bde12 Mon Sep 17 00:00:00 2001
From: Jonathan Schilling
Date: Mon, 3 Jun 2024 16:40:49 +0200
Subject: [PATCH 1/6] Use a more direct (i.e. no loops) implementation of
 mpi_calc_myrange.

This one also checks that no erroneous mystart, myend are created if
more ranks are to be assigned than work items (e.g. loop iterations,
field lines to be traced, ...) are available.
---
 LIBSTELL/Sources/Modules/mpi_params.f | 75 +++++++++++++++++++--------
 1 file changed, 53 insertions(+), 22 deletions(-)

diff --git a/LIBSTELL/Sources/Modules/mpi_params.f b/LIBSTELL/Sources/Modules/mpi_params.f
index cd2ed82c7..5b1347ff8 100644
--- a/LIBSTELL/Sources/Modules/mpi_params.f
+++ b/LIBSTELL/Sources/Modules/mpi_params.f
@@ -23,7 +23,7 @@ MODULE mpi_params
       INTEGER :: MPI_COMM_WORKERS_OK=-1, worker_id_ok=-1 !communicator subgroup, vmec ran ok
       INTEGER :: MPI_COMM_SHARMEM = 718, myid_sharmem=-1 !communicator for shared memory
       INTEGER :: MPI_COMM_STEL = 327 !communicator which is a copy of MPI_COMM_WORLD (user must set this up)
-      INTEGER :: MPI_COMM_MYWORLD = 411 !communicator
+      INTEGER :: MPI_COMM_MYWORLD = 411 !communicator
       INTEGER :: MPI_COMM_FIELDLINES = 328 !communicator for FIELDLINES code
       INTEGER :: MPI_COMM_TORLINES = 329 !communicator for TORLINES code
       INTEGER :: MPI_COMM_BEAMS = 330 !communicator for BEAMS3D code
@@ -34,12 +34,12 @@ MODULE mpi_params
       INTEGER :: MPI_COMM_PARVMEC = 101 !communicator for PARVMEC code

       CONTAINS
-
+
       SUBROUTINE mpi_stel_abort(error)
 #if defined(MPI_OPT)
       USE MPI
 #endif
-      IMPLICIT NONE
+      IMPLICIT NONE
       INTEGER, INTENT(in) :: error
       INTEGER :: length, temp
       CHARACTER(LEN=MPI_MAX_ERROR_STRING) :: message
@@ -48,7 +48,7 @@ SUBROUTINE mpi_stel_abort(error)
       WRITE(6,*) '!!!!!!!!!!!!MPI_ERROR DETECTED!!!!!!!!!!!!!!'
       WRITE(6,*) '  MESSAGE: ',message(1:length)
       WRITE(6,*) '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
-      CALL FLUSH(6)
+      CALL FLUSH(6)
 #else
       WRITE(6,*) '!!!!!!!!!!!!MPI_ERROR DETECTED!!!!!!!!!!!!!!'
       WRITE(6,*) '  MPI_STEL_ABORT CALLED BUT NO MPI'
@@ -57,6 +57,9 @@ SUBROUTINE mpi_stel_abort(error)
       !CALL MPI_ABORT(MPI_COMM_STEL,1,temp)
       END SUBROUTINE mpi_stel_abort

+      ! Distribute the workload of operating on n1:n2 (inclusive)
+      ! over the compute ranks available in the given communicator
+      ! and return the local range to be worked on in mystart and myend.
       SUBROUTINE MPI_CALC_MYRANGE(comm,n1,n2,mystart,myend)
 #if defined(MPI_OPT)
       USE mpi
@@ -65,26 +68,54 @@ SUBROUTINE MPI_CALC_MYRANGE(comm,n1,n2,mystart,myend)
       INTEGER, INTENT(inout) :: comm
       INTEGER, INTENT(in) :: n1, n2
       INTEGER, INTENT(out) :: mystart, myend
-      INTEGER :: delta, local_size, local_rank, istat, maxend, k, i
-      mystart = n1; myend = n2
+      INTEGER :: total_work, work_per_rank, work_remainder
+      INTEGER :: local_size, local_rank, istat
+
+      ! Default if not using MPI: just work on full range
+      mystart = n1
+      myend   = n2
+
 #if defined(MPI_OPT)
-      CALL MPI_COMM_SIZE( comm, local_size, istat)
-      CALL MPI_COMM_RANK( comm, local_rank, istat )
-      delta = CEILING(DBLE(n2-n1+1)/DBLE(local_size))
-      mystart = n1 + local_rank*delta
-      myend = mystart + delta - 1
-      maxend = local_size*delta
-      IF (maxend>n2) THEN
-         k = maxend-n2
-         DO i = (local_size-k), local_size-1
-            IF (local_rank > i) THEN
-               mystart = mystart - 1
-               myend = myend - 1
-            ELSEIF (local_rank==i) THEN
-               myend = myend - 1
-            END IF
-         END DO
+
+      ! Total number of items to work on.
+      ! NOTE: n2 is the upper range bound, inclusive!
+      total_work = n2 - n1 + 1
+
+      ! `local_size` is the number of available ranks
+      CALL MPI_COMM_SIZE(comm, local_size, istat)
+
+      IF (local_size .gt. total_work) THEN
+         STOP "cannot assign more ranks than work items"
       END IF
+
+      ! `local_rank` is the ID of the rank to compute `mystart` and `myend` for
+      CALL MPI_COMM_RANK(comm, local_rank, istat)
+
+      ! size of chunks that are present in all ranks
+      work_per_rank = total_work / local_size
+
+      ! number of work items that remain after distributing
+      ! equal chunks of work to all ranks
+      work_remainder = MODULO(total_work, local_size)
+
+      ! ranges corresponding to working on evenly distributed chunks
+      mystart = n1 + local_rank * work_per_rank
+      myend = n1 + (local_rank + 1) * work_per_rank - 1
+
+      IF (local_rank .lt. work_remainder) THEN
+         ! The first `work_remainder` ranks get one additional item to work on.
+         ! This takes care of the additional `work_remainder` items
+         ! that need to be worked on, on top of the evenly distributed chunks.
+         mystart = mystart + local_rank
+         myend = myend + local_rank + 1
+      ELSE
+         ! All following ranks after the first `work_remainder` ones
+         ! get their ranges just shifted by a constant offset,
+         ! since they don't do any additional work.
+         mystart = mystart + work_remainder
+         myend = myend + work_remainder
+      END IF
+
 #endif
       RETURN
       END SUBROUTINE MPI_CALC_MYRANGE
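
The distribution the new arithmetic produces can be replayed outside of MPI.
A minimal serial sketch, assuming an example range n1 = 1, n2 = 10 and
local_size = 4 pretend ranks (this demo is not part of the patch itself):

      PROGRAM demo_myrange
      IMPLICIT NONE
      INTEGER, PARAMETER :: n1 = 1, n2 = 10 ! example range, inclusive
      INTEGER, PARAMETER :: local_size = 4  ! assumed number of ranks
      INTEGER :: local_rank, total_work
      INTEGER :: work_per_rank, work_remainder
      INTEGER :: mystart, myend

      total_work = n2 - n1 + 1                        ! 10 items
      work_per_rank = total_work / local_size         ! 2 (integer division)
      work_remainder = MODULO(total_work, local_size) ! 2 items left over

      ! replay the per-rank computation from the patch for every rank
      DO local_rank = 0, local_size - 1
         mystart = n1 + local_rank * work_per_rank
         myend = n1 + (local_rank + 1) * work_per_rank - 1
         IF (local_rank .lt. work_remainder) THEN
            mystart = mystart + local_rank
            myend = myend + local_rank + 1
         ELSE
            mystart = mystart + work_remainder
            myend = myend + work_remainder
         END IF
         WRITE(6,*) 'rank ', local_rank, ': ', mystart, ' .. ', myend
      END DO
      END PROGRAM demo_myrange

For these values it prints the ranges 1..3, 4..6, 7..8, 9..10: the first
work_remainder = 2 ranks take one extra item each, and the ranges tile
1..10 with no gap and no overlap.
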
From 6fa5c3b11814b866e1f03866e8cd33286b023308 Mon Sep 17 00:00:00 2001
From: Jonathan Schilling
Date: Tue, 4 Jun 2024 08:57:30 +0200
Subject: [PATCH 2/6] remove stop statement, add additional comments

---
 LIBSTELL/Sources/Modules/mpi_params.f | 47 +++++++++++++++++----------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/LIBSTELL/Sources/Modules/mpi_params.f b/LIBSTELL/Sources/Modules/mpi_params.f
index 5b1347ff8..b8bc835d4 100644
--- a/LIBSTELL/Sources/Modules/mpi_params.f
+++ b/LIBSTELL/Sources/Modules/mpi_params.f
@@ -57,40 +57,48 @@ SUBROUTINE mpi_stel_abort(error)
       !CALL MPI_ABORT(MPI_COMM_STEL,1,temp)
       END SUBROUTINE mpi_stel_abort

-      ! Distribute the workload of operating on n1:n2 (inclusive)
-      ! over the compute ranks available in the given communicator
-      ! and return the local range to be worked on in mystart and myend.
+      !> Distribute the workload of operating on n1:n2 (inclusive)
+      !> over the compute ranks available in the given communicator
+      !> and return the local range to be worked on in mystart and myend.
+      !
+      !> This routine must __always__ run,
+      !> hence no `STOP` statements or similar are allowed here.
+      !> If more ranks than work items are available in the communicator,
+      !> this routine returns `myend` < `mystart`,
+      !> which implies that loops like `DO i = mystart, myend` are simply skipped
+      !> on ranks that do not get a share of the workload.
       SUBROUTINE MPI_CALC_MYRANGE(comm,n1,n2,mystart,myend)
 #if defined(MPI_OPT)
       USE mpi
 #endif
       IMPLICIT NONE
-      INTEGER, INTENT(inout) :: comm
-      INTEGER, INTENT(in) :: n1, n2
-      INTEGER, INTENT(out) :: mystart, myend
-      INTEGER :: total_work, work_per_rank, work_remainder
+      INTEGER, INTENT(inout) :: comm !< communicator to distribute work over
+      INTEGER, INTENT(in) :: n1 !< lower bound of range to work on
+      INTEGER, INTENT(in) :: n2 !< upper bound of range to work on (inclusive)
+      INTEGER, INTENT(out) :: mystart !< lower bound of chunk this rank should work on
+      INTEGER, INTENT(out) :: myend !< upper bound of chunk this rank should work on (inclusive)
       INTEGER :: local_size, local_rank, istat
+      INTEGER :: total_work, work_per_rank, work_remainder

       ! Default if not using MPI: just work on full range
       mystart = n1
-      myend   = n2
+      myend = n2

 #if defined(MPI_OPT)

-      ! Total number of items to work on.
-      ! NOTE: n2 is the upper range bound, inclusive!
-      total_work = n2 - n1 + 1
-
-      ! `local_size` is the number of available ranks
+      ! `local_size` is the number of available ranks.
+      ! We assume it is always > 0, i.e., 1, 2, 3, ...
       CALL MPI_COMM_SIZE(comm, local_size, istat)

-      IF (local_size .gt. total_work) THEN
-         STOP "cannot assign more ranks than work items"
-      END IF
-
-      ! `local_rank` is the ID of the rank to compute `mystart` and `myend` for
+      ! `local_rank` is the ID of the rank to compute `mystart` and `myend` for.
+      ! We assume it is always >= 0, i.e., 0, 1, 2, ...
       CALL MPI_COMM_RANK(comm, local_rank, istat)

+      ! Total number of items to work on.
+      ! NOTE: n2 is the upper range bound, inclusive!
+      total_work = n2 - n1 + 1
+
       ! size of chunks that are present in all ranks
       work_per_rank = total_work / local_size

@@ -99,6 +107,9 @@ SUBROUTINE MPI_CALC_MYRANGE(comm,n1,n2,mystart,myend)
       work_remainder = MODULO(total_work, local_size)

       ! ranges corresponding to working on evenly distributed chunks
+      ! `myend` is inclusive, i.e., the indices to work on are
+      ! { mystart, mystart+1, ..., myend-1, myend }.
+      ! Thus, one can use code like `DO i = mystart, myend`.
       mystart = n1 + local_rank * work_per_rank
       myend = n1 + (local_rank + 1) * work_per_rank - 1

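
A worked example of the oversubscribed case this patch now tolerates,
assuming n1 = 1, n2 = 2 and 4 ranks: total_work = 2, work_per_rank = 0 and
work_remainder = 2, so ranks 0 and 1 receive the ranges 1..1 and 2..2,
while ranks 2 and 3 both receive mystart = 3, myend = 2. A caller loop like
the following (trace_fieldline is a hypothetical stand-in for the per-item
work) then simply has a zero trip count on the two surplus ranks:

      DO i = mystart, myend
         CALL trace_fieldline(i) ! hypothetical per-item work
      END DO
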
From 19d10941b7485cf6709f66e8da520ba5160bd305 Mon Sep 17 00:00:00 2001
From: Jonathan Schilling
Date: Tue, 4 Jun 2024 10:30:55 +0200
Subject: [PATCH 3/6] add comment on upper limit of `local_rank`

---
 LIBSTELL/Sources/Modules/mpi_params.f | 1 +
 1 file changed, 1 insertion(+)

diff --git a/LIBSTELL/Sources/Modules/mpi_params.f b/LIBSTELL/Sources/Modules/mpi_params.f
index b8bc835d4..55a2fdc57 100644
--- a/LIBSTELL/Sources/Modules/mpi_params.f
+++ b/LIBSTELL/Sources/Modules/mpi_params.f
@@ -93,6 +93,7 @@ SUBROUTINE MPI_CALC_MYRANGE(comm,n1,n2,mystart,myend)

       ! `local_rank` is the ID of the rank to compute `mystart` and `myend` for.
       ! We assume it is always >= 0, i.e., 0, 1, 2, ...
+      ! and only up to `local_size - 1`.
       CALL MPI_COMM_RANK(comm, local_rank, istat)

       ! Total number of items to work on.

From 700b1fe71a592f2439f8430ec358700669e99edb Mon Sep 17 00:00:00 2001
From: Jonathan Schilling
Date: Tue, 4 Jun 2024 10:31:20 +0200
Subject: [PATCH 4/6] clarify comment on upper limit of `local_rank`

---
 LIBSTELL/Sources/Modules/mpi_params.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LIBSTELL/Sources/Modules/mpi_params.f b/LIBSTELL/Sources/Modules/mpi_params.f
index 55a2fdc57..381b2b07d 100644
--- a/LIBSTELL/Sources/Modules/mpi_params.f
+++ b/LIBSTELL/Sources/Modules/mpi_params.f
@@ -93,7 +93,7 @@ SUBROUTINE MPI_CALC_MYRANGE(comm,n1,n2,mystart,myend)

       ! `local_rank` is the ID of the rank to compute `mystart` and `myend` for.
       ! We assume it is always >= 0, i.e., 0, 1, 2, ...
-      ! and only up to `local_size - 1`.
+      ! and only up to `local_size - 1` (inclusive).
       CALL MPI_COMM_RANK(comm, local_rank, istat)

       ! Total number of items to work on.
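
For context, a sketch of how a caller would consume the two bounds; nlines,
i and follow_fieldline are placeholders, while MPI_COMM_FIELDLINES and
MPI_CALC_MYRANGE are the names from this module:

      ! every rank calls collectively with the same full range 1..nlines
      CALL MPI_CALC_MYRANGE(MPI_COMM_FIELDLINES,1,nlines,mystart,myend)
      DO i = mystart, myend
         CALL follow_fieldline(i) ! hypothetical per-field-line work
      END DO

Only local_rank, which MPI_COMM_RANK reports as 0 up to local_size - 1
(inclusive, as the comment now states), differs between ranks, and that
alone selects each rank's slice of the range.
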
From 4946db132bde7591ef9668ab874454021500a7d5 Mon Sep 17 00:00:00 2001
From: Jonathan Schilling
Date: Tue, 4 Jun 2024 10:32:34 +0200
Subject: [PATCH 5/6] comment on integer division

---
 LIBSTELL/Sources/Modules/mpi_params.f | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/LIBSTELL/Sources/Modules/mpi_params.f b/LIBSTELL/Sources/Modules/mpi_params.f
index 381b2b07d..df886ed1e 100644
--- a/LIBSTELL/Sources/Modules/mpi_params.f
+++ b/LIBSTELL/Sources/Modules/mpi_params.f
@@ -101,6 +101,9 @@ SUBROUTINE MPI_CALC_MYRANGE(comm,n1,n2,mystart,myend)
       total_work = n2 - n1 + 1

       ! size of chunks that are present in all ranks
+      ! (Note that we _intend_ use integer division here,
+      ! since the remainder is handled explicitly
+      ! via the `work_remainder` variable below.)
       work_per_rank = total_work / local_size

       ! number of work items that remain after distributing

From 954a0fef12011138223fcc1b4239d0ee930223ee Mon Sep 17 00:00:00 2001
From: Jonathan Schilling
Date: Tue, 4 Jun 2024 10:33:35 +0200
Subject: [PATCH 6/6] improve comment on integer division

---
 LIBSTELL/Sources/Modules/mpi_params.f | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/LIBSTELL/Sources/Modules/mpi_params.f b/LIBSTELL/Sources/Modules/mpi_params.f
index df886ed1e..1ae4b0e44 100644
--- a/LIBSTELL/Sources/Modules/mpi_params.f
+++ b/LIBSTELL/Sources/Modules/mpi_params.f
@@ -101,7 +101,7 @@ SUBROUTINE MPI_CALC_MYRANGE(comm,n1,n2,mystart,myend)
       total_work = n2 - n1 + 1

       ! size of chunks that are present in all ranks
-      ! (Note that we _intend_ use integer division here,
+      ! (Note that we use integer division here intentionally,
       ! since the remainder is handled explicitly
       ! via the `work_remainder` variable below.)
       work_per_rank = total_work / local_size
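
The division behaviour these two comment patches describe can be checked in
isolation; a throwaway snippet, not part of the patch series:

      PROGRAM demo_intdiv
      IMPLICIT NONE
      INTEGER :: total_work, local_size
      total_work = 10
      local_size = 4
      ! integer division truncates: 10 / 4 = 2
      WRITE(6,*) 'work_per_rank  = ', total_work / local_size
      ! MODULO returns what truncation dropped: MODULO(10, 4) = 2
      WRITE(6,*) 'work_remainder = ', MODULO(total_work, local_size)
      ! together they account for every item: 4 * 2 + 2 = 10
      END PROGRAM demo_intdiv

For the non-negative operands used here, / and MODULO split total_work
exactly into the equal chunks and the remainder that the routine
distributes.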