diff --git a/src/externals/mct/mct/Makefile b/src/externals/mct/mct/Makefile index edc5d11a8dfe..97aa186e77b9 100644 --- a/src/externals/mct/mct/Makefile +++ b/src/externals/mct/mct/Makefile @@ -31,6 +31,7 @@ SRCS_F90 = m_MCTWorld.F90 \ m_SparseMatrixPlus.F90 \ m_Router.F90 \ m_Rearranger.F90 \ + m_SPMDutils.F90 \ m_Transfer.F90 OBJS_ALL = $(SRCS_F90:.F90=.o) @@ -82,6 +83,7 @@ m_GlobalMap.o: m_GlobalSegMap.o: m_GlobalSegMapComms.o: m_GlobalSegMap.o m_Navigator.o: +m_SPMDutils.o: m_AttrVectComms.o: m_AttrVect.o m_GlobalMap.o m_AttrVectReduce.o: m_AttrVect.o m_AccumulatorComms.o: m_AttrVect.o m_GlobalMap.o m_AttrVectComms.o @@ -91,7 +93,7 @@ m_GeneralGridComms.o: m_AttrVect.o m_GeneralGrid.o m_AttrVectComms.o m_GlobalMap m_MatAttrVectMul.o: m_AttrVect.o m_SparseMatrix.o m_GlobalMap.o m_GlobalSegMap.o m_SparseMatrixPlus.o m_Rearranger.o m_Merge.o: m_AttrVect.o m_GeneralGrid.o m_Router.o: m_GlobalToLocal.o m_MCTWorld.o m_GlobalSegMap.o m_ExchangeMaps.o -m_Rearranger.o: m_Router.o m_MCTWorld.o m_GlobalSegMap.o m_AttrVect.o +m_Rearranger.o: m_Router.o m_MCTWorld.o m_GlobalSegMap.o m_AttrVect.o m_SPMDutils.o m_GlobalToLocal.o: m_GlobalSegMap.o m_ExchangeMaps.o: m_GlobalMap.o m_GlobalSegMap.o m_MCTWorld.o m_ConvertMaps.o m_ConvertMaps.o: m_GlobalMap.o m_GlobalSegMap.o m_MCTWorld.o diff --git a/src/externals/mct/mct/m_AttrVect.F90 b/src/externals/mct/mct/m_AttrVect.F90 index 93f0f14d1cda..d186fb84ce58 100644 --- a/src/externals/mct/mct/m_AttrVect.F90 +++ b/src/externals/mct/mct/m_AttrVect.F90 @@ -844,15 +844,19 @@ subroutine zero_(aV, zeroReals, zeroInts) if(myZeroInts) then ! zero out INTEGER attributes if(List_allocated(aV%iList)) then -!CDIR COLLAPSE - if(associated(aV%iAttr) .and. (nIAttr_(aV)>0)) aV%iAttr=0 + if(associated(aV%iAttr) .and. (nIAttr_(aV)>0)) then +!DIR$ COLLAPSE + aV%iAttr=0 + endif endif endif if(myZeroReals) then ! zero out REAL attributes if(List_allocated(aV%rList)) then -!CDIR COLLAPSE - if(associated(aV%rAttr) .and. (nRAttr_(aV)>0)) aV%rAttr=0._FP + if(associated(aV%rAttr) .and. (nRAttr_(aV)>0)) then +!DIR$ COLLAPSE + aV%rAttr=0._FP + endif endif endif @@ -2502,9 +2506,6 @@ subroutine RCopy_(aVin, aVout, vector, sharedIndices) use m_die , only : die use m_stdio , only : stderr - use m_List, only : GetSharedListIndices - use m_List, only : GetIndices => get_indices - implicit none ! !INPUT PARAMETERS: @@ -2527,8 +2528,7 @@ subroutine RCopy_(aVin, aVout, vector, sharedIndices) integer :: i,j,ier ! dummy variables integer :: aVsize ! The lsize of aVin and aVout - integer :: num_inindices, num_outindices ! Number of matching indices in aV - integer :: inxmin, outxmin, inx, outx ! Index variables + integer :: inxmin, outxmin ! Index variables logical :: usevector ! true if vector flag is present and true. character*7 :: data_flag ! character variable used as data type flag type(AVSharedIndicesOneType) :: mySharedIndices ! 
copied from sharedIndices, or @@ -2586,23 +2586,20 @@ subroutine RCopy_(aVin, aVout, vector, sharedIndices) if(mySharedIndices%contiguous) then + outxmin=mySharedIndices%aVindices2(1)-1 + inxmin=mySharedIndices%aVindices1(1)-1 if(usevector) then - outxmin=mySharedIndices%aVindices2(1)-1 - inxmin=mySharedIndices%aVindices1(1)-1 !$OMP PARALLEL DO PRIVATE(i,j) do i=1,mySharedIndices%num_indices !CDIR SELECT(VECTOR) -!DIR$ CONCURRENT +!DIR$ IVDEP do j=1,aVsize aVout%rAttr(outxmin+i,j) = aVin%rAttr(inxmin+i,j) enddo enddo else - outxmin=mySharedIndices%aVindices2(1)-1 - inxmin=mySharedIndices%aVindices1(1)-1 -!$OMP PARALLEL DO PRIVAtE(j,i) +!$OMP PARALLEL DO PRIVATE(j,i) COLLAPSE(2) do j=1,aVsize -!DIR$ CONCURRENT do i=1,mySharedIndices%num_indices aVout%rAttr(outxmin+i,j) = aVin%rAttr(inxmin+i,j) enddo @@ -2611,13 +2608,10 @@ subroutine RCopy_(aVin, aVout, vector, sharedIndices) else -!$OMP PARALLEL DO PRIVATE(j,i,outx,inx) +!$OMP PARALLEL DO PRIVATE(j,i) COLLAPSE(2) do j=1,aVsize -!DIR$ CONCURRENT do i=1,mySharedIndices%num_indices - outx=mySharedIndices%aVindices2(i) - inx=mySharedIndices%aVindices1(i) - aVout%rAttr(outx,j) = aVin%rAttr(inx,j) + aVout%rAttr(mySharedIndices%aVindices2(i),j) = aVin%rAttr(mySharedIndices%aVindices1(i),j) enddo enddo @@ -2665,7 +2659,6 @@ subroutine RCopyL_(aVin, aVout, rList, TrList, vector) use m_die , only : die use m_stdio , only : stderr - use m_List, only : GetSharedListIndices use m_List, only : GetIndices => get_indices implicit none @@ -2693,12 +2686,10 @@ subroutine RCopyL_(aVin, aVout, rList, TrList, vector) integer :: i,j,ier ! dummy variables integer :: num_indices ! Overlapping attribute index number integer :: aVsize ! The lsize of aVin and aVout - integer :: num_inindices, num_outindices ! Number of matching indices in aV - integer :: inxmin, outxmin, inx, outx ! Index variables + integer :: inxmin, outxmin ! Index variables logical :: TrListIsPresent ! true if list argument is present logical :: contiguous ! true if index segments are contiguous in memory logical :: usevector ! true if vector flag is present and true. - character*7 :: data_flag ! character variable used as data type flag ! Overlapping attribute index storage arrays: integer, dimension(:), pointer :: aVinindices, aVoutindices @@ -2756,48 +2747,48 @@ subroutine RCopyL_(aVin, aVout, rList, TrList, vector) ! Check if the indices are contiguous in memory for faster copy contiguous=.true. do i=2,num_indices - if(aVinindices(i) /= aVinindices(i-1)+1) contiguous = .false. + if(aVinindices(i) /= aVinindices(i-1)+1) then + contiguous = .false. + exit + endif enddo if(contiguous) then do i=2,num_indices - if(aVoutindices(i) /= aVoutindices(i-1)+1) contiguous=.false. + if(aVoutindices(i) /= aVoutindices(i-1)+1) then + contiguous=.false. + exit + endif enddo endif ! 
Start copying (arranged loop order optimized for xlf90) if(contiguous) then + outxmin=aVoutindices(1)-1 + inxmin=aVinindices(1)-1 if(usevector) then - outxmin=aVoutindices(1)-1 - inxmin=aVinindices(1)-1 !$OMP PARALLEL DO PRIVATE(i,j) do i=1,num_indices -!DIR$ CONCURRENT - do j=1,aVsize - aVout%rAttr(outxmin+i,j) = aVin%rAttr(inxmin+i,j) - enddo - enddo +!DIR$ IVDEP + do j=1,aVsize + aVout%rAttr(outxmin+i,j) = aVin%rAttr(inxmin+i,j) + enddo + enddo else - outxmin=aVoutindices(1)-1 - inxmin=aVinindices(1)-1 -!$OMP PARALLEL DO PRIVATE(j,i) +!$OMP PARALLEL DO PRIVATE(j,i) COLLAPSE(2) do j=1,aVsize -!DIR$ CONCURRENT - do i=1,num_indices - aVout%rAttr(outxmin+i,j) = aVin%rAttr(inxmin+i,j) - enddo - enddo + do i=1,num_indices + aVout%rAttr(outxmin+i,j) = aVin%rAttr(inxmin+i,j) + enddo + enddo endif else -!$OMP PARALLEL DO PRIVATE(j,i,outx,inx) +!$OMP PARALLEL DO PRIVATE(j,i) COLLAPSE(2) do j=1,aVsize -!DIR$ CONCURRENT do i=1,num_indices - outx=aVoutindices(i) - inx=aVinindices(i) - aVout%rAttr(outx,j) = aVin%rAttr(inx,j) + aVout%rAttr(aVoutindices(i),j) = aVin%rAttr(aVinindices(i),j) enddo enddo @@ -2842,9 +2833,6 @@ subroutine ICopy_(aVin, aVout, vector, sharedIndices) use m_die , only : die use m_stdio , only : stderr - use m_List, only : GetSharedListIndices - use m_List, only : GetIndices => get_indices - implicit none ! !INPUT PARAMETERS: @@ -2868,8 +2856,7 @@ subroutine ICopy_(aVin, aVout, vector, sharedIndices) integer :: i,j,ier ! dummy variables integer :: aVsize ! The lsize of aVin and aVout - integer :: num_inindices, num_outindices ! Number of matching indices in aV - integer :: inxmin, outxmin, inx, outx ! Index variables + integer :: inxmin, outxmin ! Index variables logical :: usevector ! true if vector flag is present and true. character*7 :: data_flag ! character variable used as data type flag type(AVSharedIndicesOneType) :: mySharedIndices ! copied from sharedIndices, or @@ -2926,23 +2913,20 @@ subroutine ICopy_(aVin, aVout, vector, sharedIndices) if(mySharedIndices%contiguous) then + outxmin=mySharedIndices%aVindices2(1)-1 + inxmin=mySharedIndices%aVindices1(1)-1 if(usevector) then - outxmin=mySharedIndices%aVindices2(1)-1 - inxmin=mySharedIndices%aVindices1(1)-1 !$OMP PARALLEL DO PRIVATE(i,j) do i=1,mySharedIndices%num_indices !CDIR SELECT(VECTOR) -!DIR$ CONCURRENT +!DIR$ IVDEP do j=1,aVsize aVout%iAttr(outxmin+i,j) = aVin%iAttr(inxmin+i,j) enddo enddo else - outxmin=mySharedIndices%aVindices2(1)-1 - inxmin=mySharedIndices%aVindices1(1)-1 -!$OMP PARALLEL DO PRIVATE(j,i) +!$OMP PARALLEL DO PRIVATE(j,i) COLLAPSE(2) do j=1,aVsize -!DIR$ CONCURRENT do i=1,mySharedIndices%num_indices aVout%iAttr(outxmin+i,j) = aVin%iAttr(inxmin+i,j) enddo @@ -2951,13 +2935,10 @@ subroutine ICopy_(aVin, aVout, vector, sharedIndices) else -!$OMP PARALLEL DO PRIVATE(j,i,outx,inx) +!$OMP PARALLEL DO PRIVATE(j,i) COLLAPSE(2) do j=1,aVsize -!DIR$ CONCURRENT do i=1,mySharedIndices%num_indices - outx=mySharedIndices%aVindices2(i) - inx=mySharedIndices%aVindices1(i) - aVout%iAttr(outx,j) = aVin%iAttr(inx,j) + aVout%iAttr(mySharedIndices%aVindices2(i),j) = aVin%iAttr(mySharedIndices%aVindices1(i),j) enddo enddo @@ -3031,12 +3012,10 @@ subroutine ICopyL_(aVin, aVout, iList, TiList, vector) integer :: i,j,ier ! dummy variables integer :: num_indices ! Overlapping attribute index number integer :: aVsize ! The lsize of aVin and aVout - integer :: num_inindices, num_outindices ! Number of matching indices in aV - integer :: inxmin, outxmin, inx, outx ! Index variables + integer :: inxmin, outxmin ! 
Index variables logical :: TiListIsPresent ! true if list argument is present logical :: contiguous ! true if index segments are contiguous in memory logical :: usevector ! true if vector flag is present and true. - character*7 :: data_flag ! character variable used as data type flag ! Overlapping attribute index storage arrays: integer, dimension(:), pointer :: aVinindices, aVoutindices @@ -3094,49 +3073,49 @@ subroutine ICopyL_(aVin, aVout, iList, TiList, vector) ! Check if the indices are contiguous in memory for faster copy contiguous=.true. do i=2,num_indices - if(aVinindices(i) /= aVinindices(i-1)+1) contiguous = .false. + if(aVinindices(i) /= aVinindices(i-1)+1) then + contiguous = .false. + exit + endif enddo if(contiguous) then do i=2,num_indices - if(aVoutindices(i) /= aVoutindices(i-1)+1) contiguous=.false. + if(aVoutindices(i) /= aVoutindices(i-1)+1) then + contiguous=.false. + exit + endif enddo endif ! Start copying (arranged loop order optimized for xlf90) if(contiguous) then + outxmin=aVoutindices(1)-1 + inxmin=aVinindices(1)-1 if(usevector) then - outxmin=aVoutindices(1)-1 - inxmin=aVinindices(1)-1 !$OMP PARALLEL DO PRIVAtE(i,j) do i=1,num_indices !CDIR SELECT(VECTOR) -!DIR$ CONCURRENT +!DIR$ IVDEP do j=1,aVsize - aVout%iAttr(outxmin+i,j) = aVin%iAttr(inxmin+i,j) + aVout%iAttr(outxmin+i,j) = aVin%iAttr(inxmin+i,j) enddo enddo else - outxmin=aVoutindices(1)-1 - inxmin=aVinindices(1)-1 -!$OMP PARALLEL DO PRIVATE(j,i) +!$OMP PARALLEL DO PRIVATE(j,i) COLLAPSE(2) do j=1,aVsize -!DIR$ CONCURRENT do i=1,num_indices - aVout%iAttr(outxmin+i,j) = aVin%iAttr(inxmin+i,j) + aVout%iAttr(outxmin+i,j) = aVin%iAttr(inxmin+i,j) enddo enddo endif else -!$OMP PARALLEL DO PRIVATE(j,i,outx,inx) +!$OMP PARALLEL DO PRIVATE(j,i) COLLAPSE(2) do j=1,aVsize -!DIR$ CONCURRENT do i=1,num_indices - outx=aVoutindices(i) - inx=aVinindices(i) - aVout%iAttr(outx,j) = aVin%iAttr(inx,j) + aVout%iAttr(aVoutindices(i),j) = aVin%iAttr(aVinindices(i),j) enddo enddo @@ -3198,9 +3177,6 @@ subroutine Copy_(aVin, aVout, rList, TrList, iList, TiList, vector, sharedIndice use m_die , only : die, warn use m_stdio , only : stderr - use m_List, only : GetSharedListIndices - use m_List, only : GetIndices => get_indices - implicit none ! !INPUT PARAMETERS: @@ -3244,7 +3220,6 @@ subroutine Copy_(aVin, aVout, rList, TrList, iList, TiList, vector, sharedIndice logical :: TiListIsPresent, TrListIsPresent! true if list argument is present logical :: contiguous ! true if index segments are contiguous in memory logical :: usevector ! true if vector flag is present and true. - character*7 :: data_flag ! character variable used as data type flag ! Overlapping attribute index storage arrays: integer, dimension(:), pointer :: aVinindices, aVoutindices @@ -3267,48 +3242,48 @@ subroutine Copy_(aVin, aVout, rList, TrList, iList, TiList, vector, sharedIndice ! Copy the listed real attributes if(present(rList)) then - ! TrList is present if it is provided and its length>0 - TrListIsPresent = .false. - if(present(TrList)) then - if(len_trim(TrList) > 0) then - TrListIsPresent = .true. - endif - endif + ! TrList is present if it is provided and its length>0 + TrListIsPresent = .false. + if(present(TrList)) then + if(len_trim(TrList) > 0) then + TrListIsPresent = .true. 
+ endif + endif - if(present(sharedIndices)) then - call warn(myname_,'Use of sharedIndices not implemented in RCopyL; & + if(present(sharedIndices)) then + call warn(myname_,'Use of sharedIndices not implemented in RCopyL; & &ignoring sharedIndices',1) - end if + endif - if(TrListIsPresent) then - call RCopyL_(aVin,aVout,rList,TrList,vector=usevector) - else - call RCopyL_(aVin,aVout,rList,vector=usevector) - endif + if(TrListIsPresent) then + call RCopyL_(aVin,aVout,rList,TrList,vector=usevector) + else + call RCopyL_(aVin,aVout,rList,vector=usevector) + endif endif ! if(present(rList) ! Copy the listed integer attributes if(present(iList)) then - ! TiList is present if its provided and its length>0 - TiListIsPresent = .false. - if(present(TiList)) then - if(len_trim(TiList) > 0) then - TiListIsPresent = .true. - endif - endif + ! TiList is present if its provided and its length>0 + TiListIsPresent = .false. + if(present(TiList)) then + if(len_trim(TiList) > 0) then + TiListIsPresent = .true. + endif + endif - if(present(sharedIndices)) then - call warn(myname_,'Use of sharedIndices not implemented in ICopyL; & + if(present(sharedIndices)) then + call warn(myname_,'Use of sharedIndices not implemented in ICopyL; & &ignoring sharedIndices',1) - end if + endif - if(TiListIsPresent) then - call ICopyL_(aVin,aVout,iList,TiList,vector=usevector) - else - call ICopyL_(aVin,aVout,iList,vector=usevector) - endif + if(TiListIsPresent) then + call ICopyL_(aVin,aVout,iList,TiList,vector=usevector) + else + call ICopyL_(aVin,aVout,iList,vector=usevector) + endif endif ! if(present(iList)) @@ -4009,12 +3984,14 @@ subroutine SharedIndicesOneType_(aV1, aV2, attrib, sharedIndices) do i=2,sharedIndices%num_indices if(sharedIndices%aVindices1(i) /= sharedIndices%aVindices1(i-1)+1) then sharedIndices%contiguous = .false. + exit endif enddo if(sharedIndices%contiguous) then do i=2,sharedIndices%num_indices if(sharedIndices%aVindices2(i) /= sharedIndices%aVindices2(i-1)+1) then sharedIndices%contiguous=.false. + exit endif enddo endif diff --git a/src/externals/mct/mct/m_GlobalSegMap.F90 b/src/externals/mct/mct/m_GlobalSegMap.F90 index f59901e929c1..a1960885fa8e 100644 --- a/src/externals/mct/mct/m_GlobalSegMap.F90 +++ b/src/externals/mct/mct/m_GlobalSegMap.F90 @@ -1742,9 +1742,6 @@ subroutine active_pes_(GSMap, n_active, pe_list) ! !USES: ! use m_die , only : die - use m_SortingTools , only : IndexSet - use m_SortingTools , only : IndexSort - use m_SortingTools , only : Permute implicit none @@ -1764,22 +1761,25 @@ subroutine active_pes_(GSMap, n_active, pe_list) character(len=*),parameter :: myname_=myname//'::active_pes_' integer :: count, i, n, ngseg, ierr - logical :: new - integer, dimension(:), allocatable :: temp_list - integer, dimension(:), allocatable :: perm + integer :: max_activepe, p + logical, dimension(:), allocatable :: process_list ! retrieve total number of segments in the map: ngseg = ngseg_(GSMap) + ! retrieve maximum active process id in the map: + + max_activepe = maxval(GSMap%pe_loc(:)) + ! allocate workspace to tally process id list: - allocate(temp_list(ngseg), stat=ierr) - if(ierr /= 0) call die(myname_,'allocate(temp_list...',ierr) + allocate(process_list(0:max_activepe), stat=ierr) + if(ierr /= 0) call die(myname_,'allocate(process_list)',ierr) - ! initialize temp_list to -1 (which can never be a process id) + ! initialize process_list to false (i.e. no active pes) - temp_list = -1 + process_list = .false. ! 
initialize the distinct active process count: @@ -1790,25 +1790,10 @@ subroutine active_pes_(GSMap, n_active, pe_list) do n=1,ngseg if(GSMap%pe_loc(n) >= 0) then ! a legitimate pe_location - ! assume initially that GSMap%pe_loc(n) is a process id previously - ! not encountered - - new = .true. - - ! test this proposition against the growing list of distinct - ! process ids stored in temp_list(:) - - do i=1, count - if(GSMap%pe_loc(n) == temp_list(i)) new = .false. - end do - - ! If GSMap%pe_loc(n) represents a previously unencountered - ! process id, increment the count, and add this id to the list - - if(new) then - count = count + 1 - temp_list(count) = GSMap%pe_loc(n) - endif + if (.not. process_list(GSMap%pe_loc(n))) then + process_list(GSMap%pe_loc(n)) = .true. + count = count + 1 + endif else ! a negative entry in GSMap%pe_loc(n) ierr = 2 @@ -1817,41 +1802,37 @@ subroutine active_pes_(GSMap, n_active, pe_list) end do ! If the argument pe_list is present, we must allocate this - ! array, fill it, and sort it + ! array and fill it if(present(pe_list)) then - ! allocate pe_list and permutation array perm + ! allocate pe_list - allocate(pe_list(count), perm(count), stat=ierr) + allocate(pe_list(count), stat=ierr) if (ierr /= 0) then - call die(myname_,'allocate(pe_list...',ierr) + call die(myname_,'allocate(pe_list)',ierr) endif - do n=1,count - pe_list(n) = temp_list(n) - end do - - ! sorting and permutation... - - call IndexSet(perm) - call IndexSort(count, perm, pe_list, descend=.false.) - call Permute(pe_list, perm, count) - - ! deallocate permutation array... - - deallocate(perm, stat=ierr) - if (ierr /= 0) then - call die(myname_,'deallocate(perm)',ierr) + i = 0 + do p=0,max_activepe + if (process_list(p)) then + i = i+1 + if (i > count) exit + pe_list(i) = p + endif + enddo + + if (i > count) then + call die(myname_,'pe_list fill error',count) endif endif ! if(present(pe_list))... - ! deallocate work array temp_list... + ! deallocate work array process_list... - deallocate(temp_list, stat=ierr) + deallocate(process_list, stat=ierr) if (ierr /= 0) then - call die(myname_,'deallocate(temp_list)',ierr) + call die(myname_,'deallocate(process_list)',ierr) endif ! finally, store the active process count in output variable @@ -1903,15 +1884,37 @@ subroutine peLocs_(pointGSMap, npoints, points, pe_locs) ! !REVISION HISTORY: ! 18Apr01 - J.W. Larson - initial version. +! 18Oct16 - P. Worley - added algorithm options: +! new default changes complexity from O(npoints*ngseg) to +! O(gsize + ngseg) (worst case), and much better in current +! usage. Worst case memory requirements are O(gsize), but +! not seen in current usage. Other new algorithm is a little +! slower in practice, and worst case memory requirement is +! O(ngseg), which is also not seen in current usage. +! Original algorithm is recovered if compiled with +! LOW_MEMORY_PELOCS defined. Otherwise nondefault new +! algorithm is enabled if compiled with MEDIUM_MEMORY_PELOCS +! defined. !EOP ___________________________________________________________________ character(len=*),parameter :: myname_=myname//'::peLocs_' integer :: ierr integer :: iseg, ngseg, ipoint integer :: lower_index, upper_index + integer :: min_points_index, max_points_index +#if defined MEDIUM_MEMORY_PELOCS + integer :: ifseg, nfseg + integer, dimension(:), allocatable :: feasible_seg +#else + integer, dimension(:), allocatable :: pindices_to_pes +#endif ! 
Input argument checks: + if (npoints < 1) then + return + endif + if(size(points) < npoints) then ierr = size(points) call die(myname_,'input points list array too small',ierr) @@ -1934,21 +1937,158 @@ subroutine peLocs_(pointGSMap, npoints, points, pe_locs) ngseg = ngseg_(pointGSMap) +#if defined LOW_MEMORY_PELOCS + do ipoint=1,npoints ! loop over points do iseg=1,ngseg ! loop over segments - lower_index = pointGSMap%start(iseg) - upper_index = lower_index + pointGSMap%length(iseg) - 1 + lower_index = pointGSMap%start(iseg) + upper_index = lower_index + pointGSMap%length(iseg) - 1 - if((points(ipoint) >= lower_index) .and. & - (points(ipoint) <= upper_index)) then - pe_locs(ipoint) = pointGSMap%pe_loc(iseg) - endif + if((points(ipoint) >= lower_index) .and. & + (points(ipoint) <= upper_index)) then + pe_locs(ipoint) = pointGSMap%pe_loc(iseg) + + exit + + endif end do ! do iseg=1, ngseg + + end do ! do ipoint=1,npoints + +#elif defined MEDIUM_MEMORY_PELOCS + +! Determine index range for points vector + max_points_index = 0 + min_points_index = pointGSMap%gsize + 1 + do ipoint=1,npoints ! loop over points + + max_points_index = max(points(ipoint), max_points_index) + min_points_index = min(points(ipoint), min_points_index) + + end do ! do ipoint=1,npoints + +! Determine number of segments that need to be examined + nfseg = 0 + do iseg=1,ngseg ! loop over segments + + lower_index = pointGSMap%start(iseg) + upper_index = lower_index + pointGSMap%length(iseg) - 1 + + if ((lower_index <= max_points_index) .and. & + (upper_index >= min_points_index) ) then + + nfseg = nfseg + 1 + + endif + + end do ! do iseg=1, ngseg + + if(nfseg < 1) then + ierr = nfseg + call die(myname_,'no feasible segments',ierr) + endif + + ! Allocate temporary array + allocate(feasible_seg(nfseg), stat=ierr) + if (ierr /= 0) then + call die(myname_,'allocate(feasible_seg)',ierr) + endif + + ! Determine segments that need to be examined + feasible_seg(:) = 1 + nfseg = 0 + do iseg=1,ngseg ! loop over segments + + lower_index = pointGSMap%start(iseg) + upper_index = lower_index + pointGSMap%length(iseg) - 1 + + if ((lower_index <= max_points_index) .and. & + (upper_index >= min_points_index) ) then + + nfseg = nfseg + 1 + feasible_seg(nfseg) = iseg + + endif + + end do ! do iseg=1, ngseg + + ! Calculate map from local points to pes + do ipoint=1,npoints ! loop over points + + do ifseg=1,nfseg ! loop over feasible segments + + iseg = feasible_seg(ifseg) + lower_index = pointGSMap%start(iseg) + upper_index = lower_index + pointGSMap%length(iseg) - 1 + + if((points(ipoint) >= lower_index) .and. & + (points(ipoint) <= upper_index) ) then + pe_locs(ipoint) = pointGSMap%pe_loc(iseg) + exit + endif + + end do ! do ifseg=1,nfseg + end do ! do ipoint=1,npoints + + ! Clean up + deallocate(feasible_seg, stat=ierr) + if (ierr /= 0) then + call die(myname_,'deallocate(feasible_seg)',ierr) + endif + +#else + +! Determine index range for points assigned to points vector + max_points_index = 0 + min_points_index = pointGSMap%gsize + 1 + do ipoint=1,npoints ! loop over points + + max_points_index = max(points(ipoint), max_points_index) + min_points_index = min(points(ipoint), min_points_index) + + end do ! do ipoint=1,npoints + +! Allocate temporary array + allocate(pindices_to_pes(min_points_index:max_points_index), stat=ierr) + if (ierr /= 0) then + call die(myname_,'allocate(pindices_to_pes)',ierr) + endif + +! Calculate map from (global) point indices to pes + do iseg=1,ngseg ! 
loop over segments + + lower_index = pointGSMap%start(iseg) + upper_index = lower_index + pointGSMap%length(iseg) - 1 + + lower_index = max(lower_index, min_points_index) + upper_index = min(upper_index, max_points_index) + + if (lower_index <= upper_index) then + do ipoint=lower_index,upper_index + pindices_to_pes(ipoint) = pointGSMap%pe_loc(iseg) + enddo + endif + + end do ! do iseg=1, ngseg + +! Calculate map from local point indices to pes + do ipoint=1,npoints ! loop over points + + pe_locs(ipoint) = pindices_to_pes(points(ipoint)) + end do ! do ipoint=1,npoints +! Clean up + deallocate(pindices_to_pes, stat=ierr) + if (ierr /= 0) then + call die(myname_,'deallocate(pindices_to_pes)',ierr) + endif + +#endif + end subroutine peLocs_ !~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/externals/mct/mct/m_MatAttrVectMul.F90 b/src/externals/mct/mct/m_MatAttrVectMul.F90 index f6937c17b26b..080214c677a3 100644 --- a/src/externals/mct/mct/m_MatAttrVectMul.F90 +++ b/src/externals/mct/mct/m_MatAttrVectMul.F90 @@ -108,7 +108,6 @@ subroutine sMatAvMult_DataLocal_(xAV, sMat, yAV, Vector, rList, TrList) use m_AttrVect, only : AttrVect use m_AttrVect, only : AttrVect_lsize => lsize use m_AttrVect, only : AttrVect_zero => zero - use m_AttrVect, only : AttrVect_nRAttr => nRAttr use m_AttrVect, only : AttrVect_indexRA => indexRA use m_AttrVect, only : SharedAttrIndexList @@ -188,6 +187,7 @@ subroutine sMatAvMult_DataLocal_(xAV, sMat, yAV, Vector, rList, TrList) logical :: usevector,TrListIsPresent,rListIsPresent logical :: contiguous,ycontiguous + usevector = .false. if(present(Vector)) then if(Vector) usevector = .true. @@ -237,11 +237,11 @@ subroutine sMatAvMult_DataLocal_(xAV, sMat, yAV, Vector, rList, TrList) call SparseMatrix_vecinit(sMat) endif -!DIR$ CONCURRENT +!DIR$ IVDEP do m=1,num_indices do l=1,sMat%tbl_end !CDIR NOLOOPCHG -!DIR$ CONCURRENT +!DIR$ IVDEP do i=sMat%row_s(l),sMat%row_e(l) col = sMat%tcol(i,l) wgt = sMat%twgt(i,l) @@ -261,7 +261,7 @@ subroutine sMatAvMult_DataLocal_(xAV, sMat, yAV, Vector, rList, TrList) ! loop over attributes being regridded. -!DIR$ CONCURRENT +!DIR$ IVDEP do m=1,num_indices yAV%rAttr(m,row) = yAV%rAttr(m,row) + wgt * xAV%rAttr(m,col) @@ -318,13 +318,17 @@ subroutine sMatAvMult_DataLocal_(xAV, sMat, yAV, Vector, rList, TrList) contiguous=.true. ycontiguous=.true. do i=2,num_indices - if(xaVindices(i) /= xAVindices(i-1)+1) contiguous = .false. + if(xaVindices(i) /= xAVindices(i-1)+1) then + contiguous = .false. + exit + endif enddo if(contiguous) then do i=2,num_indices if(yAVindices(i) /= yAVindices(i-1)+1) then contiguous=.false. ycontiguous=.false. + exit endif enddo endif @@ -335,6 +339,7 @@ subroutine sMatAvMult_DataLocal_(xAV, sMat, yAV, Vector, rList, TrList) if(ycontiguous) then outxmin=yaVindices(1)-1 +!dir$ collapse do j=1,ysize do i=1,numav yAV%rAttr(outxmin+i,j)=0._FP @@ -360,14 +365,13 @@ subroutine sMatAvMult_DataLocal_(xAV, sMat, yAV, Vector, rList, TrList) wgt = sMat%data%rAttr(iwgt,n) ! loop over attributes being regridded. -!DIR$ CONCURRENT +!DIR$ IVDEP do m=1,num_indices yAV%rAttr(outxmin+m,row) = & yAV%rAttr(outxmin+m,row) + & wgt * xAV%rAttr(inxmin+m,col) end do ! m=1,num_indices end do ! n=1,num_elements - else do n=1,num_elements @@ -376,7 +380,7 @@ subroutine sMatAvMult_DataLocal_(xAV, sMat, yAV, Vector, rList, TrList) wgt = sMat%data%rAttr(iwgt,n) ! loop over attributes being regridded. 
-!DIR$ CONCURRENT +!DIR$ IVDEP do m=1,num_indices yAV%rAttr(yAVindices(m),row) = & yAV%rAttr(yAVindices(m),row) + & @@ -508,7 +512,8 @@ subroutine sMatAvMult_SMPlus_(xAV, sMatPlus, yAV, Vector, rList, TrList) call AttrVect_zero(xPrimeAV) ! Rearrange data from x to get x' call Rearrange(xAV, xPrimeAV, sMatPlus%XToXPrime, & - sMatPlus%Tag ,vector=usevector) + tag=sMatPlus%Tag, vector=usevector,& + alltoall=.true., handshake=.true. ) ! Perform perfectly data-local multiply y = Mx' if (present(TrList).and.present(rList)) then @@ -554,13 +559,15 @@ subroutine sMatAvMult_SMPlus_(xAV, sMatPlus, yAV, Vector, rList, TrList) ! Rearrange/reduce partial sums in y' to get y if (present(TrList).or.present(rList)) then - call Rearrange(yPrimeAV, yAVre, sMatPlus%YPrimeToY, sMatPlus%Tag, & - .TRUE., Vector=usevector) + call Rearrange(yPrimeAV, yAVre, sMatPlus%YPrimeToY, & + tag=sMatPlus%Tag, sum=.TRUE., Vector=usevector, & + alltoall=.true., handshake=.true. ) call AttrVect_Rcopy(yAVre,yAV,vector=usevector) call AttrVect_clean(yAVre, ierr) else - call Rearrange(yPrimeAV, yAV, sMatPlus%YPrimeToY, sMatPlus%Tag, & - .TRUE., Vector=usevector) + call Rearrange(yPrimeAV, yAV, sMatPlus%YPrimeToY, & + tag=sMatPlus%Tag, sum=.TRUE., Vector=usevector, & + alltoall=.true., handshake=.true. ) endif ! Clean up space occupied by y' call AttrVect_clean(yPrimeAV, ierr) @@ -586,8 +593,9 @@ subroutine sMatAvMult_SMPlus_(xAV, sMatPlus, yAV, Vector, rList, TrList) endif ! Rearrange data from x to get x' - call Rearrange(xAV, xPrimeAV, sMatPlus%XToXPrime, sMatPlus%Tag, & - Vector=usevector) + call Rearrange(xAV, xPrimeAV, sMatPlus%XToXPrime, & + tag=sMatPlus%Tag, Vector=usevector, & + alltoall=.true., handshake=.true. ) ! Perform perfectly data-local multiply y' = Mx' if (present(TrList).and.present(rList)) then @@ -603,13 +611,15 @@ subroutine sMatAvMult_SMPlus_(xAV, sMatPlus, yAV, Vector, rList, TrList) ! Rearrange/reduce partial sums in y' to get y if (present(TrList).or.present(rList)) then - call Rearrange(yPrimeAV, yAVre, sMatPlus%YPrimeToY, sMatPlus%Tag, & - .TRUE., Vector=usevector) + call Rearrange(yPrimeAV, yAVre, sMatPlus%YPrimeToY, & + tag=sMatPlus%Tag, sum=.TRUE., Vector=usevector, & + alltoall=.true., handshake=.true. ) call AttrVect_Rcopy(yAVre,yAV,vector=usevector) call AttrVect_clean(yAVre, ierr) else - call Rearrange(yPrimeAV, yAV, sMatPlus%YPrimeToY, sMatPlus%Tag, & - .TRUE., Vector=usevector) + call Rearrange(yPrimeAV, yAV, sMatPlus%YPrimeToY, & + tag=sMatPlus%Tag, sum=.TRUE., Vector=usevector, & + alltoall=.true., handshake=.true. ) endif ! Clean up space occupied by x' diff --git a/src/externals/mct/mct/m_Rearranger.F90 b/src/externals/mct/mct/m_Rearranger.F90 index dc81dde9b5d9..d75a4a54a1d4 100644 --- a/src/externals/mct/mct/m_Rearranger.F90 +++ b/src/externals/mct/mct/m_Rearranger.F90 @@ -531,7 +531,8 @@ end subroutine clean_ ! ! !INTERFACE: - subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) + subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,& + Vector,AlltoAll,HandShake,ISend,MaxReq) ! ! 
!USES: @@ -548,6 +549,7 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) use m_AttrVect, only : nIAttr,nRAttr use m_AttrVect, only : Permute,Unpermute use m_Router, only : Router + use m_SPMDutils, only : m_swapm_int, m_swapm_FP use m_realkinds, only : FP use m_mpif90 use m_die @@ -567,6 +569,9 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) logical, optional, intent(in) :: Sum logical, optional, intent(in) :: Vector logical, optional, intent(in) :: AlltoAll + logical, optional, intent(in) :: HandShake + logical, optional, intent(in) :: ISend + integer, optional, intent(in) :: MaxReq ! !REVISION HISTORY: ! 31Jan02 - E.T. Ong - initial prototype @@ -581,6 +586,8 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) ! 14Oct06 - R. Jacob - check value of Sum argument. ! 25Jan08 - R. Jacob - Permute/unpermute if the internal ! routers permarr is defined. +! 29Sep16 - P. Worley - added swapm variant of +! alltoall option !EOP ___________________________________________________________________ character(len=*),parameter :: myname_=myname//'::Rearrange_' @@ -591,11 +598,11 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) integer :: mp_Type_rp integer :: mytag integer :: ISendSize, RSendSize, IRecvSize, RRecvSize - logical :: usevector, usealltoall + logical :: usevector, usealltoall, useswapm logical :: DoSum logical :: Sendunordered logical :: Recvunordered - real(FP) :: realtyp + real(FP):: realtyp !----------------------------------------------------------------------- ! DECLARE STRUCTURES FOR MPI ARGUMENTS. @@ -633,6 +640,10 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) integer,dimension(:),allocatable :: IRecvBuf real(FP),dimension(:),allocatable :: RRecvBuf + ! declare arrays to hold MPI data types for m_swapm_XXX calls + integer :: ITypes(0:max_nprocs-1) + integer :: RTypes(0:max_nprocs-1) + ! Structure to hold MPI request information for sends integer :: send_ireqs(max_nprocs) integer :: send_rreqs(max_nprocs) @@ -654,6 +665,16 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) type(AttrVect),pointer :: SourceAv type(AttrVect),target :: SourceAvtmp + ! local swapm protocol variables and defaults + logical,parameter :: DEF_SWAPM_HS = .true. + logical swapm_hs + + logical,parameter :: DEF_SWAPM_ISEND = .false. + logical swapm_isend + + integer,parameter :: DEF_SWAPM_MAXREQ = 512 + integer swapm_maxreq + !:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: Sendunordered=associated(InRearranger%SendRouter%permarr) @@ -708,8 +729,44 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) endif usealltoall=.false. - if(present(Alltoall)) then - if(Alltoall) usealltoall=.true. + if(present(AlltoAll)) then + if(AlltoAll) usealltoall=.true. + endif +!pw++ + ! forcing use of alltoall protocol until additional tuning + ! capabilities are added to calling routines +!pw usealltoall=.true. +!pw-- + + useswapm=.false. + if (usealltoall) then + ! if any swapm-related optional parameters are present, + ! enable swapm variant of alltoall + + swapm_hs = DEF_SWAPM_HS + if(present(HandShake)) then + if(HandShake) swapm_hs=.true. + useswapm=.true. + endif + + swapm_isend = DEF_SWAPM_ISEND + if(present(ISend)) then + if(ISend) swapm_isend=.true. + useswapm=.true. 
+ endif + + swapm_maxreq = DEF_SWAPM_MAXREQ + if(present(MaxReq)) then + swapm_maxreq=MaxReq + useswapm=.true. + endif + +!pw++ + ! forcing use of swapm variant of alltoall protocol + ! until additional tuning capabilities are added to + ! calling routines +!pw useswapm=.true. +!pw-- endif DoSum=.false. @@ -735,9 +792,6 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) ! ALLOCATE DATA STRUCTURES ! - ! IF SENDING DATA - if(SendRout%nprocs > 0) then - ! IF SENDING INTEGER DATA if(numi .ge. 1) then @@ -769,10 +823,7 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) endif - endif - ! IF RECEVING DATA - if(RecvRout%nprocs > 0) then ! IF RECEIVING INTEGER DATA if(numi .ge. 1) then @@ -805,8 +856,6 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) endif - endif - !:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: ! INVERT PE LIST ! @@ -855,6 +904,10 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) RRdispls(pe) = RRecvLoc(proc) - 1 endif enddo + + ! SET MPI DATA TYPES + ITypes(:) = MP_INTEGER + RTypes(:) = mp_Type_rp endif !:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: @@ -1090,6 +1143,26 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) if (usealltoall) then + if (useswapm) then + + if (numi .ge. 1) then + call m_swapm_int(max_pe, myPid, & + ISendBuf, ISendSize, ISendCnts, ISdispls, ITypes, & + IRecvBuf, IRecvSize, IRecvCnts, IRdispls, ITypes, & + ThisMCTWorld%MCT_comm, & + swapm_hs, swapm_isend, swapm_maxreq ) + endif + + if (numr .ge. 1) then + call m_swapm_FP (max_pe, myPid, & + RSendBuf, RSendSize, RSendCnts, RSdispls, RTypes, & + RRecvBuf, RRecvSize, RRecvCnts, RRdispls, RTypes, & + ThisMCTWorld%MCT_comm, & + swapm_hs, swapm_isend, swapm_maxreq ) + endif + + else + if (numi .ge. 1) then call MPI_Alltoallv(ISendBuf, ISendCnts, ISdispls, MP_INTEGER, & IRecvBuf, IRecvCnts, IRdispls, MP_INTEGER, & @@ -1102,6 +1175,8 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) ThisMCTWorld%MCT_comm,ier) endif + endif + else ! WAIT FOR THE NONBLOCKING SENDS TO COMPLETE @@ -1250,30 +1325,12 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) ! DEALLOCATE ALL STRUCTURES - if(SendRout%nprocs > 0) then - if(numi .ge. 1) then ! Deallocate the send buffer deallocate(ISendBuf,stat=ier) if(ier/=0) call die(myname_,'deallocate(ISendBuf)',ier) - endif - - if(numr .ge. 1) then - - ! Deallocate the send buffer - deallocate(RSendBuf,stat=ier) - if(ier/=0) call die(myname_,'deallocate(RSendBuf)',ier) - - endif - - endif - - if(RecvRout%nprocs > 0) then - - if(numi .ge. 1) then - ! Deallocate the receive buffer deallocate(IRecvBuf,stat=ier) if(ier/=0) call die(myname_,'deallocate(IRecvBuf)',ier) @@ -1282,14 +1339,16 @@ subroutine rearrange_(SourceAVin,TargetAV,InRearranger,Tag,Sum,Vector,AlltoAll) if(numr .ge. 1) then + ! Deallocate the send buffer + deallocate(RSendBuf,stat=ier) + if(ier/=0) call die(myname_,'deallocate(RSendBuf)',ier) + ! 
Deallocate the receive buffer deallocate(RRecvBuf,stat=ier) if(ier/=0) call die(myname_,'deallocate(RRecvBuf)',ier) endif - endif - nullify(SendRout,RecvRout) end subroutine rearrange_ diff --git a/src/externals/mct/mct/m_Router.F90 b/src/externals/mct/mct/m_Router.F90 index 305d2d7e389f..f8788d608f79 100644 --- a/src/externals/mct/mct/m_Router.F90 +++ b/src/externals/mct/mct/m_Router.F90 @@ -268,6 +268,8 @@ subroutine initp_(inGSMap,inRGSMap,mycomm,Rout,name ) ! increasing. Instead, permute it to increasing and proceed. ! 07Sep12 - T. Craig - Replace a double loop with a single ! to improve speed for large proc and segment counts. +! 12Nov16 - P. Worley - eliminate iterations in nested +! loop that can be determined to be unnecessary !EOP ------------------------------------------------------------------- character(len=*),parameter :: myname_=myname//'::initp_' @@ -281,24 +283,30 @@ subroutine initp_(inGSMap,inRGSMap,mycomm,Rout,name ) integer :: my_left ! Left point in local segment (global memory) integer :: my_right ! Right point in local segment (global memory) + integer :: my_leftmost ! Leftmost point in local segments (global memory) + integer :: my_rightmost ! Rightmost point in local segments (global memory) integer :: r_left ! Left point in remote segment (global memory) integer :: r_right ! Right point in remote segment (global memory) + integer :: r_leftmost ! Leftmost point and rightmost point + integer :: r_rightmost ! in remote segments in given process (global memory) integer :: nsegs_overlap ! Number of segments that overlap between two procs integer :: ngseg, nlseg integer :: myseg, rseg - integer :: prev_right ! Rightmost local point in previous overlapped segment + integer :: rseg_leftbase, rseg_start + integer :: prev_right ! Rightmost local point in previous overlapped segment integer :: local_left, local_right integer,allocatable :: mygs_lb(:),mygs_ub(:),mygs_len(:),mygs_lstart(:) integer :: r_ngseg - integer :: r_max_nlseg ! max number of local segments in RGSMap integer,allocatable :: rgs_count(:),rgs_lb(:,:),rgs_ub(:,:) integer,allocatable :: nsegs_overlap_arr(:) integer :: overlap_left, overlap_right, overlap_diff integer :: proc, nprocs + integer :: feas_proc, feas_nprocs + integer,allocatable :: feas_procs(:), inv_feas_procs(:) integer :: max_rgs_count, max_overlap_segs type(GlobalSegMap) :: GSMap @@ -416,71 +424,109 @@ subroutine initp_(inGSMap,inRGSMap,mycomm,Rout,name ) call zeit_co(trim(tagname)) endif - !! -!! determine the segments in RGSMap that are local to each proc +!! determine the possibly overlapping segments +!! in RGSMap that are local to each proc !! - nprocs=ThisMCTWorld%nprocspid(othercomp) r_ngseg = GlobalSegMap_ngseg(RGSMap) - !! original size of rgs_lb()/ub() was (r_ngseg,nprocs) - !! at the cost of looping to compute it (within GlobalSegMap_max_nlseg), - !! reduced size to (r_max_nlseg,nprocs) - !! further reduction could be made by flattening it to one dimension - !! of size (r_ngseg) and allocating another array to index into it. - !! would not improve overall mem use unless this were also done for - !! tmpsegstart()/count() and possibly seg_starts()/lengths (the - !! latter would be a major change). 
+ if (nlseg > 0) then + my_leftmost = mygs_lb(1) + my_rightmost = mygs_ub(nlseg) - if(present(name)) then - tagname='04'//name//'rloop' - call zeit_ci(trim(tagname)) - endif - r_max_nlseg = GlobalSegMap_max_nlseg(RGSMap) - - allocate( rgs_count(nprocs) , & - rgs_lb(r_max_nlseg,nprocs), rgs_ub(r_max_nlseg,nprocs), & - nsegs_overlap_arr(nprocs), stat=ier ) - if(ier/=0) call die(myname_,'allocate rgs, nsegs',ier) - -! tcraig, updated loop - rgs_count = 0 !! number of segments in RGSMap local to proc - - do i=1,r_ngseg - proc = RGSMap%pe_loc(i) + 1 -! if (proc < 1 .or. proc > nprocs) then -! write(stderr,*) myname_,"proc pe_loc error",i,proc -! call die(myname_,'pe_loc error',0) -! endif - rgs_count(proc) = rgs_count(proc) +1 - rgs_lb( rgs_count(proc) , proc )=RGSMap%start(i) - rgs_ub( rgs_count(proc) , proc )=RGSMap%start(i) + RGSMap%length(i) -1 - enddo +!! +!! count number of potentially overlapping remote segments +!! and which and how many processes hold these +!! + if(present(name)) then + tagname='04'//name//'rloop' + call zeit_ci(trim(tagname)) + endif - if(present(name)) then - call zeit_co(trim(tagname)) - endif + !! number of potentially overlapping segments in RGSMap local to proc + !! and mapping from processes that hold these to actual process id + allocate( rgs_count(nprocs), feas_procs(nprocs), & + inv_feas_procs(nprocs), stat=ier ) + if(ier/=0) call die(myname_,'allocate rgs_count, feas_procs',ier) + + rgs_count = 0 + do i=1,r_ngseg + r_left = RGSMap%start(i) + r_right = RGSMap%start(i) + RGSMap%length(i) - 1 + + if (.not. (my_rightmost < r_left .or. & ! potential overlap + my_leftmost > r_right ) ) then + proc = RGSMap%pe_loc(i) + 1 +! if (proc < 1 .or. proc > nprocs) then +! write(stderr,*) myname_,"proc pe_loc error",i,proc +! call die(myname_,'pe_loc error',0) +! endif + rgs_count(proc) = rgs_count(proc) + 1 + endif -!!! -!!! this is purely for error checking + enddo - if(present(name)) then - tagname='05'//name//'erchck' - call zeit_ci(trim(tagname)) - endif - do proc = 1, nprocs - if (rgs_count(proc) > r_max_nlseg) then - write(stderr,*) myname_,"overflow on rgs array",proc,rgs_count(proc) - call die(myname_,'overflow on rgs',0) + feas_nprocs = 0 + feas_procs = -1 + inv_feas_procs = -1 + do proc=1,nprocs + if (rgs_count(proc) > 0) then + feas_nprocs = feas_nprocs + 1 + feas_procs(feas_nprocs) = proc + inv_feas_procs(proc) = feas_nprocs + endif + enddo + +!! +!! build list of potentially overlapping remote segments +!! + !! original size of rgs_lb()/ub() was (r_ngseg,nprocs) + !! at the cost of looping to compute it (within GlobalSegMap_max_nlseg), + !! reduced size to (r_max_nlseg,nprocs) + !! then further reduced to (max_rgs_count,feas_nprocs) + + max_rgs_count=0 + do proc=1,nprocs + max_rgs_count = max( max_rgs_count, rgs_count(proc) ) + enddo + + allocate( rgs_lb(max_rgs_count,feas_nprocs), & + rgs_ub(max_rgs_count,feas_nprocs), & + nsegs_overlap_arr(feas_nprocs), stat=ier ) + if(ier/=0) call die(myname_,'allocate rgs, nsegs',ier) + + !! (note: redefining rgs_count to be indexed as 1:feas_nprocs + !! instead of as 1:nprocs) + rgs_count = 0 + do i=1,r_ngseg + r_left = RGSMap%start(i) + r_right = RGSMap%start(i) + RGSMap%length(i) -1 + + if (.not. (my_rightmost < r_left .or. & ! 
potential overlap + my_leftmost > r_right) ) then + proc = RGSMap%pe_loc(i) + 1 + feas_proc = inv_feas_procs(proc) + rgs_count(feas_proc) = rgs_count(feas_proc) + 1 + rgs_lb( rgs_count(feas_proc) , feas_proc ) = RGSMap%start(i) + rgs_ub( rgs_count(feas_proc) , feas_proc ) = RGSMap%start(i) + RGSMap%length(i) -1 + endif + + enddo + + deallocate(inv_feas_procs,stat=ier) + if(ier/=0) call die(myname_,'deallocate inv_feas_procs',ier) + + if(present(name)) then + call zeit_co(trim(tagname)) endif - enddo - if(present(name)) then - call zeit_co(trim(tagname)) - endif -!!! + else + + max_rgs_count = 0 + feas_nprocs = 0 + endif !!!!!!!!!!!!!!!!!! @@ -492,86 +538,97 @@ subroutine initp_(inGSMap,inRGSMap,mycomm,Rout,name ) tagname='06'//name//'loop2' call zeit_ci(trim(tagname)) endif - max_rgs_count=0 - do proc=1,nprocs - max_rgs_count = max( max_rgs_count, rgs_count(proc) ) - enddo max_overlap_segs = max(nlseg,max_rgs_count) - allocate(tmpsegcount(ThisMCTWorld%nprocspid(othercomp), max_overlap_segs),& - tmpsegstart(ThisMCTWorld%nprocspid(othercomp), max_overlap_segs),& - tmppe_list(ThisMCTWorld%nprocspid(othercomp)),stat=ier) + allocate(tmpsegcount(feas_nprocs, max_overlap_segs),& + tmpsegstart(feas_nprocs, max_overlap_segs),& + tmppe_list(feas_nprocs),stat=ier) if(ier/=0) & call die( myname_,'allocate tmpsegcount etc. size ', & - ThisMCTWorld%nprocspid(othercomp), & - ' by ',max_overlap_segs) + feas_nprocs, ' by ',max_overlap_segs) - - tmpsegcount=0 - tmpsegstart=0 + if (feas_nprocs > 0) then + tmpsegcount=0 + tmpsegstart=0 + endif count =0 maxsegcount=0 !!!!!!!!!!!!!!!!!! - - do proc = 1, nprocs + do feas_proc = 1, feas_nprocs nsegs_overlap = 0 - tmppe_list(proc) = .FALSE. ! no overlaps with proc yet + tmppe_list(feas_proc) = .FALSE. ! no overlaps with proc yet + + r_leftmost = rgs_lb(1,feas_proc) + r_rightmost = rgs_ub(rgs_count(feas_proc),feas_proc) + + rseg_leftbase = 0 + do myseg = 1, nlseg ! loop over local segs on 'myPID' + + my_left = mygs_lb(myseg) + my_right= mygs_ub(myseg) - if ( rgs_count(proc) > 0 ) then - do myseg = 1, nlseg ! loop over local segs on 'myPID' + ! determine whether any overlap + if (.not. (my_right < r_leftmost .or. & + my_left > r_rightmost) ) then - my_left = mygs_lb(myseg) - my_right= mygs_ub(myseg) + rseg_start = rseg_leftbase + 1 ! rseg loop index to start searching from - do rseg = 1, rgs_count(proc) ! loop over remote segs on 'proc' + ! loop over candidate overlapping remote segs on 'feas_proc' + do rseg = rseg_start, rgs_count(feas_proc) - r_left = rgs_lb(rseg,proc) - r_right = rgs_ub(rseg,proc) + r_right = rgs_ub(rseg,feas_proc) + if (r_right < my_left ) then ! to the left + rseg_leftbase = rseg ! remember to start to the right of + ! this for next myseg + cycle ! try the next remote segment + endif - if (.not. (my_right < r_left .or. & ! overlap - my_left > r_right) ) then + r_left = rgs_lb(rseg,feas_proc) + if (r_left > my_right) exit ! to the right, so no more segments + ! need to be examined - if (nsegs_overlap == 0) then ! first overlap w/this proc - count = count + 1 - tmppe_list(proc) = .TRUE. - prev_right = -9999 - else - prev_right = local_right - endif + ! otherwise, overlaps + if (nsegs_overlap == 0) then ! first overlap w/this proc + count = count + 1 + tmppe_list(feas_proc) = .TRUE. 
+ prev_right = -9999 + else + prev_right = local_right + endif - overlap_left=max(my_left, r_left) - overlap_right=min(my_right, r_right) - overlap_diff= overlap_right - overlap_left + overlap_left=max(my_left, r_left) + overlap_right=min(my_right, r_right) + overlap_diff= overlap_right - overlap_left - local_left = mygs_lstart(myseg) + (overlap_left - my_left) - local_right = local_left + overlap_diff + local_left = mygs_lstart(myseg) + (overlap_left - my_left) + local_right = local_left + overlap_diff - ! non-contiguous w/prev one - if (local_left /= (prev_right+1) ) then - nsegs_overlap = nsegs_overlap + 1 - tmpsegstart(count, nsegs_overlap) = local_left - endif + ! non-contiguous w/prev one + if (local_left /= (prev_right+1) ) then + nsegs_overlap = nsegs_overlap + 1 + tmpsegstart(count, nsegs_overlap) = local_left + endif - tmpsegcount(count, nsegs_overlap) = & - tmpsegcount(count, nsegs_overlap) + overlap_diff + 1 + tmpsegcount(count, nsegs_overlap) = & + tmpsegcount(count, nsegs_overlap) + overlap_diff + 1 - endif enddo - enddo - endif - nsegs_overlap_arr(proc)=nsegs_overlap + endif + + enddo + + nsegs_overlap_arr(feas_proc)=nsegs_overlap enddo !! pull this out of the loop to vectorize - do proc=1,nprocs - maxsegcount=max(maxsegcount,nsegs_overlap_arr(proc)) + do feas_proc = 1, feas_nprocs + maxsegcount=max(maxsegcount,nsegs_overlap_arr(feas_proc)) enddo - if (maxsegcount > max_overlap_segs) & call die( myname_,'overran max_overlap_segs =', & max_overlap_segs, ' count = ',maxsegcount) @@ -581,11 +638,14 @@ subroutine initp_(inGSMap,inRGSMap,mycomm,Rout,name ) ! 'mysize =',mysize - deallocate( mygs_lb, mygs_ub, mygs_len, mygs_lstart, & - rgs_count, rgs_lb, rgs_ub, & - nsegs_overlap_arr, stat=ier) - if(ier/=0) call die(myname_,'deallocate mygs,rgs,nsegs',ier) + deallocate( mygs_lb, mygs_ub, mygs_len, mygs_lstart, stat=ier) + if(ier/=0) call die(myname_,'deallocate mygs,nsegs',ier) + if (nlseg > 0) then + deallocate( rgs_count, rgs_lb, rgs_ub, & + nsegs_overlap_arr, stat=ier) + if(ier/=0) call die(myname_,'deallocate p_rgs, nsegs',ier) + endif ! call shr_timer_stop(t_loop2) ! rml timers if(present(name)) then @@ -593,7 +653,6 @@ subroutine initp_(inGSMap,inRGSMap,mycomm,Rout,name ) endif - !. . . . . . . . . . . . . . . . . . . . . . . . @@ -631,43 +690,45 @@ subroutine initp_(inGSMap,inRGSMap,mycomm,Rout,name ) allocate(Rout%rp1(count),stat=ier) if(ier/=0) call die(myname_,'allocate(rp1)',ier) - - m=0 - do i=1,ThisMCTWorld%nprocspid(othercomp) - if(tmppe_list(i))then + do i=1,feas_nprocs + if(tmppe_list(i))then m=m+1 ! 
load processor rank in MCT_comm - Rout%pe_list(m)=ThisMCTWorld%idGprocid(othercomp,i-1) - endif - enddo + proc = feas_procs(i) + Rout%pe_list(m)=ThisMCTWorld%idGprocid(othercomp,proc-1) + endif + enddo - lmaxsize=0 - do i=1,count - totallength=0 - do j=1,maxsegcount - if(tmpsegcount(i,j) /= 0) then - Rout%num_segs(i)=j - Rout%seg_starts(i,j)=tmpsegstart(i,j) - Rout%seg_lengths(i,j)=tmpsegcount(i,j) - totallength=totallength+Rout%seg_lengths(i,j) - endif - enddo - Rout%locsize(i)=totallength - lmaxsize=MAX(lmaxsize,totallength) + lmaxsize=0 + do i=1,count + totallength=0 + do j=1,maxsegcount + if(tmpsegcount(i,j) /= 0) then + Rout%num_segs(i)=j + Rout%seg_starts(i,j)=tmpsegstart(i,j) + Rout%seg_lengths(i,j)=tmpsegcount(i,j) + totallength=totallength+Rout%seg_lengths(i,j) + endif enddo + Rout%locsize(i)=totallength + lmaxsize=MAX(lmaxsize,totallength) + enddo - Rout%maxsize=lmaxsize - Rout%lAvsize=mysize + Rout%maxsize=lmaxsize + Rout%lAvsize=mysize + if (nlseg > 0) then + deallocate(feas_procs,stat=ier) + if(ier/=0) call die(myname_,'deallocate feas_procs',ier) + endif deallocate(tmpsegstart,tmpsegcount,tmppe_list,stat=ier) - if(ier/=0) call die(myname_,'deallocate()',ier) + if(ier/=0) call die(myname_,'deallocate tmp',ier) call GlobalSegMap_clean(RGSMap) call GlobalSegMap_clean(GSMap) - if(present(name)) then call zeit_co(trim(tagname)) endif diff --git a/src/externals/mct/mct/m_SPMDutils.F90 b/src/externals/mct/mct/m_SPMDutils.F90 new file mode 100644 index 000000000000..d2bbd59cfa54 --- /dev/null +++ b/src/externals/mct/mct/m_SPMDutils.F90 @@ -0,0 +1,1148 @@ +!~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +! Math and Computer Science Division, Argonne National Laboratory ! +!----------------------------------------------------------------------- +! CVS $Id$ +! CVS $Name$ +!BOP ------------------------------------------------------------------- +! +! !MODULE: m_SPMDutils -- Communication operators to address performance +! issues for specific communication patterns +! +! !DESCRIPTION: +! This module provides the swapm equivalent to MPI_Alltoallv that +! has proven to be more robust with respect to performance than the +! MPI collective or the native MCT communication algorithms when the +! communication pattern is sparse and when load imbalance or send/receive +! asymmetry leads some processes to be flooded by unexpected messages. +! +! Based on algorithms implemented in CAM, but this version modelled after +! pio_spmd_utils.F90 in PIO1 +! +! !SEE ALSO: +! m_Rearranger +! +! +! !INTERFACE: + +! Disable the use of the MPI ready send protocol by default, to +! address recurrent issues with poor performance or incorrect +! functionality in MPI libraries. When support is known to be robust, +! or for experimentation, can be re-enabled by defining the CPP token +! _USE_MPI_RSEND during the build process. +! +#ifndef _USE_MPI_RSEND +#define MPI_RSEND MPI_SEND +#define mpi_rsend mpi_send +#define MPI_IRSEND MPI_ISEND +#define mpi_irsend mpi_isend +#endif + + module m_SPMDutils + + implicit none + + private ! except + +! !PUBLIC MEMBER FUNCTIONS: + + public :: m_swapm_int ! swapm alternative to MPI_AlltoallV for integer data + public :: m_swapm_FP ! swapm alternative to MPI_AlltoallV for FP data + +! !DEFINED PARAMETERS: + + character(len=*), parameter :: myname='MCT::m_SPMDutils' + +! !REVISION HISTORY: +! 28Sep16 - P. 
Worley - initial prototype +!EOP ___________________________________________________________________ + + contains + +!======================================================================== +! + + integer function pair(np,p,k) + + integer np,p,k,q + q = ieor(p,k) + if(q.gt.np-1) then + pair = -1 + else + pair = q + endif + return + + end function pair + +! +!======================================================================== +! + + integer function ceil2(n) + integer n,p + p=1 + do while(p.lt.n) + p=p*2 + enddo + ceil2=p + return + end function ceil2 + +! +!======================================================================== +! + subroutine m_swapm_int ( nprocs, mytask, & + sndbuf, sbuf_siz, sndlths, sdispls, stypes, & + rcvbuf, rbuf_siz, rcvlths, rdispls, rtypes, & + comm, comm_hs, comm_isend, comm_maxreq ) + +!----------------------------------------------------------------------- +! +!> Purpose: +!! Reduced version of original swapm (for swap of multiple messages +!! using MPI point-to-point routines), more efficiently implementing a +!! subset of the swap protocols. +!! +!! Method: +!! comm_protocol: +!! comm_isend == .true.: use nonblocking send, else use blocking send +!! comm_hs == .true.: use handshaking protocol +!! comm_maxreq: +!! =-1,0: do not limit number of outstanding send/receive requests +!! >0: do not allow more than min(comm_maxreq, steps) outstanding +!! nonblocking send requests or nonblocking receive requests +!! +!! Author of original version: P. Worley +!! Ported from PIO1: P. Worley, September 2016 +!< +!----------------------------------------------------------------------- +!----------------------------------------------------------------------- + use m_mpif90 + use m_realkinds, only : FP + use m_die, only : MP_perr_die + + implicit none +!---------------------------Input arguments-------------------------- +! + integer, intent(in) :: nprocs ! size of communicator + integer, intent(in) :: mytask ! MPI task id with communicator + integer, intent(in) :: sbuf_siz ! size of send buffer + integer, intent(in) :: rbuf_siz ! size of receive buffer + + integer, intent(in) :: sndlths(0:nprocs-1)! length of outgoing message + integer, intent(in) :: sdispls(0:nprocs-1)! offset from beginning of send + ! buffer where outgoing messages + ! should be sent from + integer, intent(in) :: stypes(0:nprocs-1) ! MPI data types + integer, intent(in) :: rcvlths(0:nprocs-1)! length of incoming messages + integer, intent(in) :: rdispls(0:nprocs-1)! offset from beginning of receive + ! buffer where incoming messages + ! should be placed + integer, intent(in) :: rtypes(0:nprocs-1) ! MPI data types + integer, intent(in) :: sndbuf(sbuf_siz) ! outgoing message buffer + + integer, intent(in) :: comm ! MPI communicator + logical, intent(in) :: comm_hs ! handshaking protocol? + logical, intent(in) :: comm_isend ! nonblocking send protocol? + integer, intent(in) :: comm_maxreq ! maximum number of outstanding + ! nonblocking requests + +!---------------------------Output arguments-------------------------- +! + integer, intent(out) :: rcvbuf(rbuf_siz) ! incoming message buffer + +! +!---------------------------Local workspace------------------------------------------- +! + character(len=*), parameter :: subName=myname//'::m_swapm_int' + + integer :: steps ! number of swaps to initiate + integer :: swapids(nprocs) ! MPI process id of swap partners + integer :: p ! process index + integer :: istep ! loop index + integer :: tag ! MPI message tag + integer :: offset_t ! 
MPI message tag offset, for addressing + ! message conflict bug (if necessary) + integer :: offset_s ! index of message beginning in + ! send buffer + integer :: offset_r ! index of message beginning in + ! receive buffer + integer :: sndids(nprocs) ! send request ids + integer :: rcvids(nprocs) ! receive request ids + integer :: hs_rcvids(nprocs) ! handshake receive request ids + + integer :: maxreq, maxreqh ! maximum number of outstanding + ! nonblocking requests (and half) + integer :: hs ! handshake variable + integer :: rstep ! "receive" step index + + logical :: handshake, sendd ! protocol option flags + + integer :: ier ! return error status + integer :: status(MP_STATUS_SIZE) ! MPI status +! +!------------------------------------------------------------------------------------- +! +#ifdef _NO_M_SWAPM_TAG_OFFSET + offset_t = 0 +#else + offset_t = nprocs +#endif +! + ! if necessary, send to self + if (sndlths(mytask) > 0) then + tag = mytask + offset_t + + offset_r = rdispls(mytask)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(mytask), rtypes(mytask), & + mytask, tag, comm, rcvids(1), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + + offset_s = sdispls(mytask)+1 + call mpi_send( sndbuf(offset_s), sndlths(mytask), stypes(mytask), & + mytask, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_SEND',ier) + + call mpi_wait( rcvids(1), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + ! calculate swap partners and communication ordering + steps = 0 + do istep=1,ceil2(nprocs)-1 + p = pair(nprocs,istep,mytask) + if (p >= 0) then + if (sndlths(p) > 0 .or. rcvlths(p) > 0) then + steps = steps + 1 + swapids(steps) = p + end if + end if + end do + + if (steps .eq. 0) return + + ! identify communication protocol + if (comm_isend) then + sendd = .false. + else + sendd = .true. + endif + handshake = comm_hs + + ! identify maximum number of outstanding nonblocking requests to permit + if (steps .eq. 1) then + maxreq = 1 + maxreqh = 1 + else + if (comm_maxreq >= -1) then + maxreq = comm_maxreq + else + maxreq = steps + endif + + if ((maxreq .le. steps) .and. (maxreq > 0)) then + if (maxreq > 1) then + maxreqh = maxreq/2 + else + maxreq = 2 + maxreqh = 1 + endif + else + maxreq = steps + maxreqh = steps + endif + endif + +! Four protocol options: +! (1) handshaking + blocking sends + if ((handshake) .and. (sendd)) then + + ! Initialize hs variable + hs = 1 + + ! Post initial handshake receive requests + do istep=1,maxreq + p = swapids(istep) + if (sndlths(p) > 0) then + tag = mytask + offset_t + call mpi_irecv( hs, 1, MP_INTEGER, p, tag, comm, & + hs_rcvids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + enddo + + ! Post initial receive requests + do istep=1,maxreq + p = swapids(istep) + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + + call mpi_send ( hs, 1, MP_INTEGER, p, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_SEND',ier) + endif + enddo + rstep = maxreq + + ! Send (and start receiving) data + do istep=1,steps + p = swapids(istep) + + ! 
Submit new rsend request + if (sndlths(p) > 0) then + tag = mytask + offset_t + + offset_s = sdispls(p)+1 + call mpi_wait ( hs_rcvids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + + call mpi_rsend ( sndbuf(offset_s), sndlths(p), stypes(p), & + p, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_RSEND',ier) + endif + + if (istep > maxreqh) then + + ! Wait for oldest irecv request to complete + p = swapids(istep-maxreqh) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep-maxreqh), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + if (rstep < steps) then + rstep = rstep + 1 + p = swapids(rstep) + + ! Submit a new handshake irecv request + if (sndlths(p) > 0) then + tag = mytask + offset_t + call mpi_irecv( hs, 1, MP_INTEGER, p, tag, comm, & + hs_rcvids(rstep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + + ! Submit a new irecv request + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(rstep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + + call mpi_send ( hs, 1, MP_INTEGER, p, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_SEND',ier) + endif + endif + + endif +! + enddo + + ! wait for rest of receive requests to complete + do istep=steps-maxreqh+1,steps + p = swapids(istep) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + enddo + +! (2) handshaking + nonblocking sends + elseif ((handshake) .and. (.not. sendd)) then + + ! Initialize hs variable + hs = 1 + + ! Post initial handshake receive requests + do istep=1,maxreq + p = swapids(istep) + if (sndlths(p) > 0) then + tag = mytask + offset_t + call mpi_irecv( hs, 1, MP_INTEGER, p, tag, comm, & + hs_rcvids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + enddo + + ! Post initial receive requests + do istep=1,maxreq + p = swapids(istep) + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + + call mpi_send ( hs, 1, MP_INTEGER, p, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_SEND',ier) + endif + enddo + rstep = maxreq + + ! Send (and start receiving) data + do istep=1,steps + p = swapids(istep) + + ! Submit new irsend request + if (sndlths(p) > 0) then + tag = mytask + offset_t + + offset_s = sdispls(p)+1 + call mpi_wait ( hs_rcvids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + + call mpi_irsend( sndbuf(offset_s), sndlths(p), stypes(p), & + p, tag, comm, sndids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRSEND',ier) + endif + + if (istep > maxreqh) then + + ! Wait for oldest irecv request to complete + p = swapids(istep-maxreqh) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep-maxreqh), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + if (rstep < steps) then + rstep = rstep + 1 + p = swapids(rstep) + + ! Submit a new handshake irecv request + if (sndlths(p) > 0) then + tag = mytask + offset_t + call mpi_irecv( hs, 1, MP_INTEGER, p, tag, comm, & + hs_rcvids(rstep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + + ! 
Submit a new irecv request + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(rstep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + + call mpi_send ( hs, 1, MP_INTEGER, p, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_SEND',ier) + endif + endif + + ! Wait for outstanding i(r)send request to complete + p = swapids(istep-maxreqh) + if (sndlths(p) > 0) then + call mpi_wait( sndids(istep-maxreqh), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + endif + + enddo + + ! wait for rest of send and receive requests to complete + do istep=steps-maxreqh+1,steps + p = swapids(istep) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + if (sndlths(p) > 0) then + call mpi_wait( sndids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + enddo + +! (3) no handshaking + blocking sends + elseif ((.not. handshake) .and. (sendd)) then + + ! Post receive requests + do istep=1,maxreq + p = swapids(istep) + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + enddo + rstep = maxreq + + ! Send (and start receiving) data + do istep=1,steps + p = swapids(istep) + + ! Submit new send request + if (sndlths(p) > 0) then + tag = mytask + offset_t + + offset_s = sdispls(p)+1 + call mpi_send( sndbuf(offset_s), sndlths(p), stypes(p), & + p, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_SEND',ier) + endif + + if (istep > maxreqh) then + + ! Wait for oldest irecv request to complete + p = swapids(istep-maxreqh) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep-maxreqh), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + ! Submit a new irecv request + if (rstep < steps) then + rstep = rstep + 1 + p = swapids(rstep) + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(rstep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + endif + + endif + + enddo + + ! wait for rest of send and receive requests to complete + do istep=steps-maxreqh+1,steps + p = swapids(istep) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + enddo + +! (4) no handshaking + nonblocking sends + elseif ((.not. handshake) .and. (.not. sendd)) then + + ! Post receive requests + do istep=1,maxreq + p = swapids(istep) + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + enddo + rstep = maxreq + + ! Send (and start receiving) data + do istep=1,steps + p = swapids(istep) + + ! Submit new isend request + if (sndlths(p) > 0) then + tag = mytask + offset_t + + offset_s = sdispls(p)+1 + call mpi_isend( sndbuf(offset_s), sndlths(p), stypes(p), & + p, tag, comm, sndids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_ISEND',ier) + endif + + if (istep > maxreqh) then + + ! 
Wait for oldest irecv request to complete + p = swapids(istep-maxreqh) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep-maxreqh), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + ! Submit a new irecv request + if (rstep < steps) then + rstep = rstep + 1 + p = swapids(rstep) + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(rstep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + endif + + ! Wait for outstanding i(r)send request to complete + p = swapids(istep-maxreqh) + if (sndlths(p) > 0) then + call mpi_wait( sndids(istep-maxreqh), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + endif + + enddo + + ! wait for rest of send and receive requests to complete + do istep=steps-maxreqh+1,steps + p = swapids(istep) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + if (sndlths(p) > 0) then + call mpi_wait( sndids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + enddo + + endif + + return + + end subroutine m_swapm_int + +! +!======================================================================== +! + subroutine m_swapm_FP ( nprocs, mytask, & + sndbuf, sbuf_siz, sndlths, sdispls, stypes, & + rcvbuf, rbuf_siz, rcvlths, rdispls, rtypes, & + comm, comm_hs, comm_isend, comm_maxreq ) + +!----------------------------------------------------------------------- +! +!> Purpose: +!! Reduced version of original swapm (for swap of multiple messages +!! using MPI point-to-point routines), more efficiently implementing a +!! subset of the swap protocols. +!! +!! Method: +!! comm_protocol: +!! comm_isend == .true.: use nonblocking send, else use blocking send +!! comm_hs == .true.: use handshaking protocol +!! comm_maxreq: +!! =-1,0: do not limit number of outstanding send/receive requests +!! >0: do not allow more than min(comm_maxreq, steps) outstanding +!! nonblocking send requests or nonblocking receive requests +!! +!! Author of original version: P. Worley +!! Ported from PIO1: P. Worley, September 2016 +!< +!----------------------------------------------------------------------- +!----------------------------------------------------------------------- + use m_mpif90 + use m_realkinds, only : FP + use m_die, only : MP_perr_die + + implicit none +!---------------------------Input arguments-------------------------- +! + integer, intent(in) :: nprocs ! size of communicator + integer, intent(in) :: mytask ! MPI task id with communicator + integer, intent(in) :: sbuf_siz ! size of send buffer + integer, intent(in) :: rbuf_siz ! size of receive buffer + + integer, intent(in) :: sndlths(0:nprocs-1)! length of outgoing message + integer, intent(in) :: sdispls(0:nprocs-1)! offset from beginning of send + ! buffer where outgoing messages + ! should be sent from + integer, intent(in) :: stypes(0:nprocs-1) ! MPI data types + integer, intent(in) :: rcvlths(0:nprocs-1)! length of incoming messages + integer, intent(in) :: rdispls(0:nprocs-1)! offset from beginning of receive + ! buffer where incoming messages + ! should be placed + integer, intent(in) :: rtypes(0:nprocs-1) ! MPI data types + real(FP),intent(in) :: sndbuf(sbuf_siz) ! outgoing message buffer + + integer, intent(in) :: comm ! MPI communicator + logical, intent(in) :: comm_hs ! handshaking protocol? 
+ logical, intent(in) :: comm_isend ! nonblocking send protocol? + integer, intent(in) :: comm_maxreq ! maximum number of outstanding + ! nonblocking requests + +!---------------------------Output arguments-------------------------- +! + real(FP), intent(out) :: rcvbuf(rbuf_siz) ! incoming message buffer + +! +!---------------------------Local workspace------------------------------------------- +! + character(len=*), parameter :: subName=myname//'::m_swapm_FP' + + integer :: steps ! number of swaps to initiate + integer :: swapids(nprocs) ! MPI process id of swap partners + integer :: p ! process index + integer :: istep ! loop index + integer :: tag ! MPI message tag + integer :: offset_t ! MPI message tag offset, for addressing + ! message conflict bug (if necessary) + integer :: offset_s ! index of message beginning in + ! send buffer + integer :: offset_r ! index of message beginning in + ! receive buffer + integer :: sndids(nprocs) ! send request ids + integer :: rcvids(nprocs) ! receive request ids + integer :: hs_rcvids(nprocs) ! handshake receive request ids + + integer :: maxreq, maxreqh ! maximum number of outstanding + ! nonblocking requests (and half) + integer :: hs ! handshake variable + integer :: rstep ! "receive" step index + + logical :: handshake, sendd ! protocol option flags + + integer :: ier ! return error status + integer :: status(MP_STATUS_SIZE) ! MPI status +! +!------------------------------------------------------------------------------------- +! +#ifdef _NO_M_SWAPM_TAG_OFFSET + offset_t = 0 +#else + offset_t = nprocs +#endif +! + ! if necessary, send to self + if (sndlths(mytask) > 0) then + tag = mytask + offset_t + + offset_r = rdispls(mytask)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(mytask), rtypes(mytask), & + mytask, tag, comm, rcvids(1), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + + offset_s = sdispls(mytask)+1 + call mpi_send( sndbuf(offset_s), sndlths(mytask), stypes(mytask), & + mytask, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_SEND',ier) + + call mpi_wait( rcvids(1), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + ! calculate swap partners and communication ordering + steps = 0 + do istep=1,ceil2(nprocs)-1 + p = pair(nprocs,istep,mytask) + if (p >= 0) then + if (sndlths(p) > 0 .or. rcvlths(p) > 0) then + steps = steps + 1 + swapids(steps) = p + end if + end if + end do + + if (steps .eq. 0) return + + ! identify communication protocol + if (comm_isend) then + sendd = .false. + else + sendd = .true. + endif + handshake = comm_hs + + ! identify maximum number of outstanding nonblocking requests to permit + if (steps .eq. 1) then + maxreq = 1 + maxreqh = 1 + else + if (comm_maxreq >= -1) then + maxreq = comm_maxreq + else + maxreq = steps + endif + + if ((maxreq .le. steps) .and. (maxreq > 0)) then + if (maxreq > 1) then + maxreqh = maxreq/2 + else + maxreq = 2 + maxreqh = 1 + endif + else + maxreq = steps + maxreqh = steps + endif + endif + +! Four protocol options: +! (1) handshaking + blocking sends + if ((handshake) .and. (sendd)) then + + ! Initialize hs variable + hs = 1 + + ! Post initial handshake receive requests + do istep=1,maxreq + p = swapids(istep) + if (sndlths(p) > 0) then + tag = mytask + offset_t + call mpi_irecv( hs, 1, MP_INTEGER, p, tag, comm, & + hs_rcvids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + enddo + + ! 
Post initial receive requests + do istep=1,maxreq + p = swapids(istep) + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + + call mpi_send ( hs, 1, MP_INTEGER, p, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_SEND',ier) + endif + enddo + rstep = maxreq + + ! Send (and start receiving) data + do istep=1,steps + p = swapids(istep) + + ! Submit new rsend request + if (sndlths(p) > 0) then + tag = mytask + offset_t + + offset_s = sdispls(p)+1 + call mpi_wait ( hs_rcvids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + + call mpi_rsend ( sndbuf(offset_s), sndlths(p), stypes(p), & + p, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_RSEND',ier) + endif + + if (istep > maxreqh) then + + ! Wait for oldest irecv request to complete + p = swapids(istep-maxreqh) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep-maxreqh), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + if (rstep < steps) then + rstep = rstep + 1 + p = swapids(rstep) + + ! Submit a new handshake irecv request + if (sndlths(p) > 0) then + tag = mytask + offset_t + call mpi_irecv( hs, 1, MP_INTEGER, p, tag, comm, & + hs_rcvids(rstep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + + ! Submit a new irecv request + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(rstep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + + call mpi_send ( hs, 1, MP_INTEGER, p, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_SEND',ier) + endif + endif + + endif +! + enddo + + ! wait for rest of receive requests to complete + do istep=steps-maxreqh+1,steps + p = swapids(istep) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + enddo + +! (2) handshaking + nonblocking sends + elseif ((handshake) .and. (.not. sendd)) then + + ! Initialize hs variable + hs = 1 + + ! Post initial handshake receive requests + do istep=1,maxreq + p = swapids(istep) + if (sndlths(p) > 0) then + tag = mytask + offset_t + call mpi_irecv( hs, 1, MP_INTEGER, p, tag, comm, & + hs_rcvids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + enddo + + ! Post initial receive requests + do istep=1,maxreq + p = swapids(istep) + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + + call mpi_send ( hs, 1, MP_INTEGER, p, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_SEND',ier) + endif + enddo + rstep = maxreq + + ! Send (and start receiving) data + do istep=1,steps + p = swapids(istep) + + ! Submit new irsend request + if (sndlths(p) > 0) then + tag = mytask + offset_t + + offset_s = sdispls(p)+1 + call mpi_wait ( hs_rcvids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + + call mpi_irsend( sndbuf(offset_s), sndlths(p), stypes(p), & + p, tag, comm, sndids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRSEND',ier) + endif + + if (istep > maxreqh) then + + ! 
Wait for oldest irecv request to complete + p = swapids(istep-maxreqh) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep-maxreqh), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + if (rstep < steps) then + rstep = rstep + 1 + p = swapids(rstep) + + ! Submit a new handshake irecv request + if (sndlths(p) > 0) then + tag = mytask + offset_t + call mpi_irecv( hs, 1, MP_INTEGER, p, tag, comm, & + hs_rcvids(rstep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + + ! Submit a new irecv request + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(rstep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + + call mpi_send ( hs, 1, MP_INTEGER, p, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_SEND',ier) + endif + endif + + ! Wait for outstanding i(r)send request to complete + p = swapids(istep-maxreqh) + if (sndlths(p) > 0) then + call mpi_wait( sndids(istep-maxreqh), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + endif + + enddo + + ! wait for rest of send and receive requests to complete + do istep=steps-maxreqh+1,steps + p = swapids(istep) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + if (sndlths(p) > 0) then + call mpi_wait( sndids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + enddo + +! (3) no handshaking + blocking sends + elseif ((.not. handshake) .and. (sendd)) then + + ! Post receive requests + do istep=1,maxreq + p = swapids(istep) + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + enddo + rstep = maxreq + + ! Send (and start receiving) data + do istep=1,steps + p = swapids(istep) + + ! Submit new send request + if (sndlths(p) > 0) then + tag = mytask + offset_t + + offset_s = sdispls(p)+1 + call mpi_send( sndbuf(offset_s), sndlths(p), stypes(p), & + p, tag, comm, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_SEND',ier) + endif + + if (istep > maxreqh) then + + ! Wait for oldest irecv request to complete + p = swapids(istep-maxreqh) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep-maxreqh), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + ! Submit a new irecv request + if (rstep < steps) then + rstep = rstep + 1 + p = swapids(rstep) + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(rstep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + endif + + endif + + enddo + + ! wait for rest of send and receive requests to complete + do istep=steps-maxreqh+1,steps + p = swapids(istep) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + enddo + +! (4) no handshaking + nonblocking sends + elseif ((.not. handshake) .and. (.not. sendd)) then + + ! 
Post receive requests + do istep=1,maxreq + p = swapids(istep) + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + enddo + rstep = maxreq + + ! Send (and start receiving) data + do istep=1,steps + p = swapids(istep) + + ! Submit new isend request + if (sndlths(p) > 0) then + tag = mytask + offset_t + + offset_s = sdispls(p)+1 + call mpi_isend( sndbuf(offset_s), sndlths(p), stypes(p), & + p, tag, comm, sndids(istep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_ISEND',ier) + endif + + if (istep > maxreqh) then + + ! Wait for oldest irecv request to complete + p = swapids(istep-maxreqh) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep-maxreqh), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + ! Submit a new irecv request + if (rstep < steps) then + rstep = rstep + 1 + p = swapids(rstep) + if (rcvlths(p) > 0) then + tag = p + offset_t + + offset_r = rdispls(p)+1 + call mpi_irecv( rcvbuf(offset_r), rcvlths(p), rtypes(p), & + p, tag, comm, rcvids(rstep), ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_IRECV',ier) + endif + endif + + ! Wait for outstanding i(r)send request to complete + p = swapids(istep-maxreqh) + if (sndlths(p) > 0) then + call mpi_wait( sndids(istep-maxreqh), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + + endif + + enddo + + ! wait for rest of send and receive requests to complete + do istep=steps-maxreqh+1,steps + p = swapids(istep) + if (rcvlths(p) > 0) then + call mpi_wait( rcvids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + if (sndlths(p) > 0) then + call mpi_wait( sndids(istep), status, ier ) + if(ier /= 0) call MP_perr_die(subName,'MPI_WAIT',ier) + endif + enddo + + endif + + return + + end subroutine m_swapm_FP + +end module m_SPMDutils + + + + + diff --git a/src/externals/mct/mpeu/m_FcComms.F90 b/src/externals/mct/mpeu/m_FcComms.F90 index bdd76ea95ec9..0bd675c2b75d 100644 --- a/src/externals/mct/mpeu/m_FcComms.F90 +++ b/src/externals/mct/mpeu/m_FcComms.F90 @@ -13,10 +13,13 @@ ! ! !INTERFACE: ! -! Workaround for performance issue with rsend on cray systems with -! gemini interconnect +! Disable the use of the MPI ready send protocol by default, to +! address recurrent issues with poor performance or incorrect +! functionality in MPI libraries. When support is known to be robust, +! or for experimentation, can be re-enabled by defining the CPP token +! _USE_MPI_RSEND during the build process. ! 
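+! (For example, a build that passes -D_USE_MPI_RSEND in its preprocessor
+!  flags, e.g. CPPDEFS or FPPFLAGS depending on the build system in use,
+!  would restore the ready send code path selected below; the CPP token is
+!  the one documented above, while the flag variable names are illustrative.)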
-#ifdef _NO_MPI_RSEND +#ifndef _USE_MPI_RSEND #define MPI_RSEND MPI_SEND #define mpi_rsend mpi_send #define MPI_IRSEND MPI_ISEND diff --git a/src/externals/mct/mpi-serial/Makefile b/src/externals/mct/mpi-serial/Makefile index 024d9f862415..0b1ca1db6c2d 100644 --- a/src/externals/mct/mpi-serial/Makefile +++ b/src/externals/mct/mpi-serial/Makefile @@ -18,6 +18,8 @@ SRCS_C = mpi.c \ list.c \ handles.c \ comm.c \ + error.c \ + ic_merge.c \ group.c \ time.c \ pack.c \ diff --git a/src/externals/mct/mpi-serial/NOTES b/src/externals/mct/mpi-serial/NOTES deleted file mode 100644 index 7387f3f8193f..000000000000 --- a/src/externals/mct/mpi-serial/NOTES +++ /dev/null @@ -1,46 +0,0 @@ - -cart.c - new file, cleaned -collective.c - done -comm.c - done -copy.c - new file, cleaned -getcount.c - new file, cleaned -group.c - copied over git updates -handles.c - nothing to merge, svn updates OK -list.c - svn OK -mpi.c - merged git in -mpi.h - merged git but need to fix some types -fort.F90 - merged git in -mpif.master.h -> mpif.h NOTE: need to add types in type.h,c -Makefile - had to uncomment some things to get mpif.h to build -op.c - new file -pack.c - format more like git, has new code -probe.c - new file -recv.c - done -req.c - merged in git -send.c - merged in git - -time.c - no changes -type.c - new file -type_const.c - new file - - - -*** NOTES - -*** need to look at Request struct, add a type - so that send.c and recv.c can use distinct send and recv types - -*** need to add types in mpi.h and mpif.master.h to type.{c,h} - - -*** need to look at config and how it sets _RSIZE_ and _DSIZE_ - - previously: MCT's configure set env FORT_SIZE - choose mpif.h from mpif.$FORT_SIZE.h - - now: FORT_SIZE ignored - configures sets FSIZE_REAL and FSIZE_DPRECISION based on mpi-serial's configure (default 4/8) - does not need mpif.master.h template -> mpif.$FORT_SIZE.h - - so... did i clobber good value of mpif.h ? 
- diff --git a/src/externals/mct/mpi-serial/README b/src/externals/mct/mpi-serial/README index 20e377602bc3..aaa728501f9b 100644 --- a/src/externals/mct/mpi-serial/README +++ b/src/externals/mct/mpi-serial/README @@ -64,6 +64,9 @@ List of MPI calls supported mpi_abort mpi_error_string mpi_initialized + mpi_get_processor_name + mpi_get_library_version + mpi_wtime comm and group ops mpi_comm_free @@ -74,17 +77,36 @@ List of MPI calls supported mpi_comm_split mpi_comm_group mpi_group_incl + mpi_group_range_incl + mpi_group_union + mpi_group_intersection + mpi_group_difference + mpi_group_translate_ranks mpi_group_free + mpi_cart_create + mpi_cart_coords + mpi_dims_create send/receive ops mpi_irecv mpi_recv mpi_test + mpi_testany + mpi_testall + mpi_testsome mpi_wait mpi_waitany mpi_waitall + mpi_waitsome mpi_isend mpi_send + mpi_ssend + mpi_rsend + mpi_irsend + mpi_sendrecv + mpi_iprobe + mpi_probe + mpi_request_free collective operations mpi_barrier @@ -92,11 +114,27 @@ List of MPI calls supported mpi_gather mpi_gatherv mpi_allgather + mpi_scatter mpi_scatterv mpi_reduce mpi_allreduce - - + mpi_reduce_scatter + mpi_scan + mpi_alltoall + mpi_alltoallv + mpi_alltoallw + mpi_op_create + mpi_op_free + + data types and info objects + mpi_get_count + mpi_get_elements + mpi_pack + mpi_pack_size + mpi_unpack + mpi_info_create + mpi_info_set + mpi_info_free ----- EOF diff --git a/src/externals/mct/mpi-serial/collective.c b/src/externals/mct/mpi-serial/collective.c index cead061130f0..9a9736b4b649 100644 --- a/src/externals/mct/mpi-serial/collective.c +++ b/src/externals/mct/mpi-serial/collective.c @@ -488,27 +488,6 @@ int MPI_Alltoallw(void *sendbuf, int *sendcounts, } -/*********/ - - -FC_FUNC( mpi_op_create , MPI_OP_CREATE ) - ( void *function, int *commute, int *op, int *ierror ) -{ - *ierror=MPI_Op_create(function,*commute,op); -} - - - -int MPI_Op_create(MPI_User_function *function, int commute, MPI_Op *op) -{ - *op=MPI_OP_NULL; - - return(MPI_SUCCESS); - -} - - - /*********/ diff --git a/src/externals/mct/mpi-serial/error.c b/src/externals/mct/mpi-serial/error.c new file mode 100644 index 000000000000..d26cfd164f91 --- /dev/null +++ b/src/externals/mct/mpi-serial/error.c @@ -0,0 +1,13 @@ + +#include "mpiP.h" + +/* + * Error handling code + * Just a stub for now to support the MPI interface without actually + * doing anything + */ + + int MPI_Errhandler_set(MPI_Comm comm, MPI_Errhandler handle) + { + return(MPI_SUCCESS); + } diff --git a/src/externals/mct/mpi-serial/ic_merge.c b/src/externals/mct/mpi-serial/ic_merge.c new file mode 100644 index 000000000000..ea19b387155b --- /dev/null +++ b/src/externals/mct/mpi-serial/ic_merge.c @@ -0,0 +1,15 @@ + +#include "mpiP.h" + +/* + * MPI_Intercomm_merge - Creates an intracommunicator from an intercommunicator + * This is just a stub for now to support mpi function calls even in Serial + * applications. 
In the case of a serial program, this function is a no-op and + * only ever returns MPI_SUCCESS + */ + +int MPI_Intercomm_merge( MPI_Comm intercomm, int high, MPI_Comm *newintracomm ) +{ + newintracomm = (MPI_Comm *)intercomm; + return(MPI_SUCCESS); +} diff --git a/src/externals/mct/mpi-serial/mpi.h b/src/externals/mct/mpi-serial/mpi.h index 529b57b853ab..9183bf89d200 100644 --- a/src/externals/mct/mpi-serial/mpi.h +++ b/src/externals/mct/mpi-serial/mpi.h @@ -1,4 +1,3 @@ - #ifndef _MPI_H_ #define _MPI_H_ @@ -48,7 +47,6 @@ typedef int MPI_Group; #define MPI_ERR_IN_STATUS (-1) #define MPI_ERR_LASTCODE (-1) - /* * MPI_UNDEFINED * @@ -191,6 +189,14 @@ typedef struct /* Fortran: INTEGER status(MPI_STATUS_SIZE) */ #define MPI_STATUSES_IGNORE ((MPI_Status *)0) +/* + * MPI Errhandling stubs (Not functional currently) + */ +typedef int MPI_Errhandler; + +#define MPI_ERRORS_ARE_FATAL ((MPI_Errhandler)0) +#define MPI_ERRORS_RETURN ((MPI_Errhandler)-1) + /* * Collective operations @@ -246,7 +252,8 @@ typedef int MPI_Info; /* handle */ extern int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm peer_comm, int remote_leader, int tag, MPI_Comm *newintercomm); - +extern int MPI_Intercomm_merge(MPI_Comm intercomm, int high, + MPI_Comm *newintercomm); extern int MPI_Cart_create(MPI_Comm comm_old, int ndims, int *dims, int *periods, int reorder, MPI_Comm *comm_cart); extern int MPI_Cart_get(MPI_Comm comm, int maxdims, int *dims, @@ -389,6 +396,9 @@ extern int MPI_Iprobe(int source, int tag, MPI_Comm comm, extern int MPI_Pack_size(int incount, MPI_Datatype type, MPI_Comm comm, MPI_Aint * size); +/* Error handling stub, not currently functional */ +extern int MPI_Errhandler_set(MPI_Comm comm, MPI_Errhandler handle); + /* new type functions */ extern int MPI_Get_count(MPI_Status *status, MPI_Datatype datatype, int *count); extern int MPI_Get_elements(MPI_Status *status, MPI_Datatype datatype, int *count); @@ -400,6 +410,9 @@ extern int MPI_Type_vector(int count, int blocklen, int stride, MPI_Datatype old extern int MPI_Type_hvector(int count, int blocklen, MPI_Aint stride, MPI_Datatype oldtype, MPI_Datatype *newtype); +extern int MPI_Type_create_hvector(int count, int blocklen, MPI_Aint stride, + MPI_Datatype oldtype, MPI_Datatype *newtype); + extern int MPI_Type_indexed(int count, int *blocklens, int *displacements, MPI_Datatype oldtype, MPI_Datatype *newtype); diff --git a/src/externals/mct/mpi-serial/mpif.h b/src/externals/mct/mpi-serial/mpif.h index b8071791e4bd..b4537b5d4a2e 100644 --- a/src/externals/mct/mpi-serial/mpif.h +++ b/src/externals/mct/mpi-serial/mpif.h @@ -1,334 +1,327 @@ -!!! -!!! NOTE: The files mpif.realXdoubleY.h are generated from -!!! mpif.master.h using make-mpif and later copied to mpif.h -!!! during the library make. All modifications should be -!!! made to mpif.master.h -!!! - - ! ! MPI_COMM_WORLD ! - INTEGER MPI_COMM_WORLD - parameter (mpi_comm_world=1) +INTEGER MPI_COMM_WORLD +parameter (mpi_comm_world=1) ! ! ! - integer MPI_BOTTOM - parameter (MPI_BOTTOM=0) +integer MPI_BOTTOM +parameter (MPI_BOTTOM=0) ! ! source,tag -! + ! 
- integer MPI_ANY_SOURCE, MPI_ANY_TAG - parameter (mpi_any_source=-1, mpi_any_tag= -1) + integer MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_TAG_UB + parameter (mpi_any_source=-1, mpi_any_tag= -1, mpi_tag_ub=1681915906) - integer MPI_PROC_NULL, MPI_ROOT - parameter (MPI_PROC_NULL=-2, MPI_ROOT=-3) + integer MPI_PROC_NULL, MPI_ROOT + parameter (MPI_PROC_NULL=-2, MPI_ROOT=-3) - integer MPI_COMM_NULL, MPI_REQUEST_NULL - parameter (MPI_COMM_NULL=0, MPI_REQUEST_NULL=0) + integer MPI_COMM_NULL, MPI_REQUEST_NULL + parameter (MPI_COMM_NULL=0, MPI_REQUEST_NULL=0) - integer MPI_GROUP_NULL, MPI_GROUP_EMPTY - parameter (MPI_GROUP_NULL=0, MPI_GROUP_EMPTY= -1) + integer MPI_GROUP_NULL, MPI_GROUP_EMPTY + parameter (MPI_GROUP_NULL=0, MPI_GROUP_EMPTY= -1) - integer MPI_MAX_ERROR_STRING - parameter (MPI_MAX_ERROR_STRING=128) + integer MPI_MAX_ERROR_STRING + parameter (MPI_MAX_ERROR_STRING=128) - integer MPI_MAX_PROCESSOR_NAME - parameter (MPI_MAX_PROCESSOR_NAME=128) + integer MPI_MAX_PROCESSOR_NAME + parameter (MPI_MAX_PROCESSOR_NAME=128) -! -! Return codes -! + ! + ! Return codes + ! - integer MPI_SUCCESS - parameter (MPI_SUCCESS=0) + integer MPI_SUCCESS + parameter (MPI_SUCCESS=0) - integer MPI_ERR_BUFFER - parameter (MPI_ERR_BUFFER= -1) + integer MPI_ERR_BUFFER + parameter (MPI_ERR_BUFFER= -1) - integer MPI_ERR_COUNT - parameter (MPI_ERR_COUNT= -1) + integer MPI_ERR_COUNT + parameter (MPI_ERR_COUNT= -1) - integer MPI_ERR_TYPE - parameter (MPI_ERR_TYPE= -1) + integer MPI_ERR_TYPE + parameter (MPI_ERR_TYPE= -1) - integer MPI_ERR_TAG - parameter (MPI_ERR_TAG= -1) + integer MPI_ERR_TAG + parameter (MPI_ERR_TAG= -1) - integer MPI_ERR_COMM - parameter (MPI_ERR_COMM= -1) + integer MPI_ERR_COMM + parameter (MPI_ERR_COMM= -1) - integer MPI_ERR_RANK - parameter (MPI_ERR_RANK= -1) + integer MPI_ERR_RANK + parameter (MPI_ERR_RANK= -1) - integer MPI_ERR_REQUEST - parameter (MPI_ERR_REQUEST= -1) + integer MPI_ERR_REQUEST + parameter (MPI_ERR_REQUEST= -1) - integer MPI_ERR_ROOT - parameter (MPI_ERR_ROOT= -1) + integer MPI_ERR_ROOT + parameter (MPI_ERR_ROOT= -1) - integer MPI_ERR_GROUP - parameter (MPI_ERR_GROUP= -1) + integer MPI_ERR_GROUP + parameter (MPI_ERR_GROUP= -1) - integer MPI_ERR_OP - parameter (MPI_ERR_OP= -1) + integer MPI_ERR_OP + parameter (MPI_ERR_OP= -1) - integer MPI_ERR_TOPOLOGY - parameter (MPI_ERR_TOPOLOGY= -1) + integer MPI_ERR_TOPOLOGY + parameter (MPI_ERR_TOPOLOGY= -1) - integer MPI_ERR_DIMS - parameter (MPI_ERR_DIMS= -1) + integer MPI_ERR_DIMS + parameter (MPI_ERR_DIMS= -1) - integer MPI_ERR_ARG - parameter (MPI_ERR_ARG= -1) + integer MPI_ERR_ARG + parameter (MPI_ERR_ARG= -1) - integer MPI_ERR_UNKNOWN - parameter (MPI_ERR_UNKNOWN= -1) + integer MPI_ERR_UNKNOWN + parameter (MPI_ERR_UNKNOWN= -1) - integer MPI_ERR_TRUNCATE - parameter (MPI_ERR_TRUNCATE= -1) + integer MPI_ERR_TRUNCATE + parameter (MPI_ERR_TRUNCATE= -1) - integer MPI_ERR_OTHER - parameter (MPI_ERR_OTHER= -1) + integer MPI_ERR_OTHER + parameter (MPI_ERR_OTHER= -1) - integer MPI_ERR_INTERN - parameter (MPI_ERR_INTERN= -1) + integer MPI_ERR_INTERN + parameter (MPI_ERR_INTERN= -1) - integer MPI_PENDING - parameter (MPI_PENDING= -1) + integer MPI_PENDING + parameter (MPI_PENDING= -1) - integer MPI_ERR_IN_STATUS - parameter (MPI_ERR_IN_STATUS= -1) + integer MPI_ERR_IN_STATUS + parameter (MPI_ERR_IN_STATUS= -1) - integer MPI_ERR_LASTCODE - parameter (MPI_ERR_LASTCODE= -1) + integer MPI_ERR_LASTCODE + parameter (MPI_ERR_LASTCODE= -1) -! -! + integer MPI_ERRORS_RETURN + parameter (MPI_ERRORS_RETURN= -1) + ! + ! 
- integer MPI_UNDEFINED - parameter (MPI_UNDEFINED= -1) + integer MPI_UNDEFINED + parameter (MPI_UNDEFINED= -1) -! -! MPI_Status -! -! The values in this section MUST match the struct definition -! in mpi.h -! + ! + ! MPI_Status + ! + ! The values in this section MUST match the struct definition + ! in mpi.h + ! - INTEGER MPI_STATUS_SIZE - PARAMETER (MPI_STATUS_SIZE=4) - INTEGER MPI_SOURCE, MPI_TAG, MPI_ERROR - PARAMETER(MPI_SOURCE=1, MPI_TAG=2, MPI_ERROR=3) - ! There is a 4th value only used internally + INTEGER MPI_STATUS_SIZE + PARAMETER (MPI_STATUS_SIZE=4) - INTEGER MPI_STATUS_IGNORE(MPI_STATUS_SIZE) - INTEGER MPI_STATUSES_IGNORE(MPI_STATUS_SIZE,1) - COMMON /MPISERIAL/ MPI_STATUS_IGNORE - COMMON /MPISERIAL/ MPI_STATUSES_IGNORE + INTEGER MPI_SOURCE, MPI_TAG, MPI_ERROR + PARAMETER(MPI_SOURCE=1, MPI_TAG=2, MPI_ERROR=3) + ! There is a 4th value only used internally -! -! MPI_IN_PLACE -! + INTEGER MPI_STATUS_IGNORE(MPI_STATUS_SIZE) + INTEGER MPI_STATUSES_IGNORE(MPI_STATUS_SIZE,1) + COMMON /MPISERIAL/ MPI_STATUS_IGNORE + COMMON /MPISERIAL/ MPI_STATUSES_IGNORE - INTEGER MPI_IN_PLACE - COMMON /MPISERIAL/ MPI_IN_PLACE + ! + ! MPI_IN_PLACE + ! - SAVE /MPISERIAL/ ! Technically needed in case goes out of scope + INTEGER MPI_IN_PLACE + COMMON /MPISERIAL/ MPI_IN_PLACE + SAVE /MPISERIAL/ ! Technically needed in case goes out of scope -! -! MPI_Datatype values -! -! New datatype values -! Type constants represent integer handles, matching up to the index of the -! type array equal to the absolute value of the constant plus one. For -! example, MPI_BYTE=-12, corresponding to type index 11. -! (Array in type_const.c) -! + ! + ! MPI_Datatype values + ! + ! New datatype values + ! Type constants represent integer handles, matching up to the index of the + ! type array equal to the absolute value of the constant plus one. For + ! example, MPI_BYTE=-12, corresponding to type index 11. + ! (Array in type_const.c) + ! 
- INTEGER MPI_DATATYPE_NULL - PARAMETER (MPI_DATATYPE_NULL=0) - INTEGER MPI_BYTE - PARAMETER (MPI_BYTE=-12) + INTEGER MPI_DATATYPE_NULL + PARAMETER (MPI_DATATYPE_NULL=0) - INTEGER MPI_PACKED - PARAMETER (MPI_PACKED=-13) + INTEGER MPI_BYTE + PARAMETER (MPI_BYTE=-12) - INTEGER MPI_LB - PARAMETER (MPI_LB=-14) + INTEGER MPI_PACKED + PARAMETER (MPI_PACKED=-13) - INTEGER MPI_UB - PARAMETER (MPI_UB=-15) + INTEGER MPI_LB + PARAMETER (MPI_LB=-14) - INTEGER MPI_INTEGER - PARAMETER (MPI_INTEGER=-16) + INTEGER MPI_UB + PARAMETER (MPI_UB=-15) - INTEGER MPI_REAL - PARAMETER (MPI_REAL=-17) + INTEGER MPI_INTEGER + PARAMETER (MPI_INTEGER=-16) - INTEGER MPI_DOUBLE_PRECISION - PARAMETER (MPI_DOUBLE_PRECISION=-18) + INTEGER MPI_REAL + PARAMETER (MPI_REAL=-17) - INTEGER MPI_COMPLEX - PARAMETER (MPI_COMPLEX=-19) + INTEGER MPI_DOUBLE_PRECISION + PARAMETER (MPI_DOUBLE_PRECISION=-18) - INTEGER MPI_DOUBLE_COMPLEX - PARAMETER (MPI_DOUBLE_COMPLEX=-20) + INTEGER MPI_COMPLEX + PARAMETER (MPI_COMPLEX=-19) - INTEGER MPI_LOGICAL - PARAMETER (MPI_LOGICAL=-21) + INTEGER MPI_DOUBLE_COMPLEX + PARAMETER (MPI_DOUBLE_COMPLEX=-20) - INTEGER MPI_CHARACTER - PARAMETER (MPI_CHARACTER=-22) + INTEGER MPI_LOGICAL + PARAMETER (MPI_LOGICAL=-21) - integer MPI_2REAL - parameter (MPI_2REAL= -23) + INTEGER MPI_CHARACTER + PARAMETER (MPI_CHARACTER=-22) - integer MPI_2DOUBLE_PRECISION - parameter (MPI_2DOUBLE_PRECISION= -24) + integer MPI_2REAL + parameter (MPI_2REAL= -23) - integer MPI_2INTEGER - parameter (MPI_2INTEGER= -25) + integer MPI_2DOUBLE_PRECISION + parameter (MPI_2DOUBLE_PRECISION= -24) + integer MPI_2INTEGER + parameter (MPI_2INTEGER= -25) -! -! Size-specific types -! - INTEGER MPI_INTEGER1 - PARAMETER (MPI_INTEGER1= -32 ) + ! + ! Size-specific types + ! - INTEGER MPI_INTEGER2 - PARAMETER (MPI_INTEGER2= -33 ) + INTEGER MPI_INTEGER1 + PARAMETER (MPI_INTEGER1= -32 ) - INTEGER MPI_INTEGER4 - PARAMETER (MPI_INTEGER4= -34 ) + INTEGER MPI_INTEGER2 + PARAMETER (MPI_INTEGER2= -33 ) - INTEGER MPI_INTEGER8 - PARAMETER (MPI_INTEGER8= -35 ) + INTEGER MPI_INTEGER4 + PARAMETER (MPI_INTEGER4= -34 ) - INTEGER MPI_INTEGER16 - PARAMETER (MPI_INTEGER16= -36 ) + INTEGER MPI_INTEGER8 + PARAMETER (MPI_INTEGER8= -35 ) + INTEGER MPI_INTEGER16 + PARAMETER (MPI_INTEGER16= -36 ) - INTEGER MPI_REAL4 - PARAMETER (MPI_REAL4= -37 ) - INTEGER MPI_REAL8 - PARAMETER (MPI_REAL8= -38 ) + INTEGER MPI_REAL4 + PARAMETER (MPI_REAL4= -37 ) - INTEGER MPI_REAL16 - PARAMETER (MPI_REAL16= -39 ) + INTEGER MPI_REAL8 + PARAMETER (MPI_REAL8= -38 ) + INTEGER MPI_REAL16 + PARAMETER (MPI_REAL16= -39 ) - integer MPI_COMPLEX8 - parameter (MPI_COMPLEX8= -40 ) - integer MPI_COMPLEX16 - parameter (MPI_COMPLEX16= -41 ) + integer MPI_COMPLEX8 + parameter (MPI_COMPLEX8= -40 ) - integer MPI_COMPLEX32 - parameter (MPI_COMPLEX32= -42 ) + integer MPI_COMPLEX16 + parameter (MPI_COMPLEX16= -41 ) - integer MPI_LONG_LONG_INT - parameter (MPI_LONG_LONG_INT= -43) + integer MPI_COMPLEX32 + parameter (MPI_COMPLEX32= -42 ) - integer MPI_LONG_LONG - parameter (MPI_LONG_LONG= MPI_LONG_LONG_INT) + integer MPI_LONG_LONG_INT + parameter (MPI_LONG_LONG_INT= -43) - integer MPI_UNSIGNED_LONG_LONG - parameter (MPI_UNSIGNED_LONG_LONG= -44) + integer MPI_LONG_LONG + parameter (MPI_LONG_LONG= MPI_LONG_LONG_INT) - integer MPI_OFFSET - parameter (MPI_OFFSET= -45) + integer MPI_UNSIGNED_LONG_LONG + parameter (MPI_UNSIGNED_LONG_LONG= -44) -! -! MPI_Op values -! -! (All are handled as no-op so no value is necessary; but provide one -! anyway just in case.) -! 
- - INTEGER MPI_SUM - PARAMETER (MPI_SUM=0) - INTEGER MPI_MAX - PARAMETER (MPI_MAX=0) - INTEGER MPI_MIN - PARAMETER (MPI_MIN=0) - INTEGER MPI_PROD - PARAMETER (MPI_PROD=0) - INTEGER MPI_LAND - PARAMETER (MPI_LAND=0) - INTEGER MPI_BAND - PARAMETER (MPI_BAND=0) - INTEGER MPI_LOR - PARAMETER (MPI_LOR=0) - INTEGER MPI_BOR - PARAMETER (MPI_BOR=0) - INTEGER MPI_LXOR - PARAMETER (MPI_LXOR=0) - INTEGER MPI_BXOR - PARAMETER (MPI_BXOR=0) - INTEGER MPI_MINLOC - PARAMETER (MPI_MINLOC=0) - INTEGER MPI_MAXLOC - PARAMETER (MPI_MAXLOC=0) - INTEGER MPI_OP_NULL - PARAMETER (MPI_OP_NULL=0) + integer MPI_OFFSET + parameter (MPI_OFFSET= -45) -! -! MPI_Wtime -! + ! + ! MPI_Op values + ! + ! (All are handled as no-op so no value is necessary; but provide one + ! anyway just in case.) + ! - DOUBLE PRECISION MPI_WTIME - EXTERNAL MPI_WTIME + INTEGER MPI_SUM + PARAMETER (MPI_SUM=0) + INTEGER MPI_MAX + PARAMETER (MPI_MAX=0) + INTEGER MPI_MIN + PARAMETER (MPI_MIN=0) + INTEGER MPI_PROD + PARAMETER (MPI_PROD=0) + INTEGER MPI_LAND + PARAMETER (MPI_LAND=0) + INTEGER MPI_BAND + PARAMETER (MPI_BAND=0) + INTEGER MPI_LOR + PARAMETER (MPI_LOR=0) + INTEGER MPI_BOR + PARAMETER (MPI_BOR=0) + INTEGER MPI_LXOR + PARAMETER (MPI_LXOR=0) + INTEGER MPI_BXOR + PARAMETER (MPI_BXOR=0) + INTEGER MPI_MINLOC + PARAMETER (MPI_MINLOC=0) + INTEGER MPI_MAXLOC + PARAMETER (MPI_MAXLOC=0) + INTEGER MPI_OP_NULL + PARAMETER (MPI_OP_NULL=0) + ! + ! MPI_Wtime + ! -! -! Kinds -! + DOUBLE PRECISION MPI_WTIME + EXTERNAL MPI_WTIME - INTEGER MPI_OFFSET_KIND - PARAMETER (MPI_OFFSET_KIND=selected_int_kind(13)) - INTEGER MPI_MODE_RDONLY - PARAMETER (MPI_MODE_RDONLY=0) + ! + ! Kinds + ! - INTEGER MPI_MODE_CREATE - PARAMETER (MPI_MODE_CREATE=1) + INTEGER MPI_OFFSET_KIND + PARAMETER (MPI_OFFSET_KIND=selected_int_kind(13)) - INTEGER MPI_MODE_RDWR - PARAMETER (MPI_MODE_RDWR=2) + INTEGER MPI_MODE_RDONLY + PARAMETER (MPI_MODE_RDONLY=0) + INTEGER MPI_MODE_CREATE + PARAMETER (MPI_MODE_CREATE=1) -! -! Info -! + INTEGER MPI_MODE_RDWR + PARAMETER (MPI_MODE_RDWR=2) - INTEGER MPI_INFO_NULL - PARAMETER (MPI_INFO_NULL=0) + ! + ! Info + ! -! -! Library version string (must match C value) -! + INTEGER MPI_INFO_NULL + PARAMETER (MPI_INFO_NULL=0) - INTEGER MPI_MAX_LIBRARY_VERSION_STRING - PARAMETER (MPI_MAX_LIBRARY_VERSION_STRING=80) + ! + ! Library version string (must match C value) + ! + INTEGER MPI_MAX_LIBRARY_VERSION_STRING + PARAMETER (MPI_MAX_LIBRARY_VERSION_STRING=80) diff --git a/src/externals/mct/mpi-serial/tests/ftest_old.F90 b/src/externals/mct/mpi-serial/tests/ftest_old.F90 index 93075219def5..938d4472a94c 100644 --- a/src/externals/mct/mpi-serial/tests/ftest_old.F90 +++ b/src/externals/mct/mpi-serial/tests/ftest_old.F90 @@ -99,7 +99,7 @@ program test call mpi_waitall(5,rreq,status,ier) print *,'recvs on MPI_COMM_WORLD done' - + do i=1,5 print *, 'Status source=',status(MPI_SOURCE,i), & ' tag=',status(MPI_TAG,i) @@ -137,13 +137,13 @@ program test MPI_COMM_WORLD) print *,temp end do - + do i=1,5 if (rbuf(i) .ne. sbuf(i)) then errcount = errcount + 1 print *,"Error for pack/send/unpack" print *,"found ",rbuf(i)," should be ",sbuf(i) - end if + end if end do ! 
@@ -160,6 +160,6 @@ program test else print *,"No errors" end if - + end diff --git a/src/externals/mct/mpi-serial/type.c b/src/externals/mct/mpi-serial/type.c index 22e3d305b387..8dd93f274148 100644 --- a/src/externals/mct/mpi-serial/type.c +++ b/src/externals/mct/mpi-serial/type.c @@ -448,6 +448,23 @@ int MPI_Type_hvector(int count, int blocklen, MPI_Aint stride, return Type_hvector(count, blocklen, stride, old_ptr, new_ptr); } +FC_FUNC( mpi_type_create_hvector, MPI_TYPE_CREATE_HVECTOR ) + (int * count, long * blocklen, long * stride, + int * oldtype, int * newtype, int * ierr) +{ + *ierr = MPI_Type_create_hvector(*count, *blocklen, *stride, *oldtype, newtype); +} + +int MPI_Type_create_hvector(int count, int blocklen, MPI_Aint stride, + MPI_Datatype oldtype, MPI_Datatype * newtype) +{ + Datatype old_ptr = *(Datatype*) mpi_handle_to_datatype(oldtype); + Datatype * new_ptr; + + mpi_alloc_handle(newtype, (void**) &new_ptr); + return Type_hvector(count, blocklen, stride, old_ptr, new_ptr); +} + int Type_hvector(int count, int blocklen, MPI_Aint stride, Datatype oldtype, Datatype *newtype) diff --git a/src/externals/mct/testsystem/testall/ReadSparseMatrixAsc.F90 b/src/externals/mct/testsystem/testall/ReadSparseMatrixAsc.F90 index b865e44ddaff..a0ce00128b82 100644 --- a/src/externals/mct/testsystem/testall/ReadSparseMatrixAsc.F90 +++ b/src/externals/mct/testsystem/testall/ReadSparseMatrixAsc.F90 @@ -25,6 +25,7 @@ subroutine ReadSparseMatrixAsc(sMat, fileID, src_dims, dst_dims) use m_List, only : List_init => init use m_List, only : List_clean => clean + use m_AttrVect, only : Attrvect_zero => zero use m_SparseMatrix, only : SparseMatrix use m_SparseMatrix, only : SparseMatrix_Init => init use m_SparseMatrix, only : SparseMatrix_Clean => clean @@ -155,6 +156,7 @@ subroutine ReadSparseMatrixAsc(sMat, fileID, src_dims, dst_dims) nRows = dst_dims(1) * dst_dims(2) nColumns = src_dims(1) * src_dims(2) call SparseMatrix_init(sMat, nRows, nColumns, num_elements) + call AttrVect_zero(sMat%data) ! ...and store them. diff --git a/src/externals/mct/testsystem/testall/cpl.F90 b/src/externals/mct/testsystem/testall/cpl.F90 index a709dd7d3296..0a1235d9d0f0 100644 --- a/src/externals/mct/testsystem/testall/cpl.F90 +++ b/src/externals/mct/testsystem/testall/cpl.F90 @@ -875,7 +875,7 @@ subroutine cpl (CPL_World) call zeit_co('COcnRouterInit') ! rml print router info - call MCT_Router_print(Atm2Cpl,CPL_World,90) + if(myProc==0)call MCT_Router_print(Atm2Cpl,CPL_World,90) close(90) call Router_test(Atm2Cpl,"CPL::Atm2Cpl",7000+myProc)