Skip to content

Commit

Permalink
allocate memory for omp cache, instead of using threadprivate variable
Browse files Browse the repository at this point in the history
  • Loading branch information
dliptai committed Mar 10, 2022
1 parent 13127df commit b296377
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 29 deletions.
8 changes: 4 additions & 4 deletions build/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -1781,7 +1781,7 @@ endif
SOURCES= physcon.f90 ${CONFIG} ${SRCKERNEL} io.F90 units.f90 boundary.f90 \
mpi_utils.F90 dtype_kdtree.F90 utils_omp.F90 utils_cpuinfo.f90 \
utils_allocate.f90 icosahedron.f90 \
utils_mathfunc.f90 part.F90 ${DOMAIN} utils_timing.f90 mpi_balance.F90 \
utils_mathfunc.f90 part.F90 omp_cache.f90 ${DOMAIN} utils_timing.f90 mpi_balance.F90 \
setup_params.f90 timestep.f90 utils_dumpfiles.f90 utils_indtimesteps.F90 utils_infiles.f90 \
utils_sort.f90 utils_supertimestep.F90 utils_tables.f90 utils_gravwave.f90 \
utils_sphNG.f90 utils_vectors.f90 utils_datafiles.f90 datafiles.f90 \
Expand Down Expand Up @@ -1861,8 +1861,8 @@ edit: checksetup
# these are the sources for anything which uses the readwrite_dumps module
#
SRCDUMP= physcon.f90 ${CONFIG} ${SRCKERNEL} io.F90 units.f90 boundary.f90 mpi_utils.F90 \
utils_timing.f90 utils_infiles.f90 dtype_kdtree.f90 utils_allocate.f90 part.F90 ${DOMAIN} \
mpi_dens.F90 mpi_force.F90 \
utils_timing.f90 utils_infiles.f90 dtype_kdtree.f90 utils_allocate.f90 part.F90 \
omp_cache.f90 ${DOMAIN} mpi_dens.F90 mpi_force.F90 \
mpi_balance.F90 mpi_stack.F90 mpi_derivs.F90 kdtree.F90 linklist_kdtree.F90 \
utils_dumpfiles.f90 utils_vectors.f90 utils_mathfunc.f90 \
utils_datafiles.f90 utils_filenames.f90 utils_tables.f90 datafiles.f90 gitinfo.f90 \
Expand Down Expand Up @@ -2301,7 +2301,7 @@ cleanphantomsinks:
# these are the sources for the multirun utility
#
SRCMULT = physcon.f90 ${CONFIG} ${SRCKERNEL} io.F90 mpi_utils.F90 utils_timing.f90 dtype_kdtree.F90 ${SRCFASTMATH} \
units.f90 boundary.f90 utils_allocate.f90 part.F90 timestep.f90 setup_params.f90 \
units.f90 boundary.f90 utils_allocate.f90 part.F90 omp_cache.f90 timestep.f90 setup_params.f90 \
utils_filenames.f90 utils_mathfunc.f90 utils_vectors.f90 utils_omp.F90 utils_datafiles.f90 datafiles.f90 utils_tables.f90 utils_infiles.f90 \
${SRCEOS} viscosity.f90 options.f90 damping.f90 utils_gravwave.f90 \
utils_dumpfiles.f90 utils_summary.f90 centreofmass.f90 \
Expand Down
39 changes: 23 additions & 16 deletions src/main/dens.F90
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ module densityforce

!--kernel related parameters
!real, parameter :: cnormk = 1./pi, wab0 = 1., gradh0 = -3.*wab0, radkern2 = 4F.0
integer, parameter :: isizecellcache = 50000

integer, parameter :: isizeneighcache = 0
integer, parameter :: maxdensits = 50

Expand Down Expand Up @@ -137,6 +137,8 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol
use part, only:ngradh
use viscosity, only:irealvisc
use io_summary,only:summary_variable,iosumhup,iosumhdn
use omp_cache, only:ompcache,maxcellcache

integer, intent(in) :: icall,npart,nactive
real, intent(inout) :: xyzh(:,:)
real, intent(in) :: vxyzu(:,:),fxyzu(:,:),fext(:,:)
Expand All @@ -150,9 +152,6 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol
real, intent(inout) :: radprop(:,:)
real(kind=4), intent(out) :: dvdx(:,:)

real, save :: xyzcache(isizecellcache,3)
!$omp threadprivate(xyzcache)

integer :: i,icell
integer :: nneigh,np,npcell
integer :: nwarnup,nwarndown,nwarnroundoff
Expand All @@ -174,14 +173,16 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol
integer :: n_remote_its,nlocal
real :: ntotal
logical :: iterations_finished,do_export
integer :: ithread,OMP_GET_THREAD_NUM


if (mpi) then
call init_cell_exchange(xrecvbuf,irequestrecv)
call reset_stacks
endif

if (iverbose >= 3 .and. id==master) &
write(iprint,*) ' cell cache =',isizecellcache,' neigh cache = ',isizeneighcache,' icall = ',icall
write(iprint,*) ' cell cache =',maxcellcache,' neigh cache = ',isizeneighcache,' icall = ',icall

if (icall==0 .or. icall==1) then
call reset_neighbour_stats(nneightry,nneighact,maxneightry,maxneighact,ncalc,nrelink)
Expand Down Expand Up @@ -283,7 +284,11 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol
!$omp reduction(+:nrelink) &
!$omp reduction(+:stressmax) &
!$omp reduction(max:rhomax) &
!$omp private(i)
!$omp shared(ompcache) &
!$omp private(i,ithread)

ithread = OMP_GET_THREAD_NUM() + 1


!$omp do schedule(runtime)
over_cells: do icell=1,int(ncells)
Expand All @@ -295,7 +300,7 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol
!
!--get the neighbour list and fill the cell cache
!
call get_neighbour_list(icell,listneigh,nneigh,xyzh,xyzcache,isizecellcache,getj=.false., &
call get_neighbour_list(icell,listneigh,nneigh,xyzh,ompcache(:,1:3,ithread),maxcellcache,getj=.false., &
remote_export=remote_export)
do_export = any(remote_export)
cell%icell = icell
Expand All @@ -319,7 +324,7 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol
!$omp end critical (send_and_recv_remote)
endif

call compute_cell(cell,listneigh,nneigh,getdv,getdB,Bevol,xyzh,vxyzu,fxyzu,fext,xyzcache,rad)
call compute_cell(cell,listneigh,nneigh,getdv,getdB,Bevol,xyzh,vxyzu,fxyzu,fext,ompcache(:,1:3,ithread),rad)

if (do_export) then
! write directly to stack
Expand All @@ -333,7 +338,7 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol
if (.not. converged) then
if (redo_neighbours) then
call set_hmaxcell(cell%icell,cell%hmax)
call get_neighbour_list(-1,listneigh,nneigh,xyzh,xyzcache,isizecellcache,getj=.false., &
call get_neighbour_list(-1,listneigh,nneigh,xyzh,ompcache(:,1:3,ithread),maxcellcache,getj=.false., &
cell_xpos=cell%xpos,cell_xsizei=cell%xsizei,cell_rcuti=cell%rcuti, &
remote_export=remote_export)
cell%remote_export(1:nprocs) = remote_export
Expand All @@ -349,7 +354,7 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol
nrelink = nrelink + 1
endif

call compute_cell(cell,listneigh,nneigh,getdv,getdB,Bevol,xyzh,vxyzu,fxyzu,fext,xyzcache,rad)
call compute_cell(cell,listneigh,nneigh,getdv,getdB,Bevol,xyzh,vxyzu,fxyzu,fext,ompcache(:,1:3,ithread),rad)

if (do_export) then
stack_waiting%cells(cell%waiting_index) = cell
Expand Down Expand Up @@ -402,10 +407,10 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol
cell = stack_remote%cells(i)

! icell is unused (-1 here)
call get_neighbour_list(-1,listneigh,nneigh,xyzh,xyzcache,isizecellcache,getj=.false., &
call get_neighbour_list(-1,listneigh,nneigh,xyzh,ompcache(:,1:3,ithread),maxcellcache,getj=.false., &
cell_xpos=cell%xpos,cell_xsizei=cell%xsizei,cell_rcuti=cell%rcuti)

call compute_cell(cell,listneigh,nneigh,getdv,getdB,Bevol,xyzh,vxyzu,fxyzu,fext,xyzcache,rad)
call compute_cell(cell,listneigh,nneigh,getdv,getdB,Bevol,xyzh,vxyzu,fxyzu,fext,ompcache(:,1:3,ithread),rad)

cell%remote_export(id+1) = .false.

Expand Down Expand Up @@ -458,7 +463,7 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol
!$omp end critical (send_and_recv_remote)
if (.not. converged) then
call set_hmaxcell(cell%icell,cell%hmax)
call get_neighbour_list(-1,listneigh,nneigh,xyzh,xyzcache,isizecellcache,getj=.false., &
call get_neighbour_list(-1,listneigh,nneigh,xyzh,ompcache(:,1:3,ithread),maxcellcache,getj=.false., &
cell_xpos=cell%xpos,cell_xsizei=cell%xsizei,cell_rcuti=cell%rcuti, &
remote_export=remote_export)
cell%remote_export(1:nprocs) = remote_export
Expand All @@ -468,7 +473,7 @@ subroutine densityiterate(icall,npart,nactive,xyzh,vxyzu,divcurlv,divcurlB,Bevol
! direction export (0)
call send_cell(cell,0,irequestsend,xsendbuf)
!$omp end critical (send_and_recv_remote)
call compute_cell(cell,listneigh,nneigh,getdv,getdB,Bevol,xyzh,vxyzu,fxyzu,fext,xyzcache,rad)
call compute_cell(cell,listneigh,nneigh,getdv,getdB,Bevol,xyzh,vxyzu,fxyzu,fext,ompcache(:,1:3,ithread),rad)

stack_redo%cells(cell%waiting_index) = cell
else
Expand Down Expand Up @@ -558,6 +563,7 @@ pure subroutine get_density_sums(i,xpartveci,hi,hi1,hi21,iamtypei,iamgasi,iamdus
use kernel, only:get_kernel,get_kernel_grav1
use part, only:iphase,iamgas,iamdust,iamtype,maxphase,ibasetype,igas,idust,rhoh,massoftype,iradxi
use dim, only:ndivcurlv,gravity,maxp,nalpha,use_dust,do_radiation
use omp_cache,only:maxcellcache
integer, intent(in) :: i
real, intent(in) :: xpartveci(:)
real(kind=8), intent(in) :: hi,hi1,hi21
Expand Down Expand Up @@ -622,7 +628,7 @@ pure subroutine get_density_sums(i,xpartveci,hi,hi1,hi21,iamtypei,iamgasi,iamdus
if (ifilledneighcache .and. n <= isizeneighcache) then
rij2 = dxcache(1,n)
else
if (ifilledcellcache .and. n <= isizecellcache) then
if (ifilledcellcache .and. n <= maxcellcache) then
! positions from cache are already mod boundary
dx = xpartveci(ixi) - xyzcache(n,1)
dy = xpartveci(iyi) - xyzcache(n,2)
Expand Down Expand Up @@ -1156,6 +1162,7 @@ pure subroutine compute_cell(cell,listneigh,nneigh,getdv,getdB,Bevol,xyzh,vxyzu,
use viscosity, only:irealvisc
use io, only:id
use dim, only:mpi
use omp_cache, only:maxcellcache

type(celldens), intent(inout) :: cell

Expand All @@ -1165,7 +1172,7 @@ pure subroutine compute_cell(cell,listneigh,nneigh,getdv,getdB,Bevol,xyzh,vxyzu,
logical, intent(in) :: getdB
real, intent(in) :: Bevol(:,:)
real, intent(in) :: xyzh(:,:),vxyzu(:,:),fxyzu(:,:),fext(:,:)
real, intent(in) :: xyzcache(isizecellcache,3)
real, intent(in) :: xyzcache(maxcellcache,3)
real, intent(in) :: rad(:,:)

real :: dxcache(7,isizeneighcache)
Expand Down
21 changes: 12 additions & 9 deletions src/main/force.F90
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ module forces
character(len=80), parameter, public :: & ! module version
modid="$Id$"

integer, parameter :: maxcellcache = 50000

public :: force, reconstruct_dv ! latter to avoid compiler warning

!--indexing for xpartveci array
Expand Down Expand Up @@ -224,6 +222,7 @@ subroutine force(icall,npart,xyzh,vxyzu,fxyzu,divcurlv,divcurlB,Bevol,dBevol,&
use mpistack, only:stack_remote => force_stack_1
use mpistack, only:stack_waiting => force_stack_2
use io_summary, only:iosumdtr
use omp_cache, only:ompcache,maxcellcache
integer, intent(in) :: icall,npart
real, intent(in) :: xyzh(:,:)
real, intent(inout) :: vxyzu(:,:)
Expand All @@ -243,8 +242,6 @@ subroutine force(icall,npart,xyzh,vxyzu,fxyzu,divcurlv,divcurlB,Bevol,dBevol,&
real, intent(inout) :: radprop(:,:)
real, intent(in) :: dens(:), metrics(:,:,:,:)

real, save :: xyzcache(maxcellcache,4)
!$omp threadprivate(xyzcache)
integer :: i,icell,nneigh
integer :: nstokes,nsuper,ndrag,ndustres
real :: dtmini,dtohm,dthall,dtambi,dtvisc
Expand Down Expand Up @@ -278,6 +275,7 @@ subroutine force(icall,npart,xyzh,vxyzu,fxyzu,divcurlv,divcurlB,Bevol,dBevol,&
logical :: remote_export(nprocs), do_export
type(cellforce) :: cell,xrecvbuf(nprocs),xsendbuf
integer :: irequestsend(nprocs),irequestrecv(nprocs)
integer :: ithread,OMP_GET_THREAD_NUM

#ifdef IND_TIMESTEPS
nbinmaxnew = 0
Expand Down Expand Up @@ -431,7 +429,11 @@ subroutine force(icall,npart,xyzh,vxyzu,fxyzu,divcurlv,divcurlB,Bevol,dBevol,&
!$omp shared(dustfrac) &
!$omp shared(ddustevol) &
!$omp shared(deltav) &
!$omp shared(ibin_wake,ibinnow_m1)
!$omp shared(ibin_wake,ibinnow_m1) &
!$omp shared(ompcache) &
!$omp private(ithread)

ithread = OMP_GET_THREAD_NUM() + 1

!$omp do schedule(runtime)
over_cells: do icell=1,int(ncells)
Expand All @@ -452,7 +454,7 @@ subroutine force(icall,npart,xyzh,vxyzu,fxyzu,divcurlv,divcurlB,Bevol,dBevol,&
!--get the neighbour list and fill the cell cache
!

call get_neighbour_list(icell,listneigh,nneigh,xyzh,xyzcache,maxcellcache,getj=.true., &
call get_neighbour_list(icell,listneigh,nneigh,xyzh,ompcache(:,:,ithread),maxcellcache,getj=.true., &
#ifdef GRAVITY
f=cell%fgrav, &
#endif
Expand All @@ -475,7 +477,7 @@ subroutine force(icall,npart,xyzh,vxyzu,fxyzu,divcurlv,divcurlB,Bevol,dBevol,&

call compute_cell(cell,listneigh,nneigh,Bevol,xyzh,vxyzu,fxyzu, &
iphase,divcurlv,divcurlB,alphaind,eta_nimhd,eos_vars, &
dustfrac,dustprop,gradh,ibinnow_m1,ibin_wake,stressmax,xyzcache,&
dustfrac,dustprop,gradh,ibinnow_m1,ibin_wake,stressmax,ompcache(:,:,ithread),&
rad,radprop,dens,metrics)

if (do_export) then
Expand Down Expand Up @@ -516,15 +518,15 @@ subroutine force(icall,npart,xyzh,vxyzu,fxyzu,divcurlv,divcurlB,Bevol,dBevol,&
over_remote: do i = 1,stack_remote%n
cell = stack_remote%cells(i)

call get_neighbour_list(-1,listneigh,nneigh,xyzh,xyzcache,maxcellcache,getj=.true., &
call get_neighbour_list(-1,listneigh,nneigh,xyzh,ompcache(:,:,ithread),maxcellcache,getj=.true., &
#ifdef GRAVITY
f=cell%fgrav, &
#endif
cell_xpos=cell%xpos,cell_xsizei=cell%xsizei,cell_rcuti=cell%rcuti)

call compute_cell(cell,listneigh,nneigh,Bevol,xyzh,vxyzu,fxyzu, &
iphase,divcurlv,divcurlB,alphaind,eta_nimhd,eos_vars, &
dustfrac,dustprop,gradh,ibinnow_m1,ibin_wake,stressmax,xyzcache,&
dustfrac,dustprop,gradh,ibinnow_m1,ibin_wake,stressmax,ompcache(:,:,ithread),&
rad,radprop,dens,metrics)

cell%remote_export(id+1) = .false.
Expand Down Expand Up @@ -867,6 +869,7 @@ subroutine compute_forces(i,iamgasi,iamdusti,xpartveci,hi,hi1,hi21,hi41,gradhi,g
#endif
use utils_gr, only:get_bigv
use radiation_utils, only:get_rad_R
use omp_cache, only:maxcellcache
integer, intent(in) :: i
logical, intent(in) :: iamgasi,iamdusti
real, intent(in) :: xpartveci(:)
Expand Down
6 changes: 6 additions & 0 deletions src/main/memory.F90
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ subroutine allocate_memory(n, part_only)
use allocutils, only:nbytes_allocated,bytes2human
use part, only:allocate_part
use linklist, only:allocate_linklist,ifirstincell
use omp_cache, only:allocate_cache
#ifdef PHOTO
use photoevap, only:allocate_photoevap
#endif
Expand Down Expand Up @@ -83,6 +84,8 @@ subroutine allocate_memory(n, part_only)
#endif
endif

call allocate_cache

call bytes2human(nbytes_allocated, sizestring)
if (nprocs == 1) then
write(iprint, '(a)') '------------------------------------------------------------'
Expand All @@ -102,6 +105,7 @@ subroutine deallocate_memory(part_only)
use photoevap, only:deallocate_photoevap
#endif
use allocutils, only:nbytes_allocated
use omp_cache, only:deallocate_cache

logical, optional, intent(in) :: part_only
logical :: part_only_
Expand All @@ -120,6 +124,8 @@ subroutine deallocate_memory(part_only)
#endif
endif

call deallocate_cache

nbytes_allocated = 0
call update_max_sizes(0)

Expand Down
39 changes: 39 additions & 0 deletions src/main/omp_cache.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
!--------------------------------------------------------------------------!
! The Phantom Smoothed Particle Hydrodynamics code, by Daniel Price et al. !
! Copyright (c) 2007-2022 The Authors (see AUTHORS) !
! See LICENCE file for usage and distribution conditions !
! http://phantomsph.bitbucket.io/ !
!--------------------------------------------------------------------------!
module omp_cache

implicit none

integer, parameter :: maxcellcache = 50000

real, allocatable :: ompcache(:,:,:)

public :: allocate_cache
public :: deallocate_cache
public :: ompcache
public :: maxcellcache

private

contains

subroutine allocate_cache
use allocutils, only:allocate_array
integer :: OMP_GET_MAX_THREADS, nthread

nthread = OMP_GET_MAX_THREADS()
call allocate_array('ompcache', ompcache, maxcellcache, 4, nthread)

end subroutine allocate_cache

subroutine deallocate_cache

if (allocated(ompcache)) deallocate(ompcache)

end subroutine deallocate_cache

end module omp_cache

0 comments on commit b296377

Please sign in to comment.