From f6b5013210a63881d0f9077b9677bb7c3d5bcd8e Mon Sep 17 00:00:00 2001
From: Pedro Costa
Date: Mon, 15 Jan 2024 15:59:48 +0100
Subject: [PATCH] Incorporate pressure extrapolation inside `mom` subroutines
directly to save memory.
---
src/main.f90 | 34 +++++++++++++++++++---------------
src/mom.f90 | 45 +++++++++++++++++++++++++++------------------
src/rk.f90 | 5 +++--
3 files changed, 49 insertions(+), 35 deletions(-)
diff --git a/src/main.f90 b/src/main.f90
index 68ddcdf..1d9c4a9 100644
--- a/src/main.f90
+++ b/src/main.f90
@@ -78,7 +78,7 @@ program cans
use mod_common_cudecomp, only: istream_acc_queue_1
#endif
use mod_timer , only: timer_tic,timer_toc,timer_print
- use mod_updatep , only: updatep,extrapl_p
+ use mod_updatep , only: updatep
use mod_utils , only: bulk_mean,bulk_mean_12
!@acc use mod_utils , only: device_memory_footprint
use mod_types
@@ -150,8 +150,11 @@ program cans
v( 0:n(1)+1,0:n(2)+1,0:n(3)+1), &
w( 0:n(1)+1,0:n(2)+1,0:n(3)+1), &
p( 0:n(1)+1,0:n(2)+1,0:n(3)+1), &
- pp(0:n(1)+1,0:n(2)+1,0:n(3)+1), &
- po(0:n(1)+1,0:n(2)+1,0:n(3)+1))
+ pp(0:n(1)+1,0:n(2)+1,0:n(3)+1))
+#if !defined(_CONSTANT_COEFFS_POISSON)
+ allocate(po,mold=pp)
+ po(:,:,:) = 0._rp
+#endif
#if defined(_SCALAR)
allocate(s,mold=pp)
#endif
@@ -275,7 +278,6 @@ program cans
time = 0.
!$acc update self(zc,dzc,dzf)
call initflow(inivel,bcvel,ng,lo,l,dl,zc,zf,dzc,dzf,rho12(1),mu12(1),bforce,is_wallturb,time,u,v,w,p)
- po(:,:,:) = p(:,:,:)
#if defined(_SCALAR)
call initscal(inisca,bcsca,ng,lo,l,dl,dzf,zc,s)
#endif
@@ -294,7 +296,7 @@ program cans
end if
call acdi_set_gamma(n,acdi_gam_factor,u,v,w,gam)
if(myid == 0) print*, 'Gamma = ', gam, 'Epsilon = ', seps
- !$acc enter data copyin(u,v,w,p) create(pp,po)
+ !$acc enter data copyin(u,v,w,p) create(pp)
call bounduvw(cbcvel,n,bcvel,nb,is_bound,.false.,dl,dzc,dzf,u,v,w)
call boundp(cbcpre,n,bcpre,nb,is_bound,dl,dzc,p)
#if defined(_SCALAR)
@@ -350,10 +352,6 @@ program cans
!$acc update device(u,v,w,p)
call bounduvw(cbcvel,n,bcvel,nb,is_bound,.false.,dl,dzc,dzf,u,v,w)
else
-#if defined(_CONSTANT_COEFFS_POISSON)
- call extrapl_p(dt,dto,p,po,pp)
- call boundp(cbcpre,n,bcpre,nb,is_bound,dl,dzc,pp)
-#endif
rho_av = 0.
if(any(abs(gacc(:))>0. .and. cbcpre(0,:)//cbcpre(1,:) == 'PP')) then
call bulk_mean_12(n,grid_vol_ratio_c,psi,rho12,rho_av)
@@ -362,15 +360,21 @@ program cans
bforce,gacc,sigma,rho_av,rho12,mu12,beta12,rho0,psi,kappa,s, &
p,pp,u,v,w)
call bounduvw(cbcvel,n,bcvel,nb,is_bound,.false.,dl,dzc,dzf,u,v,w)
- call fillps(n,dli,dzfi,dti,u,v,w,pp)
#if defined(_CONSTANT_COEFFS_POISSON)
- call updt_rhs_b(['c','c','c'],cbcpre,n,is_bound,rhsbp%x,rhsbp%y,rhsbp%z,pp)
- call solver(n,ng,arrplanp,normfftp,lambdaxyp,ap,bp,cp,cbcpre,['c','c','c'],pp)
+ !$acc kernels
+ pp(:,:,:) = p(:,:,:)
+ !$acc end kernels
+ call boundp(cbcpre,n,bcpre,nb,is_bound,dl,dzc,pp)
+#endif
+ call fillps(n,dli,dzfi,dti,u,v,w,p)
+#if defined(_CONSTANT_COEFFS_POISSON)
+ call updt_rhs_b(['c','c','c'],cbcpre,n,is_bound,rhsbp%x,rhsbp%y,rhsbp%z,p)
+ call solver(n,ng,arrplanp,normfftp,lambdaxyp,ap,bp,cp,cbcpre,['c','c','c'],p)
#else
- call solver_vc(ng,lo,hi,cbcpre,bcpre,dli,dzci,dzfi,is_bound,rho12,psi,pp,po)
+ call solver_vc(ng,lo,hi,cbcpre,bcpre,dli,dzci,dzfi,is_bound,rho12,psi,p,po)
#endif
- call boundp(cbcpre,n,bcpre,nb,is_bound,dl,dzc,pp)
- call correc(n,dli,dzci,rho0,rho12,dt,pp,psi,u,v,w)
+ call boundp(cbcpre,n,bcpre,nb,is_bound,dl,dzc,p)
+ call correc(n,dli,dzci,rho0,rho12,dt,p,psi,u,v,w)
call bounduvw(cbcvel,n,bcvel,nb,is_bound,.true.,dl,dzc,dzf,u,v,w)
call updatep(pp,p)
call boundp(cbcpre,n,bcpre,nb,is_bound,dl,dzc,p)
diff --git a/src/mom.f90 b/src/mom.f90
index 2542861..2b871a6 100644
--- a/src/mom.f90
+++ b/src/mom.f90
@@ -254,10 +254,10 @@ subroutine momz_d(nx,ny,nz,dxi,dyi,dzci,dzfi,rho12,mu12,psi,u,v,w,dwdt)
end do
end subroutine momz_d
!
- subroutine momx_p(nx,ny,nz,dxi,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dudt)
+ subroutine momx_p(nx,ny,nz,dxi,dt_r,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dudt)
implicit none
integer , intent(in) :: nx,ny,nz
- real(rp), intent(in) :: dxi
+ real(rp), intent(in) :: dxi,dt_r
real(rp), intent(in) :: bforce,gacc,rho0,rho_av,rho12(2)
real(rp), dimension(0:,0:,0:), intent(in ) :: psi,p,pp
real(rp), dimension( :, :, :), intent(inout) :: dudt
@@ -276,7 +276,9 @@ subroutine momx_p(nx,ny,nz,dxi,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dudt)
!
dudt(i,j,k) = dudt(i,j,k) + bforce/rhop + gacc*(1.-rho_av/rhop) + &
#if defined(_CONSTANT_COEFFS_POISSON)
- dpdl/rho0 + (1./rhop-1./rho0)*(pp(i+1,j,k)-pp(i,j,k))*dxi
+ dpdl/rho0 + (1./rhop-1./rho0)*( ((1.+dt_r)*p(i+1,j,k)+dt_r*(pp(i+1,j,k))) - &
+ ((1.+dt_r)*p(i ,j,k)+dt_r*(pp(i ,j,k))) &
+ )*dxi
#else
dpdl/rhop
#endif
@@ -285,10 +287,10 @@ subroutine momx_p(nx,ny,nz,dxi,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dudt)
end do
end subroutine momx_p
!
- subroutine momy_p(nx,ny,nz,dyi,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dvdt)
+ subroutine momy_p(nx,ny,nz,dyi,dt_r,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dvdt)
implicit none
integer , intent(in) :: nx,ny,nz
- real(rp), intent(in) :: dyi
+ real(rp), intent(in) :: dyi,dt_r
real(rp), intent(in) :: bforce,gacc,rho0,rho_av,rho12(2)
real(rp), dimension(0:,0:,0:), intent(in ) :: psi,p,pp
real(rp), dimension( :, :, :), intent(inout) :: dvdt
@@ -307,7 +309,9 @@ subroutine momy_p(nx,ny,nz,dyi,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dvdt)
!
dvdt(i,j,k) = dvdt(i,j,k) + bforce/rhop + gacc*(1.-rho_av/rhop) + &
#if defined(_CONSTANT_COEFFS_POISSON)
- dpdl/rho0 + (1./rhop-1./rho0)*(pp(i,j+1,k)-pp(i,j,k))*dyi
+ dpdl/rho0 + (1./rhop-1./rho0)*( ((1.+dt_r)*p(i,j+1,k)+dt_r*(pp(i,j+1,k))) - &
+ ((1.+dt_r)*p(i,j ,k)+dt_r*(pp(i,j ,k))) &
+ )*dyi
#else
dpdl/rhop
#endif
@@ -316,10 +320,11 @@ subroutine momy_p(nx,ny,nz,dyi,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dvdt)
end do
end subroutine momy_p
!
- subroutine momz_p(nx,ny,nz,dzci,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dwdt)
+ subroutine momz_p(nx,ny,nz,dzci,dt_r,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dwdt)
implicit none
integer , intent(in) :: nx,ny,nz
real(rp), intent(in), dimension(0:) :: dzci
+ real(rp), intent(in) :: dt_r
real(rp), intent(in) :: bforce,gacc,rho0,rho_av,rho12(2)
real(rp), dimension(0:,0:,0:), intent(in ) :: psi,p,pp
real(rp), dimension( :, :, :), intent(inout) :: dwdt
@@ -338,7 +343,9 @@ subroutine momz_p(nx,ny,nz,dzci,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dwdt)
!
dwdt(i,j,k) = dwdt(i,j,k) + bforce/rhop + gacc*(1.-rho_av/rhop) + &
#if defined(_CONSTANT_COEFFS_POISSON)
- dpdl/rho0 + (1./rhop-1./rho0)*(pp(i,j,k+1)-pp(i,j,k))*dzci(k)
+ dpdl/rho0 + (1./rhop-1./rho0)*( ((1.+dt_r)*p(i,j,k+1)+dt_r*(pp(i,j,k+1))) - &
+ ((1.+dt_r)*p(i,j,k )+dt_r*(pp(i,j,k ))) &
+ )*dzci(k)
#else
dpdl/rhop
#endif
@@ -714,12 +721,13 @@ subroutine mom_xyz_ad(nx,ny,nz,dxi,dyi,dzci,dzfi,rho12,mu12, &
end do
end subroutine mom_xyz_ad
!
- subroutine mom_xyz_oth(nx,ny,nz,dxi,dyi,dzci,rho12,beta12,bforce,gacc,sigma,rho0,rho_av, &
+ subroutine mom_xyz_oth(nx,ny,nz,dxi,dyi,dzci,dt_r,rho12,beta12,bforce,gacc,sigma,rho0,rho_av, &
p,pp,psi,kappa,s,dudt,dvdt,dwdt)
implicit none
integer , intent(in) :: nx,ny,nz
real(rp), intent(in) :: dxi,dyi
real(rp), intent(in), dimension(0:) :: dzci
+ real(rp), intent(in) :: dt_r
real(rp), intent(in), dimension(2) :: rho12,beta12
real(rp), intent(in), dimension(3) :: bforce,gacc
real(rp), intent(in) :: sigma
@@ -754,10 +762,10 @@ subroutine mom_xyz_oth(nx,ny,nz,dxi,dyi,dzci,rho12,beta12,bforce,gacc,sigma,rho0
p_cpc = p(i ,j+1,k )
p_ccp = p(i ,j ,k+1)
!
- q_ccc = pp(i ,j ,k )
- q_pcc = pp(i+1,j ,k )
- q_cpc = pp(i ,j+1,k )
- q_ccp = pp(i ,j ,k+1)
+ q_ccc = (1.+dt_r)*p(i ,j ,k )-dt_r*pp(i ,j ,k )
+ q_pcc = (1.+dt_r)*p(i+1,j ,k )-dt_r*pp(i+1,j ,k )
+ q_cpc = (1.+dt_r)*p(i ,j+1,k )-dt_r*pp(i ,j+1,k )
+ q_ccp = (1.+dt_r)*p(i ,j ,k+1)-dt_r*pp(i ,j ,k+1)
!
#if defined(_SCALAR) && defined(_BOUSSINESQ_BUOYANCY)
s_ccc = s(i ,j ,k )
@@ -838,12 +846,13 @@ subroutine mom_xyz_oth(nx,ny,nz,dxi,dyi,dzci,rho12,beta12,bforce,gacc,sigma,rho0
end do
end subroutine mom_xyz_oth
!
- subroutine mom_xyz_all(nx,ny,nz,dxi,dyi,dzci,dzfi,rho12,mu12,beta12,bforce,gacc,sigma,rho0,rho_av, &
+ subroutine mom_xyz_all(nx,ny,nz,dxi,dyi,dzci,dzfi,dt_r,rho12,mu12,beta12,bforce,gacc,sigma,rho0,rho_av, &
u,v,w,p,pp,psi,kappa,s,dudt,dvdt,dwdt)
implicit none
integer , intent(in) :: nx,ny,nz
real(rp), intent(in) :: dxi,dyi
real(rp), intent(in), dimension(0:) :: dzci,dzfi
+ real(rp), intent(in) :: dt_r
real(rp), intent(in), dimension(2) :: rho12,mu12,beta12
real(rp), intent(in), dimension(3) :: bforce,gacc
real(rp), intent(in) :: sigma
@@ -927,10 +936,10 @@ subroutine mom_xyz_all(nx,ny,nz,dxi,dyi,dzci,dzfi,rho12,mu12,beta12,bforce,gacc,
p_cpc = p(i ,j+1,k )
p_ccp = p(i ,j ,k+1)
!
- q_ccc = pp(i ,j ,k )
- q_pcc = pp(i+1,j ,k )
- q_cpc = pp(i ,j+1,k )
- q_ccp = pp(i ,j ,k+1)
+ q_ccc = (1.+dt_r)*p(i ,j ,k )-dt_r*pp(i ,j ,k )
+ q_pcc = (1.+dt_r)*p(i+1,j ,k )-dt_r*pp(i+1,j ,k )
+ q_cpc = (1.+dt_r)*p(i ,j+1,k )-dt_r*pp(i ,j+1,k )
+ q_ccp = (1.+dt_r)*p(i ,j ,k+1)-dt_r*pp(i ,j ,k+1)
!
#if defined(_SCALAR) && defined(_BOUSSINESQ_BUOYANCY)
s_ccc = s(i ,j ,k )
diff --git a/src/rk.f90 b/src/rk.f90
index 6db6c34..66b2e07 100644
--- a/src/rk.f90
+++ b/src/rk.f90
@@ -37,12 +37,13 @@ subroutine rk(rkpar,n,dli,dzci,dzfi,dt, &
real(rp), pointer , contiguous , dimension(:,:,:), save :: dudtrk ,dvdtrk ,dwdtrk , &
dudtrko ,dvdtrko ,dwdtrko
logical, save :: is_first = .true.
- real(rp) :: factor1,factor2,factor12
+ real(rp) :: factor1,factor2,factor12,dt_r
integer :: i,j,k
!
factor1 = rkpar(1)*dt
factor2 = rkpar(2)*dt
factor12 = factor1+factor2
+ dt_r = -2.*rkpar(2) ! N.B.: For AB2 only, rkpar(2) = -(dt/dto)/2 = dt_r/2
!
! initialization
!
@@ -96,7 +97,7 @@ subroutine rk(rkpar,n,dli,dzci,dzfi,dt, &
!
! pressure, surface tension, and buoyancy terms
!
- call mom_xyz_oth(n(1),n(2),n(3),dli(1),dli(2),dzci,rho12,beta12,bforce,gacc,sigma,rho0,rho_av, &
+ call mom_xyz_oth(n(1),n(2),n(3),dli(1),dli(2),dzci,dt_r,rho12,beta12,bforce,gacc,sigma,rho0,rho_av, &
p,pp,psi,kappa,s,dudtrk,dvdtrk,dwdtrk)
!
!$acc parallel loop collapse(3) default(present) async(1)