From f6b5013210a63881d0f9077b9677bb7c3d5bcd8e Mon Sep 17 00:00:00 2001 From: Pedro Costa Date: Mon, 15 Jan 2024 15:59:48 +0100 Subject: [PATCH] Incorporate pressure extrapolation inside `mom` subroutines directly to save memory. --- src/main.f90 | 34 +++++++++++++++++++--------------- src/mom.f90 | 45 +++++++++++++++++++++++++++------------------ src/rk.f90 | 5 +++-- 3 files changed, 49 insertions(+), 35 deletions(-) diff --git a/src/main.f90 b/src/main.f90 index 68ddcdf..1d9c4a9 100644 --- a/src/main.f90 +++ b/src/main.f90 @@ -78,7 +78,7 @@ program cans use mod_common_cudecomp, only: istream_acc_queue_1 #endif use mod_timer , only: timer_tic,timer_toc,timer_print - use mod_updatep , only: updatep,extrapl_p + use mod_updatep , only: updatep use mod_utils , only: bulk_mean,bulk_mean_12 !@acc use mod_utils , only: device_memory_footprint use mod_types @@ -150,8 +150,11 @@ program cans v( 0:n(1)+1,0:n(2)+1,0:n(3)+1), & w( 0:n(1)+1,0:n(2)+1,0:n(3)+1), & p( 0:n(1)+1,0:n(2)+1,0:n(3)+1), & - pp(0:n(1)+1,0:n(2)+1,0:n(3)+1), & - po(0:n(1)+1,0:n(2)+1,0:n(3)+1)) + pp(0:n(1)+1,0:n(2)+1,0:n(3)+1)) +#if !defined(_CONSTANT_COEFFS_POISSON) + allocate(po,mold=pp) + po(:,:,:) = 0._rp +#endif #if defined(_SCALAR) allocate(s,mold=pp) #endif @@ -275,7 +278,6 @@ program cans time = 0. !$acc update self(zc,dzc,dzf) call initflow(inivel,bcvel,ng,lo,l,dl,zc,zf,dzc,dzf,rho12(1),mu12(1),bforce,is_wallturb,time,u,v,w,p) - po(:,:,:) = p(:,:,:) #if defined(_SCALAR) call initscal(inisca,bcsca,ng,lo,l,dl,dzf,zc,s) #endif @@ -294,7 +296,7 @@ program cans end if call acdi_set_gamma(n,acdi_gam_factor,u,v,w,gam) if(myid == 0) print*, 'Gamma = ', gam, 'Epsilon = ', seps - !$acc enter data copyin(u,v,w,p) create(pp,po) + !$acc enter data copyin(u,v,w,p) create(pp) call bounduvw(cbcvel,n,bcvel,nb,is_bound,.false.,dl,dzc,dzf,u,v,w) call boundp(cbcpre,n,bcpre,nb,is_bound,dl,dzc,p) #if defined(_SCALAR) @@ -350,10 +352,6 @@ program cans !$acc update device(u,v,w,p) call bounduvw(cbcvel,n,bcvel,nb,is_bound,.false.,dl,dzc,dzf,u,v,w) else -#if defined(_CONSTANT_COEFFS_POISSON) - call extrapl_p(dt,dto,p,po,pp) - call boundp(cbcpre,n,bcpre,nb,is_bound,dl,dzc,pp) -#endif rho_av = 0. if(any(abs(gacc(:))>0. .and. cbcpre(0,:)//cbcpre(1,:) == 'PP')) then call bulk_mean_12(n,grid_vol_ratio_c,psi,rho12,rho_av) @@ -362,15 +360,21 @@ program cans bforce,gacc,sigma,rho_av,rho12,mu12,beta12,rho0,psi,kappa,s, & p,pp,u,v,w) call bounduvw(cbcvel,n,bcvel,nb,is_bound,.false.,dl,dzc,dzf,u,v,w) - call fillps(n,dli,dzfi,dti,u,v,w,pp) #if defined(_CONSTANT_COEFFS_POISSON) - call updt_rhs_b(['c','c','c'],cbcpre,n,is_bound,rhsbp%x,rhsbp%y,rhsbp%z,pp) - call solver(n,ng,arrplanp,normfftp,lambdaxyp,ap,bp,cp,cbcpre,['c','c','c'],pp) + !$acc kernels + pp(:,:,:) = p(:,:,:) + !$acc end kernels + call boundp(cbcpre,n,bcpre,nb,is_bound,dl,dzc,pp) +#endif + call fillps(n,dli,dzfi,dti,u,v,w,p) +#if defined(_CONSTANT_COEFFS_POISSON) + call updt_rhs_b(['c','c','c'],cbcpre,n,is_bound,rhsbp%x,rhsbp%y,rhsbp%z,p) + call solver(n,ng,arrplanp,normfftp,lambdaxyp,ap,bp,cp,cbcpre,['c','c','c'],p) #else - call solver_vc(ng,lo,hi,cbcpre,bcpre,dli,dzci,dzfi,is_bound,rho12,psi,pp,po) + call solver_vc(ng,lo,hi,cbcpre,bcpre,dli,dzci,dzfi,is_bound,rho12,psi,p,po) #endif - call boundp(cbcpre,n,bcpre,nb,is_bound,dl,dzc,pp) - call correc(n,dli,dzci,rho0,rho12,dt,pp,psi,u,v,w) + call boundp(cbcpre,n,bcpre,nb,is_bound,dl,dzc,p) + call correc(n,dli,dzci,rho0,rho12,dt,p,psi,u,v,w) call bounduvw(cbcvel,n,bcvel,nb,is_bound,.true.,dl,dzc,dzf,u,v,w) call updatep(pp,p) call boundp(cbcpre,n,bcpre,nb,is_bound,dl,dzc,p) diff --git a/src/mom.f90 b/src/mom.f90 index 2542861..2b871a6 100644 --- a/src/mom.f90 +++ b/src/mom.f90 @@ -254,10 +254,10 @@ subroutine momz_d(nx,ny,nz,dxi,dyi,dzci,dzfi,rho12,mu12,psi,u,v,w,dwdt) end do end subroutine momz_d ! - subroutine momx_p(nx,ny,nz,dxi,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dudt) + subroutine momx_p(nx,ny,nz,dxi,dt_r,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dudt) implicit none integer , intent(in) :: nx,ny,nz - real(rp), intent(in) :: dxi + real(rp), intent(in) :: dxi,dt_r real(rp), intent(in) :: bforce,gacc,rho0,rho_av,rho12(2) real(rp), dimension(0:,0:,0:), intent(in ) :: psi,p,pp real(rp), dimension( :, :, :), intent(inout) :: dudt @@ -276,7 +276,9 @@ subroutine momx_p(nx,ny,nz,dxi,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dudt) ! dudt(i,j,k) = dudt(i,j,k) + bforce/rhop + gacc*(1.-rho_av/rhop) + & #if defined(_CONSTANT_COEFFS_POISSON) - dpdl/rho0 + (1./rhop-1./rho0)*(pp(i+1,j,k)-pp(i,j,k))*dxi + dpdl/rho0 + (1./rhop-1./rho0)*( ((1.+dt_r)*p(i+1,j,k)+dt_r*(pp(i+1,j,k))) - & + ((1.+dt_r)*p(i ,j,k)+dt_r*(pp(i ,j,k))) & + )*dxi #else dpdl/rhop #endif @@ -285,10 +287,10 @@ subroutine momx_p(nx,ny,nz,dxi,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dudt) end do end subroutine momx_p ! - subroutine momy_p(nx,ny,nz,dyi,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dvdt) + subroutine momy_p(nx,ny,nz,dyi,dt_r,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dvdt) implicit none integer , intent(in) :: nx,ny,nz - real(rp), intent(in) :: dyi + real(rp), intent(in) :: dyi,dt_r real(rp), intent(in) :: bforce,gacc,rho0,rho_av,rho12(2) real(rp), dimension(0:,0:,0:), intent(in ) :: psi,p,pp real(rp), dimension( :, :, :), intent(inout) :: dvdt @@ -307,7 +309,9 @@ subroutine momy_p(nx,ny,nz,dyi,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dvdt) ! dvdt(i,j,k) = dvdt(i,j,k) + bforce/rhop + gacc*(1.-rho_av/rhop) + & #if defined(_CONSTANT_COEFFS_POISSON) - dpdl/rho0 + (1./rhop-1./rho0)*(pp(i,j+1,k)-pp(i,j,k))*dyi + dpdl/rho0 + (1./rhop-1./rho0)*( ((1.+dt_r)*p(i,j+1,k)+dt_r*(pp(i,j+1,k))) - & + ((1.+dt_r)*p(i,j ,k)+dt_r*(pp(i,j ,k))) & + )*dyi #else dpdl/rhop #endif @@ -316,10 +320,11 @@ subroutine momy_p(nx,ny,nz,dyi,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dvdt) end do end subroutine momy_p ! - subroutine momz_p(nx,ny,nz,dzci,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dwdt) + subroutine momz_p(nx,ny,nz,dzci,dt_r,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dwdt) implicit none integer , intent(in) :: nx,ny,nz real(rp), intent(in), dimension(0:) :: dzci + real(rp), intent(in) :: dt_r real(rp), intent(in) :: bforce,gacc,rho0,rho_av,rho12(2) real(rp), dimension(0:,0:,0:), intent(in ) :: psi,p,pp real(rp), dimension( :, :, :), intent(inout) :: dwdt @@ -338,7 +343,9 @@ subroutine momz_p(nx,ny,nz,dzci,bforce,gacc,rho0,rho_av,rho12,psi,p,pp,dwdt) ! dwdt(i,j,k) = dwdt(i,j,k) + bforce/rhop + gacc*(1.-rho_av/rhop) + & #if defined(_CONSTANT_COEFFS_POISSON) - dpdl/rho0 + (1./rhop-1./rho0)*(pp(i,j,k+1)-pp(i,j,k))*dzci(k) + dpdl/rho0 + (1./rhop-1./rho0)*( ((1.+dt_r)*p(i,j,k+1)+dt_r*(pp(i,j,k+1))) - & + ((1.+dt_r)*p(i,j,k )+dt_r*(pp(i,j,k ))) & + )*dzci(k) #else dpdl/rhop #endif @@ -714,12 +721,13 @@ subroutine mom_xyz_ad(nx,ny,nz,dxi,dyi,dzci,dzfi,rho12,mu12, & end do end subroutine mom_xyz_ad ! - subroutine mom_xyz_oth(nx,ny,nz,dxi,dyi,dzci,rho12,beta12,bforce,gacc,sigma,rho0,rho_av, & + subroutine mom_xyz_oth(nx,ny,nz,dxi,dyi,dzci,dt_r,rho12,beta12,bforce,gacc,sigma,rho0,rho_av, & p,pp,psi,kappa,s,dudt,dvdt,dwdt) implicit none integer , intent(in) :: nx,ny,nz real(rp), intent(in) :: dxi,dyi real(rp), intent(in), dimension(0:) :: dzci + real(rp), intent(in) :: dt_r real(rp), intent(in), dimension(2) :: rho12,beta12 real(rp), intent(in), dimension(3) :: bforce,gacc real(rp), intent(in) :: sigma @@ -754,10 +762,10 @@ subroutine mom_xyz_oth(nx,ny,nz,dxi,dyi,dzci,rho12,beta12,bforce,gacc,sigma,rho0 p_cpc = p(i ,j+1,k ) p_ccp = p(i ,j ,k+1) ! - q_ccc = pp(i ,j ,k ) - q_pcc = pp(i+1,j ,k ) - q_cpc = pp(i ,j+1,k ) - q_ccp = pp(i ,j ,k+1) + q_ccc = (1.+dt_r)*p(i ,j ,k )-dt_r*pp(i ,j ,k ) + q_pcc = (1.+dt_r)*p(i+1,j ,k )-dt_r*pp(i+1,j ,k ) + q_cpc = (1.+dt_r)*p(i ,j+1,k )-dt_r*pp(i ,j+1,k ) + q_ccp = (1.+dt_r)*p(i ,j ,k+1)-dt_r*pp(i ,j ,k+1) ! #if defined(_SCALAR) && defined(_BOUSSINESQ_BUOYANCY) s_ccc = s(i ,j ,k ) @@ -838,12 +846,13 @@ subroutine mom_xyz_oth(nx,ny,nz,dxi,dyi,dzci,rho12,beta12,bforce,gacc,sigma,rho0 end do end subroutine mom_xyz_oth ! - subroutine mom_xyz_all(nx,ny,nz,dxi,dyi,dzci,dzfi,rho12,mu12,beta12,bforce,gacc,sigma,rho0,rho_av, & + subroutine mom_xyz_all(nx,ny,nz,dxi,dyi,dzci,dzfi,dt_r,rho12,mu12,beta12,bforce,gacc,sigma,rho0,rho_av, & u,v,w,p,pp,psi,kappa,s,dudt,dvdt,dwdt) implicit none integer , intent(in) :: nx,ny,nz real(rp), intent(in) :: dxi,dyi real(rp), intent(in), dimension(0:) :: dzci,dzfi + real(rp), intent(in) :: dt_r real(rp), intent(in), dimension(2) :: rho12,mu12,beta12 real(rp), intent(in), dimension(3) :: bforce,gacc real(rp), intent(in) :: sigma @@ -927,10 +936,10 @@ subroutine mom_xyz_all(nx,ny,nz,dxi,dyi,dzci,dzfi,rho12,mu12,beta12,bforce,gacc, p_cpc = p(i ,j+1,k ) p_ccp = p(i ,j ,k+1) ! - q_ccc = pp(i ,j ,k ) - q_pcc = pp(i+1,j ,k ) - q_cpc = pp(i ,j+1,k ) - q_ccp = pp(i ,j ,k+1) + q_ccc = (1.+dt_r)*p(i ,j ,k )-dt_r*pp(i ,j ,k ) + q_pcc = (1.+dt_r)*p(i+1,j ,k )-dt_r*pp(i+1,j ,k ) + q_cpc = (1.+dt_r)*p(i ,j+1,k )-dt_r*pp(i ,j+1,k ) + q_ccp = (1.+dt_r)*p(i ,j ,k+1)-dt_r*pp(i ,j ,k+1) ! #if defined(_SCALAR) && defined(_BOUSSINESQ_BUOYANCY) s_ccc = s(i ,j ,k ) diff --git a/src/rk.f90 b/src/rk.f90 index 6db6c34..66b2e07 100644 --- a/src/rk.f90 +++ b/src/rk.f90 @@ -37,12 +37,13 @@ subroutine rk(rkpar,n,dli,dzci,dzfi,dt, & real(rp), pointer , contiguous , dimension(:,:,:), save :: dudtrk ,dvdtrk ,dwdtrk , & dudtrko ,dvdtrko ,dwdtrko logical, save :: is_first = .true. - real(rp) :: factor1,factor2,factor12 + real(rp) :: factor1,factor2,factor12,dt_r integer :: i,j,k ! factor1 = rkpar(1)*dt factor2 = rkpar(2)*dt factor12 = factor1+factor2 + dt_r = -2.*rkpar(2) ! N.B.: For AB2 only, rkpar(2) = -(dt/dto)/2 = dt_r/2 ! ! initialization ! @@ -96,7 +97,7 @@ subroutine rk(rkpar,n,dli,dzci,dzfi,dt, & ! ! pressure, surface tension, and buoyancy terms ! - call mom_xyz_oth(n(1),n(2),n(3),dli(1),dli(2),dzci,rho12,beta12,bforce,gacc,sigma,rho0,rho_av, & + call mom_xyz_oth(n(1),n(2),n(3),dli(1),dli(2),dzci,dt_r,rho12,beta12,bforce,gacc,sigma,rho0,rho_av, & p,pp,psi,kappa,s,dudtrk,dvdtrk,dwdtrk) ! !$acc parallel loop collapse(3) default(present) async(1)