Skip to content

Commit

Permalink
Merge branch 'worleyph/cam/pdf_closure_call_opt' (PR #1033)
Browse files Browse the repository at this point in the history
Working around implicit array copies in CLUBB subroutine calls

In the routine advance_clubb_core
(in cam/src/physics/clubb/advance_clubb_core_module.F90) there are loops
of the form:

do k = 1, gr%nz, 1
call pdf_closure &
(...
zm2zt( wpthlp, k ), rtpthlp_zt(k), sclrm(k,:), & ! intent(in)
wpsclrp_zt(k,:), sclrp2_zt(k,:), sclrprtp_zt(k,:),& ! intent(in)
sclrpthlp_zt(k,:), k, & ! intent(in)
wphydrometp_zt(k,:), wp2hmp(k,:), & ! intent(in)
...
rtphmp_zt(k,:), thlphmp_zt(k,:), & ! intent(in)
wpsclrprtp(k,:), wpsclrp2(k,:), sclrpthvp_zt(k,:),& ! intent(out)
wpsclrpthlp(k,:), sclrprcp_zt(k,:), wp2sclrp(k,:),& ! intent(out)
...
)

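Each of the 15 actual arguments of the form XXX(k,:) is a strided
(non-contiguous) array section, while the corresponding dummy argument
inside pdf_closure is a one-dimensional array, and the compilers
apparently are creating local temporaries and copying into and out of
these on every call. These calls sit at a very low level (inside loops
over first chunks, then local columns, and then nadv), so the cost of
the copies is incurred many times.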

Explicitly declaring temporary arrays of the correct dimensions,
copying into them (for intent(in) arguments) before the call to
pdf_closure and out of them (for intent(out) arguments) after the call,
improves performance.

For the Intel compiler on Titan, this drops the cost by around 15%.
For the PGI compiler on Titan, this decreases the cost by a factor of
6.

This change modifies only two of the loops containing calls to
pdf_closure, as these are the only two that are exercised in the
current ACME test cases. There are two others that should be modified in
analogous ways if l_use_ice_latent is true.

Fixes #1031
[BFB]

* worleyph/cam/pdf_closure_call_opt:
  Rearranged code so that it passes Skybridge testing
  Added an if condition for fixing ONE TRIP behavior of some compilers.
  Working around implicit array segment copies
  • Loading branch information
singhbalwinder committed Feb 23, 2017
2 parents 5933a33 + ada7b79 commit 89bcfd7
Showing 1 changed file with 93 additions and 14 deletions.
107 changes: 93 additions & 14 deletions components/cam/src/physics/clubb/advance_clubb_core_module.F90
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,40 @@ subroutine advance_clubb_core &

real( kind = core_rknd ) :: Lscale_max

! work arrays used to improve performance of calls to pdf_closure
real( kind = core_rknd ), dimension(sclr_dim) :: &
tmp_sclrm, &
tmp_wpsclrp_zt, &
tmp_sclrp2_zt, &
tmp_sclrprtp_zt, &
tmp_sclrpthlp_zt, &
tmp_wphydrometp_zt, &
tmp_wp2hmp, &
tmp_rtphmp_zt, &
tmp_thlphmp_zt, &
tmp_wpsclrprtp, &
tmp_wpsclrp2, &
tmp_sclrpthvp_zt, &
tmp_wpsclrpthlp, &
tmp_sclrprcp_zt, &
tmp_wp2sclrp, &
!
tmp_sclrm_zm, &
tmp_wpsclrp, &
tmp_sclrp2, &
tmp_sclrprtp, &
tmp_sclrpthlp, &
tmp_wphydrometp, &
tmp_wp2hmp_zm, &
tmp_rtphmp, &
tmp_thlphmp, &
tmp_wpsclrprtp_zm, &
tmp_wpsclrp2_zm, &
tmp_sclrpthvp, &
tmp_wpsclrpthlp_zm, &
tmp_sclrprcp, &
tmp_wp2sclrp_zm

!----- Begin Code -----

! Determine the maximum allowable value for Lscale (in meters).
Expand Down Expand Up @@ -1010,28 +1044,41 @@ subroutine advance_clubb_core &

do k = 1, gr%nz, 1

! To avoid inefficient implicit temporary array creation and
! copies in call to pdf_closure, perform copies explicitly.
! Copy in for intent(in) here
tmp_sclrm = sclrm(k,:) ! intent(in)
tmp_wpsclrp_zt = wpsclrp_zt(k,:) ! intent(in)
tmp_sclrp2_zt = sclrp2_zt(k,:) ! intent(in)
tmp_sclrprtp_zt = sclrprtp_zt(k,:) ! intent(in)
tmp_sclrpthlp_zt = sclrpthlp_zt(k,:) ! intent(in)
tmp_wphydrometp_zt = wphydrometp_zt(k,:) ! intent(in)
tmp_wp2hmp = wp2hmp(k,:) ! intent(in)
tmp_rtphmp_zt = rtphmp_zt(k,:) ! intent(in)
tmp_thlphmp_zt = thlphmp_zt(k,:) ! intent(in)

call pdf_closure &
( hydromet_dim, p_in_Pa(k), exner(k), thv_ds_zt(k), wm_zt(k), & ! intent(in)
wp2_zt(k), wp3(k), sigma_sqd_w_zt(k), & ! intent(in)
Skw_zt(k), rtm(k), rtp2_zt(k), & ! intent(in)
zm2zt( wprtp, k ), thlm(k), thlp2_zt(k), & ! intent(in)
zm2zt( wpthlp, k ), rtpthlp_zt(k), sclrm(k,:), & ! intent(in)
wpsclrp_zt(k,:), sclrp2_zt(k,:), sclrprtp_zt(k,:), & ! intent(in)
sclrpthlp_zt(k,:), k, & ! intent(in)
zm2zt( wpthlp, k ), rtpthlp_zt(k), tmp_sclrm, & ! intent(in)
tmp_wpsclrp_zt, tmp_sclrp2_zt, tmp_sclrprtp_zt, & ! intent(in)
tmp_sclrpthlp_zt, k, & ! intent(in)
#ifdef GFDL
RH_crit(k, : , :), do_liquid_only_in_clubb, & ! intent(in)
#endif
wphydrometp_zt(k,:), wp2hmp(k,:), & ! intent(in)
rtphmp_zt(k,:), thlphmp_zt(k,:), & ! intent(in)
tmp_wphydrometp_zt, tmp_wp2hmp, & ! intent(in)
tmp_rtphmp_zt, tmp_thlphmp_zt, & ! intent(in)
wp4_zt(k), wprtp2(k), wp2rtp(k), & ! intent(out)
wpthlp2(k), wp2thlp(k), wprtpthlp(k), & ! intent(out)
cloud_frac(k), ice_supersat_frac(k), & ! intent(out)
rcm(k), wpthvp_zt(k), wp2thvp(k), rtpthvp_zt(k), & ! intent(out)
thlpthvp_zt(k), wprcp_zt(k), wp2rcp(k), rtprcp_zt(k), & ! intent(out)
thlprcp_zt(k), rcp2_zt(k), pdf_params(k), & ! intent(out)
err_code_pdf_closure, & ! intent(out)
wpsclrprtp(k,:), wpsclrp2(k,:), sclrpthvp_zt(k,:), & ! intent(out)
wpsclrpthlp(k,:), sclrprcp_zt(k,:), wp2sclrp(k,:), & ! intent(out)
tmp_wpsclrprtp, tmp_wpsclrp2, tmp_sclrpthvp_zt, & ! intent(out)
tmp_wpsclrpthlp, tmp_sclrprcp_zt, tmp_wp2sclrp, & ! intent(out)
rc_coef_zt(k) ) ! intent(out)

! Subroutine may produce NaN values, and if so, exit
Expand All @@ -1047,6 +1094,16 @@ subroutine advance_clubb_core &
err_code = err_code_pdf_closure
end if

! To avoid inefficient implicit temporary array creation and
! copies in call to pdf_closure, perform copies explicitly.
! Copy out for intent(out) here
wpsclrprtp(k,:) = tmp_wpsclrprtp ! intent(out)
wpsclrp2(k,:) = tmp_wpsclrp2 ! intent(out)
sclrpthvp_zt(k,:) = tmp_sclrpthvp_zt ! intent(out)
wpsclrpthlp(k,:) = tmp_wpsclrpthlp ! intent(out)
sclrprcp_zt(k,:) = tmp_sclrprcp_zt ! intent(out)
wp2sclrp(k,:) = tmp_wp2sclrp ! intent(out)

end do ! k = 1, gr%nz, 1

if ( l_refine_grid_in_cloud ) then
Expand Down Expand Up @@ -1158,28 +1215,41 @@ subroutine advance_clubb_core &
! Call pdf_closure to output the variables which belong on the momentum grid.
do k = 1, gr%nz, 1

! To avoid inefficient implicit temporary array creation and
! copies in call to pdf_closure, perform copies explicitly.
! Copy in for intent(in) here
tmp_sclrm_zm = sclrm_zm(k,:) ! intent(in)
tmp_wpsclrp = wpsclrp(k,:) ! intent(in)
tmp_sclrp2 = sclrp2(k,:) ! intent(in)
tmp_sclrprtp = sclrprtp(k,:) ! intent(in)
tmp_sclrpthlp = sclrpthlp(k,:) ! intent(in)
tmp_wphydrometp = wphydrometp(k,:) ! intent(in)
tmp_wp2hmp_zm = wp2hmp_zm(k,:) ! intent(in)
tmp_rtphmp = rtphmp(k,:) ! intent(in)
tmp_thlphmp = thlphmp(k,:) ! intent(in)

call pdf_closure &
( hydromet_dim, p_in_Pa_zm(k), exner_zm(k), thv_ds_zm(k), wm_zm(k), & ! intent(in)
wp2(k), wp3_zm(k), sigma_sqd_w(k), & ! intent(in)
Skw_zm(k), rtm_zm(k), rtp2(k), & ! intent(in)
wprtp(k), thlm_zm(k), thlp2(k), & ! intent(in)
wpthlp(k), rtpthlp(k), sclrm_zm(k,:), & ! intent(in)
wpsclrp(k,:), sclrp2(k,:), sclrprtp(k,:), & ! intent(in)
sclrpthlp(k,:), k, & ! intent(in)
wpthlp(k), rtpthlp(k), tmp_sclrm_zm, & ! intent(in)
tmp_wpsclrp, tmp_sclrp2, tmp_sclrprtp, & ! intent(in)
tmp_sclrpthlp, k, & ! intent(in)
#ifdef GFDL
RH_crit(k, : , :), do_liquid_only_in_clubb, & ! intent(in)
#endif
wphydrometp(k,:), wp2hmp_zm(k,:), & ! intent(in)
rtphmp(k,:), thlphmp(k,:), & ! intent(in)
tmp_wphydrometp, tmp_wp2hmp_zm, & ! intent(in)
tmp_rtphmp, tmp_thlphmp, & ! intent(in)
wp4(k), wprtp2_zm(k), wp2rtp_zm(k), & ! intent(out)
wpthlp2_zm(k), wp2thlp_zm(k), wprtpthlp_zm(k), & ! intent(out)
cloud_frac_zm(k), ice_supersat_frac_zm(k), & ! intent(out)
rcm_zm(k), wpthvp(k), wp2thvp_zm(k), rtpthvp(k), & ! intent(out)
thlpthvp(k), wprcp(k), wp2rcp_zm(k), rtprcp(k), & ! intent(out)
thlprcp(k), rcp2(k), pdf_params_zm(k), & ! intent(out)
err_code_pdf_closure, & ! intent(out)
wpsclrprtp_zm(k,:), wpsclrp2_zm(k,:), sclrpthvp(k,:), & ! intent(out)
wpsclrpthlp_zm(k,:), sclrprcp(k,:), wp2sclrp_zm(k,:), & ! intent(out)
tmp_wpsclrprtp_zm, tmp_wpsclrp2_zm, tmp_sclrpthvp, & ! intent(out)
tmp_wpsclrpthlp_zm, tmp_sclrprcp, tmp_wp2sclrp_zm, & ! intent(out)
rc_coef(k) ) ! intent(out)

! Subroutine may produce NaN values, and if so, exit
Expand All @@ -1196,6 +1266,15 @@ subroutine advance_clubb_core &
err_code = err_code_pdf_closure
end if

! To avoid inefficient implicit temporary array creation and
! copies in call to pdf_closure, perform copies explicitly.
! Copy out for intent(out) here
wpsclrprtp_zm(k,:) = tmp_wpsclrprtp_zm ! intent(out)
wpsclrp2_zm(k,:) = tmp_wpsclrp2_zm ! intent(out)
sclrpthvp(k,:) = tmp_sclrpthvp ! intent(out)
wpsclrpthlp_zm(k,:) = tmp_wpsclrpthlp_zm ! intent(out)
sclrprcp(k,:) = tmp_sclrprcp ! intent(out)
wp2sclrp_zm(k,:) = tmp_wp2sclrp_zm ! intent(out)
end do ! k = 1, gr%nz, 1

else ! l_call_pdf_closure_twice is false
Expand Down

0 comments on commit 89bcfd7

Please sign in to comment.