From 85423e5315f2c8ec50c4ca4196f7b34a3834e5f4 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Sun, 26 Apr 2020 21:38:54 -0400 Subject: [PATCH 1/9] move the fourth order center <-> average stuff to C++ --- Source/hydro/fourth_center_average.cpp | 240 +++++++++++++++ Source/hydro/fourth_order_nd.F90 | 410 ------------------------- 2 files changed, 240 insertions(+), 410 deletions(-) create mode 100644 Source/hydro/fourth_center_average.cpp diff --git a/Source/hydro/fourth_center_average.cpp b/Source/hydro/fourth_center_average.cpp new file mode 100644 index 0000000000..b27941e108 --- /dev/null +++ b/Source/hydro/fourth_center_average.cpp @@ -0,0 +1,240 @@ + + +// Note: pretty much all of these routines below assume that dx(1) = dx(2) = dx(3) + +Real compute_laplacian(int i, int j, int k, int n, + Array4 const a, + GpuArray& domlo, GpuArray& domhi) { + + Real lapx = 0.0; + Real lapy = 0.0; + Real lapz = 0.0; + + // we use 2nd-order accurate one-sided stencils at the physical + // boundaries note: this differs from the suggestion in MC2011 -- + // they just suggest using the Laplacian from +1 off the interior. + // I like the one-sided better. + + if (i == domlo[0] && physbc_lo[0] != Interior) { + lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n); + + } else if (i == domhi[0] && physbc_hi[0] != Interior) { + lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n); + + } else { + lapx = a(i+1,j,k,n) - TWO*a(i,j,k,n) + a(i-1,j,k,n); + } + +#if AMREX_SPACEDIM >= 2 + if (j == domlo[1] && physbc_lo[1] != Interior) { + lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n); + + } else if (j == domhi[1] && physbc_hi[1] != Interior) { + lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n); + + } else { + lapy = a(i,j+1,k,n) - TWO*a(i,j,k,n) + a(i,j-1,k,n); + } +#endif + +#if AMREX_SPACEDIM == 3 + if (k == domlo[2] && physbc_lo[2] != Interior) { + lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n); + + } else if (k == domhi[2] && physbc_hi[2] != Interior) { + lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n); + + } else { + lapz = a(i,j,k+1,n) - TWO*a(i,j,k,n) + a(i,j,k-1,n); + } +#endif + + return lapx + lapy + lapz; +} + + +Real trans_laplacian(int i, int j, int k, int n, + int idir, + Array4 const a, + GpuArray& domlo, GpuArray& domhi) { + + Real lapx = 0.0; + Real lapy = 0.0; + Real lapz = 0.0; + + // we use 2nd-order accurate one-sided stencils at the physical boundaries + if (idir != 0) { + + if (i == domlo[0] && physbc_lo[0] != Interior) { + lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n); + + } else if (i == domhi[0] && physbc_hi[0] != Interior) { + lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n); + + } else { + lapx = a(i+1,j,k,n) - TWO*a(i,j,k,n) + a(i-1,j,k,n); + } + } + + if (idir != 1) { + + if (j == domlo[1] && physbc_lo[1] != Interior) { + lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n); + + } else if (j == domhi[1] && physbc_hi[1] != Interior) { + lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n); + + } else { + lapy = a(i,j+1,k,n) - TWO*a(i,j,k,n) + a(i,j-1,k,n); + } + } + +#if AMREX_SPACEDIM == 3 + if (idir != 2) { + + if (k == domlo[2] && physbc_lo[2] != Interior) { + lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n); + + } else if (k == domhi[2] && physbc_hi[2] != Interior) { + lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n); + + } else { + lapz = a(i,j,k+1,n) - TWO*a(i,j,k,n) + a(i,j,k-1,n); + } + } +#endif + + return lapx + lapy + lapz; +} + + +void make_cell_center(const Box& bx, + Array4 const& U, + Array4 const& U_cc, + GpuArray& domlo, GpuArray& domhi) { + + // Take a cell-average state U and a convert it to a cell-center + // state U_cc via U_cc = U - 1/24 L U + + U_lo = U.lbound(); + U_hi = U.ubound(); + + AMREX_ASSERT(U_lo[0] <= lo[0]-1 && U_hi[0] >= hi[0]+1 && + (AMREX_SPACEDIM >= 2 && (U_lo[1] <= lo[1]-1 && U_hi[1] >= hi[1]+1)) && + (AMREX_SPACEDIM == 3 && (U_lo[2] <= lo[2]-1 && U_hi[2] >= hi[2]+1))); + + amrex::ParallelFor(bx, U.nComp(), + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + Real lap = compute_laplacian(i, j, k, n, U, domlo, domhi); + + U_cc(i,j,k,n) = U(i,j,k,n) - (1.0_rt/24.0_rt) * lap; + + }); +} + +void make_cell_center_in_place(const Box& bx, + Array4 const& U, + Array4 const& tmp, + GpuArray& domlo, GpuArray& domhi) { + + // Take a cell-average state U and make it cell-centered in place + // via U <- U - 1/24 L U. Note that this operation is not tile + // safe. + + // here, tmp is a temporary memory space we use to store one-component's Laplacian + + for (int n = 0; n < U.nComp(); n++) { + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + tmp(i,j,k) = compute_laplacian(i, j, k, n, U, domlo, domhi); + }); + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + U(i,j,k,n) = U(i,j,k,n) - (1.0_rt/24.0_rt) * tmp(i,j,k); + }); + } +} + + +void compute_lap_term(const Box& bx, + Array4 const& Y, + Array4 const& lap, const int ncomp, + GpuArray& domlo, GpuArray& domhi) { + + // Computes the h**2/24 L U term that is used in correcting + // cell-center to averages (and back) + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + lap(i,j,k) = (1.0_rt/24.0_rt) * + compute_laplacian(i, j, k, ncomp, U, domlo, domhi); + }); + +} + + +void make_fourth_average(const Box& bx, + Array4 const& q, + Array4 const& q_bar, + GpuArray& domlo, GpuArray& domhi) { + + // Take the cell-center state q and another state q_bar (e.g., + // constructed from the cell-average U) and replace the cell-center + // q with a 4th-order accurate cell-average, q <- q + 1/24 L q_bar + + amrex::ParallelFor(bx, q.nComp(), + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + Real lap = compute_laplacian(i, j, k, n, q_bar, domlo, domhi); + + q(i,j,k,n) += (1.0_rt/24.0_rt) * lap; + }); +} + + +void make_fourth_in_place(const Box& bx, + Array4 const& q, + Array4 const& tmp, + GpuArray& domlo, GpuArray& domhi) { + + // Take the cell-center q and makes it a cell-average q, in place + // (e.g. q is overwritten by its average), q <- q + 1/24 L q. + // Note: this routine is not tile safe. + + for (int n = 0; n < q.nComp(); n++) { + make_fourth_in_place_n(bx, q, n, tmp, domlo, domhi); + } +} + + +void make_fourth_in_place_n(const Box& bx, + Array4 const& q, const int ncomp, + Array4 const& tmp, + GpuArray& domlo, GpuArray& domhi) { + + // Take the cell-center q and makes it a cell-average q, in place + // (e.g. q is overwritten by its average), q <- q + 1/24 L q. + // Note: this routine is not tile safe. + // + // This version operates on a single component. Here ncomp is the + // component to update + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + tmp(i,j,k) = compute_laplacian(i, j, k, ncomp, q, domlo, domhi); + }); + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + q(i,j,k,ncomp) += (1.0_rt/24.0_rt) * lap(i,j,k); + }); + +} diff --git a/Source/hydro/fourth_order_nd.F90 b/Source/hydro/fourth_order_nd.F90 index 7c0e427277..78a0abe6c9 100644 --- a/Source/hydro/fourth_order_nd.F90 +++ b/Source/hydro/fourth_order_nd.F90 @@ -790,414 +790,4 @@ subroutine ca_states(lo, hi, & end subroutine ca_states - - ! Note: pretty much all of these routines below assume that dx(1) = dx(2) = dx(3) - pure function compute_laplacian(i, j, k, n, & - a, a_lo, a_hi, nc, & - domlo, domhi) result (lap) - - use prob_params_module, only : physbc_lo, physbc_hi, Interior - implicit none - - integer, intent(in) :: i, j, k, n - integer, intent(in) :: a_lo(3), a_hi(3) - integer, intent(in) :: nc - real(rt), intent(in) :: a(a_lo(1):a_hi(1), a_lo(2):a_hi(2), a_lo(3):a_hi(3), nc) - integer, intent(in) :: domlo(3), domhi(3) - - real(rt) :: lapx, lapy, lapz - real(rt) :: lap - - lapx = ZERO - lapy = ZERO - lapz = ZERO - - ! we use 2nd-order accurate one-sided stencils at the physical - ! boundaries note: this differs from the suggestion in MC2011 -- - ! they just suggest using the Laplacian from +1 off the interior. - ! I like the one-sided better. - - if (i == domlo(1) .and. physbc_lo(1) /= Interior) then - lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n) - - else if (i == domhi(1) .and. physbc_hi(1) /= Interior) then - lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n) - - else - lapx = a(i+1,j,k,n) - TWO*a(i,j,k,n) + a(i-1,j,k,n) - end if - -#if AMREX_SPACEDIM >= 2 - if (j == domlo(2) .and. physbc_lo(2) /= Interior) then - lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n) - - else if (j == domhi(2) .and. physbc_hi(2) /= Interior) then - lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n) - - else - lapy = a(i,j+1,k,n) - TWO*a(i,j,k,n) + a(i,j-1,k,n) - end if - -#endif - -#if AMREX_SPACEDIM == 3 - if (k == domlo(3) .and. physbc_lo(3) /= Interior) then - lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n) - - else if (k == domhi(3) .and. physbc_hi(3) /= Interior) then - lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n) - - else - lapz = a(i,j,k+1,n) - TWO*a(i,j,k,n) + a(i,j,k-1,n) - end if -#endif - - lap = lapx + lapy + lapz - - end function compute_laplacian - - subroutine trans_laplacian(i, j, k, n, & - idir, & - a, a_lo, a_hi, nc, & - lap, & - domlo, domhi) bind(C, name="trans_laplacian") - - use prob_params_module, only : physbc_lo, physbc_hi, Interior - implicit none - - integer, intent(in), value :: i, j, k, n - integer, intent(in), value :: idir - integer, intent(in) :: a_lo(3), a_hi(3) - integer, intent(in), value :: nc - real(rt), intent(in) :: a(a_lo(1):a_hi(1), a_lo(2):a_hi(2), a_lo(3):a_hi(3), nc) - integer, intent(in) :: domlo(3), domhi(3) - real(rt), intent(out) :: lap - - real(rt) :: lapx, lapy, lapz - - lapx = ZERO - lapy = ZERO - lapz = ZERO - - ! we use 2nd-order accurate one-sided stencils at the physical boundaries - if (idir /= 1) then - - if (i == domlo(1) .and. physbc_lo(1) /= Interior) then - lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n) - - else if (i == domhi(1) .and. physbc_hi(1) /= Interior) then - lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n) - - else - lapx = a(i+1,j,k,n) - TWO*a(i,j,k,n) + a(i-1,j,k,n) - end if - end if - - if (idir /= 2) then - - if (j == domlo(2) .and. physbc_lo(2) /= Interior) then - lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n) - - else if (j == domhi(2) .and. physbc_hi(2) /= Interior) then - lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n) - - else - lapy = a(i,j+1,k,n) - TWO*a(i,j,k,n) + a(i,j-1,k,n) - end if - end if - -#if AMREX_SPACEDIM == 3 - if (idir /= 3) then - - if (k == domlo(3) .and. physbc_lo(3) /= Interior) then - lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n) - - else if (k == domhi(3) .and. physbc_hi(3) /= Interior) then - lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n) - - else - lapz = a(i,j,k+1,n) - TWO*a(i,j,k,n) + a(i,j,k-1,n) - end if - end if -#endif - - lap = lapx + lapy + lapz - - end subroutine trans_laplacian - - - subroutine ca_make_cell_center(lo, hi, & - U, U_lo, U_hi, nc, & - U_cc, U_cc_lo, U_cc_hi, nc_cc, & - domlo, domhi) & - bind(C, name="ca_make_cell_center") - ! Take a cell-average state U and a convert it to a cell-center - ! state U_cc via U_cc = U - 1/24 L U - - implicit none - - integer, intent(in) :: lo(3), hi(3) - integer, intent(in) :: U_lo(3), U_hi(3) - integer, intent(in) :: U_cc_lo(3), U_cc_hi(3) - integer, intent(in) :: nc, nc_cc - real(rt), intent(in) :: U(U_lo(1):U_hi(1), U_lo(2):U_hi(2), U_lo(3):U_hi(3), nc) - real(rt), intent(inout) :: U_cc(U_cc_lo(1):U_cc_hi(1), U_cc_lo(2):U_cc_hi(2), U_cc_lo(3):U_cc_hi(3), nc_cc) - integer, intent(in) :: domlo(3), domhi(3) - - integer :: i, j, k, n - real(rt) :: lap - - if (U_lo(1) > lo(1)-1 .or. U_hi(1) < hi(1)+1 .or. & - (AMREX_SPACEDIM >= 2 .and. (U_lo(2) > lo(2)-1 .or. U_hi(2) < hi(2)+1)) .or. & - (AMREX_SPACEDIM == 3 .and. (U_lo(3) > lo(3)-1 .or. U_hi(3) < hi(3)+1))) then - call bl_error("insufficient ghostcells in ca_make_cell_center") - end if - - do n = 1, nc - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - - lap = compute_laplacian(i, j, k, n, & - U, U_lo, U_hi, nc, & - domlo, domhi) - - U_cc(i,j,k,n) = U(i,j,k,n) - TWENTYFOURTH * lap - - end do - end do - end do - end do - - end subroutine ca_make_cell_center - - subroutine ca_make_cell_center_in_place(lo, hi, & - U, U_lo, U_hi, nc, & - domlo, domhi) & - bind(C, name="ca_make_cell_center_in_place") - ! Take a cell-average state U and make it cell-centered in place - ! via U <- U - 1/24 L U. Note that this operation is not tile - ! safe. - - use amrex_mempool_module, only : bl_allocate, bl_deallocate - - implicit none - - integer, intent(in) :: lo(3), hi(3) - integer, intent(in) :: U_lo(3), U_hi(3) - integer, intent(in) :: nc - real(rt), intent(inout) :: U(U_lo(1):U_hi(1), U_lo(2):U_hi(2), U_lo(3):U_hi(3), nc) - integer, intent(in) :: domlo(3), domhi(3) - - integer :: i, j, k, n - - real(rt), pointer :: lap(:,:,:) - - call bl_allocate(lap, lo, hi) - - do n = 1, nc - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - - lap(i,j,k) = compute_laplacian(i, j, k, n, & - U, U_lo, U_hi, nc, & - domlo, domhi) - - end do - end do - end do - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - U(i,j,k,n) = U(i,j,k,n) - TWENTYFOURTH * lap(i,j,k) - end do - end do - end do - end do - - call bl_deallocate(lap) - - end subroutine ca_make_cell_center_in_place - - subroutine ca_compute_lap_term(lo, hi, & - U, U_lo, U_hi, nc, & - lap, lap_lo, lap_hi, ncomp, & - domlo, domhi) & - bind(C, name="ca_compute_lap_term") - ! Computes the h**2/24 L U term that is used in correcting - ! cell-center to averages (and back) - - implicit none - - integer, intent(in) :: lo(3), hi(3) - integer, intent(in) :: U_lo(3), U_hi(3) - integer, intent(in) :: lap_lo(3), lap_hi(3) - integer, intent(in) :: nc - real(rt), intent(in) :: U(U_lo(1):U_hi(1), U_lo(2):U_hi(2), U_lo(3):U_hi(3), nc) - real(rt), intent(inout) :: lap(lap_lo(1):lap_hi(1), lap_lo(2):lap_hi(2), lap_lo(3):lap_hi(3)) - integer, intent(in), value :: ncomp - integer, intent(in) :: domlo(3), domhi(3) - - ! note: ncomp is C++ index (0-based) - - integer :: i, j, k - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - - lap(i,j,k) = compute_laplacian(i, j, k, ncomp+1, & - U, U_lo, U_hi, nc, & - domlo, domhi) - - end do - end do - end do - - lap(lo(1):hi(1), lo(2):hi(2), lo(3):hi(3)) = & - TWENTYFOURTH * lap(lo(1):hi(1), lo(2):hi(2), lo(3):hi(3)) - - end subroutine ca_compute_lap_term - - subroutine ca_make_fourth_average(lo, hi, & - q, q_lo, q_hi, nc, & - q_bar, q_bar_lo, q_bar_hi, nc_bar, & - domlo, domhi) & - bind(C, name="ca_make_fourth_average") - ! Take the cell-center state q and another state q_bar (e.g., - ! constructed from the cell-average U) and replace the cell-center - ! q with a 4th-order accurate cell-average, q <- q + 1/24 L q_bar - - integer, intent(in) :: lo(3), hi(3) - integer, intent(in) :: q_lo(3), q_hi(3) - integer, intent(in) :: q_bar_lo(3), q_bar_hi(3) - integer, intent(in) :: nc, nc_bar - real(rt), intent(inout) :: q(q_lo(1):q_hi(1), q_lo(2):q_hi(2), q_lo(3):q_hi(3), nc) - real(rt), intent(in) :: q_bar(q_bar_lo(1):q_bar_hi(1), q_bar_lo(2):q_bar_hi(2), q_bar_lo(3):q_bar_hi(3), nc_bar) - integer, intent(in) :: domlo(3), domhi(3) - - integer :: i, j, k, n - real(rt) :: lap - - do n = 1, nc - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - - lap = compute_laplacian(i, j, k, n, & - q_bar, q_bar_lo, q_bar_hi, nc, & - domlo, domhi) - - q(i,j,k,n) = q(i,j,k,n) + TWENTYFOURTH * lap - - end do - end do - end do - end do - - end subroutine ca_make_fourth_average - - subroutine ca_make_fourth_in_place(lo, hi, & - q, q_lo, q_hi, nc, & - domlo, domhi) & - bind(C, name="ca_make_fourth_in_place") - ! Take the cell-center q and makes it a cell-average q, in place - ! (e.g. q is overwritten by its average), q <- q + 1/24 L q. - ! Note: this routine is not tile safe. - - use amrex_mempool_module, only : bl_allocate, bl_deallocate - - integer, intent(in) :: lo(3), hi(3) - integer, intent(in) :: q_lo(3), q_hi(3) - integer, intent(in) :: nc - real(rt), intent(inout) :: q(q_lo(1):q_hi(1), q_lo(2):q_hi(2), q_lo(3):q_hi(3), nc) - integer, intent(in) :: domlo(3), domhi(3) - - integer :: i, j, k, n - real(rt), pointer :: lap(:,:,:) - - call bl_allocate(lap, lo, hi) - - do n = 1, nc - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - - lap(i,j,k) = compute_laplacian(i, j, k, n, & - q, q_lo, q_hi, nc, & - domlo, domhi) - - end do - end do - end do - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - q(i,j,k,n) = q(i,j,k,n) + TWENTYFOURTH * lap(i,j,k) - end do - end do - end do - end do - - call bl_deallocate(lap) - - end subroutine ca_make_fourth_in_place - - subroutine ca_make_fourth_in_place_n(lo, hi, & - q, q_lo, q_hi, nc, ncomp, & - domlo, domhi) & - bind(C, name="ca_make_fourth_in_place_n") - ! Take the cell-center q and makes it a cell-average q, in place - ! (e.g. q is overwritten by its average), q <- q + 1/24 L q. - ! Note: this routine is not tile safe. - ! - ! This version operates on a single component. Here ncomp is the - ! component to update -- we expect this to come in 0-based from - ! C++, so we add 1 - - use amrex_mempool_module, only : bl_allocate, bl_deallocate - - integer, intent(in) :: lo(3), hi(3) - integer, intent(in) :: q_lo(3), q_hi(3) - integer, intent(in) :: nc - integer, intent(in), value :: ncomp - real(rt), intent(inout) :: q(q_lo(1):q_hi(1), q_lo(2):q_hi(2), q_lo(3):q_hi(3), nc) - integer, intent(in) :: domlo(3), domhi(3) - - integer :: i, j, k - real(rt), pointer :: lap(:,:,:) - - call bl_allocate(lap, lo, hi) - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - - lap(i,j,k) = compute_laplacian(i, j, k, ncomp+1, & - q, q_lo, q_hi, nc, & - domlo, domhi) - - end do - end do - end do - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - q(i,j,k,ncomp+1) = q(i,j,k,ncomp+1) + TWENTYFOURTH * lap(i,j,k) - end do - end do - end do - - call bl_deallocate(lap) - - end subroutine ca_make_fourth_in_place_n - - end module fourth_order From 3ab2be039e076222640a7a61da5176cfc51e01b9 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 27 Apr 2020 11:03:04 -0400 Subject: [PATCH 2/9] more work on porting the average stuff to C++ --- Source/driver/Castro.cpp | 73 ++++++++++++++++---------- Source/driver/Castro_F.H | 26 --------- Source/driver/Castro_advance_sdc.cpp | 51 +++++++++--------- Source/hydro/Castro_hydro.cpp | 25 ++++----- Source/hydro/Castro_mol_hydro.cpp | 47 +++++++---------- Source/hydro/Make.package | 2 + Source/hydro/fourth_center_average.H | 43 +++++++++++++++ Source/hydro/fourth_center_average.cpp | 11 +++- Source/sdc/Castro_sdc.cpp | 61 ++++++++++++--------- 9 files changed, 188 insertions(+), 151 deletions(-) create mode 100644 Source/hydro/fourth_center_average.H diff --git a/Source/driver/Castro.cpp b/Source/driver/Castro.cpp index 61bdcddaca..8ef5e17722 100644 --- a/Source/driver/Castro.cpp +++ b/Source/driver/Castro.cpp @@ -1056,16 +1056,20 @@ Castro::initData () AmrLevel::FillPatch(*this, Sborder, NUM_GROW, cur_time, State_Type, 0, NUM_STATE); // note: this cannot be tiled - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto const domain_lo = geom.Domain().loVect3d(); + auto const domain_hi = geom.Domain().hiVect3d(); + + FArrayBox tmp; for (MFIter mfi(S_new); mfi.isValid(); ++mfi) { - const Box& box = mfi.validbox(); + const Box& box = mfi.validbox(); + + tmp.resize(box, 1); + Elixir elix_tmp = tmp.elixir(); + auto tmp_arr = tmp.array(); - ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(box), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + make_fourth_in_place(box, Sborder.array(mfi), tmp_arr, domain_lo, domain_hi); } // now copy back the averages @@ -1079,16 +1083,20 @@ Castro::initData () AmrLevel::FillPatch(*this, Sborder, NUM_GROW, cur_time, State_Type, 0, NUM_STATE); // convert to centers -- not tile safe - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto const domain_lo = geom.Domain().loVect3d(); + auto const domain_hi = geom.Domain().hiVect3d(); + + FArrayBox tmp; for (MFIter mfi(S_new); mfi.isValid(); ++mfi) { const Box& box = mfi.growntilebox(2); - ca_make_cell_center_in_place(BL_TO_FORTRAN_BOX(box), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + tmp.resize(box, 1); + Elixir elix_tmp = tmp.elixir(); + auto tmp_arr = tmp.array(); + + make_cell_center_in_place(box, Sborder.array(mfi), tmp_arr, domain_lo, domain_hi); } // reset the energy -- do this in one ghost cell so we can average in place below @@ -1131,9 +1139,11 @@ Castro::initData () { const Box& box = mfi.validbox(); - ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(box), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + tmp.resize(box, 1); + Elixir elix_tmp = tmp.elixir(); + auto tmp_arr = tmp.array(); + + make_fourth_in_place(box, Sborder.array(mfi), tmp_arr, domain_lo, domain_hi); } // now copy back the averages for UEINT and UTEMP only @@ -3319,22 +3329,24 @@ Castro::computeTemp(MultiFab& State, Real time, int ng) // convert to cell centers -- this will result in Stemp being // cell centered only on 1 ghost cells - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto const domain_lo = geom.Domain().loVect3d(); + auto const domain_hi = geom.Domain().hiVect3d(); + + FArrayBox tmp; for (MFIter mfi(Stemp); mfi.isValid(); ++mfi) { const Box& bx = mfi.growntilebox(1); const Box& bx0 = mfi.tilebox(); const int idx = mfi.tileIndex(); - ca_compute_lap_term(BL_TO_FORTRAN_BOX(bx0), - BL_TO_FORTRAN_FAB(Stemp[mfi]), - BL_TO_FORTRAN_ANYD(Eint_lap[mfi]), UEINT, - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + compute_lap_term(bx0, Stemp.array(mfi), Eint_lap.array(mfi), UEINT, + domain_lo, domain_hi); + + tmp.resize(bx, 1); + Elixir elix_tmp = tmp.elixir(); + auto tmp_arr = tmp.array(); - ca_make_cell_center_in_place(BL_TO_FORTRAN_BOX(bx), - BL_TO_FORTRAN_FAB(Stemp[mfi]), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + make_cell_center_in_place(bx, Stemp.arry(mfi), tmp_arr, domain_lo, domain_hi); } @@ -3421,19 +3433,22 @@ Castro::computeTemp(MultiFab& State, Real time, int ng) // cell-averages -- this is 4th-order and will be a no-op for // those zones where e wasn't changed. - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto const domain_lo = geom.Domain().loVect3d(); + auto const domain_hi = geom.Domain().hiVect3d(); + + FArrayBox tmp; for (MFIter mfi(Stemp); mfi.isValid(); ++mfi) { const Box& bx = mfi.tilebox(); const int idx = mfi.tileIndex(); - // only temperature - ca_make_fourth_in_place_n(BL_TO_FORTRAN_BOX(bx), - BL_TO_FORTRAN_FAB(Stemp[mfi]), UTEMP, - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + tmp.resize(bx, 1); + Elixir elix_tmp = tmp.elixir(); + auto tmp_arr = tmp.array(); + // only temperature + make_fourth_in_place_n(bx, Stemp.array(mfi), UTEMP, tmp_arr, domain_lo, domain_hi); } // correct UEINT diff --git a/Source/driver/Castro_F.H b/Source/driver/Castro_F.H index 3bc9265ace..c0ecda1c16 100644 --- a/Source/driver/Castro_F.H +++ b/Source/driver/Castro_F.H @@ -180,32 +180,6 @@ extern "C" const amrex::Real* dx, amrex::Real* dt); #endif - void ca_make_fourth_average(const int lo[], const int hi[], - BL_FORT_FAB_ARG_3D(q), const int* nc, - const BL_FORT_FAB_ARG_3D(q_bar), const int* nc_bar, - const int* domlo, const int* domhi); - - void ca_make_fourth_in_place(const int lo[], const int hi[], - BL_FORT_FAB_ARG_3D(q), const int* nc, - const int* domlo, const int* domhi); - - void ca_make_fourth_in_place_n(const int lo[], const int hi[], - BL_FORT_FAB_ARG_3D(q), const int* nc, const int ncomp, - const int* domlo, const int* domhi); - - void ca_compute_lap_term(const int lo[], const int hi[], - const BL_FORT_FAB_ARG_3D(U), const int* nc, - BL_FORT_FAB_ARG_ANYD(lap), const int ncomp, - const int* domlo, const int* domhi); - - void ca_make_cell_center(const int lo[], const int hi[], - const BL_FORT_FAB_ARG_3D(U), const int* nc, - BL_FORT_FAB_ARG_3D(U_cc), const int* nc_cc, - const int* domlo, const int* domhi); - - void ca_make_cell_center_in_place(const int lo[], const int hi[], - BL_FORT_FAB_ARG_3D(q), const int* nc, - const int* domlo, const int* domhi); #ifdef RADIATION void ca_inelastic_sct diff --git a/Source/driver/Castro_advance_sdc.cpp b/Source/driver/Castro_advance_sdc.cpp index d1b9191b89..f39fffaa26 100644 --- a/Source/driver/Castro_advance_sdc.cpp +++ b/Source/driver/Castro_advance_sdc.cpp @@ -40,8 +40,8 @@ Castro::do_advance_sdc (Real time, MultiFab& S_old = get_old_data(State_Type); MultiFab& S_new = get_new_data(State_Type); - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto const domain_lo = geom.Domain().loVect3d(); + auto const domain_hi = geom.Domain().hiVect3d(); // Perform initialization steps. @@ -103,10 +103,8 @@ Castro::do_advance_sdc (Real time, for (MFIter mfi(S_new); mfi.isValid(); ++mfi) { const Box& gbx = mfi.growntilebox(1); - ca_make_cell_center(BL_TO_FORTRAN_BOX(gbx), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - BL_TO_FORTRAN_FAB(Sburn[mfi]), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + + make_cell_center(gbx, Sborder.array(mfi), Sburn.array(mfi), domain_lo, domain_hi); } @@ -122,11 +120,16 @@ Castro::do_advance_sdc (Real time, AmrLevel::FillPatch(*this, old_source, old_source.nGrow(), prev_time, Source_Type, 0, NSRC); // Now convert to cell averages. This loop cannot be tiled. + FArrayBox tmp; + for (MFIter mfi(S_new); mfi.isValid(); ++mfi) { const Box& bx = mfi.tilebox(); - ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(bx), - BL_TO_FORTRAN_FAB(old_source[mfi]), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + + tmp.resize(bx, 1); + Elixir elix_tmp = tmp.elixir(); + auto tmp_arr = tmp.array(); + + make_fourth_in_place(bx, old_source.array(mfi), tmp_arr, domain_lo, domain_hi); } } else { @@ -305,6 +308,7 @@ Castro::do_advance_sdc (Real time, FArrayBox U_center; FArrayBox R_center; + FArrayBox tmp; // this cannot be tiled for (MFIter mfi(R_new); mfi.isValid(); ++mfi) { @@ -313,33 +317,30 @@ Castro::do_advance_sdc (Real time, if (sdc_order == 4) { - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); - // convert S_new to cell-centers U_center.resize(obx, NUM_STATE); - ca_make_cell_center(BL_TO_FORTRAN_BOX(obx), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - BL_TO_FORTRAN_FAB(U_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + Elixir elix_u_center = U_center.elixir(); + auto const U_center_arr = U_center.array(); + + make_cell_center(obx, Sborder.array(mfi), U_center_arr, domain_lo, domain_hi); // pass in the reaction source and state at centers, including one ghost cell // and derive everything that is needed including 1 ghost cell R_center.resize(obx, R_new.nComp()); + Elixir elix_r_center = R_center.elixir(); + auto const R_center_arr = R_center.array(); + Array4 const Sburn_arr = Sburn.array(mfi); - Array4 const U_center_arr = U_center.array(); - Array4 const R_center_arr = R_center.array(); + // we don't worry about the difference between centers and averages - ca_store_reaction_state(obx, - Sburn_arr, - U_center_arr, - R_center_arr); + ca_store_reaction_state(obx, Sburn_arr, U_center_arr, R_center_arr); // convert R_new from centers to averages in place - ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(bx), - BL_TO_FORTRAN_FAB(R_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + tmp.resize(bx, 1); + Elixir elix_tmp = tmp.elixir(); + auto const tmp_arr = tmp.array(); + make_fourth_in_place(bx, R_center_arr, tmp_arr, domain_lo, domain_hi); // store R_new[mfi].copy(R_center, bx, 0, bx, 0, R_new.nComp()); diff --git a/Source/hydro/Castro_hydro.cpp b/Source/hydro/Castro_hydro.cpp index 0e4d8f0f33..e8777f66cc 100644 --- a/Source/hydro/Castro_hydro.cpp +++ b/Source/hydro/Castro_hydro.cpp @@ -124,11 +124,13 @@ Castro::cons_to_prim_fourth(const Real time) // convert the conservative state cell averages to primitive cell // averages with 4th order accuracy - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto const domain_lo = geom.Domain().loVect3d(); + auto const domain_hi = geom.Domain().hiVect3d(); MultiFab& S_new = get_new_data(State_Type); + FArrayBox U_cc; + // we don't support radiation here #ifdef RADIATION amrex::Abort("radiation not supported to fourth order"); @@ -147,13 +149,11 @@ Castro::cons_to_prim_fourth(const Real time) // convert U_avg to U_cc -- this will use a Laplacian // operation and will result in U_cc defined only on // NUM_GROW-1 ghost cells at the end. - FArrayBox U_cc; U_cc.resize(qbx, NUM_STATE); + Elixir elix_u_cc = U_cc.elixir(); + auto const U_cc_arr = U_cc.array(); - ca_make_cell_center(BL_TO_FORTRAN_BOX(qbxm1), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - BL_TO_FORTRAN_FAB(U_cc), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + make_cell_center(qbxm1, Sborder.array(mfi), U_cc_arr, domain_lo, domain_hi); // enforce the minimum density on the new cell-centered state ca_enforce_minimum_density @@ -218,17 +218,12 @@ Castro::cons_to_prim_fourth(const Real time) // this will create q, qaux in NUM_GROW-1 ghost cells, but that's // we need here - ca_make_fourth_average(BL_TO_FORTRAN_BOX(qbxm1), - BL_TO_FORTRAN_FAB(q[mfi]), - BL_TO_FORTRAN_FAB(q_bar[mfi]), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + make_fourth_average(qbxm1, q.array(mfi), q_bar.array(mfi), domain_lo, domain_hi); // not sure if we need to convert qaux this way, or if we can // just evaluate it (we may not need qaux at all actually) - ca_make_fourth_average(BL_TO_FORTRAN_BOX(qbxm1), - BL_TO_FORTRAN_FAB(qaux[mfi]), - BL_TO_FORTRAN_FAB(qaux_bar[mfi]), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + + make_fourth_average(qbmx1, qaux.array(mfi), qaux_bar.array(mfi), domain_lo, domain_hi); } diff --git a/Source/hydro/Castro_mol_hydro.cpp b/Source/hydro/Castro_mol_hydro.cpp index 7417c836da..cb3ce1bfac 100644 --- a/Source/hydro/Castro_mol_hydro.cpp +++ b/Source/hydro/Castro_mol_hydro.cpp @@ -41,8 +41,8 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) BL_PROFILE_VAR("Castro::advance_hydro_ca_umdrv()", CA_UMDRV); - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + const auto domain_lo = geom.Domain().loVect3d(); + const auto domain_hi = geom.Domain().hiVect3d(); #ifdef _OPENMP #pragma omp parallel @@ -253,11 +253,8 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) if (do_hydro == 0) { - Array4 const f_avg_arr = f_avg.array(); - AMREX_PARALLEL_FOR_4D(nbx, NUM_STATE, i, j, k, n, { f_avg_arr(i,j,k,n) = 0.0;}); - } #ifdef DIFFUSION @@ -289,23 +286,19 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) #if AMREX_SPACEDIM >= 2 // construct the face-center interface states q_fc - AMREX_PARALLEL_FOR_4D(nbx, NQ, i, j, k, n, { - bool test = (n == QGC) || (n == QTEMP); - - if (test) continue; + amrex::ParallelFor(bx, NQ, + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + bool test = (n == QGC) || (n == QTEMP); - Real lap = 0.0; - int ncomp_f = n + 1; + if (test) return; - trans_laplacian(i, j, k, ncomp_f, - idir_f, - BL_TO_FORTRAN_ANYD(q_avg), NQ, - &lap, - ARLIM_3D(domain_lo), ARLIM_3D(domain_hi)); + Real lap = trans_laplacian(i, j, k, n, idir, + q_avg_arr, domain_lo, domain_hi); - q_fc_arr(i,j,k,n) = q_avg_arr(i,j,k,n) - 1.0/24.0 * lap; + q_fc_arr(i,j,k,n) = q_avg_arr(i,j,k,n) - (1.0_rt/24.0_rt) * lap; - }); + }); // compute the face-center fluxes F(q_fc) Array4 const f_arr = (flux[idir]).array(); @@ -336,20 +329,16 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) // compute the final fluxes Array4 const flux_arr = (flux[idir]).array(); - AMREX_PARALLEL_FOR_4D(nbx, NUM_STATE, i, j, k, n, { - - Real lap = 0.0; - int ncomp_f = n + 1; + amrex::ParallelFor(nbx, NUM_STATE, + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { - trans_laplacian(i, j, k, ncomp_f, - idir_f, - BL_TO_FORTRAN_ANYD(f_avg), NUM_STATE, - &lap, - ARLIM_3D(domain_lo), ARLIM_3D(domain_hi)); + Real lap = trans_laplacian(i, j, k, n, idir, + f_avg_arr, domain_lo, domain_hi); - flux_arr(i,j,k,n) = flux_arr(i,j,k,n) + 1.0/24.0 * lap; + flux_arr(i,j,k,n) += (1.0_rt/24.0_rt) * lap; - }); + }); #endif diff --git a/Source/hydro/Make.package b/Source/hydro/Make.package index b56b2b6ac0..8fcfca5467 100644 --- a/Source/hydro/Make.package +++ b/Source/hydro/Make.package @@ -33,6 +33,8 @@ ca_F90EXE_sources += Castro_ctu_nd.F90 ifeq ($(USE_TRUE_SDC),TRUE) ca_F90EXE_sources += fourth_order_nd.F90 + CEXE_sources += fourth_center_average.cpp + CEXE_headers += fourth_center_average.H ca_F90EXE_sources += Castro_mol_nd.F90 ca_F90EXE_sources += Castro_fourth_order_nd.F90 endif diff --git a/Source/hydro/fourth_center_average.H b/Source/hydro/fourth_center_average.H new file mode 100644 index 0000000000..4af48507da --- /dev/null +++ b/Source/hydro/fourth_center_average.H @@ -0,0 +1,43 @@ +#ifndef _FOURTH_CENTER_AVERAGE_H_ +#define _FOURTH_CENTER_AVERAGE_H_ + +Real compute_laplacian(int i, int j, int k, int n, + Array4 const a, + GpuArray& domlo, GpuArray& domhi); + +Real trans_laplacian(int i, int j, int k, int n, + int idir, + Array4 const a, + GpuArray& domlo, GpuArray& domhi); + +void make_cell_center(const Box& bx, + Array4 const& U, + Array4 const& U_cc, + GpuArray& domlo, GpuArray& domhi); + +void make_cell_center_in_place(const Box& bx, + Array4 const& U, + Array4 const& tmp, + GpuArray& domlo, GpuArray& domhi); + +void compute_lap_term(const Box& bx, + Array4 const& U, + Array4 const& lap, const int ncomp, + GpuArray& domlo, GpuArray& domhi); + +void make_fourth_average(const Box& bx, + Array4 const& q, + Array4 const& q_bar, + GpuArray& domlo, GpuArray& domhi); + +void make_fourth_in_place(const Box& bx, + Array4 const& q, + Array4 const& tmp, + GpuArray& domlo, GpuArray& domhi); + +void make_fourth_in_place_n(const Box& bx, + Array4 const& q, const int ncomp, + Array4 const& tmp, + GpuArray& domlo, GpuArray& domhi); + +#endif diff --git a/Source/hydro/fourth_center_average.cpp b/Source/hydro/fourth_center_average.cpp index b27941e108..7fbcbe2903 100644 --- a/Source/hydro/fourth_center_average.cpp +++ b/Source/hydro/fourth_center_average.cpp @@ -1,3 +1,12 @@ +#include +#include + +#include + +#include "castro_params.H" + +using namespace amrex; + // Note: pretty much all of these routines below assume that dx(1) = dx(2) = dx(3) @@ -162,7 +171,7 @@ void make_cell_center_in_place(const Box& bx, void compute_lap_term(const Box& bx, - Array4 const& Y, + Array4 const& U, Array4 const& lap, const int ncomp, GpuArray& domlo, GpuArray& domhi) { diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp index b1e3b1fce6..65cfae3999 100644 --- a/Source/sdc/Castro_sdc.cpp +++ b/Source/sdc/Castro_sdc.cpp @@ -26,8 +26,8 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) // for 4th order reactive SDC, we need to first compute the source, C // and do a ghost cell fill on it - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto const domain_lo = geom.Domain().loVect3d(); + auto const domain_hi = geom.Domain().hiVect3d(); // the timestep from m to m+1 Real dt_m = (dt_sdc[m_end] - dt_sdc[m_start]) * dt; @@ -123,6 +123,7 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) FArrayBox C_center; FArrayBox U_new_center; FArrayBox R_new; + FArrayBox tmp; FArrayBox C2; @@ -193,10 +194,10 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) // convert the starting U to cell-centered on a fab-by-fab basis // -- including one ghost cell U_center.resize(bx1, NUM_STATE); - ca_make_cell_center(BL_TO_FORTRAN_BOX(bx1), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - BL_TO_FORTRAN_FAB(U_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + Elixir elix_u_center = U_center.elixir(); + auto U_center_arr = U_center.array(); + + make_cell_center(bx1, Sborder.array(mfi), U_center_arr, domain_lo, domain_hi); // sometimes the Laplacian can make the species go negative near discontinuities ca_normalize_species(AMREX_INT_ANYD(bx1.loVect()), AMREX_INT_ANYD(bx1.hiVect()), @@ -204,21 +205,20 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) // convert the C source to cell-centers C_center.resize(bx1, NUM_STATE); - ca_make_cell_center(BL_TO_FORTRAN_BOX(bx1), - BL_TO_FORTRAN_FAB(C_source[mfi]), - BL_TO_FORTRAN_FAB(C_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + Elixir elix_c_center = C_center.elixir(); + auto C_center_arr = C_center.array(); + + make_cell_center(bx1, C_source.array(mfi), C_center_arr, domain_lo, domain_hi); // solve for the updated cell-center U using our cell-centered C -- we // need to do this with one ghost cell U_new_center.resize(bx1, NUM_STATE); + Elixir elix_u_new_center = U_new_center.elixir(); + auto U_new_center_arr = U_new_center.array(); // initialize U_new with our guess for the new state, stored as // an average in Sburn - ca_make_cell_center(BL_TO_FORTRAN_BOX(bx1), - BL_TO_FORTRAN_FAB(Sburn[mfi]), - BL_TO_FORTRAN_FAB(U_new_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + make_cell_center(bx1, Sburn.array(mfi), U_new_center_arr, domain_lo, domain_hi); ca_sdc_update_centers_o4(BL_TO_FORTRAN_BOX(bx1), &dt_m, BL_TO_FORTRAN_3D(U_center), @@ -236,9 +236,11 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) BL_TO_FORTRAN_3D(U_new_center), BL_TO_FORTRAN_3D(R_new)); - ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(bx), - BL_TO_FORTRAN_FAB(R_new), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + tmp.resize(bx, 1); + Elixir elix_tmp = tmp.elixir(); + auto const tmp_arr = tmp.array(); + + make_fourth_in_place(bx, R_new_arr, tmp_arr, domain_lo, domain_hi); // now do the conservative update using this to get // We'll also need to pass in @@ -313,8 +315,8 @@ Castro::construct_old_react_source(MultiFab& U_state, BL_PROFILE("Castro::construct_old_react_source()"); - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto const domain_lo = geom.Domain().loVect3d(); + auto const domain_hi = geom.Domain().hiVect3d(); if (sdc_order == 4 && input_is_average) { @@ -323,6 +325,7 @@ Castro::construct_old_react_source(MultiFab& U_state, FArrayBox U_center; FArrayBox R_center; + FArrayBox tmp; for (MFIter mfi(U_state); mfi.isValid(); ++mfi) { @@ -332,13 +335,16 @@ Castro::construct_old_react_source(MultiFab& U_state, // Convert to centers U_center.resize(obx, NUM_STATE); - ca_make_cell_center(BL_TO_FORTRAN_BOX(obx), - BL_TO_FORTRAN_FAB(U_state[mfi]), - BL_TO_FORTRAN_FAB(U_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + Elixir elix_u_center = U_center.elixir(); + auto const U_center_arr = U_center.array(); + + make_cell_center(obx, U_state.array(mfi), U_center_arr, domain_lo, domain_hi); // burn, including one ghost cell R_center.resize(obx, NUM_STATE); + Elixir elix_r_center = R_center.elixir(); + auto const R_center_arr = R_center.array(); + ca_instantaneous_react(BL_TO_FORTRAN_BOX(obx), BL_TO_FORTRAN_3D(U_center), BL_TO_FORTRAN_3D(R_center)); @@ -349,9 +355,12 @@ Castro::construct_old_react_source(MultiFab& U_state, Sburn[mfi].copy(R_center, obx, 0, obx, 0, NUM_STATE); // convert R to averages (in place) - ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(bx), - BL_TO_FORTRAN_FAB(R_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + + tmp.resize(bx, 1); + Elixir elix_tmp = tmp.elixir(); + auto const tmp_arr = tmp.array(); + + make_fourth_in_place(bx, R_center_arr, tmp_arr, domain_lo, domain_hi); // copy this to the center R_source[mfi].copy(R_center, bx, 0, bx, 0, NUM_STATE); From 557114ed462d68c71b6c3369beb26cd9a13a2f1c Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 27 Apr 2020 12:31:33 -0400 Subject: [PATCH 3/9] this compiles now --- Source/driver/Castro.cpp | 18 +- Source/driver/Castro_advance_sdc.cpp | 4 +- Source/hydro/Castro_hydro.H | 46 +++++ Source/hydro/Castro_hydro.cpp | 7 +- Source/hydro/Castro_mol_hydro.cpp | 24 ++- Source/hydro/fourth_center_average.H | 148 +++++++++++---- Source/hydro/fourth_center_average.cpp | 240 ++++++++++--------------- 7 files changed, 283 insertions(+), 204 deletions(-) diff --git a/Source/driver/Castro.cpp b/Source/driver/Castro.cpp index 8ef5e17722..4cfa4542c7 100644 --- a/Source/driver/Castro.cpp +++ b/Source/driver/Castro.cpp @@ -1056,8 +1056,8 @@ Castro::initData () AmrLevel::FillPatch(*this, Sborder, NUM_GROW, cur_time, State_Type, 0, NUM_STATE); // note: this cannot be tiled - auto const domain_lo = geom.Domain().loVect3d(); - auto const domain_hi = geom.Domain().hiVect3d(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); FArrayBox tmp; @@ -1083,8 +1083,8 @@ Castro::initData () AmrLevel::FillPatch(*this, Sborder, NUM_GROW, cur_time, State_Type, 0, NUM_STATE); // convert to centers -- not tile safe - auto const domain_lo = geom.Domain().loVect3d(); - auto const domain_hi = geom.Domain().hiVect3d(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); FArrayBox tmp; @@ -3329,8 +3329,8 @@ Castro::computeTemp(MultiFab& State, Real time, int ng) // convert to cell centers -- this will result in Stemp being // cell centered only on 1 ghost cells - auto const domain_lo = geom.Domain().loVect3d(); - auto const domain_hi = geom.Domain().hiVect3d(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); FArrayBox tmp; @@ -3346,7 +3346,7 @@ Castro::computeTemp(MultiFab& State, Real time, int ng) Elixir elix_tmp = tmp.elixir(); auto tmp_arr = tmp.array(); - make_cell_center_in_place(bx, Stemp.arry(mfi), tmp_arr, domain_lo, domain_hi); + make_cell_center_in_place(bx, Stemp.array(mfi), tmp_arr, domain_lo, domain_hi); } @@ -3433,8 +3433,8 @@ Castro::computeTemp(MultiFab& State, Real time, int ng) // cell-averages -- this is 4th-order and will be a no-op for // those zones where e wasn't changed. - auto const domain_lo = geom.Domain().loVect3d(); - auto const domain_hi = geom.Domain().hiVect3d(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); FArrayBox tmp; diff --git a/Source/driver/Castro_advance_sdc.cpp b/Source/driver/Castro_advance_sdc.cpp index f39fffaa26..a0579916c6 100644 --- a/Source/driver/Castro_advance_sdc.cpp +++ b/Source/driver/Castro_advance_sdc.cpp @@ -40,8 +40,8 @@ Castro::do_advance_sdc (Real time, MultiFab& S_old = get_old_data(State_Type); MultiFab& S_new = get_new_data(State_Type); - auto const domain_lo = geom.Domain().loVect3d(); - auto const domain_hi = geom.Domain().hiVect3d(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); // Perform initialization steps. diff --git a/Source/hydro/Castro_hydro.H b/Source/hydro/Castro_hydro.H index 5ae037bc01..ed37d67bfd 100644 --- a/Source/hydro/Castro_hydro.H +++ b/Source/hydro/Castro_hydro.H @@ -541,4 +541,50 @@ /// @param ng Number of ghost cells /// void linear_to_hybrid_momentum(amrex::MultiFab& state, int ng = 0); + +#endif + +#ifdef TRUE_SDC + + Real compute_laplacian(int i, int j, int k, int n, + Array4 const a, + GpuArray const& lo_periodic, GpuArray const& hi_periodic, + GpuArray const& domlo, GpuArray const& domhi); + + Real trans_laplacian(int i, int j, int k, int n, + int idir, + Array4 const a, + GpuArray const& lo_periodic, GpuArray const& hi_periodic, + GpuArray const& domlo, GpuArray const& domhi); + + void make_cell_center(const Box& bx, + Array4 const& U, + Array4 const& U_cc, + GpuArray& domlo, GpuArray& domhi); + + void make_cell_center_in_place(const Box& bx, + Array4 const& U, + Array4 const& tmp, + GpuArray& domlo, GpuArray& domhi); + + void compute_lap_term(const Box& bx, + Array4 const& U, + Array4 const& lap, const int ncomp, + GpuArray& domlo, GpuArray& domhi); + + void make_fourth_average(const Box& bx, + Array4 const& q, + Array4 const& q_bar, + GpuArray& domlo, GpuArray& domhi); + + void make_fourth_in_place(const Box& bx, + Array4 const& q, + Array4 const& tmp, + GpuArray& domlo, GpuArray& domhi); + + void make_fourth_in_place_n(const Box& bx, + Array4 const& q, const int ncomp, + Array4 const& tmp, + GpuArray& domlo, GpuArray& domhi); + #endif diff --git a/Source/hydro/Castro_hydro.cpp b/Source/hydro/Castro_hydro.cpp index e8777f66cc..74718b8697 100644 --- a/Source/hydro/Castro_hydro.cpp +++ b/Source/hydro/Castro_hydro.cpp @@ -124,8 +124,8 @@ Castro::cons_to_prim_fourth(const Real time) // convert the conservative state cell averages to primitive cell // averages with 4th order accuracy - auto const domain_lo = geom.Domain().loVect3d(); - auto const domain_hi = geom.Domain().hiVect3d(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); MultiFab& S_new = get_new_data(State_Type); @@ -182,7 +182,6 @@ Castro::cons_to_prim_fourth(const Real time) // convert U_cc to q_cc (we'll store this temporarily in q, // qaux). This will remain valid only on the NUM_GROW-1 ghost // cells. - auto U_cc_arr = U_cc.array(); auto q_arr = q.array(mfi); auto qaux_arr = qaux.array(mfi); @@ -223,7 +222,7 @@ Castro::cons_to_prim_fourth(const Real time) // not sure if we need to convert qaux this way, or if we can // just evaluate it (we may not need qaux at all actually) - make_fourth_average(qbmx1, qaux.array(mfi), qaux_bar.array(mfi), domain_lo, domain_hi); + make_fourth_average(qbxm1, qaux.array(mfi), qaux_bar.array(mfi), domain_lo, domain_hi); } diff --git a/Source/hydro/Castro_mol_hydro.cpp b/Source/hydro/Castro_mol_hydro.cpp index cb3ce1bfac..aca963366a 100644 --- a/Source/hydro/Castro_mol_hydro.cpp +++ b/Source/hydro/Castro_mol_hydro.cpp @@ -7,6 +7,8 @@ #include "diffusion_util.H" #endif +#include "fourth_center_average.H" + using namespace amrex; /// @@ -41,8 +43,8 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) BL_PROFILE_VAR("Castro::advance_hydro_ca_umdrv()", CA_UMDRV); - const auto domain_lo = geom.Domain().loVect3d(); - const auto domain_hi = geom.Domain().hiVect3d(); + GpuArray domain_lo = geom.Domain().loVect3d(); + GpuArray domain_hi = geom.Domain().hiVect3d(); #ifdef _OPENMP #pragma omp parallel @@ -286,6 +288,16 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) #if AMREX_SPACEDIM >= 2 // construct the face-center interface states q_fc + const int* lo_bc = phys_bc.lo(); + const int* hi_bc = phys_bc.hi(); + + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < 3; idir++) { + lo_periodic[idir] = lo_bc[idir] == Interior; + hi_periodic[idir] = hi_bc[idir] == Interior; + } + amrex::ParallelFor(bx, NQ, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { @@ -293,8 +305,8 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) if (test) return; - Real lap = trans_laplacian(i, j, k, n, idir, - q_avg_arr, domain_lo, domain_hi); + Real lap = trans_laplacian(i, j, k, n, idir, q_avg_arr, + lo_periodic, hi_periodic, domain_lo, domain_hi); q_fc_arr(i,j,k,n) = q_avg_arr(i,j,k,n) - (1.0_rt/24.0_rt) * lap; @@ -333,8 +345,8 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { - Real lap = trans_laplacian(i, j, k, n, idir, - f_avg_arr, domain_lo, domain_hi); + Real lap = trans_laplacian(i, j, k, n, idir, f_avg_arr, + lo_periodic, hi_periodic, domain_lo, domain_hi); flux_arr(i,j,k,n) += (1.0_rt/24.0_rt) * lap; diff --git a/Source/hydro/fourth_center_average.H b/Source/hydro/fourth_center_average.H index 4af48507da..eda63cec16 100644 --- a/Source/hydro/fourth_center_average.H +++ b/Source/hydro/fourth_center_average.H @@ -1,43 +1,115 @@ #ifndef _FOURTH_CENTER_AVERAGE_H_ #define _FOURTH_CENTER_AVERAGE_H_ -Real compute_laplacian(int i, int j, int k, int n, - Array4 const a, - GpuArray& domlo, GpuArray& domhi); - -Real trans_laplacian(int i, int j, int k, int n, - int idir, - Array4 const a, - GpuArray& domlo, GpuArray& domhi); - -void make_cell_center(const Box& bx, - Array4 const& U, - Array4 const& U_cc, - GpuArray& domlo, GpuArray& domhi); - -void make_cell_center_in_place(const Box& bx, - Array4 const& U, - Array4 const& tmp, - GpuArray& domlo, GpuArray& domhi); - -void compute_lap_term(const Box& bx, - Array4 const& U, - Array4 const& lap, const int ncomp, - GpuArray& domlo, GpuArray& domhi); - -void make_fourth_average(const Box& bx, - Array4 const& q, - Array4 const& q_bar, - GpuArray& domlo, GpuArray& domhi); - -void make_fourth_in_place(const Box& bx, - Array4 const& q, - Array4 const& tmp, - GpuArray& domlo, GpuArray& domhi); - -void make_fourth_in_place_n(const Box& bx, - Array4 const& q, const int ncomp, - Array4 const& tmp, - GpuArray& domlo, GpuArray& domhi); +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real +Castro:: compute_laplacian(int i, int j, int k, int n, + Array4 const a, + GpuArray const& lo_periodic, GpuArray const& hi_periodic, + GpuArray const& domlo, GpuArray const& domhi) { + + Real lapx = 0.0; + Real lapy = 0.0; + Real lapz = 0.0; + + // we use 2nd-order accurate one-sided stencils at the physical + // boundaries note: this differs from the suggestion in MC2011 -- + // they just suggest using the Laplacian from +1 off the interior. + // I like the one-sided better. + + if (i == domlo[0] && !lo_periodic[0]) { + lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n); + + } else if (i == domhi[0] && !hi_periodic[0]) { + lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n); + + } else { + lapx = a(i+1,j,k,n) - 2.0_rt*a(i,j,k,n) + a(i-1,j,k,n); + } + +#if AMREX_SPACEDIM >= 2 + if (j == domlo[1] && !lo_periodic[1]) { + lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n); + + } else if (j == domhi[1] && !hi_periodic[1]) { + lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n); + + } else { + lapy = a(i,j+1,k,n) - 2.0_rt*a(i,j,k,n) + a(i,j-1,k,n); + } +#endif + +#if AMREX_SPACEDIM == 3 + if (k == domlo[2] && !lo_periodic[2]) { + lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n); + + } else if (k == domhi[2] && !hi_periodic[2]) { + lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n); + + } else { + lapz = a(i,j,k+1,n) - 2.0_rt*a(i,j,k,n) + a(i,j,k-1,n); + } +#endif + + return lapx + lapy + lapz; +} + +AMREX_GPU_DEVICE AMREX_FORCE_INLINE +Real +Castro::trans_laplacian(int i, int j, int k, int n, + int idir, + Array4 const a, + GpuArray const& lo_periodic, GpuArray const& hi_periodic, + GpuArray const& domlo, GpuArray const& domhi) { + + Real lapx = 0.0; + Real lapy = 0.0; + Real lapz = 0.0; + + // we use 2nd-order accurate one-sided stencils at the physical boundaries + if (idir != 0) { + + if (i == domlo[0] && !lo_periodic[0]) { + lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n); + + } else if (i == domhi[0] && !hi_periodic[0]) { + lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n); + + } else { + lapx = a(i+1,j,k,n) - 2.0_rt*a(i,j,k,n) + a(i-1,j,k,n); + } + } + + if (idir != 1) { + + if (j == domlo[1] && !lo_periodic[1]) { + lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n); + + } else if (j == domhi[1] && !hi_periodic[1]) { + lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n); + + } else { + lapy = a(i,j+1,k,n) - 2.0_rt*a(i,j,k,n) + a(i,j-1,k,n); + } + } + +#if AMREX_SPACEDIM == 3 + if (idir != 2) { + + if (k == domlo[2] && !lo_periodic[2]) { + lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n); + + } else if (k == domhi[2] && !hi_periodic[2]) { + lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n); + + } else { + lapz = a(i,j,k+1,n) - 2.0_rt*a(i,j,k,n) + a(i,j,k-1,n); + } + } +#endif + + return lapx + lapy + lapz; +} + #endif diff --git a/Source/hydro/fourth_center_average.cpp b/Source/hydro/fourth_center_average.cpp index 7fbcbe2903..9f715082b8 100644 --- a/Source/hydro/fourth_center_average.cpp +++ b/Source/hydro/fourth_center_average.cpp @@ -1,151 +1,53 @@ -#include -#include - -#include - -#include "castro_params.H" +#include "Castro.H" +#include "fourth_center_average.H" using namespace amrex; - - // Note: pretty much all of these routines below assume that dx(1) = dx(2) = dx(3) -Real compute_laplacian(int i, int j, int k, int n, - Array4 const a, - GpuArray& domlo, GpuArray& domhi) { - - Real lapx = 0.0; - Real lapy = 0.0; - Real lapz = 0.0; - - // we use 2nd-order accurate one-sided stencils at the physical - // boundaries note: this differs from the suggestion in MC2011 -- - // they just suggest using the Laplacian from +1 off the interior. - // I like the one-sided better. - - if (i == domlo[0] && physbc_lo[0] != Interior) { - lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n); - - } else if (i == domhi[0] && physbc_hi[0] != Interior) { - lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n); - - } else { - lapx = a(i+1,j,k,n) - TWO*a(i,j,k,n) + a(i-1,j,k,n); - } - -#if AMREX_SPACEDIM >= 2 - if (j == domlo[1] && physbc_lo[1] != Interior) { - lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n); - - } else if (j == domhi[1] && physbc_hi[1] != Interior) { - lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n); - - } else { - lapy = a(i,j+1,k,n) - TWO*a(i,j,k,n) + a(i,j-1,k,n); - } -#endif - -#if AMREX_SPACEDIM == 3 - if (k == domlo[2] && physbc_lo[2] != Interior) { - lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n); - - } else if (k == domhi[2] && physbc_hi[2] != Interior) { - lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n); - - } else { - lapz = a(i,j,k+1,n) - TWO*a(i,j,k,n) + a(i,j,k-1,n); - } -#endif - - return lapx + lapy + lapz; -} - - -Real trans_laplacian(int i, int j, int k, int n, - int idir, - Array4 const a, - GpuArray& domlo, GpuArray& domhi) { - - Real lapx = 0.0; - Real lapy = 0.0; - Real lapz = 0.0; - - // we use 2nd-order accurate one-sided stencils at the physical boundaries - if (idir != 0) { - - if (i == domlo[0] && physbc_lo[0] != Interior) { - lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n); - - } else if (i == domhi[0] && physbc_hi[0] != Interior) { - lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n); - - } else { - lapx = a(i+1,j,k,n) - TWO*a(i,j,k,n) + a(i-1,j,k,n); - } - } - - if (idir != 1) { - if (j == domlo[1] && physbc_lo[1] != Interior) { - lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n); - - } else if (j == domhi[1] && physbc_hi[1] != Interior) { - lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n); - - } else { - lapy = a(i,j+1,k,n) - TWO*a(i,j,k,n) + a(i,j-1,k,n); - } - } - -#if AMREX_SPACEDIM == 3 - if (idir != 2) { - - if (k == domlo[2] && physbc_lo[2] != Interior) { - lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n); - - } else if (k == domhi[2] && physbc_hi[2] != Interior) { - lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n); - - } else { - lapz = a(i,j,k+1,n) - TWO*a(i,j,k,n) + a(i,j,k-1,n); - } - } -#endif - - return lapx + lapy + lapz; -} - - -void make_cell_center(const Box& bx, - Array4 const& U, - Array4 const& U_cc, - GpuArray& domlo, GpuArray& domhi) { +void +Castro::make_cell_center(const Box& bx, + Array4 const& U, + Array4 const& U_cc, + GpuArray& domlo, GpuArray& domhi) { // Take a cell-average state U and a convert it to a cell-center // state U_cc via U_cc = U - 1/24 L U - U_lo = U.lbound(); - U_hi = U.ubound(); + auto U_lo = lbound(U); + auto U_hi = ubound(U); AMREX_ASSERT(U_lo[0] <= lo[0]-1 && U_hi[0] >= hi[0]+1 && (AMREX_SPACEDIM >= 2 && (U_lo[1] <= lo[1]-1 && U_hi[1] >= hi[1]+1)) && (AMREX_SPACEDIM == 3 && (U_lo[2] <= lo[2]-1 && U_hi[2] >= hi[2]+1))); + const int* lo_bc = phys_bc.lo(); + const int* hi_bc = phys_bc.hi(); + + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < 3; idir++) { + lo_periodic[idir] = lo_bc[idir] == Interior; + hi_periodic[idir] = hi_bc[idir] == Interior; + } + amrex::ParallelFor(bx, U.nComp(), [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { - Real lap = compute_laplacian(i, j, k, n, U, domlo, domhi); + Real lap = compute_laplacian(i, j, k, n, U, + lo_periodic, hi_periodic, domlo, domhi); U_cc(i,j,k,n) = U(i,j,k,n) - (1.0_rt/24.0_rt) * lap; }); } -void make_cell_center_in_place(const Box& bx, - Array4 const& U, - Array4 const& tmp, - GpuArray& domlo, GpuArray& domhi) { +void +Castro::make_cell_center_in_place(const Box& bx, + Array4 const& U, + Array4 const& tmp, + GpuArray& domlo, GpuArray& domhi) { // Take a cell-average state U and make it cell-centered in place // via U <- U - 1/24 L U. Note that this operation is not tile @@ -153,12 +55,23 @@ void make_cell_center_in_place(const Box& bx, // here, tmp is a temporary memory space we use to store one-component's Laplacian + const int* lo_bc = phys_bc.lo(); + const int* hi_bc = phys_bc.hi(); + + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < 3; idir++) { + lo_periodic[idir] = lo_bc[idir] == Interior; + hi_periodic[idir] = hi_bc[idir] == Interior; + } + for (int n = 0; n < U.nComp(); n++) { amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - tmp(i,j,k) = compute_laplacian(i, j, k, n, U, domlo, domhi); + tmp(i,j,k) = compute_laplacian(i, j, k, n, U, + lo_periodic, hi_periodic, domlo, domhi); }); amrex::ParallelFor(bx, @@ -170,47 +83,72 @@ void make_cell_center_in_place(const Box& bx, } -void compute_lap_term(const Box& bx, - Array4 const& U, - Array4 const& lap, const int ncomp, - GpuArray& domlo, GpuArray& domhi) { +void +Castro::compute_lap_term(const Box& bx, + Array4 const& U, + Array4 const& lap, const int ncomp, + GpuArray& domlo, GpuArray& domhi) { // Computes the h**2/24 L U term that is used in correcting // cell-center to averages (and back) + const int* lo_bc = phys_bc.lo(); + const int* hi_bc = phys_bc.hi(); + + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < 3; idir++) { + lo_periodic[idir] = lo_bc[idir] == Interior; + hi_periodic[idir] = hi_bc[idir] == Interior; + } + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { lap(i,j,k) = (1.0_rt/24.0_rt) * - compute_laplacian(i, j, k, ncomp, U, domlo, domhi); + compute_laplacian(i, j, k, ncomp, U, + lo_periodic, hi_periodic, domlo, domhi); }); } -void make_fourth_average(const Box& bx, - Array4 const& q, - Array4 const& q_bar, - GpuArray& domlo, GpuArray& domhi) { +void +Castro::make_fourth_average(const Box& bx, + Array4 const& q, + Array4 const& q_bar, + GpuArray& domlo, GpuArray& domhi) { // Take the cell-center state q and another state q_bar (e.g., // constructed from the cell-average U) and replace the cell-center // q with a 4th-order accurate cell-average, q <- q + 1/24 L q_bar + const int* lo_bc = phys_bc.lo(); + const int* hi_bc = phys_bc.hi(); + + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < 3; idir++) { + lo_periodic[idir] = lo_bc[idir] == Interior; + hi_periodic[idir] = hi_bc[idir] == Interior; + } + amrex::ParallelFor(bx, q.nComp(), [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { - Real lap = compute_laplacian(i, j, k, n, q_bar, domlo, domhi); + Real lap = compute_laplacian(i, j, k, n, q_bar, + lo_periodic, hi_periodic, domlo, domhi); q(i,j,k,n) += (1.0_rt/24.0_rt) * lap; }); } -void make_fourth_in_place(const Box& bx, - Array4 const& q, - Array4 const& tmp, - GpuArray& domlo, GpuArray& domhi) { +void +Castro::make_fourth_in_place(const Box& bx, + Array4 const& q, + Array4 const& tmp, + GpuArray& domlo, GpuArray& domhi) { // Take the cell-center q and makes it a cell-average q, in place // (e.g. q is overwritten by its average), q <- q + 1/24 L q. @@ -222,10 +160,11 @@ void make_fourth_in_place(const Box& bx, } -void make_fourth_in_place_n(const Box& bx, - Array4 const& q, const int ncomp, - Array4 const& tmp, - GpuArray& domlo, GpuArray& domhi) { +void +Castro::make_fourth_in_place_n(const Box& bx, + Array4 const& q, const int ncomp, + Array4 const& tmp, + GpuArray& domlo, GpuArray& domhi) { // Take the cell-center q and makes it a cell-average q, in place // (e.g. q is overwritten by its average), q <- q + 1/24 L q. @@ -234,16 +173,27 @@ void make_fourth_in_place_n(const Box& bx, // This version operates on a single component. Here ncomp is the // component to update + const int* lo_bc = phys_bc.lo(); + const int* hi_bc = phys_bc.hi(); + + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < 3; idir++) { + lo_periodic[idir] = lo_bc[idir] == Interior; + hi_periodic[idir] = hi_bc[idir] == Interior; + } + amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - tmp(i,j,k) = compute_laplacian(i, j, k, ncomp, q, domlo, domhi); + tmp(i,j,k) = compute_laplacian(i, j, k, ncomp, q, + lo_periodic, hi_periodic, domlo, domhi); }); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - q(i,j,k,ncomp) += (1.0_rt/24.0_rt) * lap(i,j,k); + q(i,j,k,ncomp) += (1.0_rt/24.0_rt) * tmp(i,j,k); }); } From 969b046d60ae0a083821b24d3e20d56639d6e3e1 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 27 Apr 2020 13:07:24 -0400 Subject: [PATCH 4/9] fix assert --- Source/hydro/fourth_center_average.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Source/hydro/fourth_center_average.cpp b/Source/hydro/fourth_center_average.cpp index 9f715082b8..1a5685995a 100644 --- a/Source/hydro/fourth_center_average.cpp +++ b/Source/hydro/fourth_center_average.cpp @@ -18,9 +18,12 @@ Castro::make_cell_center(const Box& bx, auto U_lo = lbound(U); auto U_hi = ubound(U); - AMREX_ASSERT(U_lo[0] <= lo[0]-1 && U_hi[0] >= hi[0]+1 && - (AMREX_SPACEDIM >= 2 && (U_lo[1] <= lo[1]-1 && U_hi[1] >= hi[1]+1)) && - (AMREX_SPACEDIM == 3 && (U_lo[2] <= lo[2]-1 && U_hi[2] >= hi[2]+1))); + auto lo = bx.loVect(); + auto hi = bx.hiVect(); + + AMREX_ASSERT(U_lo.x <= lo[0]-1 && U_hi.x >= hi[0]+1 && + (AMREX_SPACEDIM >= 2 && (U_lo.y <= lo[1]-1 && U_hi.y >= hi[1]+1)) && + (AMREX_SPACEDIM == 3 && (U_lo.z <= lo[2]-1 && U_hi.z >= hi[2]+1))); const int* lo_bc = phys_bc.lo(); const int* hi_bc = phys_bc.hi(); From afce97b212f95bd25676099cd2070cf75401d754 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 27 Apr 2020 13:18:04 -0400 Subject: [PATCH 5/9] fix the fix --- Source/hydro/fourth_center_average.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Source/hydro/fourth_center_average.cpp b/Source/hydro/fourth_center_average.cpp index 1a5685995a..95053f3e0f 100644 --- a/Source/hydro/fourth_center_average.cpp +++ b/Source/hydro/fourth_center_average.cpp @@ -21,9 +21,13 @@ Castro::make_cell_center(const Box& bx, auto lo = bx.loVect(); auto hi = bx.hiVect(); - AMREX_ASSERT(U_lo.x <= lo[0]-1 && U_hi.x >= hi[0]+1 && - (AMREX_SPACEDIM >= 2 && (U_lo.y <= lo[1]-1 && U_hi.y >= hi[1]+1)) && - (AMREX_SPACEDIM == 3 && (U_lo.z <= lo[2]-1 && U_hi.z >= hi[2]+1))); + AMREX_ASSERT(U_lo.x <= lo[0]-1 && U_hi.x >= hi[0]+1); +#if AMREX_SPACEDIM >= 2 + AMREX_ASSERT(U_lo.y <= lo[1]-1 && U_hi.y >= hi[1]+1); +#endif +#if AMREX_SPACEDIM == 3 + AMREX_ASSERT(U_lo.z <= lo[2]-1 && U_hi.z >= hi[2]+1); +#endif const int* lo_bc = phys_bc.lo(); const int* hi_bc = phys_bc.hi(); From 4322ff6484aa64b934b2a5a0cd8955ec922d9e8a Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 27 Apr 2020 13:23:00 -0400 Subject: [PATCH 6/9] fix a copy for SDC -- wrong number of ghost cells were assumed --- Source/driver/Castro_advance_sdc.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/driver/Castro_advance_sdc.cpp b/Source/driver/Castro_advance_sdc.cpp index d1b9191b89..a7da9e7b1c 100644 --- a/Source/driver/Castro_advance_sdc.cpp +++ b/Source/driver/Castro_advance_sdc.cpp @@ -147,7 +147,7 @@ Castro::do_advance_sdc (Real time, // store the result in sources_for_hydro -- this is what will // be used in the final conservative update - MultiFab::Copy(sources_for_hydro, old_source, 0, 0, NSRC, sources_for_hydro.nGrow()); + MultiFab::Copy(sources_for_hydro, old_source, 0, 0, NSRC, old_source.nGrow()); } else { sources_for_hydro.setVal(0.0, 0); From 2415b8c400461f08a2aad9cac36adf1f7fb20970 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 27 Apr 2020 13:38:39 -0400 Subject: [PATCH 7/9] fix a box --- Source/hydro/Castro_mol_hydro.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/hydro/Castro_mol_hydro.cpp b/Source/hydro/Castro_mol_hydro.cpp index aca963366a..a6f021a2aa 100644 --- a/Source/hydro/Castro_mol_hydro.cpp +++ b/Source/hydro/Castro_mol_hydro.cpp @@ -298,7 +298,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) hi_periodic[idir] = hi_bc[idir] == Interior; } - amrex::ParallelFor(bx, NQ, + amrex::ParallelFor(nbx, NQ, [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept { bool test = (n == QGC) || (n == QTEMP); From fd605153687a4d2fdbb2ab746ed54ac02bfaf850 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Mon, 27 Apr 2020 14:18:41 -0400 Subject: [PATCH 8/9] fix compilation --- Source/sdc/Castro_sdc.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp index 65cfae3999..3aebc95047 100644 --- a/Source/sdc/Castro_sdc.cpp +++ b/Source/sdc/Castro_sdc.cpp @@ -26,8 +26,8 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) // for 4th order reactive SDC, we need to first compute the source, C // and do a ghost cell fill on it - auto const domain_lo = geom.Domain().loVect3d(); - auto const domain_hi = geom.Domain().hiVect3d(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); // the timestep from m to m+1 Real dt_m = (dt_sdc[m_end] - dt_sdc[m_start]) * dt; @@ -123,7 +123,7 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) FArrayBox C_center; FArrayBox U_new_center; FArrayBox R_new; - FArrayBox tmp; + FArrayBox tlap; FArrayBox C2; @@ -230,17 +230,17 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) // place (only for the interior) R_new.resize(bx1, NUM_STATE); Elixir elix_R_new = R_new.elixir(); - Array4 const& R_new_arr=R_new.array(); + Array4 const& R_new_arr = R_new.array(); ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx1), BL_TO_FORTRAN_3D(U_new_center), BL_TO_FORTRAN_3D(R_new)); - tmp.resize(bx, 1); - Elixir elix_tmp = tmp.elixir(); - auto const tmp_arr = tmp.array(); + tlap.resize(bx, 1); + Elixir elix_tlap = tlap.elixir(); + auto const tlap_arr = tlap.array(); - make_fourth_in_place(bx, R_new_arr, tmp_arr, domain_lo, domain_hi); + make_fourth_in_place(bx, R_new_arr, tlap_arr, domain_lo, domain_hi); // now do the conservative update using this to get // We'll also need to pass in @@ -315,8 +315,8 @@ Castro::construct_old_react_source(MultiFab& U_state, BL_PROFILE("Castro::construct_old_react_source()"); - auto const domain_lo = geom.Domain().loVect3d(); - auto const domain_hi = geom.Domain().hiVect3d(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); if (sdc_order == 4 && input_is_average) { From 859a796e15a65bc38e2651836b5aaca344261797 Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Wed, 6 May 2020 09:24:26 -0400 Subject: [PATCH 9/9] address some PR reviews --- Source/hydro/Castro_hydro.H | 19 ++++++------ Source/hydro/Castro_mol_hydro.cpp | 6 ++-- Source/hydro/fourth_center_average.H | 8 ++--- Source/hydro/fourth_center_average.cpp | 42 +++++++++++++------------- 4 files changed, 38 insertions(+), 37 deletions(-) diff --git a/Source/hydro/Castro_hydro.H b/Source/hydro/Castro_hydro.H index ed37d67bfd..f1e238c668 100644 --- a/Source/hydro/Castro_hydro.H +++ b/Source/hydro/Castro_hydro.H @@ -545,46 +545,47 @@ #endif #ifdef TRUE_SDC - + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real compute_laplacian(int i, int j, int k, int n, Array4 const a, - GpuArray const& lo_periodic, GpuArray const& hi_periodic, + GpuArray const& lo_periodic, GpuArray const& hi_periodic, GpuArray const& domlo, GpuArray const& domhi); + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real trans_laplacian(int i, int j, int k, int n, int idir, Array4 const a, - GpuArray const& lo_periodic, GpuArray const& hi_periodic, + GpuArray const& lo_periodic, GpuArray const& hi_periodic, GpuArray const& domlo, GpuArray const& domhi); void make_cell_center(const Box& bx, Array4 const& U, Array4 const& U_cc, - GpuArray& domlo, GpuArray& domhi); + GpuArray const& domlo, GpuArray const& domhi); void make_cell_center_in_place(const Box& bx, Array4 const& U, Array4 const& tmp, - GpuArray& domlo, GpuArray& domhi); + GpuArray const& domlo, GpuArray const& domhi); void compute_lap_term(const Box& bx, Array4 const& U, Array4 const& lap, const int ncomp, - GpuArray& domlo, GpuArray& domhi); + GpuArray const& domlo, GpuArray const& domhi); void make_fourth_average(const Box& bx, Array4 const& q, Array4 const& q_bar, - GpuArray& domlo, GpuArray& domhi); + GpuArray const& domlo, GpuArray const& domhi); void make_fourth_in_place(const Box& bx, Array4 const& q, Array4 const& tmp, - GpuArray& domlo, GpuArray& domhi); + GpuArray const& domlo, GpuArray const& domhi); void make_fourth_in_place_n(const Box& bx, Array4 const& q, const int ncomp, Array4 const& tmp, - GpuArray& domlo, GpuArray& domhi); + GpuArray const& domlo, GpuArray const& domhi); #endif diff --git a/Source/hydro/Castro_mol_hydro.cpp b/Source/hydro/Castro_mol_hydro.cpp index a6f021a2aa..05c0690324 100644 --- a/Source/hydro/Castro_mol_hydro.cpp +++ b/Source/hydro/Castro_mol_hydro.cpp @@ -291,9 +291,9 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) const int* lo_bc = phys_bc.lo(); const int* hi_bc = phys_bc.hi(); - GpuArray lo_periodic; - GpuArray hi_periodic; - for (int idir = 0; idir < 3; idir++) { + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < AMREX_SPACEDIM; idir++) { lo_periodic[idir] = lo_bc[idir] == Interior; hi_periodic[idir] = hi_bc[idir] == Interior; } diff --git a/Source/hydro/fourth_center_average.H b/Source/hydro/fourth_center_average.H index eda63cec16..86bb837091 100644 --- a/Source/hydro/fourth_center_average.H +++ b/Source/hydro/fourth_center_average.H @@ -1,11 +1,11 @@ #ifndef _FOURTH_CENTER_AVERAGE_H_ #define _FOURTH_CENTER_AVERAGE_H_ -AMREX_GPU_DEVICE AMREX_FORCE_INLINE +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real Castro:: compute_laplacian(int i, int j, int k, int n, Array4 const a, - GpuArray const& lo_periodic, GpuArray const& hi_periodic, + GpuArray const& lo_periodic, GpuArray const& hi_periodic, GpuArray const& domlo, GpuArray const& domhi) { Real lapx = 0.0; @@ -54,12 +54,12 @@ Castro:: compute_laplacian(int i, int j, int k, int n, return lapx + lapy + lapz; } -AMREX_GPU_DEVICE AMREX_FORCE_INLINE +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real Castro::trans_laplacian(int i, int j, int k, int n, int idir, Array4 const a, - GpuArray const& lo_periodic, GpuArray const& hi_periodic, + GpuArray const& lo_periodic, GpuArray const& hi_periodic, GpuArray const& domlo, GpuArray const& domhi) { Real lapx = 0.0; diff --git a/Source/hydro/fourth_center_average.cpp b/Source/hydro/fourth_center_average.cpp index 95053f3e0f..04a909b795 100644 --- a/Source/hydro/fourth_center_average.cpp +++ b/Source/hydro/fourth_center_average.cpp @@ -10,7 +10,7 @@ void Castro::make_cell_center(const Box& bx, Array4 const& U, Array4 const& U_cc, - GpuArray& domlo, GpuArray& domhi) { + GpuArray const& domlo, GpuArray const& domhi) { // Take a cell-average state U and a convert it to a cell-center // state U_cc via U_cc = U - 1/24 L U @@ -32,9 +32,9 @@ Castro::make_cell_center(const Box& bx, const int* lo_bc = phys_bc.lo(); const int* hi_bc = phys_bc.hi(); - GpuArray lo_periodic; - GpuArray hi_periodic; - for (int idir = 0; idir < 3; idir++) { + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < AMREX_SPACEDIM; idir++) { lo_periodic[idir] = lo_bc[idir] == Interior; hi_periodic[idir] = hi_bc[idir] == Interior; } @@ -54,7 +54,7 @@ void Castro::make_cell_center_in_place(const Box& bx, Array4 const& U, Array4 const& tmp, - GpuArray& domlo, GpuArray& domhi) { + GpuArray const& domlo, GpuArray const& domhi) { // Take a cell-average state U and make it cell-centered in place // via U <- U - 1/24 L U. Note that this operation is not tile @@ -65,9 +65,9 @@ Castro::make_cell_center_in_place(const Box& bx, const int* lo_bc = phys_bc.lo(); const int* hi_bc = phys_bc.hi(); - GpuArray lo_periodic; - GpuArray hi_periodic; - for (int idir = 0; idir < 3; idir++) { + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < AMREX_SPACEDIM; idir++) { lo_periodic[idir] = lo_bc[idir] == Interior; hi_periodic[idir] = hi_bc[idir] == Interior; } @@ -94,7 +94,7 @@ void Castro::compute_lap_term(const Box& bx, Array4 const& U, Array4 const& lap, const int ncomp, - GpuArray& domlo, GpuArray& domhi) { + GpuArray const& domlo, GpuArray const& domhi) { // Computes the h**2/24 L U term that is used in correcting // cell-center to averages (and back) @@ -102,9 +102,9 @@ Castro::compute_lap_term(const Box& bx, const int* lo_bc = phys_bc.lo(); const int* hi_bc = phys_bc.hi(); - GpuArray lo_periodic; - GpuArray hi_periodic; - for (int idir = 0; idir < 3; idir++) { + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < AMREX_SPACEDIM; idir++) { lo_periodic[idir] = lo_bc[idir] == Interior; hi_periodic[idir] = hi_bc[idir] == Interior; } @@ -124,7 +124,7 @@ void Castro::make_fourth_average(const Box& bx, Array4 const& q, Array4 const& q_bar, - GpuArray& domlo, GpuArray& domhi) { + GpuArray const& domlo, GpuArray const& domhi) { // Take the cell-center state q and another state q_bar (e.g., // constructed from the cell-average U) and replace the cell-center @@ -133,9 +133,9 @@ Castro::make_fourth_average(const Box& bx, const int* lo_bc = phys_bc.lo(); const int* hi_bc = phys_bc.hi(); - GpuArray lo_periodic; - GpuArray hi_periodic; - for (int idir = 0; idir < 3; idir++) { + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < AMREX_SPACEDIM; idir++) { lo_periodic[idir] = lo_bc[idir] == Interior; hi_periodic[idir] = hi_bc[idir] == Interior; } @@ -155,7 +155,7 @@ void Castro::make_fourth_in_place(const Box& bx, Array4 const& q, Array4 const& tmp, - GpuArray& domlo, GpuArray& domhi) { + GpuArray const& domlo, GpuArray const& domhi) { // Take the cell-center q and makes it a cell-average q, in place // (e.g. q is overwritten by its average), q <- q + 1/24 L q. @@ -171,7 +171,7 @@ void Castro::make_fourth_in_place_n(const Box& bx, Array4 const& q, const int ncomp, Array4 const& tmp, - GpuArray& domlo, GpuArray& domhi) { + GpuArray const& domlo, GpuArray const& domhi) { // Take the cell-center q and makes it a cell-average q, in place // (e.g. q is overwritten by its average), q <- q + 1/24 L q. @@ -183,9 +183,9 @@ Castro::make_fourth_in_place_n(const Box& bx, const int* lo_bc = phys_bc.lo(); const int* hi_bc = phys_bc.hi(); - GpuArray lo_periodic; - GpuArray hi_periodic; - for (int idir = 0; idir < 3; idir++) { + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < AMREX_SPACEDIM; idir++) { lo_periodic[idir] = lo_bc[idir] == Interior; hi_periodic[idir] = hi_bc[idir] == Interior; }