diff --git a/Source/driver/Castro.cpp b/Source/driver/Castro.cpp index 8286447d83..797f141e32 100644 --- a/Source/driver/Castro.cpp +++ b/Source/driver/Castro.cpp @@ -1109,16 +1109,20 @@ Castro::initData () AmrLevel::FillPatch(*this, Sborder, NUM_GROW, cur_time, State_Type, 0, NUM_STATE); // note: this cannot be tiled - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); + + FArrayBox tmp; for (MFIter mfi(S_new); mfi.isValid(); ++mfi) { - const Box& box = mfi.validbox(); + const Box& box = mfi.validbox(); + + tmp.resize(box, 1); + Elixir elix_tmp = tmp.elixir(); + auto tmp_arr = tmp.array(); - ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(box), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + make_fourth_in_place(box, Sborder.array(mfi), tmp_arr, domain_lo, domain_hi); } // now copy back the averages @@ -1132,16 +1136,20 @@ Castro::initData () AmrLevel::FillPatch(*this, Sborder, NUM_GROW, cur_time, State_Type, 0, NUM_STATE); // convert to centers -- not tile safe - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); + + FArrayBox tmp; for (MFIter mfi(S_new); mfi.isValid(); ++mfi) { const Box& box = mfi.growntilebox(2); - ca_make_cell_center_in_place(BL_TO_FORTRAN_BOX(box), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + tmp.resize(box, 1); + Elixir elix_tmp = tmp.elixir(); + auto tmp_arr = tmp.array(); + + make_cell_center_in_place(box, Sborder.array(mfi), tmp_arr, domain_lo, domain_hi); } // reset the energy -- do this in one ghost cell so we can average in place below @@ -1184,9 +1192,11 @@ Castro::initData () { const Box& box = mfi.validbox(); - ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(box), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + tmp.resize(box, 1); + Elixir elix_tmp = tmp.elixir(); + auto tmp_arr = tmp.array(); + + make_fourth_in_place(box, Sborder.array(mfi), tmp_arr, domain_lo, domain_hi); } // now copy back the averages for UEINT and UTEMP only @@ -3472,22 +3482,24 @@ Castro::computeTemp( // convert to cell centers -- this will result in Stemp being // cell centered only on 1 ghost cells - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); + + FArrayBox tmp; for (MFIter mfi(Stemp); mfi.isValid(); ++mfi) { const Box& bx = mfi.growntilebox(1); const Box& bx0 = mfi.tilebox(); const int idx = mfi.tileIndex(); - ca_compute_lap_term(BL_TO_FORTRAN_BOX(bx0), - BL_TO_FORTRAN_FAB(Stemp[mfi]), - BL_TO_FORTRAN_ANYD(Eint_lap[mfi]), UEINT, - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + compute_lap_term(bx0, Stemp.array(mfi), Eint_lap.array(mfi), UEINT, + domain_lo, domain_hi); + + tmp.resize(bx, 1); + Elixir elix_tmp = tmp.elixir(); + auto tmp_arr = tmp.array(); - ca_make_cell_center_in_place(BL_TO_FORTRAN_BOX(bx), - BL_TO_FORTRAN_FAB(Stemp[mfi]), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + make_cell_center_in_place(bx, Stemp.array(mfi), tmp_arr, domain_lo, domain_hi); } @@ -3578,19 +3590,22 @@ Castro::computeTemp( // cell-averages -- this is 4th-order and will be a no-op for // those zones where e wasn't changed. - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); + + FArrayBox tmp; for (MFIter mfi(Stemp); mfi.isValid(); ++mfi) { const Box& bx = mfi.tilebox(); const int idx = mfi.tileIndex(); - // only temperature - ca_make_fourth_in_place_n(BL_TO_FORTRAN_BOX(bx), - BL_TO_FORTRAN_FAB(Stemp[mfi]), UTEMP, - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + tmp.resize(bx, 1); + Elixir elix_tmp = tmp.elixir(); + auto tmp_arr = tmp.array(); + // only temperature + make_fourth_in_place_n(bx, Stemp.array(mfi), UTEMP, tmp_arr, domain_lo, domain_hi); } // correct UEINT diff --git a/Source/driver/Castro_F.H b/Source/driver/Castro_F.H index b146b43de3..b3a5be337e 100644 --- a/Source/driver/Castro_F.H +++ b/Source/driver/Castro_F.H @@ -185,32 +185,6 @@ extern "C" const amrex::Real* dx, amrex::Real* dt); #endif - void ca_make_fourth_average(const int lo[], const int hi[], - BL_FORT_FAB_ARG_3D(q), const int* nc, - const BL_FORT_FAB_ARG_3D(q_bar), const int* nc_bar, - const int* domlo, const int* domhi); - - void ca_make_fourth_in_place(const int lo[], const int hi[], - BL_FORT_FAB_ARG_3D(q), const int* nc, - const int* domlo, const int* domhi); - - void ca_make_fourth_in_place_n(const int lo[], const int hi[], - BL_FORT_FAB_ARG_3D(q), const int* nc, const int ncomp, - const int* domlo, const int* domhi); - - void ca_compute_lap_term(const int lo[], const int hi[], - const BL_FORT_FAB_ARG_3D(U), const int* nc, - BL_FORT_FAB_ARG_ANYD(lap), const int ncomp, - const int* domlo, const int* domhi); - - void ca_make_cell_center(const int lo[], const int hi[], - const BL_FORT_FAB_ARG_3D(U), const int* nc, - BL_FORT_FAB_ARG_3D(U_cc), const int* nc_cc, - const int* domlo, const int* domhi); - - void ca_make_cell_center_in_place(const int lo[], const int hi[], - BL_FORT_FAB_ARG_3D(q), const int* nc, - const int* domlo, const int* domhi); #ifdef RADIATION void ca_inelastic_sct diff --git a/Source/driver/Castro_advance_sdc.cpp b/Source/driver/Castro_advance_sdc.cpp index 2c17b05fb4..37b61f157d 100644 --- a/Source/driver/Castro_advance_sdc.cpp +++ b/Source/driver/Castro_advance_sdc.cpp @@ -41,8 +41,8 @@ Castro::do_advance_sdc (Real time, MultiFab& S_old = get_old_data(State_Type); MultiFab& S_new = get_new_data(State_Type); - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); // Perform initialization steps. @@ -104,10 +104,8 @@ Castro::do_advance_sdc (Real time, for (MFIter mfi(S_new); mfi.isValid(); ++mfi) { const Box& gbx = mfi.growntilebox(1); - ca_make_cell_center(BL_TO_FORTRAN_BOX(gbx), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - BL_TO_FORTRAN_FAB(Sburn[mfi]), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + + make_cell_center(gbx, Sborder.array(mfi), Sburn.array(mfi), domain_lo, domain_hi); } @@ -123,11 +121,16 @@ Castro::do_advance_sdc (Real time, AmrLevel::FillPatch(*this, old_source, old_source.nGrow(), prev_time, Source_Type, 0, NSRC); // Now convert to cell averages. This loop cannot be tiled. + FArrayBox tmp; + for (MFIter mfi(S_new); mfi.isValid(); ++mfi) { const Box& bx = mfi.tilebox(); - ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(bx), - BL_TO_FORTRAN_FAB(old_source[mfi]), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + + tmp.resize(bx, 1); + Elixir elix_tmp = tmp.elixir(); + auto tmp_arr = tmp.array(); + + make_fourth_in_place(bx, old_source.array(mfi), tmp_arr, domain_lo, domain_hi); } } else { @@ -306,6 +309,7 @@ Castro::do_advance_sdc (Real time, FArrayBox U_center; FArrayBox R_center; + FArrayBox tmp; // this cannot be tiled for (MFIter mfi(R_new); mfi.isValid(); ++mfi) { @@ -314,33 +318,30 @@ Castro::do_advance_sdc (Real time, if (sdc_order == 4) { - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); - // convert S_new to cell-centers U_center.resize(obx, NUM_STATE); - ca_make_cell_center(BL_TO_FORTRAN_BOX(obx), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - BL_TO_FORTRAN_FAB(U_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + Elixir elix_u_center = U_center.elixir(); + auto const U_center_arr = U_center.array(); + + make_cell_center(obx, Sborder.array(mfi), U_center_arr, domain_lo, domain_hi); // pass in the reaction source and state at centers, including one ghost cell // and derive everything that is needed including 1 ghost cell R_center.resize(obx, R_new.nComp()); + Elixir elix_r_center = R_center.elixir(); + auto const R_center_arr = R_center.array(); + Array4 const Sburn_arr = Sburn.array(mfi); - Array4 const U_center_arr = U_center.array(); - Array4 const R_center_arr = R_center.array(); + // we don't worry about the difference between centers and averages - ca_store_reaction_state(obx, - Sburn_arr, - U_center_arr, - R_center_arr); + ca_store_reaction_state(obx, Sburn_arr, U_center_arr, R_center_arr); // convert R_new from centers to averages in place - ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(bx), - BL_TO_FORTRAN_FAB(R_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + tmp.resize(bx, 1); + Elixir elix_tmp = tmp.elixir(); + auto const tmp_arr = tmp.array(); + make_fourth_in_place(bx, R_center_arr, tmp_arr, domain_lo, domain_hi); // store R_new[mfi].copy(R_center, bx, 0, bx, 0, R_new.nComp()); diff --git a/Source/hydro/Castro_hydro.H b/Source/hydro/Castro_hydro.H index 5ae037bc01..f1e238c668 100644 --- a/Source/hydro/Castro_hydro.H +++ b/Source/hydro/Castro_hydro.H @@ -541,4 +541,51 @@ /// @param ng Number of ghost cells /// void linear_to_hybrid_momentum(amrex::MultiFab& state, int ng = 0); + +#endif + +#ifdef TRUE_SDC + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + Real compute_laplacian(int i, int j, int k, int n, + Array4 const a, + GpuArray const& lo_periodic, GpuArray const& hi_periodic, + GpuArray const& domlo, GpuArray const& domhi); + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + Real trans_laplacian(int i, int j, int k, int n, + int idir, + Array4 const a, + GpuArray const& lo_periodic, GpuArray const& hi_periodic, + GpuArray const& domlo, GpuArray const& domhi); + + void make_cell_center(const Box& bx, + Array4 const& U, + Array4 const& U_cc, + GpuArray const& domlo, GpuArray const& domhi); + + void make_cell_center_in_place(const Box& bx, + Array4 const& U, + Array4 const& tmp, + GpuArray const& domlo, GpuArray const& domhi); + + void compute_lap_term(const Box& bx, + Array4 const& U, + Array4 const& lap, const int ncomp, + GpuArray const& domlo, GpuArray const& domhi); + + void make_fourth_average(const Box& bx, + Array4 const& q, + Array4 const& q_bar, + GpuArray const& domlo, GpuArray const& domhi); + + void make_fourth_in_place(const Box& bx, + Array4 const& q, + Array4 const& tmp, + GpuArray const& domlo, GpuArray const& domhi); + + void make_fourth_in_place_n(const Box& bx, + Array4 const& q, const int ncomp, + Array4 const& tmp, + GpuArray const& domlo, GpuArray const& domhi); + #endif diff --git a/Source/hydro/Castro_hydro.cpp b/Source/hydro/Castro_hydro.cpp index 0e4d8f0f33..74718b8697 100644 --- a/Source/hydro/Castro_hydro.cpp +++ b/Source/hydro/Castro_hydro.cpp @@ -124,11 +124,13 @@ Castro::cons_to_prim_fourth(const Real time) // convert the conservative state cell averages to primitive cell // averages with 4th order accuracy - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); MultiFab& S_new = get_new_data(State_Type); + FArrayBox U_cc; + // we don't support radiation here #ifdef RADIATION amrex::Abort("radiation not supported to fourth order"); @@ -147,13 +149,11 @@ Castro::cons_to_prim_fourth(const Real time) // convert U_avg to U_cc -- this will use a Laplacian // operation and will result in U_cc defined only on // NUM_GROW-1 ghost cells at the end. - FArrayBox U_cc; U_cc.resize(qbx, NUM_STATE); + Elixir elix_u_cc = U_cc.elixir(); + auto const U_cc_arr = U_cc.array(); - ca_make_cell_center(BL_TO_FORTRAN_BOX(qbxm1), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - BL_TO_FORTRAN_FAB(U_cc), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + make_cell_center(qbxm1, Sborder.array(mfi), U_cc_arr, domain_lo, domain_hi); // enforce the minimum density on the new cell-centered state ca_enforce_minimum_density @@ -182,7 +182,6 @@ Castro::cons_to_prim_fourth(const Real time) // convert U_cc to q_cc (we'll store this temporarily in q, // qaux). This will remain valid only on the NUM_GROW-1 ghost // cells. - auto U_cc_arr = U_cc.array(); auto q_arr = q.array(mfi); auto qaux_arr = qaux.array(mfi); @@ -218,17 +217,12 @@ Castro::cons_to_prim_fourth(const Real time) // this will create q, qaux in NUM_GROW-1 ghost cells, but that's // we need here - ca_make_fourth_average(BL_TO_FORTRAN_BOX(qbxm1), - BL_TO_FORTRAN_FAB(q[mfi]), - BL_TO_FORTRAN_FAB(q_bar[mfi]), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + make_fourth_average(qbxm1, q.array(mfi), q_bar.array(mfi), domain_lo, domain_hi); // not sure if we need to convert qaux this way, or if we can // just evaluate it (we may not need qaux at all actually) - ca_make_fourth_average(BL_TO_FORTRAN_BOX(qbxm1), - BL_TO_FORTRAN_FAB(qaux[mfi]), - BL_TO_FORTRAN_FAB(qaux_bar[mfi]), - AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi)); + + make_fourth_average(qbxm1, qaux.array(mfi), qaux_bar.array(mfi), domain_lo, domain_hi); } diff --git a/Source/hydro/Castro_mol_hydro.cpp b/Source/hydro/Castro_mol_hydro.cpp index 7417c836da..05c0690324 100644 --- a/Source/hydro/Castro_mol_hydro.cpp +++ b/Source/hydro/Castro_mol_hydro.cpp @@ -7,6 +7,8 @@ #include "diffusion_util.H" #endif +#include "fourth_center_average.H" + using namespace amrex; /// @@ -41,8 +43,8 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) BL_PROFILE_VAR("Castro::advance_hydro_ca_umdrv()", CA_UMDRV); - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + GpuArray domain_lo = geom.Domain().loVect3d(); + GpuArray domain_hi = geom.Domain().hiVect3d(); #ifdef _OPENMP #pragma omp parallel @@ -253,11 +255,8 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) if (do_hydro == 0) { - Array4 const f_avg_arr = f_avg.array(); - AMREX_PARALLEL_FOR_4D(nbx, NUM_STATE, i, j, k, n, { f_avg_arr(i,j,k,n) = 0.0;}); - } #ifdef DIFFUSION @@ -289,23 +288,29 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) #if AMREX_SPACEDIM >= 2 // construct the face-center interface states q_fc - AMREX_PARALLEL_FOR_4D(nbx, NQ, i, j, k, n, { - bool test = (n == QGC) || (n == QTEMP); + const int* lo_bc = phys_bc.lo(); + const int* hi_bc = phys_bc.hi(); + + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < AMREX_SPACEDIM; idir++) { + lo_periodic[idir] = lo_bc[idir] == Interior; + hi_periodic[idir] = hi_bc[idir] == Interior; + } - if (test) continue; + amrex::ParallelFor(nbx, NQ, + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + bool test = (n == QGC) || (n == QTEMP); - Real lap = 0.0; - int ncomp_f = n + 1; + if (test) return; - trans_laplacian(i, j, k, ncomp_f, - idir_f, - BL_TO_FORTRAN_ANYD(q_avg), NQ, - &lap, - ARLIM_3D(domain_lo), ARLIM_3D(domain_hi)); + Real lap = trans_laplacian(i, j, k, n, idir, q_avg_arr, + lo_periodic, hi_periodic, domain_lo, domain_hi); - q_fc_arr(i,j,k,n) = q_avg_arr(i,j,k,n) - 1.0/24.0 * lap; + q_fc_arr(i,j,k,n) = q_avg_arr(i,j,k,n) - (1.0_rt/24.0_rt) * lap; - }); + }); // compute the face-center fluxes F(q_fc) Array4 const f_arr = (flux[idir]).array(); @@ -336,20 +341,16 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) // compute the final fluxes Array4 const flux_arr = (flux[idir]).array(); - AMREX_PARALLEL_FOR_4D(nbx, NUM_STATE, i, j, k, n, { - - Real lap = 0.0; - int ncomp_f = n + 1; + amrex::ParallelFor(nbx, NUM_STATE, + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { - trans_laplacian(i, j, k, ncomp_f, - idir_f, - BL_TO_FORTRAN_ANYD(f_avg), NUM_STATE, - &lap, - ARLIM_3D(domain_lo), ARLIM_3D(domain_hi)); + Real lap = trans_laplacian(i, j, k, n, idir, f_avg_arr, + lo_periodic, hi_periodic, domain_lo, domain_hi); - flux_arr(i,j,k,n) = flux_arr(i,j,k,n) + 1.0/24.0 * lap; + flux_arr(i,j,k,n) += (1.0_rt/24.0_rt) * lap; - }); + }); #endif diff --git a/Source/hydro/Make.package b/Source/hydro/Make.package index b56b2b6ac0..8fcfca5467 100644 --- a/Source/hydro/Make.package +++ b/Source/hydro/Make.package @@ -33,6 +33,8 @@ ca_F90EXE_sources += Castro_ctu_nd.F90 ifeq ($(USE_TRUE_SDC),TRUE) ca_F90EXE_sources += fourth_order_nd.F90 + CEXE_sources += fourth_center_average.cpp + CEXE_headers += fourth_center_average.H ca_F90EXE_sources += Castro_mol_nd.F90 ca_F90EXE_sources += Castro_fourth_order_nd.F90 endif diff --git a/Source/hydro/fourth_center_average.H b/Source/hydro/fourth_center_average.H new file mode 100644 index 0000000000..86bb837091 --- /dev/null +++ b/Source/hydro/fourth_center_average.H @@ -0,0 +1,115 @@ +#ifndef _FOURTH_CENTER_AVERAGE_H_ +#define _FOURTH_CENTER_AVERAGE_H_ + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real +Castro:: compute_laplacian(int i, int j, int k, int n, + Array4 const a, + GpuArray const& lo_periodic, GpuArray const& hi_periodic, + GpuArray const& domlo, GpuArray const& domhi) { + + Real lapx = 0.0; + Real lapy = 0.0; + Real lapz = 0.0; + + // we use 2nd-order accurate one-sided stencils at the physical + // boundaries note: this differs from the suggestion in MC2011 -- + // they just suggest using the Laplacian from +1 off the interior. + // I like the one-sided better. + + if (i == domlo[0] && !lo_periodic[0]) { + lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n); + + } else if (i == domhi[0] && !hi_periodic[0]) { + lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n); + + } else { + lapx = a(i+1,j,k,n) - 2.0_rt*a(i,j,k,n) + a(i-1,j,k,n); + } + +#if AMREX_SPACEDIM >= 2 + if (j == domlo[1] && !lo_periodic[1]) { + lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n); + + } else if (j == domhi[1] && !hi_periodic[1]) { + lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n); + + } else { + lapy = a(i,j+1,k,n) - 2.0_rt*a(i,j,k,n) + a(i,j-1,k,n); + } +#endif + +#if AMREX_SPACEDIM == 3 + if (k == domlo[2] && !lo_periodic[2]) { + lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n); + + } else if (k == domhi[2] && !hi_periodic[2]) { + lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n); + + } else { + lapz = a(i,j,k+1,n) - 2.0_rt*a(i,j,k,n) + a(i,j,k-1,n); + } +#endif + + return lapx + lapy + lapz; +} + +AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE +Real +Castro::trans_laplacian(int i, int j, int k, int n, + int idir, + Array4 const a, + GpuArray const& lo_periodic, GpuArray const& hi_periodic, + GpuArray const& domlo, GpuArray const& domhi) { + + Real lapx = 0.0; + Real lapy = 0.0; + Real lapz = 0.0; + + // we use 2nd-order accurate one-sided stencils at the physical boundaries + if (idir != 0) { + + if (i == domlo[0] && !lo_periodic[0]) { + lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n); + + } else if (i == domhi[0] && !hi_periodic[0]) { + lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n); + + } else { + lapx = a(i+1,j,k,n) - 2.0_rt*a(i,j,k,n) + a(i-1,j,k,n); + } + } + + if (idir != 1) { + + if (j == domlo[1] && !lo_periodic[1]) { + lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n); + + } else if (j == domhi[1] && !hi_periodic[1]) { + lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n); + + } else { + lapy = a(i,j+1,k,n) - 2.0_rt*a(i,j,k,n) + a(i,j-1,k,n); + } + } + +#if AMREX_SPACEDIM == 3 + if (idir != 2) { + + if (k == domlo[2] && !lo_periodic[2]) { + lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n); + + } else if (k == domhi[2] && !hi_periodic[2]) { + lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n); + + } else { + lapz = a(i,j,k+1,n) - 2.0_rt*a(i,j,k,n) + a(i,j,k-1,n); + } + } +#endif + + return lapx + lapy + lapz; +} + + +#endif diff --git a/Source/hydro/fourth_center_average.cpp b/Source/hydro/fourth_center_average.cpp new file mode 100644 index 0000000000..04a909b795 --- /dev/null +++ b/Source/hydro/fourth_center_average.cpp @@ -0,0 +1,206 @@ +#include "Castro.H" +#include "fourth_center_average.H" + +using namespace amrex; + +// Note: pretty much all of these routines below assume that dx(1) = dx(2) = dx(3) + + +void +Castro::make_cell_center(const Box& bx, + Array4 const& U, + Array4 const& U_cc, + GpuArray const& domlo, GpuArray const& domhi) { + + // Take a cell-average state U and a convert it to a cell-center + // state U_cc via U_cc = U - 1/24 L U + + auto U_lo = lbound(U); + auto U_hi = ubound(U); + + auto lo = bx.loVect(); + auto hi = bx.hiVect(); + + AMREX_ASSERT(U_lo.x <= lo[0]-1 && U_hi.x >= hi[0]+1); +#if AMREX_SPACEDIM >= 2 + AMREX_ASSERT(U_lo.y <= lo[1]-1 && U_hi.y >= hi[1]+1); +#endif +#if AMREX_SPACEDIM == 3 + AMREX_ASSERT(U_lo.z <= lo[2]-1 && U_hi.z >= hi[2]+1); +#endif + + const int* lo_bc = phys_bc.lo(); + const int* hi_bc = phys_bc.hi(); + + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < AMREX_SPACEDIM; idir++) { + lo_periodic[idir] = lo_bc[idir] == Interior; + hi_periodic[idir] = hi_bc[idir] == Interior; + } + + amrex::ParallelFor(bx, U.nComp(), + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + Real lap = compute_laplacian(i, j, k, n, U, + lo_periodic, hi_periodic, domlo, domhi); + + U_cc(i,j,k,n) = U(i,j,k,n) - (1.0_rt/24.0_rt) * lap; + + }); +} + +void +Castro::make_cell_center_in_place(const Box& bx, + Array4 const& U, + Array4 const& tmp, + GpuArray const& domlo, GpuArray const& domhi) { + + // Take a cell-average state U and make it cell-centered in place + // via U <- U - 1/24 L U. Note that this operation is not tile + // safe. + + // here, tmp is a temporary memory space we use to store one-component's Laplacian + + const int* lo_bc = phys_bc.lo(); + const int* hi_bc = phys_bc.hi(); + + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < AMREX_SPACEDIM; idir++) { + lo_periodic[idir] = lo_bc[idir] == Interior; + hi_periodic[idir] = hi_bc[idir] == Interior; + } + + for (int n = 0; n < U.nComp(); n++) { + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + tmp(i,j,k) = compute_laplacian(i, j, k, n, U, + lo_periodic, hi_periodic, domlo, domhi); + }); + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + U(i,j,k,n) = U(i,j,k,n) - (1.0_rt/24.0_rt) * tmp(i,j,k); + }); + } +} + + +void +Castro::compute_lap_term(const Box& bx, + Array4 const& U, + Array4 const& lap, const int ncomp, + GpuArray const& domlo, GpuArray const& domhi) { + + // Computes the h**2/24 L U term that is used in correcting + // cell-center to averages (and back) + + const int* lo_bc = phys_bc.lo(); + const int* hi_bc = phys_bc.hi(); + + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < AMREX_SPACEDIM; idir++) { + lo_periodic[idir] = lo_bc[idir] == Interior; + hi_periodic[idir] = hi_bc[idir] == Interior; + } + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + lap(i,j,k) = (1.0_rt/24.0_rt) * + compute_laplacian(i, j, k, ncomp, U, + lo_periodic, hi_periodic, domlo, domhi); + }); + +} + + +void +Castro::make_fourth_average(const Box& bx, + Array4 const& q, + Array4 const& q_bar, + GpuArray const& domlo, GpuArray const& domhi) { + + // Take the cell-center state q and another state q_bar (e.g., + // constructed from the cell-average U) and replace the cell-center + // q with a 4th-order accurate cell-average, q <- q + 1/24 L q_bar + + const int* lo_bc = phys_bc.lo(); + const int* hi_bc = phys_bc.hi(); + + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < AMREX_SPACEDIM; idir++) { + lo_periodic[idir] = lo_bc[idir] == Interior; + hi_periodic[idir] = hi_bc[idir] == Interior; + } + + amrex::ParallelFor(bx, q.nComp(), + [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + { + Real lap = compute_laplacian(i, j, k, n, q_bar, + lo_periodic, hi_periodic, domlo, domhi); + + q(i,j,k,n) += (1.0_rt/24.0_rt) * lap; + }); +} + + +void +Castro::make_fourth_in_place(const Box& bx, + Array4 const& q, + Array4 const& tmp, + GpuArray const& domlo, GpuArray const& domhi) { + + // Take the cell-center q and makes it a cell-average q, in place + // (e.g. q is overwritten by its average), q <- q + 1/24 L q. + // Note: this routine is not tile safe. + + for (int n = 0; n < q.nComp(); n++) { + make_fourth_in_place_n(bx, q, n, tmp, domlo, domhi); + } +} + + +void +Castro::make_fourth_in_place_n(const Box& bx, + Array4 const& q, const int ncomp, + Array4 const& tmp, + GpuArray const& domlo, GpuArray const& domhi) { + + // Take the cell-center q and makes it a cell-average q, in place + // (e.g. q is overwritten by its average), q <- q + 1/24 L q. + // Note: this routine is not tile safe. + // + // This version operates on a single component. Here ncomp is the + // component to update + + const int* lo_bc = phys_bc.lo(); + const int* hi_bc = phys_bc.hi(); + + GpuArray lo_periodic; + GpuArray hi_periodic; + for (int idir = 0; idir < AMREX_SPACEDIM; idir++) { + lo_periodic[idir] = lo_bc[idir] == Interior; + hi_periodic[idir] = hi_bc[idir] == Interior; + } + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + tmp(i,j,k) = compute_laplacian(i, j, k, ncomp, q, + lo_periodic, hi_periodic, domlo, domhi); + }); + + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { + q(i,j,k,ncomp) += (1.0_rt/24.0_rt) * tmp(i,j,k); + }); + +} diff --git a/Source/hydro/fourth_order_nd.F90 b/Source/hydro/fourth_order_nd.F90 index 7c0e427277..78a0abe6c9 100644 --- a/Source/hydro/fourth_order_nd.F90 +++ b/Source/hydro/fourth_order_nd.F90 @@ -790,414 +790,4 @@ subroutine ca_states(lo, hi, & end subroutine ca_states - - ! Note: pretty much all of these routines below assume that dx(1) = dx(2) = dx(3) - pure function compute_laplacian(i, j, k, n, & - a, a_lo, a_hi, nc, & - domlo, domhi) result (lap) - - use prob_params_module, only : physbc_lo, physbc_hi, Interior - implicit none - - integer, intent(in) :: i, j, k, n - integer, intent(in) :: a_lo(3), a_hi(3) - integer, intent(in) :: nc - real(rt), intent(in) :: a(a_lo(1):a_hi(1), a_lo(2):a_hi(2), a_lo(3):a_hi(3), nc) - integer, intent(in) :: domlo(3), domhi(3) - - real(rt) :: lapx, lapy, lapz - real(rt) :: lap - - lapx = ZERO - lapy = ZERO - lapz = ZERO - - ! we use 2nd-order accurate one-sided stencils at the physical - ! boundaries note: this differs from the suggestion in MC2011 -- - ! they just suggest using the Laplacian from +1 off the interior. - ! I like the one-sided better. - - if (i == domlo(1) .and. physbc_lo(1) /= Interior) then - lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n) - - else if (i == domhi(1) .and. physbc_hi(1) /= Interior) then - lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n) - - else - lapx = a(i+1,j,k,n) - TWO*a(i,j,k,n) + a(i-1,j,k,n) - end if - -#if AMREX_SPACEDIM >= 2 - if (j == domlo(2) .and. physbc_lo(2) /= Interior) then - lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n) - - else if (j == domhi(2) .and. physbc_hi(2) /= Interior) then - lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n) - - else - lapy = a(i,j+1,k,n) - TWO*a(i,j,k,n) + a(i,j-1,k,n) - end if - -#endif - -#if AMREX_SPACEDIM == 3 - if (k == domlo(3) .and. physbc_lo(3) /= Interior) then - lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n) - - else if (k == domhi(3) .and. physbc_hi(3) /= Interior) then - lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n) - - else - lapz = a(i,j,k+1,n) - TWO*a(i,j,k,n) + a(i,j,k-1,n) - end if -#endif - - lap = lapx + lapy + lapz - - end function compute_laplacian - - subroutine trans_laplacian(i, j, k, n, & - idir, & - a, a_lo, a_hi, nc, & - lap, & - domlo, domhi) bind(C, name="trans_laplacian") - - use prob_params_module, only : physbc_lo, physbc_hi, Interior - implicit none - - integer, intent(in), value :: i, j, k, n - integer, intent(in), value :: idir - integer, intent(in) :: a_lo(3), a_hi(3) - integer, intent(in), value :: nc - real(rt), intent(in) :: a(a_lo(1):a_hi(1), a_lo(2):a_hi(2), a_lo(3):a_hi(3), nc) - integer, intent(in) :: domlo(3), domhi(3) - real(rt), intent(out) :: lap - - real(rt) :: lapx, lapy, lapz - - lapx = ZERO - lapy = ZERO - lapz = ZERO - - ! we use 2nd-order accurate one-sided stencils at the physical boundaries - if (idir /= 1) then - - if (i == domlo(1) .and. physbc_lo(1) /= Interior) then - lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n) - - else if (i == domhi(1) .and. physbc_hi(1) /= Interior) then - lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n) - - else - lapx = a(i+1,j,k,n) - TWO*a(i,j,k,n) + a(i-1,j,k,n) - end if - end if - - if (idir /= 2) then - - if (j == domlo(2) .and. physbc_lo(2) /= Interior) then - lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n) - - else if (j == domhi(2) .and. physbc_hi(2) /= Interior) then - lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n) - - else - lapy = a(i,j+1,k,n) - TWO*a(i,j,k,n) + a(i,j-1,k,n) - end if - end if - -#if AMREX_SPACEDIM == 3 - if (idir /= 3) then - - if (k == domlo(3) .and. physbc_lo(3) /= Interior) then - lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n) - - else if (k == domhi(3) .and. physbc_hi(3) /= Interior) then - lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n) - - else - lapz = a(i,j,k+1,n) - TWO*a(i,j,k,n) + a(i,j,k-1,n) - end if - end if -#endif - - lap = lapx + lapy + lapz - - end subroutine trans_laplacian - - - subroutine ca_make_cell_center(lo, hi, & - U, U_lo, U_hi, nc, & - U_cc, U_cc_lo, U_cc_hi, nc_cc, & - domlo, domhi) & - bind(C, name="ca_make_cell_center") - ! Take a cell-average state U and a convert it to a cell-center - ! state U_cc via U_cc = U - 1/24 L U - - implicit none - - integer, intent(in) :: lo(3), hi(3) - integer, intent(in) :: U_lo(3), U_hi(3) - integer, intent(in) :: U_cc_lo(3), U_cc_hi(3) - integer, intent(in) :: nc, nc_cc - real(rt), intent(in) :: U(U_lo(1):U_hi(1), U_lo(2):U_hi(2), U_lo(3):U_hi(3), nc) - real(rt), intent(inout) :: U_cc(U_cc_lo(1):U_cc_hi(1), U_cc_lo(2):U_cc_hi(2), U_cc_lo(3):U_cc_hi(3), nc_cc) - integer, intent(in) :: domlo(3), domhi(3) - - integer :: i, j, k, n - real(rt) :: lap - - if (U_lo(1) > lo(1)-1 .or. U_hi(1) < hi(1)+1 .or. & - (AMREX_SPACEDIM >= 2 .and. (U_lo(2) > lo(2)-1 .or. U_hi(2) < hi(2)+1)) .or. & - (AMREX_SPACEDIM == 3 .and. (U_lo(3) > lo(3)-1 .or. U_hi(3) < hi(3)+1))) then - call bl_error("insufficient ghostcells in ca_make_cell_center") - end if - - do n = 1, nc - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - - lap = compute_laplacian(i, j, k, n, & - U, U_lo, U_hi, nc, & - domlo, domhi) - - U_cc(i,j,k,n) = U(i,j,k,n) - TWENTYFOURTH * lap - - end do - end do - end do - end do - - end subroutine ca_make_cell_center - - subroutine ca_make_cell_center_in_place(lo, hi, & - U, U_lo, U_hi, nc, & - domlo, domhi) & - bind(C, name="ca_make_cell_center_in_place") - ! Take a cell-average state U and make it cell-centered in place - ! via U <- U - 1/24 L U. Note that this operation is not tile - ! safe. - - use amrex_mempool_module, only : bl_allocate, bl_deallocate - - implicit none - - integer, intent(in) :: lo(3), hi(3) - integer, intent(in) :: U_lo(3), U_hi(3) - integer, intent(in) :: nc - real(rt), intent(inout) :: U(U_lo(1):U_hi(1), U_lo(2):U_hi(2), U_lo(3):U_hi(3), nc) - integer, intent(in) :: domlo(3), domhi(3) - - integer :: i, j, k, n - - real(rt), pointer :: lap(:,:,:) - - call bl_allocate(lap, lo, hi) - - do n = 1, nc - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - - lap(i,j,k) = compute_laplacian(i, j, k, n, & - U, U_lo, U_hi, nc, & - domlo, domhi) - - end do - end do - end do - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - U(i,j,k,n) = U(i,j,k,n) - TWENTYFOURTH * lap(i,j,k) - end do - end do - end do - end do - - call bl_deallocate(lap) - - end subroutine ca_make_cell_center_in_place - - subroutine ca_compute_lap_term(lo, hi, & - U, U_lo, U_hi, nc, & - lap, lap_lo, lap_hi, ncomp, & - domlo, domhi) & - bind(C, name="ca_compute_lap_term") - ! Computes the h**2/24 L U term that is used in correcting - ! cell-center to averages (and back) - - implicit none - - integer, intent(in) :: lo(3), hi(3) - integer, intent(in) :: U_lo(3), U_hi(3) - integer, intent(in) :: lap_lo(3), lap_hi(3) - integer, intent(in) :: nc - real(rt), intent(in) :: U(U_lo(1):U_hi(1), U_lo(2):U_hi(2), U_lo(3):U_hi(3), nc) - real(rt), intent(inout) :: lap(lap_lo(1):lap_hi(1), lap_lo(2):lap_hi(2), lap_lo(3):lap_hi(3)) - integer, intent(in), value :: ncomp - integer, intent(in) :: domlo(3), domhi(3) - - ! note: ncomp is C++ index (0-based) - - integer :: i, j, k - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - - lap(i,j,k) = compute_laplacian(i, j, k, ncomp+1, & - U, U_lo, U_hi, nc, & - domlo, domhi) - - end do - end do - end do - - lap(lo(1):hi(1), lo(2):hi(2), lo(3):hi(3)) = & - TWENTYFOURTH * lap(lo(1):hi(1), lo(2):hi(2), lo(3):hi(3)) - - end subroutine ca_compute_lap_term - - subroutine ca_make_fourth_average(lo, hi, & - q, q_lo, q_hi, nc, & - q_bar, q_bar_lo, q_bar_hi, nc_bar, & - domlo, domhi) & - bind(C, name="ca_make_fourth_average") - ! Take the cell-center state q and another state q_bar (e.g., - ! constructed from the cell-average U) and replace the cell-center - ! q with a 4th-order accurate cell-average, q <- q + 1/24 L q_bar - - integer, intent(in) :: lo(3), hi(3) - integer, intent(in) :: q_lo(3), q_hi(3) - integer, intent(in) :: q_bar_lo(3), q_bar_hi(3) - integer, intent(in) :: nc, nc_bar - real(rt), intent(inout) :: q(q_lo(1):q_hi(1), q_lo(2):q_hi(2), q_lo(3):q_hi(3), nc) - real(rt), intent(in) :: q_bar(q_bar_lo(1):q_bar_hi(1), q_bar_lo(2):q_bar_hi(2), q_bar_lo(3):q_bar_hi(3), nc_bar) - integer, intent(in) :: domlo(3), domhi(3) - - integer :: i, j, k, n - real(rt) :: lap - - do n = 1, nc - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - - lap = compute_laplacian(i, j, k, n, & - q_bar, q_bar_lo, q_bar_hi, nc, & - domlo, domhi) - - q(i,j,k,n) = q(i,j,k,n) + TWENTYFOURTH * lap - - end do - end do - end do - end do - - end subroutine ca_make_fourth_average - - subroutine ca_make_fourth_in_place(lo, hi, & - q, q_lo, q_hi, nc, & - domlo, domhi) & - bind(C, name="ca_make_fourth_in_place") - ! Take the cell-center q and makes it a cell-average q, in place - ! (e.g. q is overwritten by its average), q <- q + 1/24 L q. - ! Note: this routine is not tile safe. - - use amrex_mempool_module, only : bl_allocate, bl_deallocate - - integer, intent(in) :: lo(3), hi(3) - integer, intent(in) :: q_lo(3), q_hi(3) - integer, intent(in) :: nc - real(rt), intent(inout) :: q(q_lo(1):q_hi(1), q_lo(2):q_hi(2), q_lo(3):q_hi(3), nc) - integer, intent(in) :: domlo(3), domhi(3) - - integer :: i, j, k, n - real(rt), pointer :: lap(:,:,:) - - call bl_allocate(lap, lo, hi) - - do n = 1, nc - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - - lap(i,j,k) = compute_laplacian(i, j, k, n, & - q, q_lo, q_hi, nc, & - domlo, domhi) - - end do - end do - end do - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - q(i,j,k,n) = q(i,j,k,n) + TWENTYFOURTH * lap(i,j,k) - end do - end do - end do - end do - - call bl_deallocate(lap) - - end subroutine ca_make_fourth_in_place - - subroutine ca_make_fourth_in_place_n(lo, hi, & - q, q_lo, q_hi, nc, ncomp, & - domlo, domhi) & - bind(C, name="ca_make_fourth_in_place_n") - ! Take the cell-center q and makes it a cell-average q, in place - ! (e.g. q is overwritten by its average), q <- q + 1/24 L q. - ! Note: this routine is not tile safe. - ! - ! This version operates on a single component. Here ncomp is the - ! component to update -- we expect this to come in 0-based from - ! C++, so we add 1 - - use amrex_mempool_module, only : bl_allocate, bl_deallocate - - integer, intent(in) :: lo(3), hi(3) - integer, intent(in) :: q_lo(3), q_hi(3) - integer, intent(in) :: nc - integer, intent(in), value :: ncomp - real(rt), intent(inout) :: q(q_lo(1):q_hi(1), q_lo(2):q_hi(2), q_lo(3):q_hi(3), nc) - integer, intent(in) :: domlo(3), domhi(3) - - integer :: i, j, k - real(rt), pointer :: lap(:,:,:) - - call bl_allocate(lap, lo, hi) - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - - lap(i,j,k) = compute_laplacian(i, j, k, ncomp+1, & - q, q_lo, q_hi, nc, & - domlo, domhi) - - end do - end do - end do - - do k = lo(3), hi(3) - do j = lo(2), hi(2) - do i = lo(1), hi(1) - q(i,j,k,ncomp+1) = q(i,j,k,ncomp+1) + TWENTYFOURTH * lap(i,j,k) - end do - end do - end do - - call bl_deallocate(lap) - - end subroutine ca_make_fourth_in_place_n - - end module fourth_order diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp index b1e3b1fce6..3aebc95047 100644 --- a/Source/sdc/Castro_sdc.cpp +++ b/Source/sdc/Castro_sdc.cpp @@ -26,8 +26,8 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) // for 4th order reactive SDC, we need to first compute the source, C // and do a ghost cell fill on it - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); // the timestep from m to m+1 Real dt_m = (dt_sdc[m_end] - dt_sdc[m_start]) * dt; @@ -123,6 +123,7 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) FArrayBox C_center; FArrayBox U_new_center; FArrayBox R_new; + FArrayBox tlap; FArrayBox C2; @@ -193,10 +194,10 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) // convert the starting U to cell-centered on a fab-by-fab basis // -- including one ghost cell U_center.resize(bx1, NUM_STATE); - ca_make_cell_center(BL_TO_FORTRAN_BOX(bx1), - BL_TO_FORTRAN_FAB(Sborder[mfi]), - BL_TO_FORTRAN_FAB(U_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + Elixir elix_u_center = U_center.elixir(); + auto U_center_arr = U_center.array(); + + make_cell_center(bx1, Sborder.array(mfi), U_center_arr, domain_lo, domain_hi); // sometimes the Laplacian can make the species go negative near discontinuities ca_normalize_species(AMREX_INT_ANYD(bx1.loVect()), AMREX_INT_ANYD(bx1.hiVect()), @@ -204,21 +205,20 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) // convert the C source to cell-centers C_center.resize(bx1, NUM_STATE); - ca_make_cell_center(BL_TO_FORTRAN_BOX(bx1), - BL_TO_FORTRAN_FAB(C_source[mfi]), - BL_TO_FORTRAN_FAB(C_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + Elixir elix_c_center = C_center.elixir(); + auto C_center_arr = C_center.array(); + + make_cell_center(bx1, C_source.array(mfi), C_center_arr, domain_lo, domain_hi); // solve for the updated cell-center U using our cell-centered C -- we // need to do this with one ghost cell U_new_center.resize(bx1, NUM_STATE); + Elixir elix_u_new_center = U_new_center.elixir(); + auto U_new_center_arr = U_new_center.array(); // initialize U_new with our guess for the new state, stored as // an average in Sburn - ca_make_cell_center(BL_TO_FORTRAN_BOX(bx1), - BL_TO_FORTRAN_FAB(Sburn[mfi]), - BL_TO_FORTRAN_FAB(U_new_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + make_cell_center(bx1, Sburn.array(mfi), U_new_center_arr, domain_lo, domain_hi); ca_sdc_update_centers_o4(BL_TO_FORTRAN_BOX(bx1), &dt_m, BL_TO_FORTRAN_3D(U_center), @@ -230,15 +230,17 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) // place (only for the interior) R_new.resize(bx1, NUM_STATE); Elixir elix_R_new = R_new.elixir(); - Array4 const& R_new_arr=R_new.array(); + Array4 const& R_new_arr = R_new.array(); ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx1), BL_TO_FORTRAN_3D(U_new_center), BL_TO_FORTRAN_3D(R_new)); - ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(bx), - BL_TO_FORTRAN_FAB(R_new), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + tlap.resize(bx, 1); + Elixir elix_tlap = tlap.elixir(); + auto const tlap_arr = tlap.array(); + + make_fourth_in_place(bx, R_new_arr, tlap_arr, domain_lo, domain_hi); // now do the conservative update using this to get // We'll also need to pass in @@ -313,8 +315,8 @@ Castro::construct_old_react_source(MultiFab& U_state, BL_PROFILE("Castro::construct_old_react_source()"); - const int* domain_lo = geom.Domain().loVect(); - const int* domain_hi = geom.Domain().hiVect(); + auto domain_lo = geom.Domain().loVect3d(); + auto domain_hi = geom.Domain().hiVect3d(); if (sdc_order == 4 && input_is_average) { @@ -323,6 +325,7 @@ Castro::construct_old_react_source(MultiFab& U_state, FArrayBox U_center; FArrayBox R_center; + FArrayBox tmp; for (MFIter mfi(U_state); mfi.isValid(); ++mfi) { @@ -332,13 +335,16 @@ Castro::construct_old_react_source(MultiFab& U_state, // Convert to centers U_center.resize(obx, NUM_STATE); - ca_make_cell_center(BL_TO_FORTRAN_BOX(obx), - BL_TO_FORTRAN_FAB(U_state[mfi]), - BL_TO_FORTRAN_FAB(U_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + Elixir elix_u_center = U_center.elixir(); + auto const U_center_arr = U_center.array(); + + make_cell_center(obx, U_state.array(mfi), U_center_arr, domain_lo, domain_hi); // burn, including one ghost cell R_center.resize(obx, NUM_STATE); + Elixir elix_r_center = R_center.elixir(); + auto const R_center_arr = R_center.array(); + ca_instantaneous_react(BL_TO_FORTRAN_BOX(obx), BL_TO_FORTRAN_3D(U_center), BL_TO_FORTRAN_3D(R_center)); @@ -349,9 +355,12 @@ Castro::construct_old_react_source(MultiFab& U_state, Sburn[mfi].copy(R_center, obx, 0, obx, 0, NUM_STATE); // convert R to averages (in place) - ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(bx), - BL_TO_FORTRAN_FAB(R_center), - AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi)); + + tmp.resize(bx, 1); + Elixir elix_tmp = tmp.elixir(); + auto const tmp_arr = tmp.array(); + + make_fourth_in_place(bx, R_center_arr, tmp_arr, domain_lo, domain_hi); // copy this to the center R_source[mfi].copy(R_center, bx, 0, bx, 0, NUM_STATE);