diff --git a/Source/driver/Castro.cpp b/Source/driver/Castro.cpp
index 8286447d83..797f141e32 100644
--- a/Source/driver/Castro.cpp
+++ b/Source/driver/Castro.cpp
@@ -1109,16 +1109,20 @@ Castro::initData ()
            AmrLevel::FillPatch(*this, Sborder, NUM_GROW, cur_time, State_Type, 0, NUM_STATE);
 
            // note: this cannot be tiled
-           const int* domain_lo = geom.Domain().loVect();
-           const int* domain_hi = geom.Domain().hiVect();
+           auto domain_lo = geom.Domain().loVect3d();
+           auto domain_hi = geom.Domain().hiVect3d();
+
+           FArrayBox tmp;
 
            for (MFIter mfi(S_new); mfi.isValid(); ++mfi)
              {
-               const Box& box     = mfi.validbox();
+               const Box& box = mfi.validbox();
+
+               tmp.resize(box, 1);
+               Elixir elix_tmp = tmp.elixir();
+               auto tmp_arr = tmp.array();
 
-               ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(box),
-                                       BL_TO_FORTRAN_FAB(Sborder[mfi]),
-                                       AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi));
+               make_fourth_in_place(box, Sborder.array(mfi), tmp_arr, domain_lo, domain_hi);
              }
 
            // now copy back the averages
@@ -1132,16 +1136,20 @@ Castro::initData ()
          AmrLevel::FillPatch(*this, Sborder, NUM_GROW, cur_time, State_Type, 0, NUM_STATE);
 
          // convert to centers -- not tile safe
-         const int* domain_lo = geom.Domain().loVect();
-         const int* domain_hi = geom.Domain().hiVect();
+         auto domain_lo = geom.Domain().loVect3d();
+         auto domain_hi = geom.Domain().hiVect3d();
+
+         FArrayBox tmp;
 
          for (MFIter mfi(S_new); mfi.isValid(); ++mfi)
            {
              const Box& box = mfi.growntilebox(2);
 
-             ca_make_cell_center_in_place(BL_TO_FORTRAN_BOX(box),
-                                          BL_TO_FORTRAN_FAB(Sborder[mfi]),
-                                          AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi));
+             tmp.resize(box, 1);
+             Elixir elix_tmp = tmp.elixir();
+             auto tmp_arr = tmp.array();
+
+             make_cell_center_in_place(box, Sborder.array(mfi), tmp_arr, domain_lo, domain_hi);
            }
 
          // reset the energy -- do this in one ghost cell so we can average in place below
@@ -1184,9 +1192,11 @@ Castro::initData ()
            {
              const Box& box = mfi.validbox();
 
-             ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(box),
-                                     BL_TO_FORTRAN_FAB(Sborder[mfi]),
-                                     AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi));
+             tmp.resize(box, 1);
+             Elixir elix_tmp = tmp.elixir();
+             auto tmp_arr = tmp.array();
+
+             make_fourth_in_place(box, Sborder.array(mfi), tmp_arr, domain_lo, domain_hi);
            }
 
          // now copy back the averages for UEINT and UTEMP only
@@ -3472,22 +3482,24 @@ Castro::computeTemp(
 
     // convert to cell centers -- this will result in Stemp being
     // cell centered only on 1 ghost cells
-    const int* domain_lo = geom.Domain().loVect();
-    const int* domain_hi = geom.Domain().hiVect();
+    auto domain_lo = geom.Domain().loVect3d();
+    auto domain_hi = geom.Domain().hiVect3d();
+
+    FArrayBox tmp;
 
     for (MFIter mfi(Stemp); mfi.isValid(); ++mfi) {
       const Box& bx = mfi.growntilebox(1);
       const Box& bx0 = mfi.tilebox();
       const int idx = mfi.tileIndex();
 
-      ca_compute_lap_term(BL_TO_FORTRAN_BOX(bx0),
-                          BL_TO_FORTRAN_FAB(Stemp[mfi]),
-                          BL_TO_FORTRAN_ANYD(Eint_lap[mfi]), UEINT,
-                          AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi));
+      compute_lap_term(bx0, Stemp.array(mfi), Eint_lap.array(mfi), UEINT,
+                       domain_lo, domain_hi);
+
+      tmp.resize(bx, 1);
+      Elixir elix_tmp = tmp.elixir();
+      auto tmp_arr = tmp.array();
 
-      ca_make_cell_center_in_place(BL_TO_FORTRAN_BOX(bx),
-                                   BL_TO_FORTRAN_FAB(Stemp[mfi]),
-                                   AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi));
+      make_cell_center_in_place(bx, Stemp.array(mfi), tmp_arr, domain_lo, domain_hi);
 
     }
 
@@ -3578,19 +3590,22 @@ Castro::computeTemp(
     // cell-averages -- this is 4th-order and will be a no-op for
     // those zones where e wasn't changed.
 
-    const int* domain_lo = geom.Domain().loVect();
-    const int* domain_hi = geom.Domain().hiVect();
+    auto domain_lo = geom.Domain().loVect3d();
+    auto domain_hi = geom.Domain().hiVect3d();
+
+    FArrayBox tmp;
 
     for (MFIter mfi(Stemp); mfi.isValid(); ++mfi) {
 
       const Box& bx = mfi.tilebox();
       const int idx = mfi.tileIndex();
 
-      // only temperature
-      ca_make_fourth_in_place_n(BL_TO_FORTRAN_BOX(bx),
-                                BL_TO_FORTRAN_FAB(Stemp[mfi]), UTEMP,
-                                AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi));
+      tmp.resize(bx, 1);
+      Elixir elix_tmp = tmp.elixir();
+      auto tmp_arr = tmp.array();
 
+      // only temperature
+      make_fourth_in_place_n(bx, Stemp.array(mfi), UTEMP, tmp_arr, domain_lo, domain_hi);
     }
 
     // correct UEINT
diff --git a/Source/driver/Castro_F.H b/Source/driver/Castro_F.H
index b146b43de3..b3a5be337e 100644
--- a/Source/driver/Castro_F.H
+++ b/Source/driver/Castro_F.H
@@ -185,32 +185,6 @@ extern "C"
      const amrex::Real* dx, amrex::Real* dt);
 #endif
 
-  void ca_make_fourth_average(const int lo[], const int hi[],
-                              BL_FORT_FAB_ARG_3D(q), const int* nc,
-                              const BL_FORT_FAB_ARG_3D(q_bar), const int* nc_bar,
-                              const int* domlo, const int* domhi);
-
-  void ca_make_fourth_in_place(const int lo[], const int hi[],
-                               BL_FORT_FAB_ARG_3D(q), const int* nc,
-                               const int* domlo, const int* domhi);
-
-  void ca_make_fourth_in_place_n(const int lo[], const int hi[],
-                                 BL_FORT_FAB_ARG_3D(q), const int* nc, const int ncomp,
-                                 const int* domlo, const int* domhi);
-
-  void ca_compute_lap_term(const int lo[], const int hi[],
-                           const BL_FORT_FAB_ARG_3D(U), const int* nc,
-                           BL_FORT_FAB_ARG_ANYD(lap), const int ncomp,
-                           const int* domlo, const int* domhi);
-
-  void ca_make_cell_center(const int lo[], const int hi[],
-                           const BL_FORT_FAB_ARG_3D(U), const int* nc,
-                           BL_FORT_FAB_ARG_3D(U_cc), const int* nc_cc,
-                           const int* domlo, const int* domhi);
-
-  void ca_make_cell_center_in_place(const int lo[], const int hi[],
-                                    BL_FORT_FAB_ARG_3D(q), const int* nc,
-                                    const int* domlo, const int* domhi);
 
 #ifdef RADIATION
   void ca_inelastic_sct
diff --git a/Source/driver/Castro_advance_sdc.cpp b/Source/driver/Castro_advance_sdc.cpp
index 2c17b05fb4..37b61f157d 100644
--- a/Source/driver/Castro_advance_sdc.cpp
+++ b/Source/driver/Castro_advance_sdc.cpp
@@ -41,8 +41,8 @@ Castro::do_advance_sdc (Real time,
   MultiFab& S_old = get_old_data(State_Type);
   MultiFab& S_new = get_new_data(State_Type);
 
-  const int* domain_lo = geom.Domain().loVect();
-  const int* domain_hi = geom.Domain().hiVect();
+  auto domain_lo = geom.Domain().loVect3d();
+  auto domain_hi = geom.Domain().hiVect3d();
 
   // Perform initialization steps.
 
@@ -104,10 +104,8 @@ Castro::do_advance_sdc (Real time,
 
           for (MFIter mfi(S_new); mfi.isValid(); ++mfi) {
             const Box& gbx = mfi.growntilebox(1);
-            ca_make_cell_center(BL_TO_FORTRAN_BOX(gbx),
-                                BL_TO_FORTRAN_FAB(Sborder[mfi]),
-                                BL_TO_FORTRAN_FAB(Sburn[mfi]),
-                                AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi));
+
+            make_cell_center(gbx, Sborder.array(mfi), Sburn.array(mfi), domain_lo, domain_hi);
 
           }
 
@@ -123,11 +121,16 @@ Castro::do_advance_sdc (Real time,
           AmrLevel::FillPatch(*this, old_source, old_source.nGrow(), prev_time, Source_Type, 0, NSRC);
 
           // Now convert to cell averages.  This loop cannot be tiled.
+          FArrayBox tmp;
+
           for (MFIter mfi(S_new); mfi.isValid(); ++mfi) {
             const Box& bx = mfi.tilebox();
-            ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(bx),
-                                    BL_TO_FORTRAN_FAB(old_source[mfi]),
-                                    AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi));
+
+            tmp.resize(bx, 1);
+            Elixir elix_tmp = tmp.elixir();
+            auto tmp_arr = tmp.array();
+
+            make_fourth_in_place(bx, old_source.array(mfi), tmp_arr, domain_lo, domain_hi);
           }
 
         } else {
@@ -306,6 +309,7 @@ Castro::do_advance_sdc (Real time,
 
   FArrayBox U_center;
   FArrayBox R_center;
+  FArrayBox tmp;
 
   // this cannot be tiled
   for (MFIter mfi(R_new); mfi.isValid(); ++mfi) {
@@ -314,33 +318,30 @@ Castro::do_advance_sdc (Real time,
 
     if (sdc_order == 4) {
 
-      const int* domain_lo = geom.Domain().loVect();
-      const int* domain_hi = geom.Domain().hiVect();
-
       // convert S_new to cell-centers
       U_center.resize(obx, NUM_STATE);
-      ca_make_cell_center(BL_TO_FORTRAN_BOX(obx),
-                          BL_TO_FORTRAN_FAB(Sborder[mfi]),
-                          BL_TO_FORTRAN_FAB(U_center),
-                          AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi));
+      Elixir elix_u_center = U_center.elixir();
+      auto const U_center_arr = U_center.array();
+
+      make_cell_center(obx, Sborder.array(mfi), U_center_arr, domain_lo, domain_hi);
 
       // pass in the reaction source and state at centers, including one ghost cell
       // and derive everything that is needed including 1 ghost cell
       R_center.resize(obx, R_new.nComp());
+      Elixir elix_r_center = R_center.elixir();
+      auto const R_center_arr = R_center.array();
+
       Array4<const Real> const Sburn_arr = Sburn.array(mfi);
-      Array4<const Real> const U_center_arr = U_center.array();
-      Array4<Real> const R_center_arr = R_center.array();
+
       // we don't worry about the difference between centers and averages
-      ca_store_reaction_state(obx,
-                              Sburn_arr,
-                              U_center_arr,
-                              R_center_arr);
+      ca_store_reaction_state(obx, Sburn_arr, U_center_arr, R_center_arr);
 
       // convert R_new from centers to averages in place
-      ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(bx),
-                              BL_TO_FORTRAN_FAB(R_center),
-                              AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi));
+      tmp.resize(bx, 1);
+      Elixir elix_tmp = tmp.elixir();
+      auto const tmp_arr = tmp.array();
 
+      make_fourth_in_place(bx, R_center_arr, tmp_arr, domain_lo, domain_hi);
 
       // store
       R_new[mfi].copy(R_center, bx, 0, bx, 0, R_new.nComp());
diff --git a/Source/hydro/Castro_hydro.H b/Source/hydro/Castro_hydro.H
index 5ae037bc01..f1e238c668 100644
--- a/Source/hydro/Castro_hydro.H
+++ b/Source/hydro/Castro_hydro.H
@@ -541,4 +541,51 @@
 /// @param ng       Number of ghost cells
 ///
     void linear_to_hybrid_momentum(amrex::MultiFab& state, int ng = 0);
+
+#endif
+
+#ifdef TRUE_SDC
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    Real compute_laplacian(int i, int j, int k, int n,
+                           Array4<Real const> const a,
+                           GpuArray<bool, AMREX_SPACEDIM> const& lo_periodic, GpuArray<bool, AMREX_SPACEDIM> const& hi_periodic,
+                           GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi);
+
+    AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+    Real trans_laplacian(int i, int j, int k, int n,
+                         int idir,
+                         Array4<Real const> const a,
+                         GpuArray<bool, AMREX_SPACEDIM> const& lo_periodic, GpuArray<bool, AMREX_SPACEDIM> const& hi_periodic,
+                         GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi);
+
+    void make_cell_center(const Box& bx,
+                          Array4<Real const> const& U,
+                          Array4<Real> const& U_cc,
+                          GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi);
+
+    void make_cell_center_in_place(const Box& bx,
+                                   Array4<Real> const& U,
+                                   Array4<Real> const& tmp,
+                                   GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi);
+
+    void compute_lap_term(const Box& bx,
+                          Array4<Real const> const& U,
+                          Array4<Real> const& lap, const int ncomp,
+                          GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi);
+
+    void make_fourth_average(const Box& bx,
+                             Array4<Real> const& q,
+                             Array4<Real const> const& q_bar,
+                             GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi);
+
+    void make_fourth_in_place(const Box& bx,
+                              Array4<Real> const& q,
+                              Array4<Real> const& tmp,
+                              GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi);
+
+    void make_fourth_in_place_n(const Box& bx,
+                                Array4<Real> const& q, const int ncomp,
+                                Array4<Real> const& tmp,
+                                GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi);
+
 #endif
diff --git a/Source/hydro/Castro_hydro.cpp b/Source/hydro/Castro_hydro.cpp
index 0e4d8f0f33..74718b8697 100644
--- a/Source/hydro/Castro_hydro.cpp
+++ b/Source/hydro/Castro_hydro.cpp
@@ -124,11 +124,13 @@ Castro::cons_to_prim_fourth(const Real time)
     // convert the conservative state cell averages to primitive cell
     // averages with 4th order accuracy
 
-    const int* domain_lo = geom.Domain().loVect();
-    const int* domain_hi = geom.Domain().hiVect();
+    auto domain_lo = geom.Domain().loVect3d();
+    auto domain_hi = geom.Domain().hiVect3d();
 
     MultiFab& S_new = get_new_data(State_Type);
 
+    FArrayBox U_cc;
+
     // we don't support radiation here
 #ifdef RADIATION
     amrex::Abort("radiation not supported to fourth order");
@@ -147,13 +149,11 @@ Castro::cons_to_prim_fourth(const Real time)
       // convert U_avg to U_cc -- this will use a Laplacian
       // operation and will result in U_cc defined only on
       // NUM_GROW-1 ghost cells at the end.
-      FArrayBox U_cc;
       U_cc.resize(qbx, NUM_STATE);
+      Elixir elix_u_cc = U_cc.elixir();
+      auto const U_cc_arr = U_cc.array();
 
-      ca_make_cell_center(BL_TO_FORTRAN_BOX(qbxm1),
-                          BL_TO_FORTRAN_FAB(Sborder[mfi]),
-                          BL_TO_FORTRAN_FAB(U_cc),
-                          AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi));
+      make_cell_center(qbxm1, Sborder.array(mfi), U_cc_arr, domain_lo, domain_hi);
 
       // enforce the minimum density on the new cell-centered state
       ca_enforce_minimum_density
@@ -182,7 +182,6 @@ Castro::cons_to_prim_fourth(const Real time)
       // convert U_cc to q_cc (we'll store this temporarily in q,
       // qaux).  This will remain valid only on the NUM_GROW-1 ghost
       // cells.
-      auto U_cc_arr = U_cc.array();
       auto q_arr = q.array(mfi);
       auto qaux_arr = qaux.array(mfi);
 
@@ -218,17 +217,12 @@ Castro::cons_to_prim_fourth(const Real time)
       // this will create q, qaux in NUM_GROW-1 ghost cells, but that's
       // we need here
 
-      ca_make_fourth_average(BL_TO_FORTRAN_BOX(qbxm1),
-                             BL_TO_FORTRAN_FAB(q[mfi]),
-                             BL_TO_FORTRAN_FAB(q_bar[mfi]),
-                             AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi));
+      make_fourth_average(qbxm1, q.array(mfi), q_bar.array(mfi), domain_lo, domain_hi);
 
       // not sure if we need to convert qaux this way, or if we can
       // just evaluate it (we may not need qaux at all actually)
-      ca_make_fourth_average(BL_TO_FORTRAN_BOX(qbxm1),
-                             BL_TO_FORTRAN_FAB(qaux[mfi]),
-                             BL_TO_FORTRAN_FAB(qaux_bar[mfi]),
-                             AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi));
+
+      make_fourth_average(qbxm1, qaux.array(mfi), qaux_bar.array(mfi), domain_lo, domain_hi);
 
     }
 
diff --git a/Source/hydro/Castro_mol_hydro.cpp b/Source/hydro/Castro_mol_hydro.cpp
index 7417c836da..05c0690324 100644
--- a/Source/hydro/Castro_mol_hydro.cpp
+++ b/Source/hydro/Castro_mol_hydro.cpp
@@ -7,6 +7,8 @@
 #include "diffusion_util.H"
 #endif
 
+#include "fourth_center_average.H"
+
 using namespace amrex;
 
 ///
@@ -41,8 +43,8 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update)
 
   BL_PROFILE_VAR("Castro::advance_hydro_ca_umdrv()", CA_UMDRV);
 
-  const int* domain_lo = geom.Domain().loVect();
-  const int* domain_hi = geom.Domain().hiVect();
+  GpuArray<int, 3> domain_lo = geom.Domain().loVect3d();
+  GpuArray<int, 3> domain_hi = geom.Domain().hiVect3d();
 
 #ifdef _OPENMP
 #pragma omp parallel
@@ -253,11 +255,8 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update)
 
 
             if (do_hydro == 0) {
-              Array4<Real> const f_avg_arr = f_avg.array();
-
               AMREX_PARALLEL_FOR_4D(nbx, NUM_STATE, i, j, k, n, {
                 f_avg_arr(i,j,k,n) = 0.0;});
-
             }
 
 #ifdef DIFFUSION
@@ -289,23 +288,29 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update)
 
 #if AMREX_SPACEDIM >= 2
             // construct the face-center interface states q_fc
-            AMREX_PARALLEL_FOR_4D(nbx, NQ, i, j, k, n, {
-                bool test = (n == QGC) || (n == QTEMP);
+            const int* lo_bc = phys_bc.lo();
+            const int* hi_bc = phys_bc.hi();
+
+            GpuArray<bool, AMREX_SPACEDIM> lo_periodic;
+            GpuArray<bool, AMREX_SPACEDIM> hi_periodic;
+            for (int idir = 0; idir < AMREX_SPACEDIM; idir++) {
+              lo_periodic[idir] = lo_bc[idir] == Interior;
+              hi_periodic[idir] = hi_bc[idir] == Interior;
+            }
 
-                if (test) continue;
+            amrex::ParallelFor(nbx, NQ,
+            [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+            {
+              bool test = (n == QGC) || (n == QTEMP);
 
-                Real lap = 0.0;
-                int ncomp_f = n + 1;
+              if (test) return;
 
-                trans_laplacian(i, j, k, ncomp_f,
-                                idir_f,
-                                BL_TO_FORTRAN_ANYD(q_avg), NQ,
-                                &lap,
-                                ARLIM_3D(domain_lo), ARLIM_3D(domain_hi));
+              Real lap = trans_laplacian(i, j, k, n, idir, q_avg_arr,
+                                         lo_periodic, hi_periodic, domain_lo, domain_hi);
 
-                q_fc_arr(i,j,k,n) = q_avg_arr(i,j,k,n) - 1.0/24.0 * lap;
+              q_fc_arr(i,j,k,n) = q_avg_arr(i,j,k,n) - (1.0_rt/24.0_rt) * lap;
 
-              });
+            });
 
             // compute the face-center fluxes F(q_fc)
             Array4<Real> const f_arr = (flux[idir]).array();
@@ -336,20 +341,16 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update)
             // compute the final fluxes
             Array4<Real> const flux_arr = (flux[idir]).array();
 
-            AMREX_PARALLEL_FOR_4D(nbx, NUM_STATE, i, j, k, n, {
-
-                Real lap = 0.0;
-                int ncomp_f = n + 1;
+            amrex::ParallelFor(nbx, NUM_STATE,
+            [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+            {
 
-                trans_laplacian(i, j, k, ncomp_f,
-                                idir_f,
-                                BL_TO_FORTRAN_ANYD(f_avg), NUM_STATE,
-                                &lap,
-                                ARLIM_3D(domain_lo), ARLIM_3D(domain_hi));
+              Real lap = trans_laplacian(i, j, k, n, idir, f_avg_arr,
+                                         lo_periodic, hi_periodic, domain_lo, domain_hi);
 
-                flux_arr(i,j,k,n) = flux_arr(i,j,k,n) + 1.0/24.0 * lap;
+              flux_arr(i,j,k,n) += (1.0_rt/24.0_rt) * lap;
 
-              });
+            });
 
 #endif
 
diff --git a/Source/hydro/Make.package b/Source/hydro/Make.package
index b56b2b6ac0..8fcfca5467 100644
--- a/Source/hydro/Make.package
+++ b/Source/hydro/Make.package
@@ -33,6 +33,8 @@ ca_F90EXE_sources += Castro_ctu_nd.F90
 
 ifeq ($(USE_TRUE_SDC),TRUE)
   ca_F90EXE_sources += fourth_order_nd.F90
+  CEXE_sources += fourth_center_average.cpp
+  CEXE_headers += fourth_center_average.H
   ca_F90EXE_sources += Castro_mol_nd.F90
   ca_F90EXE_sources += Castro_fourth_order_nd.F90
 endif
diff --git a/Source/hydro/fourth_center_average.H b/Source/hydro/fourth_center_average.H
new file mode 100644
index 0000000000..86bb837091
--- /dev/null
+++ b/Source/hydro/fourth_center_average.H
@@ -0,0 +1,115 @@
+#ifndef _FOURTH_CENTER_AVERAGE_H_
+#define _FOURTH_CENTER_AVERAGE_H_
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+Real
+Castro:: compute_laplacian(int i, int j, int k, int n,
+                           Array4<Real const> const a,
+                           GpuArray<bool, AMREX_SPACEDIM> const& lo_periodic, GpuArray<bool, AMREX_SPACEDIM> const& hi_periodic,
+                           GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi) {
+
+  Real lapx = 0.0;
+  Real lapy = 0.0;
+  Real lapz = 0.0;
+
+  // we use 2nd-order accurate one-sided stencils at the physical
+  // boundaries note: this differs from the suggestion in MC2011 --
+  // they just suggest using the Laplacian from +1 off the interior.
+  // I like the one-sided better.
+
+  if (i == domlo[0] && !lo_periodic[0]) {
+    lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n);
+
+  } else if (i == domhi[0] && !hi_periodic[0]) {
+    lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n);
+
+  } else {
+    lapx = a(i+1,j,k,n) - 2.0_rt*a(i,j,k,n) + a(i-1,j,k,n);
+  }
+
+#if AMREX_SPACEDIM >= 2
+  if (j == domlo[1] && !lo_periodic[1]) {
+    lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n);
+
+  } else if (j == domhi[1] && !hi_periodic[1]) {
+    lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n);
+
+  } else {
+    lapy = a(i,j+1,k,n) - 2.0_rt*a(i,j,k,n) + a(i,j-1,k,n);
+  }
+#endif
+
+#if AMREX_SPACEDIM == 3
+  if (k == domlo[2] && !lo_periodic[2]) {
+    lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n);
+
+  } else if (k == domhi[2] && !hi_periodic[2]) {
+    lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n);
+
+  } else {
+    lapz = a(i,j,k+1,n) - 2.0_rt*a(i,j,k,n) + a(i,j,k-1,n);
+  }
+#endif
+
+  return lapx + lapy + lapz;
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+Real
+Castro::trans_laplacian(int i, int j, int k, int n,
+                        int idir,
+                        Array4<Real const> const a,
+                        GpuArray<bool, AMREX_SPACEDIM> const& lo_periodic, GpuArray<bool, AMREX_SPACEDIM> const& hi_periodic,
+                        GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi) {
+
+  Real lapx = 0.0;
+  Real lapy = 0.0;
+  Real lapz = 0.0;
+
+  // we use 2nd-order accurate one-sided stencils at the physical boundaries
+  if (idir != 0) {
+
+    if (i == domlo[0] && !lo_periodic[0]) {
+      lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n);
+
+    } else if (i == domhi[0] && !hi_periodic[0]) {
+      lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n);
+
+    } else {
+      lapx = a(i+1,j,k,n) - 2.0_rt*a(i,j,k,n) + a(i-1,j,k,n);
+    }
+  }
+
+  if (idir != 1) {
+
+    if (j == domlo[1] && !lo_periodic[1]) {
+      lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n);
+
+    } else if (j == domhi[1] && !hi_periodic[1]) {
+      lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n);
+
+    } else {
+      lapy = a(i,j+1,k,n) - 2.0_rt*a(i,j,k,n) + a(i,j-1,k,n);
+    }
+  }
+
+#if AMREX_SPACEDIM == 3
+  if (idir != 2) {
+
+    if (k == domlo[2] && !lo_periodic[2]) {
+      lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n);
+
+    } else if (k == domhi[2] && !hi_periodic[2]) {
+      lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n);
+
+    } else {
+      lapz = a(i,j,k+1,n) - 2.0_rt*a(i,j,k,n) + a(i,j,k-1,n);
+    }
+  }
+#endif
+
+  return lapx + lapy + lapz;
+}
+
+
+#endif
diff --git a/Source/hydro/fourth_center_average.cpp b/Source/hydro/fourth_center_average.cpp
new file mode 100644
index 0000000000..04a909b795
--- /dev/null
+++ b/Source/hydro/fourth_center_average.cpp
@@ -0,0 +1,206 @@
+#include "Castro.H"
+#include "fourth_center_average.H"
+
+using namespace amrex;
+
+// Note: pretty much all of these routines below assume that dx(1) = dx(2) = dx(3)
+
+
+void
+Castro::make_cell_center(const Box& bx,
+                         Array4<Real const> const& U,
+                         Array4<Real> const& U_cc,
+                         GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi) {
+
+  // Take a cell-average state U and a convert it to a cell-center
+  // state U_cc via U_cc = U - 1/24 L U
+
+  auto U_lo = lbound(U);
+  auto U_hi = ubound(U);
+
+  auto lo = bx.loVect();
+  auto hi = bx.hiVect();
+
+  AMREX_ASSERT(U_lo.x <= lo[0]-1 && U_hi.x >= hi[0]+1);
+#if AMREX_SPACEDIM >= 2
+  AMREX_ASSERT(U_lo.y <= lo[1]-1 && U_hi.y >= hi[1]+1);
+#endif
+#if AMREX_SPACEDIM == 3
+  AMREX_ASSERT(U_lo.z <= lo[2]-1 && U_hi.z >= hi[2]+1);
+#endif
+
+  const int* lo_bc = phys_bc.lo();
+  const int* hi_bc = phys_bc.hi();
+
+  GpuArray<bool, AMREX_SPACEDIM> lo_periodic;
+  GpuArray<bool, AMREX_SPACEDIM> hi_periodic;
+  for (int idir = 0; idir < AMREX_SPACEDIM; idir++) {
+    lo_periodic[idir] = lo_bc[idir] == Interior;
+    hi_periodic[idir] = hi_bc[idir] == Interior;
+  }
+
+  amrex::ParallelFor(bx, U.nComp(),
+  [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+  {
+    Real lap = compute_laplacian(i, j, k, n, U,
+                                 lo_periodic, hi_periodic, domlo, domhi);
+
+    U_cc(i,j,k,n) = U(i,j,k,n) - (1.0_rt/24.0_rt) * lap;
+
+  });
+}
+
+void
+Castro::make_cell_center_in_place(const Box& bx,
+                                  Array4<Real> const& U,
+                                  Array4<Real> const& tmp,
+                                  GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi) {
+
+  // Take a cell-average state U and make it cell-centered in place
+  // via U <- U - 1/24 L U.  Note that this operation is not tile
+  // safe.
+
+  // here, tmp is a temporary memory space we use to store one-component's Laplacian
+
+  const int* lo_bc = phys_bc.lo();
+  const int* hi_bc = phys_bc.hi();
+
+  GpuArray<bool, AMREX_SPACEDIM> lo_periodic;
+  GpuArray<bool, AMREX_SPACEDIM> hi_periodic;
+  for (int idir = 0; idir < AMREX_SPACEDIM; idir++) {
+    lo_periodic[idir] = lo_bc[idir] == Interior;
+    hi_periodic[idir] = hi_bc[idir] == Interior;
+  }
+
+  for (int n = 0; n < U.nComp(); n++) {
+
+    amrex::ParallelFor(bx,
+    [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+    {
+      tmp(i,j,k) = compute_laplacian(i, j, k, n, U,
+                                     lo_periodic, hi_periodic, domlo, domhi);
+    });
+
+    amrex::ParallelFor(bx,
+    [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+    {
+      U(i,j,k,n) = U(i,j,k,n) - (1.0_rt/24.0_rt) * tmp(i,j,k);
+    });
+  }
+}
+
+
+void
+Castro::compute_lap_term(const Box& bx,
+                         Array4<Real const> const& U,
+                         Array4<Real> const& lap, const int ncomp,
+                         GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi) {
+
+  // Computes the h**2/24 L U term that is used in correcting
+  // cell-center to averages (and back)
+
+  const int* lo_bc = phys_bc.lo();
+  const int* hi_bc = phys_bc.hi();
+
+  GpuArray<bool, AMREX_SPACEDIM> lo_periodic;
+  GpuArray<bool, AMREX_SPACEDIM> hi_periodic;
+  for (int idir = 0; idir < AMREX_SPACEDIM; idir++) {
+    lo_periodic[idir] = lo_bc[idir] == Interior;
+    hi_periodic[idir] = hi_bc[idir] == Interior;
+  }
+
+  amrex::ParallelFor(bx,
+  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+  {
+    lap(i,j,k) = (1.0_rt/24.0_rt) *
+      compute_laplacian(i, j, k, ncomp, U,
+                        lo_periodic, hi_periodic, domlo, domhi);
+  });
+
+}
+
+
+void
+Castro::make_fourth_average(const Box& bx,
+                            Array4<Real> const& q,
+                            Array4<Real const> const& q_bar,
+                            GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi) {
+
+  // Take the cell-center state q and another state q_bar (e.g.,
+  // constructed from the cell-average U) and replace the cell-center
+  // q with a 4th-order accurate cell-average, q <- q + 1/24 L q_bar
+
+  const int* lo_bc = phys_bc.lo();
+  const int* hi_bc = phys_bc.hi();
+
+  GpuArray<bool, AMREX_SPACEDIM> lo_periodic;
+  GpuArray<bool, AMREX_SPACEDIM> hi_periodic;
+  for (int idir = 0; idir < AMREX_SPACEDIM; idir++) {
+    lo_periodic[idir] = lo_bc[idir] == Interior;
+    hi_periodic[idir] = hi_bc[idir] == Interior;
+  }
+
+  amrex::ParallelFor(bx, q.nComp(),
+  [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept
+  {
+    Real lap = compute_laplacian(i, j, k, n, q_bar,
+                                 lo_periodic, hi_periodic, domlo, domhi);
+
+    q(i,j,k,n) += (1.0_rt/24.0_rt) * lap;
+  });
+}
+
+
+void
+Castro::make_fourth_in_place(const Box& bx,
+                             Array4<Real> const& q,
+                             Array4<Real> const& tmp,
+                             GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi) {
+
+  // Take the cell-center q and makes it a cell-average q, in place
+  // (e.g. q is overwritten by its average), q <- q + 1/24 L q.
+  // Note: this routine is not tile safe.
+
+  for (int n = 0; n < q.nComp(); n++) {
+    make_fourth_in_place_n(bx, q, n, tmp, domlo, domhi);
+  }
+}
+
+
+void
+Castro::make_fourth_in_place_n(const Box& bx,
+                               Array4<Real> const& q, const int ncomp,
+                               Array4<Real> const& tmp,
+                               GpuArray<int, 3> const& domlo, GpuArray<int, 3> const& domhi) {
+
+  // Take the cell-center q and makes it a cell-average q, in place
+  // (e.g. q is overwritten by its average), q <- q + 1/24 L q.
+  // Note: this routine is not tile safe.
+  //
+  // This version operates on a single component.  Here ncomp is the
+  // component to update
+
+  const int* lo_bc = phys_bc.lo();
+  const int* hi_bc = phys_bc.hi();
+
+  GpuArray<bool, AMREX_SPACEDIM> lo_periodic;
+  GpuArray<bool, AMREX_SPACEDIM> hi_periodic;
+  for (int idir = 0; idir < AMREX_SPACEDIM; idir++) {
+    lo_periodic[idir] = lo_bc[idir] == Interior;
+    hi_periodic[idir] = hi_bc[idir] == Interior;
+  }
+
+  amrex::ParallelFor(bx,
+  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+  {
+    tmp(i,j,k) = compute_laplacian(i, j, k, ncomp, q,
+                                   lo_periodic, hi_periodic, domlo, domhi);
+  });
+
+  amrex::ParallelFor(bx,
+  [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
+  {
+    q(i,j,k,ncomp) += (1.0_rt/24.0_rt) * tmp(i,j,k);
+  });
+
+}
diff --git a/Source/hydro/fourth_order_nd.F90 b/Source/hydro/fourth_order_nd.F90
index 7c0e427277..78a0abe6c9 100644
--- a/Source/hydro/fourth_order_nd.F90
+++ b/Source/hydro/fourth_order_nd.F90
@@ -790,414 +790,4 @@ subroutine ca_states(lo, hi, &
 
   end subroutine ca_states
 
-
-  ! Note: pretty much all of these routines below assume that dx(1) = dx(2) = dx(3)
-  pure function compute_laplacian(i, j, k, n, &
-                                  a, a_lo, a_hi, nc, &
-                                  domlo, domhi) result (lap)
-
-    use prob_params_module, only : physbc_lo, physbc_hi, Interior
-    implicit none
-
-    integer, intent(in) :: i, j, k, n
-    integer, intent(in) :: a_lo(3), a_hi(3)
-    integer, intent(in) :: nc
-    real(rt), intent(in) :: a(a_lo(1):a_hi(1), a_lo(2):a_hi(2), a_lo(3):a_hi(3), nc)
-    integer, intent(in) :: domlo(3), domhi(3)
-
-    real(rt) :: lapx, lapy, lapz
-    real(rt) :: lap
-
-    lapx = ZERO
-    lapy = ZERO
-    lapz = ZERO
-
-    ! we use 2nd-order accurate one-sided stencils at the physical
-    ! boundaries note: this differs from the suggestion in MC2011 --
-    ! they just suggest using the Laplacian from +1 off the interior.
-    ! I like the one-sided better.
-
-    if (i == domlo(1) .and. physbc_lo(1) /= Interior) then
-       lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n)
-
-    else if (i == domhi(1) .and. physbc_hi(1) /= Interior) then
-       lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n)
-
-    else
-       lapx = a(i+1,j,k,n) - TWO*a(i,j,k,n) + a(i-1,j,k,n)
-    end if
-
-#if AMREX_SPACEDIM >= 2
-    if (j == domlo(2) .and. physbc_lo(2) /= Interior) then
-       lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n)
-
-    else if (j == domhi(2) .and. physbc_hi(2) /= Interior) then
-       lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n)
-
-    else
-       lapy = a(i,j+1,k,n) - TWO*a(i,j,k,n) + a(i,j-1,k,n)
-    end if
-
-#endif
-
-#if AMREX_SPACEDIM == 3
-    if (k == domlo(3) .and. physbc_lo(3) /= Interior) then
-       lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n)
-
-    else if (k == domhi(3) .and. physbc_hi(3) /= Interior) then
-       lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n)
-
-    else
-       lapz = a(i,j,k+1,n) - TWO*a(i,j,k,n) + a(i,j,k-1,n)
-    end if
-#endif
-
-    lap = lapx + lapy + lapz
-
-  end function compute_laplacian
-
-  subroutine trans_laplacian(i, j, k, n, &
-                             idir, &
-                             a, a_lo, a_hi, nc, &
-                             lap, &
-                             domlo, domhi) bind(C, name="trans_laplacian")
-
-    use prob_params_module, only : physbc_lo, physbc_hi, Interior
-    implicit none
-
-    integer, intent(in), value :: i, j, k, n
-    integer, intent(in), value :: idir
-    integer, intent(in) :: a_lo(3), a_hi(3)
-    integer, intent(in), value :: nc
-    real(rt), intent(in) :: a(a_lo(1):a_hi(1), a_lo(2):a_hi(2), a_lo(3):a_hi(3), nc)
-    integer, intent(in) :: domlo(3), domhi(3)
-    real(rt), intent(out) :: lap
-
-    real(rt) :: lapx, lapy, lapz
-
-    lapx = ZERO
-    lapy = ZERO
-    lapz = ZERO
-
-    ! we use 2nd-order accurate one-sided stencils at the physical boundaries
-    if (idir /= 1) then
-
-       if (i == domlo(1) .and. physbc_lo(1) /= Interior) then
-          lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i+1,j,k,n) + 4.0_rt*a(i+2,j,k,n) - a(i+3,j,k,n)
-
-       else if (i == domhi(1) .and. physbc_hi(1) /= Interior) then
-          lapx = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i-1,j,k,n) + 4.0_rt*a(i-2,j,k,n) - a(i-3,j,k,n)
-
-       else
-          lapx = a(i+1,j,k,n) - TWO*a(i,j,k,n) + a(i-1,j,k,n)
-       end if
-    end if
-
-    if (idir /= 2) then
-
-       if (j == domlo(2) .and. physbc_lo(2) /= Interior) then
-          lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j+1,k,n) + 4.0_rt*a(i,j+2,k,n) - a(i,j+3,k,n)
-
-       else if (j == domhi(2) .and. physbc_hi(2) /= Interior) then
-          lapy = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j-1,k,n) + 4.0_rt*a(i,j-2,k,n) - a(i,j-3,k,n)
-
-       else
-          lapy = a(i,j+1,k,n) - TWO*a(i,j,k,n) + a(i,j-1,k,n)
-       end if
-    end if
-
-#if AMREX_SPACEDIM == 3
-    if (idir /= 3) then
-
-       if (k == domlo(3) .and. physbc_lo(3) /= Interior) then
-          lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k+1,n) + 4.0_rt*a(i,j,k+2,n) - a(i,j,k+3,n)
-
-       else if (k == domhi(3) .and. physbc_hi(3) /= Interior) then
-          lapz = 2.0_rt*a(i,j,k,n) - 5.0_rt*a(i,j,k-1,n) + 4.0_rt*a(i,j,k-2,n) - a(i,j,k-3,n)
-
-       else
-          lapz = a(i,j,k+1,n) - TWO*a(i,j,k,n) + a(i,j,k-1,n)
-       end if
-    end if
-#endif
-
-    lap = lapx + lapy + lapz
-
-  end subroutine trans_laplacian
-
-
-  subroutine ca_make_cell_center(lo, hi, &
-                                 U, U_lo, U_hi, nc, &
-                                 U_cc, U_cc_lo, U_cc_hi, nc_cc, &
-                                 domlo, domhi) &
-                                 bind(C, name="ca_make_cell_center")
-    ! Take a cell-average state U and a convert it to a cell-center
-    ! state U_cc via U_cc = U - 1/24 L U
-
-    implicit none
-
-    integer, intent(in) :: lo(3), hi(3)
-    integer, intent(in) :: U_lo(3), U_hi(3)
-    integer, intent(in) :: U_cc_lo(3), U_cc_hi(3)
-    integer, intent(in) :: nc, nc_cc
-    real(rt), intent(in) :: U(U_lo(1):U_hi(1), U_lo(2):U_hi(2), U_lo(3):U_hi(3), nc)
-    real(rt), intent(inout) :: U_cc(U_cc_lo(1):U_cc_hi(1), U_cc_lo(2):U_cc_hi(2), U_cc_lo(3):U_cc_hi(3), nc_cc)
-    integer, intent(in) :: domlo(3), domhi(3)
-
-    integer :: i, j, k, n
-    real(rt) :: lap
-
-    if (U_lo(1) > lo(1)-1 .or. U_hi(1) < hi(1)+1 .or. &
-        (AMREX_SPACEDIM >= 2 .and. (U_lo(2) > lo(2)-1 .or. U_hi(2) < hi(2)+1)) .or. &
-        (AMREX_SPACEDIM == 3 .and. (U_lo(3) > lo(3)-1 .or. U_hi(3) < hi(3)+1))) then
-       call bl_error("insufficient ghostcells in ca_make_cell_center")
-    end if
-
-    do n = 1, nc
-
-       do k = lo(3), hi(3)
-          do j = lo(2), hi(2)
-             do i = lo(1), hi(1)
-
-                lap = compute_laplacian(i, j, k, n, &
-                                        U, U_lo, U_hi, nc, &
-                                        domlo, domhi)
-
-                U_cc(i,j,k,n) = U(i,j,k,n) - TWENTYFOURTH * lap
-
-             end do
-          end do
-       end do
-    end do
-
-  end subroutine ca_make_cell_center
-
-  subroutine ca_make_cell_center_in_place(lo, hi, &
-                                          U, U_lo, U_hi, nc, &
-                                          domlo, domhi) &
-                                          bind(C, name="ca_make_cell_center_in_place")
-    ! Take a cell-average state U and make it cell-centered in place
-    ! via U <- U - 1/24 L U.  Note that this operation is not tile
-    ! safe.
-
-    use amrex_mempool_module, only : bl_allocate, bl_deallocate
-
-    implicit none
-
-    integer, intent(in) :: lo(3), hi(3)
-    integer, intent(in) :: U_lo(3), U_hi(3)
-    integer, intent(in) :: nc
-    real(rt), intent(inout) :: U(U_lo(1):U_hi(1), U_lo(2):U_hi(2), U_lo(3):U_hi(3), nc)
-    integer, intent(in) :: domlo(3), domhi(3)
-
-    integer :: i, j, k, n
-
-    real(rt), pointer :: lap(:,:,:)
-
-    call bl_allocate(lap, lo, hi)
-
-    do n = 1, nc
-
-       do k = lo(3), hi(3)
-          do j = lo(2), hi(2)
-             do i = lo(1), hi(1)
-
-                lap(i,j,k) = compute_laplacian(i, j, k, n, &
-                                               U, U_lo, U_hi, nc, &
-                                               domlo, domhi)
-
-             end do
-          end do
-       end do
-
-       do k = lo(3), hi(3)
-          do j = lo(2), hi(2)
-             do i = lo(1), hi(1)
-                U(i,j,k,n) = U(i,j,k,n) - TWENTYFOURTH * lap(i,j,k)
-             end do
-          end do
-       end do
-    end do
-
-    call bl_deallocate(lap)
-
-  end subroutine ca_make_cell_center_in_place
-
-  subroutine ca_compute_lap_term(lo, hi, &
-                                 U, U_lo, U_hi, nc, &
-                                 lap, lap_lo, lap_hi, ncomp, &
-                                 domlo, domhi) &
-                                 bind(C, name="ca_compute_lap_term")
-    ! Computes the h**2/24 L U term that is used in correcting
-    ! cell-center to averages (and back)
-
-    implicit none
-
-    integer, intent(in) :: lo(3), hi(3)
-    integer, intent(in) :: U_lo(3), U_hi(3)
-    integer, intent(in) :: lap_lo(3), lap_hi(3)
-    integer, intent(in) :: nc
-    real(rt), intent(in) :: U(U_lo(1):U_hi(1), U_lo(2):U_hi(2), U_lo(3):U_hi(3), nc)
-    real(rt), intent(inout) :: lap(lap_lo(1):lap_hi(1), lap_lo(2):lap_hi(2), lap_lo(3):lap_hi(3))
-    integer, intent(in), value :: ncomp
-    integer, intent(in) :: domlo(3), domhi(3)
-
-    ! note: ncomp is C++ index (0-based)
-
-    integer :: i, j, k
-
-    do k = lo(3), hi(3)
-       do j = lo(2), hi(2)
-          do i = lo(1), hi(1)
-
-             lap(i,j,k) = compute_laplacian(i, j, k, ncomp+1, &
-                                            U, U_lo, U_hi, nc, &
-                                            domlo, domhi)
-
-          end do
-       end do
-    end do
-
-    lap(lo(1):hi(1), lo(2):hi(2), lo(3):hi(3)) = &
-         TWENTYFOURTH * lap(lo(1):hi(1), lo(2):hi(2), lo(3):hi(3))
-
-  end subroutine ca_compute_lap_term
-
-  subroutine ca_make_fourth_average(lo, hi, &
-                                    q, q_lo, q_hi, nc, &
-                                    q_bar, q_bar_lo, q_bar_hi, nc_bar, &
-                                    domlo, domhi) &
-                                    bind(C, name="ca_make_fourth_average")
-    ! Take the cell-center state q and another state q_bar (e.g.,
-    ! constructed from the cell-average U) and replace the cell-center
-    ! q with a 4th-order accurate cell-average, q <- q + 1/24 L q_bar
-
-    integer, intent(in) :: lo(3), hi(3)
-    integer, intent(in) :: q_lo(3), q_hi(3)
-    integer, intent(in) :: q_bar_lo(3), q_bar_hi(3)
-    integer, intent(in) :: nc, nc_bar
-    real(rt), intent(inout) :: q(q_lo(1):q_hi(1), q_lo(2):q_hi(2), q_lo(3):q_hi(3), nc)
-    real(rt), intent(in) :: q_bar(q_bar_lo(1):q_bar_hi(1), q_bar_lo(2):q_bar_hi(2), q_bar_lo(3):q_bar_hi(3), nc_bar)
-    integer, intent(in) :: domlo(3), domhi(3)
-
-    integer :: i, j, k, n
-    real(rt) :: lap
-
-    do n = 1, nc
-       do k = lo(3), hi(3)
-          do j = lo(2), hi(2)
-             do i = lo(1), hi(1)
-
-                lap = compute_laplacian(i, j, k, n, &
-                                        q_bar, q_bar_lo, q_bar_hi, nc, &
-                                        domlo, domhi)
-
-                q(i,j,k,n) = q(i,j,k,n) + TWENTYFOURTH * lap
-
-             end do
-          end do
-       end do
-    end do
-
-  end subroutine ca_make_fourth_average
-
-  subroutine ca_make_fourth_in_place(lo, hi, &
-                                     q, q_lo, q_hi, nc, &
-                                     domlo, domhi) &
-                                     bind(C, name="ca_make_fourth_in_place")
-    ! Take the cell-center q and makes it a cell-average q, in place
-    ! (e.g. q is overwritten by its average), q <- q + 1/24 L q.
-    ! Note: this routine is not tile safe.
-
-    use amrex_mempool_module, only : bl_allocate, bl_deallocate
-
-    integer, intent(in) :: lo(3), hi(3)
-    integer, intent(in) :: q_lo(3), q_hi(3)
-    integer, intent(in) :: nc
-    real(rt), intent(inout) :: q(q_lo(1):q_hi(1), q_lo(2):q_hi(2), q_lo(3):q_hi(3), nc)
-    integer, intent(in) :: domlo(3), domhi(3)
-
-    integer :: i, j, k, n
-    real(rt), pointer :: lap(:,:,:)
-
-    call bl_allocate(lap, lo, hi)
-
-    do n = 1, nc
-
-       do k = lo(3), hi(3)
-          do j = lo(2), hi(2)
-             do i = lo(1), hi(1)
-
-                lap(i,j,k) = compute_laplacian(i, j, k, n, &
-                                               q, q_lo, q_hi, nc, &
-                                               domlo, domhi)
-
-             end do
-          end do
-       end do
-
-       do k = lo(3), hi(3)
-          do j = lo(2), hi(2)
-             do i = lo(1), hi(1)
-                q(i,j,k,n) = q(i,j,k,n) + TWENTYFOURTH * lap(i,j,k)
-             end do
-          end do
-       end do
-    end do
-
-    call bl_deallocate(lap)
-
-  end subroutine ca_make_fourth_in_place
-
-  subroutine ca_make_fourth_in_place_n(lo, hi, &
-                                       q, q_lo, q_hi, nc, ncomp, &
-                                       domlo, domhi) &
-                                       bind(C, name="ca_make_fourth_in_place_n")
-    ! Take the cell-center q and makes it a cell-average q, in place
-    ! (e.g. q is overwritten by its average), q <- q + 1/24 L q.
-    ! Note: this routine is not tile safe.
-    !
-    ! This version operates on a single component.  Here ncomp is the
-    ! component to update -- we expect this to come in 0-based from
-    ! C++, so we add 1
-
-    use amrex_mempool_module, only : bl_allocate, bl_deallocate
-
-    integer, intent(in) :: lo(3), hi(3)
-    integer, intent(in) :: q_lo(3), q_hi(3)
-    integer, intent(in) :: nc
-    integer, intent(in), value :: ncomp
-    real(rt), intent(inout) :: q(q_lo(1):q_hi(1), q_lo(2):q_hi(2), q_lo(3):q_hi(3), nc)
-    integer, intent(in) :: domlo(3), domhi(3)
-
-    integer :: i, j, k
-    real(rt), pointer :: lap(:,:,:)
-
-    call bl_allocate(lap, lo, hi)
-
-    do k = lo(3), hi(3)
-       do j = lo(2), hi(2)
-          do i = lo(1), hi(1)
-
-             lap(i,j,k) = compute_laplacian(i, j, k, ncomp+1, &
-                                            q, q_lo, q_hi, nc, &
-                                            domlo, domhi)
-
-          end do
-       end do
-    end do
-
-    do k = lo(3), hi(3)
-       do j = lo(2), hi(2)
-          do i = lo(1), hi(1)
-             q(i,j,k,ncomp+1) = q(i,j,k,ncomp+1) + TWENTYFOURTH * lap(i,j,k)
-          end do
-       end do
-    end do
-
-    call bl_deallocate(lap)
-
-  end subroutine ca_make_fourth_in_place_n
-
-
 end module fourth_order
diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp
index b1e3b1fce6..3aebc95047 100644
--- a/Source/sdc/Castro_sdc.cpp
+++ b/Source/sdc/Castro_sdc.cpp
@@ -26,8 +26,8 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
     // for 4th order reactive SDC, we need to first compute the source, C
     // and do a ghost cell fill on it
 
-    const int* domain_lo = geom.Domain().loVect();
-    const int* domain_hi = geom.Domain().hiVect();
+    auto domain_lo = geom.Domain().loVect3d();
+    auto domain_hi = geom.Domain().hiVect3d();
 
     // the timestep from m to m+1
     Real dt_m = (dt_sdc[m_end] - dt_sdc[m_start]) * dt;
@@ -123,6 +123,7 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
     FArrayBox C_center;
     FArrayBox U_new_center;
     FArrayBox R_new;
+    FArrayBox tlap;
 
     FArrayBox C2;
 
@@ -193,10 +194,10 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
             // convert the starting U to cell-centered on a fab-by-fab basis
             // -- including one ghost cell
             U_center.resize(bx1, NUM_STATE);
-            ca_make_cell_center(BL_TO_FORTRAN_BOX(bx1),
-                                BL_TO_FORTRAN_FAB(Sborder[mfi]),
-                                BL_TO_FORTRAN_FAB(U_center),
-                                AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi));
+            Elixir elix_u_center = U_center.elixir();
+            auto U_center_arr = U_center.array();
+
+            make_cell_center(bx1, Sborder.array(mfi), U_center_arr, domain_lo, domain_hi);
 
             // sometimes the Laplacian can make the species go negative near discontinuities
             ca_normalize_species(AMREX_INT_ANYD(bx1.loVect()), AMREX_INT_ANYD(bx1.hiVect()),
@@ -204,21 +205,20 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
 
             // convert the C source to cell-centers
             C_center.resize(bx1, NUM_STATE);
-            ca_make_cell_center(BL_TO_FORTRAN_BOX(bx1),
-                                BL_TO_FORTRAN_FAB(C_source[mfi]),
-                                BL_TO_FORTRAN_FAB(C_center),
-                                AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi));
+            Elixir elix_c_center = C_center.elixir();
+            auto C_center_arr = C_center.array();
+
+            make_cell_center(bx1, C_source.array(mfi), C_center_arr, domain_lo, domain_hi);
 
             // solve for the updated cell-center U using our cell-centered C -- we
             // need to do this with one ghost cell
             U_new_center.resize(bx1, NUM_STATE);
+            Elixir elix_u_new_center = U_new_center.elixir();
+            auto U_new_center_arr = U_new_center.array();
 
             // initialize U_new with our guess for the new state, stored as
             // an average in Sburn
-            ca_make_cell_center(BL_TO_FORTRAN_BOX(bx1),
-                                BL_TO_FORTRAN_FAB(Sburn[mfi]),
-                                BL_TO_FORTRAN_FAB(U_new_center),
-                                AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi));
+            make_cell_center(bx1, Sburn.array(mfi), U_new_center_arr, domain_lo, domain_hi);
 
             ca_sdc_update_centers_o4(BL_TO_FORTRAN_BOX(bx1), &dt_m,
                                      BL_TO_FORTRAN_3D(U_center),
@@ -230,15 +230,17 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
             // place (only for the interior)
             R_new.resize(bx1, NUM_STATE);
             Elixir elix_R_new = R_new.elixir();
-            Array4<const Real> const& R_new_arr=R_new.array();
+            Array4<Real> const& R_new_arr = R_new.array();
 
             ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx1),
                                    BL_TO_FORTRAN_3D(U_new_center),
                                    BL_TO_FORTRAN_3D(R_new));
 
-            ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(bx),
-                                    BL_TO_FORTRAN_FAB(R_new),
-                                    AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi));
+            tlap.resize(bx, 1);
+            Elixir elix_tlap = tlap.elixir();
+            auto const tlap_arr = tlap.array();
+
+            make_fourth_in_place(bx, R_new_arr, tlap_arr, domain_lo, domain_hi);
 
             // now do the conservative update using this <R> to get <U>
             // We'll also need to pass in <C>
@@ -313,8 +315,8 @@ Castro::construct_old_react_source(MultiFab& U_state,
 
     BL_PROFILE("Castro::construct_old_react_source()");
 
-    const int* domain_lo = geom.Domain().loVect();
-    const int* domain_hi = geom.Domain().hiVect();
+    auto domain_lo = geom.Domain().loVect3d();
+    auto domain_hi = geom.Domain().hiVect3d();
 
     if (sdc_order == 4 && input_is_average)
     {
@@ -323,6 +325,7 @@ Castro::construct_old_react_source(MultiFab& U_state,
 
         FArrayBox U_center;
         FArrayBox R_center;
+        FArrayBox tmp;
 
         for (MFIter mfi(U_state); mfi.isValid(); ++mfi)
         {
@@ -332,13 +335,16 @@ Castro::construct_old_react_source(MultiFab& U_state,
 
             // Convert to centers
             U_center.resize(obx, NUM_STATE);
-            ca_make_cell_center(BL_TO_FORTRAN_BOX(obx),
-                                BL_TO_FORTRAN_FAB(U_state[mfi]),
-                                BL_TO_FORTRAN_FAB(U_center),
-                                AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi));
+            Elixir elix_u_center = U_center.elixir();
+            auto const U_center_arr = U_center.array();
+
+            make_cell_center(obx, U_state.array(mfi), U_center_arr, domain_lo, domain_hi);
 
             // burn, including one ghost cell
             R_center.resize(obx, NUM_STATE);
+            Elixir elix_r_center = R_center.elixir();
+            auto const R_center_arr = R_center.array();
+
             ca_instantaneous_react(BL_TO_FORTRAN_BOX(obx),
                                    BL_TO_FORTRAN_3D(U_center),
                                    BL_TO_FORTRAN_3D(R_center));
@@ -349,9 +355,12 @@ Castro::construct_old_react_source(MultiFab& U_state,
             Sburn[mfi].copy(R_center, obx, 0, obx, 0, NUM_STATE);
 
             // convert R to averages (in place)
-            ca_make_fourth_in_place(BL_TO_FORTRAN_BOX(bx),
-                                    BL_TO_FORTRAN_FAB(R_center),
-                                    AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi));
+
+            tmp.resize(bx, 1);
+            Elixir elix_tmp = tmp.elixir();
+            auto const tmp_arr = tmp.array();
+
+            make_fourth_in_place(bx, R_center_arr, tmp_arr, domain_lo, domain_hi);
 
             // copy this to the center
             R_source[mfi].copy(R_center, bx, 0, bx, 0, NUM_STATE);