From 83b0e3a0a0356d6c8456971aa5658634a23428dd Mon Sep 17 00:00:00 2001
From: Michael Zingale <michael.zingale@stonybrook.edu>
Date: Fri, 6 Oct 2023 11:48:36 -0400
Subject: [PATCH] refactor SDC Newton solver to use burn_t (#2600)

this cleans up some interfaces and reduces memory requirements.
---
 Source/sdc/Castro_sdc.cpp      |  9 -----
 Source/sdc/Castro_sdc_util.H   | 15 +++----
 Source/sdc/Make.package        |  1 +
 Source/sdc/sdc_cons_to_burn.H  | 59 ++++++++++++++++++++++++++++
 Source/sdc/sdc_newton_solve.H  | 71 ++++++++++++++--------------------
 Source/sdc/sdc_react_util.H    | 48 +++++++++++------------
 Source/sdc/vode_rhs_true_sdc.H | 17 +-------
 7 files changed, 122 insertions(+), 98 deletions(-)
 create mode 100644 Source/sdc/sdc_cons_to_burn.H
diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp
index 8da8e9925a..5974e11698 100644
--- a/Source/sdc/Castro_sdc.cpp
+++ b/Source/sdc/Castro_sdc.cpp
@@ -241,9 +241,6 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt)
             Elixir elix_R_new = R_new.elixir();
             Array4<Real> const& R_new_arr = R_new.array();
 
-            // ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx1),
-            //                        BL_TO_FORTRAN_3D(U_new_center),
-            //                        BL_TO_FORTRAN_3D(R_new));
             amrex::ParallelFor(bx1,
             [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
@@ -359,9 +356,6 @@ Castro::construct_old_react_source(MultiFab& U_state,
             Elixir elix_r_center = R_center.elixir();
             auto const R_center_arr = R_center.array();
 
-            // ca_instantaneous_react(BL_TO_FORTRAN_BOX(obx),
-            //                        BL_TO_FORTRAN_3D(U_center),
-            //                        BL_TO_FORTRAN_3D(R_center));
             amrex::ParallelFor(obx,
             [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
@@ -399,9 +393,6 @@ Castro::construct_old_react_source(MultiFab& U_state,
             auto const R_source_arr = R_source.array(mfi);
 
             // construct the reactive source term
-            // ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx),
-            //                        BL_TO_FORTRAN_3D(U_state[mfi]),
-            //                        BL_TO_FORTRAN_3D(R_source[mfi]));
             amrex::ParallelFor(bx,
             [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept
             {
diff --git a/Source/sdc/Castro_sdc_util.H b/Source/sdc/Castro_sdc_util.H
index 164daff206..3a4e71d2d4 100644
--- a/Source/sdc/Castro_sdc_util.H
+++ b/Source/sdc/Castro_sdc_util.H
@@ -132,8 +132,6 @@ sdc_update_o2(const int i, const int j, const int k,
 
     // Here, dt_m is the timestep between time-nodes m and m+1
 
-    burn_t burn_state;
-
     GpuArray<Real, NUM_STATE> U_old;
     GpuArray<Real, NUM_STATE> U_new;
     GpuArray<Real, NUM_STATE> R_full;
@@ -170,9 +168,13 @@ sdc_update_o2(const int i, const int j, const int k,
 
         sdc_solve(dt_m, U_old, U_new, C_zone, sdc_iteration);
 
-        // we solved our system to some tolerance, but let's be sure we are conservative by
-        // reevaluating the reactions and { doing the full step update
-        single_zone_react_source(U_new, R_full, burn_state);
+        // we solved our system to some tolerance, but let's be sure
+        // we are conservative by reevaluating the reactions and
+        // doing the full step update
+        burn_t burn_state;
+
+        copy_cons_to_burn_type(U_new, burn_state);
+        single_zone_react_source(burn_state, R_full);
     }
 
     for (int n = 0; n < NUM_STATE; ++n) {
@@ -219,8 +221,7 @@ instantaneous_react(const int i, const int j, const int k,
     // (in that case, I am not sure if I can assume UTEMP is defined)
 
     if (okay_to_burn(i, j, k, state)) {
-        burn_t burn_state;
-        single_zone_react_source(i, j, k, state, R_source, burn_state);
+        single_zone_react_source(i, j, k, state, R_source);
     } else {
         for (int n = 0; n < NUM_STATE; ++n) {
             R_source(i, j, k, n) = 0.0_rt;
diff --git a/Source/sdc/Make.package b/Source/sdc/Make.package
index c135c88138..b942621345 100644
--- a/Source/sdc/Make.package
+++ b/Source/sdc/Make.package
@@ -1,4 +1,5 @@
 CEXE_headers += Castro_sdc.H
+CEXE_headers += sdc_const_to_burn.H
 
 CEXE_sources += sdc_util.cpp
 
diff --git a/Source/sdc/sdc_cons_to_burn.H b/Source/sdc/sdc_cons_to_burn.H
new file mode 100644
index 0000000000..d4ef43fd8b
--- /dev/null
+++ b/Source/sdc/sdc_cons_to_burn.H
@@ -0,0 +1,59 @@
+#ifndef SDC_CONS_TO_BURN_H
+#define SDC_CONS_TO_BURN_H
+
+#include <burn_type.H>
+
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+void
+copy_cons_to_burn_type(const int i, const int j, const int k,
+                       Array4<const Real> const& state,
+                       burn_t& burn_state) {
+
+    burn_state.y[SRHO] = state(i,j,k,URHO);
+    burn_state.y[SMX] = state(i,j,k,UMX);
+    burn_state.y[SMY] = state(i,j,k,UMY);
+    burn_state.y[SMZ] = state(i,j,k,UMZ);
+    burn_state.y[SEDEN] = state(i,j,k,UEDEN);
+    burn_state.y[SEINT] = state(i,j,k,UEINT);
+    for (int n = 0; n < NumSpec; n++) {
+        burn_state.y[SFS+n] = state(i,j,k,UFS+n);
+    }
+#if NAUX_NET > 0
+    for (int n = 0; n < NumAux; n++) {
+        burn_state.y[SFX+n] = state(i,j,k,UFX+n);
+    }
+#endif
+
+    burn_state.T = state(i,j,k,UTEMP);
+    burn_state.rho = state(i,j,k,URHO);
+
+}
+
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+void
+copy_cons_to_burn_type(GpuArray<Real, NUM_STATE> const& state,
+                       burn_t& burn_state) {
+
+    burn_state.y[SRHO] = state[URHO];
+    burn_state.y[SMX] = state[UMX];
+    burn_state.y[SMY] = state[UMY];
+    burn_state.y[SMZ] = state[UMZ];
+    burn_state.y[SEDEN] = state[UEDEN];
+    burn_state.y[SEINT] = state[UEINT];
+    for (int n = 0; n < NumSpec; n++) {
+        burn_state.y[SFS+n] = state[UFS+n];
+    }
+#if NAUX_NET > 0
+    for (int n = 0; n < NumAux; n++) {
+        burn_state.y[SFX+n] = state[UFX+n];
+    }
+#endif
+
+    burn_state.T = state[UTEMP];
+    burn_state.rho = state[URHO];
+
+}
+
+
+
+#endif
diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H
index 312837cc68..ef30b3b76b 100644
--- a/Source/sdc/sdc_newton_solve.H
+++ b/Source/sdc/sdc_newton_solve.H
@@ -20,68 +20,63 @@ f_sdc_jac(const Real dt_m,
           JacNetArray2D& Jac,
           Array1D<Real, 1, NumSpec+1>& f_source,
           const Real rho,
-          GpuArray<Real, 3>& mom_source,
-          const Real T_old,
-          const Real E_var) {
+          const Real T_old) {
 
     // This is used with the Newton solve and returns f and the Jacobian
 
-    GpuArray<Real, NUM_STATE> U_full;
     GpuArray<Real, NUM_STATE> R_full;
 
-    // we are not solving the momentum equations
-    // create a full state -- we need this for some interfaces
+    // create a burn_t -- this will hold the full state, reconstructed
+    // from the current solution U
 
-    U_full[URHO] = rho;
+    burn_t burn_state;
+
+    burn_state.y[SRHO] = rho;
 
     for (int n = 0; n < NumSpec; ++n) {
-        U_full[UFS+n] = U(n+1);
+        burn_state.y[SFS+n] = U(n+1);
     }
 
-    U_full[UEINT] = U(NumSpec+1);
-    U_full[UEDEN] = E_var;
+    burn_state.y[SEINT] = U(NumSpec+1);
+
+    // we are not solving or accessing the momentum or total energy,
+    // so we'll just set those to zero here
 
     for (int n = 0; n < 3; ++n) {
-        U_full[UMX+n] = mom_source[n];
+        burn_state.y[SMX+n] = 0.0_rt;
     }
 
+    burn_state.y[SEDEN] = 0.0_rt;
+
     // normalize the species
     auto sum_rhoX = 0.0_rt;
     for (int n = 0; n < NumSpec; ++n) {
-        U_full[UFS+n] = amrex::max(network_rp::small_x, U_full[UFS+n]);
-        sum_rhoX += U_full[UFS+n];
+        // TODO: this should have a rho weighting on small_x
+        burn_state.y[SFS+n] = amrex::max(network_rp::small_x, burn_state.y[SFS+n]);
+        sum_rhoX += burn_state.y[SFS+n];
     }
 
     for (int n = 0; n < NumSpec; ++n) {
-        U_full[UFS+n] *= U_full[URHO] / sum_rhoX;
+        burn_state.y[SFS+n] *= burn_state.y[SRHO] / sum_rhoX;
     }
 
-    // compute the temperature and species derivatives --
-    // maybe this should be done using the burn_state
-    // returned by single_zone_react_source, since it is
-    // more consistent T from e
+    // compute the temperature -- this can be removed shortly, since
+    // we already do this in single_zone_react_source
 
-    eos_extra_t eos_state;
-    eos_state.rho = U_full[URHO];
-    eos_state.T = T_old;   // initial guess
+    burn_state.rho = burn_state.y[SRHO];
+    burn_state.T = T_old;   // initial guess
     for (int n = 0; n < NumSpec; ++n) {
-        eos_state.xn[n] = U_full[UFS+n] / U_full[URHO];
+        burn_state.xn[n] = burn_state.y[SFS+n] / burn_state.y[SRHO];
     }
 #if NAUX_NET > 0
     amrex::Error("error: aux data not currently supported in true SDC");
 #endif
 
-    eos_state.e = U_full[UEINT] / U_full[URHO];
+    burn_state.e = burn_state.y[SEINT] / burn_state.y[SRHO];
 
-    eos(eos_input_re, eos_state);
+    eos(eos_input_re, burn_state);
 
-    U_full[UTEMP] = eos_state.T;
-
-    // we'll create a burn_state to pass stuff from the RHS to the Jac function
-
-    burn_t burn_state;
-
-    single_zone_react_source(U_full, R_full, burn_state);
+    single_zone_react_source(burn_state, R_full);
 
     // we are solving J dU = -f
     // where f is Eq. 36 evaluated with the current guess for U
@@ -98,7 +93,7 @@ f_sdc_jac(const Real dt_m,
     // form from the simplified-SDC paper (Zingale et al. 2022).  Note:
     // we are not including density anymore.
 
-    single_zone_jac(U_full, burn_state, dt_m, Jac);
+    single_zone_jac(burn_state, dt_m, Jac);
 
     // Our Jacobian has the form:  J = I - dt dR/dw dwdU
     // (Eq. 38), so now we fix that
@@ -140,7 +135,6 @@ sdc_newton_solve(const Real dt_m,
 
     Array1D<Real, 1, NumSpec+1> U_react;
     Array1D<Real, 1, NumSpec+1> f_source;
-    GpuArray<Real, 3> mom_source;
     Array1D<Real, 1, NumSpec+1> dU_react;
     Array1D<Real, 1, NumSpec+1> f;
     RArray1D f_rhs;
@@ -171,19 +165,10 @@ sdc_newton_solve(const Real dt_m,
     }
     f_source(NumSpec+1) = U_old[UEINT] + dt_m * C[UEINT];
 
-    // set the momenta to be U_new
-    for (int n = 0; n < 3; ++n) {
-        mom_source[n] = U_new[UMX+n];
-    }
-
     // temperature will be used as an initial guess in the EOS
 
     Real T_old = U_old[UTEMP];
 
-    // we should be able to do an update for this somehow?
-
-    Real E_var = U_new[UEDEN];
-
     // we need the density too
 
     Real rho_new = U_new[URHO];
@@ -208,7 +193,7 @@ sdc_newton_solve(const Real dt_m,
 
     while (!converged && iter < MAX_ITER) {
         int info = 0;
-        f_sdc_jac(dt_m, U_react, f, Jac, f_source, rho_new, mom_source, T_old, E_var);
+        f_sdc_jac(dt_m, U_react, f, Jac, f_source, rho_new, T_old);
 
         // solve the linear system: Jac dU_react = -f
 #ifdef NEW_NETWORK_IMPLEMENTATION
diff --git a/Source/sdc/sdc_react_util.H b/Source/sdc/sdc_react_util.H
index 5c06d58908..dffd1dd1f0 100644
--- a/Source/sdc/sdc_react_util.H
+++ b/Source/sdc/sdc_react_util.H
@@ -1,33 +1,36 @@
 #ifndef SDC_REACT_UTIL_H
 #define SDC_REACT_UTIL_H
 
+#include <burn_type.H>
+#include <sdc_cons_to_burn.H>
+
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void
-single_zone_react_source(GpuArray<Real, NUM_STATE> const& state,
-                         GpuArray<Real, NUM_STATE>& R,
-                         burn_t& burn_state) {
+single_zone_react_source(burn_t& burn_state,
+                         GpuArray<Real, NUM_STATE>& R) {
 
     // note: we want this burn_state to come out of here so we can
     // reuse it elsewhere
 
-    auto rhoInv = 1.0_rt / state[URHO];
+    // we assume that burn_state.y[] holds the current state, except
+    // for temperature guess, and we will initialize the pieces needed
+    // to call the RHS
+
+    auto rhoInv = 1.0_rt / burn_state.y[SRHO];
 
-    burn_state.rho = state[URHO];
-    burn_state.T = state[UTEMP];
-    burn_state.e = state[UEINT] * rhoInv;
+    burn_state.rho = burn_state.y[SRHO];
+    burn_state.e = burn_state.y[SEINT] * rhoInv;
 
     for (int n = 0; n < NumSpec; ++n) {
-       burn_state.xn[n] = amrex::max(amrex::min(state[UFS+n] * rhoInv, 1.0_rt), small_x);
+       burn_state.xn[n] = amrex::max(amrex::min(burn_state.y[SFS+n] * rhoInv, 1.0_rt), small_x);
     }
 
 #if NAUX_NET > 0
     for (int n = 0; n < NumAux; ++n) {
-       burn_state.aux[n] = state[UFX+n] * rhoInv;
+       burn_state.aux[n] = burn_state.y[SFX+n] * rhoInv;
     }
 #endif
 
-    eos_t eos_state;
-
     // Ensure that the temperature going in is consistent with the internal energy.
     eos(eos_input_re, burn_state);
 
@@ -45,28 +48,26 @@ single_zone_react_source(GpuArray<Real, NUM_STATE> const& state,
 
     // species rates come back in terms of molar fractions
     for (int n = 0; n < NumSpec; ++n) {
-        R[UFS+n] = state[URHO] * aion[n] * ydot(1+n);
+        R[UFS+n] = burn_state.rho * aion[n] * ydot(1+n);
     }
 
-    R[UEDEN] = state[URHO] * ydot(net_ienuc);
-    R[UEINT] = state[URHO] * ydot(net_ienuc);
+    R[UEDEN] = burn_state.rho * ydot(net_ienuc);
+    R[UEINT] = burn_state.rho * ydot(net_ienuc);
 }
 
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void
 single_zone_react_source(const int i, const int j, const int k,
                          Array4<const Real> const& state,
-                         Array4<Real> const& R,
-                         burn_t& burn_state) {
+                         Array4<Real> const& R) {
 
-    GpuArray<Real, NUM_STATE> state_arr;
-    GpuArray<Real, NUM_STATE> R_arr; 
+    GpuArray<Real, NUM_STATE> R_arr;
 
-    for (int n = 0; n < NUM_STATE; ++n) {
-        state_arr[n] = state(i,j,k,n);
-    }
+    burn_t burn_state;
+
+    copy_cons_to_burn_type(i, j, k, state, burn_state);
 
-    single_zone_react_source(state_arr, R_arr, burn_state);
+    single_zone_react_source(burn_state, R_arr);
 
     for (int n = 0; n < NUM_STATE; ++n) {
         R(i,j,k,n) = R_arr[n];
@@ -75,8 +76,7 @@ single_zone_react_source(const int i, const int j, const int k,
 
 AMREX_GPU_HOST_DEVICE AMREX_INLINE
 void
-single_zone_jac(GpuArray<Real, NUM_STATE> const& state,
-                burn_t& burn_state, const Real dt,
+single_zone_jac(burn_t& burn_state, const Real dt,
                 JacNetArray2D& Jac) {
 
 #ifdef SIMPLIFIED_SDC
diff --git a/Source/sdc/vode_rhs_true_sdc.H b/Source/sdc/vode_rhs_true_sdc.H
index 755412197c..6a53995a12 100644
--- a/Source/sdc/vode_rhs_true_sdc.H
+++ b/Source/sdc/vode_rhs_true_sdc.H
@@ -12,7 +12,7 @@
 #include <actual_rhs.H>
 #endif
 #include <Castro_react_util.H>
-
+#include <sdc_cons_to_burn.H>
 #include <burner.H>
 
 
@@ -55,20 +55,7 @@ sdc_vode_solve(const Real dt_m,
 
     // store the state
 
-    burn_state.y[SRHO] = U_old[URHO];
-    burn_state.y[SMX] = U_old[UMX];
-    burn_state.y[SMY] = U_old[UMY];
-    burn_state.y[SMZ] = U_old[UMZ];
-    burn_state.y[SEDEN] = U_old[UEDEN];
-    burn_state.y[SEINT] = U_old[UEINT];
-    for (int n = 0; n < NumSpec; n++) {
-        burn_state.y[SFS+n] = U_old[UFS+n];
-    }
-#if NAUX_NET > 0
-    for (int n = 0; n < NumAux; n++) {
-        burn_state.y[SFX+n] = U_old[UFX+n];
-    }
-#endif
+    copy_cons_to_burn_type(U_old, burn_state);
 
     // we need an initial T guess for the EOS
     burn_state.T = U_old[UTEMP];