From 83b0e3a0a0356d6c8456971aa5658634a23428dd Mon Sep 17 00:00:00 2001 From: Michael Zingale Date: Fri, 6 Oct 2023 11:48:36 -0400 Subject: [PATCH] refactor SDC Newton solver to use burn_t (#2600) this cleans up some interfaces and reduces memory requirements. --- Source/sdc/Castro_sdc.cpp | 9 ----- Source/sdc/Castro_sdc_util.H | 15 +++---- Source/sdc/Make.package | 1 + Source/sdc/sdc_cons_to_burn.H | 59 ++++++++++++++++++++++++++++ Source/sdc/sdc_newton_solve.H | 71 ++++++++++++++-------------------- Source/sdc/sdc_react_util.H | 48 +++++++++++------------ Source/sdc/vode_rhs_true_sdc.H | 17 +------- 7 files changed, 122 insertions(+), 98 deletions(-) create mode 100644 Source/sdc/sdc_cons_to_burn.H diff --git a/Source/sdc/Castro_sdc.cpp b/Source/sdc/Castro_sdc.cpp index 8da8e9925a..5974e11698 100644 --- a/Source/sdc/Castro_sdc.cpp +++ b/Source/sdc/Castro_sdc.cpp @@ -241,9 +241,6 @@ Castro::do_sdc_update(int m_start, int m_end, Real dt) Elixir elix_R_new = R_new.elixir(); Array4 const& R_new_arr = R_new.array(); - // ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx1), - // BL_TO_FORTRAN_3D(U_new_center), - // BL_TO_FORTRAN_3D(R_new)); amrex::ParallelFor(bx1, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -359,9 +356,6 @@ Castro::construct_old_react_source(MultiFab& U_state, Elixir elix_r_center = R_center.elixir(); auto const R_center_arr = R_center.array(); - // ca_instantaneous_react(BL_TO_FORTRAN_BOX(obx), - // BL_TO_FORTRAN_3D(U_center), - // BL_TO_FORTRAN_3D(R_center)); amrex::ParallelFor(obx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { @@ -399,9 +393,6 @@ Castro::construct_old_react_source(MultiFab& U_state, auto const R_source_arr = R_source.array(mfi); // construct the reactive source term - // ca_instantaneous_react(BL_TO_FORTRAN_BOX(bx), - // BL_TO_FORTRAN_3D(U_state[mfi]), - // BL_TO_FORTRAN_3D(R_source[mfi])); amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { diff --git a/Source/sdc/Castro_sdc_util.H b/Source/sdc/Castro_sdc_util.H index 164daff206..3a4e71d2d4 100644 --- a/Source/sdc/Castro_sdc_util.H +++ b/Source/sdc/Castro_sdc_util.H @@ -132,8 +132,6 @@ sdc_update_o2(const int i, const int j, const int k, // Here, dt_m is the timestep between time-nodes m and m+1 - burn_t burn_state; - GpuArray U_old; GpuArray U_new; GpuArray R_full; @@ -170,9 +168,13 @@ sdc_update_o2(const int i, const int j, const int k, sdc_solve(dt_m, U_old, U_new, C_zone, sdc_iteration); - // we solved our system to some tolerance, but let's be sure we are conservative by - // reevaluating the reactions and { doing the full step update - single_zone_react_source(U_new, R_full, burn_state); + // we solved our system to some tolerance, but let's be sure + // we are conservative by reevaluating the reactions and + // doing the full step update + burn_t burn_state; + + copy_cons_to_burn_type(U_new, burn_state); + single_zone_react_source(burn_state, R_full); } for (int n = 0; n < NUM_STATE; ++n) { @@ -219,8 +221,7 @@ instantaneous_react(const int i, const int j, const int k, // (in that case, I am not sure if I can assume UTEMP is defined) if (okay_to_burn(i, j, k, state)) { - burn_t burn_state; - single_zone_react_source(i, j, k, state, R_source, burn_state); + single_zone_react_source(i, j, k, state, R_source); } else { for (int n = 0; n < NUM_STATE; ++n) { R_source(i, j, k, n) = 0.0_rt; diff --git a/Source/sdc/Make.package b/Source/sdc/Make.package index c135c88138..b942621345 100644 --- a/Source/sdc/Make.package +++ b/Source/sdc/Make.package @@ -1,4 +1,5 @@ CEXE_headers += Castro_sdc.H +CEXE_headers += sdc_const_to_burn.H CEXE_sources += sdc_util.cpp diff --git a/Source/sdc/sdc_cons_to_burn.H b/Source/sdc/sdc_cons_to_burn.H new file mode 100644 index 0000000000..d4ef43fd8b --- /dev/null +++ b/Source/sdc/sdc_cons_to_burn.H @@ -0,0 +1,59 @@ +#ifndef SDC_CONS_TO_BURN_H +#define SDC_CONS_TO_BURN_H + +#include + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void +copy_cons_to_burn_type(const int i, const int j, const int k, + Array4 const& state, + burn_t& burn_state) { + + burn_state.y[SRHO] = state(i,j,k,URHO); + burn_state.y[SMX] = state(i,j,k,UMX); + burn_state.y[SMY] = state(i,j,k,UMY); + burn_state.y[SMZ] = state(i,j,k,UMZ); + burn_state.y[SEDEN] = state(i,j,k,UEDEN); + burn_state.y[SEINT] = state(i,j,k,UEINT); + for (int n = 0; n < NumSpec; n++) { + burn_state.y[SFS+n] = state(i,j,k,UFS+n); + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; n++) { + burn_state.y[SFX+n] = state(i,j,k,UFX+n); + } +#endif + + burn_state.T = state(i,j,k,UTEMP); + burn_state.rho = state(i,j,k,URHO); + +} + +AMREX_GPU_HOST_DEVICE AMREX_INLINE +void +copy_cons_to_burn_type(GpuArray const& state, + burn_t& burn_state) { + + burn_state.y[SRHO] = state[URHO]; + burn_state.y[SMX] = state[UMX]; + burn_state.y[SMY] = state[UMY]; + burn_state.y[SMZ] = state[UMZ]; + burn_state.y[SEDEN] = state[UEDEN]; + burn_state.y[SEINT] = state[UEINT]; + for (int n = 0; n < NumSpec; n++) { + burn_state.y[SFS+n] = state[UFS+n]; + } +#if NAUX_NET > 0 + for (int n = 0; n < NumAux; n++) { + burn_state.y[SFX+n] = state[UFX+n]; + } +#endif + + burn_state.T = state[UTEMP]; + burn_state.rho = state[URHO]; + +} + + + +#endif diff --git a/Source/sdc/sdc_newton_solve.H b/Source/sdc/sdc_newton_solve.H index 312837cc68..ef30b3b76b 100644 --- a/Source/sdc/sdc_newton_solve.H +++ b/Source/sdc/sdc_newton_solve.H @@ -20,68 +20,63 @@ f_sdc_jac(const Real dt_m, JacNetArray2D& Jac, Array1D& f_source, const Real rho, - GpuArray& mom_source, - const Real T_old, - const Real E_var) { + const Real T_old) { // This is used with the Newton solve and returns f and the Jacobian - GpuArray U_full; GpuArray R_full; - // we are not solving the momentum equations - // create a full state -- we need this for some interfaces + // create a burn_t -- this will hold the full state, reconstructed + // from the current solution U - U_full[URHO] = rho; + burn_t burn_state; + + burn_state.y[SRHO] = rho; for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] = U(n+1); + burn_state.y[SFS+n] = U(n+1); } - U_full[UEINT] = U(NumSpec+1); - U_full[UEDEN] = E_var; + burn_state.y[SEINT] = U(NumSpec+1); + + // we are not solving or accessing the momentum or total energy, + // so we'll just set those to zero here for (int n = 0; n < 3; ++n) { - U_full[UMX+n] = mom_source[n]; + burn_state.y[SMX+n] = 0.0_rt; } + burn_state.y[SEDEN] = 0.0_rt; + // normalize the species auto sum_rhoX = 0.0_rt; for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] = amrex::max(network_rp::small_x, U_full[UFS+n]); - sum_rhoX += U_full[UFS+n]; + // TODO: this should have a rho weighting on small_x + burn_state.y[SFS+n] = amrex::max(network_rp::small_x, burn_state.y[SFS+n]); + sum_rhoX += burn_state.y[SFS+n]; } for (int n = 0; n < NumSpec; ++n) { - U_full[UFS+n] *= U_full[URHO] / sum_rhoX; + burn_state.y[SFS+n] *= burn_state.y[SRHO] / sum_rhoX; } - // compute the temperature and species derivatives -- - // maybe this should be done using the burn_state - // returned by single_zone_react_source, since it is - // more consistent T from e + // compute the temperature -- this can be removed shortly, since + // we already do this in single_zone_react_source - eos_extra_t eos_state; - eos_state.rho = U_full[URHO]; - eos_state.T = T_old; // initial guess + burn_state.rho = burn_state.y[SRHO]; + burn_state.T = T_old; // initial guess for (int n = 0; n < NumSpec; ++n) { - eos_state.xn[n] = U_full[UFS+n] / U_full[URHO]; + burn_state.xn[n] = burn_state.y[SFS+n] / burn_state.y[SRHO]; } #if NAUX_NET > 0 amrex::Error("error: aux data not currently supported in true SDC"); #endif - eos_state.e = U_full[UEINT] / U_full[URHO]; + burn_state.e = burn_state.y[SEINT] / burn_state.y[SRHO]; - eos(eos_input_re, eos_state); + eos(eos_input_re, burn_state); - U_full[UTEMP] = eos_state.T; - - // we'll create a burn_state to pass stuff from the RHS to the Jac function - - burn_t burn_state; - - single_zone_react_source(U_full, R_full, burn_state); + single_zone_react_source(burn_state, R_full); // we are solving J dU = -f // where f is Eq. 36 evaluated with the current guess for U @@ -98,7 +93,7 @@ f_sdc_jac(const Real dt_m, // form from the simplified-SDC paper (Zingale et al. 2022). Note: // we are not including density anymore. - single_zone_jac(U_full, burn_state, dt_m, Jac); + single_zone_jac(burn_state, dt_m, Jac); // Our Jacobian has the form: J = I - dt dR/dw dwdU // (Eq. 38), so now we fix that @@ -140,7 +135,6 @@ sdc_newton_solve(const Real dt_m, Array1D U_react; Array1D f_source; - GpuArray mom_source; Array1D dU_react; Array1D f; RArray1D f_rhs; @@ -171,19 +165,10 @@ sdc_newton_solve(const Real dt_m, } f_source(NumSpec+1) = U_old[UEINT] + dt_m * C[UEINT]; - // set the momenta to be U_new - for (int n = 0; n < 3; ++n) { - mom_source[n] = U_new[UMX+n]; - } - // temperature will be used as an initial guess in the EOS Real T_old = U_old[UTEMP]; - // we should be able to do an update for this somehow? - - Real E_var = U_new[UEDEN]; - // we need the density too Real rho_new = U_new[URHO]; @@ -208,7 +193,7 @@ sdc_newton_solve(const Real dt_m, while (!converged && iter < MAX_ITER) { int info = 0; - f_sdc_jac(dt_m, U_react, f, Jac, f_source, rho_new, mom_source, T_old, E_var); + f_sdc_jac(dt_m, U_react, f, Jac, f_source, rho_new, T_old); // solve the linear system: Jac dU_react = -f #ifdef NEW_NETWORK_IMPLEMENTATION diff --git a/Source/sdc/sdc_react_util.H b/Source/sdc/sdc_react_util.H index 5c06d58908..dffd1dd1f0 100644 --- a/Source/sdc/sdc_react_util.H +++ b/Source/sdc/sdc_react_util.H @@ -1,33 +1,36 @@ #ifndef SDC_REACT_UTIL_H #define SDC_REACT_UTIL_H +#include +#include + AMREX_GPU_HOST_DEVICE AMREX_INLINE void -single_zone_react_source(GpuArray const& state, - GpuArray& R, - burn_t& burn_state) { +single_zone_react_source(burn_t& burn_state, + GpuArray& R) { // note: we want this burn_state to come out of here so we can // reuse it elsewhere - auto rhoInv = 1.0_rt / state[URHO]; + // we assume that burn_state.y[] holds the current state, except + // for temperature guess, and we will initialize the pieces needed + // to call the RHS + + auto rhoInv = 1.0_rt / burn_state.y[SRHO]; - burn_state.rho = state[URHO]; - burn_state.T = state[UTEMP]; - burn_state.e = state[UEINT] * rhoInv; + burn_state.rho = burn_state.y[SRHO]; + burn_state.e = burn_state.y[SEINT] * rhoInv; for (int n = 0; n < NumSpec; ++n) { - burn_state.xn[n] = amrex::max(amrex::min(state[UFS+n] * rhoInv, 1.0_rt), small_x); + burn_state.xn[n] = amrex::max(amrex::min(burn_state.y[SFS+n] * rhoInv, 1.0_rt), small_x); } #if NAUX_NET > 0 for (int n = 0; n < NumAux; ++n) { - burn_state.aux[n] = state[UFX+n] * rhoInv; + burn_state.aux[n] = burn_state.y[SFX+n] * rhoInv; } #endif - eos_t eos_state; - // Ensure that the temperature going in is consistent with the internal energy. eos(eos_input_re, burn_state); @@ -45,28 +48,26 @@ single_zone_react_source(GpuArray const& state, // species rates come back in terms of molar fractions for (int n = 0; n < NumSpec; ++n) { - R[UFS+n] = state[URHO] * aion[n] * ydot(1+n); + R[UFS+n] = burn_state.rho * aion[n] * ydot(1+n); } - R[UEDEN] = state[URHO] * ydot(net_ienuc); - R[UEINT] = state[URHO] * ydot(net_ienuc); + R[UEDEN] = burn_state.rho * ydot(net_ienuc); + R[UEINT] = burn_state.rho * ydot(net_ienuc); } AMREX_GPU_HOST_DEVICE AMREX_INLINE void single_zone_react_source(const int i, const int j, const int k, Array4 const& state, - Array4 const& R, - burn_t& burn_state) { + Array4 const& R) { - GpuArray state_arr; - GpuArray R_arr; + GpuArray R_arr; - for (int n = 0; n < NUM_STATE; ++n) { - state_arr[n] = state(i,j,k,n); - } + burn_t burn_state; + + copy_cons_to_burn_type(i, j, k, state, burn_state); - single_zone_react_source(state_arr, R_arr, burn_state); + single_zone_react_source(burn_state, R_arr); for (int n = 0; n < NUM_STATE; ++n) { R(i,j,k,n) = R_arr[n]; @@ -75,8 +76,7 @@ single_zone_react_source(const int i, const int j, const int k, AMREX_GPU_HOST_DEVICE AMREX_INLINE void -single_zone_jac(GpuArray const& state, - burn_t& burn_state, const Real dt, +single_zone_jac(burn_t& burn_state, const Real dt, JacNetArray2D& Jac) { #ifdef SIMPLIFIED_SDC diff --git a/Source/sdc/vode_rhs_true_sdc.H b/Source/sdc/vode_rhs_true_sdc.H index 755412197c..6a53995a12 100644 --- a/Source/sdc/vode_rhs_true_sdc.H +++ b/Source/sdc/vode_rhs_true_sdc.H @@ -12,7 +12,7 @@ #include #endif #include - +#include #include @@ -55,20 +55,7 @@ sdc_vode_solve(const Real dt_m, // store the state - burn_state.y[SRHO] = U_old[URHO]; - burn_state.y[SMX] = U_old[UMX]; - burn_state.y[SMY] = U_old[UMY]; - burn_state.y[SMZ] = U_old[UMZ]; - burn_state.y[SEDEN] = U_old[UEDEN]; - burn_state.y[SEINT] = U_old[UEINT]; - for (int n = 0; n < NumSpec; n++) { - burn_state.y[SFS+n] = U_old[UFS+n]; - } -#if NAUX_NET > 0 - for (int n = 0; n < NumAux; n++) { - burn_state.y[SFX+n] = U_old[UFX+n]; - } -#endif + copy_cons_to_burn_type(U_old, burn_state); // we need an initial T guess for the EOS burn_state.T = U_old[UTEMP];