From 67518b0da3c0d86673de7828b98290c75300482d Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Wed, 23 Oct 2024 22:58:55 +0100
Subject: [PATCH 01/43] Separate Jacobians for "electron_split_lu" into
 separate function

Cleaner, and might help compile-time.
---
 .../src/electron_kinetic_equation.jl          | 354 ++++++++++--------
 1 file changed, 190 insertions(+), 164 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 7359576e2..83664c4cf 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -800,7 +800,6 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos
                 if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval
                     nl_solver_params.solves_since_precon_update[] = 0
 
-                    dt = t_params.dt[]
                     vth = @view moments.electron.vth[:,ir]
                     me = composition.me_over_mi
                     dens = @view moments.electron.dens[:,ir]
@@ -825,171 +824,16 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos
                     begin_vperp_vpa_region()
                     update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir)
                     @loop_vperp_vpa ivperp ivpa begin
-                        z_matrix = allocate_float(z.n, z.n)
-                        z_matrix .= 0.0
-
-                        z_speed = @view z_advect[1].speed[:,ivpa,ivperp,ir]
-                        for ielement ∈ 1:z.nelement_local
-                            imin = z.imin[ielement] - (ielement != 1)
-                            imax = z.imax[ielement]
-                            if ielement == 1
-                                z_matrix[imin,imin:imax] .+= z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement]
-                            else
-                                if z_speed[imin] < 0.0
-                                    z_matrix[imin,imin:imax] .+= z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement]
-                                elseif z_speed[imin] > 0.0
-                                    # Do nothing
-                                else
-                                    z_matrix[imin,imin:imax] .+= 0.5 .* z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement]
-                                end
-                            end
-                            z_matrix[imin+1:imax-1,imin:imax] .+= z_spectral.lobatto.Dmat[2:end-1,:] ./ z.element_scale[ielement]
-                            if ielement == z.nelement_local
-                                z_matrix[imax,imin:imax] .+= z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement]
-                            else
-                                if z_speed[imax] < 0.0
-                                    # Do nothing
-                                elseif z_speed[imax] > 0.0
-                                    z_matrix[imax,imin:imax] .+= z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement]
-                                else
-                                    z_matrix[imax,imin:imax] .+= 0.5 .* z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement]
-                                end
-                            end
-                        end
-                        # Multiply by advection speed
-                        for row ∈ 1:z.n
-                            z_matrix[row,:] .*= dt * z_speed[row]
-                        end
-
-                        # Diagonal entries
-                        for row ∈ 1:z.n
-                            z_matrix[row,row] += 1.0
-
-                            # Terms from `add_contribution_from_pdf_term!()`
-                            z_matrix[row,row] += dt * (0.5 * dqpar_dz[row] / ppar[row]
-                                                       + vpa.grid[ivpa] * vth[row] * (ddens_dz[row] / dens[row]
-                                                                                      - dvth_dz[row] / vth[row]))
-                        end
-                        if external_source_settings.electron.active
-                            for row ∈ 1:z.n
-                                # Source terms from `add_contribution_from_pdf_term!()`
-                                z_matrix[row,row] += dt * (1.5 * source_density_amplitude[row] / dens[row]
-                                                           - (0.5 * source_pressure_amplitude[row]
-                                                              + source_momentum_amplitude[row]) / ppar[row]
-                                                          )
-                            end
-                            if external_source_settings.electron.source_type == "energy"
-                                for row ∈ 1:z.n
-                                    # Contribution from `external_electron_source!()`
-                                    z_matrix[row,row] += dt * source_amplitude[row]
-                                end
-                            end
-                        end
-                        if collisions.krook.nuee0 > 0.0 || collisions.krook.nuei0 > 0.0
-                            for row ∈ 1:z.n
-                                # Contribution from electron_krook_collisions!()
-                                nu_ee = get_collision_frequency_ee(collisions, dens[row], vth[row])
-                                nu_ei = get_collision_frequency_ei(collisions, dens[row], vth[row])
-                                z_matrix[row,row] += dt * (nu_ee + nu_ei)
-                            end
-                        end
-
+                        z_matrix, ppar_matrix = get_electron_split_Jacobians!(
+                             ivperp, ivpa, ppar, moments, collisions, composition, z,
+                             vperp, vpa, z_spectral, vperp_spectral, vpa_spectral,
+                             z_advect, vpa_advect, scratch_dummy,
+                             external_source_settings, num_diss_params, t_params, ion_dt,
+                             ir, evolve_ppar)
                         @timeit_debug global_timer "lu" nl_solver_params.preconditioners.z[ivpa,ivperp,ir] = lu(sparse(z_matrix))
-                    end
-
-                    if z.irank == 0
-                        ppar_matrix = allocate_float(z.n, z.n)
-                        ppar_matrix .= 0.0
-
-                        if composition.electron_physics == kinetic_electrons_with_temperature_equation
-                            error("kinetic_electrons_with_temperature_equation not "
-                                  * "supported yet in preconditioner")
-                        elseif composition.electron_physics != kinetic_electrons
-                            error("Unsupported electron_physics=$(composition.electron_physics) "
-                                  * "in electron_backward_euler!() preconditioner.")
-                        end
-
-                        # Reconstruct w_∥^3 moment of g_e from already-calculated qpar
-                        @views third_moment = @. 0.5 * moments.electron.qpar[:,ir] / electron_ppar_new / vth
-
-                        # Note that as
-                        #   qpar = 2 * ppar * vth * third_moment
-                        #        = 2 * ppar^(3/2) / dens^(1/2) / me^(1/2) * third_moment
-                        # we have that
-                        #   d(qpar)/dz = 2 * ppar^(3/2) / dens^(1/2) / me^(1/2) * d(third_moment)/dz
-                        #                - ppar^(3/2) / dens^(3/2) / me^(1/2) * third_moment * d(dens)/dz
-                        #                + 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(ppar)/dz
-                        # so for the Jacobian
-                        #   d[d(qpar)/dz)]/d[ppar]
-                        #     = 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * d(third_moment)/dz
-                        #       - 3/2 * ppar^(1/2) / dens^(3/2) / me^(1/2) * third_moment * d(dens)/dz
-                        #       + 3/2 / ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(ppar)/dz
-                        #       + 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(.)/dz
-                        dthird_moment_dz = z.scratch2
-                        derivative_z!(z.scratch2, third_moment, buffer_1, buffer_2,
-                                      buffer_3, buffer_4, z_spectral, z)
-
-                        # Diagonal terms
-                        for row ∈ 1:z.n
-                            ppar_matrix[row,row] = 1.0
-
-                            # 3*ppar*dupar_dz
-                            ppar_matrix[row,row] += 3.0 * dt * dupar_dz[row]
-
-                            # terms from d(qpar)/dz
-                            ppar_matrix[row,row] +=
-                                dt * (3.0 * sqrt(electron_ppar_new[row] / dens[row] / me) * dthird_moment_dz[row]
-                                      - 1.5 * sqrt(electron_ppar_new[row] / me) / dens[row]^1.5 * third_moment[row] * ddens_dz[row]
-                                      + 1.5 / sqrt(electron_ppar_new[row] / dens[row] / me) * third_moment[row] * dppar_dz[row])
-                        end
-                        if ion_dt !== nothing
-                            # Backward-Euler forcing term
-                            for row ∈ 1:z.n
-                                ppar_matrix[row,row] += dt / ion_dt 
-                            end
-                        end
-
-
-                        # d(.)/dz terms
-                        # Note that the z-derivative matrix is local to this block, and
-                        # for the preconditioner we do not include any distributed-MPI
-                        # communication (we rely on the JFNK iteration to sort out the
-                        # coupling between blocks).
-                        if !isa(z_spectral, gausslegendre_info)
-                            error("Only gausslegendre_pseudospectral coordinate type is "
-                                  * "supported by electron_backward_euler!() "
-                                  * "preconditioner because we need differentiation"
-                                  * "matrices.")
-                        end
-                        z_deriv_matrix = z_spectral.D_matrix
-                        for row ∈ 1:z.n
-                            @. ppar_matrix[row,:] +=
-                                dt * (upar[row]
-                                      + 3.0 * sqrt(electron_ppar_new[row] / dens[row] / me) * third_moment[row]) *
-                                z_deriv_matrix[row,:]
+                        if ivperp == 1 && ivpa == 1
+                            @timeit_debug global_timer "lu" nl_solver_params.preconditioners.ppar[ir] = lu(sparse(ppar_matrix))
                         end
-
-                        if num_diss_params.electron.moment_dissipation_coefficient > 0.0
-                            error("z-diffusion of electron_ppar not yet supported in "
-                                  * "preconditioner")
-                        end
-                        if collisions.nu_ei > 0.0
-                            error("electron-ion collision terms for electron_ppar not yet "
-                                  * "supported in preconditioner")
-                        end
-                        if composition.n_neutral_species > 0 && collisions.charge_exchange_electron > 0.0
-                            error("electron 'charge exchange' terms for electron_ppar not yet "
-                                  * "supported in preconditioner")
-                        end
-                        if composition.n_neutral_species > 0 && collisions.ionization_electron > 0.0
-                            error("electron ionization terms for electron_ppar not yet "
-                                  * "supported in preconditioner")
-                        end
-
-                        @timeit_debug global_timer "lu" nl_solver_params.preconditioners.ppar[ir] = lu(sparse(ppar_matrix))
-                    else
-                        ppar_matrix = allocate_float(0, 0)
-                        ppar_matrix[] = 1.0
                     end
                 end
 
@@ -3040,6 +2884,188 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati
     return nothing
 end
 
+"""
+    get_electron_split_Jacobians!(ivperp, ivpa, ppar, moments, collisions, composition,
+                                  z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral,
+                                  z_advect, vpa_advect, scratch_dummy,
+                                  external_source_settings, num_diss_params, t_params,
+                                  ion_dt, ir, evolve_ppar)
+
+Allocate and return `(z_matrix, ppar_matrix)`: the Jacobian matrices for the electron
+kinetic equation and the electron energy equation. `evolve_ppar` is currently unused.
+"""
+@timeit global_timer get_electron_split_Jacobians!(
+                         ivperp, ivpa, ppar, moments, collisions, composition, z,
+                         vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
+                         vpa_advect, scratch_dummy, external_source_settings,
+                         num_diss_params, t_params, ion_dt, ir, evolve_ppar) = begin
+
+    dt = t_params.dt[]
+
+    z_matrix = allocate_float(z.n, z.n)
+    z_matrix .= 0.0
+
+    z_speed = @view z_advect[1].speed[:,ivpa,ivperp,ir]
+    for ielement ∈ 1:z.nelement_local
+        imin = z.imin[ielement] - (ielement != 1)
+        imax = z.imax[ielement]
+        if ielement == 1
+            z_matrix[imin,imin:imax] .+= z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement]
+        else
+            if z_speed[imin] < 0.0
+                z_matrix[imin,imin:imax] .+= z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement]
+            elseif z_speed[imin] > 0.0
+                # Do nothing
+            else
+                z_matrix[imin,imin:imax] .+= 0.5 .* z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement]
+            end
+        end
+        z_matrix[imin+1:imax-1,imin:imax] .+= z_spectral.lobatto.Dmat[2:end-1,:] ./ z.element_scale[ielement]
+        if ielement == z.nelement_local
+            z_matrix[imax,imin:imax] .+= z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement]
+        else
+            if z_speed[imax] < 0.0
+                # Do nothing
+            elseif z_speed[imax] > 0.0
+                z_matrix[imax,imin:imax] .+= z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement]
+            else
+                z_matrix[imax,imin:imax] .+= 0.5 .* z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement]
+            end
+        end
+    end
+    # Multiply by advection speed
+    for row ∈ 1:z.n
+        z_matrix[row,:] .*= dt * z_speed[row]
+    end
+
+    # Diagonal entries
+    for row ∈ 1:z.n
+        z_matrix[row,row] += 1.0
+
+        # Terms from `add_contribution_from_pdf_term!()`
+        z_matrix[row,row] += dt * (0.5 * dqpar_dz[row] / ppar[row]
+                                   + vpa.grid[ivpa] * vth[row] * (ddens_dz[row] / dens[row]
+                                                                  - dvth_dz[row] / vth[row]))
+    end
+    if external_source_settings.electron.active
+        for row ∈ 1:z.n
+            # Source terms from `add_contribution_from_pdf_term!()`
+            z_matrix[row,row] += dt * (1.5 * source_density_amplitude[row] / dens[row]
+                                       - (0.5 * source_pressure_amplitude[row]
+                                          + source_momentum_amplitude[row]) / ppar[row]
+                                      )
+        end
+        if external_source_settings.electron.source_type == "energy"
+            for row ∈ 1:z.n
+                # Contribution from `external_electron_source!()`
+                z_matrix[row,row] += dt * source_amplitude[row]
+            end
+        end
+    end
+    if collisions.krook.nuee0 > 0.0 || collisions.krook.nuei0 > 0.0
+        for row ∈ 1:z.n
+            # Contribution from electron_krook_collisions!()
+            nu_ee = get_collision_frequency_ee(collisions, dens[row], vth[row])
+            nu_ei = get_collision_frequency_ei(collisions, dens[row], vth[row])
+            z_matrix[row,row] += dt * (nu_ee + nu_ei)
+        end
+    end
+
+    if z.irank == 0 && ivperp == 1 && ivpa == 1
+        ppar_matrix = allocate_float(z.n, z.n)
+        ppar_matrix .= 0.0
+
+        if composition.electron_physics == kinetic_electrons_with_temperature_equation
+            error("kinetic_electrons_with_temperature_equation not "
+                  * "supported yet in preconditioner")
+        elseif composition.electron_physics != kinetic_electrons
+            error("Unsupported electron_physics=$(composition.electron_physics) "
+                  * "in electron_backward_euler!() preconditioner.")
+        end
+
+        # Reconstruct w_∥^3 moment of g_e from already-calculated qpar
+        @views third_moment = @. 0.5 * moments.electron.qpar[:,ir] / electron_ppar_new / vth
+
+        # Note that as
+        #   qpar = 2 * ppar * vth * third_moment
+        #        = 2 * ppar^(3/2) / dens^(1/2) / me^(1/2) * third_moment
+        # we have that
+        #   d(qpar)/dz = 2 * ppar^(3/2) / dens^(1/2) / me^(1/2) * d(third_moment)/dz
+        #                - ppar^(3/2) / dens^(3/2) / me^(1/2) * third_moment * d(dens)/dz
+        #                + 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(ppar)/dz
+        # so for the Jacobian
+        #   d[d(qpar)/dz)]/d[ppar]
+        #     = 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * d(third_moment)/dz
+        #       - 3/2 * ppar^(1/2) / dens^(3/2) / me^(1/2) * third_moment * d(dens)/dz
+        #       + 3/2 / ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(ppar)/dz
+        #       + 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(.)/dz
+        dthird_moment_dz = z.scratch2
+        derivative_z!(z.scratch2, third_moment, buffer_1, buffer_2,
+                      buffer_3, buffer_4, z_spectral, z)
+
+        # Diagonal terms
+        for row ∈ 1:z.n
+            ppar_matrix[row,row] = 1.0
+
+            # 3*ppar*dupar_dz
+            ppar_matrix[row,row] += 3.0 * dt * dupar_dz[row]
+
+            # terms from d(qpar)/dz
+            ppar_matrix[row,row] +=
+                dt * (3.0 * sqrt(electron_ppar_new[row] / dens[row] / me) * dthird_moment_dz[row]
+                      - 1.5 * sqrt(electron_ppar_new[row] / me) / dens[row]^1.5 * third_moment[row] * ddens_dz[row]
+                      + 1.5 / sqrt(electron_ppar_new[row] / dens[row] / me) * third_moment[row] * dppar_dz[row])
+        end
+        if ion_dt !== nothing
+            # Backward-Euler forcing term
+            for row ∈ 1:z.n
+                ppar_matrix[row,row] += dt / ion_dt
+            end
+        end
+
+        # d(.)/dz terms
+        # Note that the z-derivative matrix is local to this block, and
+        # for the preconditioner we do not include any distributed-MPI
+        # communication (we rely on the JFNK iteration to sort out the
+        # coupling between blocks).
+        if !isa(z_spectral, gausslegendre_info)
+            error("Only gausslegendre_pseudospectral coordinate type is "
+                  * "supported by electron_backward_euler!() "
+                  * "preconditioner because we need differentiation"
+                  * "matrices.")
+        end
+        z_deriv_matrix = z_spectral.D_matrix
+        for row ∈ 1:z.n
+            @. ppar_matrix[row,:] +=
+                dt * (upar[row]
+                      + 3.0 * sqrt(electron_ppar_new[row] / dens[row] / me) * third_moment[row]) *
+                z_deriv_matrix[row,:]
+        end
+
+        if num_diss_params.electron.moment_dissipation_coefficient > 0.0
+            error("z-diffusion of electron_ppar not yet supported in "
+                  * "preconditioner")
+        end
+        if collisions.nu_ei > 0.0
+            error("electron-ion collision terms for electron_ppar not yet "
+                  * "supported in preconditioner")
+        end
+        if composition.n_neutral_species > 0 && collisions.charge_exchange_electron > 0.0
+            error("electron 'charge exchange' terms for electron_ppar not yet "
+                  * "supported in preconditioner")
+        end
+        if composition.n_neutral_species > 0 && collisions.ionization_electron > 0.0
+            error("electron ionization terms for electron_ppar not yet "
+                  * "supported in preconditioner")
+        end
+    else
+        ppar_matrix = allocate_float(0, 0)
+        ppar_matrix[] = 1.0
+    end
+
+    return z_matrix, ppar_matrix
+end
+
 #"""
 #electron_kinetic_equation_residual! calculates the residual of the (time-independent) electron kinetic equation
 #INPUTS:

From 8be9e5aa4c6f800c85b681e191cf15ea524367ad Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Wed, 23 Oct 2024 23:11:52 +0100
Subject: [PATCH 02/43] Use Val() types for preconditioner_type

Hopefully helps the compiler to remove unneeded branches at compile
time.
---
 moment_kinetics/src/electron_kinetic_equation.jl |  6 +++---
 moment_kinetics/src/nonlinear_solvers.jl         | 14 +++++++-------
 moment_kinetics/src/time_advance.jl              |  6 +++---
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 83664c4cf..49645690e 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -796,7 +796,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos
                        scratch_dummy, z, z_spectral,
                        num_diss_params.electron.moment_dissipation_coefficient, ir)
 
-            if nl_solver_params.preconditioner_type == "electron_split_lu"
+            if nl_solver_params.preconditioner_type === Val(:electron_split_lu)
                 if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval
                     nl_solver_params.solves_since_precon_update[] = 0
 
@@ -863,7 +863,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos
 
                 left_preconditioner = identity
                 right_preconditioner = split_precon!
-            elseif nl_solver_params.preconditioner_type == "electron_lu"
+            elseif nl_solver_params.preconditioner_type === Val(:electron_lu)
 
                 if t_params.dt[] > 1.5 * nl_solver_params.precon_dt[] ||
                         t_params.dt[] < 2.0/3.0 * nl_solver_params.precon_dt[]
@@ -993,7 +993,7 @@ global_rank[] == 0 && println("recalculating precon")
 
                 left_preconditioner = identity
                 right_preconditioner = lu_precon!
-            elseif nl_solver_params.preconditioner_type == "none"
+            elseif nl_solver_params.preconditioner_type === Val(:none)
                 left_preconditioner = identity
                 right_preconditioner = identity
             else
diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl
index fa330441c..0958070a8 100644
--- a/moment_kinetics/src/nonlinear_solvers.jl
+++ b/moment_kinetics/src/nonlinear_solvers.jl
@@ -42,7 +42,7 @@ using MPI
 using SparseArrays
 using StatsBase: mean
 
-struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon}
+struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon,Tpretype}
     rtol::mk_float
     atol::mk_float
     nonlinear_max_iterations::mk_int
@@ -67,7 +67,7 @@ struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon}
     serial_solve::Bool
     max_nonlinear_iterations_this_step::Base.RefValue{mk_int}
     max_linear_iterations_this_step::Base.RefValue{mk_int}
-    preconditioner_type::String
+    preconditioner_type::Tpretype
     preconditioner_update_interval::mk_int
     preconditioners::Tprecon
 end
@@ -83,7 +83,7 @@ for example a preconditioner object for each point in that outer loop.
 """
 function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); default_rtol=1.0e-5,
                                default_atol=1.0e-12, serial_solve=false,
-                               electron_ppar_pdf_solve=false, preconditioner_type="none")
+                               electron_ppar_pdf_solve=false, preconditioner_type=Val(:none))
     nl_solver_section = set_defaults_and_check_section!(
         input_dict, "nonlinear_solver";
         rtol=default_rtol,
@@ -157,12 +157,12 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
         end
     end
 
-    if preconditioner_type == "lu"
+    if preconditioner_type === Val(:lu)
         # Create dummy LU solver objects so we can create an array for preconditioners.
         # These will be calculated properly within the time loop.
         preconditioners = fill(lu(sparse(1.0*I, total_size_coords, total_size_coords)),
                                reverse(outer_coord_sizes))
-    elseif preconditioner_type == "electron_split_lu"
+    elseif preconditioner_type === Val(:electron_split_lu)
         preconditioners = (z=fill(lu(sparse(1.0*I, coords.z.n, coords.z.n)),
                                   tuple(coords.vpa.n, reverse(outer_coord_sizes)...)),
                            vpa=fill(lu(sparse(1.0*I, coords.vpa.n, coords.vpa.n)),
@@ -170,7 +170,7 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
                            ppar=fill(lu(sparse(1.0*I, coords.z.n, coords.z.n)),
                                      reverse(outer_coord_sizes)),
                           )
-    elseif preconditioner_type == "electron_lu"
+    elseif preconditioner_type === Val(:electron_lu)
         pdf_plus_ppar_size = total_size_coords + coords.z.n
         preconditioners = fill((lu(sparse(1.0*I, 1, 1)),
                                 allocate_shared_float(pdf_plus_ppar_size, pdf_plus_ppar_size),
@@ -178,7 +178,7 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
                                 allocate_shared_float(pdf_plus_ppar_size),
                                ),
                                reverse(outer_coord_sizes))
-    elseif preconditioner_type == "none"
+    elseif preconditioner_type === Val(:none)
         preconditioners = nothing
     else
         error("Unrecognised preconditioner_type=$preconditioner_type")
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index 57ac3167e..30ec91cbd 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -671,7 +671,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                               default_rtol=t_params.rtol / 10.0,
                               default_atol=t_params.atol / 10.0,
                               electron_ppar_pdf_solve=true,
-                              preconditioner_type="electron_lu")
+                              preconditioner_type=Val(:electron_lu))
     nl_solver_ion_advance_params =
         setup_nonlinear_solve(t_params.implicit_ion_advance, input_dict,
                               (s=composition.n_ion_species, r=r, z=z, vperp=vperp,
@@ -679,7 +679,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                               ();
                               default_rtol=t_params.rtol / 10.0,
                               default_atol=t_params.atol / 10.0,
-                              preconditioner_type="lu")
+                              preconditioner_type=Val(:lu))
     # Implicit solve for vpa_advection term should be done in serial, as it will be called
     # within a parallelised s_r_z_vperp loop.
     nl_solver_vpa_advection_params =
@@ -687,7 +687,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                               (composition.n_ion_species, r, z, vperp);
                               default_rtol=t_params.rtol / 10.0,
                               default_atol=t_params.atol / 10.0,
-                              serial_solve=true, preconditioner_type="lu")
+                              serial_solve=true, preconditioner_type=Val(:lu))
     if nl_solver_ion_advance_params !== nothing &&
             nl_solver_vpa_advection_params !== nothing
         error("Cannot use implicit_ion_advance and implicit_vpa_advection at the same "

From 51c5180c413a03bf6d5def1f7e8e8078bfa724e0 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 24 Oct 2024 12:29:03 +0100
Subject: [PATCH 03/43] Improve type-stability in electron_backward_euler!()

Hopefully improves compile/run time.
---
 .../src/electron_kinetic_equation.jl          | 73 ++++++++++---------
 moment_kinetics/src/time_advance.jl           |  3 +-
 2 files changed, 42 insertions(+), 34 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 49645690e..1a7327edd 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -665,7 +665,9 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos
 
     reduced_by_ion_dt = false
     if ion_dt !== nothing
-        evolve_ppar = true
+        if !evolve_ppar
+            error("evolve_ppar must be `true` when `ion_dt` is passed. ion_dt=$ion_dt")
+        end
 
         # Use forward-Euler step (with `ion_dt` as the timestep) as initial guess for
         # updated electron_ppar
@@ -742,7 +744,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos
                                  t_params.moments_output_counter[], r, z, vperp, vpa)
         end
     end
-    electron_pdf_converged = false
+    electron_pdf_converged = Ref(false)
     # No paralleism in r for now - will need to add a specially adapted shared-memory
     # parallelism scheme to allow it for 2D1V or 2D2V simulations.
     for ir ∈ 1:r.n
@@ -753,11 +755,11 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos
         buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1]
 
         # initialise the electron pdf convergence flag to false
-        electron_pdf_converged = false
+        electron_pdf_converged[] = false
 
         first_step = true
         # evolve (artificially) in time until the residual is less than the tolerance
-        while (!electron_pdf_converged
+        while (!electron_pdf_converged[]
                && ((max_electron_pdf_iterations !== nothing && t_params.step_counter[] - initial_step_counter < max_electron_pdf_iterations)
                    || (max_electron_sim_time !== nothing && t_params.t[] - initial_time < max_electron_sim_time))
                && t_params.dt[] > 0.0 && !isnan(t_params.dt[]))
@@ -922,33 +924,33 @@ global_rank[] == 0 && println("recalculating precon")
                 function lu_precon!(x)
                     precon_ppar, precon_f = x
 
-                    precon_lu, _, input_buffer, output_buffer =
+                    precon_lu, _, this_input_buffer, this_output_buffer =
                         nl_solver_params.preconditioners[ir]
 
                     begin_serial_region()
                     counter = 1
                     @loop_z_vperp_vpa iz ivperp ivpa begin
-                        input_buffer[counter] = precon_f[ivpa,ivperp,iz]
+                        this_input_buffer[counter] = precon_f[ivpa,ivperp,iz]
                         counter += 1
                     end
                     @loop_z iz begin
-                        input_buffer[counter] = precon_ppar[iz]
+                        this_input_buffer[counter] = precon_ppar[iz]
                         counter += 1
                     end
 
                     begin_serial_region()
                     @serial_region begin
-                        @timeit_debug global_timer "ldiv!" ldiv!(output_buffer, precon_lu, input_buffer)
+                        @timeit_debug global_timer "ldiv!" ldiv!(this_output_buffer, precon_lu, this_input_buffer)
                     end
 
                     begin_serial_region()
                     counter = 1
                     @loop_z_vperp_vpa iz ivperp ivpa begin
-                        precon_f[ivpa,ivperp,iz] = output_buffer[counter]
+                        precon_f[ivpa,ivperp,iz] = this_output_buffer[counter]
                         counter += 1
                     end
                     @loop_z iz begin
-                        precon_ppar[iz] = output_buffer[counter]
+                        precon_ppar[iz] = this_output_buffer[counter]
                         counter += 1
                     end
 
@@ -1003,8 +1005,8 @@ global_rank[] == 0 && println("recalculating precon")
 
             # Do a backward-Euler update of the electron pdf, and (if evove_ppar=true) the
             # electron parallel pressure.
-            function residual_func!(residual, new_variables)
-                electron_ppar_residual, f_electron_residual = residual
+            function residual_func!(this_residual, new_variables)
+                electron_ppar_residual, f_electron_residual = this_residual
                 electron_ppar_newvar, f_electron_newvar = new_variables
 
                 # enforce the boundary condition(s) on the electron pdf
@@ -1259,48 +1261,53 @@ global_rank[] == 0 && println("recalculating precon")
                                      buffer_3, buffer_4, z_spectral, z)
             end
 
-            residual = -1.0
+            residual_norm = -1.0
             if newton_success
                 # Calculate residuals to decide if iteration is converged.
-                # Might want an option to calculate the residual only after a certain number
-                # of iterations (especially during initialization when there are likely to be
-                # a large number of iterations required) to avoid the expense, and especially
-                # the global MPI.Bcast()?
+                # Might want an option to calculate the residual only after a certain
+                # number of iterations (especially during initialization when there are
+                # likely to be a large number of iterations required) to avoid the
+                # expense, and especially the global MPI.Bcast()?
                 begin_z_vperp_vpa_region()
-                residual = steady_state_residuals(new_scratch.pdf_electron,
-                                                  old_scratch.pdf_electron,
-                                                  t_params.dt[]; use_mpi=true,
-                                                  only_max_abs=true)
                 if global_rank[] == 0
-                    residual = first(values(residual))[1]
+                    ss_residual_norms = steady_state_residuals(new_scratch.pdf_electron,
+                                                               old_scratch.pdf_electron,
+                                                               t_params.dt[]; use_mpi=true,
+                                                               only_max_abs=true)
+                    residual_norm = first(values(ss_residual_norms))[1]::mk_float
+                else
+                    ss_residual_norms = steady_state_residuals(new_scratch.pdf_electron,
+                                                               old_scratch.pdf_electron,
+                                                               t_params.dt[]; use_mpi=true,
+                                                               only_max_abs=true)
                 end
                 if evolve_ppar
-                    ppar_residual =
+                    ss_ppar_residual_norms =
                         steady_state_residuals(new_scratch.electron_ppar,
                                                old_scratch.electron_ppar,
                                                t_params.dt[]; use_mpi=true,
                                                only_max_abs=true)
                     if global_rank[] == 0
-                        ppar_residual = first(values(ppar_residual))[1]
-                        residual = max(residual, ppar_residual)
+                        ppar_residual = first(values(ss_ppar_residual_norms))[1]::mk_float
+                        residual_norm = max(residual_norm, ppar_residual)
                     end
                 end
                 if global_rank[] == 0
                     if residual_tolerance === nothing
                         residual_tolerance = t_params.converged_residual_value
                     end
-                    electron_pdf_converged = abs(residual) < residual_tolerance
+                    electron_pdf_converged[] = abs(residual_norm) < residual_tolerance
                 end
-                @timeit_debug global_timer "MPI.Bcast comm_world" electron_pdf_converged = MPI.Bcast(electron_pdf_converged, 0, comm_world)
+                @timeit_debug global_timer "MPI.Bcast! comm_world" MPI.Bcast!(electron_pdf_converged, 0, comm_world)
             end
 
             if (mod(t_params.step_counter[] - initial_step_counter,100) == 0)
                 begin_serial_region()
                 @serial_region begin
                     if z.irank == 0 && z.irank == z.nrank - 1
-                        println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary: ", phi[[1,end],1], " residual: ", residual)
+                        println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary: ", phi[[1,end],1], " residual_norm: ", residual_norm)
                     elseif z.irank == 0
-                        println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary_lower: ", phi[1,1], " residual: ", residual)
+                        println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary_lower: ", phi[1,1], " residual_norm: ", residual_norm)
                     end
                 end
             end
@@ -1327,11 +1334,11 @@ global_rank[] == 0 && println("recalculating precon")
             reset_nonlinear_per_stage_counters!(nl_solver_params)
 
             t_params.step_counter[] += 1
-            if electron_pdf_converged
+            if electron_pdf_converged[]
                 break
             end
         end
-        if !electron_pdf_converged
+        if !electron_pdf_converged[]
             # If electron solve failed to converge for some `ir`, the failure will be
             # handled by restarting the ion timestep with a smaller dt, so no need to try
             # to solve for further `ir` values.
@@ -1355,7 +1362,7 @@ global_rank[] == 0 && println("recalculating precon")
     end
     begin_serial_region()
     @serial_region begin
-        if !electron_pdf_converged || do_debug_io
+        if !electron_pdf_converged[] || do_debug_io
             if io_electron !== nothing && io_electron !== true
                 t_params.moments_output_counter[] += 1
                 write_electron_state(scratch, moments, t_params, io_electron,
@@ -1390,7 +1397,7 @@ global_rank[] == 0 && println("recalculating precon")
         # Reset dt in case it was reduced to be less than 0.5*ion_dt
         t_params.dt[] = t_params.previous_dt[]
     end
-    if !electron_pdf_converged
+    if !electron_pdf_converged[]
         success = "kinetic-electrons"
     else
         success = ""
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index 30ec91cbd..15278f281 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -3529,7 +3529,8 @@ end
                                                 external_source_settings, num_diss_params,
                                                 nl_solver_params.electron_advance,
                                                 max_electron_pdf_iterations,
-                                                max_electron_sim_time; ion_dt=dt)
+                                                max_electron_sim_time; evolve_ppar=true,
+                                                ion_dt=dt)
 
         # Update `fvec_out.electron_ppar` with the new electron pressure
         begin_r_z_region()

From cfc36a42135f38d95ac2791f7b2f1a310e961c70 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 24 Oct 2024 12:31:46 +0100
Subject: [PATCH 04/43] Fix typo in `steady_state_square_residuals()`

---
 moment_kinetics/src/analysis.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/moment_kinetics/src/analysis.jl b/moment_kinetics/src/analysis.jl
index 68e911efa..3f17645a9 100644
--- a/moment_kinetics/src/analysis.jl
+++ b/moment_kinetics/src/analysis.jl
@@ -817,7 +817,7 @@ function steady_state_square_residuals(variable, variable_at_previous_time, dt;
 
         if only_max_abs
             absolute_residual =
-                _steady_state_residual(variable, variable_at_previous_time, reshaped_dt)
+                _steady_state_absolute_residual(variable, variable_at_previous_time, reshaped_dt)
             # Need to wrap the maximum(...) in a call to vec(...) so that we return a
             # Vector, not an N-dimensional array where the first (N-1) dimensions all have
             # size 1.

From 9c43c728d95115fdbacf3af89a5833405afc76e5 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 24 Oct 2024 12:48:11 +0100
Subject: [PATCH 05/43] Return `Vector` from `steady_state_residuals()` when
 `only_max_abs=true`

Also provide positional-arguments-only form of
`steady_state_residuals()`, which helps const-propagation and therefore
type stability (the compiler knows that the function returns a Vector
when `only_max_abs` is passed a const `true`).
---
 moment_kinetics/src/analysis.jl               | 43 ++++++++++---------
 .../src/electron_kinetic_equation.jl          | 29 ++++++-------
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/moment_kinetics/src/analysis.jl b/moment_kinetics/src/analysis.jl
index 3f17645a9..3fac86246 100644
--- a/moment_kinetics/src/analysis.jl
+++ b/moment_kinetics/src/analysis.jl
@@ -15,7 +15,7 @@ using ..interpolation: interpolate_to_grid_1d
 using ..load_data: open_readonly_output_file, get_nranks, load_pdf_data, load_rank_data
 using ..load_data: load_distributed_ion_pdf_slice
 using ..looping
-using ..type_definitions: mk_int
+using ..type_definitions: mk_int, mk_float
 using ..velocity_moments: integrate_over_vspace
 
 using FFTW
@@ -595,7 +595,8 @@ const default_epsilon = 1.0e-4
 
 """
     steady_state_residuals(variable, variable_at_previous_time, dt;
-                           epsilon=$default_epsilon, use_mpi=false)
+                           epsilon=$default_epsilon, use_mpi=false,
+                           only_max_abs=false)
 
 Calculate how close a variable is to steady state.
 
@@ -630,23 +631,26 @@ initialised, and that `variable` has r and z dimensions but no species dimension
 distributed-memory MPI, this routine will double-count the points on block boundaries.
 
 If `only_max_abs=true` is passed, then only calculate the 'maxium absolute residual'. In
-this case the OrderedDict returned will have only one entry, for `"max absolute
-residual"`.
+this case just returns the "max absolute residual", not an OrderedDict.
 """
 function steady_state_residuals(variable, variable_at_previous_time, dt;
                                 epsilon=default_epsilon, use_mpi=false,
                                 only_max_abs=false)
+    return steady_state_residuals(variable, variable_at_previous_time, dt, use_mpi,
+                                  only_max_abs, epsilon)
+end
+function steady_state_residuals(variable, variable_at_previous_time, dt, use_mpi,
+                                only_max_abs=false, epsilon=default_epsilon)
     square_residual_norms =
-        steady_state_square_residuals(variable, variable_at_previous_time, dt;
-                                      epsilon=epsilon, use_mpi=use_mpi,
-                                      only_max_abs=only_max_abs)
+        steady_state_square_residuals(variable, variable_at_previous_time, dt, nothing,
+                                      use_mpi, only_max_abs, epsilon)
     if global_rank[] == 0
         if only_max_abs
             # In this case as an optimisation the residual was not squared, so do not need
             # to square-root here
             return square_residual_norms
         else
-            return OrderedDict(k=>sqrt.(v) for (k,v) ∈ square_residual_norms)
+            return OrderedDict{String,Vector{mk_float}}(k=>sqrt.(v) for (k,v) ∈ square_residual_norms)
         end
     else
         return nothing
@@ -654,9 +658,9 @@ function steady_state_residuals(variable, variable_at_previous_time, dt;
 end
 
 """
-    steady_state_square_residuals(variable, variable_at_previous_time, dt;
-                                  variable_max=nothing, epsilon=1.0e-4,
-                                  use_mpi=false, only_max_abs=false)
+    steady_state_square_residuals(variable, variable_at_previous_time, dt,
+                                  variable_max=nothing, use_mpi=false,
+                                  only_max_abs=false, epsilon=$default_epsilon)
 
 Used to calculate the mean square residual for [`steady_state_residuals`](@ref).
 
@@ -668,9 +672,9 @@ See [`steady_state_residuals`](@ref) for documenation of the other arguments. Th
 values of [`steady_state_residuals`](@ref) are the square-root of the return values of
 this function.
 """
-function steady_state_square_residuals(variable, variable_at_previous_time, dt;
-                                       variable_max=nothing, epsilon=default_epsilon,
-                                       use_mpi=false, only_max_abs=false)
+function steady_state_square_residuals(variable, variable_at_previous_time, dt,
+                                       variable_max=nothing, use_mpi=false,
+                                       only_max_abs=false, epsilon=default_epsilon)
     if ndims(dt) == 0
         t_dim = ndims(variable) + 1
     else
@@ -797,10 +801,9 @@ function steady_state_square_residuals(variable, variable_at_previous_time, dt;
                                              (size(packed_results)..., n_blocks[]))
 
             if only_max_abs
-                return OrderedDict(
-                           "max absolute residual"=>maximum(gathered_block_results, dims=2))
+                return maximum(gathered_block_results, dims=2)
             else
-                return OrderedDict(
+                return OrderedDict{String,mk_float}(
                            "RMS absolute residual"=>mean(@view(gathered_block_results[:,1,:]), dims=2),
                            "max absolute residual"=>maximum(@view(gathered_block_results[:,2,:]), dims=2),
                            "RMS relative residual"=>mean(@view(gathered_block_results[:,3,:]), dims=2),
@@ -821,9 +824,7 @@ function steady_state_square_residuals(variable, variable_at_previous_time, dt;
             # Need to wrap the maximum(...) in a call to vec(...) so that we return a
             # Vector, not an N-dimensional array where the first (N-1) dimensions all have
             # size 1.
-            return OrderedDict(
-                       "max absolute residual"=>vec(maximum(absolute_residual;
-                                                            dims=tuple((1:t_dim-1)...))))
+            return vec(maximum(absolute_residual; dims=tuple((1:t_dim-1)...)))
         else
             absolute_square_residual, relative_square_residual =
                 _steady_state_square_residual(variable, variable_at_previous_time,
@@ -831,7 +832,7 @@ function steady_state_square_residuals(variable, variable_at_previous_time, dt;
             # Need to wrap the mean(...) or maximum(...) in a call to vec(...) so that we
             # return a Vector, not an N-dimensional array where the first (N-1) dimensions all
             # have size 1.
-            return OrderedDict(
+            return OrderedDict{String,Vector{mk_float}}(
                        "RMS absolute residual"=>vec(mean(absolute_square_residual;
                                                          dims=tuple((1:t_dim-1)...))),
                        "max absolute residual"=>vec(maximum(absolute_square_residual;
diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 1a7327edd..513fc1264 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -1270,26 +1270,25 @@ global_rank[] == 0 && println("recalculating precon")
                 # expense, and especially the global MPI.Bcast()?
                 begin_z_vperp_vpa_region()
                 if global_rank[] == 0
-                    ss_residual_norms = steady_state_residuals(new_scratch.pdf_electron,
-                                                               old_scratch.pdf_electron,
-                                                               t_params.dt[]; use_mpi=true,
-                                                               only_max_abs=true)
-                    residual_norm = first(values(ss_residual_norms))[1]::mk_float
+                    residual_norm = steady_state_residuals(new_scratch.pdf_electron,
+                                                           old_scratch.pdf_electron,
+                                                           t_params.dt[], true, true)[1]
                 else
-                    ss_residual_norms = steady_state_residuals(new_scratch.pdf_electron,
-                                                               old_scratch.pdf_electron,
-                                                               t_params.dt[]; use_mpi=true,
-                                                               only_max_abs=true)
+                    steady_state_residuals(new_scratch.pdf_electron,
+                                           old_scratch.pdf_electron, t_params.dt[], true,
+                                           true)
                 end
                 if evolve_ppar
-                    ss_ppar_residual_norms =
-                        steady_state_residuals(new_scratch.electron_ppar,
-                                               old_scratch.electron_ppar,
-                                               t_params.dt[]; use_mpi=true,
-                                               only_max_abs=true)
                     if global_rank[] == 0
-                        ppar_residual = first(values(ss_ppar_residual_norms))[1]::mk_float
+                        ppar_residual =
+                            steady_state_residuals(new_scratch.electron_ppar,
+                                                   old_scratch.electron_ppar,
+                                                   t_params.dt[], true, true)[1]
                         residual_norm = max(residual_norm, ppar_residual)
+                    else
+                        steady_state_residuals(new_scratch.electron_ppar,
+                                               old_scratch.electron_ppar,
+                                               t_params.dt[], true, true)
                     end
                 end
                 if global_rank[] == 0

From 19309298be05f1b8641370f0b4d53e2aa210a4ca Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 24 Oct 2024 12:50:44 +0100
Subject: [PATCH 06/43] Slightly improve type stability in
 `setup_moment_kinetics()`

Declare the type of the thing returned by `MPI.Bcast()` to avoid type
instability.
---
 moment_kinetics/src/moment_kinetics.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl
index 3309ca8ed..51ffad3af 100644
--- a/moment_kinetics/src/moment_kinetics.jl
+++ b/moment_kinetics/src/moment_kinetics.jl
@@ -101,7 +101,7 @@ using .looping: debug_setup_loop_ranges_split_one_combination!
 using .moment_kinetics_input: mk_input, read_input_file
 using .time_advance: setup_time_advance!, time_advance!
 using .timer_utils
-using .type_definitions: mk_int, OptionsDict
+using .type_definitions: mk_float, mk_int, OptionsDict
 using .utils: to_minutes, get_default_restart_filename,
               get_prefix_iblock_and_move_existing_file
 using .em_fields: setup_em_fields
@@ -327,7 +327,7 @@ parallel loop ranges, and are only used by the tests in `debug_test/`.
 
     # Broadcast code_time from the root process of each shared-memory block (on which it
     # might have been loaded from a restart file).
-    code_time = MPI.Bcast(code_time, 0, comm_block[])
+    code_time = MPI.Bcast(code_time, 0, comm_block[])::mk_float
 
     # create arrays and do other work needed to setup
     # the main time advance loop -- including normalisation of f by density if requested

From effb76821192656233c979244111e7c0071d706a Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 24 Oct 2024 21:01:57 +0100
Subject: [PATCH 07/43] Fix bounds checks in Jacobian functions

After @boundscheck, need to actually throw an error.
---
 moment_kinetics/src/electron_fluid_equations.jl  |  6 +++---
 moment_kinetics/src/electron_kinetic_equation.jl | 14 +++++++-------
 moment_kinetics/src/electron_vpa_advection.jl    |  6 +++---
 moment_kinetics/src/electron_z_advection.jl      |  6 +++---
 moment_kinetics/src/external_sources.jl          |  6 +++---
 moment_kinetics/src/krook_collisions.jl          |  4 ++--
 moment_kinetics/src/moment_constraints.jl        |  3 +++
 7 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl
index 4fe7f2d24..9268622a1 100644
--- a/moment_kinetics/src/electron_fluid_equations.jl
+++ b/moment_kinetics/src/electron_fluid_equations.jl
@@ -364,9 +364,9 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa
         error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
               * "cannot be in same place in state vector.")
     end
-    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2)
-    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n
-    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
+    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
 
     if composition.electron_physics == kinetic_electrons_with_temperature_equation
         error("kinetic_electrons_with_temperature_equation not "
diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 513fc1264..47033e875 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -3257,8 +3257,8 @@ end
 function add_electron_dissipation_term_to_Jacobian!(jacobian_matrix, f, num_diss_params,
                                                     z, vperp, vpa, vpa_spectral, z_speed,
                                                     dt, ir; f_offset=0)
-    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2)
-    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
 
     vpa_dissipation_coefficient = num_diss_params.electron.vpa_dissipation_coefficient
 
@@ -3537,9 +3537,9 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!(
         error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
               * "cannot be in same place in state vector.")
     end
-    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2)
-    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n
-    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
+    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
 
     source_density_amplitude = moments.electron.external_source_density_amplitude
     source_momentum_amplitude = moments.electron.external_source_momentum_amplitude
@@ -3641,8 +3641,8 @@ end
 
 function add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(jacobian_matrix, z, dt, ion_dt,
                                                           ir; ppar_offset=0)
-    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2)
-    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
 
     begin_z_region()
     @loop_z iz begin
diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl
index 05527b05a..8e0c92ba4 100644
--- a/moment_kinetics/src/electron_vpa_advection.jl
+++ b/moment_kinetics/src/electron_vpa_advection.jl
@@ -105,9 +105,9 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar,
         error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
               * "cannot be in same place in state vector.")
     end
-    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2)
-    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n
-    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
+    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
 
     v_size = vperp.n * vpa.n
     source_density_amplitude = @view moments.electron.external_source_density_amplitude[:,ir,:]
diff --git a/moment_kinetics/src/electron_z_advection.jl b/moment_kinetics/src/electron_z_advection.jl
index 3cb637bee..7685971a5 100644
--- a/moment_kinetics/src/electron_z_advection.jl
+++ b/moment_kinetics/src/electron_z_advection.jl
@@ -82,9 +82,9 @@ function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, p
         error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
               * "cannot be in same place in state vector.")
     end
-    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2)
-    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n
-    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
+    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
 
     v_size = vperp.n * vpa.n
 
diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl
index 2eeeb3ac1..0166b307f 100644
--- a/moment_kinetics/src/external_sources.jl
+++ b/moment_kinetics/src/external_sources.jl
@@ -1031,9 +1031,9 @@ function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments,
         error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
               * "cannot be in same place in state vector.")
     end
-    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2)
-    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n
-    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
+    @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
 
     if !electron_source.active
         return nothing
diff --git a/moment_kinetics/src/krook_collisions.jl b/moment_kinetics/src/krook_collisions.jl
index f0a99404d..77684580c 100644
--- a/moment_kinetics/src/krook_collisions.jl
+++ b/moment_kinetics/src/krook_collisions.jl
@@ -434,8 +434,8 @@ function add_electron_krook_collisions_to_Jacobian!(jacobian_matrix, f, dens, up
                                                     vth, upar_ion, collisions, z, vperp,
                                                     vpa, z_speed, dt, ir; f_offset=0,
                                                     ppar_offset)
-    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2)
-    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
 
     if collisions.krook.nuee0 ≤ 0.0 && collisions.krook.nuei0 ≤ 0.0
         return nothing
diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl
index ac91dd321..e880b5e3a 100644
--- a/moment_kinetics/src/moment_constraints.jl
+++ b/moment_kinetics/src/moment_constraints.jl
@@ -288,6 +288,9 @@ function add_electron_implicit_constraint_forcing_to_Jacobian!(jacobian_matrix,
                                                                z_speed, z, vperp, vpa,
                                                                constraint_forcing_rate,
                                                                dt, ir; f_offset=0)
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
+
     vpa_grid = vpa.grid
     vpa_wgts = vpa.wgts
     v_size = vperp.n * vpa.n

From 5f6caa62d99e223aae146591dcc2b3d0c1633eda Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Sat, 26 Oct 2024 13:06:35 +0100
Subject: [PATCH 08/43] Use mk_float explicitly in a few more places

This means less hacking is needed if we ever want to change (temporarily or
permanently) `mk_float` to a different type.
---
 moment_kinetics/src/coordinates.jl       |  6 ++--
 moment_kinetics/src/gauss_legendre.jl    |  8 ++++--
 moment_kinetics/src/moment_kinetics.jl   |  2 +-
 moment_kinetics/src/nonlinear_solvers.jl | 12 ++++----
 moment_kinetics/src/time_advance.jl      | 36 +++++++++++++-----------
 5 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl
index bf1c45416..373d7c8f8 100644
--- a/moment_kinetics/src/coordinates.jl
+++ b/moment_kinetics/src/coordinates.jl
@@ -381,9 +381,9 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false,
     end
 
     coord = coordinate(coord_input.name, n_global, n_local, coord_input.ngrid,
-        coord_input.nelement, coord_input.nelement_local, nrank, irank, coord_input.L,
-        grid, cell_width, igrid, ielement, imin, imax, igrid_full,
-        coord_input.discretization, coord_input.finite_difference_option,
+        coord_input.nelement, coord_input.nelement_local, nrank, irank,
+        mk_float(coord_input.L), grid, cell_width, igrid, ielement, imin, imax,
+        igrid_full, coord_input.discretization, coord_input.finite_difference_option,
         coord_input.cheb_option, coord_input.bc, coord_input.boundary_parameters, wgts,
         uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch),
         copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch),
diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl
index 97c31d54e..ee9b706e4 100644
--- a/moment_kinetics/src/gauss_legendre.jl
+++ b/moment_kinetics/src/gauss_legendre.jl
@@ -163,6 +163,8 @@ end
 
 function setup_gausslegendre_pseudospectral_lobatto(coord; collision_operator_dim=true)
     x, w = gausslobatto(coord.ngrid)
+    x = mk_float.(x)
+    w = mk_float.(w)
     Dmat = allocate_float(coord.ngrid, coord.ngrid)
     gausslobattolegendre_differentiation_matrix!(Dmat,x,coord.ngrid)
     
@@ -234,6 +236,8 @@ end
 function setup_gausslegendre_pseudospectral_radau(coord; collision_operator_dim=true)
     # Gauss-Radau points on [-1,1)
     x, w = gaussradau(coord.ngrid)
+    x = mk_float.(x)
+    w = mk_float.(w)
     # Gauss-Radau points on (-1,1] 
     xreverse, wreverse = -reverse(x), reverse(w)
     # elemental differentiation matrix
@@ -387,7 +391,7 @@ ngrid -- number of points per element (incl. boundary points)
 
 Note that D has does not include a scaling factor
 """
-function gausslobattolegendre_differentiation_matrix!(D::Array{Float64,2},x::Array{Float64,1},ngrid::Int64)
+function gausslobattolegendre_differentiation_matrix!(D::Array{mk_float,2},x::Array{mk_float,1},ngrid::mk_int)
     D[:,:] .= 0.0
     for ix in 1:ngrid
         for ixp in 1:ngrid
@@ -418,7 +422,7 @@ ngrid -- number of points per element (incl. boundary points)
 
 Note that D has does not include a scaling factor
 """
-function gaussradaulegendre_differentiation_matrix!(D::Array{Float64,2},x::Array{Float64,1},ngrid::Int64)
+function gaussradaulegendre_differentiation_matrix!(D::Array{mk_float,2},x::Array{mk_float,1},ngrid::mk_int)
     D[:,:] .= 0.0
     for ix in 1:ngrid
         for ixp in 1:ngrid
diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl
index 51ffad3af..5d5e9d9f5 100644
--- a/moment_kinetics/src/moment_kinetics.jl
+++ b/moment_kinetics/src/moment_kinetics.jl
@@ -275,7 +275,7 @@ parallel loop ranges, and are only used by the tests in `debug_test/`.
                               manufactured_solns_input, t_input, num_diss_params,
                               advection_structs, io_input, input_dict)
         # initialize time variable
-        code_time = 0.
+        code_time = mk_float(0.0)
         dt = nothing
         dt_before_last_fail = nothing
         electron_dt = nothing
diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl
index 0958070a8..a94e2314c 100644
--- a/moment_kinetics/src/nonlinear_solvers.jl
+++ b/moment_kinetics/src/nonlinear_solvers.jl
@@ -186,13 +186,15 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
 
     linear_initial_guess = zeros(linear_restart)
 
-    return nl_solver_info(nl_solver_input.rtol, nl_solver_input.atol,
+    return nl_solver_info(mk_float(nl_solver_input.rtol), mk_float(nl_solver_input.atol),
                           nl_solver_input.nonlinear_max_iterations,
-                          nl_solver_input.linear_rtol, nl_solver_input.linear_atol,
-                          linear_restart, nl_solver_input.linear_max_restarts, H, c, s, g,
-                          V, linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0),
+                          mk_float(nl_solver_input.linear_rtol),
+                          mk_float(nl_solver_input.linear_atol), linear_restart,
+                          nl_solver_input.linear_max_restarts, H, c, s, g, V,
+                          linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0),
                           Ref(0), Ref(nl_solver_input.preconditioner_update_interval),
-                          Ref(0.0), serial_solve, Ref(0), Ref(0), preconditioner_type,
+                          Ref(mk_float(0.0)), serial_solve, Ref(0), Ref(0),
+                          preconditioner_type,
                           nl_solver_input.preconditioner_update_interval, preconditioners)
 end
 
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index 15278f281..cf07371d2 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -313,10 +313,11 @@ the returned `time_info`.
 function setup_time_info(t_input, n_variables, code_time, dt_reload,
                          dt_before_last_fail_reload, composition,
                          manufactured_solns_input, io_input, input_dict; electron=nothing)
+    code_time = mk_float(code_time)
     rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, rk_order,
     adaptive, low_storage, CFL_prefactor =
         setup_runge_kutta_coefficients!(t_input["type"],
-                                        t_input["CFL_prefactor"],
+                                        mk_float(t_input["CFL_prefactor"]),
                                         t_input["split_operators"])
 
     if !adaptive
@@ -343,16 +344,16 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
     end
 
     t = Ref(code_time)
-    dt = Ref(dt_reload === nothing ? t_input["dt"] : dt_reload)
+    dt = Ref{mk_float}(dt_reload === nothing ? t_input["dt"] : dt_reload)
     previous_dt = Ref(dt[])
     dt_before_output = Ref(dt[])
-    dt_before_last_fail = Ref(dt_before_last_fail_reload === nothing ? Inf : dt_before_last_fail_reload)
+    dt_before_last_fail = Ref{mk_float}(dt_before_last_fail_reload === nothing ? Inf : dt_before_last_fail_reload)
     step_to_moments_output = Ref(false)
     step_to_dfns_output = Ref(false)
     write_moments_output = Ref(false)
     write_dfns_output = Ref(false)
 
-    end_time = code_time + t_input["dt"] * t_input["nstep"]
+    end_time = mk_float(code_time + t_input["dt"] * t_input["nstep"])
     epsilon = 1.e-11
     if adaptive || t_input["write_after_fixed_step_count"]
         if t_input["nwrite"] == 0
@@ -423,7 +424,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
         end
         decrease_dt_iteration_threshold = t_input["decrease_dt_iteration_threshold"]
         increase_dt_iteration_threshold = t_input["increase_dt_iteration_threshold"]
-        cap_factor_ion_dt = t_input["cap_factor_ion_dt"]
+        cap_factor_ion_dt = mk_float(t_input["cap_factor_ion_dt"])
         electron_t_params = nothing
     elseif electron === false
         debug_io = nothing
@@ -439,28 +440,31 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
         electron_t_params = electron
     end
     return time_info(n_variables, t_input["nstep"], end_time, t, dt, previous_dt,
-                     dt_before_output, dt_before_last_fail, CFL_prefactor,
+                     dt_before_output, dt_before_last_fail, mk_float(CFL_prefactor),
                      step_to_moments_output, step_to_dfns_output, write_moments_output,
                      write_dfns_output, Ref(0), Ref(0), Ref{mk_float}(0.0), Ref(0),
                      Ref(0), Ref(0), mk_int[], mk_int[], t_input["nwrite"],
                      t_input["nwrite_dfns"], moments_output_times, dfns_output_times,
                      t_input["type"], rk_coefs, rk_coefs_implicit,
                      implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive,
-                     low_storage, t_input["rtol"], t_input["atol"], t_input["atol_upar"],
-                     t_input["step_update_prefactor"], t_input["max_increase_factor"],
-                     t_input["max_increase_factor_near_last_fail"],
-                     t_input["last_fail_proximity_factor"], t_input["minimum_dt"],
-                     t_input["maximum_dt"],
+                     low_storage, mk_float(t_input["rtol"]), mk_float(t_input["atol"]),
+                     mk_float(t_input["atol_upar"]),
+                     mk_float(t_input["step_update_prefactor"]),
+                     mk_float(t_input["max_increase_factor"]),
+                     mk_float(t_input["max_increase_factor_near_last_fail"]),
+                     mk_float(t_input["last_fail_proximity_factor"]),
+                     mk_float(t_input["minimum_dt"]), mk_float(t_input["maximum_dt"]),
                      electron !== nothing && t_input["implicit_braginskii_conduction"],
                      electron !== nothing && t_input["implicit_electron_advance"],
                      electron !== nothing && t_input["implicit_ion_advance"],
                      electron !== nothing && t_input["implicit_vpa_advection"],
                      electron !== nothing && t_input["implicit_electron_ppar"],
-                     t_input["constraint_forcing_rate"], decrease_dt_iteration_threshold,
-                     increase_dt_iteration_threshold, cap_factor_ion_dt,
-                     t_input["write_after_fixed_step_count"], error_sum_zero,
-                     t_input["split_operators"], t_input["steady_state_residual"],
-                     t_input["converged_residual_value"],
+                     mk_float(t_input["constraint_forcing_rate"]),
+                     decrease_dt_iteration_threshold, increase_dt_iteration_threshold,
+                     mk_float(cap_factor_ion_dt), t_input["write_after_fixed_step_count"],
+                     error_sum_zero, t_input["split_operators"],
+                     t_input["steady_state_residual"],
+                     mk_float(t_input["converged_residual_value"]),
                      manufactured_solns_input.use_for_advance, t_input["stopfile_name"],
                      debug_io, electron_t_params)
 end

From 692e8debf929f07c3c3bba4f7462d975a0ff0333 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 24 Oct 2024 21:38:42 +0100
Subject: [PATCH 09/43] Functions to calculate Jacobians for ADI preconditioner

A variation on the 'alternating direction implicit' (ADI) method might
be useful as a preconditioner. It will require split Jacobians where
some 'implicit' parts only couple a subset of dimensions (i.e. z- or
velocity-dimensions), while 'explicit' parts may couple all dimensions
(but may also have some terms removed to make the matrix more sparse for
numerical efficiency).
---
 .../src/electron_fluid_equations.jl           | 159 +++++-
 .../src/electron_kinetic_equation.jl          | 523 ++++++++++++++++--
 moment_kinetics/src/electron_vpa_advection.jl | 211 +++++--
 moment_kinetics/src/electron_z_advection.jl   | 159 ++++--
 moment_kinetics/src/external_sources.jl       | 153 ++++-
 moment_kinetics/src/krook_collisions.jl       | 108 +++-
 moment_kinetics/src/moment_constraints.jl     |  98 +++-
 7 files changed, 1186 insertions(+), 225 deletions(-)

diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl
index 9268622a1..dec0aff8f 100644
--- a/moment_kinetics/src/electron_fluid_equations.jl
+++ b/moment_kinetics/src/electron_fluid_equations.jl
@@ -358,8 +358,8 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa
                                                    vth, third_moment, ddens_dz, dupar_dz,
                                                    dppar_dz, dthird_moment_dz, collisions,
                                                    composition, z, vperp, vpa, z_spectral,
-                                                   num_diss_params, dt, ir; f_offset=0,
-                                                   ppar_offset=0)
+                                                   num_diss_params, dt, ir, include=:all;
+                                                   f_offset=0, ppar_offset=0)
     if f_offset == ppar_offset
         error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
               * "cannot be in same place in state vector.")
@@ -367,6 +367,7 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa
     @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
     @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
     @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
+    @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include")
 
     if composition.electron_physics == kinetic_electrons_with_temperature_equation
         error("kinetic_electrons_with_temperature_equation not "
@@ -418,13 +419,102 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa
         #     = (2 * sqrt(2) * p^(3/2) / n^(1/2) / me^(1/2))[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz]
         #       + sqrt(2) * (-p^(3/2) / n^(3/2) / me^(1/2) * dn/dz + 3.0 * p^(1/2) / n^(1/2) / me^(1/2) * dp/dz)[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * delta[irowz,icolz]
 
+        # upar*dppar_dz
+        z_deriv_row_startind = z_deriv_matrix.rowptr[iz]
+        z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1
+        z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind]
+        z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind]
+        if include ∈ (:all, :explicit_z)
+            for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros)
+                col = ppar_offset + icolz
+                jacobian_matrix[row,col] +=
+                    dt * upar[iz] * z_deriv_entry
+            end
+        end
+
+        # 3*ppar*dupar_dz
+        if include === :all
+            jacobian_matrix[row,row] += 3.0 * dt * dupar_dz[iz]
+        end
+
+        # terms from d(qpar)/dz
+        if include === :all
+            jacobian_matrix[row,row] +=
+                dt * (3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * dthird_moment_dz[iz]
+                      - 1.5 * sqrt(2.0 * ppar[iz] / me) / dens[iz]^1.5 * third_moment[iz] * ddens_dz[iz]
+                      + 1.5 * sqrt(2.0 / ppar[iz] / dens[iz] / me) * third_moment[iz] * dppar_dz[iz])
+        end
+        if include ∈ (:all, :explicit_z)
+            for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros)
+                col = ppar_offset + icolz
+                jacobian_matrix[row,col] += dt * 3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * third_moment[iz] * z_deriv_entry
+            end
+        end
+        if include ∈ (:all, :explicit_v)
+            for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+                col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+                jacobian_matrix[row,col] += dt * (-(ppar[iz]/dens[iz])^1.5*sqrt(2.0/me)*ddens_dz[iz]
+                                                  + 3.0*sqrt(2.0*ppar[iz]/dens[iz]/me)*dppar_dz[iz]) *
+                                                 vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
+            end
+        end
+        for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+            col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+            jacobian_matrix[row,col] += dt * 2.0*ppar[iz]^1.5*sqrt(2.0/dens[iz]/me) *
+                                             vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry
+        end
+    end
+
+    return nothing
+end
+
+function add_electron_energy_equation_to_z_only_Jacobian!(
+        jacobian_matrix, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz,
+        dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral,
+        num_diss_params, dt, ir)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong")
+
+    if composition.electron_physics == kinetic_electrons_with_temperature_equation
+        error("kinetic_electrons_with_temperature_equation not "
+              * "supported yet in preconditioner")
+    elseif composition.electron_physics != kinetic_electrons
+        error("Unsupported electron_physics=$(composition.electron_physics) "
+              * "in electron_backward_euler!() preconditioner.")
+    end
+    if num_diss_params.electron.moment_dissipation_coefficient > 0.0
+        error("z-diffusion of electron_ppar not yet supported in "
+              * "preconditioner")
+    end
+    if collisions.electron_fluid.nu_ei > 0.0
+        error("electron-ion collision terms for electron_ppar not yet "
+              * "supported in preconditioner")
+    end
+    if composition.n_neutral_species > 0 && collisions.reactions.electron_charge_exchange_frequency > 0.0
+        error("electron 'charge exchange' terms for electron_ppar not yet "
+              * "supported in preconditioner")
+    end
+    if composition.n_neutral_species > 0 && collisions.reactions.electron_ionization_frequency > 0.0
+        error("electron ionization terms for electron_ppar not yet "
+              * "supported in preconditioner")
+    end
+
+    me = composition.me_over_mi
+    z_deriv_matrix = z_spectral.D_matrix_csr
+    v_size = vperp.n * vpa.n
+
+    @loop_z iz begin
+        # Rows corresponding to electron_ppar
+        row = iz
+
         # upar*dppar_dz
         z_deriv_row_startind = z_deriv_matrix.rowptr[iz]
         z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1
         z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind]
         z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind]
         for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros)
-            col = ppar_offset + icolz
+            col = icolz
             jacobian_matrix[row,col] +=
                 dt * upar[iz] * z_deriv_entry
         end
@@ -438,20 +528,59 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa
                   - 1.5 * sqrt(2.0 * ppar[iz] / me) / dens[iz]^1.5 * third_moment[iz] * ddens_dz[iz]
                   + 1.5 * sqrt(2.0 / ppar[iz] / dens[iz] / me) * third_moment[iz] * dppar_dz[iz])
         for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros)
-            col = ppar_offset + icolz
+            col = icolz
             jacobian_matrix[row,col] += dt * 3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * third_moment[iz] * z_deriv_entry
         end
-        for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
-            col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
-            jacobian_matrix[row,col] += dt * (-(ppar[iz]/dens[iz])^1.5*sqrt(2.0/me)*ddens_dz[iz]
-                                              + 3.0*sqrt(2.0*ppar[iz]/dens[iz]/me)*dppar_dz[iz]) *
-                                             vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
-        end
-        for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
-            col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
-            jacobian_matrix[row,col] += dt * 2.0*ppar[iz]^1.5*sqrt(2.0/dens[iz]/me) *
-                                             vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry
-        end
+    end
+
+    return nothing
+end
+
+function add_electron_energy_equation_to_v_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz,
+        dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral,
+        num_diss_params, dt, ir, iz)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong")
+
+    if composition.electron_physics == kinetic_electrons_with_temperature_equation
+        error("kinetic_electrons_with_temperature_equation not "
+              * "supported yet in preconditioner")
+    elseif composition.electron_physics != kinetic_electrons
+        error("Unsupported electron_physics=$(composition.electron_physics) "
+              * "in electron_backward_euler!() preconditioner.")
+    end
+    if num_diss_params.electron.moment_dissipation_coefficient > 0.0
+        error("z-diffusion of electron_ppar not yet supported in "
+              * "preconditioner")
+    end
+    if collisions.electron_fluid.nu_ei > 0.0
+        error("electron-ion collision terms for electron_ppar not yet "
+              * "supported in preconditioner")
+    end
+    if composition.n_neutral_species > 0 && collisions.reactions.electron_charge_exchange_frequency > 0.0
+        error("electron 'charge exchange' terms for electron_ppar not yet "
+              * "supported in preconditioner")
+    end
+    if composition.n_neutral_species > 0 && collisions.reactions.electron_ionization_frequency > 0.0
+        error("electron ionization terms for electron_ppar not yet "
+              * "supported in preconditioner")
+    end
+
+    me = composition.me_over_mi
+
+    jacobian_matrix[end,end] += 3.0 * dt * dupar_dz
+
+    jacobian_matrix[end,end] +=
+        dt * (3.0 * sqrt(2.0 * ppar / dens / me) * dthird_moment_dz
+              - 1.5 * sqrt(2.0 * ppar / me) / dens^1.5 * third_moment * ddens_dz
+              + 1.5 * sqrt(2.0 / ppar / dens / me) * third_moment * dppar_dz)
+    for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+        col = (icolvperp - 1) * vpa.n + icolvpa
+        jacobian_matrix[end,col] += dt * (-(ppar/dens)^1.5*sqrt(2.0/me)*ddens_dz
+                                          + 3.0*sqrt(2.0*ppar/dens/me)*dppar_dz) *
+                                         vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
     end
 
     return nothing
diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 47033e875..54021ced6 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -29,23 +29,34 @@ using ..electron_fluid_equations: calculate_electron_moments!,
 using ..electron_fluid_equations: electron_energy_equation!,
                                   electron_energy_equation_no_r!,
                                   add_electron_energy_equation_to_Jacobian!,
+                                  add_electron_energy_equation_to_v_only_Jacobian!,
+                                  add_electron_energy_equation_to_z_only_Jacobian!,
                                   electron_energy_residual!
 using ..electron_z_advection: electron_z_advection!, update_electron_speed_z!,
-                              add_electron_z_advection_to_Jacobian!
+                              add_electron_z_advection_to_Jacobian!,
+                              add_electron_z_advection_to_v_only_Jacobian!,
+                              add_electron_z_advection_to_z_only_Jacobian!
 using ..electron_vpa_advection: electron_vpa_advection!, update_electron_speed_vpa!,
-                                add_electron_vpa_advection_to_Jacobian!
+                                add_electron_vpa_advection_to_Jacobian!,
+                                add_electron_vpa_advection_to_v_only_Jacobian!
 using ..em_fields: update_phi!
 using ..external_sources: total_external_electron_sources!,
-                          add_total_external_electron_source_to_Jacobian!
+                          add_total_external_electron_source_to_Jacobian!,
+                          add_total_external_electron_source_to_v_only_Jacobian!,
+                          add_total_external_electron_source_to_z_only_Jacobian!
 using ..file_io: get_electron_io_info, write_electron_state, finish_electron_io
 using ..krook_collisions: electron_krook_collisions!, get_collision_frequency_ee,
                           get_collision_frequency_ei,
-                          add_electron_krook_collisions_to_Jacobian!
+                          add_electron_krook_collisions_to_Jacobian!,
+                          add_electron_krook_collisions_to_v_only_Jacobian!,
+                          add_electron_krook_collisions_to_z_only_Jacobian!
 using ..timer_utils
 using ..moment_constraints: hard_force_moment_constraints!,
                             moment_constraints_on_residual!,
                             electron_implicit_constraint_forcing!,
-                            add_electron_implicit_constraint_forcing_to_Jacobian!
+                            add_electron_implicit_constraint_forcing_to_Jacobian!,
+                            add_electron_implicit_constraint_forcing_to_v_only_Jacobian!,
+                            add_electron_implicit_constraint_forcing_to_z_only_Jacobian!
 using ..moment_kinetics_structs: scratch_pdf, scratch_electron_pdf, electron_pdf_substruct
 using ..nonlinear_solvers
 using ..runge_kutta: rk_update_variable!, rk_loworder_solution!, local_error_norm,
@@ -2788,7 +2799,7 @@ end
                                              vpa_spectral, z_advect, vpa_advect,
                                              scratch_dummy, external_source_settings,
                                              num_diss_params, t_params, ion_dt,
-                                             ir, evolve_ppar)
+                                             ir, evolve_ppar, include=:all)
 
 Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equation and (if
 `evolve_ppar=true`) the electron energy equation.
@@ -2797,7 +2808,8 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati
                          jacobian_matrix, f, ppar, moments, collisions, composition, z,
                          vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
                          vpa_advect, scratch_dummy, external_source_settings,
-                         num_diss_params, t_params, ion_dt, ir, evolve_ppar) = begin
+                         num_diss_params, t_params, ion_dt, ir, evolve_ppar,
+                         include=:all) = begin
     dt = t_params.dt[]
 
     buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1]
@@ -2836,10 +2848,11 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati
     @loop_z_vperp_vpa iz ivperp ivpa begin
         # Rows corresponding to pdf_electron
         row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa
-        v_remainder = (ivperp - 1) * vpa.n + ivpa
 
         jacobian_matrix[row,:] .= 0.0
-        jacobian_matrix[row,row] += 1.0
+        if include === :all
+            jacobian_matrix[row,row] += 1.0
+        end
     end
     begin_z_region()
     @loop_z iz begin
@@ -2847,44 +2860,286 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati
         row = pdf_size + iz
 
         jacobian_matrix[row,:] .= 0.0
-        jacobian_matrix[row,row] += 1.0
+        if include === :all
+            jacobian_matrix[row,row] += 1.0
+        end
     end
 
     z_speed = @view z_advect[1].speed[:,:,:,ir]
 
+    if include ∈ (:all, :explicit_v)
+        dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir]
+        begin_vperp_vpa_region()
+        update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir)
+        @loop_vperp_vpa ivperp ivpa begin
+            @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed[:,ivpa,ivperp]
+        end
+        #calculate the upwind derivative
+        @views derivative_z_pdf_vpavperpz!(dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_1[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_2[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_3[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_4[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_5[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_6[:,:,ir],
+                                           z_spectral, z)
+    else
+        dpdf_dz = nothing
+    end
+
+    dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir]
+    begin_z_vperp_region()
+    update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid,
+                               external_source_settings.electron, ir)
+    @loop_z_vperp iz ivperp begin
+        @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir]
+    end
+    #calculate the upwind derivative of the electron pdf w.r.t. wpa
+    @loop_z_vperp iz ivperp begin
+        @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa,
+                           vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral)
+    end
+
+    zeroth_moment = z.scratch_shared
+    first_moment = z.scratch_shared2
+    second_moment = z.scratch_shared3
+    begin_z_region()
+    vpa_grid = vpa.grid
+    vpa_wgts = vpa.wgts
+    @loop_z iz begin
+        @views zeroth_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_wgts)
+        @views first_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, vpa_wgts)
+        @views second_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, 2, vpa_wgts)
+    end
+
     add_electron_z_advection_to_Jacobian!(
-        jacobian_matrix, f, dens, upar, ppar, vth, me, z, vperp, vpa, z_spectral,
-        z_advect, scratch_dummy, dt, ir; ppar_offset=pdf_size)
+        jacobian_matrix, f, dens, upar, ppar, vth, dpdf_dz, me, z, vperp, vpa, z_spectral,
+        z_advect, z_speed, scratch_dummy, dt, ir, include; ppar_offset=pdf_size)
     add_electron_vpa_advection_to_Jacobian!(
-        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
-        dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral,
-        vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir;
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, dpdf_dvpa, ddens_dz,
+        dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral,
+        vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir, include;
         ppar_offset=pdf_size)
     add_contribution_from_electron_pdf_term_to_Jacobian!(
         jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
         dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
-        vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir; ppar_offset=pdf_size)
+        vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, include;
+        ppar_offset=pdf_size)
     add_electron_dissipation_term_to_Jacobian!(
-        jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir)
+        jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir,
+        include)
     add_electron_krook_collisions_to_Jacobian!(
         jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa,
-        z_speed, dt, ir; ppar_offset=pdf_size)
+        z_speed, dt, ir, include; ppar_offset=pdf_size)
     add_total_external_electron_source_to_Jacobian!(
         jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron, z,
-        vperp, vpa, dt, ir; ppar_offset=pdf_size)
+        vperp, vpa, dt, ir, include; ppar_offset=pdf_size)
     add_electron_implicit_constraint_forcing_to_Jacobian!(
-        jacobian_matrix, f, z_speed, z, vperp, vpa, t_params.constraint_forcing_rate, dt,
-        ir)
+        jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp,
+        vpa, t_params.constraint_forcing_rate, dt, ir, include)
     # Always add the electron energy equation term, even if evolve_ppar=false, so that the
     # Jacobian matrix always has the same shape, meaning that we can always reuse the LU
     # factorization struct.
     add_electron_energy_equation_to_Jacobian!(
         jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz,
         dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral,
-        num_diss_params, dt, ir; ppar_offset=pdf_size)
+        num_diss_params, dt, ir, include; ppar_offset=pdf_size)
     if ion_dt !== nothing
         add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(
-            jacobian_matrix, z, dt, ion_dt, ir; ppar_offset=pdf_size)
+            jacobian_matrix, z, dt, ion_dt, ir, include; ppar_offset=pdf_size)
+    end
+
+    return nothing
+end
+
+"""
+    fill_electron_kinetic_equation_v_only_Jacobian!(
+        jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments,
+        zeroth_moment, first_moment, second_moment, third_moment,
+        dthird_moment_dz, collisions, composition, z, vperp, vpa,
+        z_spectral, vperp_spectral, vpa_spectral, z_advect,
+        vpa_advect, scratch_dummy, external_source_settings,
+        num_diss_params, t_params, ion_dt, ir, iz,
+        evolve_ppar)
+
+Fill a pre-allocated matrix with the Jacobian matrix for a velocity-space solve part of
+the ADI method for electron kinetic equation and (if `evolve_ppar=true`) the electron
+energy equation.
+"""
+@timeit global_timer fill_electron_kinetic_equation_v_only_Jacobian!(
+                         jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments,
+                         zeroth_moment, first_moment, second_moment, third_moment,
+                         dthird_moment_dz, collisions, composition, z, vperp, vpa,
+                         z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect,
+                         scratch_dummy, external_source_settings, num_diss_params,
+                         t_params, ion_dt, ir, iz, evolve_ppar) = begin
+    dt = t_params.dt[]
+
+    vth = moments.electron.vth[iz,ir]
+    me = composition.me_over_mi
+    dens = moments.electron.dens[iz,ir]
+    upar = moments.electron.upar[iz,ir]
+    qpar = moments.electron.qpar[iz,ir]
+    ddens_dz = moments.electron.ddens_dz[iz,ir]
+    dupar_dz = moments.electron.dupar_dz[iz,ir]
+    dppar_dz = moments.electron.dppar_dz[iz,ir]
+    dvth_dz = moments.electron.dvth_dz[iz,ir]
+    dqpar_dz = moments.electron.dqpar_dz[iz,ir]
+
+    upar_ion = moments.ion.upar[iz,ir,1]
+
+    pdf_size = z.n * vperp.n * vpa.n
+    v_size = vperp.n * vpa.n
+
+    # Initialise jacobian_matrix to the identity
+    for row ∈ 1:size(jacobian_matrix, 1)
+        jacobian_matrix[row,:] .= 0.0
+        jacobian_matrix[row,row] += 1.0
+    end
+
+    add_electron_z_advection_to_v_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, dpdf_dz, me, z, vperp, vpa, z_spectral,
+        z_advect, z_speed, scratch_dummy, dt, ir, iz)
+    add_electron_vpa_advection_to_v_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, dpdf_dvpa, ddens_dz,
+        dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral,
+        vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir, iz)
+    add_contribution_from_electron_pdf_term_to_v_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
+        dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
+        vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, iz)
+    add_electron_dissipation_term_to_v_only_Jacobian!(
+        jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir,
+        iz)
+    add_electron_krook_collisions_to_v_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa,
+        z_speed, dt, ir, iz)
+    add_total_external_electron_source_to_v_only_Jacobian!(
+        jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron, z,
+        vperp, vpa, dt, ir, iz)
+    add_electron_implicit_constraint_forcing_to_v_only_Jacobian!(
+        jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp,
+        vpa, t_params.constraint_forcing_rate, dt, ir, iz)
+    # Always add the electron energy equation term, even if evolve_ppar=false, so that the
+    # Jacobian matrix always has the same shape, meaning that we can always reuse the LU
+    # factorization struct.
+    add_electron_energy_equation_to_v_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz,
+        dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral,
+        num_diss_params, dt, ir, iz)
+    if ion_dt !== nothing
+        add_ion_dt_forcing_of_electron_ppar_to_v_only_Jacobian!(
+            jacobian_matrix, z, dt, ion_dt, ir, iz)
+    end
+
+    return nothing
+end
+
+"""
+    fill_electron_kinetic_equation_z_only_Jacobian_f!(
+        jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments, zeroth_moment,
+        first_moment, second_moment, third_moment, dthird_moment_dz, collisions,
+        composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
+        vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params,
+        ion_dt, ir, ivperp, ivpa, evolve_ppar)
+
+Fill a pre-allocated matrix with the Jacobian matrix for the distribution function (`f`)
+part of a z-direction solve of the ADI method for the electron kinetic equation, at a
+single velocity-space point (`ivperp`, `ivpa`). `evolve_ppar` is not used here.
+"""
+@timeit global_timer fill_electron_kinetic_equation_z_only_Jacobian_f!(
+                         jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments,
+                         zeroth_moment, first_moment, second_moment, third_moment,
+                         dthird_moment_dz, collisions, composition, z, vperp, vpa,
+                         z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect,
+                         scratch_dummy, external_source_settings, num_diss_params,
+                         t_params, ion_dt, ir, ivperp, ivpa, evolve_ppar) = begin
+    dt = t_params.dt[]
+
+    vth = @view moments.electron.vth[:,ir]
+    me = composition.me_over_mi
+    dens = @view moments.electron.dens[:,ir]
+    upar = @view moments.electron.upar[:,ir]
+    qpar = @view moments.electron.qpar[:,ir]
+    ddens_dz = @view moments.electron.ddens_dz[:,ir]
+    dupar_dz = @view moments.electron.dupar_dz[:,ir]
+    dppar_dz = @view moments.electron.dppar_dz[:,ir]
+    dvth_dz = @view moments.electron.dvth_dz[:,ir]
+    dqpar_dz = @view moments.electron.dqpar_dz[:,ir]
+
+    upar_ion = @view moments.ion.upar[:,ir,1]
+
+    pdf_size = z.n * vperp.n * vpa.n
+    v_size = vperp.n * vpa.n
+
+    # Initialise jacobian_matrix to the identity
+    for row ∈ 1:size(jacobian_matrix, 1)
+        jacobian_matrix[row,:] .= 0.0
+        jacobian_matrix[row,row] += 1.0
+    end
+
+    add_electron_z_advection_to_z_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, dpdf_dz, me, z, vperp, vpa, z_spectral,
+        z_advect, z_speed, scratch_dummy, dt, ir, ivperp, ivpa)
+    add_contribution_from_electron_pdf_term_to_z_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
+        dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
+        vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, ivperp, ivpa)
+    add_electron_krook_collisions_to_z_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa,
+        z_speed, dt, ir, ivperp, ivpa)
+    add_total_external_electron_source_to_z_only_Jacobian!(
+        jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron, z,
+        vperp, vpa, dt, ir, ivperp, ivpa)
+    add_electron_implicit_constraint_forcing_to_z_only_Jacobian!(
+        jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp,
+        vpa, t_params.constraint_forcing_rate, dt, ir, ivperp, ivpa)
+
+    return nothing
+end
+
+"""
+    fill_electron_kinetic_equation_z_only_Jacobian_ppar!(
+        jacobian_matrix, ppar, moments, zeroth_moment, first_moment, second_moment,
+        third_moment, dthird_moment_dz, collisions, composition, z, vperp, vpa,
+        z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+        external_source_settings, num_diss_params, t_params, ion_dt, ir, evolve_ppar)
+
+Fill a pre-allocated matrix with the Jacobian matrix for the `ppar` part of a z-direction
+solve of the ADI method: the electron energy equation terms and (if `ion_dt` is not
+`nothing`) the `ion_dt` forcing of electron `ppar`. `evolve_ppar` is not used here.
+"""
+@timeit global_timer fill_electron_kinetic_equation_z_only_Jacobian_ppar!(
+                         jacobian_matrix, ppar, moments, zeroth_moment, first_moment,
+                         second_moment, third_moment, dthird_moment_dz, collisions,
+                         composition, z, vperp, vpa, z_spectral, vperp_spectral,
+                         vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+                         external_source_settings, num_diss_params, t_params, ion_dt, ir,
+                         evolve_ppar) = begin
+    dt = t_params.dt[]
+
+    vth = @view moments.electron.vth[:,ir]
+    dens = @view moments.electron.dens[:,ir]
+    upar = @view moments.electron.upar[:,ir]
+    ddens_dz = @view moments.electron.ddens_dz[:,ir]
+    dupar_dz = @view moments.electron.dupar_dz[:,ir]
+    dppar_dz = @view moments.electron.dppar_dz[:,ir]
+
+    pdf_size = z.n * vperp.n * vpa.n
+
+    # Initialise jacobian_matrix to the identity
+    for row ∈ 1:size(jacobian_matrix, 1)
+        jacobian_matrix[row,:] .= 0.0
+        jacobian_matrix[row,row] += 1.0
+    end
+
+    add_electron_energy_equation_to_z_only_Jacobian!(
+        jacobian_matrix, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz,
+        dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral,
+        num_diss_params, dt, ir)
+    if ion_dt !== nothing
+        add_ion_dt_forcing_of_electron_ppar_to_z_only_Jacobian!(
+            jacobian_matrix, z, dt, ion_dt, ir)
     end
 
     return nothing
@@ -3256,9 +3511,10 @@ end
 
 function add_electron_dissipation_term_to_Jacobian!(jacobian_matrix, f, num_diss_params,
                                                     z, vperp, vpa, vpa_spectral, z_speed,
-                                                    dt, ir; f_offset=0)
+                                                    dt, ir, include=:all; f_offset=0)
     @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
     @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
+    @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include")
 
     vpa_dissipation_coefficient = num_diss_params.electron.vpa_dissipation_coefficient
 
@@ -3278,9 +3534,44 @@ function add_electron_dissipation_term_to_Jacobian!(jacobian_matrix, f, num_diss
         # Rows corresponding to pdf_electron
         row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
 
+        # Terms from add_dissipation_term!()
+        if include ∈ (:all, :explicit_v)
+            for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+                col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+                jacobian_matrix[row,col] -= dt * vpa_dissipation_coefficient * vpa_dense_second_deriv_matrix[ivpa,icolvpa]
+            end
+        end
+    end
+
+    return nothing
+end
+
+function add_electron_dissipation_term_to_v_only_Jacobian!(
+        jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir,
+        iz)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong")
+
+    vpa_dissipation_coefficient = num_diss_params.electron.vpa_dissipation_coefficient
+
+    if vpa_dissipation_coefficient ≤ 0.0
+        return nothing
+    end
+
+    vpa_dense_second_deriv_matrix = vpa_spectral.dense_second_deriv_matrix
+
+    @loop_vperp_vpa ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = (ivperp - 1) * vpa.n + ivpa
+
         # Terms from add_dissipation_term!()
         for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
-            col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+            col = (icolvperp - 1) * vpa.n + icolvpa
             jacobian_matrix[row,col] -= dt * vpa_dissipation_coefficient * vpa_dense_second_deriv_matrix[ivpa,icolvpa]
         end
     end
@@ -3531,7 +3822,8 @@ end
 function add_contribution_from_electron_pdf_term_to_Jacobian!(
         jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
         dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
-        vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir; f_offset=0, ppar_offset=0)
+        vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, include=:all; f_offset=0,
+        ppar_offset=0)
 
     if f_offset == ppar_offset
         error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
@@ -3540,6 +3832,7 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!(
     @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
     @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
     @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
+    @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include")
 
     source_density_amplitude = moments.electron.external_source_density_amplitude
     source_momentum_amplitude = moments.electron.external_source_momentum_amplitude
@@ -3591,15 +3884,19 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!(
         #   (3/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz + 1/2/sqrt(2*n*me)/p^(3/2)*dp/dz)[irowz] * delta(irowz,icolz)
         #   -1/sqrt(2*p*n*me)[irowz] * z_deriv_matrix[irowz,icolz]
         #
-        jacobian_matrix[row,row] += dt * (0.5 * dqpar_dz[iz] / ppar[iz]
-                                          + vpa.grid[ivpa] * vth[iz] * (ddens_dz[iz] / dens[iz]
-                                                                        - dvth_dz[iz] / vth[iz]))
-        for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
-            col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
-            jacobian_matrix[row,col] +=
-                dt * f[ivpa,ivperp,iz] *
-                (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) *
-                vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
+        if include === :all
+            jacobian_matrix[row,row] += dt * (0.5 * dqpar_dz[iz] / ppar[iz]
+                                              + vpa.grid[ivpa] * vth[iz] * (ddens_dz[iz] / dens[iz]
+                                                                            - dvth_dz[iz] / vth[iz]))
+        end
+        if include ∈ (:all, :explicit_v)
+            for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+                col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+                jacobian_matrix[row,col] +=
+                    dt * f[ivpa,ivperp,iz] *
+                    (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) *
+                    vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
+            end
         end
         z_deriv_row_startind = z_deriv_matrix.rowptr[iz]
         z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1
@@ -3611,6 +3908,61 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!(
                 dt * f[ivpa,ivperp,iz] * vth[iz] *
                 vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry
         end
+        if include === :all
+            for index ∈ eachindex(external_source_settings.electron)
+                electron_source = external_source_settings.electron[index]
+                if electron_source.active
+                    # Source terms from `add_contribution_from_pdf_term!()`
+                    jacobian_matrix[row,row] += dt * (1.5 * source_density_amplitude[iz,ir,index] / dens[iz]
+                                                      - (0.5 * source_pressure_amplitude[iz,ir,index]
+                                                         + source_momentum_amplitude[iz,ir,index]) / ppar[iz]
+                                                     )
+                end
+            end
+        end
+        if include ∈ (:all, :explicit_v)
+            jacobian_matrix[row,ppar_offset+iz] +=
+                dt * f[ivpa,ivperp,iz] *
+                (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz]
+                 - 0.25*sqrt(2.0/ppar[iz]/me)/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz]
+                 + 0.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dthird_moment_dz[iz]
+                 + vpa.grid[ivpa] * (0.75*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz]
+                                     + 0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz]))
+        end
+        for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros)
+            col = ppar_offset + icolz
+            jacobian_matrix[row,col] += dt * f[ivpa,ivperp,iz] *
+                (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz]
+                 - vpa.grid[ivpa]/sqrt(2.0*ppar[iz]*dens[iz]*me)) * z_deriv_entry
+        end
+    end
+
+    return nothing
+end
+
+function add_contribution_from_electron_pdf_term_to_z_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
+        dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
+        vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, ivperp, ivpa)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong")
+
+    source_density_amplitude = moments.electron.external_source_density_amplitude
+    source_momentum_amplitude = moments.electron.external_source_momentum_amplitude
+    source_pressure_amplitude = moments.electron.external_source_pressure_amplitude
+
+    @loop_z iz begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = iz
+
+        jacobian_matrix[row,row] += dt * (0.5 * dqpar_dz[iz] / ppar[iz]
+                                          + vpa.grid[ivpa] * vth[iz] * (ddens_dz[iz] / dens[iz]
+                                                                        - dvth_dz[iz] / vth[iz]))
         for index ∈ eachindex(external_source_settings.electron)
             electron_source = external_source_settings.electron[index]
             if electron_source.active
@@ -3621,33 +3973,93 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!(
                                                  )
             end
         end
-        jacobian_matrix[row,ppar_offset+iz] +=
-            dt * f[ivpa,ivperp,iz] *
-            (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz]
-             - 0.25*sqrt(2.0/ppar[iz]/me)/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz]
-             + 0.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dthird_moment_dz[iz]
-             + vpa.grid[ivpa] * (0.75*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz]
-                                 + 0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz]))
-        for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros)
-            col = ppar_offset + icolz
-            jacobian_matrix[row,col] += dt * f[ivpa,ivperp,iz] *
-                (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz]
-                 - vpa.grid[ivpa]/sqrt(2.0*ppar[iz]*dens[iz]*me)) * z_deriv_entry
+    end
+
+    return nothing
+end
+
+function add_contribution_from_electron_pdf_term_to_v_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
+        dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
+        vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, iz)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong")
+
+    source_density_amplitude = moments.electron.external_source_density_amplitude
+    source_momentum_amplitude = moments.electron.external_source_momentum_amplitude
+    source_pressure_amplitude = moments.electron.external_source_pressure_amplitude
+    z_deriv_matrix = z_spectral.D_matrix_csr
+    v_size = vperp.n * vpa.n
+
+    @loop_vperp_vpa ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
+            continue
         end
+
+        # Rows corresponding to pdf_electron
+        row = (ivperp - 1) * vpa.n + ivpa
+
+        jacobian_matrix[row,row] += dt * (0.5 * dqpar_dz / ppar
+                                          + vpa.grid[ivpa] * vth * (ddens_dz / dens
+                                                                    - dvth_dz / vth))
+        for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+            col = (icolvperp - 1) * vpa.n + icolvpa
+            jacobian_matrix[row,col] +=
+                dt * f[ivpa,ivperp] *
+                (1.5*sqrt(2.0/ppar/dens/me)*dppar_dz - 0.5*sqrt(2.0*ppar/me)/dens^1.5*ddens_dz) *
+                vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
+        end
+        for index ∈ eachindex(external_source_settings.electron)
+            electron_source = external_source_settings.electron[index]
+            if electron_source.active
+                # Source terms from `add_contribution_from_pdf_term!()`
+                jacobian_matrix[row,row] += dt * (1.5 * source_density_amplitude[iz,ir,index] / dens
+                                                  - (0.5 * source_pressure_amplitude[iz,ir,index]
+                                                     + source_momentum_amplitude[iz,ir,index]) / ppar
+                                                 )
+            end
+        end
+        jacobian_matrix[row,end] +=
+            dt * f[ivpa,ivperp] *
+            (-0.75*sqrt(2.0/dens/me)/ppar^1.5*third_moment*dppar_dz
+             - 0.25*sqrt(2.0/ppar/me)/dens^1.5*third_moment*ddens_dz
+             + 0.5*sqrt(2.0/ppar/dens/me)*dthird_moment_dz
+             + vpa.grid[ivpa] * (0.75*sqrt(2.0/me/ppar)/dens^1.5*ddens_dz
+                                 + 0.5/sqrt(2.0*dens*me)/ppar^1.5*dppar_dz))
     end
 
     return nothing
 end
 
 function add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(jacobian_matrix, z, dt, ion_dt,
-                                                          ir; ppar_offset=0)
+                                                          ir, include=:all; ppar_offset=0)
     @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
     @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
+    @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include")
+
+    if include === :all
+        begin_z_region()
+        @loop_z iz begin
+            # Rows corresponding to electron_ppar
+            row = ppar_offset + iz
+
+            # Backward-Euler forcing term
+            jacobian_matrix[row,row] += dt / ion_dt
+        end
+    end
+
+    return nothing
+end
+
+function add_ion_dt_forcing_of_electron_ppar_to_z_only_Jacobian!(jacobian_matrix, z, dt,
+                                                                 ion_dt, ir)
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong")
 
-    begin_z_region()
     @loop_z iz begin
         # Rows corresponding to electron_ppar
-        row = ppar_offset + iz
+        row = iz
 
         # Backward-Euler forcing term
         jacobian_matrix[row,row] += dt / ion_dt
@@ -3656,6 +4068,17 @@ function add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(jacobian_matrix, z, dt
     return nothing
 end
 
+function add_ion_dt_forcing_of_electron_ppar_to_v_only_Jacobian!(jacobian_matrix, z, dt,
+                                                                 ion_dt, ir, iz)
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    #@boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong")
+
+    # Backward-Euler forcing term
+    jacobian_matrix[end,end] += dt / ion_dt
+
+    return nothing
+end
+
 # function check_electron_pdf_convergence!(electron_pdf_converged, pdf_new, pdf)
 #     # check to see if the electron pdf has converged to within the specified tolerance
 #     # NB: the convergence criterion is based on the average relative difference between the
diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl
index 8e0c92ba4..a9e0fd383 100644
--- a/moment_kinetics/src/electron_vpa_advection.jl
+++ b/moment_kinetics/src/electron_vpa_advection.jl
@@ -95,12 +95,12 @@ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa,
 end
 
 function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar,
-                                                 vth, third_moment, ddens_dz, dppar_dz,
-                                                 dthird_moment_dz, moments, me, z, vperp,
-                                                 vpa, z_spectral, vpa_spectral,
+                                                 vth, third_moment, dpdf_dvpa, ddens_dz,
+                                                 dppar_dz, dthird_moment_dz, moments, me,
+                                                 z, vperp, vpa, z_spectral, vpa_spectral,
                                                  vpa_advect, z_speed, scratch_dummy,
-                                                 external_source_settings, dt, ir;
-                                                 f_offset=0, ppar_offset=0)
+                                                 external_source_settings, dt, ir,
+                                                 include=:all; f_offset=0, ppar_offset=0)
     if f_offset == ppar_offset
         error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
               * "cannot be in same place in state vector.")
@@ -108,25 +108,13 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar,
     @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
     @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
     @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
+    @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include")
 
     v_size = vperp.n * vpa.n
     source_density_amplitude = @view moments.electron.external_source_density_amplitude[:,ir,:]
     source_momentum_amplitude = @view moments.electron.external_source_momentum_amplitude[:,ir,:]
     source_pressure_amplitude = @view moments.electron.external_source_pressure_amplitude[:,ir,:]
 
-    dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir]
-    begin_z_vperp_region()
-    update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid,
-                               external_source_settings.electron, ir)
-    @loop_z_vperp iz ivperp begin
-        @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir]
-    end
-    #calculate the upwind derivative of the electron pdf w.r.t. wpa
-    @loop_z_vperp iz ivperp begin
-        @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa,
-                           vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral)
-    end
-
     if !isa(vpa_spectral, gausslegendre_info)
         error("Only gausslegendre_pseudospectral vpa-coordinate type is supported by "
               * "add_electron_vpa_advection_to_Jacobian!() preconditioner because we "
@@ -158,32 +146,34 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar,
         #    + source_density_amplitude*u/n/vth
         #    - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p
         #    + w_∥*1/2*source_density_amplitude/n) * dg/dw_∥
-        if ielement_vpa == 1 && igrid_vpa == 1
-            jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
-                dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa]
-        elseif ielement_vpa == vpa.nelement_local && igrid_vpa == vpa.ngrid
-            jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
-                dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa]
-        elseif igrid_vpa == vpa.ngrid
-            # Note igrid_vpa is only ever 1 when ielement_vpa==1, because
-            # of the way element boundaries are counted.
-            icolumn_min_vpa_next = vpa.imin[ielement_vpa+1] - 1
-            icolumn_max_vpa_next = vpa.imax[ielement_vpa+1]
-            if vpa_speed < 0.0
-                jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+=
-                    dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1]
-            elseif vpa_speed > 0.0
+        if include ∈ (:all, :explicit_v)
+            if ielement_vpa == 1 && igrid_vpa == 1
+                jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                    dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa]
+            elseif ielement_vpa == vpa.nelement_local && igrid_vpa == vpa.ngrid
                 jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
                     dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa]
+            elseif igrid_vpa == vpa.ngrid
+                # Note igrid_vpa is only ever 1 when ielement_vpa==1, because
+                # of the way element boundaries are counted.
+                icolumn_min_vpa_next = vpa.imin[ielement_vpa+1] - 1
+                icolumn_max_vpa_next = vpa.imax[ielement_vpa+1]
+                if vpa_speed < 0.0
+                    jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+=
+                        dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1]
+                elseif vpa_speed > 0.0
+                    jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                        dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa]
+                else
+                    jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                        dt * vpa_speed * 0.5 * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa]
+                    jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+=
+                        dt * vpa_speed * 0.5 * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1]
+                end
             else
                 jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
-                    dt * vpa_speed * 0.5 * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa]
-                jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+=
-                    dt * vpa_speed * 0.5 * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1]
+                    dt * vpa_speed * vpa_Dmat[igrid_vpa,:] ./ vpa_element_scale[ielement_vpa]
             end
-        else
-            jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
-                dt * vpa_speed * vpa_Dmat[igrid_vpa,:] ./ vpa_element_scale[ielement_vpa]
         end
         # q = 2*p*vth*∫dw_∥ w_∥^3 g
         #   = 2*p^(3/2)*sqrt(2/n/me)*∫dw_∥ w_∥^3 g
@@ -202,12 +192,14 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar,
         # d(w_∥*0.5/p*dq/dz[irowz])/d(p[icolz]) =
         #   (-w_∥*3/4*sqrt(2/n/me)/p^(3/2)*∫dw_∥ w_∥^3 g * dp/dz - w_∥*1/4*sqrt(2/me)/sqrt(p)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + w_∥*1/2*sqrt(2/n/me)/sqrt(p)*∫dw_∥ w_∥^3 dg/dz)[irowz] * delta(irowz,icolz)
         #   + w_∥*(1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz]
-        for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
-            col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
-            jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] *
-                vpa.grid[ivpa] * (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz]
-                                  - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) *
-                               vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
+        if include ∈ (:all, :explicit_v)
+            for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+                col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+                jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] *
+                    vpa.grid[ivpa] * (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz]
+                                      - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) *
+                                   vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
+            end
         end
         z_deriv_row_startind = z_deriv_matrix.rowptr[iz]
         z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1
@@ -218,10 +210,12 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar,
             jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] *
                 vpa.grid[ivpa] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry
         end
-        jacobian_matrix[row,ppar_offset+iz] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] *
-            (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz]
-             - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz]
-             + 0.5*sqrt(2.0/dens[iz]/me/ppar[iz])*dthird_moment_dz[iz])
+        if include ∈ (:all, :explicit_v)
+            jacobian_matrix[row,ppar_offset+iz] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] *
+                (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz]
+                 - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz]
+                 + 0.5*sqrt(2.0/dens[iz]/me/ppar[iz])*dthird_moment_dz[iz])
+        end
         for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros)
             col = ppar_offset + icolz
             jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * 1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] * z_deriv_entry
@@ -251,18 +245,20 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar,
         #    - 1/2*source_density_amplitude*u/sqrt(2*n)/p^(3/2)
         #    + w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p^2)[irowz] * delta(irowz,icolz)
         #   + (1/2*sqrt(2/p/n/me) - w_∥^2/sqrt(2*p*n*me))[irowz] * z_deriv_matrix[irowz,icolz]
-        jacobian_matrix[row,ppar_offset+iz] += dt * (
-            -0.25*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*dppar_dz[iz]
-            - vpa.grid[ivpa]^2*(-0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz] - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz])
-           ) * dpdf_dvpa[ivpa,ivperp,iz]
-        for index ∈ eachindex(external_source_settings.electron)
-            electron_source = external_source_settings.electron[index]
-            if electron_source.active
-                jacobian_matrix[row,ppar_offset+iz] += dt * (
-                    -0.5*source_density_amplitude[iz,index]*upar[iz]/sqrt(2.0*dens[iz])/ppar[iz]^1.5
-                    + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz,index]
-                                          + 2.0*upar[iz]*source_momentum_amplitude[iz,index])/ppar[iz]^2
-                   ) * dpdf_dvpa[ivpa,ivperp,iz]
+        if include ∈ (:all, :explicit_v)
+            jacobian_matrix[row,ppar_offset+iz] += dt * (
+                -0.25*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*dppar_dz[iz]
+                - vpa.grid[ivpa]^2*(-0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz] - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz])
+               ) * dpdf_dvpa[ivpa,ivperp,iz]
+            for index ∈ eachindex(external_source_settings.electron)
+                electron_source = external_source_settings.electron[index]
+                if electron_source.active
+                    jacobian_matrix[row,ppar_offset+iz] += dt * (
+                        -0.5*source_density_amplitude[iz,index]*upar[iz]/sqrt(2.0*dens[iz])/ppar[iz]^1.5
+                        + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz,index]
+                                              + 2.0*upar[iz]*source_momentum_amplitude[iz,index])/ppar[iz]^2
+                       ) * dpdf_dvpa[ivpa,ivperp,iz]
+                end
             end
         end
         for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros)
@@ -277,4 +273,97 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar,
     return nothing
 end
 
+function add_electron_vpa_advection_to_v_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, third_moment, dpdf_dvpa, ddens_dz,
+        dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral,
+        vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir, iz)
+    # Velocity-space-only version for the single z-index `iz`; the last column (`end`) takes the ppar terms.
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong")
+
+    source_density_amplitude = @view moments.electron.external_source_density_amplitude[iz,ir,:]
+    source_momentum_amplitude = @view moments.electron.external_source_momentum_amplitude[iz,ir,:]
+    source_pressure_amplitude = @view moments.electron.external_source_pressure_amplitude[iz,ir,:]
+
+    if !isa(vpa_spectral, gausslegendre_info)
+        error("Only gausslegendre_pseudospectral vpa-coordinate type is supported by "
+              * "add_electron_vpa_advection_to_v_only_Jacobian!() preconditioner because "
+              * "we need differentiation matrices.")
+    end
+
+    vpa_Dmat = vpa_spectral.lobatto.Dmat
+    vpa_element_scale = vpa.element_scale
+
+    @loop_vperp_vpa ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = (ivperp - 1) * vpa.n + ivpa
+
+        ielement_vpa = vpa.ielement[ivpa]
+        igrid_vpa = vpa.igrid[ivpa]
+        icolumn_min_vpa = vpa.imin[ielement_vpa] - (ielement_vpa != 1)
+        icolumn_max_vpa = vpa.imax[ielement_vpa]
+
+        vpa_speed = vpa_advect[1].speed[ivpa,ivperp,iz,ir]
+        # vpa_speed times a derivative-matrix row; at element boundaries the row is picked by sign of vpa_speed
+        if ielement_vpa == 1 && igrid_vpa == 1
+            jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa:(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa]
+        elseif ielement_vpa == vpa.nelement_local && igrid_vpa == vpa.ngrid
+            jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa:(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa]
+        elseif igrid_vpa == vpa.ngrid
+            # Note igrid_vpa is only ever 1 when ielement_vpa==1, because
+            # of the way element boundaries are counted.
+            icolumn_min_vpa_next = vpa.imin[ielement_vpa+1] - 1
+            icolumn_max_vpa_next = vpa.imax[ielement_vpa+1]
+            if vpa_speed < 0.0
+                jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa_next:(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+=
+                    dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1]
+            elseif vpa_speed > 0.0
+                jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa:(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                    dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa]
+            else
+                jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa:(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                    dt * vpa_speed * 0.5 * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa]
+                jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa_next:(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+=
+                    dt * vpa_speed * 0.5 * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1]
+            end
+        else
+            jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa:(ivperp-1)*vpa.n+icolumn_max_vpa] .+=
+                dt * vpa_speed * vpa_Dmat[igrid_vpa,:] ./ vpa_element_scale[ielement_vpa]
+        end
+        for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+            col = (icolvperp - 1) * vpa.n + icolvpa
+            jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp] *
+                vpa.grid[ivpa] * (1.5*sqrt(2.0/ppar/dens/me)*dppar_dz
+                                  - 0.5*sqrt(2.0*ppar/me)/dens^1.5*ddens_dz) *
+                               vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3
+        end
+        jacobian_matrix[row,end] += dt * dpdf_dvpa[ivpa,ivperp] * vpa.grid[ivpa] *
+            (-0.75*sqrt(2.0/dens/me)/ppar^1.5*third_moment*dppar_dz
+             - 0.25*sqrt(2.0/me/ppar)/dens^1.5*third_moment*ddens_dz
+             + 0.5*sqrt(2.0/dens/me/ppar)*dthird_moment_dz)
+        jacobian_matrix[row,end] += dt * (
+            -0.25*sqrt(2.0/dens/me)/ppar^1.5*dppar_dz
+            - vpa.grid[ivpa]^2*(-0.5/sqrt(2.0*dens*me)/ppar^1.5*dppar_dz - 0.25*sqrt(2.0/me/ppar)/dens^1.5*ddens_dz)
+           ) * dpdf_dvpa[ivpa,ivperp]
+        for index ∈ eachindex(external_source_settings.electron)
+            electron_source = external_source_settings.electron[index]
+            if electron_source.active
+                jacobian_matrix[row,end] += dt * (
+                    -0.5*source_density_amplitude[index]*upar/sqrt(2.0*dens)/ppar^1.5
+                    + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[index]
+                                          + 2.0*upar*source_momentum_amplitude[index])/ppar^2
+                   ) * dpdf_dvpa[ivpa,ivperp]
+            end
+        end
+    end
+
+    return nothing
+end
+
 end
diff --git a/moment_kinetics/src/electron_z_advection.jl b/moment_kinetics/src/electron_z_advection.jl
index 7685971a5..8c78e58ab 100644
--- a/moment_kinetics/src/electron_z_advection.jl
+++ b/moment_kinetics/src/electron_z_advection.jl
@@ -75,9 +75,9 @@ function update_electron_speed_z!(advect, upar, vth, vpa)
 end
 
 function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, vth,
-                                               me, z, vperp, vpa, z_spectral, z_advect,
-                                               scratch_dummy, dt, ir; f_offset=0,
-                                               ppar_offset=0)
+                                               dpdf_dz, me, z, vperp, vpa, z_spectral,
+                                               z_advect, z_speed, scratch_dummy, dt, ir,
+                                               include=:all; f_offset=0, ppar_offset=0)
     if f_offset == ppar_offset
         error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
               * "cannot be in same place in state vector.")
@@ -85,27 +85,82 @@ function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, p
     @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
     @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
     @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
+    @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include")
 
     v_size = vperp.n * vpa.n
 
-    dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir]
+    if !isa(z_spectral, gausslegendre_info)
+        error("Only gausslegendre_pseudospectral z-coordinate type is supported by "
+              * "add_electron_z_advection_to_Jacobian!() preconditioner because we need "
+              * "differentiation matrices.")
+    end
+    z_Dmat = z_spectral.lobatto.Dmat
+    z_element_scale = z.element_scale
 
-    begin_vperp_vpa_region()
-    update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir)
-    z_speed_array = @view z_advect[1].speed[:,:,:,1]
+    begin_z_vperp_vpa_region()
+    @loop_z_vperp_vpa iz ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
+            continue
+        end
 
-    @loop_vperp_vpa ivperp ivpa begin
-        @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed_array[:,ivpa,ivperp]
+        # Rows corresponding to pdf_electron
+        row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
+        v_remainder = (ivperp - 1) * vpa.n + ivpa + f_offset
+
+        ielement_z = z.ielement[iz]
+        igrid_z = z.igrid[iz]
+        icolumn_min_z = z.imin[ielement_z] - (ielement_z != 1)
+        icolumn_max_z = z.imax[ielement_z]
+
+        this_z_speed = z_speed[iz,ivpa,ivperp]
+
+        # Contributions from (w_∥*vth + upar)*dg/dz
+        if include ∈ (:all, :explicit_z)
+            if ielement_z == 1 && igrid_z == 1
+                jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
+                dt * this_z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z]
+            elseif ielement_z == z.nelement_local && igrid_z == z.ngrid
+                jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
+                dt * this_z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z]
+            elseif igrid_z == z.ngrid
+                # Note igrid_z is only ever 1 when ielement_z==1, because
+                # of the way element boundaries are counted.
+                icolumn_min_z_next = z.imin[ielement_z+1] - 1
+                icolumn_max_z_next = z.imax[ielement_z+1]
+                if this_z_speed < 0.0
+                    jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+=
+                    dt * this_z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z+1]
+                elseif this_z_speed > 0.0
+                    jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
+                    dt * this_z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z]
+                else
+                    jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
+                    dt * this_z_speed * 0.5 * z_Dmat[end,:] ./ z_element_scale[ielement_z]
+                    jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+=
+                    dt * this_z_speed * 0.5 * z_Dmat[1,:] ./ z_element_scale[ielement_z+1]
+                end
+            else
+                jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
+                dt * this_z_speed * z_Dmat[igrid_z,:] ./ z_element_scale[ielement_z]
+            end
+        end
+        # vth = sqrt(2*p/n/me)
+        # so d(vth)/d(ppar) = 1/n/me/sqrt(2*p/n/me) = 1/n/me/vth
+        # and d(w_∥*vth*dg/dz)/d(ppar) = 1/n/me/vth*w_∥*dg/dz
+        if include ∈ (:all, :explicit_v)
+            jacobian_matrix[row,ppar_offset+iz] += dt / dens[iz] / me / vth[iz] * vpa.grid[ivpa] * dpdf_dz[ivpa,ivperp,iz]
+        end
     end
-    #calculate the upwind derivative
-    @views derivative_z_pdf_vpavperpz!(dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir],
-                                       scratch_dummy.buffer_vpavperpr_1[:,:,ir],
-                                       scratch_dummy.buffer_vpavperpr_2[:,:,ir],
-                                       scratch_dummy.buffer_vpavperpr_3[:,:,ir],
-                                       scratch_dummy.buffer_vpavperpr_4[:,:,ir],
-                                       scratch_dummy.buffer_vpavperpr_5[:,:,ir],
-                                       scratch_dummy.buffer_vpavperpr_6[:,:,ir],
-                                       z_spectral, z)
+
+    return nothing
+end
+
+function add_electron_z_advection_to_z_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, dpdf_dz, me, z, vperp, vpa, z_spectral,
+        z_advect, z_speed, scratch_dummy, dt, ir, ivperp, ivpa)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong")
 
     if !isa(z_spectral, gausslegendre_info)
         error("Only gausslegendre_pseudospectral z-coordinate type is supported by "
@@ -115,56 +170,72 @@ function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, p
     z_Dmat = z_spectral.lobatto.Dmat
     z_element_scale = z.element_scale
 
-    begin_z_vperp_vpa_region()
-    @loop_z_vperp_vpa iz ivperp ivpa begin
+    @loop_z iz begin
         if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa,
-                                                 z_speed_array)
+                                                 z_speed)
             continue
         end
 
         # Rows corresponding to pdf_electron
-        row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
-        v_remainder = (ivperp - 1) * vpa.n + ivpa + f_offset
+        row = iz
 
         ielement_z = z.ielement[iz]
         igrid_z = z.igrid[iz]
         icolumn_min_z = z.imin[ielement_z] - (ielement_z != 1)
         icolumn_max_z = z.imax[ielement_z]
 
-        z_speed = z_speed_array[iz,ivpa,ivperp]
+        this_z_speed = z_speed[iz,ivpa,ivperp]
 
         # Contributions from (w_∥*vth + upar)*dg/dz
         if ielement_z == 1 && igrid_z == 1
-            jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
-            dt * z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z]
+            jacobian_matrix[row,icolumn_min_z:icolumn_max_z] .+=
+            dt * this_z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z]
         elseif ielement_z == z.nelement_local && igrid_z == z.ngrid
-            jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
-            dt * z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z]
+            jacobian_matrix[row,icolumn_min_z:icolumn_max_z] .+=
+            dt * this_z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z]
         elseif igrid_z == z.ngrid
             # Note igrid_z is only ever 1 when ielement_z==1, because
             # of the way element boundaries are counted.
             icolumn_min_z_next = z.imin[ielement_z+1] - 1
             icolumn_max_z_next = z.imax[ielement_z+1]
-            if z_speed < 0.0
-                jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+=
-                dt * z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z+1]
-            elseif z_speed > 0.0
-                jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
-                dt * z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z]
+            if this_z_speed < 0.0
+                jacobian_matrix[row,icolumn_min_z_next:icolumn_max_z_next] .+=
+                dt * this_z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z+1]
+            elseif this_z_speed > 0.0
+                jacobian_matrix[row,icolumn_min_z:icolumn_max_z] .+=
+                dt * this_z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z]
             else
-                jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
-                dt * z_speed * 0.5 * z_Dmat[end,:] ./ z_element_scale[ielement_z]
-                jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+=
-                dt * z_speed * 0.5 * z_Dmat[1,:] ./ z_element_scale[ielement_z+1]
+                jacobian_matrix[row,icolumn_min_z:icolumn_max_z] .+=
+                dt * this_z_speed * 0.5 * z_Dmat[end,:] ./ z_element_scale[ielement_z]
+                jacobian_matrix[row,icolumn_min_z_next:icolumn_max_z_next] .+=
+                dt * this_z_speed * 0.5 * z_Dmat[1,:] ./ z_element_scale[ielement_z+1]
             end
         else
-            jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+=
-            dt * z_speed * z_Dmat[igrid_z,:] ./ z_element_scale[ielement_z]
+            jacobian_matrix[row,icolumn_min_z:icolumn_max_z] .+=
+            dt * this_z_speed * z_Dmat[igrid_z,:] ./ z_element_scale[ielement_z]
         end
-        # vth = sqrt(2*p/n/me)
-        # so d(vth)/d(ppar) = 1/n/me/sqrt(2*p/n/me) = 1/n/me/vth
-        # and d(w_∥*vth*dg/dz)/d(ppar) = 1/n/me/vth*w_∥*dg/dz
-        jacobian_matrix[row,ppar_offset+iz] += dt / dens[iz] / me / vth[iz] * vpa.grid[ivpa] * dpdf_dz[ivpa,ivperp,iz]
+    end
+
+    return nothing
+end
+
+function add_electron_z_advection_to_v_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, dpdf_dz, me, z, vperp, vpa, z_spectral,
+        z_advect, z_speed, scratch_dummy, dt, ir, iz)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong")
+
+    @loop_vperp_vpa ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa,
+                                                 z_speed)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = (ivperp - 1) * vpa.n + ivpa
+
+        jacobian_matrix[row,end] += dt / dens / me / vth * vpa.grid[ivpa] * dpdf_dz[ivpa,ivperp]
     end
 
     return nothing
diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl
index 0166b307f..87344cdea 100644
--- a/moment_kinetics/src/external_sources.jl
+++ b/moment_kinetics/src/external_sources.jl
@@ -1012,12 +1012,12 @@ Note that this function operates on a single point in `r`, given by `ir`, and `p
 end
 
 function add_total_external_electron_source_to_Jacobian!(
-        jacobian_matrix, f, moments, me, z_speed, electron_sources, z, vperp, vpa, dt, ir;
-        f_offset=0, ppar_offset=0)
+        jacobian_matrix, f, moments, me, z_speed, electron_sources, z, vperp, vpa, dt, ir,
+        include=:all; f_offset=0, ppar_offset=0)
     for index ∈ eachindex(electron_sources)
         add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, me,
                                                   z_speed, electron_sources[index], index,
-                                                  z, vperp, vpa, dt, ir;
+                                                  z, vperp, vpa, dt, ir, include;
                                                   f_offset=f_offset,
                                                   ppar_offset=ppar_offset)
     end
@@ -1025,8 +1025,8 @@ end
 
 function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, me,
                                                    z_speed, electron_source, index, z,
-                                                   vperp, vpa, dt, ir; f_offset=0,
-                                                   ppar_offset=0)
+                                                   vperp, vpa, dt, ir, include=:all;
+                                                   f_offset=0, ppar_offset=0)
     if f_offset == ppar_offset
         error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
               * "cannot be in same place in state vector.")
@@ -1034,6 +1034,7 @@ function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments,
     @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
     @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
     @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big")
+    @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include")
 
     if !electron_source.active
         return nothing
@@ -1055,7 +1056,7 @@ function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments,
     v_size = vperp.n * vpa.n
 
     begin_z_vperp_vpa_region()
-    if electron_source.source_type == "energy"
+    if electron_source.source_type == "energy" && include === :all
         @loop_z_vperp_vpa iz ivperp ivpa begin
             if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa,
                                                      z_speed)
@@ -1069,26 +1070,136 @@ function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments,
             jacobian_matrix[row,row] += dt * source_amplitude[iz]
         end
     end
-    @loop_z_vperp_vpa iz ivperp ivpa begin
+    if include ∈ (:all, :explicit_v)
+        @loop_z_vperp_vpa iz ivperp ivpa begin
+            if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
+                continue
+            end
+
+            # Rows corresponding to pdf_electron
+            row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
+
+            # Contributions from
+            #   -vth/n*vth_factor*source_amplitude*exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T)
+            # Using
+            #   d(vth[irowz])/d(ppar[icolz]) = 1/2*vth/ppar * delta(irowz,icolz)
+            #
+            #   d(exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T)[irowz])/d(ppar[icolz])
+            #     = -2*(w_⟂^2+(w_∥*vth+u)*w_∥)*me/source_T * 1/2*vth/ppar * exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) * delta(irowz,icolz)
+            #     = -(w_⟂^2+(w_∥*vth+u)*w_∥)*me/source_T * vth/ppar * exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) * delta(irowz,icolz)
+            jacobian_matrix[row,ppar_offset+iz] +=
+                -dt * vth[iz] / dens[iz] * vth_factor * source_amplitude[iz] *
+                      (0.5/ppar[iz] - (vperp_grid[ivperp]^2 + (vpa_grid[ivpa]*vth[iz] + upar[iz])*vpa_grid[ivpa])*me/source_T*vth[iz]/ppar[iz]) *
+                      exp(-((vperp_grid[ivperp]*vth[iz])^2 + (vpa_grid[ivpa]*vth[iz] + upar[iz])^2) * me / source_T)
+        end
+    end
+
+    return nothing
+end
+
+function add_total_external_electron_source_to_z_only_Jacobian!(
+        jacobian_matrix, f, moments, me, z_speed, electron_sources, z, vperp, vpa, dt, ir,
+        ivperp, ivpa)
+    for index ∈ eachindex(electron_sources)
+        add_external_electron_source_to_z_only_Jacobian!(
+            jacobian_matrix, f, moments, me, z_speed, electron_sources[index], index, z,
+            vperp, vpa, dt, ir, ivperp, ivpa)
+    end
+end
+
+function add_external_electron_source_to_z_only_Jacobian!(
+        jacobian_matrix, f, moments, me, z_speed, electron_source, index, z, vperp, vpa,
+        dt, ir, ivperp, ivpa)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong")
+
+    if !electron_source.active
+        return nothing
+    end
+
+    if electron_source.source_type == "energy"
+        source_amplitude = @view moments.electron.external_source_amplitude[:,ir,index]
+
+        @loop_z iz begin
+            if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa,
+                                                     z_speed)
+                continue
+            end
+
+            # Rows corresponding to pdf_electron
+            row = iz
+
+            # Contribution from `external_electron_source!()`
+            jacobian_matrix[row,row] += dt * source_amplitude[iz]
+        end
+    end
+
+    return nothing
+end
+
+function add_total_external_electron_source_to_v_only_Jacobian!(
+        jacobian_matrix, f, moments, me, z_speed, electron_sources, z, vperp, vpa, dt, ir,
+        iz)
+    for index ∈ eachindex(electron_sources)
+        add_external_electron_source_to_v_only_Jacobian!(
+            jacobian_matrix, f, moments, me, z_speed, electron_sources[index], index, z,
+            vperp, vpa, dt, ir, iz)
+    end
+end
+
+function add_external_electron_source_to_v_only_Jacobian!(
+        jacobian_matrix, f, moments, me, z_speed, electron_source, index, z, vperp, vpa,
+        dt, ir, iz)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong")
+
+    if !electron_source.active
+        return nothing
+    end
+
+    source_amplitude = moments.electron.external_source_amplitude[iz,ir,index]
+    source_T = electron_source.source_T
+    dens = moments.electron.dens[iz,ir]
+    upar = moments.electron.upar[iz,ir]
+    ppar = moments.electron.ppar[iz,ir]
+    vth = moments.electron.vth[iz,ir]
+    if vperp.n == 1
+        vth_factor = 1.0 / sqrt(source_T / me)
+    else
+        vth_factor = 1.0 / sqrt(source_T / me)^1.5
+    end
+    vperp_grid = vperp.grid
+    vpa_grid = vpa.grid
+    v_size = vperp.n * vpa.n
+
+    if electron_source.source_type == "energy"
+        @loop_vperp_vpa ivperp ivpa begin
+            if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa,
+                                                     z_speed)
+                continue
+            end
+
+            # Rows corresponding to pdf_electron
+            row = (ivperp - 1) * vpa.n + ivpa
+
+            # Contribution from `external_electron_source!()`
+            jacobian_matrix[row,row] += dt * source_amplitude
+        end
+    end
+    @loop_vperp_vpa ivperp ivpa begin
         if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
             continue
         end
 
         # Rows corresponding to pdf_electron
-        row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
-
-        # Contributions from
-        #   -vth/n*vth_factor*source_amplitude*exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T)
-        # Using
-        #   d(vth[irowz])/d(ppar[icolz]) = 1/2*vth/ppar * delta(irowz,icolz)
-        #
-        #   d(exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T)[irowz])/d(ppar[icolz])
-        #     = -2*(w_⟂^2+(w_∥*vth+u)*w_∥)*me/source_T * 1/2*vth/ppar * exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) * delta(irowz,icolz)
-        #     = -(w_⟂^2+(w_∥*vth+u)*w_∥)*me/source_T * vth/ppar * exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) * delta(irowz,icolz)
-        jacobian_matrix[row,ppar_offset+iz] +=
-            -dt * vth[iz] / dens[iz] * vth_factor * source_amplitude[iz] *
-                  (0.5/ppar[iz] - (vperp_grid[ivperp]^2 + (vpa_grid[ivpa]*vth[iz] + upar[iz])*vpa_grid[ivpa])*me/source_T*vth[iz]/ppar[iz]) *
-                  exp(-((vperp_grid[ivperp]*vth[iz])^2 + (vpa_grid[ivpa]*vth[iz] + upar[iz])^2) * me / source_T)
+        row = (ivperp - 1) * vpa.n + ivpa
+
+        jacobian_matrix[row,end] +=
+            -dt * vth / dens * vth_factor * source_amplitude *
+                  (0.5/ppar - (vperp_grid[ivperp]^2 + (vpa_grid[ivpa]*vth + upar)*vpa_grid[ivpa])*me/source_T*vth/ppar) *
+                  exp(-((vperp_grid[ivperp]*vth)^2 + (vpa_grid[ivpa]*vth + upar)^2) * me / source_T)
     end
 
     return nothing
diff --git a/moment_kinetics/src/krook_collisions.jl b/moment_kinetics/src/krook_collisions.jl
index 77684580c..5f0221261 100644
--- a/moment_kinetics/src/krook_collisions.jl
+++ b/moment_kinetics/src/krook_collisions.jl
@@ -432,10 +432,11 @@ end
 
 function add_electron_krook_collisions_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar,
                                                     vth, upar_ion, collisions, z, vperp,
-                                                    vpa, z_speed, dt, ir; f_offset=0,
-                                                    ppar_offset)
+                                                    vpa, z_speed, dt, ir, include=:all;
+                                                    f_offset=0, ppar_offset)
     @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
     @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
+    @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include")
 
     if collisions.krook.nuee0 ≤ 0.0 && collisions.krook.nuei0 ≤ 0.0
         return nothing
@@ -457,25 +458,98 @@ function add_electron_krook_collisions_to_Jacobian!(jacobian_matrix, f, dens, up
         # Contribution from electron_krook_collisions!()
         nu_ee = get_collision_frequency_ee(collisions, dens[iz], vth[iz])
         nu_ei = get_collision_frequency_ei(collisions, dens[iz], vth[iz])
+        if include === :all
+            jacobian_matrix[row,row] += dt * (nu_ee + nu_ei)
+        end
+
+        if include ∈ (:all, :explicit_v)
+            fM_i = exp(-(vpa.grid[ivpa] + (upar_ion[iz] - upar[iz])/vth[iz])^2 - vperp.grid[ivperp]^2)
+            #   d(f_M(u_i)[irowz])/d(ppar[icolz])
+            #       = -2*(vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)*(-1/2/vth/ppar)*f_M(u_i) * delta(irowz,icolz)
+            #       = (vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar*f_M(u_i) * delta(irowz,icolz)
+            jacobian_matrix[row,ppar_offset+iz] +=
+                -dt * nu_ei * (vpa.grid[ivpa]+(upar_ion[iz]-upar[iz])/vth[iz])*(upar_ion[iz]-upar[iz])/vth[iz]/ppar[iz]*fM_i
+
+            if using_reference_parameters
+                # Both collision frequencies are proportional to n/vth^3=n^(5/2)*(me/2/p)^3/2,
+                # so
+                #   d(nu[irowz])/d(ppar[icolz]) = -3/2*nu/ppar * delta(irowz,icolz)
+                #   d(-(vpa.grid+(upar_ion-upar)/vth)^2[irowz])/d(ppar[icolz])
+                #       = (vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar * delta(irowz,icolz)
+                jacobian_matrix[row,ppar_offset+iz] +=
+                    -dt * 1.5 / ppar[iz] *
+                          (nu_ee * (f[ivpa,ivperp,iz] - exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2))
+                           + nu_ei * (f[ivpa,ivperp,iz] - fM_i))
+            end
+        end
+    end
+
+    return nothing
+end
+
+function add_electron_krook_collisions_to_z_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa,
+        z_speed, dt, ir, ivperp, ivpa)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong")
+
+    if collisions.krook.nuee0 ≤ 0.0 && collisions.krook.nuei0 ≤ 0.0
+        return nothing
+    end
+
+    @loop_z iz begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = iz
+
+        # Contribution from electron_krook_collisions!()
+        nu_ee = get_collision_frequency_ee(collisions, dens[iz], vth[iz])
+        nu_ei = get_collision_frequency_ei(collisions, dens[iz], vth[iz])
+        jacobian_matrix[row,row] += dt * (nu_ee + nu_ei)
+    end
+
+    return nothing
+end
+
+function add_electron_krook_collisions_to_v_only_Jacobian!(
+        jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa,
+        z_speed, dt, ir, iz)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong")
+
+    if collisions.krook.nuee0 ≤ 0.0 && collisions.krook.nuei0 ≤ 0.0
+        return nothing
+    end
+
+    using_reference_parameters = (collisions.krook.frequency_option == "reference_parameters")
+
+    @loop_vperp_vpa ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = (ivperp - 1) * vpa.n + ivpa
+
+        # Contribution from electron_krook_collisions!()
+        nu_ee = get_collision_frequency_ee(collisions, dens, vth)
+        nu_ei = get_collision_frequency_ei(collisions, dens, vth)
         jacobian_matrix[row,row] += dt * (nu_ee + nu_ei)
 
-        fM_i = exp(-(vpa.grid[ivpa] + (upar_ion[iz] - upar[iz])/vth[iz])^2 - vperp.grid[ivperp]^2)
-        #   d(f_M(u_i)[irowz])/d(ppar[icolz])
-        #       = -2*(vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)*(-1/2/vth/ppar)*f_M(u_i) * delta(irow,icolz)
-        #       = (vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar*f_M(u_i) * delta(irow,icolz)
-        jacobian_matrix[row,ppar_offset+iz] +=
-            -dt * nu_ei * (vpa.grid[ivpa]+(upar_ion[iz]-upar[iz])/vth[iz])*(upar_ion[iz]-upar[iz])/vth[iz]/ppar[iz]*fM_i
+        fM_i = exp(-(vpa.grid[ivpa] + (upar_ion - upar)/vth)^2 - vperp.grid[ivperp]^2)
+        jacobian_matrix[row,end] +=
+            -dt * nu_ei * (vpa.grid[ivpa]+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar*fM_i
 
         if using_reference_parameters
-            # Both collision frequencies are proportional to n/vth^3=n^(5/2)*(me/2/p)^3/2,
-            # so
-            #   d(nu[irowz])/d(ppar[icolz]) = -3/2*nu/ppar * delta(irowz,icolz)
-            #   d(-(vpa.grid+(upar_ion-upar)/vth)^2[irowz])/d(ppar[icoliz]
-            #       = -(vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar * delta(irow,icolz)
-            jacobian_matrix[row,ppar_offset+iz] +=
-                -dt * 1.5 / ppar[iz] *
-                      (nu_ee * (f[ivpa,ivperp,iz] - exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2))
-                       + nu_ei * (f[ivpa,ivperp,iz] - fM_i))
+            jacobian_matrix[row,end] +=
+                -dt * 1.5 / ppar *
+                      (nu_ee * (f[ivpa,ivperp] - exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2))
+                       + nu_ei * (f[ivpa,ivperp] - fM_i))
         end
     end
 
diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl
index e880b5e3a..33d93b439 100644
--- a/moment_kinetics/src/moment_constraints.jl
+++ b/moment_kinetics/src/moment_constraints.jl
@@ -279,32 +279,24 @@ end
     add_electron_implicit_constraint_forcing_to_Jacobian!(jacobian_matrix, f,
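-                                                          z_speed, z, vperp, vpa,
-                                                          constraint_forcing_rate,
-                                                          dt, ir; f_offset=0)
+                                                          zeroth_moment, first_moment,
+                                                          second_moment, z_speed, z, vperp,
+                                                          vpa, constraint_forcing_rate, dt,
+                                                          ir, include=:all; f_offset=0)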
 
 Add the contributions corresponding to [`electron_implicit_constraint_forcing!`](@ref) to
 `jacobian_matrix`.
 """
-function add_electron_implicit_constraint_forcing_to_Jacobian!(jacobian_matrix, f,
-                                                               z_speed, z, vperp, vpa,
-                                                               constraint_forcing_rate,
-                                                               dt, ir; f_offset=0)
+function add_electron_implicit_constraint_forcing_to_Jacobian!(
+        jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp,
+        vpa, constraint_forcing_rate, dt, ir, include=:all; f_offset=0)
+
     @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
     @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big")
+    @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include")
 
     vpa_grid = vpa.grid
     vpa_wgts = vpa.wgts
     v_size = vperp.n * vpa.n
 
-    zeroth_moment = z.scratch_shared
-    first_moment = z.scratch_shared2
-    second_moment = z.scratch_shared3
-    begin_z_region()
-    @loop_z iz begin
-        @views zeroth_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_wgts)
-        @views first_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, vpa_wgts)
-        @views second_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, 2, vpa_wgts)
-    end
-
     begin_z_vperp_vpa_region()
     @loop_z_vperp_vpa iz ivperp ivpa begin
         if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
@@ -314,21 +306,93 @@ function add_electron_implicit_constraint_forcing_to_Jacobian!(jacobian_matrix,
         # Rows corresponding to pdf_electron
         row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset
 
+        # Diagonal terms
+        if include === :all
+            jacobian_matrix[row,row] += -dt * constraint_forcing_rate *
+                                              ((1.0 - zeroth_moment[iz])
+                                               - first_moment[iz]*vpa_grid[ivpa]
+                                               + (0.5 - second_moment[iz])*vpa_grid[ivpa]^2)
+        end
+
+        if include ∈ (:all, :explicit_v)
+            # Integral terms
+            # d(∫dw_∥ w_∥^n g[irow])/d(g[icol]) = vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^n
+            for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+                col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+                jacobian_matrix[row,col] += dt * constraint_forcing_rate *
+                                                 (1.0
+                                                  + vpa_grid[icolvpa]*vpa_grid[ivpa]
+                                                  + vpa_grid[icolvpa]^2*vpa_grid[ivpa]^2) *
+                                                 vpa_wgts[icolvpa]/sqrt(π) * f[ivpa,ivperp,iz]
+            end
+        end
+    end
+
+    return nothing
+end
+
+function add_electron_implicit_constraint_forcing_to_z_only_Jacobian!(
+        jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp,
+        vpa, constraint_forcing_rate, dt, ir, ivperp, ivpa)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong")
+
+    vpa_grid = vpa.grid
+    vpa_wgts = vpa.wgts
+
+    @loop_z iz begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = iz
+
         # Diagonal terms
         jacobian_matrix[row,row] += -dt * constraint_forcing_rate *
                                           ((1.0 - zeroth_moment[iz])
                                            - first_moment[iz]*vpa_grid[ivpa]
                                            + (0.5 - second_moment[iz])*vpa_grid[ivpa]^2)
+    end
+
+    return nothing
+end
+
+function add_electron_implicit_constraint_forcing_to_v_only_Jacobian!(
+        jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp,
+        vpa, constraint_forcing_rate, dt, ir, iz)
+
+    @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square")
+    @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong")
+
+    vpa_grid = vpa.grid
+    vpa_wgts = vpa.wgts
+    v_size = vperp.n * vpa.n
+
+    @loop_vperp_vpa ivperp ivpa begin
+        if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed)
+            continue
+        end
+
+        # Rows corresponding to pdf_electron
+        row = (ivperp - 1) * vpa.n + ivpa
+
+        # Diagonal terms
+        jacobian_matrix[row,row] += -dt * constraint_forcing_rate *
+                                          ((1.0 - zeroth_moment)
+                                           - first_moment*vpa_grid[ivpa]
+                                           + (0.5 - second_moment)*vpa_grid[ivpa]^2)
 
         # Integral terms
         # d(∫dw_∥ w_∥^n g[irow])/d(g[icol]) = vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^n
         for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
-            col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+            col = (icolvperp - 1) * vpa.n + icolvpa
             jacobian_matrix[row,col] += dt * constraint_forcing_rate *
                                              (1.0
                                               + vpa_grid[icolvpa]*vpa_grid[ivpa]
                                               + vpa_grid[icolvpa]^2*vpa_grid[ivpa]^2) *
-                                             vpa_wgts[icolvpa]/sqrt(π) * f[ivpa,ivperp,iz]
+                                             vpa_wgts[icolvpa]/sqrt(π) * f[ivpa,ivperp]
         end
     end
 

From 3c6929462e222966c6dc7b55d69b5556bf900c0e Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Fri, 25 Oct 2024 11:46:46 +0100
Subject: [PATCH 10/43] Tests for ADI Jacobians

---
 moment_kinetics/test/jacobian_matrix_tests.jl | 921 +++++++++++++++++-
 1 file changed, 905 insertions(+), 16 deletions(-)

diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl
index e61ae7d33..e04a60d33 100644
--- a/moment_kinetics/test/jacobian_matrix_tests.jl
+++ b/moment_kinetics/test/jacobian_matrix_tests.jl
@@ -9,32 +9,54 @@ using moment_kinetics.analysis: vpagrid_to_dzdt
 using moment_kinetics.array_allocation: allocate_shared_float
 using moment_kinetics.boundary_conditions: enforce_v_boundary_condition_local!,
                                            enforce_vperp_boundary_condition!
-using moment_kinetics.derivatives: derivative_z!
+using moment_kinetics.calculus: derivative!
+using moment_kinetics.derivatives: derivative_z!, derivative_z_pdf_vpavperpz!
 using moment_kinetics.electron_fluid_equations: calculate_electron_qpar_from_pdf_no_r!,
                                                 electron_energy_equation_no_r!,
-                                                add_electron_energy_equation_to_Jacobian!
+                                                add_electron_energy_equation_to_Jacobian!,
+                                                add_electron_energy_equation_to_z_only_Jacobian!,
+                                                add_electron_energy_equation_to_v_only_Jacobian!
 using moment_kinetics.electron_kinetic_equation: add_contribution_from_pdf_term!,
                                                  add_contribution_from_electron_pdf_term_to_Jacobian!,
+                                                 add_contribution_from_electron_pdf_term_to_z_only_Jacobian!,
+                                                 add_contribution_from_electron_pdf_term_to_v_only_Jacobian!,
                                                  add_dissipation_term!,
                                                  add_electron_dissipation_term_to_Jacobian!,
+                                                 add_electron_dissipation_term_to_v_only_Jacobian!,
                                                  add_ion_dt_forcing_of_electron_ppar_to_Jacobian!,
+                                                 add_ion_dt_forcing_of_electron_ppar_to_z_only_Jacobian!,
+                                                 add_ion_dt_forcing_of_electron_ppar_to_v_only_Jacobian!,
                                                  electron_kinetic_equation_euler_update!,
-                                                 fill_electron_kinetic_equation_Jacobian!
+                                                 fill_electron_kinetic_equation_Jacobian!,
+                                                 fill_electron_kinetic_equation_v_only_Jacobian!,
+                                                 fill_electron_kinetic_equation_z_only_Jacobian_f!,
+                                                 fill_electron_kinetic_equation_z_only_Jacobian_ppar!
 using moment_kinetics.electron_vpa_advection: electron_vpa_advection!,
-                                              add_electron_vpa_advection_to_Jacobian!
+                                              update_electron_speed_vpa!,
+                                              add_electron_vpa_advection_to_Jacobian!,
+                                              add_electron_vpa_advection_to_v_only_Jacobian!
 using moment_kinetics.electron_z_advection: electron_z_advection!,
                                             update_electron_speed_z!,
-                                            add_electron_z_advection_to_Jacobian!
+                                            add_electron_z_advection_to_Jacobian!,
+                                            add_electron_z_advection_to_z_only_Jacobian!,
+                                            add_electron_z_advection_to_v_only_Jacobian!
 using moment_kinetics.external_sources: total_external_electron_sources!,
-                                        add_total_external_electron_source_to_Jacobian!
+                                        add_total_external_electron_source_to_Jacobian!,
+                                        add_total_external_electron_source_to_z_only_Jacobian!,
+                                        add_total_external_electron_source_to_v_only_Jacobian!
 using moment_kinetics.krook_collisions: electron_krook_collisions!,
-                                        add_electron_krook_collisions_to_Jacobian!
+                                        add_electron_krook_collisions_to_Jacobian!,
+                                        add_electron_krook_collisions_to_z_only_Jacobian!,
+                                        add_electron_krook_collisions_to_v_only_Jacobian!
 using moment_kinetics.looping
 using moment_kinetics.moment_constraints: electron_implicit_constraint_forcing!,
                                           add_electron_implicit_constraint_forcing_to_Jacobian!,
+                                          add_electron_implicit_constraint_forcing_to_z_only_Jacobian!,
+                                          add_electron_implicit_constraint_forcing_to_v_only_Jacobian!,
                                           hard_force_moment_constraints!
 using moment_kinetics.type_definitions: mk_float
-using moment_kinetics.velocity_moments: calculate_electron_moment_derivatives_no_r!
+using moment_kinetics.velocity_moments: calculate_electron_moment_derivatives_no_r!,
+                                        integrate_over_vspace
 
 using StatsBase
 
@@ -276,8 +298,27 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2)
         p_size = length(ppar)
         total_size = pdf_size + p_size
 
+        z_speed = @view z_advect[1].speed[:,:,:,ir]
+
+        dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir]
+        begin_vperp_vpa_region()
+        update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir)
+        @loop_vperp_vpa ivperp ivpa begin
+            @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed[:,ivpa,ivperp]
+        end
+        #calculate the upwind derivative
+        @views derivative_z_pdf_vpavperpz!(dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_1[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_2[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_3[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_4[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_5[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_6[:,:,ir],
+                                           z_spectral, z)
+
         jacobian_matrix = allocate_shared_float(total_size, total_size)
         @serial_region begin
+            jacobian_matrix .= 0.0
             for row ∈ 1:total_size
                 # Initialise identity matrix
                 jacobian_matrix[row,row] = 1.0
@@ -285,8 +326,85 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2)
         end
 
         add_electron_z_advection_to_Jacobian!(
-            jacobian_matrix, f, dens, upar, ppar, vth, me, z, vperp, vpa, z_spectral,
-            z_advect, scratch_dummy, dt, ir; ppar_offset=pdf_size)
+            jacobian_matrix, f, dens, upar, ppar, vth, dpdf_dz, me, z, vperp, vpa,
+            z_spectral, z_advect, z_speed, scratch_dummy, dt, ir; ppar_offset=pdf_size)
+
+        # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
+        # variables (vth, etc.).
+
+        @testset "ADI Jacobians - implicit z" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_vperp_vpa_region()
+            @loop_vperp_vpa ivperp ivpa begin
+                this_slice = (ivperp - 1)*vpa.n + ivpa:v_size:(z.n - 1)*v_size + (ivperp - 1)*vpa.n + ivpa
+                @views add_electron_z_advection_to_z_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[ivpa,ivperp,:],
+                    dens, upar, ppar, vth, dpdf_dz[ivpa,ivperp,:], me, z, vperp, vpa,
+                    z_spectral, z_advect, z_speed, scratch_dummy, dt, ir,
+                    ivperp, ivpa)
+            end
+
+            # Add 'explicit' contribution
+            add_electron_z_advection_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, dpdf_dz, me, z,
+                vperp, vpa, z_spectral, z_advect, z_speed, scratch_dummy, dt, ir,
+                :explicit_v; ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
+        @testset "ADI Jacobians - implicit v" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_z_region()
+            @loop_z iz begin
+                this_slice = collect((iz - 1)*v_size + 1:iz*v_size)
+                push!(this_slice, iz + pdf_size)
+                @views add_electron_z_advection_to_v_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz],
+                    dens[iz], upar[iz], ppar[iz], vth[iz], dpdf_dz[:,:,iz], me, z, vperp,
+                    vpa, z_spectral, z_advect, z_speed, scratch_dummy, dt, ir, iz)
+            end
+
+            # Add 'explicit' contribution
+            add_electron_z_advection_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, dpdf_dz, me, z,
+                vperp, vpa, z_spectral, z_advect, z_speed, scratch_dummy, dt, ir,
+                :explicit_z; ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
 
         function residual_func!(residual, this_f, this_p)
             begin_z_region()
@@ -525,9 +643,23 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2)
         p_size = length(ppar)
         total_size = pdf_size + p_size
 
+        dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir]
+        begin_z_vperp_region()
+        update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid,
+                                   external_source_settings.electron, ir)
+        @loop_z_vperp iz ivperp begin
+            @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir]
+        end
+        # Calculate the upwind derivative of the electron pdf w.r.t. wpa
+        @loop_z_vperp iz ivperp begin
+            @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa,
+                               vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral)
+        end
+
         jacobian_matrix = allocate_shared_float(total_size, total_size)
         begin_serial_region()
         @serial_region begin
+            jacobian_matrix .= 0.0
             for row ∈ 1:total_size
                 # Initialise identity matrix
                 jacobian_matrix[row,row] = 1.0
@@ -535,10 +667,80 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2)
         end
 
         add_electron_vpa_advection_to_Jacobian!(
-            jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
-            dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral,
-            vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir;
-            ppar_offset=pdf_size)
+            jacobian_matrix, f, dens, upar, ppar, vth, third_moment, dpdf_dvpa, ddens_dz,
+            dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral,
+            vpa_spectral, vpa_advect, z_speed, scratch_dummy, external_source_settings,
+            dt, ir; ppar_offset=pdf_size)
+
+        # Test 'ADI Jacobians' before other tests, because residual_func!() may modify some
+        # variables (vth, etc.).
+
+        @testset "ADI Jacobians - implicit z" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            # There is no 'implicit z' contribution for vpa advection
+
+            # Add 'explicit' contribution
+            add_electron_vpa_advection_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, third_moment,
+                dpdf_dvpa, ddens_dz, dppar_dz, dthird_moment_dz, moments, me, z, vperp,
+                vpa, z_spectral, vpa_spectral, vpa_advect, z_speed, scratch_dummy,
+                external_source_settings, dt, ir, :explicit_v; ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
+        @testset "ADI Jacobians - implicit v" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_z_region()
+            @loop_z iz begin
+                this_slice = collect((iz - 1)*v_size + 1:iz*v_size)
+                push!(this_slice, iz + pdf_size)
+                @views add_electron_vpa_advection_to_v_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], dens[iz],
+                    upar[iz], ppar[iz], vth[iz], third_moment[iz], dpdf_dvpa[:,:,iz],
+                    ddens_dz[iz], dppar_dz[iz], dthird_moment_dz[iz], moments, me, z,
+                    vperp, vpa, z_spectral, vpa_spectral, vpa_advect, z_speed,
+                    scratch_dummy, external_source_settings, dt, ir, iz)
+            end
+
+            # Add 'explicit' contribution
+            add_electron_vpa_advection_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, third_moment,
+                dpdf_dvpa, ddens_dz, dppar_dz, dthird_moment_dz, moments, me, z, vperp,
+                vpa, z_spectral, vpa_spectral, vpa_advect, z_speed, scratch_dummy,
+                external_source_settings, dt, ir, :explicit_z; ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
 
         function residual_func!(residual, this_f, this_p)
             begin_z_region()
@@ -798,6 +1000,7 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo
         jacobian_matrix = allocate_shared_float(total_size, total_size)
         begin_serial_region()
         @serial_region begin
+            jacobian_matrix .= 0.0
             for row ∈ 1:total_size
                 # Initialise identity matrix
                 jacobian_matrix[row,row] = 1.0
@@ -809,6 +1012,87 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo
             dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
             vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir; ppar_offset=pdf_size)
 
+        # Test 'ADI Jacobians' before other tests, because residual_func!() may modify some
+        # variables (vth, etc.).
+
+        @testset "ADI Jacobians - implicit z" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_vperp_vpa_region()
+            @loop_vperp_vpa ivperp ivpa begin
+                this_slice = (ivperp - 1)*vpa.n + ivpa:v_size:(z.n - 1)*v_size + (ivperp - 1)*vpa.n + ivpa
+                @views add_contribution_from_electron_pdf_term_to_z_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[ivpa,ivperp,:],
+                    dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, dvth_dz,
+                    dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
+                    vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, ivperp, ivpa)
+            end
+
+            # Add 'explicit' contribution
+            add_contribution_from_electron_pdf_term_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, third_moment,
+                ddens_dz, dppar_dz, dvth_dz, dqpar_dz, dthird_moment_dz, moments, me,
+                external_source_settings, z, vperp, vpa, z_spectral, z_speed,
+                scratch_dummy, dt, ir, :explicit_v; ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-13)
+            end
+        end
+
+        @testset "ADI Jacobians - implicit v" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_z_region()
+            @loop_z iz begin
+                this_slice = collect((iz - 1)*v_size + 1:iz*v_size)
+                push!(this_slice, iz + pdf_size)
+                @views add_contribution_from_electron_pdf_term_to_v_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], dens[iz],
+                    upar[iz], ppar[iz], vth[iz], third_moment[iz], ddens_dz[iz],
+                    dppar_dz[iz], dvth_dz[iz], dqpar_dz[iz], dthird_moment_dz[iz],
+                    moments, me, external_source_settings, z, vperp, vpa, z_spectral,
+                    z_speed, scratch_dummy, dt, ir, iz)
+            end
+
+            # Add 'explicit' contribution
+            add_contribution_from_electron_pdf_term_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, third_moment,
+                ddens_dz, dppar_dz, dvth_dz, dqpar_dz, dthird_moment_dz, moments, me,
+                external_source_settings, z, vperp, vpa, z_spectral, z_speed,
+                scratch_dummy, dt, ir, :explicit_z; ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-13)
+            end
+        end
+
         function residual_func!(residual, this_f, this_p)
             begin_z_region()
             @loop_z iz begin
@@ -1032,6 +1316,7 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2)
         jacobian_matrix = allocate_shared_float(total_size, total_size)
         begin_serial_region()
         @serial_region begin
+            jacobian_matrix .= 0.0
             for row ∈ 1:total_size
                 # Initialise identity matrix
                 jacobian_matrix[row,row] = 1.0
@@ -1042,6 +1327,66 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2)
             jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt,
             ir)
 
+        @testset "ADI Jacobians - implicit z" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            # There is no 'implicit z' contribution for electron dissipation
+
+            # Add 'explicit' contribution
+            add_electron_dissipation_term_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, num_diss_params, z, vperp, vpa,
+                vpa_spectral, z_speed, dt, ir, :explicit_v)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
+        @testset "ADI Jacobians - implicit v" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_z_region()
+            @loop_z iz begin
+                this_slice = collect((iz - 1)*v_size + 1:iz*v_size)
+                push!(this_slice, iz + pdf_size)
+                @views add_electron_dissipation_term_to_v_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz],
+                    num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir, iz)
+            end
+
+            # Add 'explicit' contribution
+            add_electron_dissipation_term_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, num_diss_params, z, vperp, vpa,
+                vpa_spectral, z_speed, dt, ir, :explicit_z)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
         function residual_func!(residual, this_f, this_p)
             begin_z_region()
             @loop_z iz begin
@@ -1269,6 +1614,7 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2)
         jacobian_matrix = allocate_shared_float(total_size, total_size)
         begin_serial_region()
         @serial_region begin
+            jacobian_matrix .= 0.0
             for row ∈ 1:total_size
                 # Initialise identity matrix
                 jacobian_matrix[row,row] = 1.0
@@ -1279,6 +1625,79 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2)
             jacobian_matrix, f, dens, upar, ppar, vth, @view(moments.ion.upar[:,ir]),
             collisions, z, vperp, vpa, z_speed, dt, ir; ppar_offset=pdf_size)
 
+        @testset "ADI Jacobians - implicit z" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_vperp_vpa_region()
+            @loop_vperp_vpa ivperp ivpa begin
+                this_slice = (ivperp - 1)*vpa.n + ivpa:v_size:(z.n - 1)*v_size + (ivperp - 1)*vpa.n + ivpa
+                @views add_electron_krook_collisions_to_z_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[ivpa,ivperp,:],
+                    dens, upar, ppar, vth, moments.ion.upar[:,ir], collisions, z, vperp,
+                    vpa, z_speed, dt, ir, ivperp, ivpa)
+            end
+
+            # Add 'explicit' contribution
+            add_electron_krook_collisions_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, dens, upar, ppar, vth,
+                @view(moments.ion.upar[:,ir]), collisions, z, vperp, vpa, z_speed, dt, ir,
+                :explicit_v; ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
+        @testset "ADI Jacobians - implicit v" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_z_region()
+            @loop_z iz begin
+                this_slice = collect((iz - 1)*v_size + 1:iz*v_size)
+                push!(this_slice, iz + pdf_size)
+                @views add_electron_krook_collisions_to_v_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], dens[iz],
+                    upar[iz], ppar[iz], vth[iz], moments.ion.upar[iz,ir], collisions, z,
+                    vperp, vpa, z_speed, dt, ir, iz)
+            end
+
+            # Add 'explicit' contribution
+            add_electron_krook_collisions_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, dens, upar, ppar, vth,
+                @view(moments.ion.upar[:,ir]), collisions, z, vperp, vpa, z_speed, dt, ir,
+                :explicit_z; ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
         function residual_func!(residual, this_f, this_p)
             begin_z_region()
             @loop_z iz begin
@@ -1520,6 +1939,7 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2)
         jacobian_matrix = allocate_shared_float(total_size, total_size)
         begin_serial_region()
         @serial_region begin
+            jacobian_matrix .= 0.0
             for row ∈ 1:total_size
                 # Initialise identity matrix
                 jacobian_matrix[row,row] = 1.0
@@ -1530,6 +1950,79 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2)
             jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron,
             z, vperp, vpa, dt, ir; ppar_offset=pdf_size)
 
+        @testset "ADI Jacobians - implicit z" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_vperp_vpa_region()
+            @loop_vperp_vpa ivperp ivpa begin
+                this_slice = (ivperp - 1)*vpa.n + ivpa:v_size:(z.n - 1)*v_size + (ivperp - 1)*vpa.n + ivpa
+                @views add_total_external_electron_source_to_z_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[ivpa,ivperp,:],
+                    moments, me, z_speed, external_source_settings.electron, z, vperp,
+                    vpa, dt, ir, ivperp, ivpa)
+            end
+
+            # Add 'explicit' contribution
+            add_total_external_electron_source_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, moments, me, z_speed,
+                external_source_settings.electron, z, vperp, vpa, dt, ir, :explicit_v;
+                ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
+        @testset "ADI Jacobians - implicit v" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_z_region()
+            @loop_z iz begin
+                this_slice = collect((iz - 1)*v_size + 1:iz*v_size)
+                push!(this_slice, iz + pdf_size)
+                @views add_total_external_electron_source_to_v_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz],
+                    moments, me, z_speed, external_source_settings.electron, z, vperp,
+                    vpa, dt, ir, iz)
+            end
+
+            # Add 'explicit' contribution
+            add_total_external_electron_source_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, moments, me, z_speed,
+                external_source_settings.electron, z, vperp, vpa, dt, ir, :explicit_z;
+                ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
         function residual_func!(residual, this_f, this_p)
             begin_z_region()
             @loop_z iz begin
@@ -1772,9 +2265,22 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil
         p_size = length(ppar)
         total_size = pdf_size + p_size
 
+        zeroth_moment = z.scratch_shared
+        first_moment = z.scratch_shared2
+        second_moment = z.scratch_shared3
+        begin_z_region()
+        vpa_grid = vpa.grid
+        vpa_wgts = vpa.wgts
+        @loop_z iz begin
+            @views zeroth_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_wgts)
+            @views first_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, vpa_wgts)
+            @views second_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, 2, vpa_wgts)
+        end
+
         jacobian_matrix = allocate_shared_float(total_size, total_size)
         begin_serial_region()
         @serial_region begin
+            jacobian_matrix .= 0.0
             for row ∈ 1:total_size
                 # Initialise identity matrix
                 jacobian_matrix[row,row] = 1.0
@@ -1782,8 +2288,81 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil
         end
 
         add_electron_implicit_constraint_forcing_to_Jacobian!(
-            jacobian_matrix, f, z_speed, z, vperp, vpa,
-            t_params.electron.constraint_forcing_rate, dt, ir)
+            jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z,
+            vperp, vpa, t_params.electron.constraint_forcing_rate, dt, ir)
+
+        @testset "ADI Jacobians - implicit z" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_vperp_vpa_region()
+            @loop_vperp_vpa ivperp ivpa begin
+                this_slice = (ivperp - 1)*vpa.n + ivpa:v_size:(z.n - 1)*v_size + (ivperp - 1)*vpa.n + ivpa
+                @views add_electron_implicit_constraint_forcing_to_z_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[ivpa,ivperp,:],
+                    zeroth_moment, first_moment, second_moment, z_speed, z, vperp, vpa,
+                    t_params.electron.constraint_forcing_rate, dt, ir, ivperp, ivpa)
+            end
+
+            # Add 'explicit' contribution
+            add_electron_implicit_constraint_forcing_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, zeroth_moment, first_moment, second_moment,
+                z_speed, z, vperp, vpa, t_params.electron.constraint_forcing_rate, dt, ir,
+                :explicit_v)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
+        @testset "ADI Jacobians - implicit v" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_z_region()
+            @loop_z iz begin
+                this_slice = collect((iz - 1)*v_size + 1:iz*v_size)
+                push!(this_slice, iz + pdf_size)
+                @views add_electron_implicit_constraint_forcing_to_v_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz],
+                    zeroth_moment[iz], first_moment[iz], second_moment[iz], z_speed, z,
+                    vperp, vpa, t_params.electron.constraint_forcing_rate, dt, ir, iz)
+            end
+
+            # Add 'explicit' contribution
+            add_electron_implicit_constraint_forcing_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, zeroth_moment, first_moment, second_moment,
+                z_speed, z, vperp, vpa, t_params.electron.constraint_forcing_rate, dt, ir,
+                :explicit_z)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
 
         function residual_func!(residual, this_f, this_p)
             begin_z_region()
@@ -2029,6 +2608,7 @@ function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2)
         jacobian_matrix = allocate_shared_float(total_size, total_size)
         begin_serial_region()
         @serial_region begin
+            jacobian_matrix .= 0.0
             for row ∈ 1:total_size
                 # Initialise identity matrix
                 jacobian_matrix[row,row] = 1.0
@@ -2040,6 +2620,82 @@ function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2)
             dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa,
             z_spectral, num_diss_params, dt, ir; ppar_offset=pdf_size)
 
+        @testset "ADI Jacobians - implicit z" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            @serial_region begin
+                # Add 'implicit' contribution. The z-only function is passed a view of
+                # the trailing z.n by z.n block of the full matrix, i.e. its last z.n rows
+                # and columns.
+                this_slice = total_size - z.n + 1:total_size
+                @views add_electron_energy_equation_to_z_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], dens, upar, ppar,
+                    vth, third_moment, ddens_dz, dupar_dz, dppar_dz, dthird_moment_dz,
+                    collisions, composition, z, vperp, vpa, z_spectral, num_diss_params,
+                    dt, ir)
+            end
+
+            # Add 'explicit' contribution
+            add_electron_energy_equation_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, third_moment,
+                ddens_dz, dupar_dz, dppar_dz, dthird_moment_dz, collisions, composition,
+                z, vperp, vpa, z_spectral, num_diss_params, dt, ir, :explicit_v;
+                ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
+        @testset "ADI Jacobians - implicit v" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_z_region()
+            @loop_z iz begin
+                this_slice = collect((iz - 1)*v_size + 1:iz*v_size)
+                push!(this_slice, iz + pdf_size)
+                @views add_electron_energy_equation_to_v_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], dens[iz],
+                    upar[iz], ppar[iz], vth[iz], third_moment[iz], ddens_dz[iz],
+                    dupar_dz[iz], dppar_dz[iz], dthird_moment_dz[iz], collisions,
+                    composition, z, vperp, vpa, z_spectral, num_diss_params, dt, ir, iz)
+            end
+
+            # Add 'explicit' contribution
+            add_electron_energy_equation_to_Jacobian!(
+                jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, third_moment,
+                ddens_dz, dupar_dz, dppar_dz, dthird_moment_dz, collisions, composition,
+                z, vperp, vpa, z_spectral, num_diss_params, dt, ir, :explicit_z;
+                ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
         function residual_func!(residual, this_f, this_p)
             begin_z_region()
             @loop_z iz begin
@@ -2219,6 +2875,7 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2
         jacobian_matrix = allocate_shared_float(total_size, total_size)
         begin_serial_region()
         @serial_region begin
+            jacobian_matrix .= 0.0
             for row ∈ 1:total_size
                 # Initialise identity matrix
                 jacobian_matrix[row,row] = 1.0
@@ -2228,6 +2885,73 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2
         add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(
             jacobian_matrix, z, dt, ion_dt, ir; ppar_offset=pdf_size)
 
+        @testset "ADI Jacobians - implicit z" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            @serial_region begin
+                # Add 'implicit' contribution
+                this_slice = total_size - z.n + 1:total_size
+                @views add_ion_dt_forcing_of_electron_ppar_to_z_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], z, dt, ion_dt, ir)
+            end
+
+            # Add 'explicit' contribution
+            add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(
+                jacobian_matrix_ADI_check, z, dt, ion_dt, ir, :explicit_v;
+                ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
+        @testset "ADI Jacobians - implicit v" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .= 0.0
+                for row ∈ 1:total_size
+                    # Initialise identity matrix
+                    jacobian_matrix_ADI_check[row,row] = 1.0
+                end
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_z_region()
+            @loop_z iz begin
+                this_slice = collect((iz - 1)*v_size + 1:iz*v_size)
+                push!(this_slice, iz + pdf_size)
+                @views add_ion_dt_forcing_of_electron_ppar_to_v_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], z, dt, ion_dt, ir,
+                    iz)
+            end
+
+            # Add 'explicit' contribution
+            add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(
+                jacobian_matrix_ADI_check, z, dt, ion_dt, ir, :explicit_z;
+                ppar_offset=pdf_size)
+
+            begin_serial_region()
+            @serial_region begin
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15)
+            end
+        end
+
         function residual_func!(residual, this_f, this_p)
             begin_z_region()
             @loop_z iz begin
@@ -2411,6 +3135,7 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
         jacobian_matrix = allocate_shared_float(total_size, total_size)
         begin_serial_region()
         @serial_region begin
+            jacobian_matrix .= 0.0
             for row ∈ 1:total_size
                 # Initialise identity matrix
                 jacobian_matrix[row,row] = 1.0
@@ -2423,6 +3148,170 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
             external_source_settings, num_diss_params, t_params.electron, ion_dt, ir,
             true)
 
+        # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
+        # variables (vth, etc.).
+
+        # Reconstruct w_∥^3 moment of g_e from already-calculated qpar
+        buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1]
+        buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1]
+        buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1]
+        buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1]
+        third_moment = scratch_dummy.buffer_z_1
+        dthird_moment_dz = scratch_dummy.buffer_z_2
+        begin_z_region()
+        @loop_z iz begin
+            third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz]
+        end
+        derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, buffer_3,
+                      buffer_4, z_spectral, z)
+
+        z_speed = @view z_advect[1].speed[:,:,:,ir]
+
+        dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir]
+        begin_vperp_vpa_region()
+        update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir)
+        @loop_vperp_vpa ivperp ivpa begin
+            @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed[:,ivpa,ivperp]
+        end
+        #calculate the upwind derivative
+        @views derivative_z_pdf_vpavperpz!(dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_1[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_2[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_3[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_4[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_5[:,:,ir],
+                                           scratch_dummy.buffer_vpavperpr_6[:,:,ir],
+                                           z_spectral, z)
+
+        dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir]
+        begin_z_vperp_region()
+        update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid,
+                                   external_source_settings.electron, ir)
+        @loop_z_vperp iz ivperp begin
+            @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir]
+        end
+        #calculate the upwind derivative of the electron pdf w.r.t. wpa
+        @loop_z_vperp iz ivperp begin
+            @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa,
+                               vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral)
+        end
+
+        zeroth_moment = z.scratch_shared
+        first_moment = z.scratch_shared2
+        second_moment = z.scratch_shared3
+        begin_z_region()
+        vpa_grid = vpa.grid
+        vpa_wgts = vpa.wgts
+        @loop_z iz begin
+            @views zeroth_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_wgts)
+            @views first_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, vpa_wgts)
+            @views second_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, 2, vpa_wgts)
+        end
+
+        @testset "ADI Jacobians - implicit z" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+
+            begin_serial_region()
+            @serial_region begin
+                # Need to explicitly initialise because
+                # fill_electron_kinetic_equation_z_only_Jacobian_f!() and
+                # fill_electron_kinetic_equation_z_only_Jacobian_ppar!()
+                # only fill the diagonal-in-velocity-indices elements, so when applied to
+                # a full matrix they would not initialise every element.
+                jacobian_matrix_ADI_check .= 0.0
+            end
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_vperp_vpa_region()
+            @loop_vperp_vpa ivperp ivpa begin
+                this_slice = (ivperp - 1)*vpa.n + ivpa:v_size:(z.n - 1)*v_size + (ivperp - 1)*vpa.n + ivpa
+                @views fill_electron_kinetic_equation_z_only_Jacobian_f!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[ivpa,ivperp,:],
+                    ppar, dpdf_dz[ivpa,ivperp,:], dpdf_dvpa[ivpa,ivperp,:], z_speed,
+                    moments, zeroth_moment, first_moment, second_moment, third_moment,
+                    dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral,
+                    vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+                    external_source_settings, num_diss_params, t_params.electron, ion_dt,
+                    ir, ivperp, ivpa, true)
+            end
+
+            @serial_region begin
+                # Add 'implicit' contribution
+                this_slice = (pdf_size + 1):total_size
+                @views fill_electron_kinetic_equation_z_only_Jacobian_ppar!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], ppar, moments,
+                    zeroth_moment, first_moment, second_moment, third_moment,
+                    dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral,
+                    vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+                    external_source_settings, num_diss_params, t_params.electron, ion_dt,
+                    ir, true)
+            end
+
+            # Add 'explicit' contribution
+            jacobian_matrix_ADI_check_explicit = allocate_shared_float(total_size, total_size)
+            fill_electron_kinetic_equation_Jacobian!(
+                jacobian_matrix_ADI_check_explicit, f, ppar, moments, collisions,
+                composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral,
+                z_advect, vpa_advect, scratch_dummy, external_source_settings,
+                num_diss_params, t_params.electron, ion_dt, ir, true, :explicit_v)
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .+= jacobian_matrix_ADI_check_explicit
+
+                # The settings for this test are a bit strange, due to trying to get the
+                # finite-difference approximation to the Jacobian to agree with the
+                # Jacobian matrix functions without being too messed up by floating-point
+                # rounding errors. The result is that some entries in the Jacobian matrix
+                # here are O(1.0e5), so it is important to use `rtol` here.
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=1.0e-15, atol=1.0e-15)
+            end
+        end
+
+        @testset "ADI Jacobians - implicit v" begin
+            # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
+            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+
+            v_size = vperp.n * vpa.n
+
+            # Add 'implicit' contribution
+            begin_z_region()
+            @loop_z iz begin
+                this_slice = collect((iz - 1)*v_size + 1:iz*v_size)
+                push!(this_slice, iz + pdf_size)
+                @views fill_electron_kinetic_equation_v_only_Jacobian!(
+                    jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], ppar[iz],
+                    dpdf_dz[:,:,iz], dpdf_dvpa[:,:,iz], z_speed, moments,
+                    zeroth_moment[iz], first_moment[iz], second_moment[iz],
+                    third_moment[iz], dthird_moment_dz[iz], collisions, composition, z,
+                    vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
+                    vpa_advect, scratch_dummy, external_source_settings, num_diss_params,
+                    t_params.electron, ion_dt, ir, iz, true)
+            end
+
+            # Add 'explicit' contribution
+            jacobian_matrix_ADI_check_explicit = allocate_shared_float(total_size, total_size)
+            fill_electron_kinetic_equation_Jacobian!(
+                jacobian_matrix_ADI_check_explicit, f, ppar, moments, collisions,
+                composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral,
+                z_advect, vpa_advect, scratch_dummy, external_source_settings,
+                num_diss_params, t_params.electron, ion_dt, ir, true, :explicit_z)
+
+            begin_serial_region()
+            @serial_region begin
+                jacobian_matrix_ADI_check .+= jacobian_matrix_ADI_check_explicit
+
+                # The settings for this test are a bit strange, due to trying to get the
+                # finite-difference approximation to the Jacobian to agree with the
+                # Jacobian matrix functions without being too messed up by floating-point
+                # rounding errors. The result is that some entries in the Jacobian matrix
+                # here are O(1.0e5), so it is important to use `rtol` here.
+                @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=1.0e-13, atol=1.0e-13)
+            end
+        end
+
         function residual_func!(residual_f, residual_p, this_f, this_p)
             begin_z_region()
             @loop_z iz begin

From c4fa0218e28cb1ef3437c9e4caf6f967e4abcbd6 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Mon, 28 Oct 2024 13:21:05 +0000
Subject: [PATCH 11/43] Reduce memory usage in Jacobian matrix tests

Avoid using more than two Jacobian-sized buffer arrays at any time in
the Jacobian matrix tests. Using more than this would cause the GitHub
Actions CI servers to run out of shared memory, causing an error.
---
 moment_kinetics/test/jacobian_matrix_tests.jl | 153 +++++++++++-------
 1 file changed, 98 insertions(+), 55 deletions(-)

diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl
index e04a60d33..9bae8296c 100644
--- a/moment_kinetics/test/jacobian_matrix_tests.jl
+++ b/moment_kinetics/test/jacobian_matrix_tests.jl
@@ -332,9 +332,10 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2)
         # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
         # variables (vth, etc.).
 
+        jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+
         @testset "ADI Jacobians - implicit z" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -371,7 +372,6 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2)
 
         @testset "ADI Jacobians - implicit v" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -675,9 +675,10 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2)
         # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
         # variables (vth, etc.).
 
+        jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+
         @testset "ADI Jacobians - implicit z" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -704,7 +705,6 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2)
 
         @testset "ADI Jacobians - implicit v" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -1015,9 +1015,10 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo
         # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
         # variables (vth, etc.).
 
+        jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+
         @testset "ADI Jacobians - implicit z" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -1055,7 +1056,6 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo
 
         @testset "ADI Jacobians - implicit v" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -1327,9 +1327,13 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2)
             jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt,
             ir)
 
+        # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
+        # variables (vth, etc.).
+
+        jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+
         @testset "ADI Jacobians - implicit z" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -1354,7 +1358,6 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2)
 
         @testset "ADI Jacobians - implicit v" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -1625,9 +1628,13 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2)
             jacobian_matrix, f, dens, upar, ppar, vth, @view(moments.ion.upar[:,ir]),
             collisions, z, vperp, vpa, z_speed, dt, ir; ppar_offset=pdf_size)
 
+        # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
+        # variables (vth, etc.).
+
+        jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+
         @testset "ADI Jacobians - implicit z" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -1663,7 +1670,6 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2)
 
         @testset "ADI Jacobians - implicit v" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -1950,9 +1956,13 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2)
             jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron,
             z, vperp, vpa, dt, ir; ppar_offset=pdf_size)
 
+        # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
+        # variables (vth, etc.).
+
+        jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+
         @testset "ADI Jacobians - implicit z" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -1988,7 +1998,6 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2)
 
         @testset "ADI Jacobians - implicit v" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -2291,9 +2300,13 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil
             jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z,
             vperp, vpa, t_params.electron.constraint_forcing_rate, dt, ir)
 
+        # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
+        # variables (vth, etc.).
+
+        jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+
         @testset "ADI Jacobians - implicit z" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -2329,7 +2342,6 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil
 
         @testset "ADI Jacobians - implicit v" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -2620,9 +2632,13 @@ function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2)
             dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa,
             z_spectral, num_diss_params, dt, ir; ppar_offset=pdf_size)
 
+        # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
+        # variables (vth, etc.).
+
+        jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+
         @testset "ADI Jacobians - implicit z" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -2659,7 +2675,6 @@ function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2)
 
         @testset "ADI Jacobians - implicit v" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -2885,9 +2900,13 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2
         add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(
             jacobian_matrix, z, dt, ion_dt, ir; ppar_offset=pdf_size)
 
+        # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
+        # variables (vth, etc.).
+
+        jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+
         @testset "ADI Jacobians - implicit z" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -2919,7 +2938,6 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2
 
         @testset "ADI Jacobians - implicit v" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
             begin_serial_region()
             @serial_region begin
                 jacobian_matrix_ADI_check .= 0.0
@@ -3133,20 +3151,15 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
         total_size = pdf_size + p_size
 
         jacobian_matrix = allocate_shared_float(total_size, total_size)
-        begin_serial_region()
-        @serial_region begin
-            jacobian_matrix .= 0.0
-            for row ∈ 1:total_size
-                # Initialise identity matrix
-                jacobian_matrix[row,row] = 1.0
-            end
-        end
 
-        fill_electron_kinetic_equation_Jacobian!(
-            jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa,
-            z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
-            external_source_settings, num_diss_params, t_params.electron, ion_dt, ir,
-            true)
+        # Calculate this later, so that we can use `jacobian_matrix` as a temporary
+        # buffer, to avoid allocating too much shared memory for the Github Actions CI
+        # servers.
+        #fill_electron_kinetic_equation_Jacobian!(
+        #    jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa,
+        #    z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+        #    external_source_settings, num_diss_params, t_params.electron, ion_dt, ir,
+        #    true)
 
         # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
         # variables (vth, etc.).
@@ -3208,19 +3221,22 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
             @views second_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, 2, vpa_wgts)
         end
 
+        # Test 'ADI Jacobians' before other tests, because residual_func() may modify some
+        # variables (vth, etc.).
+
+        jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
+        begin_serial_region()
+        @serial_region begin
+            # Need to explicitly initialise because
+            # fill_electron_kinetic_equation_z_only_Jacobian_f!() and
+            # fill_electron_kinetic_equation_z_only_Jacobian_ppar!()
+            # only fill the diagonal-in-velocity-indices elements, so when applied to
+            # a full matrix they would not initialise every element.
+            jacobian_matrix_ADI_check .= 0.0
+        end
+
         @testset "ADI Jacobians - implicit z" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
-
-            begin_serial_region()
-            @serial_region begin
-                # Need to explicitly initialise because
-                # fill_electron_kinetic_equation_z_only_Jacobian_f!() and
-                # fill_electron_kinetic_equation_z_only_Jacobian_ppar!()
-                # only fill the diagonal-in-velocity-indices elements, so when applied to
-                # a full matrix they would not initialise every element.
-                jacobian_matrix_ADI_check .= 0.0
-            end
 
             v_size = vperp.n * vpa.n
 
@@ -3251,16 +3267,25 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
             end
 
             # Add 'explicit' contribution
-            jacobian_matrix_ADI_check_explicit = allocate_shared_float(total_size, total_size)
+            # Use jacobian_matrix as a temporary buffer here.
             fill_electron_kinetic_equation_Jacobian!(
-                jacobian_matrix_ADI_check_explicit, f, ppar, moments, collisions,
-                composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral,
-                z_advect, vpa_advect, scratch_dummy, external_source_settings,
-                num_diss_params, t_params.electron, ion_dt, ir, true, :explicit_v)
+                jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa,
+                z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect,
+                scratch_dummy, external_source_settings, num_diss_params,
+                t_params.electron, ion_dt, ir, true, :explicit_v)
             begin_serial_region()
             @serial_region begin
-                jacobian_matrix_ADI_check .+= jacobian_matrix_ADI_check_explicit
+                jacobian_matrix_ADI_check .+= jacobian_matrix
+            end
+
+            fill_electron_kinetic_equation_Jacobian!(
+                jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa,
+                z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+                external_source_settings, num_diss_params, t_params.electron, ion_dt, ir,
+                true)
 
+            begin_serial_region()
+            @serial_region begin
                 # The settings for this test are a bit strange, due to trying to get the
                 # finite-difference approximation to the Jacobian to agree with the
                 # Jacobian matrix functions without being too messed up by floating-point
@@ -3270,9 +3295,18 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
             end
         end
 
+        begin_serial_region()
+        @serial_region begin
+            # Need to explicitly initialise because
+            # fill_electron_kinetic_equation_z_only_Jacobian_f!() and
+            # fill_electron_kinetic_equation_z_only_Jacobian_ppar!()
+            # only fill the diagonal-in-velocity-indices elements, so when applied to
+            # a full matrix they would not initialise every element.
+            jacobian_matrix_ADI_check .= 0.0
+        end
+
         @testset "ADI Jacobians - implicit v" begin
             # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian.
-            jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size)
 
             v_size = vperp.n * vpa.n
 
@@ -3292,17 +3326,26 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
             end
 
             # Add 'explicit' contribution
-            jacobian_matrix_ADI_check_explicit = allocate_shared_float(total_size, total_size)
+            # Use jacobian_matrix as a temporary buffer here.
             fill_electron_kinetic_equation_Jacobian!(
-                jacobian_matrix_ADI_check_explicit, f, ppar, moments, collisions,
-                composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral,
-                z_advect, vpa_advect, scratch_dummy, external_source_settings,
-                num_diss_params, t_params.electron, ion_dt, ir, true, :explicit_z)
+                jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa,
+                z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect,
+                scratch_dummy, external_source_settings, num_diss_params,
+                t_params.electron, ion_dt, ir, true, :explicit_z)
 
             begin_serial_region()
             @serial_region begin
-                jacobian_matrix_ADI_check .+= jacobian_matrix_ADI_check_explicit
+                jacobian_matrix_ADI_check .+= jacobian_matrix
+            end
 
+            fill_electron_kinetic_equation_Jacobian!(
+                jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa,
+                z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+                external_source_settings, num_diss_params, t_params.electron, ion_dt, ir,
+                true)
+
+            begin_serial_region()
+            @serial_region begin
                 # The settings for this test are a bit strange, due to trying to get the
                 # finite-difference approximation to the Jacobian to agree with the
                 # Jacobian matrix functions without being too messed up by floating-point

From 12cc4d465f4fcc07786b73733ff249ef454d1c49 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Sat, 26 Oct 2024 17:34:48 +0100
Subject: [PATCH 12/43] ADI preconditioner

Add a preconditioner for kinetic electrons using a variation on the
'alternating direction implicit' (ADI) method. Terms that couple
velocity space are first solved implicitly (over vpa only for the 1V
case), with terms with z-coupling treated 'explicitly' by being
subtracted from the right-hand-side; then terms that couple in z are
solved implicitly, with terms with v-coupling treated 'explicitly'. The
two steps may be iterated more times if this is needed.
---
 .../src/makie_post_processing.jl              |   2 +
 .../src/electron_kinetic_equation.jl          | 374 +++++++++++++++++-
 moment_kinetics/src/file_io.jl                |  12 +-
 moment_kinetics/src/initial_conditions.jl     |   2 +
 moment_kinetics/src/load_data.jl              |   7 +
 moment_kinetics/src/nonlinear_solvers.jl      |  78 +++-
 moment_kinetics/src/time_advance.jl           |   3 +-
 7 files changed, 473 insertions(+), 5 deletions(-)

diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl
index 0652d06c6..78ea8b827 100644
--- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl
+++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl
@@ -8079,8 +8079,10 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n
                     for p ∈ nl_prefixes
                         nonlinear_iterations = get_variable(ri, "$(p)_nonlinear_iterations_per_solve")
                         linear_iterations = get_variable(ri, "$(p)_linear_iterations_per_nonlinear_iteration")
+                        precon_iterations = get_variable(ri, "$(p)_precon_iterations_per_linear_iteration")
                         plot_1d(time, nonlinear_iterations, label=prefix * " " * p * " NL per solve", ax=ax)
                         plot_1d(time, linear_iterations, label=prefix * " " * p * " L per NL", ax=ax)
+                        plot_1d(time, precon_iterations, label=prefix * " " * p * " P per L", ax=ax)
                     end
                 end
             end
diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 54021ced6..9b5fe83aa 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -932,7 +932,7 @@ global_rank[] == 0 && println("recalculating precon")
                 end
 
 
-                function lu_precon!(x)
+                @timeit_debug global_timer lu_precon!(x) = begin
                     precon_ppar, precon_f = x
 
                     precon_lu, _, this_input_buffer, this_output_buffer =
@@ -1006,6 +1006,378 @@ global_rank[] == 0 && println("recalculating precon")
 
                 left_preconditioner = identity
                 right_preconditioner = lu_precon!
+            elseif nl_solver_params.preconditioner_type === Val(:electron_adi)
+
+                if t_params.dt[] > 1.5 * nl_solver_params.precon_dt[] ||
+                        t_params.dt[] < 2.0/3.0 * nl_solver_params.precon_dt[]
+
+                    # dt has changed significantly, so update the preconditioner
+                    nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval
+                end
+
+                if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval
+global_rank[] == 0 && println("recalculating precon")
+                    nl_solver_params.solves_since_precon_update[] = 0
+                    nl_solver_params.precon_dt[] = t_params.dt[]
+
+                    adi_info = nl_solver_params.preconditioners[ir]
+
+                    dens = @view moments.electron.dens[:,ir]
+                    upar = @view moments.electron.upar[:,ir]
+                    vth = @view moments.electron.vth[:,ir]
+                    qpar = @view moments.electron.qpar[:,ir]
+
+                    # Reconstruct w_∥^3 moment of g_e from already-calculated qpar
+                    buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1]
+                    buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1]
+                    buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1]
+                    buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1]
+                    third_moment = scratch_dummy.buffer_z_1
+                    dthird_moment_dz = scratch_dummy.buffer_z_2
+                    begin_z_region()
+                    @loop_z iz begin
+                        third_moment[iz] = 0.5 * qpar[iz] / electron_ppar_new[iz] / vth[iz]
+                    end
+                    derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2,
+                                  buffer_3, buffer_4, z_spectral, z)
+
+                    z_speed = @view z_advect[1].speed[:,:,:,ir]
+
+                    dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir]
+                    begin_vperp_vpa_region()
+                    update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir)
+                    @loop_vperp_vpa ivperp ivpa begin
+                        @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed[:,ivpa,ivperp]
+                    end
+                    #calculate the upwind derivative
+                    @views derivative_z_pdf_vpavperpz!(dpdf_dz, f_electron_new,
+                                                       z_advect[1].adv_fac[:,:,:,ir],
+                                                       scratch_dummy.buffer_vpavperpr_1[:,:,ir],
+                                                       scratch_dummy.buffer_vpavperpr_2[:,:,ir],
+                                                       scratch_dummy.buffer_vpavperpr_3[:,:,ir],
+                                                       scratch_dummy.buffer_vpavperpr_4[:,:,ir],
+                                                       scratch_dummy.buffer_vpavperpr_5[:,:,ir],
+                                                       scratch_dummy.buffer_vpavperpr_6[:,:,ir],
+                                                       z_spectral, z)
+
+                    dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir]
+                    begin_z_vperp_region()
+                    update_electron_speed_vpa!(vpa_advect[1], dens, upar,
+                                               electron_ppar_new, moments, vpa.grid,
+                                               external_source_settings.electron, ir)
+                    @loop_z_vperp iz ivperp begin
+                        @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir]
+                    end
+                    #calculate the upwind derivative of the electron pdf w.r.t. wpa
+                    @loop_z_vperp iz ivperp begin
+                        @views derivative!(dpdf_dvpa[:,ivperp,iz], f_electron_new[:,ivperp,iz], vpa,
+                                           vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral)
+                    end
+
+                    zeroth_moment = z.scratch_shared
+                    first_moment = z.scratch_shared2
+                    second_moment = z.scratch_shared3
+                    begin_z_region()
+                    vpa_grid = vpa.grid
+                    vpa_wgts = vpa.wgts
+                    @loop_z iz begin
+                        @views zeroth_moment[iz] = integrate_over_vspace(f_electron_new[:,1,iz], vpa_wgts)
+                        @views first_moment[iz] = integrate_over_vspace(f_electron_new[:,1,iz], vpa_grid, vpa_wgts)
+                        @views second_moment[iz] = integrate_over_vspace(f_electron_new[:,1,iz], vpa_grid, 2, vpa_wgts)
+                    end
+
+                    v_size = vperp.n * vpa.n
+
+                    # Do setup for 'v solves'
+                    v_solve_counter = 0
+                    A = adi_info.v_solve_matrix_buffer
+                    explicit_J = adi_info.J_buffer
+                    # Get sparse matrix for explicit, right-hand-side part of the
+                    # solve.
+                    fill_electron_kinetic_equation_Jacobian!(
+                        explicit_J, f_electron_new, electron_ppar_new, moments,
+                        collisions, composition, z, vperp, vpa, z_spectral,
+                        vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+                        external_source_settings, num_diss_params, t_params, ion_dt, ir,
+                        evolve_ppar, :explicit_z)
+                    begin_z_region()
+                    @loop_z iz begin
+                        v_solve_counter += 1
+                        # Get LU-factorized matrix for implicit part of the solve
+                        @views fill_electron_kinetic_equation_v_only_Jacobian!(
+                            A, f_electron_new[:,:,iz], electron_ppar_new[iz],
+                            dpdf_dz[:,:,iz], dpdf_dvpa[:,:,iz], z_speed, moments,
+                            zeroth_moment[iz], first_moment[iz], second_moment[iz],
+                            third_moment[iz], dthird_moment_dz[iz], collisions,
+                            composition, z, vperp, vpa, z_spectral, vperp_spectral,
+                            vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+                            external_source_settings, num_diss_params, t_params, ion_dt,
+                            ir, iz, evolve_ppar)
+                        A_sparse = sparse(A)
+                        if !isassigned(adi_info.v_solve_implicit_lus, v_solve_counter)
+                            @timeit_debug global_timer "lu" adi_info.v_solve_implicit_lus[v_solve_counter] = lu(A_sparse)
+                        else
+                            # LU decomposition was previously created. The Jacobian always
+                            # has the same sparsity pattern, so by using `lu!()` we can
+                            # reuse some setup.
+                            try
+                                @timeit_debug global_timer "lu!" lu!(adi_info.v_solve_implicit_lus[v_solve_counter], A_sparse; check=false)
+                            catch e
+                                if !isa(e, ArgumentError)
+                                    rethrow(e)
+                                end
+                                println("Sparsity pattern of matrix changed, rebuilding "
+                                        * " LU from scratch ir=$ir, iz=$iz")
+                                @timeit_debug global_timer "lu" adi_info.v_solve_implicit_lus[v_solve_counter] = lu(A_sparse)
+                            end
+                        end
+
+                        adi_info.v_solve_explicit_matrices[v_solve_counter] = sparse(@view(explicit_J[adi_info.v_solve_global_inds[v_solve_counter],:]))
+                    end
+                    @boundscheck v_solve_counter == adi_info.v_solve_nsolve || error("v_solve_counter($v_solve_counter) != v_solve_nsolve($(adi_info.v_solve_nsolve))")
+
+                    # Do setup for 'z solves'
+                    z_solve_counter = 0
+                    A = adi_info.z_solve_matrix_buffer
+                    explicit_J = adi_info.J_buffer
+                    # Get sparse matrix for explicit, right-hand-side part of the
+                    # solve.
+                    fill_electron_kinetic_equation_Jacobian!(
+                        explicit_J, f_electron_new, electron_ppar_new, moments,
+                        collisions, composition, z, vperp, vpa, z_spectral,
+                        vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+                        external_source_settings, num_diss_params, t_params, ion_dt, ir,
+                        evolve_ppar, :explicit_v)
+                    begin_vperp_vpa_region()
+                    @loop_vperp_vpa ivperp ivpa begin
+                        z_solve_counter += 1
+
+                        # Get LU-factorized matrix for implicit part of the solve
+                        @views fill_electron_kinetic_equation_z_only_Jacobian_f!(
+                            A, f_electron_new[ivpa,ivperp,:], electron_ppar_new,
+                            dpdf_dz[ivpa,ivperp,:], dpdf_dvpa[ivpa,ivperp,:], z_speed,
+                            moments, zeroth_moment, first_moment, second_moment,
+                            third_moment, dthird_moment_dz, collisions, composition, z,
+                            vperp, vpa, z_spectral, vperp_spectral, vpa_spectral,
+                            z_advect, vpa_advect, scratch_dummy, external_source_settings,
+                            num_diss_params, t_params, ion_dt, ir, ivperp, ivpa,
+                            evolve_ppar)
+
+                        A_sparse = sparse(A)
+                        if !isassigned(adi_info.z_solve_implicit_lus, z_solve_counter)
+                            @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse)
+                        else
+                            # LU decomposition was previously created. The Jacobian always
+                            # has the same sparsity pattern, so by using `lu!()` we can
+                            # reuse some setup.
+                            try
+                                @timeit_debug global_timer "lu!" lu!(adi_info.z_solve_implicit_lus[z_solve_counter], A_sparse; check=false)
+                            catch e
+                                if !isa(e, ArgumentError)
+                                    rethrow(e)
+                                end
+                                println("Sparsity pattern of matrix changed, rebuilding "
+                                        * " LU from scratch ir=$ir, ivperp=$ivperp, ivpa=$ivpa")
+                                @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse)
+                            end
+                        end
+
+                        adi_info.z_solve_explicit_matrices[z_solve_counter] = sparse(@view(explicit_J[adi_info.z_solve_global_inds[z_solve_counter],:]))
+                    end
+                    begin_serial_region(; no_synchronize=true)
+                    @serial_region begin
+                        # Do the solve for ppar on the rank-0 process, which has the
+                        # fewest grid points to handle if there are not an exactly equal
+                        # number of points for each process.
+                        z_solve_counter += 1
+
+                        # Get LU-factorized matrix for implicit part of the solve
+                        @views fill_electron_kinetic_equation_z_only_Jacobian_ppar!(
+                            A, electron_ppar_new, moments, zeroth_moment, first_moment,
+                            second_moment, third_moment, dthird_moment_dz, collisions,
+                            composition, z, vperp, vpa, z_spectral, vperp_spectral,
+                            vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+                            external_source_settings, num_diss_params, t_params, ion_dt,
+                            ir, evolve_ppar)
+
+                        A_sparse = sparse(A)
+                        if !isassigned(adi_info.z_solve_implicit_lus, z_solve_counter)
+                            @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse)
+                        else
+                            # LU decomposition was previously created. The Jacobian always
+                            # has the same sparsity pattern, so by using `lu!()` we can
+                            # reuse some setup.
+                            try
+                                @timeit_debug global_timer "lu!" lu!(adi_info.z_solve_implicit_lus[z_solve_counter], A_sparse; check=false)
+                            catch e
+                                if !isa(e, ArgumentError)
+                                    rethrow(e)
+                                end
+                                println("Sparsity pattern of matrix changed, rebuilding "
+                                        * " LU from scratch ir=$ir, ppar z-solve")
+                                @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse)
+                            end
+                        end
+
+                        adi_info.z_solve_explicit_matrices[z_solve_counter] = sparse(@view(explicit_J[adi_info.z_solve_global_inds[z_solve_counter],:]))
+                    end
+                    @boundscheck z_solve_counter == adi_info.z_solve_nsolve || error("z_solve_counter($z_solve_counter) != z_solve_nsolve($(adi_info.z_solve_nsolve))")
+                end
+
+                @timeit_debug global_timer adi_precon!(x) = begin
+                    precon_ppar, precon_f = x
+
+                    adi_info = nl_solver_params.preconditioners[ir]
+                    precon_iterations = nl_solver_params.precon_iterations
+                    this_input_buffer = adi_info.input_buffer
+                    this_intermediate_buffer = adi_info.intermediate_buffer
+                    this_output_buffer = adi_info.output_buffer
+                    global_index_subrange = adi_info.global_index_subrange
+
+                    v_size = vperp.n * vpa.n
+                    pdf_size = z.n * v_size
+
+                    begin_z_vperp_vpa_region()
+                    @loop_z_vperp_vpa iz ivperp ivpa begin
+                        row = (iz - 1)*v_size + (ivperp - 1)*vpa.n + ivpa
+                        this_input_buffer[row] = precon_f[ivpa,ivperp,iz]
+                    end
+                    begin_z_region()
+                    @loop_z iz begin
+                        row = pdf_size + iz
+                        this_input_buffer[row] = precon_ppar[iz]
+                    end
+                    _block_synchronize()
+
+                    # Use this to copy current guess from output_buffer to
+                    # intermediate_buffer, to avoid race conditions as new guess is
+                    # written into output_buffer.
+                    function fill_intermediate_buffer!()
+                        _block_synchronize()  # presumably waits for pending writes to output_buffer — confirm
+                        for i ∈ global_index_subrange  # copy only the entries listed in global_index_subrange
+                            this_intermediate_buffer[i] = this_output_buffer[i]
+                        end
+                        _block_synchronize()  # presumably ensures the copy is complete before the next solve reads it — confirm
+                    end
+
+                    v_solve_global_inds = adi_info.v_solve_global_inds
+                    v_solve_nsolve = adi_info.v_solve_nsolve
+                    v_solve_implicit_lus = adi_info.v_solve_implicit_lus
+                    v_solve_explicit_matrices = adi_info.v_solve_explicit_matrices
+                    v_solve_buffer = adi_info.v_solve_buffer
+                    v_solve_buffer2 = adi_info.v_solve_buffer2
+                    function first_adi_v_solve!()
+                        # The initial guess is all-zero, so for the first solve there is
+                        # no need to multiply by the 'explicit matrix' as x==0, so E.x==0
+                        for isolve ∈ 1:v_solve_nsolve
+                            this_inds = v_solve_global_inds[isolve]
+                            v_solve_buffer .= @view this_input_buffer[this_inds]  # view, as in adi_v_solve!(): no temporary copy
+                            @timeit_debug global_timer "ldiv!" ldiv!(v_solve_buffer2, v_solve_implicit_lus[isolve], v_solve_buffer)  # solve with the stored LU of the implicit block
+                            this_output_buffer[this_inds] .= v_solve_buffer2  # store the first guess for these entries
+                        end
+                    end
+                    function adi_v_solve!()
+                        for isolve ∈ 1:v_solve_nsolve  # one pass per v-solve set up for this preconditioner
+                            this_inds = v_solve_global_inds[isolve]
+                            v_solve_buffer .= @view this_input_buffer[this_inds]  # start from the right-hand-side entries
+                            # Need to multiply the 'explicit matrix' by -1, because all
+                            # the Jacobian-calculation functions are defined as if the
+                            # terms are being added to the left-hand-side preconditioner
+                            # matrix, but here the 'explicit matrix' terms are added on
+                            # the right-hand-side.
+                            @timeit_debug global_timer "mul!" mul!(v_solve_buffer, v_solve_explicit_matrices[isolve],
+                                 this_intermediate_buffer, -1.0, 1.0)  # 5-arg mul!: v_solve_buffer = -E*intermediate + v_solve_buffer
+                            @timeit_debug global_timer "ldiv!" ldiv!(v_solve_buffer2, v_solve_implicit_lus[isolve], v_solve_buffer)  # solve with the stored LU of the implicit block
+                            this_output_buffer[this_inds] .= v_solve_buffer2  # store the updated guess for these entries
+                        end
+                    end
+
+                    z_solve_global_inds = adi_info.z_solve_global_inds
+                    z_solve_nsolve = adi_info.z_solve_nsolve
+                    z_solve_implicit_lus = adi_info.z_solve_implicit_lus
+                    z_solve_explicit_matrices = adi_info.z_solve_explicit_matrices
+                    z_solve_buffer = adi_info.z_solve_buffer
+                    z_solve_buffer2 = adi_info.z_solve_buffer2
+                    function adi_z_solve!()
+                        for isolve ∈ 1:z_solve_nsolve  # one pass per z-solve set up for this preconditioner
+                            this_inds = z_solve_global_inds[isolve]
+                            z_solve_buffer .= @view this_input_buffer[this_inds]  # start from the right-hand-side entries
+                            # Need to multiply the 'explicit matrix' by -1, because all
+                            # the Jacobian-calculation functions are defined as if the
+                            # terms are being added to the left-hand-side preconditioner
+                            # matrix, but here the 'explicit matrix' terms are added on
+                            # the right-hand-side.
+                            @timeit_debug global_timer "mul!" mul!(z_solve_buffer, z_solve_explicit_matrices[isolve], this_intermediate_buffer, -1.0, 1.0)  # 5-arg mul!: z_solve_buffer = -E*intermediate + z_solve_buffer
+                            @timeit_debug global_timer "ldiv!" ldiv!(z_solve_buffer2, z_solve_implicit_lus[isolve], z_solve_buffer)  # solve with the stored LU of the implicit block
+                            this_output_buffer[this_inds] .= z_solve_buffer2  # store the updated guess for these entries
+                        end
+                    end
+
+                    precon_iterations[] += 1
+                    first_adi_v_solve!()
+                    fill_intermediate_buffer!()
+                    adi_z_solve!()
+                    for n ∈ 1:1
+                        precon_iterations[] += 1
+                        fill_intermediate_buffer!()
+                        adi_v_solve!()
+                        fill_intermediate_buffer!()
+                        adi_z_solve!()
+                    end
+
+                    # Unpack preconditioner solution
+                    begin_z_vperp_vpa_region()
+                    @loop_z_vperp_vpa iz ivperp ivpa begin
+                        row = (iz - 1)*v_size + (ivperp - 1)*vpa.n + ivpa
+                        precon_f[ivpa,ivperp,iz] = this_output_buffer[row]
+                    end
+                    begin_z_region()
+                    @loop_z iz begin
+                        row = pdf_size + iz
+                        precon_ppar[iz] = this_output_buffer[row]
+                    end
+
+                    # Ensure values of precon_f and precon_ppar are consistent across
+                    # distributed-MPI block boundaries. For precon_f take the upwind
+                    # value, and for precon_ppar take the average.
+                    f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir]
+                    f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir]
+                    receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir]
+                    receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir]
+                    begin_vperp_vpa_region()
+                    @loop_vperp_vpa ivperp ivpa begin
+                        f_lower_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,1]
+                        f_upper_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,end]
+                    end
+                    # We upwind the z-derivatives in `electron_z_advection!()`, so would
+                    # expect that upwinding the results here in z would make sense.
+                    # However, upwinding here makes convergence much slower (~10x),
+                    # compared to picking the values from one side or other of the block
+                    # boundary, or taking the average of the values on either side.
+                    # Neither direction is special, so taking the average seems most
+                    # sensible (although in an initial test it does not seem to converge
+                    # faster than just picking one or the other).
+                    # Maybe this could indicate that it is more important to have a fully
+                    # self-consistent Jacobian inversion for the
+                    # `electron_vpa_advection()` part rather than taking half(ish) of the
+                    # values from one block and the other half(ish) from the other.
+                    reconcile_element_boundaries_MPI_z_pdf_vpavperpz!(
+                        precon_f, f_lower_endpoints, f_upper_endpoints, receive_buffer1,
+                        receive_buffer2, z)
+
+                    begin_serial_region()
+                    @serial_region begin
+                        buffer_1[] = precon_ppar[1]
+                        buffer_2[] = precon_ppar[end]
+                    end
+                    reconcile_element_boundaries_MPI!(
+                        precon_ppar, buffer_1, buffer_2, buffer_3, buffer_4, z)
+
+                    return nothing
+                end
+
+                left_preconditioner = identity
+                right_preconditioner = adi_precon!
             elseif nl_solver_params.preconditioner_type === Val(:none)
                 left_preconditioner = identity
                 right_preconditioner = identity
diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl
index 32b225754..c61cf9458 100644
--- a/moment_kinetics/src/file_io.jl
+++ b/moment_kinetics/src/file_io.jl
@@ -1203,6 +1203,10 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species,
                                          dynamic, "$(term)_linear_iterations", mk_int;
                                          parallel_io=parallel_io,
                                          description="Number of linear iterations for $term"),
+                   precon_iterations=create_dynamic_variable!(
+                                         dynamic, "$(term)_precon_iterations", mk_int;
+                                         parallel_io=parallel_io,
+                                         description="Number of preconditioner iterations for $term"),
                   )
             for (term, params) ∈ pairs(nl_solver_params) if params !== nothing)
 
@@ -2152,7 +2156,8 @@ function reopen_moments_io(file_info)
                                for name ∈ nl_names)
                 return NamedTuple(Symbol(term)=>(n_solves=dyn["$(term)_n_solves"],
                                                  nonlinear_iterations=dyn["$(term)_nonlinear_iterations"],
-                                                 linear_iterations=dyn["$(term)_linear_iterations"])
+                                                 linear_iterations=dyn["$(term)_linear_iterations"],
+                                                 precon_iterations=dyn["$(term)_precon_iterations"])
                                   for term ∈ nl_prefixes)
             else
                 return nothing
@@ -2313,7 +2318,8 @@ function reopen_dfns_io(file_info)
                                for name ∈ nl_names)
                 return NamedTuple(Symbol(term)=>(n_solves=dyn["$(term)_n_solves"],
                                                  nonlinear_iterations=dyn["$(term)_nonlinear_iterations"],
-                                                 linear_iterations=dyn["$(term)_linear_iterations"])
+                                                 linear_iterations=dyn["$(term)_linear_iterations"],
+                                                 precon_iterations=dyn["$(term)_precon_iterations"])
                                   for term ∈ nl_prefixes)
             else
                 return nothing
@@ -2495,6 +2501,8 @@ file
                                   v.nonlinear_iterations[], t_idx, parallel_io)
             append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].linear_iterations,
                                   v.linear_iterations[], t_idx, parallel_io)
+            append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].precon_iterations,
+                                  v.precon_iterations[], t_idx, parallel_io)
         end
     end
 
diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl
index 2ba8ccd4f..3102e137c 100644
--- a/moment_kinetics/src/initial_conditions.jl
+++ b/moment_kinetics/src/initial_conditions.jl
@@ -754,9 +754,11 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field
                                    nl_solver_params.electron_advance.n_solves,
                                    nl_solver_params.electron_advance.nonlinear_iterations,
                                    nl_solver_params.electron_advance.linear_iterations,
+                                   nl_solver_params.electron_advance.precon_iterations,
                                    nl_solver_params.electron_advance.global_n_solves,
                                    nl_solver_params.electron_advance.global_nonlinear_iterations,
                                    nl_solver_params.electron_advance.global_linear_iterations,
+                                   nl_solver_params.electron_advance.global_precon_iterations,
                                    nl_solver_params.electron_advance.solves_since_precon_update,
                                    nl_solver_params.electron_advance.precon_dt,
                                    nl_solver_params.electron_advance.serial_solve,
diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl
index 35e427319..b5150c675 100644
--- a/moment_kinetics/src/load_data.jl
+++ b/moment_kinetics/src/load_data.jl
@@ -5033,6 +5033,13 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t
         nl_linear_iterations = get_per_step_from_cumulative_variable(
             run_info, "$(prefix)_linear_iterations"; kwargs...)
         variable = nl_linear_iterations ./ nl_iterations
+    elseif occursin("_precon_iterations_per_linear_iteration", variable_name)
+        prefix = split(variable_name, "_precon_iterations_per_linear_iteration")[1]
+        nl_linear_iterations = get_per_step_from_cumulative_variable(
+            run_info, "$(prefix)_linear_iterations"; kwargs...)
+        nl_precon_iterations = get_per_step_from_cumulative_variable(
+            run_info, "$(prefix)_precon_iterations"; kwargs...)
+        variable = nl_precon_iterations ./ nl_linear_iterations
     elseif endswith(variable_name, "_per_step") && variable_name ∉ run_info.variable_names
         # If "_per_step" is appended to a variable name, assume it is a cumulative
         # variable, and get the per-step version.
diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl
index a94e2314c..3789ab0c6 100644
--- a/moment_kinetics/src/nonlinear_solvers.jl
+++ b/moment_kinetics/src/nonlinear_solvers.jl
@@ -59,9 +59,11 @@ struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon,Tpretype}
     n_solves::Base.RefValue{mk_int}
     nonlinear_iterations::Base.RefValue{mk_int}
     linear_iterations::Base.RefValue{mk_int}
+    precon_iterations::Base.RefValue{mk_int}
     global_n_solves::Base.RefValue{mk_int}
     global_nonlinear_iterations::Base.RefValue{mk_int}
     global_linear_iterations::Base.RefValue{mk_int}
+    global_precon_iterations::Base.RefValue{mk_int}
     solves_since_precon_update::Base.RefValue{mk_int}
     precon_dt::Base.RefValue{mk_float}
     serial_solve::Bool
@@ -178,6 +180,73 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
                                 allocate_shared_float(pdf_plus_ppar_size),
                                ),
                                reverse(outer_coord_sizes))
+    elseif preconditioner_type === Val(:electron_adi)
+        nz = coords.z.n
+        pdf_plus_ppar_size = total_size_coords + nz
+        nvperp = coords.vperp.n
+        nvpa = coords.vpa.n
+        v_size = nvperp * nvpa
+
+        function get_adi_precon_buffers()
+            v_solve_z_range = looping.loop_ranges_store[(:z,)].z
+            v_solve_global_inds = [[((iz - 1)*v_size+1 : iz*v_size)..., total_size_coords+iz] for iz ∈ v_solve_z_range]
+            v_solve_nsolve = length(v_solve_z_range)
+            # Plus one for the one point of ppar that is included in the 'v solve'.
+            v_solve_n = nvperp * nvpa + 1
+            v_solve_implicit_lus = Vector{SparseArrays.UMFPACK.UmfpackLU{mk_float, mk_int}}(undef, v_solve_nsolve)
+            v_solve_explicit_matrices = Vector{SparseMatrixCSC{mk_float, mk_int}}(undef, v_solve_nsolve)
+            # This buffer is not shared-memory, because it will be used for a serial LU solve.
+            v_solve_buffer = allocate_float(v_solve_n)
+            v_solve_buffer2 = allocate_float(v_solve_n)
+            v_solve_matrix_buffer = allocate_float(v_solve_n, v_solve_n)
+
+            z_solve_vperp_range = looping.loop_ranges_store[(:vperp,:vpa)].vperp
+            z_solve_vpa_range = looping.loop_ranges_store[(:vperp,:vpa)].vpa
+            z_solve_global_inds = vec([(ivperp-1)*nvpa+ivpa:v_size:(nz-1)*v_size+(ivperp-1)*nvpa+ivpa for ivperp ∈ z_solve_vperp_range, ivpa ∈ z_solve_vpa_range])
+            z_solve_nsolve = length(z_solve_vperp_range) * length(z_solve_vpa_range)
+            @serial_region begin
+                # Do the solve for ppar on the rank-0 process, which has the fewest grid
+                # points to handle if there are not an exactly equal number of points for each
+                # process.
+                push!(z_solve_global_inds, total_size_coords+1 : total_size_coords+nz)
+                z_solve_nsolve += 1
+            end
+            z_solve_n = nz
+            z_solve_implicit_lus = Vector{SparseArrays.UMFPACK.UmfpackLU{mk_float, mk_int}}(undef, z_solve_nsolve)
+            z_solve_explicit_matrices = Vector{SparseMatrixCSC{mk_float, mk_int}}(undef, z_solve_nsolve)
+            # This buffer is not shared-memory, because it will be used for a serial LU solve.
+            z_solve_buffer = allocate_float(z_solve_n)
+            z_solve_buffer2 = allocate_float(z_solve_n)
+            z_solve_matrix_buffer = allocate_float(z_solve_n, z_solve_n)
+
+            J_buffer = allocate_shared_float(pdf_plus_ppar_size, pdf_plus_ppar_size)
+            input_buffer = allocate_shared_float(pdf_plus_ppar_size)
+            intermediate_buffer = allocate_shared_float(pdf_plus_ppar_size)
+            output_buffer = allocate_shared_float(pdf_plus_ppar_size)
+            error_buffer = allocate_shared_float(pdf_plus_ppar_size)
+
+            chunk_size = (pdf_plus_ppar_size + block_size[] - 1) ÷ block_size[]
+            # Set up so root process has fewest points, as root may have other work to do.
+            global_index_subrange = max(1, pdf_plus_ppar_size - (block_size[] - block_rank[]) * chunk_size + 1):(pdf_plus_ppar_size - (block_size[] - block_rank[] - 1) * chunk_size)
+
+            return (v_solve_global_inds=v_solve_global_inds,
+                    v_solve_nsolve=v_solve_nsolve,
+                    v_solve_implicit_lus=v_solve_implicit_lus,
+                    v_solve_explicit_matrices=v_solve_explicit_matrices,
+                    v_solve_buffer=v_solve_buffer, v_solve_buffer2=v_solve_buffer2,
+                    v_solve_matrix_buffer=v_solve_matrix_buffer,
+                    z_solve_global_inds=z_solve_global_inds,
+                    z_solve_nsolve=z_solve_nsolve,
+                    z_solve_implicit_lus=z_solve_implicit_lus,
+                    z_solve_explicit_matrices=z_solve_explicit_matrices,
+                    z_solve_buffer=z_solve_buffer, z_solve_buffer2=z_solve_buffer2,
+                    z_solve_matrix_buffer=z_solve_matrix_buffer, J_buffer=J_buffer,
+                    input_buffer=input_buffer, intermediate_buffer=intermediate_buffer,
+                    output_buffer=output_buffer,
+                    global_index_subrange=global_index_subrange)
+        end
+
+        preconditioners = fill(get_adi_precon_buffers(), reverse(outer_coord_sizes))
     elseif preconditioner_type === Val(:none)
         preconditioners = nothing
     else
@@ -192,7 +261,8 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
                           mk_float(nl_solver_input.linear_atol), linear_restart,
                           nl_solver_input.linear_max_restarts, H, c, s, g, V,
                           linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0),
-                          Ref(0), Ref(nl_solver_input.preconditioner_update_interval),
+                          Ref(0), Ref(0), Ref(0),
+                          Ref(nl_solver_input.preconditioner_update_interval),
                           Ref(mk_float(0.0)), serial_solve, Ref(0), Ref(0),
                           preconditioner_type,
                           nl_solver_input.preconditioner_update_interval, preconditioners)
@@ -235,12 +305,14 @@ total.
         nl_solver_params.ion_advance.global_n_solves[] = nl_solver_params.ion_advance.n_solves[]
         nl_solver_params.ion_advance.global_nonlinear_iterations[] = nl_solver_params.ion_advance.nonlinear_iterations[]
         nl_solver_params.ion_advance.global_linear_iterations[] = nl_solver_params.ion_advance.linear_iterations[]
+        nl_solver_params.ion_advance.global_precon_iterations[] = nl_solver_params.ion_advance.precon_iterations[]
     end
     if nl_solver_params.vpa_advection !== nothing
         # Solves are run in serial on separate processes, so need a global Allreduce
         @timeit_debug global_timer "MPI.Allreduce! comm_world" MPI.Allreduce!(nl_solver_params.vpa_advection.n_solves[], +, comm_world)
         @timeit_debug global_timer "MPI.Allreduce! comm_world" MPI.Allreduce!(nl_solver_params.vpa_advection.nonlinear_iterations[], +, comm_world)
         @timeit_debug global_timer "MPI.Allreduce! comm_world" MPI.Allreduce!(nl_solver_params.vpa_advection.linear_iterations[], +, comm_world)
+        @timeit_debug global_timer "MPI.Allreduce! comm_world" MPI.Allreduce!(nl_solver_params.vpa_advection.precon_iterations[], +, comm_world)
     end
 end
 
@@ -342,6 +414,7 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w,
     close_linear_counter = -1
     success = true
     previous_residual_norm = residual_norm
+old_precon_iterations = nl_solver_params.precon_iterations[]
     while (counter < 1 && residual_norm > 1.0e-8) || residual_norm > 1.0
         counter += 1
         #println("\nNewton ", counter)
@@ -446,6 +519,9 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w,
 #    println("Final residual: ", residual_norm)
 #    println("Total linear iterations: ", linear_counter)
 #    println("Linear iterations per Newton: ", linear_counter / counter)
+#    precon_count = nl_solver_params.precon_iterations[] - old_precon_iterations
+#    println("Total precon iterations: ", precon_count)
+#    println("Precon iterations per linear: ", precon_count / linear_counter)
 #
 #    println("Newton iterations after close: ", counter - close_counter)
 #    println("Total linear iterations after close: ", linear_counter - close_linear_counter)
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index cf07371d2..19db3dc94 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -675,7 +675,8 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                               default_rtol=t_params.rtol / 10.0,
                               default_atol=t_params.atol / 10.0,
                               electron_ppar_pdf_solve=true,
-                              preconditioner_type=Val(:electron_lu))
+                              #preconditioner_type=Val(:electron_lu))
+                              preconditioner_type=Val(:electron_adi))
     nl_solver_ion_advance_params =
         setup_nonlinear_solve(t_params.implicit_ion_advance, input_dict,
                               (s=composition.n_ion_species, r=r, z=z, vperp=vperp,

From ac026051a1ea6ace2c049a6967c456b0f9cab5fb Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Sun, 27 Oct 2024 15:36:07 +0000
Subject: [PATCH 13/43] Skip qpar integral terms to make explicit matrix
 sparser for ADI precon

---
 .../src/electron_kinetic_equation.jl          | 30 ++++++++++---------
 moment_kinetics/src/electron_vpa_advection.jl | 14 +++++----
 2 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 9b5fe83aa..118eec885 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -1099,7 +1099,7 @@ global_rank[] == 0 && println("recalculating precon")
                         collisions, composition, z, vperp, vpa, z_spectral,
                         vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
                         external_source_settings, num_diss_params, t_params, ion_dt, ir,
-                        evolve_ppar, :explicit_z)
+                        evolve_ppar, :explicit_z, false)
                     begin_z_region()
                     @loop_z iz begin
                         v_solve_counter += 1
@@ -1147,7 +1147,7 @@ global_rank[] == 0 && println("recalculating precon")
                         collisions, composition, z, vperp, vpa, z_spectral,
                         vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
                         external_source_settings, num_diss_params, t_params, ion_dt, ir,
-                        evolve_ppar, :explicit_v)
+                        evolve_ppar, :explicit_v, false)
                     begin_vperp_vpa_region()
                     @loop_vperp_vpa ivperp ivpa begin
                         z_solve_counter += 1
@@ -3181,7 +3181,7 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati
                          vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect,
                          vpa_advect, scratch_dummy, external_source_settings,
                          num_diss_params, t_params, ion_dt, ir, evolve_ppar,
-                         include=:all) = begin
+                         include=:all, include_qpar_integral_terms=true) = begin
     dt = t_params.dt[]
 
     buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1]
@@ -3290,13 +3290,13 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati
     add_electron_vpa_advection_to_Jacobian!(
         jacobian_matrix, f, dens, upar, ppar, vth, third_moment, dpdf_dvpa, ddens_dz,
         dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral,
-        vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir, include;
-        ppar_offset=pdf_size)
+        vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir, include,
+        include_qpar_integral_terms; ppar_offset=pdf_size)
     add_contribution_from_electron_pdf_term_to_Jacobian!(
         jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
         dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
-        vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, include;
-        ppar_offset=pdf_size)
+        vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, include,
+        include_qpar_integral_terms; ppar_offset=pdf_size)
     add_electron_dissipation_term_to_Jacobian!(
         jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir,
         include)
@@ -4194,8 +4194,8 @@ end
 function add_contribution_from_electron_pdf_term_to_Jacobian!(
         jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz,
         dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z,
-        vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, include=:all; f_offset=0,
-        ppar_offset=0)
+        vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, include=:all,
+        include_qpar_integral_terms=true; f_offset=0, ppar_offset=0)
 
     if f_offset == ppar_offset
         error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
@@ -4274,11 +4274,13 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!(
         z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1
         z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind]
         z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind]
-        for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
-            col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
-            jacobian_matrix[row,col] +=
-                dt * f[ivpa,ivperp,iz] * vth[iz] *
-                vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry
+        if include_qpar_integral_terms
+            for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+                col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+                jacobian_matrix[row,col] +=
+                    dt * f[ivpa,ivperp,iz] * vth[iz] *
+                    vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry
+            end
         end
         if include === :all
             for index ∈ eachindex(external_source_settings.electron)
diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl
index a9e0fd383..0c2f7d02a 100644
--- a/moment_kinetics/src/electron_vpa_advection.jl
+++ b/moment_kinetics/src/electron_vpa_advection.jl
@@ -100,7 +100,9 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar,
                                                  z, vperp, vpa, z_spectral, vpa_spectral,
                                                  vpa_advect, z_speed, scratch_dummy,
                                                  external_source_settings, dt, ir,
-                                                 include=:all; f_offset=0, ppar_offset=0)
+                                                 include=:all,
+                                                 include_qpar_integral_terms=true;
+                                                 f_offset=0, ppar_offset=0)
     if f_offset == ppar_offset
         error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar "
               * "cannot be in same place in state vector.")
@@ -205,10 +207,12 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar,
         z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1
         z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind]
         z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind]
-        for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
-            col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
-            jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] *
-                vpa.grid[ivpa] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry
+        if include_qpar_integral_terms
+            for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n
+                col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset
+                jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] *
+                    vpa.grid[ivpa] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry
+            end
         end
         if include ∈ (:all, :explicit_v)
             jacobian_matrix[row,ppar_offset+iz] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] *

From e811cb0d5c6410bfa3555454532ef5c9bee4e0d1 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Sun, 27 Oct 2024 09:36:35 +0000
Subject: [PATCH 14/43] Skip right-preconditioner evaluation when initial guess
 is all-zeros

If delta_x is all-zero, then P^-1.delta_x is also all-zero, so there is
no need to evaluate it.
---
 moment_kinetics/src/nonlinear_solvers.jl | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl
index 3789ab0c6..4a574b171 100644
--- a/moment_kinetics/src/nonlinear_solvers.jl
+++ b/moment_kinetics/src/nonlinear_solvers.jl
@@ -408,7 +408,9 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w,
     counter = 0
     linear_counter = 0
 
-    parallel_map(solver_type, ()->0.0, delta_x)
+    # Would need this if delta_x was not set to zero within the Newton iteration loop
+    # below.
+    #parallel_map(solver_type, ()->0.0, delta_x)
 
     close_counter = -1
     close_linear_counter = -1
@@ -434,7 +436,8 @@ old_precon_iterations = nl_solver_params.precon_iterations[]
                                    s=nl_solver_params.s, g=nl_solver_params.g,
                                    V=nl_solver_params.V, rhs_delta=rhs_delta,
                                    initial_guess=nl_solver_params.linear_initial_guess,
-                                   serial_solve=nl_solver_params.serial_solve)
+                                   serial_solve=nl_solver_params.serial_solve,
+                                   initial_delta_x_is_zero=true)
         linear_counter += linear_its
 
         # If the residual does not decrease, we will do a line search to find an update
@@ -1217,7 +1220,8 @@ MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869].
                          x, residual_func!, residual0, delta_x, v, w, solver_type::Val,
                          norm_params; coords, rtol, atol, restart, max_restarts,
                          left_preconditioner, right_preconditioner, H, c, s, g, V,
-                         rhs_delta, initial_guess, serial_solve) = begin
+                         rhs_delta, initial_guess, serial_solve,
+                         initial_delta_x_is_zero) = begin
     # Solve (approximately?):
     #   J δx = residual0
 
@@ -1234,8 +1238,10 @@ MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869].
     # by a large number `Jv_scale_factor` (in constrast to the small `epsilon` in the
     # 'usual' case where the norm does not include either reative or absolute tolerance)
     # to ensure that we get a reasonable estimate of J.v.
-    function approximate_Jacobian_vector_product!(v)
-        right_preconditioner(v)
+    function approximate_Jacobian_vector_product!(v, skip_first_precon::Bool=false)
+        if !skip_first_precon
+            right_preconditioner(v)
+        end
 
         parallel_map(solver_type, (x,v) -> x + Jv_scale_factor * v, v, x, v)
         residual_func!(rhs_delta, v)
@@ -1249,8 +1255,12 @@ MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869].
     # the left-preconditioner.
     parallel_map(solver_type, (delta_x) -> delta_x, v, delta_x)
     left_preconditioner(residual0)
+
     # This function transforms the data stored in 'v' from δx to ≈J.δx
-    approximate_Jacobian_vector_product!(v)
+    # If initial δx is all-zero, we can skip a right-preconditioner evaluation because it
+    # would just transform all-zero to all-zero.
+    approximate_Jacobian_vector_product!(v, initial_delta_x_is_zero)
+
     # Now we actually set 'w' as the first Krylov vector, and normalise it.
     parallel_map(solver_type, (residual0, v) -> -residual0 - v, w, residual0, v)
     beta = distributed_norm(solver_type, w, norm_params...)

From de9822c8e8056c019129b2a251d03f31d5e63cec Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Mon, 28 Oct 2024 09:28:31 +0000
Subject: [PATCH 15/43] Make number of ADI iterations settable, and default to
 1

One iteration of ADI preconditioning seems to be enough to make the JFNK
solver for kinetic electrons converge. The number of linear (Krylov)
iterations required increases slightly, but overall this should reduce
the computational cost.
---
 .../src/electron_kinetic_equation.jl          | 27 +++++++++++++------
 moment_kinetics/src/nonlinear_solvers.jl      | 10 ++++++-
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 118eec885..05fae2358 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -1094,12 +1094,17 @@ global_rank[] == 0 && println("recalculating precon")
                     explicit_J = adi_info.J_buffer
                     # Get sparse matrix for explicit, right-hand-side part of the
                     # solve.
-                    fill_electron_kinetic_equation_Jacobian!(
-                        explicit_J, f_electron_new, electron_ppar_new, moments,
-                        collisions, composition, z, vperp, vpa, z_spectral,
-                        vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
-                        external_source_settings, num_diss_params, t_params, ion_dt, ir,
-                        evolve_ppar, :explicit_z, false)
+                    if adi_info.n_extra_iterations > 0
+                        # If we only do one 'iteration' we don't need the 'explicit
+                        # matrix' for the first solve (the v-solve), because the initial
+                        # guess is zero,
+                        fill_electron_kinetic_equation_Jacobian!(
+                            explicit_J, f_electron_new, electron_ppar_new, moments,
+                            collisions, composition, z, vperp, vpa, z_spectral,
+                            vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
+                            external_source_settings, num_diss_params, t_params, ion_dt, ir,
+                            evolve_ppar, :explicit_z, false)
+                    end
                     begin_z_region()
                     @loop_z iz begin
                         v_solve_counter += 1
@@ -1132,7 +1137,12 @@ global_rank[] == 0 && println("recalculating precon")
                             end
                         end
 
-                        adi_info.v_solve_explicit_matrices[v_solve_counter] = sparse(@view(explicit_J[adi_info.v_solve_global_inds[v_solve_counter],:]))
+                        if adi_info.n_extra_iterations > 0
+                            # If we only do one 'iteration' we don't need the 'explicit
+                            # matrix' for the first solve (the v-solve), because the
+                            # initial guess is zero,
+                            adi_info.v_solve_explicit_matrices[v_solve_counter] = sparse(@view(explicit_J[adi_info.v_solve_global_inds[v_solve_counter],:]))
+                        end
                     end
                     @boundscheck v_solve_counter == adi_info.v_solve_nsolve || error("v_solve_counter($v_solve_counter) != v_solve_nsolve($(adi_info.v_solve_nsolve))")
 
@@ -1233,6 +1243,7 @@ global_rank[] == 0 && println("recalculating precon")
                     this_intermediate_buffer = adi_info.intermediate_buffer
                     this_output_buffer = adi_info.output_buffer
                     global_index_subrange = adi_info.global_index_subrange
+                    n_extra_iterations = adi_info.n_extra_iterations
 
                     v_size = vperp.n * vpa.n
                     pdf_size = z.n * v_size
@@ -1317,7 +1328,7 @@ global_rank[] == 0 && println("recalculating precon")
                     first_adi_v_solve!()
                     fill_intermediate_buffer!()
                     adi_z_solve!()
-                    for n ∈ 1:1
+                    for n ∈ 1:n_extra_iterations
                         precon_iterations[] += 1
                         fill_intermediate_buffer!()
                         adi_v_solve!()
diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl
index 4a574b171..5fed3dc4c 100644
--- a/moment_kinetics/src/nonlinear_solvers.jl
+++ b/moment_kinetics/src/nonlinear_solvers.jl
@@ -96,6 +96,7 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
         linear_restart=10,
         linear_max_restarts=0,
         preconditioner_update_interval=300,
+        adi_precon_iterations=1,
        )
 
     if !active
@@ -229,6 +230,12 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
             # Set up so root process has fewest points, as root may have other work to do.
             global_index_subrange = max(1, pdf_plus_ppar_size - (block_size[] - block_rank[]) * chunk_size + 1):(pdf_plus_ppar_size - (block_size[] - block_rank[] - 1) * chunk_size)
 
+            if nl_solver_input.adi_precon_iterations < 1
+                error("Setting adi_precon_iterations=$(nl_solver_input.adi_precon_iterations) "
+                      * "would mean the preconditioner does nothing.")
+            end
+            n_extra_iterations = nl_solver_input.adi_precon_iterations - 1
+
             return (v_solve_global_inds=v_solve_global_inds,
                     v_solve_nsolve=v_solve_nsolve,
                     v_solve_implicit_lus=v_solve_implicit_lus,
@@ -243,7 +250,8 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
                     z_solve_matrix_buffer=z_solve_matrix_buffer, J_buffer=J_buffer,
                     input_buffer=input_buffer, intermediate_buffer=intermediate_buffer,
                     output_buffer=output_buffer,
-                    global_index_subrange=global_index_subrange)
+                    global_index_subrange=global_index_subrange,
+                    n_extra_iterations=n_extra_iterations)
         end
 
         preconditioners = fill(get_adi_precon_buffers(), reverse(outer_coord_sizes))

From 0d05d9fb3594993cc51032654ffd7e2ef2ae5f44 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Mon, 28 Oct 2024 11:02:57 +0000
Subject: [PATCH 16/43] For kinetic electrons, use LU when no shared-mem, ADI
 with shared-mem

When not parallelising using shared memory, there is no need to split
the preconditioner, and the un-split LU preconditioner should be the
most efficient. Therefore use the LU precon in serial, and use ADI only
when `block_size[] > 1`.
---
 moment_kinetics/src/time_advance.jl           |  10 +-
 .../test/kinetic_electron_tests.jl            | 205 ++++++++++++------
 2 files changed, 147 insertions(+), 68 deletions(-)

diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index 19db3dc94..219fd0ef9 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -667,6 +667,13 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                                                                     input_dict, (z=z,);
                                                                     default_rtol=t_params.rtol / 10.0,
                                                                     default_atol=t_params.atol / 10.0)
+    if block_size[] == 1
+        # No need to parallelise, so un-split LU solver should be most efficient.
+        electron_preconditioner_type = Val(:electron_lu)
+    else
+        # Want to parallelise preconditioner, so use ADI method.
+        electron_preconditioner_type = Val(:electron_adi)
+    end
     nl_solver_electron_advance_params =
         setup_nonlinear_solve(t_params.implicit_electron_advance || composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation),
                               input_dict,
@@ -675,8 +682,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                               default_rtol=t_params.rtol / 10.0,
                               default_atol=t_params.atol / 10.0,
                               electron_ppar_pdf_solve=true,
-                              #preconditioner_type=Val(:electron_lu))
-                              preconditioner_type=Val(:electron_adi))
+                              preconditioner_type=electron_preconditioner_type)
     nl_solver_ion_advance_params =
         setup_nonlinear_solve(t_params.implicit_ion_advance, input_dict,
                               (s=composition.n_ion_species, r=r, z=z, vperp=vperp,
diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl
index 6a41a2c2f..33738748d 100644
--- a/moment_kinetics/test/kinetic_electron_tests.jl
+++ b/moment_kinetics/test/kinetic_electron_tests.jl
@@ -6,6 +6,7 @@ module KineticElectronsTests
 
 include("setup.jl")
 
+using moment_kinetics.communication
 using moment_kinetics.load_data: get_run_info_no_setup, close_run_info,
                                  postproc_load_variable
 using moment_kinetics.looping
@@ -172,72 +173,144 @@ function run_test()
 
         # Regression test
         # Benchmark data generated in serial on Linux
-        expected_Ez = [-0.5990683230706185 -1.136483186157602;
-                       -0.4944296396481284 -0.9873296990705788;
-                       -0.30889032954504736 -0.6694380824928302;
-                       -0.2064830747303776 -0.4471331690708596;
-                       -0.21232457328748663 -0.423069171542538;
-                       -0.18233875912042674 -0.3586467595624931;
-                       -0.16711429522309232 -0.3018272987758344;
-                       -0.16920776495088916 -0.27814384649305496;
-                       -0.1629417555658927 -0.26124630661090814;
-                       -0.16619150334079993 -0.2572789330163811;
-                       -0.15918194883360942 -0.23720078037362732;
-                       -0.14034706409006803 -0.20520396656341475;
-                       -0.12602184032280567 -0.1827016549071128;
-                       -0.10928716440800472 -0.15808919669899502;
-                       -0.07053969674257217 -0.10137753767917096;
-                       -0.0249577746169536 -0.0358411459260082;
-                       -2.8327303308330514e-15 -2.0803303361189427e-5;
-                       0.024957774616960776 0.03584490974053962;
-                       0.07053969674257636 0.1013692898656727;
-                       0.10928716440799909 0.15807862358546687;
-                       0.1260218403227975 0.18263049748179466;
-                       0.1403470640900294 0.20516566362571026;
-                       0.1591819488336015 0.23711236692241613;
-                       0.16619150334082114 0.257126146434857;
-                       0.16294175556587748 0.2609881259705107;
-                       0.16920776495090983 0.2778978154805798;
-                       0.1671142952230893 0.3015349192528757;
-                       0.1823387591204167 0.3585291689672981;
-                       0.21232457328753865 0.4231179549656996;
-                       0.20648307473037922 0.44816400221269476;
-                       0.3088903295450278 0.6716787105435247;
-                       0.4944296396481271 0.9861165590258743;
-                       0.5990683230705801 1.1300034111861956]
-        expected_vthe = [22.64555285302391 22.485481713141688;
-                         23.763411647653097 23.63281883616836;
-                         25.26907160117684 25.181703459470448;
-                         26.17920352818247 26.12461016686916;
-                         26.514772631426933 26.476018852279974;
-                         26.798783188585713 26.774387562937218;
-                         27.202255545479264 27.203662204308202;
-                         27.50424749120107 27.527732850637264;
-                         27.630498656270504 27.6642323848215;
-                         27.748483758260697 27.79134809261204;
-                         27.933760382468346 27.990808336620802;
-                         28.08611508251559 28.153978618442775;
-                         28.14959662643782 28.221734439130564;
-                         28.207730844115044 28.283677711828023;
-                         28.28567669896009 28.36634261525836;
-                         28.32728392065335 28.410489883644782;
-                         28.331064506972027 28.41437629072209;
-                         28.32729968986601 28.41050992096321;
-                         28.285678151542136 28.366352683865195;
-                         28.207765527709956 28.28373408727703;
-                         28.149604559462947 28.221771261090687;
-                         28.086248527111163 28.154158507899695;
-                         27.933979289064936 27.991103719847732;
-                         27.74906125092813 27.792046191405188;
-                         27.631210333523736 27.66508092926101;
-                         27.505479130159543 27.529115937508752;
-                         27.20422756527604 27.20578114592589;
-                         26.801712351383053 26.77740066591359;
-                         26.517644511297203 26.478915386575462;
-                         26.18176436913143 26.127099000267552;
-                         25.26635932097994 25.178676836919877;
-                         23.756593489029708 23.625697695979085;
-                         22.64390166090378 22.48400980852866]
+        if global_size[] == 1
+            # Serial solves use LU preconditioner
+            expected_Ez = [-0.5990683230706185 -1.136483186157602;
+                           -0.4944296396481284 -0.9873296990705788;
+                           -0.30889032954504736 -0.6694380824928302;
+                           -0.2064830747303776 -0.4471331690708596;
+                           -0.21232457328748663 -0.423069171542538;
+                           -0.18233875912042674 -0.3586467595624931;
+                           -0.16711429522309232 -0.3018272987758344;
+                           -0.16920776495088916 -0.27814384649305496;
+                           -0.1629417555658927 -0.26124630661090814;
+                           -0.16619150334079993 -0.2572789330163811;
+                           -0.15918194883360942 -0.23720078037362732;
+                           -0.14034706409006803 -0.20520396656341475;
+                           -0.12602184032280567 -0.1827016549071128;
+                           -0.10928716440800472 -0.15808919669899502;
+                           -0.07053969674257217 -0.10137753767917096;
+                           -0.0249577746169536 -0.0358411459260082;
+                           -2.8327303308330514e-15 -2.0803303361189427e-5;
+                           0.024957774616960776 0.03584490974053962;
+                           0.07053969674257636 0.1013692898656727;
+                           0.10928716440799909 0.15807862358546687;
+                           0.1260218403227975 0.18263049748179466;
+                           0.1403470640900294 0.20516566362571026;
+                           0.1591819488336015 0.23711236692241613;
+                           0.16619150334082114 0.257126146434857;
+                           0.16294175556587748 0.2609881259705107;
+                           0.16920776495090983 0.2778978154805798;
+                           0.1671142952230893 0.3015349192528757;
+                           0.1823387591204167 0.3585291689672981;
+                           0.21232457328753865 0.4231179549656996;
+                           0.20648307473037922 0.44816400221269476;
+                           0.3088903295450278 0.6716787105435247;
+                           0.4944296396481271 0.9861165590258743;
+                           0.5990683230705801 1.1300034111861956]
+            expected_vthe = [22.64555285302391 22.485481713141688;
+                             23.763411647653097 23.63281883616836;
+                             25.26907160117684 25.181703459470448;
+                             26.17920352818247 26.12461016686916;
+                             26.514772631426933 26.476018852279974;
+                             26.798783188585713 26.774387562937218;
+                             27.202255545479264 27.203662204308202;
+                             27.50424749120107 27.527732850637264;
+                             27.630498656270504 27.6642323848215;
+                             27.748483758260697 27.79134809261204;
+                             27.933760382468346 27.990808336620802;
+                             28.08611508251559 28.153978618442775;
+                             28.14959662643782 28.221734439130564;
+                             28.207730844115044 28.283677711828023;
+                             28.28567669896009 28.36634261525836;
+                             28.32728392065335 28.410489883644782;
+                             28.331064506972027 28.41437629072209;
+                             28.32729968986601 28.41050992096321;
+                             28.285678151542136 28.366352683865195;
+                             28.207765527709956 28.28373408727703;
+                             28.149604559462947 28.221771261090687;
+                             28.086248527111163 28.154158507899695;
+                             27.933979289064936 27.991103719847732;
+                             27.74906125092813 27.792046191405188;
+                             27.631210333523736 27.66508092926101;
+                             27.505479130159543 27.529115937508752;
+                             27.20422756527604 27.20578114592589;
+                             26.801712351383053 26.77740066591359;
+                             26.517644511297203 26.478915386575462;
+                             26.18176436913143 26.127099000267552;
+                             25.26635932097994 25.178676836919877;
+                             23.756593489029708 23.625697695979085;
+                             22.64390166090378 22.48400980852866]
+        else
+            # Parallel solves, which here use only shared-memory parallelism, use the ADI
+            # preconditioner, which should be as accurate, but may give different results
+            # within Newton-Krylov tolerances.
+            expected_Ez = [-0.5990683230706185 -1.136484793603861;
+                           -0.4944296396481284 -0.9873300031440772;
+                           -0.30889032954504736 -0.6694378168618197;
+                           -0.2064830747303776 -0.447133132132065;
+                           -0.21232457328748663 -0.42306913446372424;
+                           -0.18233875912042674 -0.3586467771727455;
+                           -0.16711429522309232 -0.30182728110160495;
+                           -0.16920776495088916 -0.27814382747995164;
+                           -0.1629417555658927 -0.2612463784138094;
+                           -0.16619150334079993 -0.25727894258000966;
+                           -0.15918194883360942 -0.23720078814350573;
+                           -0.14034706409006803 -0.20520397188041256;
+                           -0.12602184032280567 -0.18270162474892546;
+                           -0.10928716440800472 -0.1580892035790512;
+                           -0.07053969674257217 -0.10137753682381391;
+                           -0.0249577746169536 -0.03584114725793184;
+                           -2.8327303308330514e-15 -2.0802378395589373e-5;
+                           0.024957774616960776 0.0358449101669449;
+                           0.07053969674257636 0.10136928934666747;
+                           0.10928716440799909 0.15807862867071673;
+                           0.1260218403227975 0.18263047522175488;
+                           0.1403470640900294 0.20516566756031385; 0.1591819488336015 0.2371123741024713;
+                           0.16619150334082114 0.2571261543920033;
+                           0.16294175556587748 0.2609882062708652;
+                           0.16920776495090983 0.27789779494370415;
+                           0.1671142952230893 0.30153489797658445;
+                           0.1823387591204167 0.35852918516786003;
+                           0.21232457328753865 0.42311789840457864;
+                           0.20648307473037922 0.44816400062147066;
+                           0.3088903295450278 0.6716785459169026;
+                           0.4944296396481271 0.9861167610959626;
+                           0.5990683230705801 1.1300045383907789]
+            expected_vthe = [22.64555338227396 22.48548119549829;
+                             23.76341164436594 23.632819782771243;
+                             25.26907163394297 25.18170391887767;
+                             26.179203467285365 26.12461016927763;
+                             26.514772629327332 26.47601877788725;
+                             26.79878318858447 26.774387534342114;
+                             27.20225551034186 27.20366217166485;
+                             27.504247525601926 27.527732760234755;
+                             27.630498605068166 27.66423228184859;
+                             27.748483763235846 27.791348082529804;
+                             27.933760371994826 27.990808308571204;
+                             28.08611509938479 28.153978648601132;
+                             28.149596610550738 28.221734405417436;
+                             28.207730848524463 28.28367771694209;
+                             28.28567670146647 28.366342613061416;
+                             28.32728392764203 28.410489892675102;
+                             28.331064498175866 28.414376282256146;
+                             28.327299695349158 28.41050992979778;
+                             28.285678155424083 28.366352683054103;
+                             28.207765532359442 28.28373409338897;
+                             28.149604554344048 28.22177123547944;
+                             28.086248537316628 28.154158532699547;
+                             27.933979285563435 27.991103698041254;
+                             27.749061255285646 27.79204618050744;
+                             27.63121031067771 27.665080846653012;
+                             27.505479148983177 27.529115838548574;
+                             27.204227550854288 27.205781129997607;
+                             26.801712356957204 26.777400644678224;
+                             26.517644516966772 26.478915353716097;
+                             26.181764354679014 26.12709901369174;
+                             25.266359355820907 25.178677080491074;
+                             23.756593465755735 23.625698257711747;
+                             22.64390180335094 22.48400934735562]
+        end
 
         if expected_Ez == nothing
             # Error: no expected input provided

From c68ca473ef4bac536b0f91ffdfe8057c5b2feb29 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Sun, 10 Nov 2024 20:31:38 +0000
Subject: [PATCH 17/43] Fix electron bc cutoff at lower boundary

Indexing error meant that half the time the cutoff did not interpolate
smoothly between grid points.
---
 moment_kinetics/src/electron_kinetic_equation.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 05fae2358..d8d2fe2d6 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -2394,7 +2394,7 @@ end
                 pdf[plus_vcut_ind+1,1,1,ir] *= vcut_fraction - 0.5
             else
                 pdf[plus_vcut_ind+1,1,1,ir] = 0.0
-                pdf[plus_vcut_ind+1,1,1,ir] *= vcut_fraction + 0.5
+                pdf[plus_vcut_ind,1,1,ir] *= vcut_fraction + 0.5
             end
 
             # update the electrostatic potential at the boundary to be the value corresponding to the updated cutoff velocity

From f2d0115767dffe61159ee61ad49446b6b9a78c0b Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 14 Nov 2024 09:48:22 +0000
Subject: [PATCH 18/43] Recalculate vth before bc in electron_backward_euler!()
 residual_func!()

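Bugfix: the boundary condition is passed `moments.electron.vth`, so when
`evolve_ppar` is set vth has to be updated before the bc is enforced.
Previously the bc was applied first, using the not-yet-updated vth.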
---
 .../src/electron_kinetic_equation.jl          | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index d8d2fe2d6..8a3d62d1f 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -1403,14 +1403,6 @@ global_rank[] == 0 && println("recalculating precon")
                 electron_ppar_residual, f_electron_residual = this_residual
                 electron_ppar_newvar, f_electron_newvar = new_variables
 
-                # enforce the boundary condition(s) on the electron pdf
-                @views enforce_boundary_condition_on_electron_pdf!(
-                           f_electron_newvar, phi, moments.electron.vth[:,ir],
-                           moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral,
-                           vpa_spectral, vpa_advect, moments,
-                           num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
-                           composition.me_over_mi; bc_constraints=false)
-
                 if evolve_ppar
                     this_dens = moments.electron.dens
                     this_upar = moments.electron.upar
@@ -1423,6 +1415,17 @@ global_rank[] == 0 && println("recalculating precon")
                                                    (this_dens[iz,ir] *
                                                     composition.me_over_mi)))
                     end
+                end
+
+                # enforce the boundary condition(s) on the electron pdf
+                @views enforce_boundary_condition_on_electron_pdf!(
+                           f_electron_newvar, phi, moments.electron.vth[:,ir],
+                           moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral,
+                           vpa_spectral, vpa_advect, moments,
+                           num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
+                           composition.me_over_mi; bc_constraints=false)
+
+                if evolve_ppar
                     # Calculate heat flux and derivatives using new_variables
                     @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir],
                                                                   electron_ppar_newvar,

From 48e992ed3854495627191cae3f573b54d3cb38ec Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Sun, 17 Nov 2024 17:15:34 +0000
Subject: [PATCH 19/43] Move cutoff parameters for kinetic electron bc to
 separate function

This will allow the calculation of these parameters to be reused when
calculating a Jacobian matrix for the wall bc.
---
 .../src/electron_kinetic_equation.jl          | 218 ++++++++++--------
 1 file changed, 118 insertions(+), 100 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 8a3d62d1f..d6eece8a9 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -2136,6 +2136,118 @@ function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vp
     end
 end
 
+function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir)
+    # Delete the upar contribution here if ignoring the 'upar shift'
+    vpa_unnorm = @. vpa.scratch2 = vthe[1,ir] * vpa.grid + upar[1,ir]
+
+    u_over_vt = upar[1,ir] / vthe[1,ir]
+
+    # Initial guess for cut-off velocity is result from previous RK stage (which
+    # might be the previous timestep if this is the first stage). Recalculate this
+    # value from phi.
+    vcut = sqrt(phi[1,ir] / me_over_mi)
+
+    # -vcut is between minus_vcut_ind-1 and minus_vcut_ind
+    minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut)
+    if minus_vcut_ind < 2
+        error("In lower-z electron bc, failed to find vpa=-vcut point, minus_vcut_ind=$minus_vcut_ind")
+    end
+    if minus_vcut_ind > vpa.n
+        error("In lower-z electron bc, failed to find vpa=-vcut point, minus_vcut_ind=$minus_vcut_ind")
+    end
+
+    # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar
+    # shift'
+    sigma = -u_over_vt
+
+    # sigma is between sigma_ind-1 and sigma_ind
+    sigma_ind = searchsortedfirst(vpa_unnorm, 0.0)
+    if sigma_ind < 2
+        error("In lower-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind")
+    end
+    if sigma_ind > vpa.n
+        error("In lower-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind")
+    end
+
+    # sigma_fraction is the fraction of the distance between sigma_ind-1 and
+    # sigma_ind where sigma is.
+    sigma_fraction = (sigma - vpa_unnorm[sigma_ind-1]) / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind-1])
+
+    # Want to construct the w-grid corresponding to -vpa.
+    #   wpa(vpa) = (vpa - upar)/vth
+    #   ⇒ vpa = vth*wpa(vpa) + upar
+    #   wpa(-vpa) = (-vpa - upar)/vth
+    #             = (-(vth*wpa(vpa) + upar) - upar)/vth
+    #             = (-vth*wpa - 2*upar)/vth
+    #             = -wpa - 2*upar/vth
+    # [Note that `vpa.grid` is slightly mis-named here - it contains the values of
+    #  wpa(+vpa) as we are using a 'moment kinetic' approach.]
+    # Need to reverse vpa.grid because the grid passed as the second argument of
+    # interpolate_to_grid_1d!() needs to be sorted in increasing order.
+    reversed_wpa_of_minus_vpa = @. vpa.scratch3 = -vpa.grid + 2.0 * sigma
+    #reversed_wpa_of_minus_vpa = vpa.scratch3 .= .-vpa.grid
+    reverse!(reversed_wpa_of_minus_vpa)
+
+    return vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction,
+           reversed_wpa_of_minus_vpa
+end
+
+function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir)
+    # Delete the upar contribution here if ignoring the 'upar shift'
+    vpa_unnorm = @. vpa.scratch2 = vthe[end,ir] * vpa.grid + upar[end,ir]
+
+    u_over_vt = upar[end,ir] / vthe[end,ir]
+
+    # Initial guess for cut-off velocity is result from previous RK stage (which
+    # might be the previous timestep if this is the first stage). Recalculate this
+    # value from phi.
+    vcut = sqrt(phi[end,ir] / me_over_mi)
+
+    # vcut is between plus_vcut_ind and plus_vcut_ind+1
+    plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut)
+    if plus_vcut_ind < 1
+        error("In upper-z electron bc, failed to find vpa=vcut point, plus_vcut_ind=$plus_vcut_ind")
+    end
+    if plus_vcut_ind > vpa.n - 1
+        error("In upper-z electron bc, failed to find vpa=vcut point, plus_vcut_ind=$plus_vcut_ind")
+    end
+
+    # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar
+    # shift'
+    sigma = -u_over_vt
+
+    # sigma is between sigma_ind and sigma_ind+1
+    sigma_ind = searchsortedlast(vpa_unnorm, 0.0)
+    if sigma_ind < 1
+        error("In upper-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind")
+    end
+    if sigma_ind > vpa.n - 1
+        error("In upper-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind")
+    end
+
+    # sigma_fraction is the fraction of the distance between sigma_ind+1 and
+    # sigma_ind where sigma is.
+    sigma_fraction = (sigma - vpa_unnorm[sigma_ind+1]) / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind+1])
+
+    # Want to construct the w-grid corresponding to -vpa.
+    #   wpa(vpa) = (vpa - upar)/vth
+    #   ⇒ vpa = vth*wpa(vpa) + upar
+    #   wpa(-vpa) = (-vpa - upar)/vth
+    #             = (-(vth*wpa(vpa) + upar) - upar)/vth
+    #             = (-vth*wpa - 2*upar)/vth
+    #             = -wpa - 2*upar/vth
+    # [Note that `vpa.grid` is slightly mis-named here - it contains the values of
+    #  wpa(+vpa) as we are using a 'moment kinetic' approach.]
+    # Need to reverse vpa.grid because the grid passed as the second argument of
+    # interpolate_to_grid_1d!() needs to be sorted in increasing order.
+    reversed_wpa_of_minus_vpa = @. vpa.scratch3 = -vpa.grid + 2.0 * sigma
+    #reversed_wpa_of_minus_vpa = vpa.scratch3 .= .-vpa.grid
+    reverse!(reversed_wpa_of_minus_vpa)
+
+    return vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction,
+           reversed_wpa_of_minus_vpa
+end
+
 @timeit global_timer enforce_boundary_condition_on_electron_pdf!(
                          pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral,
                          vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi;
@@ -2241,56 +2353,9 @@ end
             # constraints and determining the cut-off velocity (and therefore the sheath
             # potential).
 
-            # Delete the upar contribution here if ignoring the 'upar shift'
-            vpa_unnorm = @. vpa.scratch2 = vthe[1,ir] * vpa.grid + upar[1,ir]
-
-            u_over_vt = upar[1,ir] / vthe[1,ir]
-
-            # Initial guess for cut-off velocity is result from previous RK stage (which
-            # might be the previous timestep if this is the first stage). Recalculate this
-            # value from phi.
-            vcut = sqrt(phi[1,ir] / me_over_mi)
-
-            # -vcut is between minus_vcut_ind-1 and minus_vcut_ind
-            minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut)
-            if minus_vcut_ind < 2
-                error("In lower-z electron bc, failed to find vpa=-vcut point, minus_vcut_ind=$minus_vcut_ind")
-            end
-            if minus_vcut_ind > vpa.n
-                error("In lower-z electron bc, failed to find vpa=-vcut point, minus_vcut_ind=$minus_vcut_ind")
-            end
-
-            # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar
-            # shift'
-            sigma = -u_over_vt
-
-            # sigma is between sigma_ind-1 and sigma_ind
-            sigma_ind = searchsortedfirst(vpa_unnorm, 0.0)
-            if sigma_ind < 2
-                error("In lower-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind")
-            end
-            if sigma_ind > vpa.n
-                error("In lower-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind")
-            end
-
-            # sigma_fraction is the fraction of the distance between sigma_ind-1 and
-            # sigma_ind where sigma is.
-            sigma_fraction = (sigma - vpa_unnorm[sigma_ind-1]) / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind-1])
-
-            # Want to construct the w-grid corresponding to -vpa.
-            #   wpa(vpa) = (vpa - upar)/vth
-            #   ⇒ vpa = vth*wpa(vpa) + upar
-            #   wpa(-vpa) = (-vpa - upar)/vth
-            #             = (-(vth*wpa(vpa) + upar) - upar)/vth
-            #             = (-vth*wpa - 2*upar)/vth
-            #             = -wpa - 2*upar/vth
-            # [Note that `vpa.grid` is slightly mis-named here - it contains the values of
-            #  wpa(+vpa) as we are using a 'moment kinetic' approach.]
-            # Need to reverse vpa.grid because the grid passed as the second argument of
-            # interpolate_to_grid_1d!() needs to be sorted in increasing order.
-            reversed_wpa_of_minus_vpa = @. vpa.scratch3 = -vpa.grid + 2.0 * sigma
-            #reversed_wpa_of_minus_vpa = vpa.scratch3 .= .-vpa.grid
-            reverse!(reversed_wpa_of_minus_vpa)
+            vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction,
+                reversed_wpa_of_minus_vpa = get_cutoff_params_lower(upar, vthe, phi,
+                                                                    me_over_mi, vpa, ir)
 
             # interpolate the pdf onto this grid
             #@views interpolate_to_grid_1d!(interpolated_pdf, wpa_values, pdf[:,1,1,ir], vpa, vpa_spectral)
@@ -2510,56 +2575,9 @@ end
             # constraints and determining the cut-off velocity (and therefore the sheath
             # potential).
 
-            # Delete the upar contribution here if ignoring the 'upar shift'
-            vpa_unnorm = @. vpa.scratch2 = vthe[end,ir] * vpa.grid + upar[end,ir]
-
-            u_over_vt = upar[end,ir] / vthe[end,ir]
-
-            # Initial guess for cut-off velocity is result from previous RK stage (which
-            # might be the previous timestep if this is the first stage). Recalculate this
-            # value from phi.
-            vcut = sqrt(phi[end,ir] / me_over_mi)
-
-            # vcut is between plus_vcut_ind and plus_vcut_ind+1
-            plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut)
-            if plus_vcut_ind < 1
-                error("In upper-z electron bc, failed to find vpa=vcut point, plus_vcut_ind=$plus_vcut_ind")
-            end
-            if plus_vcut_ind > vpa.n - 1
-                error("In upper-z electron bc, failed to find vpa=vcut point, plus_vcut_ind=$plus_vcut_ind")
-            end
-
-            # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar
-            # shift'
-            sigma = -u_over_vt
-
-            # sigma is between sigma_ind and sigma_ind+1
-            sigma_ind = searchsortedlast(vpa_unnorm, 0.0)
-            if sigma_ind < 1
-                error("In upper-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind")
-            end
-            if sigma_ind > vpa.n - 1
-                error("In upper-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind")
-            end
-
-            # sigma_fraction is the fraction of the distance between sigma_ind+1 and
-            # sigma_ind where sigma is.
-            sigma_fraction = (sigma - vpa_unnorm[sigma_ind+1]) / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind+1])
-
-            # Want to construct the w-grid corresponding to -vpa.
-            #   wpa(vpa) = (vpa - upar)/vth
-            #   ⇒ vpa = vth*wpa(vpa) + upar
-            #   wpa(-vpa) = (-vpa - upar)/vth
-            #             = (-(vth*wpa(vpa) + upar) - upar)/vth
-            #             = (-vth*wpa - 2*upar)/vth
-            #             = -wpa - 2*upar/vth
-            # [Note that `vpa.grid` is slightly mis-named here - it contains the values of
-            #  wpa(+vpa) as we are using a 'moment kinetic' approach.]
-            # Need to reverse vpa.grid because the grid passed as the second argument of
-            # interpolate_to_grid_1d!() needs to be sorted in increasing order.
-            reversed_wpa_of_minus_vpa = @. vpa.scratch3 = -vpa.grid + 2.0 * sigma
-            #reversed_wpa_of_minus_vpa = vpa.scratch3 .= .-vpa.grid
-            reverse!(reversed_wpa_of_minus_vpa)
+            vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction,
+                reversed_wpa_of_minus_vpa = get_cutoff_params_upper(upar, vthe, phi,
+                                                                    me_over_mi, vpa, ir)
 
             # interpolate the pdf onto this grid
             #@views interpolate_to_grid_1d!(interpolated_pdf, wpa_values, pdf[:,1,1,ir], vpa, vpa_spectral)

From 67b546dfe796a29b41951cfa54e6b716e6917549 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Tue, 19 Nov 2024 10:23:40 +0000
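Subject: [PATCH 20/43] Fix calculation of sigma_fraction in kinetic electron
 bc

`sigma` is a normalised (wpa) coordinate, but the old expression
subtracted the unnormalised `vpa_unnorm` grid values from it. On the
`vpa_unnorm` grid the point being located is vpa=0, so the numerator
becomes just minus the neighbouring grid value, e.g.
`-vpa_unnorm[sigma_ind-1]` at the lower boundary.

---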
 .../src/electron_kinetic_equation.jl          | 31 +++++++++----------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index d6eece8a9..dcb14db2e 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -2137,11 +2137,14 @@ function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vp
 end
 
 function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir)
-    # Delete the upar contribution here if ignoring the 'upar shift'
-    vpa_unnorm = @. vpa.scratch2 = vthe[1,ir] * vpa.grid + upar[1,ir]
-
     u_over_vt = upar[1,ir] / vthe[1,ir]
 
+    # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar
+    # shift'
+    sigma = -u_over_vt
+
+    vpa_unnorm = @. vpa.scratch2 = vthe[1,ir] * (vpa.grid - sigma)
+
     # Initial guess for cut-off velocity is result from previous RK stage (which
     # might be the previous timestep if this is the first stage). Recalculate this
     # value from phi.
@@ -2156,10 +2159,6 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir)
         error("In lower-z electron bc, failed to find vpa=-vcut point, minus_vcut_ind=$minus_vcut_ind")
     end
 
-    # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar
-    # shift'
-    sigma = -u_over_vt
-
     # sigma is between sigma_ind-1 and sigma_ind
     sigma_ind = searchsortedfirst(vpa_unnorm, 0.0)
     if sigma_ind < 2
@@ -2171,7 +2170,7 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir)
 
     # sigma_fraction is the fraction of the distance between sigma_ind-1 and
     # sigma_ind where sigma is.
-    sigma_fraction = (sigma - vpa_unnorm[sigma_ind-1]) / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind-1])
+    sigma_fraction = -vpa_unnorm[sigma_ind-1] / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind-1])
 
     # Want to construct the w-grid corresponding to -vpa.
     #   wpa(vpa) = (vpa - upar)/vth
@@ -2193,11 +2192,15 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir)
 end
 
 function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir)
-    # Delete the upar contribution here if ignoring the 'upar shift'
-    vpa_unnorm = @. vpa.scratch2 = vthe[end,ir] * vpa.grid + upar[end,ir]
-
     u_over_vt = upar[end,ir] / vthe[end,ir]
 
+    # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar
+    # shift'
+    sigma = -u_over_vt
+
+    # Delete the upar contribution here if ignoring the 'upar shift'
+    vpa_unnorm = @. vpa.scratch2 = vthe[end,ir] * (vpa.grid - sigma)
+
     # Initial guess for cut-off velocity is result from previous RK stage (which
     # might be the previous timestep if this is the first stage). Recalculate this
     # value from phi.
@@ -2212,10 +2215,6 @@ function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir)
         error("In upper-z electron bc, failed to find vpa=vcut point, plus_vcut_ind=$plus_vcut_ind")
     end
 
-    # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar
-    # shift'
-    sigma = -u_over_vt
-
     # sigma is between sigma_ind and sigma_ind+1
     sigma_ind = searchsortedlast(vpa_unnorm, 0.0)
     if sigma_ind < 1
@@ -2227,7 +2226,7 @@ function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir)
 
     # sigma_fraction is the fraction of the distance between sigma_ind+1 and
     # sigma_ind where sigma is.
-    sigma_fraction = (sigma - vpa_unnorm[sigma_ind+1]) / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind+1])
+    sigma_fraction = -vpa_unnorm[sigma_ind+1] / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind+1])
 
     # Want to construct the w-grid corresponding to -vpa.
     #   wpa(vpa) = (vpa - upar)/vth

From c91a2bfa2f6a83422ead137548374480ff377740 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Tue, 19 Nov 2024 12:55:17 +0000
Subject: [PATCH 21/43] Make split of integrals at +/-vcut more consistent

Where integrals over the into-the-sheath part of the distribution
function are split into two parts at +/- vcut, modify the way that the
split is done so that the 'part 2' integral between 0 and +/-vcut is
calculated the same way as an integral over the out-from-the-sheath part
of the distribution which is cut off at -/+vcut.

Not sure if this is necessary, but seems nicer to be more consistent.
---
 .../src/electron_kinetic_equation.jl          | 61 ++++++++++++++-----
 1 file changed, 47 insertions(+), 14 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index dcb14db2e..30064571d 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -2378,19 +2378,36 @@ end
                 vcut_fraction = (-vcut - vpa_unnorm[minus_vcut_ind-1]) / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1])
 
                 function get_for_one_moment(integral_pieces)
-                    # Integral contribution from the cell containing vcut
-                    integral_vcut_cell = (0.5 * integral_pieces[minus_vcut_ind-1] + 0.5 * integral_pieces[minus_vcut_ind])
+                    # Integral contributions from the cell containing vcut.
+                    # Define these as follows to be consistent with the way the cutoff is
+                    # applied around plus_vcut_ind below.
+                    # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2`
+                    # include all the contributions from the grid points
+                    # `minus_vcut_ind-1` and `minus_vcut_ind`, not just those from
+                    # 'inside' the grid cell.
+                    if vcut_fraction < 0.5
+                        integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind-1] * (0.5 - vcut_fraction) +
+                                                   integral_pieces[minus_vcut_ind]
+                        integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] * (0.5 + vcut_fraction)
+
+                        # part1prime is d(part1)/d(vcut)
+                        part1prime = -integral_pieces[minus_vcut_ind-1] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1])
+                    else
+                        integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind] * (1.5 - vcut_fraction)
+                        integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] +
+                                                   integral_pieces[minus_vcut_ind] * (vcut_fraction - 0.5)
+
+                        # part1prime is d(part1)/d(vcut)
+                        part1prime = -integral_pieces[minus_vcut_ind] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1])
+                    end
 
-                    part1 = sum(integral_pieces[1:minus_vcut_ind-2])
-                    part1 += 0.5 * integral_pieces[minus_vcut_ind-1] + vcut_fraction * integral_vcut_cell
-                    # part1prime is d(part1)/d(vcut)
-                    part1prime = -integral_vcut_cell / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1])
+                    part1 = sum(integral_pieces[1:minus_vcut_ind-2]) + integral_vcut_cell_part1
 
                     # Integral contribution from the cell containing sigma
                     integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind])
 
                     part2 = sum(integral_pieces[minus_vcut_ind+1:sigma_ind-2])
-                    part2 += (1.0 - vcut_fraction) * integral_vcut_cell + 0.5 * integral_pieces[minus_vcut_ind] + 0.5 * integral_pieces[sigma_ind-1] + sigma_fraction * integral_sigma_cell
+                    part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind-1] + sigma_fraction * integral_sigma_cell
                     # part2prime is d(part2)/d(vcut)
                     part2prime = -part1prime
 
@@ -2597,22 +2614,38 @@ end
             function get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
                 # vcut_fraction is the fraction of the distance between plus_vcut_ind and
                 # plus_vcut_ind+1 where vcut is.
-                vcut_fraction = (vcut - vpa_unnorm[plus_vcut_ind+1]) / (vpa_unnorm[plus_vcut_ind] - vpa_unnorm[plus_vcut_ind+1])
+                vcut_fraction = (vcut - vpa_unnorm[plus_vcut_ind]) / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind])
 
                 function get_for_one_moment(integral_pieces)
                     # Integral contribution from the cell containing vcut
-                    integral_vcut_cell = (0.5 * integral_pieces[plus_vcut_ind] + 0.5 * integral_pieces[plus_vcut_ind+1])
+                    # Define these as follows to be consistent with the way the cutoff is
+                    # applied around plus_vcut_ind below.
+                    # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2`
+                    # include all the contributions from the grid points `plus_vcut_ind`
+                    # and `plus_vcut_ind+1`, not just those from 'inside' the grid cell.
+                    if vcut_fraction > 0.5
+                        integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] +
+                                                   integral_pieces[plus_vcut_ind+1] * (vcut_fraction - 0.5)
+                        integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind+1] * (1.5 - vcut_fraction)
+
+                        # part1prime is d(part1)/d(vcut)
+                        part1prime = -integral_pieces[plus_vcut_ind+1] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind])
+                    else
+                        integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] * (0.5 + vcut_fraction)
+                        integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind] * (0.5 - vcut_fraction) +
+                                                   integral_pieces[plus_vcut_ind+1]
+
+                        # part1prime is d(part1)/d(vcut)
+                        part1prime = -integral_pieces[plus_vcut_ind] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind])
+                    end
 
-                    part1 = sum(integral_pieces[plus_vcut_ind+2:end])
-                    part1 += 0.5 * integral_pieces[plus_vcut_ind+1] + vcut_fraction * integral_vcut_cell
-                    # part1prime is d(part1)/d(vcut)
-                    part1prime = integral_vcut_cell / (vpa_unnorm[plus_vcut_ind] - vpa_unnorm[plus_vcut_ind+1])
+                    part1 = sum(integral_pieces[plus_vcut_ind+2:end]) + integral_vcut_cell_part1
 
                     # Integral contribution from the cell containing sigma
                     integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1])
 
                     part2 = sum(integral_pieces[sigma_ind+2:plus_vcut_ind-1])
-                    part2 += (1.0 - vcut_fraction) * integral_vcut_cell + 0.5 * integral_pieces[plus_vcut_ind] + 0.5 * integral_pieces[sigma_ind+1] + sigma_fraction * integral_sigma_cell
+                    part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind+1] + sigma_fraction * integral_sigma_cell
                     # part2prime is d(part2)/d(vcut)
                     part2prime = -part1prime
 

From 9b7e886f3e86208283edb16113942dee27595ce5 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Tue, 19 Nov 2024 13:05:58 +0000
Subject: [PATCH 22/43] Narrower cutoff near zero of correction terms for
 electron bc integral

If the prefactor that sets the correction terms to be proportional to
vpa^2 near vpa=0 is too broad, then it is hard for the correction terms
to fix errors in low moments (e.g. density moment), so making it a bit
narrower reduces the size of the coefficients of the correction terms.
---
 moment_kinetics/src/electron_kinetic_equation.jl | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 30064571d..690409c0f 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -2518,7 +2518,9 @@ end
             c3 = get_part3_for_one_moment_lower(energy_integral_pieces)
             d3 = get_part3_for_one_moment_lower(cubic_integral_pieces)
 
-            correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) * vpa_unnorm^2 / vthe[1,ir]^2 / (1.0 + vpa_unnorm^2 / vthe[1,ir]^2)
+            # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is.
+            sharpness = 4.0
+            correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[1,ir]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[1,ir]^2)
             for ivpa ∈ 1:sigma_ind
                 # We only add the corrections to 'part3', so zero them out for negative v_∥.
                 # I think this is only actually significant for `sigma_ind-1` and
@@ -2568,7 +2570,7 @@ end
                                     + B * v_over_vth
                                     + C * v_over_vth^2
                                     + D * v_over_vth^3) *
-                                   v_over_vth^2 / (1.0 + v_over_vth^2) *
+                                   sharpness * v_over_vth^2 / (1.0 + sharpness * v_over_vth^2) *
                                    pdf[ivpa,1,1,ir]
             end
         end
@@ -2754,7 +2756,9 @@ end
             c3 = get_part3_for_one_moment_upper(energy_integral_pieces)
             d3 = get_part3_for_one_moment_upper(cubic_integral_pieces)
 
-            correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) * vpa_unnorm^2 / vthe[end,ir]^2 / (1.0 + vpa_unnorm^2 / vthe[end,ir]^2)
+            # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is.
+            sharpness = 4.0
+            correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[end,ir]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[end,ir]^2)
             for ivpa ∈ sigma_ind:vpa.n
                 # We only add the corrections to 'part3', so zero them out for positive v_∥.
                 # I think this is only actually significant for `sigma_ind` and
@@ -2804,7 +2808,7 @@ end
                                     + B * v_over_vth
                                     + C * v_over_vth^2
                                     + D * v_over_vth^3) *
-                                   v_over_vth^2 / (1.0 + v_over_vth^2) *
+                                   sharpness * v_over_vth^2 / (1.0 + sharpness * v_over_vth^2) *
                                    pdf[ivpa,1,end,ir]
             end
         end

From b6474d0b3b70b0ac08e2d271c091f9aac3ea2492 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Wed, 20 Nov 2024 11:50:13 +0000
Subject: [PATCH 23/43] Make kinetic electron bc more robust if a guess for
 phi_wall is 0

If phi_wall=0, giving vcut=0, then it is not possible to apply moment
constraints, which causes an error due to a singular matrix. This case
is unphysical, so should not ever be a converged solution (vcut=0
corresponds to a fully electron-absorbing sheath). To avoid the problem,
check if vcut==0, and if so set vcut to some small value (we choose the
value at the next grid point after the one closest to 0).

[skip ci]
---
 moment_kinetics/src/electron_kinetic_equation.jl | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 690409c0f..94e0a11dc 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -2152,6 +2152,12 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir)
 
     # -vcut is between minus_vcut_ind-1 and minus_vcut_ind
     minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut)
+    if vcut == 0.0
+        # Force a non-zero initial guess, as zero makes no sense - that would mean all
+        # electrons are absorbed, i.e. there is no sheath.
+        minus_vcut_ind -= 1
+        vcut = -vpa_unnorm[minus_vcut_ind]
+    end
     if minus_vcut_ind < 2
         error("In lower-z electron bc, failed to find vpa=-vcut point, minus_vcut_ind=$minus_vcut_ind")
     end
@@ -2208,6 +2214,12 @@ function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir)
 
     # vcut is between plus_vcut_ind and plus_vcut_ind+1
     plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut)
+    if vcut == 0.0
+        # Force a non-zero initial guess, as zero makes no sense - that would mean all
+        # electrons are absorbed, i.e. there is no sheath.
+        plus_vcut_ind += 1
+        vcut = vpa_unnorm[plus_vcut_ind]
+    end
     if plus_vcut_ind < 1
         error("In upper-z electron bc, failed to find vpa=vcut point, plus_vcut_ind=$plus_vcut_ind")
     end

From b36758394b7bd5c1e4ff3138025d14579e21fe84 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Fri, 22 Nov 2024 11:58:55 +0000
Subject: [PATCH 24/43] Make it possible to pick the kinetic electron
 preconditioner from input

By default (if `implicit_electron_ppar = true`), use LU when
`block_size[] == 1` or ADI otherwise, but now can pass
`implicit_electron_ppar = "lu"` or `implicit_electron_ppar = "adi"` to
pick the preconditioner type explicitly.
---
 moment_kinetics/src/input_structs.jl |  3 +-
 moment_kinetics/src/time_advance.jl  | 45 ++++++++++++++++++++++------
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl
index edcdb9a8a..2bf4cf57c 100644
--- a/moment_kinetics/src/input_structs.jl
+++ b/moment_kinetics/src/input_structs.jl
@@ -33,7 +33,7 @@ using TOML
 an option but known at compile time when a `time_info` struct is passed as a function
 argument.
 """
-struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero}
+struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero, Telectronprecon}
     n_variables::mk_int
     nstep::mk_int
     end_time::mk_float
@@ -81,6 +81,7 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero
     implicit_ion_advance::Bool
     implicit_vpa_advection::Bool
     implicit_electron_ppar::Bool
+    electron_preconditioner_type::Telectronprecon
     constraint_forcing_rate::mk_float
     decrease_dt_iteration_threshold::mk_int
     increase_dt_iteration_threshold::mk_int
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index 219fd0ef9..ce033a06a 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -409,6 +409,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
     else
         error_sum_zero = 0.0
     end
+
     if electron === nothing
         # Setting up time_info for electrons.
         # Store io_input as the debug_io variable so we can use it to open the debug
@@ -422,18 +423,50 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
         else
             debug_io = nothing
         end
+
+        implicit_electron_ppar = false
+        electron_preconditioner_type = nothing
         decrease_dt_iteration_threshold = t_input["decrease_dt_iteration_threshold"]
         increase_dt_iteration_threshold = t_input["increase_dt_iteration_threshold"]
         cap_factor_ion_dt = mk_float(t_input["cap_factor_ion_dt"])
         electron_t_params = nothing
     elseif electron === false
         debug_io = nothing
+        implicit_electron_ppar = false
+        electron_preconditioner_type = nothing
         decrease_dt_iteration_threshold = -1
         increase_dt_iteration_threshold = typemax(mk_int)
         cap_factor_ion_dt = Inf
         electron_t_params = nothing
     else
         debug_io = nothing
+
+        implicit_electron_ppar = (t_input["implicit_electron_ppar"] !== false)
+        if implicit_electron_ppar
+            if t_input["implicit_electron_ppar"] === true
+                if block_size[] == 1
+                    # No need to parallelise, so un-split LU solver should be most efficient.
+                    electron_preconditioner_type = Val(:electron_lu)
+                else
+                    # Want to parallelise preconditioner, so use ADI method.
+                    electron_preconditioner_type = Val(:electron_adi)
+                end
+            else
+                electron_precon_types = Dict("lu" => :electron_lu, "adi" => :electron_adi)
+                if t_input["implicit_electron_ppar"] ∈ keys(electron_precon_types)
+                    electron_preconditioner_type = Val(electron_precon_types[t_input["implicit_electron_ppar"]])
+                else
+                    precon_keys = collect(keys(electron_precon_types))
+                    error("Unrecognised option implicit_electron_ppar="
+                          * "\"$(t_input["implicit_electron_ppar"])\"  which should be "
+                          * "either false/true or a string giving the type of "
+                          * "preconditioner to use - one of $precon_keys.")
+                end
+            end
+        else
+            electron_preconditioner_type = Val(:none)
+        end
+
         decrease_dt_iteration_threshold = -1
         increase_dt_iteration_threshold = typemax(mk_int)
         cap_factor_ion_dt = Inf
@@ -458,7 +491,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
                      electron !== nothing && t_input["implicit_electron_advance"],
                      electron !== nothing && t_input["implicit_ion_advance"],
                      electron !== nothing && t_input["implicit_vpa_advection"],
-                     electron !== nothing && t_input["implicit_electron_ppar"],
+                     electron !== nothing && implicit_electron_ppar,
+                     electron_preconditioner_type,
                      mk_float(t_input["constraint_forcing_rate"]),
                      decrease_dt_iteration_threshold, increase_dt_iteration_threshold,
                      mk_float(cap_factor_ion_dt), t_input["write_after_fixed_step_count"],
@@ -667,13 +701,6 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                                                                     input_dict, (z=z,);
                                                                     default_rtol=t_params.rtol / 10.0,
                                                                     default_atol=t_params.atol / 10.0)
-    if block_size[] == 1
-        # No need to parallelise, so un-split LU solver should be most efficient.
-        electron_preconditioner_type = Val(:electron_lu)
-    else
-        # Want to parallelise preconditioner, so use ADI method.
-        electron_preconditioner_type = Val(:electron_adi)
-    end
     nl_solver_electron_advance_params =
         setup_nonlinear_solve(t_params.implicit_electron_advance || composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation),
                               input_dict,
@@ -682,7 +709,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop
                               default_rtol=t_params.rtol / 10.0,
                               default_atol=t_params.atol / 10.0,
                               electron_ppar_pdf_solve=true,
-                              preconditioner_type=electron_preconditioner_type)
+                              preconditioner_type=t_params.electron_preconditioner_type)
     nl_solver_ion_advance_params =
         setup_nonlinear_solve(t_params.implicit_ion_advance, input_dict,
                               (s=composition.n_ion_species, r=r, z=z, vperp=vperp,

From f69b3142d9911f2d8d2f7c32a09925e593fa553d Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 21 Nov 2024 21:08:54 +0000
Subject: [PATCH 25/43] Update downloaded HDF5 to 1.14.5

Hoped this might help find a bug, but did not help with that. Do not
know any particular reason to update, but might as well keep up to date.
---
 machines/generic-batch-template/compile_dependencies.sh | 6 +++---
 machines/generic-pc/compile_dependencies.sh             | 6 +++---
 machines/marconi/compile_dependencies.sh                | 6 +++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/machines/generic-batch-template/compile_dependencies.sh b/machines/generic-batch-template/compile_dependencies.sh
index 2f333f12b..966ef12d9 100755
--- a/machines/generic-batch-template/compile_dependencies.sh
+++ b/machines/generic-batch-template/compile_dependencies.sh
@@ -77,10 +77,10 @@ if [[ $BUILDHDF5 == "y" && -d hdf5-build ]]; then
 fi
 
 if [[ $BUILDHDF5 == "y" ]]; then
-  HDF5=hdf5-1.14.3
+  HDF5=hdf5-1.14.5
   # Download and extract the source
-  wget -O ${HDF5}.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.3/src/hdf5-1.14.3.tar.bz2
-  tar xjf ${HDF5}.tar.bz2
+  wget -O ${HDF5}.tar.gz https://support.hdfgroup.org/releases/hdf5/v1_14/v1_14_5/downloads/hdf5-1.14.5.tar.gz
+  tar xzf ${HDF5}.tar.gz
 
   cd $HDF5
 
diff --git a/machines/generic-pc/compile_dependencies.sh b/machines/generic-pc/compile_dependencies.sh
index ae70bd6b9..476c0ed0d 100755
--- a/machines/generic-pc/compile_dependencies.sh
+++ b/machines/generic-pc/compile_dependencies.sh
@@ -77,10 +77,10 @@ else
 fi
 
 if [[ $BUILDHDF5 == "y" ]]; then
-  HDF5=hdf5-1.14.3
+  HDF5=hdf5-1.14.5
   # Download and extract the source
-  wget -O ${HDF5}.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.3/src/hdf5-1.14.3.tar.bz2
-  tar xjf ${HDF5}.tar.bz2
+  wget -O ${HDF5}.tar.gz https://support.hdfgroup.org/releases/hdf5/v1_14/v1_14_5/downloads/hdf5-1.14.5.tar.gz
+  tar xzf ${HDF5}.tar.gz
 
   cd $HDF5
 
diff --git a/machines/marconi/compile_dependencies.sh b/machines/marconi/compile_dependencies.sh
index e18a41e25..70aae5b49 100755
--- a/machines/marconi/compile_dependencies.sh
+++ b/machines/marconi/compile_dependencies.sh
@@ -30,10 +30,10 @@ if [ -d hdf5-build ]; then
 fi
 
 if [ $BUILDHDF5 -eq 0 ]; then
-  HDF5=hdf5-1.14.3
+  HDF5=hdf5-1.14.5
   # Download and extract the source
-  wget -O ${HDF5}.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.3/src/hdf5-1.14.3.tar.bz2
-  tar xjf ${HDF5}.tar.bz2
+  wget -O ${HDF5}.tar.gz https://support.hdfgroup.org/releases/hdf5/v1_14/v1_14_5/downloads/hdf5-1.14.5.tar.gz
+  tar xzf ${HDF5}.tar.gz
 
   cd $HDF5
 

From 2e40ee29e1f8f1b2e4133edc447606faee3cf8f9 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Sun, 17 Nov 2024 16:18:57 +0000
Subject: [PATCH 26/43] Function to interpolate a function symmetrically around
 x=0

---
 moment_kinetics/src/interpolation.jl        | 41 ++++++++++++++++++++-
 moment_kinetics/test/interpolation_tests.jl | 40 +++++++++++++++++++-
 2 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/moment_kinetics/src/interpolation.jl b/moment_kinetics/src/interpolation.jl
index af1ae1514..69041a463 100644
--- a/moment_kinetics/src/interpolation.jl
+++ b/moment_kinetics/src/interpolation.jl
@@ -5,7 +5,7 @@ Note these are not guaranteed to be highly optimized!
 """
 module interpolation
 
-export interpolate_to_grid_z
+export interpolate_to_grid_z, interpolate_to_grid_1d!, interpolate_symmetric!
 
 using ..array_allocation: allocate_float
 using ..moment_kinetics_structs: null_spatial_dimension_info, null_velocity_dimension_info
@@ -275,4 +275,43 @@ function interpolate_to_grid_vpa(newgrid, f::AbstractVector{mk_float}, vpa, spec
     return interpolate_to_grid_1d(newgrid, f, vpa, spectral)
 end
 
+"""
+    interpolate_symmetric!(result, newgrid, f, oldgrid)
+
+Interpolate f from oldgrid to newgrid, imposing that `f(x)` is symmetric around `x=0`, so
+the interpolation is done by fitting a polynomial in `x^2` to the values of `f` given on
+`oldgrid`, and evaluating on `newgrid`. Since interpolation is done in a polynomial of
+`x^2`, the signs of the points on `newgrid` and `oldgrid` do not matter, and are ignored.
+"""
+function interpolate_symmetric!(result, newgrid, f, oldgrid)
+    nnew = length(newgrid)
+    nold = length(oldgrid)
+
+    if nnew == 0
+        return nothing
+    end
+
+    # Check all points in newgrid are covered by oldgrid (i.e. between zero and the
+    # maximum of oldgrid)
+    @boundscheck maximum(abs.(newgrid)) ≤ maximum(abs.(oldgrid)) || error("newgrid bigger ($(maximum(abs.(newgrid)))) than oldgrid ($(maximum(abs.(oldgrid)))).")
+    @boundscheck size(result) == size(newgrid) || error("Size of result ($(size(result))) is not the same as size of newgrid ($(size(newgrid))).")
+    @boundscheck size(f) == size(oldgrid) || error("Size of f ($(size(f))) is not the same as size of oldgrid ($(size(oldgrid))).")
+
+    if nold == 1
+        # Interpolating 'polynomial' is just a constant
+        result .= f[1]
+    else
+        result .= 0.0
+        for j ∈ 1:nold
+            one_over_denominator = 1.0 / prod((oldgrid[j]^2 - oldgrid[k]^2) for k ∈ 1:nold if k ≠ j)
+            this_f = f[j]
+            for i ∈ 1:nnew
+                result[i] += this_f * prod((newgrid[i]^2 - oldgrid[k]^2) for k ∈ 1:nold if k ≠ j) * one_over_denominator
+            end
+        end
+    end
+
+    return nothing
 end
+
+end # interpolation
diff --git a/moment_kinetics/test/interpolation_tests.jl b/moment_kinetics/test/interpolation_tests.jl
index a38506a2f..bd4a1348f 100644
--- a/moment_kinetics/test/interpolation_tests.jl
+++ b/moment_kinetics/test/interpolation_tests.jl
@@ -4,7 +4,7 @@ include("setup.jl")
 
 using moment_kinetics.coordinates: define_test_coordinate
 using moment_kinetics.interpolation:
-    interpolate_to_grid_1d, interpolate_to_grid_z, interpolate_to_grid_vpa
+    interpolate_to_grid_1d, interpolate_to_grid_z, interpolate_to_grid_vpa, interpolate_symmetric!
 
 using MPI
 
@@ -93,6 +93,44 @@ function runtests()
                                expected, rtol=rtol, atol=1.e-14)
             end
         end
+
+        @testset "symmetric interpolation" begin
+            @testset "lower to upper $nx" for nx ∈ 4:10
+                rtol = 0.2 ^ nx
+
+                ix = collect(1:nx)
+                x = @. 1.8 * (ix - 1) / (nx - 1) - 1.23
+                first_positive_ind = searchsortedlast(x, 0.0) + 1
+                f = cos.(x)
+
+                expected = f[first_positive_ind:end]
+
+                result = zeros(nx - first_positive_ind + 1)
+                @views interpolate_symmetric!(result, x[first_positive_ind:end],
+                                              f[1:first_positive_ind-1],
+                                              x[1:first_positive_ind-1])
+
+                @test isapprox(result, expected; rtol=rtol, atol=1.0e-14)
+            end
+
+            @testset "upper to lower $nx" for nx ∈ 4:10
+                rtol = 0.2 ^ nx
+
+                ix = collect(1:nx)
+                x = @. 1.8 * (ix - 1) / (nx - 1) - 0.57
+                first_positive_ind = searchsortedlast(x, 0.0) + 1
+                f = cos.(x)
+
+                expected = f[1:first_positive_ind-1]
+
+                result = zeros(first_positive_ind-1)
+                @views interpolate_symmetric!(result, x[1:first_positive_ind-1],
+                                              f[first_positive_ind:end],
+                                              x[first_positive_ind:end])
+
+                @test isapprox(result, expected; rtol=rtol, atol=1.0e-14)
+            end
+        end
     end
 end
 

From a750ad760e885c45fa09916ca2ce249ceb72e0a1 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Sun, 17 Nov 2024 17:04:08 +0000
Subject: [PATCH 27/43] Improve interpolation for kinetic electron bc in
 element containing zero

Previous scheme just interpolated using existing values in whole element
containing zero. This meant that the result of the boundary condition
depended on some points that are overwritten by the boundary condition.
Improve on this by doing special interpolation in the element containing
zero. Instead of using the usual `interpolate_to_grid_1d!()` function,
use `interpolate_symmetric!()` which does an interpolation that is
forced to be symmetric around v_parallel=0, as the interpolation
polynomial is a polynomial in `v_parallel^2` (the interpolating
polynomial is constructed using a Lagrange polynomial method).  The
inputs to the interpolation are now just the function values on grid
points (within the element containing zero) that are not set by the
boundary condition.

Also optimises the interpolation of the points in the elements not
containing zero by restricting the interpolation to just the points
needed for output, instead of interpolating to the full reversed grid.
---
 .../src/electron_kinetic_equation.jl          | 70 ++++++++++++++++---
 1 file changed, 61 insertions(+), 9 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 94e0a11dc..fea5179d7 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -18,7 +18,8 @@ using ..calculus: derivative!, second_derivative!, integral,
 using ..communication
 using ..gauss_legendre: gausslegendre_info
 using ..input_structs
-using ..interpolation: interpolate_to_grid_1d!
+using ..interpolation: interpolate_to_grid_1d!,
+                       interpolate_symmetric!
 using ..type_definitions: mk_float, mk_int
 using ..array_allocation: allocate_float
 using ..electron_fluid_equations: calculate_electron_moments!,
@@ -2178,6 +2179,18 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir)
     # sigma_ind where sigma is.
     sigma_fraction = -vpa_unnorm[sigma_ind-1] / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind-1])
 
+    # Want the element that contains the interval on the lower side of sigma_ind. For
+    # points on element boundaries, the `ielement` array contains the element on the lower
+    # side of the grid point, so just looking up the `ielement` of `sigma_ind` is what we
+    # want here.
+    element_with_zero = vpa.ielement[sigma_ind]
+    element_with_zero_boundary = element_with_zero == 1 ? vpa.imin[element_with_zero] :
+                                                          vpa.imin[element_with_zero] - 1
+    # This searchsortedlast() call finds the last point ≤ to the negative of v_∥
+    # at the lower boundary of the element containing zero.
+    last_point_near_zero = searchsortedlast(vpa_unnorm,
+                                            -vpa_unnorm[element_with_zero_boundary])
+
     # Want to construct the w-grid corresponding to -vpa.
     #   wpa(vpa) = (vpa - upar)/vth
     #   ⇒ vpa = vth*wpa(vpa) + upar
@@ -2194,6 +2207,7 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir)
     reverse!(reversed_wpa_of_minus_vpa)
 
     return vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction,
+           element_with_zero, element_with_zero_boundary, last_point_near_zero,
            reversed_wpa_of_minus_vpa
 end
 
@@ -2240,6 +2254,16 @@ function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir)
     # sigma_ind where sigma is.
     sigma_fraction = -vpa_unnorm[sigma_ind+1] / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind+1])
 
+    # Want the element that contains the interval on the upper side of sigma_ind. For
+    # points on element boundaries, the `ielement` array contains the element on the lower
+    # side of the grid point, we need the `ielement` of `sigma_ind+1` here.
+    element_with_zero = vpa.ielement[sigma_ind+1]
+    element_with_zero_boundary = vpa.imax[element_with_zero]
+    # This searchsortedfirst() call finds the first point ≥ to the negative of v_∥ at the
+    # upper boundary of the element containing zero.
+    first_point_near_zero = searchsortedfirst(vpa_unnorm,
+                                              -vpa_unnorm[element_with_zero_boundary])
+
     # Want to construct the w-grid corresponding to -vpa.
     #   wpa(vpa) = (vpa - upar)/vth
     #   ⇒ vpa = vth*wpa(vpa) + upar
@@ -2256,6 +2280,7 @@ function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir)
     reverse!(reversed_wpa_of_minus_vpa)
 
     return vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction,
+           element_with_zero, element_with_zero_boundary, first_point_near_zero,
            reversed_wpa_of_minus_vpa
 end
 
@@ -2320,7 +2345,6 @@ end
     begin_r_region()
 
     newton_max_its = 100
-    reversed_pdf = vpa.scratch
 
     function get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1, b1prime,
                                                   c1, c1prime, c2, c2prime, d1, d1prime,
@@ -2365,14 +2389,28 @@ end
             # potential).
 
             vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction,
+                element_with_zero, element_with_zero_boundary, last_point_near_zero,
                 reversed_wpa_of_minus_vpa = get_cutoff_params_lower(upar, vthe, phi,
                                                                     me_over_mi, vpa, ir)
 
             # interpolate the pdf onto this grid
-            #@views interpolate_to_grid_1d!(interpolated_pdf, wpa_values, pdf[:,1,1,ir], vpa, vpa_spectral)
-            @views interpolate_to_grid_1d!(reversed_pdf, reversed_wpa_of_minus_vpa, pdf[:,1,1,ir], vpa, vpa_spectral) # Could make this more efficient by only interpolating to the points needed below, by taking an appropriate view of wpa_of_minus_vpa. Also, in the element containing vpa=0, this interpolation depends on the values that will be replaced by the reflected, interpolated values, which is not ideal (maybe this element should be treated specially first?).
-            reverse!(reversed_pdf)
-            pdf[sigma_ind:end,1,1,ir] .= reversed_pdf[sigma_ind:end]
+            # 'near zero' means in the range where
+            # abs(v_∥)≤abs(lower boundary of element including v_∥=0)
+            # 'far from zero' means larger values of v_∥.
+
+            # Interpolate to the 'near zero' points
+            @views interpolate_symmetric!(pdf[sigma_ind:last_point_near_zero,1,1,ir],
+                                          vpa_unnorm[sigma_ind:last_point_near_zero],
+                                          pdf[element_with_zero_boundary:sigma_ind-1,1,1,ir],
+                                          vpa_unnorm[element_with_zero_boundary:sigma_ind-1])
+
+            # Interpolate to the 'far from zero' points
+            reversed_pdf_far_from_zero = vpa.scratch[last_point_near_zero+1:end]
+            @views interpolate_to_grid_1d!(reversed_pdf_far_from_zero,
+                                           reversed_wpa_of_minus_vpa[1:vpa.n-last_point_near_zero],
+                                           pdf[:,1,1,ir], vpa, vpa_spectral)
+            reverse!(reversed_pdf_far_from_zero)
+            pdf[last_point_near_zero+1:end,1,1,ir] .= reversed_pdf_far_from_zero
 
             # Per-grid-point contributions to moment integrals
             # Note that we need to include the normalisation factor of 1/sqrt(pi) that
@@ -2606,14 +2644,28 @@ end
             # potential).
 
             vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction,
+                element_with_zero, element_with_zero_boundary, first_point_near_zero,
                 reversed_wpa_of_minus_vpa = get_cutoff_params_upper(upar, vthe, phi,
                                                                     me_over_mi, vpa, ir)
 
             # interpolate the pdf onto this grid
-            #@views interpolate_to_grid_1d!(interpolated_pdf, wpa_values, pdf[:,1,1,ir], vpa, vpa_spectral)
-            @views interpolate_to_grid_1d!(reversed_pdf, reversed_wpa_of_minus_vpa, pdf[:,1,end,ir], vpa, vpa_spectral) # Could make this more efficient by only interpolating to the points needed below, by taking an appropriate view of wpa_of_minus_vpa. Also, in the element containing vpa=0, this interpolation depends on the values that will be replaced by the reflected, interpolated values, which is not ideal (maybe this element should be treated specially first?).
+            # 'near zero' means in the range where
+            # abs(v_∥)≤abs(upper boundary of element including v_∥=0)
+            # 'far from zero' means more negative values of v_∥.
+
+            # Interpolate to the 'near zero' points
+            @views interpolate_symmetric!(pdf[first_point_near_zero:sigma_ind,1,end,ir],
+                                          vpa_unnorm[first_point_near_zero:sigma_ind],
+                                          pdf[sigma_ind+1:element_with_zero_boundary,1,end,ir],
+                                          vpa_unnorm[sigma_ind+1:element_with_zero_boundary])
+
+            # Interpolate to the 'far from zero' points
+            reversed_pdf = vpa.scratch[1:first_point_near_zero-1]
+            @views interpolate_to_grid_1d!(reversed_pdf,
+                                           reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:end],
+                                           pdf[:,1,end,ir], vpa, vpa_spectral)
             reverse!(reversed_pdf)
-            pdf[1:sigma_ind,1,end,ir] .= reversed_pdf[1:sigma_ind]
+            pdf[1:first_point_near_zero-1,1,end,ir] .= reversed_pdf[1:first_point_near_zero-1]
 
             # Per-grid-point contributions to moment integrals
             # Note that we need to include the normalisation factor of 1/sqrt(pi) that

From 8c028f17879844904e67d1825c9c7eb1b7aa4fed Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Tue, 19 Nov 2024 12:59:12 +0000
Subject: [PATCH 28/43] Don't do single update of vcut when
 bc_constraints=false

Skip the single update of vcut, which was previously done even when
`bc_constraints=false` was passed to
`enforce_boundary_condition_on_electron_pdf!()`. Because the update of
vcut is integral in nature, it couples every point in the vpa grid.
Skipping it makes it possible to match the result with an interpolation
matrix that does not couple every point in the vpa grid.
---
 .../src/electron_kinetic_equation.jl          | 104 +++++++++---------
 1 file changed, 54 insertions(+), 50 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index fea5179d7..9ae24a3a7 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -2482,37 +2482,39 @@ end
             C = 0.0
             # Always do at least one update of vcut
             epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
-            while true
-                # Newton iteration update. Note that primes denote derivatives with
-                # respect to vcut
-                delta_v = - epsilon / epsilonprime
-
-                if vcut > vthe[1,ir] && epsilonprime < 0.0
-                    # epsilon should be increasing with vcut at epsilon=0, so if
-                    # epsilonprime is negative, the solution is actually at a lower vcut -
-                    # at larger vcut, epsilon will just tend to 0 but never reach it.
-                    delta_v = -0.1 * vthe[1,ir]
-                end
+            if bc_constraints
+                while true
+                    # Newton iteration update. Note that primes denote derivatives with
+                    # respect to vcut
+                    delta_v = - epsilon / epsilonprime
+
+                    if vcut > vthe[1,ir] && epsilonprime < 0.0
+                        # epsilon should be increasing with vcut at epsilon=0, so if
+                        # epsilonprime is negative, the solution is actually at a lower vcut -
+                        # at larger vcut, epsilon will just tend to 0 but never reach it.
+                        delta_v = -0.1 * vthe[1,ir]
+                    end
 
-                # Prevent the step size from getting too big, to make Newton iteration
-                # more robust.
-                delta_v = min(delta_v, 0.1 * vthe[1,ir])
-                delta_v = max(delta_v, -0.1 * vthe[1,ir])
+                    # Prevent the step size from getting too big, to make Newton iteration
+                    # more robust.
+                    delta_v = min(delta_v, 0.1 * vthe[1,ir])
+                    delta_v = max(delta_v, -0.1 * vthe[1,ir])
 
-                vcut = vcut + delta_v
-                minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut)
+                    vcut = vcut + delta_v
+                    minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut)
 
-                epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
+                    epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
 
-                if abs(epsilon) < newton_tol
-                    break
-                end
+                    if abs(epsilon) < newton_tol
+                        break
+                    end
 
-                if counter ≥ newton_max_its
-                    error("Newton iteration for electron lower-z boundary failed to "
-                          * "converge after $counter iterations")
+                    if counter ≥ newton_max_its
+                        error("Newton iteration for electron lower-z boundary failed to "
+                              * "converge after $counter iterations")
+                    end
+                    counter += 1
                 end
-                counter += 1
             end
 
             # Adjust pdf so that after reflecting and cutting off tail, it will obey the
@@ -2734,37 +2736,39 @@ end
             counter = 1
             # Always do at least one update of vcut
             epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
-            while true
-                # Newton iteration update. Note that primes denote derivatives with
-                # respect to vcut
-                delta_v = - epsilon / epsilonprime
-
-                if vcut > vthe[1,ir] && epsilonprime > 0.0
-                    # epsilon should be decreasing with vcut at epsilon=0, so if
-                    # epsilonprime is positive, the solution is actually at a lower vcut -
-                    # at larger vcut, epsilon will just tend to 0 but never reach it.
-                    delta_v = -0.1 * vthe[1,ir]
-                end
+            if bc_constraints
+                while true
+                    # Newton iteration update. Note that primes denote derivatives with
+                    # respect to vcut
+                    delta_v = - epsilon / epsilonprime
+
+                    if vcut > vthe[1,ir] && epsilonprime > 0.0
+                        # epsilon should be decreasing with vcut at epsilon=0, so if
+                        # epsilonprime is positive, the solution is actually at a lower vcut -
+                        # at larger vcut, epsilon will just tend to 0 but never reach it.
+                        delta_v = -0.1 * vthe[1,ir]
+                    end
 
-                # Prevent the step size from getting too big, to make Newton iteration
-                # more robust.
-                delta_v = min(delta_v, 0.1 * vthe[end,ir])
-                delta_v = max(delta_v, -0.1 * vthe[end,ir])
+                    # Prevent the step size from getting too big, to make Newton iteration
+                    # more robust.
+                    delta_v = min(delta_v, 0.1 * vthe[end,ir])
+                    delta_v = max(delta_v, -0.1 * vthe[end,ir])
 
-                vcut = vcut + delta_v
-                plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut)
+                    vcut = vcut + delta_v
+                    plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut)
 
-                epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
+                    epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
 
-                if abs(epsilon) < newton_tol
-                    break
-                end
+                    if abs(epsilon) < newton_tol
+                        break
+                    end
 
-                if counter ≥ newton_max_its
-                    error("Newton iteration for electron upper-z boundary failed to "
-                          * "converge after $counter iterations")
+                    if counter ≥ newton_max_its
+                        error("Newton iteration for electron upper-z boundary failed to "
+                              * "converge after $counter iterations")
+                    end
+                    counter += 1
                 end
-                counter += 1
             end
 
             # Adjust pdf so that after reflecting and cutting off tail, it will obey the

From 68a6ccee10b0a7d4faf0aace0361d818a4baf7c0 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Wed, 20 Nov 2024 21:29:24 +0000
Subject: [PATCH 29/43] Calculate vcut_fraction in utility functions

Reduces code duplication.
---
 .../src/electron_kinetic_equation.jl          | 28 +++++++++++++------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 9ae24a3a7..246da0fa1 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -2284,6 +2284,16 @@ function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir)
            reversed_wpa_of_minus_vpa
 end
 
+function get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm)
+    return (-vcut - vpa_unnorm[minus_vcut_ind-1]) /
+           (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1])
+end
+
+function get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
+    return (vcut - vpa_unnorm[plus_vcut_ind]) /
+           (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind])
+end
+
 @timeit global_timer enforce_boundary_condition_on_electron_pdf!(
                          pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral,
                          vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi;
@@ -2425,7 +2435,7 @@ end
             function get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
                 # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and
                 # minus_vcut_ind where -vcut is.
-                vcut_fraction = (-vcut - vpa_unnorm[minus_vcut_ind-1]) / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1])
+                vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm)
 
                 function get_for_one_moment(integral_pieces)
                     # Integral contributions from the cell containing vcut.
@@ -2525,7 +2535,7 @@ end
             pdf[plus_vcut_ind+2:end,1,1,ir] .= 0.0
             # vcut_fraction is the fraction of the distance between plus_vcut_ind and
             # plus_vcut_ind+1 where vcut is.
-            vcut_fraction = (vcut - vpa_unnorm[plus_vcut_ind]) / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind])
+            vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
             if vcut_fraction > 0.5
                 pdf[plus_vcut_ind+1,1,1,ir] *= vcut_fraction - 0.5
             else
@@ -2682,7 +2692,7 @@ end
             function get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
                 # vcut_fraction is the fraction of the distance between plus_vcut_ind and
                 # plus_vcut_ind+1 where vcut is.
-                vcut_fraction = (vcut - vpa_unnorm[plus_vcut_ind]) / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind])
+                vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
 
                 function get_for_one_moment(integral_pieces)
                     # Integral contribution from the cell containing vcut
@@ -2777,14 +2787,14 @@ end
 
             minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut)
             pdf[1:minus_vcut_ind-2,1,end,ir] .= 0.0
-            # vcut_fraction is the fraction of the distance between minus_vcut_ind and
-            # minus_vcut_ind-1 where -vcut is.
-            vcut_fraction = (-vcut - vpa_unnorm[minus_vcut_ind]) / (vpa_unnorm[minus_vcut_ind-1] - vpa_unnorm[minus_vcut_ind])
-            if vcut_fraction > 0.5
-                pdf[minus_vcut_ind-1,1,end,ir] *= vcut_fraction - 0.5
+            # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and
+            # minus_vcut_ind where -vcut is.
+            vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm)
+            if vcut_fraction < 0.5
+                pdf[minus_vcut_ind-1,1,end,ir] *= 0.5 - vcut_fraction
             else
                 pdf[minus_vcut_ind-1,1,end,ir] = 0.0
-                pdf[minus_vcut_ind,1,end,ir] *= vcut_fraction + 0.5
+                pdf[minus_vcut_ind,1,end,ir] *= 1.5 - vcut_fraction
             end
 
             # update the electrostatic potential at the boundary to be the value corresponding to the updated cutoff velocity

From 45b8466cf7ad984cb1a8fb684ecb9d3105bfec1a Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Wed, 20 Nov 2024 22:16:56 +0000
Subject: [PATCH 30/43] Update vcut for kinetic electron bc at each Newton
 iteration

Improves convergence a bit, allowing electron solver to take larger
pseudo-timesteps, in at least one case.
---
 .../src/electron_fluid_equations.jl           |  2 +-
 .../src/electron_kinetic_equation.jl          | 45 +++++++++++++++++--
 moment_kinetics/src/nonlinear_solvers.jl      |  2 +-
 moment_kinetics/src/time_advance.jl           |  2 +-
 moment_kinetics/src/vpa_advection.jl          |  2 +-
 .../test/nonlinear_solver_tests.jl            |  4 +-
 6 files changed, 47 insertions(+), 10 deletions(-)

diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl
index dec0aff8f..d62ed7a73 100644
--- a/moment_kinetics/src/electron_fluid_equations.jl
+++ b/moment_kinetics/src/electron_fluid_equations.jl
@@ -691,7 +691,7 @@ end
         # `residual` is zero, electron_ppar is the result of a backward-Euler timestep:
         #   (f_new - f_old) / dt = RHS(f_new)
         # ⇒ (f_new - f_old)/dt - RHS(f_new) = 0
-        function residual_func!(residual, electron_ppar)
+        function residual_func!(residual, electron_ppar; krylov=false)
             begin_z_region()
             @loop_z iz begin
                 residual[iz] = ppar_in[iz]
diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index 246da0fa1..cfc88e187 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -1400,7 +1400,7 @@ global_rank[] == 0 && println("recalculating precon")
 
             # Do a backward-Euler update of the electron pdf, and (if evove_ppar=true) the
             # electron parallel pressure.
-            function residual_func!(this_residual, new_variables)
+            function residual_func!(this_residual, new_variables; krylov=false)
                 electron_ppar_residual, f_electron_residual = this_residual
                 electron_ppar_newvar, f_electron_newvar = new_variables
 
@@ -1424,7 +1424,8 @@ global_rank[] == 0 && println("recalculating precon")
                            moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral,
                            vpa_spectral, vpa_advect, moments,
                            num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
-                           composition.me_over_mi; bc_constraints=false)
+                           composition.me_over_mi; bc_constraints=false,
+                           update_vcut=!krylov)
 
                 if evolve_ppar
                     # Calculate heat flux and derivatives using new_variables
@@ -1852,7 +1853,7 @@ to allow the outer r-loop to be parallelised.
 
     newton_success = false
     for ir ∈ 1:r.n
-        function residual_func!(residual, new_variables; debug=false)
+        function residual_func!(residual, new_variables; debug=false, krylov=false)
             electron_ppar_residual, f_electron_residual = residual
             electron_ppar_new, f_electron_new = new_variables
 
@@ -2297,7 +2298,9 @@ end
 @timeit global_timer enforce_boundary_condition_on_electron_pdf!(
                          pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral,
                          vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi;
-                         bc_constraints=true) = begin
+                         bc_constraints=true, update_vcut=true) = begin
+
+    @boundscheck bc_constraints && !update_vcut && error("update_vcut is not used when bc_constraints=true, but update_vcut has non-default value")
 
     newton_tol = 1.0e-13
 
@@ -2525,6 +2528,23 @@ end
                     end
                     counter += 1
                 end
+            elseif update_vcut
+                # When bc_constraints=false, no constraints are applied in
+                # get_integrals_and_derivatives_lowerz(), so updating vcut is usually just
+                # solving a linear equation, not doing a Newton iteration. The exception
+                # is if minus_vcut_ind changes, in which case we have to re-do the update.
+                while true
+                    vcut = vcut - epsilon / epsilonprime
+                    minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut)
+
+                    vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm)
+
+                    if 0.0 ≤ vcut_fraction ≤ 1.0
+                        break
+                    end
+
+                    epsilon, epsilonprime, _, _, _, _, _, _ = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
+                end
             end
 
             # Adjust pdf so that after reflecting and cutting off tail, it will obey the
@@ -2779,6 +2799,23 @@ end
                     end
                     counter += 1
                 end
+            elseif update_vcut
+                # When bc_constraints=false, no constraints are applied in
+                # get_integrals_and_derivatives_upperz(), so updating vcut is usually just
+                # solving a linear equation, not doing a Newton iteration. The exception
+                # is if minus_vcut_ind changes, in which case we have to re-do the update.
+                while true
+                    vcut = vcut - epsilon / epsilonprime
+                    plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut)
+
+                    vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
+
+                    if 0.0 ≤ vcut_fraction ≤ 1.0
+                        break
+                    end
+
+                    epsilon, epsilonprime, _, _, _, _, _, _ = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
+                end
             end
 
             # Adjust pdf so that after reflecting and cutting off tail, it will obey the
diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl
index 5fed3dc4c..7b2707d69 100644
--- a/moment_kinetics/src/nonlinear_solvers.jl
+++ b/moment_kinetics/src/nonlinear_solvers.jl
@@ -1252,7 +1252,7 @@ MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869].
         end
 
         parallel_map(solver_type, (x,v) -> x + Jv_scale_factor * v, v, x, v)
-        residual_func!(rhs_delta, v)
+        residual_func!(rhs_delta, v; krylov=true)
         parallel_map(solver_type, (rhs_delta, residual0) -> (rhs_delta - residual0) * inv_Jv_scale_factor,
                      v, rhs_delta, residual0)
         left_preconditioner(v)
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index ce033a06a..f3592dc80 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -3783,7 +3783,7 @@ Do a backward-Euler timestep for all terms in the ion kinetic equation.
     # `residual` is zero, f_new is the result of a backward-Euler timestep:
     #   (f_new - f_old) / dt = RHS(f_new)
     # ⇒ f_new - f_old - dt*RHS(f_new) = 0
-    function residual_func!(residual, f_new)
+    function residual_func!(residual, f_new; krylov=false)
         begin_s_r_z_vperp_vpa_region()
         @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
             residual[ivpa,ivperp,iz,ir,is] = f_old[ivpa,ivperp,iz,ir,is]
diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl
index 8a04e4936..8abfe38e7 100644
--- a/moment_kinetics/src/vpa_advection.jl
+++ b/moment_kinetics/src/vpa_advection.jl
@@ -267,7 +267,7 @@ end
             # `residual` is zero, f_new is the result of a backward-Euler timestep:
             #   (f_new - f_old) / dt = RHS(f_new)
             # ⇒ f_new - f_old - dt*RHS(f_new) = 0
-            function residual_func!(residual, f_new)
+            function residual_func!(residual, f_new; krylov=false)
                 apply_bc!(f_new)
                 residual .= f_old
                 advance_f_local!(residual, f_new, vpa_advect[is], ivperp, iz, ir, vpa, dt,
diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl
index ab68389f4..36c74eb21 100644
--- a/moment_kinetics/test/nonlinear_solver_tests.jl
+++ b/moment_kinetics/test/nonlinear_solver_tests.jl
@@ -67,7 +67,7 @@ function linear_test()
                                zeros(mk_float, 0, 0))
         coords = NamedTuple(c => the_coord for c ∈ coord_names)
 
-        function rhs_func!(residual, x)
+        function rhs_func!(residual, x; krylov=false)
             if serial_solve
                 residual .= A * x - b
             else
@@ -180,7 +180,7 @@ function nonlinear_test()
                                zeros(mk_float, 0, 0))
         coords = NamedTuple(c => the_coord for c ∈ coord_names)
 
-        function rhs_func!(residual, x)
+        function rhs_func!(residual, x; krylov=false)
             if serial_solve
                 i = 1
                 D = abs(x[i])^2.5

From d3c3e8e654d547e4e7640ab652a45ed87b1f4f37 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 21 Nov 2024 09:27:49 +0000
Subject: [PATCH 31/43] Add missing @views, move struct field lookups out of
 loops for electrons

Should reduce allocations.
---
 moment_kinetics/src/electron_fluid_equations.jl |  2 +-
 moment_kinetics/src/electron_vpa_advection.jl   | 14 +++++++++-----
 moment_kinetics/src/electron_z_advection.jl     | 14 +++++++++-----
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl
index d62ed7a73..bd2f1cdbb 100644
--- a/moment_kinetics/src/electron_fluid_equations.jl
+++ b/moment_kinetics/src/electron_fluid_equations.jl
@@ -878,7 +878,7 @@ function calculate_electron_qpar_from_pdf_no_r!(qpar, ppar, vth, pdf, vpa, ir)
     begin_z_region()
     ivperp = 1
     @loop_z iz begin
-        @views qpar[iz] = 2*ppar[iz]*vth[iz]*integrate_over_vspace(pdf[:, ivperp, iz], vpa.grid.^3, vpa.wgts)
+        @views qpar[iz] = 2*ppar[iz]*vth[iz]*integrate_over_vspace(pdf[:, ivperp, iz], vpa.grid, 3, vpa.wgts)
     end
 end
 
diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl
index 0c2f7d02a..2ffcc3298 100644
--- a/moment_kinetics/src/electron_vpa_advection.jl
+++ b/moment_kinetics/src/electron_vpa_advection.jl
@@ -22,6 +22,9 @@ calculate the wpa-advection term for the electron kinetic equation
                          ir) = begin
     begin_z_vperp_region()
 
+    adv_fac = advect[1].adv_fac
+    speed = advect[1].speed
+
     # create a reference to a scratch_dummy array to store the wpa-derivative of the electron pdf
     dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir]
     #d2pdf_dvpa2 = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir]
@@ -31,7 +34,7 @@ calculate the wpa-advection term for the electron kinetic equation
                                       electron_source_settings, ir)
     # update adv_fac
     @loop_z_vperp iz ivperp begin
-        @views @. advect[1].adv_fac[:,ivperp,iz,ir] = -advect[1].speed[:,ivperp,iz,ir]
+        @views @. adv_fac[:,ivperp,iz,ir] = -speed[:,ivperp,iz,ir]
     end
     #calculate the upwind derivative of the electron pdf w.r.t. wpa
     @loop_z_vperp iz ivperp begin
@@ -43,7 +46,7 @@ calculate the wpa-advection term for the electron kinetic equation
     #end
     # calculate the advection term
     @loop_z_vperp iz ivperp begin
-        @. pdf_out[:,ivperp,iz] += dt * advect[1].adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz]
+        @views @. pdf_out[:,ivperp,iz] += dt * adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz]
         #@. pdf_out[:,ivperp,iz] -= advect[1].adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz] + 0.0001*d2pdf_dvpa2[:,ivperp,iz]
     end
     return nothing
@@ -58,10 +61,11 @@ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa,
     dppar_dz = @view moments.electron.dppar_dz[:,ir]
     dqpar_dz = @view moments.electron.dqpar_dz[:,ir]
     dvth_dz = @view moments.electron.dvth_dz[:,ir]
+    speed = advect.speed
     # calculate the advection speed in wpa
     @loop_z_vperp_vpa iz ivperp ivpa begin
-        advect.speed[ivpa,ivperp,iz,ir] = ((vth[iz] * dppar_dz[iz] + vpa[ivpa] * dqpar_dz[iz])
-                                           / (2 * ppar[iz]) - vpa[ivpa]^2 * dvth_dz[iz])
+        speed[ivpa,ivperp,iz,ir] = ((vth[iz] * dppar_dz[iz] + vpa[ivpa] * dqpar_dz[iz])
+                                    / (2 * ppar[iz]) - vpa[ivpa]^2 * dvth_dz[iz])
     end
 
     for index ∈ eachindex(electron_source_settings)
@@ -77,7 +81,7 @@ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa,
                         ppar[iz] +
                     0.5 * source_density_amplitude[iz] / density[iz]
                 @loop_vperp_vpa ivperp ivpa begin
-                    advect.speed[ivpa,ivperp,iz,ir] += term1 + vpa[ivpa] * term2_over_vpa
+                    speed[ivpa,ivperp,iz,ir] += term1 + vpa[ivpa] * term2_over_vpa
                 end
             end
         end
diff --git a/moment_kinetics/src/electron_z_advection.jl b/moment_kinetics/src/electron_z_advection.jl
index 8c78e58ab..ddb530c1b 100644
--- a/moment_kinetics/src/electron_z_advection.jl
+++ b/moment_kinetics/src/electron_z_advection.jl
@@ -23,6 +23,9 @@ calculate the z-advection term for the electron kinetic equation = wpa * vthe *
                          scratch_dummy, dt, ir) = begin
     begin_vperp_vpa_region()
 
+    adv_fac = advect[1].adv_fac
+    speed = advect[1].speed
+
     # create a pointer to a scratch_dummy array to store the z-derivative of the electron pdf
     dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir]
     d2pdf_dz2 = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir]
@@ -32,11 +35,11 @@ calculate the z-advection term for the electron kinetic equation = wpa * vthe *
     # update adv_fac -- note that there is no factor of dt here because
     # in some cases the electron kinetic equation is solved as a steady-state equation iteratively
     @loop_vperp_vpa ivperp ivpa begin
-        @views advect[1].adv_fac[:,ivpa,ivperp,ir] = -advect[1].speed[:,ivpa,ivperp,ir]
+        @views @. adv_fac[:,ivpa,ivperp,ir] = -speed[:,ivpa,ivperp,ir]
     end
     #calculate the upwind derivative
     @views derivative_z_pdf_vpavperpz!(
-               dpdf_dz, pdf_in, advect[1].adv_fac[:,:,:,ir],
+               dpdf_dz, pdf_in, adv_fac[:,:,:,ir],
                scratch_dummy.buffer_vpavperpr_1[:,:,ir],
                scratch_dummy.buffer_vpavperpr_2[:,:,ir],
                scratch_dummy.buffer_vpavperpr_3[:,:,ir],
@@ -49,8 +52,8 @@ calculate the z-advection term for the electron kinetic equation = wpa * vthe *
     # calculate the advection term
     begin_z_vperp_vpa_region()
     @loop_z_vperp_vpa iz ivperp ivpa begin
-        pdf_out[ivpa,ivperp,iz] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz]
-        #pdf_out[ivpa,ivperp,iz] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz] + 0.0001*d2pdf_dz2[ivpa,ivperp,iz]
+        pdf_out[ivpa,ivperp,iz] += dt * adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz]
+        #pdf_out[ivpa,ivperp,iz] += dt * adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz] + 0.0001*d2pdf_dz2[ivpa,ivperp,iz]
     end
     return nothing
 end
@@ -60,9 +63,10 @@ calculate the electron advection speed in the z-direction at each grid point
 """
 function update_electron_speed_z!(advect, upar, vth, vpa, ir)
     # the electron advection speed in z is v_par = w_par * v_the
+    speed = advect.speed
     @loop_vperp_vpa ivperp ivpa begin
         #@. @views advect.speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth
-        @. @views advect.speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth + upar
+        @. speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth + upar
     end
     return nothing
 end

From 651f141b72c006d72a41dcb2d4fbc54777d1db23 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 21 Nov 2024 09:44:24 +0000
Subject: [PATCH 32/43] Clean up @views in
 enforce_boundary_condition_on_electron_pdf!()

Should reduce allocations, and may save a little compilation time by
removing some unnecessary `@views`.
---
 .../src/electron_kinetic_equation.jl          | 76 +++++++++----------
 1 file changed, 38 insertions(+), 38 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index cfc88e187..d6b2c1a4a 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -2317,7 +2317,7 @@ end
     end
     if vperp.n > 1
         begin_r_z_vpa_region()
-        @views enforce_vperp_boundary_condition!(pdf, vperp.bc, vperp, vperp_spectral)
+        enforce_vperp_boundary_condition!(pdf, vperp.bc, vperp, vperp_spectral)
     end
 
     if z.bc == "periodic"
@@ -2418,7 +2418,7 @@ end
                                           vpa_unnorm[element_with_zero_boundary:sigma_ind-1])
 
             # Interpolate to the 'far from zero' points
-            reversed_pdf_far_from_zero = vpa.scratch[last_point_near_zero+1:end]
+            reversed_pdf_far_from_zero = @view vpa.scratch[last_point_near_zero+1:end]
             @views interpolate_to_grid_1d!(reversed_pdf_far_from_zero,
                                            reversed_wpa_of_minus_vpa[1:vpa.n-last_point_near_zero],
                                            pdf[:,1,1,ir], vpa, vpa_spectral)
@@ -2430,10 +2430,10 @@ end
             # would be factored in by integrate_over_vspace(). This will need to
             # change/adapt when we support 2V as well as 1V.
             density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi)
-            flow_integral_pieces = @views @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[1,ir]
-            energy_integral_pieces = @views @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[1,ir]
-            cubic_integral_pieces = @views @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[1,ir]
-            quartic_integral_pieces = @views @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[1,ir]
+            flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[1,ir]
+            energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[1,ir]
+            cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[1,ir]
+            quartic_integral_pieces = @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[1,ir]
 
             function get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
                 # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and
@@ -2464,12 +2464,12 @@ end
                         part1prime = -integral_pieces[minus_vcut_ind] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1])
                     end
 
-                    part1 = sum(integral_pieces[1:minus_vcut_ind-2]) + integral_vcut_cell_part1
+                    part1 = sum(@view integral_pieces[1:minus_vcut_ind-2]) + integral_vcut_cell_part1
 
                     # Integral contribution from the cell containing sigma
                     integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind])
 
-                    part2 = sum(integral_pieces[minus_vcut_ind+1:sigma_ind-2])
+                    part2 = sum(@view integral_pieces[minus_vcut_ind+1:sigma_ind-2])
                     part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind-1] + sigma_fraction * integral_sigma_cell
                     # part2prime is d(part2)/d(vcut)
                     part2prime = -part1prime
@@ -2578,10 +2578,10 @@ end
 
             # Need to recalculate these with the updated distribution function
             density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi)
-            flow_integral_pieces = @views @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[1,ir]
-            energy_integral_pieces = @views @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[1,ir]
-            cubic_integral_pieces = @views @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[1,ir]
-            quartic_integral_pieces = @views @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[1,ir]
+            flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[1,ir]
+            energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[1,ir]
+            cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[1,ir]
+            quartic_integral_pieces = @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[1,ir]
 
             # Update the part2 integrals since we've applied the A and C factors
             _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
@@ -2590,7 +2590,7 @@ end
                 # Integral contribution from the cell containing sigma
                 integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind])
 
-                @views part3 = sum(integral_pieces[sigma_ind+1:plus_vcut_ind+1])
+                part3 = sum(@view integral_pieces[sigma_ind+1:plus_vcut_ind+1])
                 part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell
 
                 return part3
@@ -2616,12 +2616,12 @@ end
                 # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0.
                 correction0_integral_pieces[ivpa] = 0.0
             end
-            correction1_integral_pieces = @views @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1,ir]
-            correction2_integral_pieces = @views @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1,ir]
-            correction3_integral_pieces = @views @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1,ir]
-            correction4_integral_pieces = @views @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1,ir]
-            correction5_integral_pieces = @views @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1,ir]
-            correction6_integral_pieces = @views @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1,ir]
+            correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1,ir]
+            correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1,ir]
+            correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1,ir]
+            correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1,ir]
+            correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1,ir]
+            correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1,ir]
 
             alpha = get_part3_for_one_moment_lower(correction0_integral_pieces)
             beta = get_part3_for_one_moment_lower(correction1_integral_pieces)
@@ -2692,22 +2692,22 @@ end
                                           vpa_unnorm[sigma_ind+1:element_with_zero_boundary])
 
             # Interpolate to the 'far from zero' points
-            reversed_pdf = vpa.scratch[1:first_point_near_zero-1]
+            reversed_pdf = @view vpa.scratch[1:first_point_near_zero-1]
             @views interpolate_to_grid_1d!(reversed_pdf,
                                            reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:end],
                                            pdf[:,1,end,ir], vpa, vpa_spectral)
             reverse!(reversed_pdf)
-            pdf[1:first_point_near_zero-1,1,end,ir] .= reversed_pdf[1:first_point_near_zero-1]
+            pdf[1:first_point_near_zero-1,1,end,ir] .= reversed_pdf
 
             # Per-grid-point contributions to moment integrals
             # Note that we need to include the normalisation factor of 1/sqrt(pi) that
             # would be factored in by integrate_over_vspace(). This will need to
             # change/adapt when we support 2V as well as 1V.
             density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi)
-            flow_integral_pieces = @views @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[end,ir]
-            energy_integral_pieces = @views @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[end,ir]
-            cubic_integral_pieces = @views @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[end,ir]
-            quartic_integral_pieces = @views @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[end,ir]
+            flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[end,ir]
+            energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[end,ir]
+            cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[end,ir]
+            quartic_integral_pieces = @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[end,ir]
 
             function get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
                 # vcut_fraction is the fraction of the distance between plus_vcut_ind and
@@ -2737,12 +2737,12 @@ end
                         part1prime = -integral_pieces[plus_vcut_ind] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind])
                     end
 
-                    part1 = sum(integral_pieces[plus_vcut_ind+2:end]) + integral_vcut_cell_part1
+                    part1 = sum(@view integral_pieces[plus_vcut_ind+2:end]) + integral_vcut_cell_part1
 
                     # Integral contribution from the cell containing sigma
                     integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1])
 
-                    part2 = sum(integral_pieces[sigma_ind+2:plus_vcut_ind-1])
+                    part2 = sum(@view integral_pieces[sigma_ind+2:plus_vcut_ind-1])
                     part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind+1] + sigma_fraction * integral_sigma_cell
                     # part2prime is d(part2)/d(vcut)
                     part2prime = -part1prime
@@ -2849,10 +2849,10 @@ end
 
             # Need to recalculate these with the updated distribution function
             density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi)
-            flow_integral_pieces = @views @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[end,ir]
-            energy_integral_pieces = @views @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[end,ir]
-            cubic_integral_pieces = @views @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[end,ir]
-            quartic_integral_pieces = @views @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[end,ir]
+            flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[end,ir]
+            energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[end,ir]
+            cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[end,ir]
+            quartic_integral_pieces = @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[end,ir]
 
             # Update the part2 integrals since we've applied the A and C factors
             _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
@@ -2861,7 +2861,7 @@ end
                 # Integral contribution from the cell containing sigma
                 integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1])
 
-                @views part3 = sum(integral_pieces[minus_vcut_ind-1:sigma_ind-1])
+                part3 = sum(@view integral_pieces[minus_vcut_ind-1:sigma_ind-1])
                 part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell
 
                 return part3
@@ -2887,12 +2887,12 @@ end
                 # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0.
                 correction0_integral_pieces[ivpa] = 0.0
             end
-            correction1_integral_pieces = @views @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end,ir]
-            correction2_integral_pieces = @views @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end,ir]
-            correction3_integral_pieces = @views @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end,ir]
-            correction4_integral_pieces = @views @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end,ir]
-            correction5_integral_pieces = @views @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end,ir]
-            correction6_integral_pieces = @views @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end,ir]
+            correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end,ir]
+            correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end,ir]
+            correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end,ir]
+            correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end,ir]
+            correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end,ir]
+            correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end,ir]
 
             alpha = get_part3_for_one_moment_upper(correction0_integral_pieces)
             beta = get_part3_for_one_moment_upper(correction1_integral_pieces)

From 18f5f56fc482c03ba579bf25cc62b759a23637d9 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 21 Nov 2024 15:31:34 +0000
Subject: [PATCH 33/43] Improve type stability in
 enforce_boundary_condition_on_electron_pdf!()

---
 moment_kinetics/src/boundary_conditions.jl    |   3 +-
 .../src/electron_kinetic_equation.jl          | 168 +++++++++---------
 moment_kinetics/src/initial_conditions.jl     |  18 +-
 3 files changed, 99 insertions(+), 90 deletions(-)

diff --git a/moment_kinetics/src/boundary_conditions.jl b/moment_kinetics/src/boundary_conditions.jl
index 2bf467e74..a6fcbdc89 100644
--- a/moment_kinetics/src/boundary_conditions.jl
+++ b/moment_kinetics/src/boundary_conditions.jl
@@ -1029,7 +1029,7 @@ function enforce_v_boundary_condition_local!(f, bc, speed, v_diffusion, v, v_spe
 
         D0 = v_spectral.lobatto.Dmat[end,:]
         # adjust F(vpa = L/2) so that d F / d vpa = 0 at vpa = L/2
-        f[end] = -sum(D0[1:ngrid-1].*f[end-v.ngrid+1:end-1])/D0[v.ngrid]
+        f[end] = -sum(D0[1:v.ngrid-1].*f[end-v.ngrid+1:end-1])/D0[v.ngrid]
     elseif bc == "periodic"
         f[1] = 0.5*(f[1]+f[end])
         f[end] = f[1]
@@ -1038,6 +1038,7 @@ function enforce_v_boundary_condition_local!(f, bc, speed, v_diffusion, v, v_spe
     else
         error("Unsupported boundary condition option '$bc' for $(v.name)")
     end
+    return nothing
 end
 
 """
diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index d6b2c1a4a..b2eff367a 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -404,8 +404,8 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll
                                                  moments.electron.dens, composition)
             end
 
-            apply_electron_bc_and_constraints!(scratch[istage+1], phi, moments, z, vperp,
-                                               vpa, vperp_spectral, vpa_spectral,
+            apply_electron_bc_and_constraints!(scratch[istage+1], phi, moments, r, z,
+                                               vperp, vpa, vperp_spectral, vpa_spectral,
                                                vpa_advect, num_diss_params, composition)
 
             latest_pdf = scratch[istage+1].pdf_electron
@@ -1424,7 +1424,7 @@ global_rank[] == 0 && println("recalculating precon")
                            moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral,
                            vpa_spectral, vpa_advect, moments,
                            num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
-                           composition.me_over_mi; bc_constraints=false,
+                           composition.me_over_mi, ir; bc_constraints=false,
                            update_vcut=!krylov)
 
                 if evolve_ppar
@@ -2068,7 +2068,7 @@ function speedup_hack!(fvec_out, fvec_in, z_speedup_fac, z, vpa; evolve_ppar=fal
     return nothing
 end
 
-function apply_electron_bc_and_constraints!(this_scratch, phi, moments, z, vperp, vpa,
+function apply_electron_bc_and_constraints!(this_scratch, phi, moments, r, z, vperp, vpa,
                                             vperp_spectral, vpa_spectral, vpa_advect,
                                             num_diss_params, composition)
     latest_pdf = this_scratch.pdf_electron
@@ -2078,13 +2078,15 @@ function apply_electron_bc_and_constraints!(this_scratch, phi, moments, z, vperp
         latest_pdf[ivpa,ivperp,iz,ir] = max(latest_pdf[ivpa,ivperp,iz,ir], 0.0)
     end
 
-    # enforce the boundary condition(s) on the electron pdf
-    enforce_boundary_condition_on_electron_pdf!(latest_pdf, phi, moments.electron.vth,
-                                                moments.electron.upar, z, vperp, vpa,
-                                                vperp_spectral, vpa_spectral, vpa_advect,
-                                                moments,
-                                                num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
-                                                composition.me_over_mi)
+    for ir ∈ 1:r.n
+        # enforce the boundary condition(s) on the electron pdf
+        @views enforce_boundary_condition_on_electron_pdf!(
+                   latest_pdf[:,:,:,ir], phi[:,ir], moments.electron.vth[:,ir],
+                   moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral,
+                   vpa_spectral, vpa_advect, moments,
+                   num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
+                   composition.me_over_mi, ir)
+    end
 
     begin_r_z_region()
     A = moments.electron.constraints_A_coefficient
@@ -2118,7 +2120,7 @@ function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vp
                f_electron, phi, moments.electron.vth[:,ir], moments.electron.upar[:,ir],
                z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments,
                num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
-               composition.me_over_mi)
+               composition.me_over_mi, ir)
 
     begin_z_region()
     A = moments.electron.constraints_A_coefficient
@@ -2297,7 +2299,7 @@ end
 
 @timeit global_timer enforce_boundary_condition_on_electron_pdf!(
                          pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral,
-                         vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi;
+                         vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi, ir;
                          bc_constraints=true, update_vcut=true) = begin
 
     @boundscheck bc_constraints && !update_vcut && error("update_vcut is not used when bc_constraints=true, but update_vcut has non-default value")
@@ -2306,18 +2308,18 @@ end
 
     # Enforce velocity-space boundary conditions
     if vpa.n > 1
-        begin_r_z_vperp_region()
-        @loop_r_z_vperp ir iz ivperp begin
+        begin_z_vperp_region()
+        @loop_z_vperp iz ivperp begin
             # enforce the vpa BC
             # use that adv.speed independent of vpa
-            @views enforce_v_boundary_condition_local!(pdf[:,ivperp,iz,ir], vpa.bc,
+            @views enforce_v_boundary_condition_local!(pdf[:,ivperp,iz], vpa.bc,
                                                        vpa_adv[1].speed[:,ivperp,iz,ir],
                                                        vpa_diffusion, vpa, vpa_spectral)
         end
     end
     if vperp.n > 1
-        begin_r_z_vpa_region()
-        enforce_vperp_boundary_condition!(pdf, vperp.bc, vperp, vperp_spectral)
+        begin_z_vpa_region()
+        enforce_vperp_boundary_condition!(pdf, vperp.bc, vperp, vperp_spectral, ir)
     end
 
     if z.bc == "periodic"
@@ -2326,21 +2328,25 @@ end
     elseif z.bc == "constant"
         begin_r_vperp_vpa_region()
         density_offset = 1.0
-        vwidth = 1.0/sqrt(composition.me_over_mi)
+        vwidth = 1.0/sqrt(me_over_mi)
         dens = moments.electron.dens
         if z.irank == 0
-            speed = z_adv[1].speed
             @loop_r_vperp_vpa ir ivperp ivpa begin
-                if speed[1,ivpa,ivperp,ir] > 0.0
-                    pdf[ivpa,ivperp,1,ir,is] = density_offset / dens[1,ir] * vthe[1,ir] * exp(-(speed[1,ivpa,ivperp,ir]^2 + vperp.grid[ivperp]^2)/vwidth^2)
+                u = moments.electron.upar[1,ir]
+                vthe = moments.electron.vth[1,ir]
+                speed = vpa.grid[ivpa] * vthe + u
+                if speed > 0.0
+                    pdf[ivpa,ivperp,1,ir] = density_offset / dens[1,ir] * vthe[1,ir] * exp(-(speed^2 + vperp.grid[ivperp]^2)/vwidth^2)
                 end
             end
         end
         if z.irank == z.nrank - 1
-            speed = z_adv[is].speed
             @loop_r_vperp_vpa ir ivperp ivpa begin
-                if speed[end,ivpa,ivperp,ir] > 0.0
-                    pdf[ivpa,ivperp,end,ir,is] = density_offset / dens[end,ir] * vthe[end,ir] * exp(-(speed[end,ivpa,ivperp,ir]^2 + vperp.grid[ivperp]^2)/vwidth^2)
+                u = moments.electron.upar[end,ir]
+                vthe = moments.electron.vth[end,ir]
+                speed = vpa.grid[ivpa] * vthe + u
+                if speed > 0.0
+                    pdf[ivpa,ivperp,end,ir] = density_offset / dens[end,ir] * vthe[end,ir] * exp(-(speed^2 + vperp.grid[ivperp]^2)/vwidth^2)
                 end
             end
         end
@@ -2429,16 +2435,16 @@ end
             # Note that we need to include the normalisation factor of 1/sqrt(pi) that
             # would be factored in by integrate_over_vspace(). This will need to
             # change/adapt when we support 2V as well as 1V.
-            density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi)
-            flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[1,ir]
-            energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[1,ir]
-            cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[1,ir]
-            quartic_integral_pieces = @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[1,ir]
+            density_integral_pieces_lowerz = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi)
+            flow_integral_pieces_lowerz = @. vpa.scratch4 = density_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
+            energy_integral_pieces_lowerz = @. vpa.scratch5 = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
+            cubic_integral_pieces_lowerz = @. vpa.scratch6 = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
+            quartic_integral_pieces_lowerz = @. vpa.scratch7 = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
 
             function get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
                 # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and
                 # minus_vcut_ind where -vcut is.
-                vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm)
+                local vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm)
 
                 function get_for_one_moment(integral_pieces)
                     # Integral contributions from the cell containing vcut.
@@ -2476,18 +2482,18 @@ end
 
                     return part1, part1prime, part2, part2prime
                 end
-                a1, a1prime, a2, a2prime = get_for_one_moment(density_integral_pieces)
-                b1, b1prime, b2, _ = get_for_one_moment(flow_integral_pieces)
-                c1, c1prime, c2, c2prime = get_for_one_moment(energy_integral_pieces)
-                d1, d1prime, d2, _ = get_for_one_moment(cubic_integral_pieces)
-                e1, e1prime, e2, e2prime = get_for_one_moment(quartic_integral_pieces)
-
-                return get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1,
-                                                            b1prime, c1, c1prime, c2,
-                                                            c2prime, d1, d1prime, e1,
-                                                            e1prime, e2, e2prime,
-                                                            u_over_vt)...,
-                       a2, b2, c2, d2
+                this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_lowerz)
+                this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_lowerz)
+                this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_lowerz)
+                this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_lowerz)
+                this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_lowerz)
+
+                return get_residual_and_coefficients_for_bc(
+                           this_a1, this_a1prime, this_a2, this_a2prime, this_b1,
+                           this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime,
+                           this_d1, this_d1prime, this_e1, this_e1prime, this_e2,
+                           this_e2prime, u_over_vt)...,
+                       this_a2, this_b2, this_c2, this_d2
             end
 
             counter = 1
@@ -2577,11 +2583,11 @@ end
             # interpolation.
 
             # Need to recalculate these with the updated distribution function
-            density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi)
-            flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[1,ir]
-            energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[1,ir]
-            cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[1,ir]
-            quartic_integral_pieces = @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[1,ir]
+            @views @. density_integral_pieces_lowerz = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi)
+            @. flow_integral_pieces_lowerz = density_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
+            @. energy_integral_pieces_lowerz = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
+            @. cubic_integral_pieces_lowerz = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
+            @. quartic_integral_pieces_lowerz = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
 
             # Update the part2 integrals since we've applied the A and C factors
             _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
@@ -2595,10 +2601,10 @@ end
 
                 return part3
             end
-            a3 = get_part3_for_one_moment_lower(density_integral_pieces)
-            b3 = get_part3_for_one_moment_lower(flow_integral_pieces)
-            c3 = get_part3_for_one_moment_lower(energy_integral_pieces)
-            d3 = get_part3_for_one_moment_lower(cubic_integral_pieces)
+            a3 = get_part3_for_one_moment_lower(density_integral_pieces_lowerz)
+            b3 = get_part3_for_one_moment_lower(flow_integral_pieces_lowerz)
+            c3 = get_part3_for_one_moment_lower(energy_integral_pieces_lowerz)
+            d3 = get_part3_for_one_moment_lower(cubic_integral_pieces_lowerz)
 
             # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is.
             sharpness = 4.0
@@ -2703,16 +2709,16 @@ end
             # Note that we need to include the normalisation factor of 1/sqrt(pi) that
             # would be factored in by integrate_over_vspace(). This will need to
             # change/adapt when we support 2V as well as 1V.
-            density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi)
-            flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[end,ir]
-            energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[end,ir]
-            cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[end,ir]
-            quartic_integral_pieces = @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[end,ir]
+            density_integral_pieces_upperz = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi)
+            flow_integral_pieces_upperz = @. vpa.scratch4 = density_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
+            energy_integral_pieces_upperz = @. vpa.scratch5 = flow_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
+            cubic_integral_pieces_upperz = @. vpa.scratch6 = energy_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
+            quartic_integral_pieces_upperz = @. vpa.scratch7 = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
 
             function get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
                 # vcut_fraction is the fraction of the distance between plus_vcut_ind and
                 # plus_vcut_ind+1 where vcut is.
-                vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
+                local vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
 
                 function get_for_one_moment(integral_pieces)
                     # Integral contribution from the cell containing vcut
@@ -2749,18 +2755,18 @@ end
 
                     return part1, part1prime, part2, part2prime
                 end
-                a1, a1prime, a2, a2prime = get_for_one_moment(density_integral_pieces)
-                b1, b1prime, b2, _ = get_for_one_moment(flow_integral_pieces)
-                c1, c1prime, c2, c2prime = get_for_one_moment(energy_integral_pieces)
-                d1, d1prime, d2, _ = get_for_one_moment(cubic_integral_pieces)
-                e1, e1prime, e2, e2prime = get_for_one_moment(quartic_integral_pieces)
-
-                return get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1,
-                                                            b1prime, c1, c1prime, c2,
-                                                            c2prime, d1, d1prime, e1,
-                                                            e1prime, e2, e2prime,
-                                                            u_over_vt)...,
-                       a2, b2, c2, d2
+                this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_upperz)
+                this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_upperz)
+                this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_upperz)
+                this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_upperz)
+                this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_upperz)
+
+                return get_residual_and_coefficients_for_bc(
+                           this_a1, this_a1prime, this_a2, this_a2prime, this_b1,
+                           this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime,
+                           this_d1, this_d1prime, this_e1, this_e1prime, this_e2,
+                           this_e2prime, u_over_vt)...,
+                       this_a2, this_b2, this_c2, this_d2
             end
 
             counter = 1
@@ -2848,11 +2854,11 @@ end
             # interpolation.
 
             # Need to recalculate these with the updated distribution function
-            density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi)
-            flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[end,ir]
-            energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[end,ir]
-            cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[end,ir]
-            quartic_integral_pieces = @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[end,ir]
+            @views @. density_integral_pieces_upperz = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi)
+            @. flow_integral_pieces_upperz = density_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
+            @. energy_integral_pieces_upperz = flow_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
+            @. cubic_integral_pieces_upperz = energy_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
+            @. quartic_integral_pieces_upperz = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
 
             # Update the part2 integrals since we've applied the A and C factors
             _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
@@ -2866,10 +2872,10 @@ end
 
                 return part3
             end
-            a3 = get_part3_for_one_moment_upper(density_integral_pieces)
-            b3 = get_part3_for_one_moment_upper(flow_integral_pieces)
-            c3 = get_part3_for_one_moment_upper(energy_integral_pieces)
-            d3 = get_part3_for_one_moment_upper(cubic_integral_pieces)
+            a3 = get_part3_for_one_moment_upper(density_integral_pieces_upperz)
+            b3 = get_part3_for_one_moment_upper(flow_integral_pieces_upperz)
+            c3 = get_part3_for_one_moment_upper(energy_integral_pieces_upperz)
+            d3 = get_part3_for_one_moment_upper(cubic_integral_pieces_upperz)
 
             # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is.
             sharpness = 4.0
@@ -3008,8 +3014,8 @@ appropriate.
         update_electron_vth_temperature!(moments, scratch[2].electron_ppar,
                                          moments.electron.dens, composition)
     end
-    apply_electron_bc_and_constraints!(scratch[t_params.n_rk_stages+1], phi, moments, z,
-                                       vperp, vpa, vperp_spectral, vpa_spectral,
+    apply_electron_bc_and_constraints!(scratch[t_params.n_rk_stages+1], phi, moments, r,
+                                       z, vperp, vpa, vperp_spectral, vpa_spectral,
                                        vpa_advect, num_diss_params, composition)
     if evolve_ppar
         # Reset vth in the `moments` struct to the result consistent with full-accuracy RK
diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl
index 3102e137c..522710641 100644
--- a/moment_kinetics/src/initial_conditions.jl
+++ b/moment_kinetics/src/initial_conditions.jl
@@ -339,7 +339,7 @@ function initialize_electrons!(pdf, moments, fields, geometry, composition, r, z
         end
         init_electron_pdf_over_density_and_boundary_phi!(
             pdf.electron.norm, fields.phi, moments.electron.dens, moments.electron.upar,
-            moments.electron.vth, z, vpa, vperp, vperp_spectral, vpa_spectral,
+            moments.electron.vth, r, z, vpa, vperp, vperp_spectral, vpa_spectral,
             [(speed=speed,)], moments, num_diss_params,
             composition.me_over_mi, scratch_dummy)
     end
@@ -1564,8 +1564,8 @@ care is taken to ensure that the parallel boundary condition is satisfied;
 NB: as the electron pdf is obtained via a time-independent equation,
 this 'initital' value for the electron will just be the first guess in an iterative solution
 """
-function init_electron_pdf_over_density_and_boundary_phi!(pdf, phi, density, upar, vth, z,
-        vpa, vperp, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params,
+function init_electron_pdf_over_density_and_boundary_phi!(pdf, phi, density, upar, vth, r,
+        z, vpa, vperp, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params,
         me_over_mi, scratch_dummy; restart_from_boltzmann=false)
 
     if z.bc == "wall"
@@ -1581,11 +1581,13 @@ function init_electron_pdf_over_density_and_boundary_phi!(pdf, phi, density, upa
         end
         # Apply the sheath boundary condition to get cut-off boundary distribution
         # functions and boundary values of phi
-        enforce_boundary_condition_on_electron_pdf!(pdf, phi, vth, upar, z, vperp, vpa,
-                                                    vperp_spectral, vpa_spectral,
-                                                    vpa_advect, moments,
-                                                    num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
-                                                    me_over_mi)
+        for ir ∈ 1:r.n
+            @views enforce_boundary_condition_on_electron_pdf!(
+                       pdf[:,:,:,ir], phi[:,ir], vth[:,ir], upar[:,ir], z, vperp, vpa,
+                       vperp_spectral, vpa_spectral, vpa_advect, moments,
+                       num_diss_params.electron.vpa_dissipation_coefficient > 0.0,
+                       me_over_mi, ir)
+        end
 
         # Distribute the z-boundary pdf values to every process
         begin_serial_region()

From 3f3d7b553d0bb59fe6b22c804c6a0b6e9ef99378 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 21 Nov 2024 21:02:15 +0000
Subject: [PATCH 34/43] Improve type stability in electron_backward_euler!()

Several small updates. In particular, we need to be careful with
variables that are captured by a locally-defined function like
`residual_func!()`: changing what a captured variable refers to makes
the closure type-unstable.
---
 .../src/electron_kinetic_equation.jl          | 39 +++++++++----------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index b2eff367a..e7d41d820 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -1029,10 +1029,6 @@ global_rank[] == 0 && println("recalculating precon")
                     qpar = @view moments.electron.qpar[:,ir]
 
                     # Reconstruct w_∥^3 moment of g_e from already-calculated qpar
-                    buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1]
-                    buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1]
-                    buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1]
-                    buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1]
                     third_moment = scratch_dummy.buffer_z_1
                     dthird_moment_dz = scratch_dummy.buffer_z_2
                     begin_z_region()
@@ -1525,22 +1521,20 @@ global_rank[] == 0 && println("recalculating precon")
                     v_unnorm = vpa.scratch
                     zero = 1.0e-14
                     if z.irank == 0
-                        iz = 1
-                        v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir],
-                                                    moments.electron.upar[iz,ir], true, true)
+                        v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[1,ir],
+                                                    moments.electron.upar[1,ir], true, true)
                         @loop_vperp_vpa ivperp ivpa begin
                             if v_unnorm[ivpa] > -zero
-                                f_electron_residual[ivpa,ivperp,iz] = 0.0
+                                f_electron_residual[ivpa,ivperp,1] = 0.0
                             end
                         end
                     end
                     if z.irank == z.nrank - 1
-                        iz = z.n
-                        v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir],
-                                                    moments.electron.upar[iz,ir], true, true)
+                        v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[end,ir],
+                                                    moments.electron.upar[end,ir], true, true)
                         @loop_vperp_vpa ivperp ivpa begin
                             if v_unnorm[ivpa] < zero
-                                f_electron_residual[ivpa,ivperp,iz] = 0.0
+                                f_electron_residual[ivpa,ivperp,end] = 0.0
                             end
                         end
                     end
@@ -1630,15 +1624,18 @@ global_rank[] == 0 && println("recalculating precon")
                 nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval
 
                 # Swap old_scratch and new_scratch so that the next step restarts from the
-                # same state
-                scratch[1] = new_scratch
-                scratch[t_params.n_rk_stages+1] = old_scratch
-                old_scratch = scratch[1]
-                new_scratch = scratch[t_params.n_rk_stages+1]
-                f_electron_old = @view old_scratch.pdf_electron[:,:,:,ir]
-                f_electron_new = @view new_scratch.pdf_electron[:,:,:,ir]
-                electron_ppar_old = @view old_scratch.electron_ppar[:,ir]
-                electron_ppar_new = @view new_scratch.electron_ppar[:,ir]
+                # same state. Copy values over here rather than just swapping references
+                # to arrays, because f_electron_old and electron_ppar_old are captured by
+                # residual_func!() above, so any change in the things they refer to will
+                # cause type instability in residual_func!().
+                begin_z_vperp_vpa_region()
+                @loop_z_vperp_vpa iz ivperp ivpa begin
+                    f_electron_new[ivpa,ivperp,iz] = f_electron_old[ivpa,ivperp,iz]
+                end
+                begin_z_region()
+                @loop_z iz begin
+                    electron_ppar_new[iz] = electron_ppar_old[iz]
+                end
             end
 
             apply_electron_bc_and_constraints_no_r!(f_electron_new, phi, moments, z,

From 3184e4aea5ffbd1695ca4ba8a0665ad61c996e1c Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Mon, 25 Nov 2024 11:11:55 +0000
Subject: [PATCH 35/43] Make maximum number of pseudotimesteps and maximum
 pseudotime settable

Add input parameters that control the maximum number of pseudotimesteps
and the maximum total pseudotime for each kinetic electron
pseudotimestepping loop.
---
 moment_kinetics/src/initial_conditions.jl    |  2 +-
 moment_kinetics/src/input_structs.jl         |  2 ++
 moment_kinetics/src/moment_kinetics_input.jl |  2 ++
 moment_kinetics/src/time_advance.jl          | 22 +++++++++++++++-----
 4 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl
index 522710641..df191e544 100644
--- a/moment_kinetics/src/initial_conditions.jl
+++ b/moment_kinetics/src/initial_conditions.jl
@@ -668,7 +668,7 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field
         ##max_electron_pdf_iterations = 10000
         #max_electron_sim_time = nothing
         max_electron_pdf_iterations = nothing
-        max_electron_sim_time = 2.0
+        max_electron_sim_time = max(2.0, t_params.electron.max_pseudotime)
         if t_params.electron.debug_io !== nothing
             io_electron = setup_electron_io(t_params.electron.debug_io[1], vpa, vperp, z,
                                             r, composition, collisions,
diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl
index 2bf4cf57c..682830365 100644
--- a/moment_kinetics/src/input_structs.jl
+++ b/moment_kinetics/src/input_structs.jl
@@ -86,6 +86,8 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero
     decrease_dt_iteration_threshold::mk_int
     increase_dt_iteration_threshold::mk_int
     cap_factor_ion_dt::mk_float
+    max_pseudotimesteps::mk_int
+    max_pseudotime::mk_float
     write_after_fixed_step_count::Bool
     error_sum_zero::Terrorsum
     split_operators::Bool
diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl
index 18e78aeb6..e1008bf0e 100644
--- a/moment_kinetics/src/moment_kinetics_input.jl
+++ b/moment_kinetics/src/moment_kinetics_input.jl
@@ -210,6 +210,8 @@ function mk_input(input_dict=OptionsDict(); save_inputs_to_txt=false, ignore_MPI
         decrease_dt_iteration_threshold=100,
         increase_dt_iteration_threshold=20,
         cap_factor_ion_dt=10.0,
+        max_pseudotimesteps=1000,
+        max_pseudotime=1.0e-2,
         no_restart=false,
         debug_io=false,
        )
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index f3592dc80..0cd59185d 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -429,6 +429,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
         decrease_dt_iteration_threshold = t_input["decrease_dt_iteration_threshold"]
         increase_dt_iteration_threshold = t_input["increase_dt_iteration_threshold"]
         cap_factor_ion_dt = mk_float(t_input["cap_factor_ion_dt"])
+        max_pseudotimesteps = t_input["max_pseudotimesteps"]
+        max_pseudotime = t_input["max_pseudotime"]
         electron_t_params = nothing
     elseif electron === false
         debug_io = nothing
@@ -437,6 +439,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
         decrease_dt_iteration_threshold = -1
         increase_dt_iteration_threshold = typemax(mk_int)
         cap_factor_ion_dt = Inf
+        max_pseudotimesteps = -1
+        max_pseudotime = Inf
         electron_t_params = nothing
     else
         debug_io = nothing
@@ -470,6 +474,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
         decrease_dt_iteration_threshold = -1
         increase_dt_iteration_threshold = typemax(mk_int)
         cap_factor_ion_dt = Inf
+        max_pseudotimesteps = -1
+        max_pseudotime = Inf
         electron_t_params = electron
     end
     return time_info(n_variables, t_input["nstep"], end_time, t, dt, previous_dt,
@@ -495,7 +501,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
                      electron_preconditioner_type,
                      mk_float(t_input["constraint_forcing_rate"]),
                      decrease_dt_iteration_threshold, increase_dt_iteration_threshold,
-                     mk_float(cap_factor_ion_dt), t_input["write_after_fixed_step_count"],
+                     mk_float(cap_factor_ion_dt), mk_int(max_pseudotimesteps),
+                     mk_float(max_pseudotime), t_input["write_after_fixed_step_count"],
                      error_sum_zero, t_input["split_operators"],
                      t_input["steady_state_residual"],
                      mk_float(t_input["converged_residual_value"]),
@@ -2962,8 +2969,13 @@ end
 
     n_rk_stages = t_params.n_rk_stages
 
-    max_electron_pdf_iterations = 1000
-    max_electron_sim_time = 1.0e-3
+    if t_params.electron !== nothing
+        max_electron_pdf_iterations = t_params.electron.max_pseudotimesteps
+        max_electron_sim_time = t_params.electron.max_pseudotime
+    else
+        max_electron_pdf_iterations = nothing
+        max_electron_sim_time = nothing
+    end
 
     first_scratch = scratch[1]
     @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin
@@ -3556,8 +3568,8 @@ end
                                              t_params.electron, t_params.dt[],
                                              nl_solver_params.electron_advance)
     elseif t_params.implicit_electron_ppar
-        max_electron_pdf_iterations = 1000
-        max_electron_sim_time = 1.0e-3
+        max_electron_pdf_iterations = t_params.electron.max_pseudotimesteps
+        max_electron_sim_time = t_params.electron.max_pseudotime
         electron_success = update_electron_pdf!(scratch_electron, pdf.electron.norm,
                                                 moments, fields.phi, r, z, vperp, vpa,
                                                 z_spectral, vperp_spectral, vpa_spectral,

From cc9f967161974d04b059e2d49be10ac12515a776 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Mon, 25 Nov 2024 21:27:30 +0000
Subject: [PATCH 36/43] Reset timers at beginning of run_moment_kinetics()

Helps if something didn't previously clean up the timers.
---
 moment_kinetics/src/moment_kinetics.jl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl
index 5d5e9d9f5..10cdbbfca 100644
--- a/moment_kinetics/src/moment_kinetics.jl
+++ b/moment_kinetics/src/moment_kinetics.jl
@@ -123,6 +123,9 @@ function run_moment_kinetics(input_dict::OptionsDict; restart=false, restart_tim
         check_so_newer_than_code()
     end
 
+    # Reset timers in case there was a previous run which did not clean them up.
+    reset_mk_timers!()
+
     mk_state = nothing
     try
         @timeit global_timer "moment_kinetics" begin

From 63d8808a88183d2c1da76b6d68c5bb9b64d881a8 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Wed, 27 Nov 2024 11:14:18 +0000
Subject: [PATCH 37/43] Remove loop over ir in
 enforce_boundary_condition_on_electron_pdf!()

This boundary condition function should act only at one `ir`, which is
passed as an argument, so there should be no loop over `ir`. This bug
has not impacted simulations because, so far, we have only used r.n=0
for kinetic electrons.
---
 .../src/electron_kinetic_equation.jl          | 194 +++++++++---------
 1 file changed, 97 insertions(+), 97 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index e7d41d820..fad344e69 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -1408,7 +1408,7 @@ global_rank[] == 0 && println("recalculating precon")
                     @loop_z iz begin
                         # update the electron thermal speed using the updated electron
                         # parallel pressure
-                        this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_newvar[iz,ir] /
+                        this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_newvar[iz] /
                                                    (this_dens[iz,ir] *
                                                     composition.me_over_mi)))
                     end
@@ -2328,22 +2328,22 @@ end
         vwidth = 1.0/sqrt(me_over_mi)
         dens = moments.electron.dens
         if z.irank == 0
-            @loop_r_vperp_vpa ir ivperp ivpa begin
-                u = moments.electron.upar[1,ir]
-                vthe = moments.electron.vth[1,ir]
+            @loop_vperp_vpa ivperp ivpa begin
+                u = moments.electron.upar[1]
+                vthe = moments.electron.vth[1]
                 speed = vpa.grid[ivpa] * vthe + u
                 if speed > 0.0
-                    pdf[ivpa,ivperp,1,ir] = density_offset / dens[1,ir] * vthe[1,ir] * exp(-(speed^2 + vperp.grid[ivperp]^2)/vwidth^2)
+                    pdf[ivpa,ivperp,1] = density_offset / dens[1] * vthe[1] * exp(-(speed^2 + vperp.grid[ivperp]^2)/vwidth^2)
                 end
             end
         end
         if z.irank == z.nrank - 1
-            @loop_r_vperp_vpa ir ivperp ivpa begin
-                u = moments.electron.upar[end,ir]
-                vthe = moments.electron.vth[end,ir]
+            @loop_vperp_vpa ivperp ivpa begin
+                u = moments.electron.upar[end]
+                vthe = moments.electron.vth[end]
                 speed = vpa.grid[ivpa] * vthe + u
                 if speed > 0.0
-                    pdf[ivpa,ivperp,end,ir] = density_offset / dens[end,ir] * vthe[end,ir] * exp(-(speed^2 + vperp.grid[ivperp]^2)/vwidth^2)
+                    pdf[ivpa,ivperp,end] = density_offset / dens[end] * vthe[end] * exp(-(speed^2 + vperp.grid[ivperp]^2)/vwidth^2)
                 end
             end
         end
@@ -2358,7 +2358,7 @@ end
     # the electrostatic potential at the boundary, which determines the critical speed, is unknown a priori;
     # use the constraint that the first moment of the normalised pdf be zero to choose the potential.
 
-    begin_r_region()
+    begin_serial_region()
 
     newton_max_its = 100
 
@@ -2395,11 +2395,12 @@ end
         return epsilon, epsilonprime, A, C
     end
 
-    if z.irank == 0
-        if z.bc != "wall"
-            error("Options other than wall, constant or z-periodic bc not implemented yet for electrons")
-        end
-        @loop_r ir begin
+    @serial_region begin
+        if z.irank == 0
+            if z.bc != "wall"
+                error("Options other than wall, constant or z-periodic bc not implemented yet for electrons")
+            end
+
             # Impose sheath-edge boundary condition, while also imposing moment
             # constraints and determining the cut-off velocity (and therefore the sheath
             # potential).
@@ -2415,28 +2416,28 @@ end
             # 'far from zero' means larger values of v_∥.
 
             # Interpolate to the 'near zero' points
-            @views interpolate_symmetric!(pdf[sigma_ind:last_point_near_zero,1,1,ir],
+            @views interpolate_symmetric!(pdf[sigma_ind:last_point_near_zero,1,1],
                                           vpa_unnorm[sigma_ind:last_point_near_zero],
-                                          pdf[element_with_zero_boundary:sigma_ind-1,1,1,ir],
+                                          pdf[element_with_zero_boundary:sigma_ind-1,1,1],
                                           vpa_unnorm[element_with_zero_boundary:sigma_ind-1])
 
             # Interpolate to the 'far from zero' points
             reversed_pdf_far_from_zero = @view vpa.scratch[last_point_near_zero+1:end]
             @views interpolate_to_grid_1d!(reversed_pdf_far_from_zero,
                                            reversed_wpa_of_minus_vpa[1:vpa.n-last_point_near_zero],
-                                           pdf[:,1,1,ir], vpa, vpa_spectral)
+                                           pdf[:,1,1], vpa, vpa_spectral)
             reverse!(reversed_pdf_far_from_zero)
-            pdf[last_point_near_zero+1:end,1,1,ir] .= reversed_pdf_far_from_zero
+            pdf[last_point_near_zero+1:end,1,1] .= reversed_pdf_far_from_zero
 
             # Per-grid-point contributions to moment integrals
             # Note that we need to include the normalisation factor of 1/sqrt(pi) that
             # would be factored in by integrate_over_vspace(). This will need to
             # change/adapt when we support 2V as well as 1V.
-            density_integral_pieces_lowerz = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi)
-            flow_integral_pieces_lowerz = @. vpa.scratch4 = density_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
-            energy_integral_pieces_lowerz = @. vpa.scratch5 = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
-            cubic_integral_pieces_lowerz = @. vpa.scratch6 = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
-            quartic_integral_pieces_lowerz = @. vpa.scratch7 = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
+            density_integral_pieces_lowerz = @views @. vpa.scratch3 = pdf[:,1,1] * vpa.wgts / sqrt(pi)
+            flow_integral_pieces_lowerz = @. vpa.scratch4 = density_integral_pieces_lowerz * vpa_unnorm / vthe[1]
+            energy_integral_pieces_lowerz = @. vpa.scratch5 = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1]
+            cubic_integral_pieces_lowerz = @. vpa.scratch6 = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1]
+            quartic_integral_pieces_lowerz = @. vpa.scratch7 = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1]
 
             function get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
                 # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and
@@ -2504,17 +2505,17 @@ end
                     # respect to vcut
                     delta_v = - epsilon / epsilonprime
 
-                    if vcut > vthe[1,ir] && epsilonprime < 0.0
+                    if vcut > vthe[1] && epsilonprime < 0.0
                         # epsilon should be increasing with vcut at epsilon=0, so if
                         # epsilonprime is negative, the solution is actually at a lower vcut -
                         # at larger vcut, epsilon will just tend to 0 but never reach it.
-                        delta_v = -0.1 * vthe[1,ir]
+                        delta_v = -0.1 * vthe[1]
                     end
 
                     # Prevent the step size from getting too big, to make Newton iteration
                     # more robust.
-                    delta_v = min(delta_v, 0.1 * vthe[1,ir])
-                    delta_v = max(delta_v, -0.1 * vthe[1,ir])
+                    delta_v = min(delta_v, 0.1 * vthe[1])
+                    delta_v = max(delta_v, -0.1 * vthe[1])
 
                     vcut = vcut + delta_v
                     minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut)
@@ -2552,22 +2553,22 @@ end
 
             # Adjust pdf so that after reflecting and cutting off tail, it will obey the
             # constraints.
-            @. pdf[:,1,1,ir] *= A + C * vpa_unnorm^2 / vthe[1,ir]^2
+            @. pdf[:,1,1] *= A + C * vpa_unnorm^2 / vthe[1]^2
 
             plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut)
-            pdf[plus_vcut_ind+2:end,1,1,ir] .= 0.0
+            pdf[plus_vcut_ind+2:end,1,1] .= 0.0
             # vcut_fraction is the fraction of the distance between plus_vcut_ind and
             # plus_vcut_ind+1 where vcut is.
             vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm)
             if vcut_fraction > 0.5
-                pdf[plus_vcut_ind+1,1,1,ir] *= vcut_fraction - 0.5
+                pdf[plus_vcut_ind+1,1,1] *= vcut_fraction - 0.5
             else
-                pdf[plus_vcut_ind+1,1,1,ir] = 0.0
-                pdf[plus_vcut_ind,1,1,ir] *= vcut_fraction + 0.5
+                pdf[plus_vcut_ind+1,1,1] = 0.0
+                pdf[plus_vcut_ind,1,1] *= vcut_fraction + 0.5
             end
 
             # update the electrostatic potential at the boundary to be the value corresponding to the updated cutoff velocity
-            phi[1,ir] = me_over_mi * vcut^2
+            phi[1] = me_over_mi * vcut^2
 
             moments.electron.constraints_A_coefficient[1,ir] = A
             moments.electron.constraints_B_coefficient[1,ir] = 0.0
@@ -2580,11 +2581,11 @@ end
             # interpolation.
 
             # Need to recalculate these with the updated distribution function
-            @views @. density_integral_pieces_lowerz = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi)
-            @. flow_integral_pieces_lowerz = density_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
-            @. energy_integral_pieces_lowerz = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
-            @. cubic_integral_pieces_lowerz = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
-            @. quartic_integral_pieces_lowerz = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir]
+            @views @. density_integral_pieces_lowerz = pdf[:,1,1] * vpa.wgts / sqrt(pi)
+            @. flow_integral_pieces_lowerz = density_integral_pieces_lowerz * vpa_unnorm / vthe[1]
+            @. energy_integral_pieces_lowerz = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1]
+            @. cubic_integral_pieces_lowerz = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1]
+            @. quartic_integral_pieces_lowerz = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1]
 
             # Update the part2 integrals since we've applied the A and C factors
             _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind)
@@ -2605,7 +2606,7 @@ end
 
             # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is.
             sharpness = 4.0
-            correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[1,ir]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[1,ir]^2)
+            correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[1]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[1]^2)
             for ivpa ∈ 1:sigma_ind
                 # We only add the corrections to 'part3', so zero them out for negative v_∥.
                 # I think this is only actually significant for `sigma_ind-1` and
@@ -2619,12 +2620,12 @@ end
                 # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0.
                 correction0_integral_pieces[ivpa] = 0.0
             end
-            correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1,ir]
-            correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1,ir]
-            correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1,ir]
-            correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1,ir]
-            correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1,ir]
-            correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1,ir]
+            correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1]
+            correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1]
+            correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1]
+            correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1]
+            correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1]
+            correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1]
 
             alpha = get_part3_for_one_moment_lower(correction0_integral_pieces)
             beta = get_part3_for_one_moment_lower(correction1_integral_pieces)
@@ -2649,31 +2650,30 @@ end
                        ] \ [a2-a3, -b2-b3, c2-c3, -d2-d3]
             A, B, C, D = solution
             for ivpa ∈ sigma_ind+1:plus_vcut_ind+1
-                v_over_vth = vpa_unnorm[ivpa]/vthe[1,ir]
-                pdf[ivpa,1,1,ir] = pdf[ivpa,1,1,ir] +
+                v_over_vth = vpa_unnorm[ivpa]/vthe[1]
+                pdf[ivpa,1,1] = pdf[ivpa,1,1] +
                                    (A
                                     + B * v_over_vth
                                     + C * v_over_vth^2
                                     + D * v_over_vth^3) *
                                    sharpness * v_over_vth^2 / (1.0 + sharpness * v_over_vth^2) *
-                                   pdf[ivpa,1,1,ir]
+                                   pdf[ivpa,1,1]
             end
         end
-    end
 
-    # next enforce the boundary condition at z_max.
-    # this involves forcing the pdf to be zero for electrons travelling faster than the max speed
-    # they could attain by accelerating in the electric field between the wall and the simulation boundary;
-    # for electrons with negative velocities less than this critical value, they must have the same
-    # pdf as electrons with positive velocities of the same magnitude.
-    # the electrostatic potential at the boundary, which determines the critical speed, is unknown a priori;
-    # use the constraint that the first moment of the normalised pdf be zero to choose the potential.
-    
-    if z.irank == z.nrank - 1
-        if z.bc != "wall"
-            error("Options other than wall or z-periodic bc not implemented yet for electrons")
-        end
-        @loop_r ir begin
+        # next enforce the boundary condition at z_max.
+        # this involves forcing the pdf to be zero for electrons travelling faster than the max speed
+        # they could attain by accelerating in the electric field between the wall and the simulation boundary;
+        # for electrons with negative velocities less than this critical value, they must have the same
+        # pdf as electrons with positive velocities of the same magnitude.
+        # the electrostatic potential at the boundary, which determines the critical speed, is unknown a priori;
+        # use the constraint that the first moment of the normalised pdf be zero to choose the potential.
+        
+        if z.irank == z.nrank - 1
+            if z.bc != "wall"
+                error("Options other than wall or z-periodic bc not implemented yet for electrons")
+            end
+
             # Impose sheath-edge boundary condition, while also imposing moment
             # constraints and determining the cut-off velocity (and therefore the sheath
             # potential).
@@ -2689,28 +2689,28 @@ end
             # 'far from zero' means more negative values of v_∥.
 
             # Interpolate to the 'near zero' points
-            @views interpolate_symmetric!(pdf[first_point_near_zero:sigma_ind,1,end,ir],
+            @views interpolate_symmetric!(pdf[first_point_near_zero:sigma_ind,1,end],
                                           vpa_unnorm[first_point_near_zero:sigma_ind],
-                                          pdf[sigma_ind+1:element_with_zero_boundary,1,end,ir],
+                                          pdf[sigma_ind+1:element_with_zero_boundary,1,end],
                                           vpa_unnorm[sigma_ind+1:element_with_zero_boundary])
 
             # Interpolate to the 'far from zero' points
             reversed_pdf = @view vpa.scratch[1:first_point_near_zero-1]
             @views interpolate_to_grid_1d!(reversed_pdf,
                                            reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:end],
-                                           pdf[:,1,end,ir], vpa, vpa_spectral)
+                                           pdf[:,1,end], vpa, vpa_spectral)
             reverse!(reversed_pdf)
-            pdf[1:first_point_near_zero-1,1,end,ir] .= reversed_pdf
+            pdf[1:first_point_near_zero-1,1,end] .= reversed_pdf
 
             # Per-grid-point contributions to moment integrals
             # Note that we need to include the normalisation factor of 1/sqrt(pi) that
             # would be factored in by integrate_over_vspace(). This will need to
             # change/adapt when we support 2V as well as 1V.
-            density_integral_pieces_upperz = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi)
-            flow_integral_pieces_upperz = @. vpa.scratch4 = density_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
-            energy_integral_pieces_upperz = @. vpa.scratch5 = flow_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
-            cubic_integral_pieces_upperz = @. vpa.scratch6 = energy_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
-            quartic_integral_pieces_upperz = @. vpa.scratch7 = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
+            density_integral_pieces_upperz = @views @. vpa.scratch3 = pdf[:,1,end] * vpa.wgts / sqrt(pi)
+            flow_integral_pieces_upperz = @. vpa.scratch4 = density_integral_pieces_upperz * vpa_unnorm / vthe[end]
+            energy_integral_pieces_upperz = @. vpa.scratch5 = flow_integral_pieces_upperz * vpa_unnorm / vthe[end]
+            cubic_integral_pieces_upperz = @. vpa.scratch6 = energy_integral_pieces_upperz * vpa_unnorm / vthe[end]
+            quartic_integral_pieces_upperz = @. vpa.scratch7 = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end]
 
             function get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
                 # vcut_fraction is the fraction of the distance between plus_vcut_ind and
@@ -2775,17 +2775,17 @@ end
                     # respect to vcut
                     delta_v = - epsilon / epsilonprime
 
-                    if vcut > vthe[1,ir] && epsilonprime > 0.0
+                    if vcut > vthe[1] && epsilonprime > 0.0
                         # epsilon should be decreasing with vcut at epsilon=0, so if
                         # epsilonprime is positive, the solution is actually at a lower vcut -
                         # at larger vcut, epsilon will just tend to 0 but never reach it.
-                        delta_v = -0.1 * vthe[1,ir]
+                        delta_v = -0.1 * vthe[1]
                     end
 
                     # Prevent the step size from getting too big, to make Newton iteration
                     # more robust.
-                    delta_v = min(delta_v, 0.1 * vthe[end,ir])
-                    delta_v = max(delta_v, -0.1 * vthe[end,ir])
+                    delta_v = min(delta_v, 0.1 * vthe[end])
+                    delta_v = max(delta_v, -0.1 * vthe[end])
 
                     vcut = vcut + delta_v
                     plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut)
@@ -2823,22 +2823,22 @@ end
 
             # Adjust pdf so that after reflecting and cutting off tail, it will obey the
             # constraints.
-            @. pdf[:,1,end,ir] *= A + C * vpa_unnorm^2 / vthe[end,ir]^2
+            @. pdf[:,1,end] *= A + C * vpa_unnorm^2 / vthe[end]^2
 
             minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut)
-            pdf[1:minus_vcut_ind-2,1,end,ir] .= 0.0
+            pdf[1:minus_vcut_ind-2,1,end] .= 0.0
             # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and
             # minus_vcut_ind where -vcut is.
             vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm)
             if vcut_fraction < 0.5
-                pdf[minus_vcut_ind-1,1,end,ir] *= 0.5 - vcut_fraction
+                pdf[minus_vcut_ind-1,1,end] *= 0.5 - vcut_fraction
             else
-                pdf[minus_vcut_ind-1,1,end,ir] = 0.0
-                pdf[minus_vcut_ind,1,end,ir] *= 1.5 - vcut_fraction
+                pdf[minus_vcut_ind-1,1,end] = 0.0
+                pdf[minus_vcut_ind,1,end] *= 1.5 - vcut_fraction
             end
 
             # update the electrostatic potential at the boundary to be the value corresponding to the updated cutoff velocity
-            phi[end,ir] = me_over_mi * vcut^2
+            phi[end] = me_over_mi * vcut^2
 
             moments.electron.constraints_A_coefficient[end,ir] = A
             moments.electron.constraints_B_coefficient[end,ir] = 0.0
@@ -2851,11 +2851,11 @@ end
             # interpolation.
 
             # Need to recalculate these with the updated distribution function
-            @views @. density_integral_pieces_upperz = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi)
-            @. flow_integral_pieces_upperz = density_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
-            @. energy_integral_pieces_upperz = flow_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
-            @. cubic_integral_pieces_upperz = energy_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
-            @. quartic_integral_pieces_upperz = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end,ir]
+            @views @. density_integral_pieces_upperz = pdf[:,1,end] * vpa.wgts / sqrt(pi)
+            @. flow_integral_pieces_upperz = density_integral_pieces_upperz * vpa_unnorm / vthe[end]
+            @. energy_integral_pieces_upperz = flow_integral_pieces_upperz * vpa_unnorm / vthe[end]
+            @. cubic_integral_pieces_upperz = energy_integral_pieces_upperz * vpa_unnorm / vthe[end]
+            @. quartic_integral_pieces_upperz = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end]
 
             # Update the part2 integrals since we've applied the A and C factors
             _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind)
@@ -2876,7 +2876,7 @@ end
 
             # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is.
             sharpness = 4.0
-            correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[end,ir]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[end,ir]^2)
+            correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[end]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[end]^2)
             for ivpa ∈ sigma_ind:vpa.n
                 # We only add the corrections to 'part3', so zero them out for positive v_∥.
                 # I think this is only actually significant for `sigma_ind` and
@@ -2890,12 +2890,12 @@ end
                 # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0.
                 correction0_integral_pieces[ivpa] = 0.0
             end
-            correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end,ir]
-            correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end,ir]
-            correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end,ir]
-            correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end,ir]
-            correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end,ir]
-            correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end,ir]
+            correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end]
+            correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end]
+            correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end]
+            correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end]
+            correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end]
+            correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end]
 
             alpha = get_part3_for_one_moment_upper(correction0_integral_pieces)
             beta = get_part3_for_one_moment_upper(correction1_integral_pieces)
@@ -2920,14 +2920,14 @@ end
                        ] \ [a2-a3, -b2-b3, c2-c3, -d2-d3]
             A, B, C, D = solution
             for ivpa ∈ minus_vcut_ind-1:sigma_ind-1
-                v_over_vth = vpa_unnorm[ivpa]/vthe[end,ir]
-                pdf[ivpa,1,end,ir] = pdf[ivpa,1,end,ir] +
+                v_over_vth = vpa_unnorm[ivpa]/vthe[end]
+                pdf[ivpa,1,end] = pdf[ivpa,1,end] +
                                    (A
                                     + B * v_over_vth
                                     + C * v_over_vth^2
                                     + D * v_over_vth^3) *
                                    sharpness * v_over_vth^2 / (1.0 + sharpness * v_over_vth^2) *
-                                   pdf[ivpa,1,end,ir]
+                                   pdf[ivpa,1,end]
             end
         end
     end

From 81418c45ced0a692e290ffcf050b141165a00292 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Sat, 23 Nov 2024 17:18:40 +0000
Subject: [PATCH 38/43] Fix adaptive-timestep IMEX for kinetic electrons

It appears (?) that we can skip the problematic solve (the one that was
run to update the electron shape function without updating the electron
pressure) on the 'explicit first stage' of ESDIRK schemes. This might
reduce the order of accuracy of the scheme somehow, as the qpar_e used
for the 'explicit' calculation of the time derivative of electron_ppar
is taken from the most recent implicit solve rather than from a solve
updated with the new ion/electron profiles. However, the change is
probably small (?) and at least the solver does run now - it is useful
to have an adaptive ion timestep as it may let the code recover by
reducing the ion timestep when an electron implicit solve fails to
converge.
---
 moment_kinetics/src/time_advance.jl           |   4 +-
 .../test/kinetic_electron_tests.jl            | 392 +++++++++---------
 2 files changed, 202 insertions(+), 194 deletions(-)

diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index 0cd59185d..8f2d7095b 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -3116,9 +3116,7 @@ end
                                 || (istage == n_rk_stages && t_params.implicit_coefficient_is_zero[1])
                                 || t_params.implicit_coefficient_is_zero[istage+1])
         update_electrons = (t_params.rk_coefs_implicit === nothing
-                            || !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar)
-                            || (istage < n_rk_stages && t_params.implicit_coefficient_is_zero[istage+1])
-                            || (istage == n_rk_stages && t_params.implicit_coefficient_is_zero[1]))
+                            || !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar))
         diagnostic_moments = diagnostic_checks && istage == n_rk_stages
         success = apply_all_bcs_constraints_update_moments!(
             scratch[istage+1], pdf, moments, fields, boundary_distributions,
diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl
index 33738748d..63c6b6ee2 100644
--- a/moment_kinetics/test/kinetic_electron_tests.jl
+++ b/moment_kinetics/test/kinetic_electron_tests.jl
@@ -123,9 +123,14 @@ kinetic_input["nonlinear_solver"] = OptionsDict("nonlinear_max_iterations" => 10
                                                 "rtol" => 1.0e-8,
                                                 "atol" => 1.0e-14,
                                                 "linear_restart" => 5,
-                                                "preconditioner_update_interval" => 1000,
+                                                "preconditioner_update_interval" => 100,
                                                )
 
+kinetic_input_adaptive_timestep = deepcopy(kinetic_input)
+kinetic_input_adaptive_timestep["output"]["run_name"] = "kinetic_electron_adaptive_timestep_test"
+kinetic_input_adaptive_timestep["timestepping"]["type"] = "KennedyCarpenterARK324"
+kinetic_input_adaptive_timestep["timestepping"]["maximum_dt"] = 1.0e-5
+
 
 """
 Run a test for a single set of parameters
@@ -136,204 +141,209 @@ function run_test()
     this_boltzmann_input = deepcopy(boltzmann_input)
     this_boltzmann_input["output"]["base_directory"] = test_output_directory
 
-    this_kinetic_input = deepcopy(kinetic_input)
-    this_kinetic_input["output"]["base_directory"] = test_output_directory
-
-    # Provide some progress info
-    println("    - testing kinetic electrons")
-
-    # Suppress console output while running? Test is pretty long, so maybe better to leave
-    # intermediate output visible. Leaving `quietoutput()` commented out for now...
+    # Suppress console output while running.
     quietoutput() do
         run_moment_kinetics(this_boltzmann_input)
+    end
 
-        restart_from_directory = joinpath(this_boltzmann_input["output"]["base_directory"], this_boltzmann_input["output"]["run_name"])
-        restart_from_file_pattern = this_boltzmann_input["output"]["run_name"] * ".dfns*.h5"
-        restart_from_file = glob(restart_from_file_pattern, restart_from_directory)[1]
+    for (this_kinetic_input, label, tol) ∈ ((deepcopy(kinetic_input), "", 1.0e-6),
+                                             (deepcopy(kinetic_input_adaptive_timestep), "adaptive timestep", 1.0e-4))
+        # Provide some progress info
+        println("    - testing kinetic electrons $label")
 
-        # run kinetic electron simulation
-        run_moment_kinetics(this_kinetic_input; restart=restart_from_file)
-    end
+        this_kinetic_input["output"]["base_directory"] = test_output_directory
 
-    if global_rank[] == 0
-        # Load and analyse output
-        #########################
-
-        path = joinpath(realpath(this_kinetic_input["output"]["base_directory"]), this_kinetic_input["output"]["run_name"])
-
-        # open the output file(s)
-        run_info = get_run_info_no_setup(path, dfns=true)
-
-        # load fields data
-        Ez = postproc_load_variable(run_info, "Ez")[:,1,:]
-        vthe = postproc_load_variable(run_info, "electron_thermal_speed")[:,1,:]
-        electron_advance_linear_iterations = postproc_load_variable(run_info, "electron_advance_linear_iterations")[end]
-
-        close_run_info(run_info)
-
-        # Regression test
-        # Benchmark data generated in serial on Linux
-        if global_size[] == 1
-            # Serial solves use LU preconditioner
-            expected_Ez = [-0.5990683230706185 -1.136483186157602;
-                           -0.4944296396481284 -0.9873296990705788;
-                           -0.30889032954504736 -0.6694380824928302;
-                           -0.2064830747303776 -0.4471331690708596;
-                           -0.21232457328748663 -0.423069171542538;
-                           -0.18233875912042674 -0.3586467595624931;
-                           -0.16711429522309232 -0.3018272987758344;
-                           -0.16920776495088916 -0.27814384649305496;
-                           -0.1629417555658927 -0.26124630661090814;
-                           -0.16619150334079993 -0.2572789330163811;
-                           -0.15918194883360942 -0.23720078037362732;
-                           -0.14034706409006803 -0.20520396656341475;
-                           -0.12602184032280567 -0.1827016549071128;
-                           -0.10928716440800472 -0.15808919669899502;
-                           -0.07053969674257217 -0.10137753767917096;
-                           -0.0249577746169536 -0.0358411459260082;
-                           -2.8327303308330514e-15 -2.0803303361189427e-5;
-                           0.024957774616960776 0.03584490974053962;
-                           0.07053969674257636 0.1013692898656727;
-                           0.10928716440799909 0.15807862358546687;
-                           0.1260218403227975 0.18263049748179466;
-                           0.1403470640900294 0.20516566362571026;
-                           0.1591819488336015 0.23711236692241613;
-                           0.16619150334082114 0.257126146434857;
-                           0.16294175556587748 0.2609881259705107;
-                           0.16920776495090983 0.2778978154805798;
-                           0.1671142952230893 0.3015349192528757;
-                           0.1823387591204167 0.3585291689672981;
-                           0.21232457328753865 0.4231179549656996;
-                           0.20648307473037922 0.44816400221269476;
-                           0.3088903295450278 0.6716787105435247;
-                           0.4944296396481271 0.9861165590258743;
-                           0.5990683230705801 1.1300034111861956]
-            expected_vthe = [22.64555285302391 22.485481713141688;
-                             23.763411647653097 23.63281883616836;
-                             25.26907160117684 25.181703459470448;
-                             26.17920352818247 26.12461016686916;
-                             26.514772631426933 26.476018852279974;
-                             26.798783188585713 26.774387562937218;
-                             27.202255545479264 27.203662204308202;
-                             27.50424749120107 27.527732850637264;
-                             27.630498656270504 27.6642323848215;
-                             27.748483758260697 27.79134809261204;
-                             27.933760382468346 27.990808336620802;
-                             28.08611508251559 28.153978618442775;
-                             28.14959662643782 28.221734439130564;
-                             28.207730844115044 28.283677711828023;
-                             28.28567669896009 28.36634261525836;
-                             28.32728392065335 28.410489883644782;
-                             28.331064506972027 28.41437629072209;
-                             28.32729968986601 28.41050992096321;
-                             28.285678151542136 28.366352683865195;
-                             28.207765527709956 28.28373408727703;
-                             28.149604559462947 28.221771261090687;
-                             28.086248527111163 28.154158507899695;
-                             27.933979289064936 27.991103719847732;
-                             27.74906125092813 27.792046191405188;
-                             27.631210333523736 27.66508092926101;
-                             27.505479130159543 27.529115937508752;
-                             27.20422756527604 27.20578114592589;
-                             26.801712351383053 26.77740066591359;
-                             26.517644511297203 26.478915386575462;
-                             26.18176436913143 26.127099000267552;
-                             25.26635932097994 25.178676836919877;
-                             23.756593489029708 23.625697695979085;
-                             22.64390166090378 22.48400980852866]
-        else
-            # Parallel solves, which here use only shared-memory parallelism, use the ADI
-            # preconditioner, which should be as accurate, but may give different results
-            # within Newton-Krylov tolerances.
-            expected_Ez = [-0.5990683230706185 -1.136484793603861;
-                           -0.4944296396481284 -0.9873300031440772;
-                           -0.30889032954504736 -0.6694378168618197;
-                           -0.2064830747303776 -0.447133132132065;
-                           -0.21232457328748663 -0.42306913446372424;
-                           -0.18233875912042674 -0.3586467771727455;
-                           -0.16711429522309232 -0.30182728110160495;
-                           -0.16920776495088916 -0.27814382747995164;
-                           -0.1629417555658927 -0.2612463784138094;
-                           -0.16619150334079993 -0.25727894258000966;
-                           -0.15918194883360942 -0.23720078814350573;
-                           -0.14034706409006803 -0.20520397188041256;
-                           -0.12602184032280567 -0.18270162474892546;
-                           -0.10928716440800472 -0.1580892035790512;
-                           -0.07053969674257217 -0.10137753682381391;
-                           -0.0249577746169536 -0.03584114725793184;
-                           -2.8327303308330514e-15 -2.0802378395589373e-5;
-                           0.024957774616960776 0.0358449101669449;
-                           0.07053969674257636 0.10136928934666747;
-                           0.10928716440799909 0.15807862867071673;
-                           0.1260218403227975 0.18263047522175488;
-                           0.1403470640900294 0.20516566756031385; 0.1591819488336015 0.2371123741024713;
-                           0.16619150334082114 0.2571261543920033;
-                           0.16294175556587748 0.2609882062708652;
-                           0.16920776495090983 0.27789779494370415;
-                           0.1671142952230893 0.30153489797658445;
-                           0.1823387591204167 0.35852918516786003;
-                           0.21232457328753865 0.42311789840457864;
-                           0.20648307473037922 0.44816400062147066;
-                           0.3088903295450278 0.6716785459169026;
-                           0.4944296396481271 0.9861167610959626;
-                           0.5990683230705801 1.1300045383907789]
-            expected_vthe = [22.64555338227396 22.48548119549829;
-                             23.76341164436594 23.632819782771243;
-                             25.26907163394297 25.18170391887767;
-                             26.179203467285365 26.12461016927763;
-                             26.514772629327332 26.47601877788725;
-                             26.79878318858447 26.774387534342114;
-                             27.20225551034186 27.20366217166485;
-                             27.504247525601926 27.527732760234755;
-                             27.630498605068166 27.66423228184859;
-                             27.748483763235846 27.791348082529804;
-                             27.933760371994826 27.990808308571204;
-                             28.08611509938479 28.153978648601132;
-                             28.149596610550738 28.221734405417436;
-                             28.207730848524463 28.28367771694209;
-                             28.28567670146647 28.366342613061416;
-                             28.32728392764203 28.410489892675102;
-                             28.331064498175866 28.414376282256146;
-                             28.327299695349158 28.41050992979778;
-                             28.285678155424083 28.366352683054103;
-                             28.207765532359442 28.28373409338897;
-                             28.149604554344048 28.22177123547944;
-                             28.086248537316628 28.154158532699547;
-                             27.933979285563435 27.991103698041254;
-                             27.749061255285646 27.79204618050744;
-                             27.63121031067771 27.665080846653012;
-                             27.505479148983177 27.529115838548574;
-                             27.204227550854288 27.205781129997607;
-                             26.801712356957204 26.777400644678224;
-                             26.517644516966772 26.478915353716097;
-                             26.181764354679014 26.12709901369174;
-                             25.266359355820907 25.178677080491074;
-                             23.756593465755735 23.625698257711747;
-                             22.64390180335094 22.48400934735562]
-        end
+        # Suppress console output while running.
+        quietoutput() do
+            restart_from_directory = joinpath(this_boltzmann_input["output"]["base_directory"], this_boltzmann_input["output"]["run_name"])
+            restart_from_file_pattern = this_boltzmann_input["output"]["run_name"] * ".dfns*.h5"
+            restart_from_file = glob(restart_from_file_pattern, restart_from_directory)[1]
 
-        if expected_Ez == nothing
-            # Error: no expected input provided
-            println("data tested would be: Ez=", Ez)
-            @test false
-        else
-            @test elementwise_isapprox(Ez, expected_Ez, rtol=0.0, atol=2.0e-6)
-        end
-        if expected_vthe == nothing
-            # Error: no expected input provided
-            println("data tested would be: vthe=", vthe)
-            @test false
-        else
-            @test elementwise_isapprox(vthe, expected_vthe, rtol=1.0e-6, atol=0.0)
+            # run kinetic electron simulation
+            run_moment_kinetics(this_kinetic_input; restart=restart_from_file)
         end
 
-        # Iteration counts are fairly inconsistent, but it's good to check that they at
-        # least don't unexpectedly increase by an order of magnitude.
-        # Expected iteration count is from a serial run on Linux.
-        expected_electron_advance_linear_iterations = 49307
-        @test electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations
-        if !(electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations)
-            println("electron_advance_linear_iterations=$electron_advance_linear_iterations was greater than twice the expected $expected_electron_advance_linear_iterations.")
+        if global_rank[] == 0
+            # Load and analyse output
+            #########################
+
+            path = joinpath(realpath(this_kinetic_input["output"]["base_directory"]), this_kinetic_input["output"]["run_name"])
+
+            # open the output file(s)
+            run_info = get_run_info_no_setup(path, dfns=true)
+
+            # load fields data
+            Ez = postproc_load_variable(run_info, "Ez")[:,1,:]
+            vthe = postproc_load_variable(run_info, "electron_thermal_speed")[:,1,:]
+            electron_advance_linear_iterations = postproc_load_variable(run_info, "electron_advance_linear_iterations")[end]
+
+            close_run_info(run_info)
+
+            # Regression test
+            # Benchmark data generated in serial on Linux
+            if global_size[] == 1
+                # Serial solves use LU preconditioner
+                expected_Ez = [-0.5990683230706185 -1.1053138725180998;
+                               -0.4944296396481284 -0.9819332128466166;
+                               -0.30889032954504736 -0.6745656961983237;
+                               -0.2064830747303776 -0.4459531272930669;
+                               -0.21232457328748663 -0.4253218487528007;
+                               -0.18233875912042674 -0.3596054334022437;
+                               -0.16711429522309232 -0.3021381799340685;
+                               -0.16920776495088916 -0.2784335484692499;
+                               -0.1629417555658927 -0.2612551389558109;
+                               -0.16619150334079993 -0.2574841927015592;
+                               -0.15918194883360942 -0.23740132549636406;
+                               -0.14034706409006803 -0.20534503972256973;
+                               -0.12602184032280567 -0.1827098539044343;
+                               -0.10928716440800472 -0.1582133200686042;
+                               -0.07053969674257217 -0.10145491369831482;
+                               -0.0249577746169536 -0.03585934915825971;
+                               -2.8327303308330514e-15 3.742211718942586e-14;
+                               0.024957774616960776 0.03585934915827381;
+                               0.07053969674257636 0.10145491369829167;
+                               0.10928716440799909 0.15821332006862954;
+                               0.1260218403227975 0.18270985390445083;
+                               0.1403470640900294 0.20534503972250218;
+                               0.1591819488336015 0.23740132549634094;
+                               0.16619150334082114 0.2574841927015898;
+                               0.16294175556587748 0.261255138955811;
+                               0.16920776495090983 0.2784335484692798;
+                               0.1671142952230893 0.3021381799340713;
+                               0.1823387591204167 0.3596054334022252;
+                               0.21232457328753865 0.4253218487528467;
+                               0.20648307473037922 0.44595312729305947;
+                               0.3088903295450278 0.6745656961983009;
+                               0.4944296396481271 0.9819332128466268;
+                               0.5990683230705801 1.1053138725180645]
+                expected_vthe = [22.654024448490784 22.494016350356883;
+                                 23.744503682730446 23.61361063067715;
+                                 25.26061134578617 25.173128418725682;
+                                 26.177253875120066 26.122412383901523;
+                                 26.510545637302872 26.47158368991228;
+                                 26.798827552847246 26.77429043464489;
+                                 27.202535498354287 27.2038739551587;
+                                 27.506373594650846 27.529813468465488;
+                                 27.631027625644876 27.664719606410365;
+                                 27.750902611036295 27.793759280909274;
+                                 27.935780521313532 27.992775960575692;
+                                 28.089380398280714 28.157198480516957;
+                                 28.15152314377127 28.223553488629253;
+                                 28.211115085781678 28.2870195116558;
+                                 28.28856778918977 28.369130039283018;
+                                 28.330972960680672 28.41411592647979;
+                                 28.33351348538364 28.416680586218863;
+                                 28.330972960680675 28.41411592647976;
+                                 28.288567789189763 28.369130039283064;
+                                 28.211115085781678 28.287019511655785;
+                                 28.15152314377127 28.223553488629236;
+                                 28.089380398280724 28.157198480516957;
+                                 27.93578052131354 27.992775960575713;
+                                 27.750902611036295 27.79375928090935;
+                                 27.63102762564488 27.664719606410383;
+                                 27.506373594650853 27.529813468465495;
+                                 27.202535498354287 27.2038739551587;
+                                 26.79882755284725 26.774290434644872;
+                                 26.510545637302886 26.471583689912283;
+                                 26.177253875120083 26.122412383901523;
+                                 25.26061134578619 25.173128418725696;
+                                 23.744503682730446 23.613610630677236;
+                                 22.65402444849082 22.494016350356937]
+            else
+                # Parallel solves, which here use only shared-memory parallelism, use the ADI
+                # preconditioner, which should be as accurate, but may give different results
+                # within Newton-Krylov tolerances.
+                expected_Ez = [-0.5990683230706185 -1.1053137071260657;
+                               -0.4944296396481284 -0.9819330928307715;
+                               -0.30889032954504736 -0.6745656725019216;
+                               -0.2064830747303776 -0.44595313784207047;
+                               -0.21232457328748663 -0.425321828548;
+                               -0.18233875912042674 -0.3596054340570364;
+                               -0.16711429522309232 -0.30213818089568956;
+                               -0.16920776495088916 -0.27843354821637;
+                               -0.1629417555658927 -0.2612551385019989;
+                               -0.16619150334079993 -0.2574841930766524;
+                               -0.15918194883360942 -0.23740132557788143;
+                               -0.14034706409006803 -0.20534504018275174;
+                               -0.12602184032280567 -0.18270985430997166;
+                               -0.10928716440800472 -0.1582133189704785;
+                               -0.07053969674257217 -0.101454914566153;
+                               -0.0249577746169536 -0.035859347929368034;
+                               -2.8327303308330514e-15 -4.536628997349189e-9;
+                               0.024957774616960776 0.035859348624052545;
+                               0.07053969674257636 0.10145491474282464;
+                               0.10928716440799909 0.15821331955573922;
+                               0.1260218403227975 0.18270985667178208;
+                               0.1403470640900294 0.2053450392202274;
+                               0.1591819488336015 0.23740132578753803;
+                               0.16619150334082114 0.25748419283426127;
+                               0.16294175556587748 0.2612551396310432;
+                               0.16920776495090983 0.2784335479625835;
+                               0.1671142952230893 0.3021381809909585;
+                               0.1823387591204167 0.35960543399747075;
+                               0.21232457328753865 0.4253218286915096;
+                               0.20648307473037922 0.44595313782295487;
+                               0.3088903295450278 0.6745656725300222;
+                               0.4944296396481271 0.9819330927685747;
+                               0.5990683230705801 1.1053137082172033]
+                expected_vthe = [22.654024454479018 22.494016869931663;
+                                 23.74450367962989 23.61361086266046;
+                                 25.260611341892094 25.173128419566062;
+                                 26.17725387357487 26.122412390676395;
+                                 26.510545632956767 26.47158369227529;
+                                 26.7988275507785 26.774290427357606;
+                                 27.20253549703805 27.20387395613098;
+                                 27.506373594719115 27.529813465559865;
+                                 27.63102762567087 27.6647196112545;
+                                 27.75090260968854 27.79375927764987;
+                                 27.935780521822277 27.992775962652605;
+                                 28.08938039775227 28.157198478502867;
+                                 28.151523156278788 28.223553495610926;
+                                 28.211115080270424 28.28701950947455;
+                                 28.288567793141777 28.369130040934596;
+                                 28.330972955353705 28.414115925374524;
+                                 28.333513456094945 28.41668058720323;
+                                 28.330972961606466 28.414115929999316;
+                                 28.288567792143006 28.369130041232697;
+                                 28.211115083430062 28.287019512466056;
+                                 28.15152314952673 28.223553491119628;
+                                 28.089380398299795 28.157198479157458;
+                                 27.93578052229754 27.99277596224337;
+                                 27.750902609816293 27.79375927871885;
+                                 27.631027625671482 27.664719609967122;
+                                 27.50637359506551 27.52981346582775;
+                                 27.20253549697429 27.203873955958308;
+                                 26.798827550864885 26.77429042759387;
+                                 26.510545632587316 26.471583691722795;
+                                 26.177253873758893 26.122412390844207;
+                                 25.26061134158348 25.17312841929966;
+                                 23.7445036798294 23.613610862832093;
+                                 22.654024453873603 22.494016869407307]
+            end
+
+            if expected_Ez == nothing
+                # Error: no expected input provided
+                println("data tested would be: Ez=", Ez)
+                @test false
+            else
+                @test elementwise_isapprox(Ez, expected_Ez, rtol=0.0, atol=2.0*tol)
+            end
+            if expected_vthe == nothing
+                # Error: no expected input provided
+                println("data tested would be: vthe=", vthe)
+                @test false
+            else
+                @test elementwise_isapprox(vthe, expected_vthe, rtol=tol, atol=0.0)
+            end
+
+            # Iteration counts are fairly inconsistent, but it's good to check that they at
+            # least don't unexpectedly increase by an order of magnitude.
+            # Expected iteration count is from a serial run on Linux.
+            expected_electron_advance_linear_iterations = 48716
+            @test electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations
+            if !(electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations)
+                println("electron_advance_linear_iterations=$electron_advance_linear_iterations was greater than twice the expected $expected_electron_advance_linear_iterations.")
+            end
         end
     end
 

From 70955ce89edc8dc930a70725d9b0e01e9c476945 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Wed, 27 Nov 2024 22:53:38 +0000
Subject: [PATCH 39/43] When using adaptive timestep, don't force fixed output
 times by default

When running kinetic electron simulations, taking a very short ion
timestep can cause problems. When outputs are written at exactly fixed
output times, this can happen if the previous timestep happened to end
just before the output time. To avoid the very short step, default to
writing output at the end of the first timestep that reaches or passes
the nominal output time. There is an option (`exact_output_times`) to
force the previous behaviour of a decreased timestep so that output is
written exactly at the nominal output time.
---
 moment_kinetics/src/input_structs.jl          |  1 +
 moment_kinetics/src/moment_kinetics_input.jl  |  1 +
 moment_kinetics/src/runge_kutta.jl            | 65 +++++++++++++++----
 moment_kinetics/src/time_advance.jl           | 25 +++++--
 .../test/braginskii_electrons_imex_tests.jl   |  1 +
 .../test/recycling_fraction_tests.jl          | 21 +++---
 6 files changed, 86 insertions(+), 28 deletions(-)

diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl
index 682830365..dfc80b3f4 100644
--- a/moment_kinetics/src/input_structs.jl
+++ b/moment_kinetics/src/input_structs.jl
@@ -57,6 +57,7 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero
     limit_caused_by::Vector{mk_int}
     nwrite_moments::mk_int
     nwrite_dfns::mk_int
+    exact_output_times::Bool
     moments_output_times::Vector{mk_float}
     dfns_output_times::Vector{mk_float}
     type::String
diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl
index e1008bf0e..c6ff98c85 100644
--- a/moment_kinetics/src/moment_kinetics_input.jl
+++ b/moment_kinetics/src/moment_kinetics_input.jl
@@ -146,6 +146,7 @@ function mk_input(input_dict=OptionsDict(); save_inputs_to_txt=false, ignore_MPI
         CFL_prefactor=-1.0,
         nwrite=1,
         nwrite_dfns=-1,
+        exact_output_times=false,
         type="SSPRK4",
         split_operators=false,
         steady_state_residual=false,
diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl
index 0c87369b7..772cda8ba 100644
--- a/moment_kinetics/src/runge_kutta.jl
+++ b/moment_kinetics/src/runge_kutta.jl
@@ -1166,10 +1166,25 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms,
             t_params.failure_caused_by[end] += 1
         end
 
-        # If we were trying to take a step to the output timestep, dt will be smaller on
-        # the re-try, so will not reach the output time.
-        t_params.step_to_moments_output[] = false
-        t_params.step_to_dfns_output[] = false
+        if t_params.exact_output_times
+            # If we were trying to take a step to the output timestep, dt will be smaller on
+            # the re-try, so will not reach the output time.
+            t_params.step_to_moments_output[] = false
+            t_params.step_to_dfns_output[] = false
+        else
+            # If with the reduced dt the step will not pass the next output time,
+            # deactivate step_to_*_output[].
+            if (t_params.step_to_moments_output[]
+                && t_params.t[] + t_params.previous_dt[] + t_params.dt[] <
+                   t_params.moments_output_times[t_params.moments_output_counter[]])
+                t_params.step_to_moments_output[] = false
+            end
+            if (t_params.step_to_dfns_output[]
+                && t_params.t[] + t_params.previous_dt[] + t_params.dt[] <
+                   t_params.dfns_output_times[t_params.dfns_output_counter[]])
+                t_params.step_to_dfns_output[] = false
+            end
+        end
     elseif (error_norm[] > 1.0 || isnan(error_norm[])) && t_params.dt[] > t_params.minimum_dt * (1.0 + 1.0e-13)
         # (1.0 + 1.0e-13) fudge factor accounts for possible rounding errors when
         # t+dt=next_output_time.
@@ -1199,10 +1214,25 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms,
             t_params.failure_caused_by[max_error_variable_index] += 1
         end
 
-        # If we were trying to take a step to the output timestep, dt will be smaller on
-        # the re-try, so will not reach the output time.
-        t_params.step_to_moments_output[] = false
-        t_params.step_to_dfns_output[] = false
+        if t_params.exact_output_times
+            # If we were trying to take a step to the output timestep, dt will be smaller on
+            # the re-try, so will not reach the output time.
+            t_params.step_to_moments_output[] = false
+            t_params.step_to_dfns_output[] = false
+        else
+            # If with the reduced dt the step will not pass the next output time,
+            # deactivate step_to_*_output[].
+            if (t_params.step_to_moments_output[]
+                && t_params.t[] + t_params.previous_dt[] + t_params.dt[] <
+                   t_params.moments_output_times[t_params.moments_output_counter[]])
+                t_params.step_to_moments_output[] = false
+            end
+            if (t_params.step_to_dfns_output[]
+                && t_params.t[] + t_params.previous_dt[] + t_params.dt[] <
+                   t_params.dfns_output_times[t_params.dfns_output_counter[]])
+                t_params.step_to_dfns_output[] = false
+            end
+        end
 
         #println("t=$t, timestep failed, error_norm=$(error_norm[]), error_norms=$error_norms, decreasing timestep to ", t_params.dt[])
     else
@@ -1211,9 +1241,11 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms,
         t_params.previous_dt[] = t_params.dt[]
 
         if t_params.step_to_moments_output[] || t_params.step_to_dfns_output[]
-            # Completed an output step, reset dt to what it was before it was reduced to reach
-            # the output time
-            t_params.dt[] = t_params.dt_before_output[]
+            if !t_params.exact_output_times
+                # Completed an output step, reset dt to what it was before it was reduced to reach
+                # the output time
+                t_params.dt[] = t_params.dt_before_output[]
+            end
 
             if t_params.step_to_moments_output[]
                 t_params.step_to_moments_output[] = false
@@ -1227,7 +1259,8 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms,
             if t_params.dt[] > CFL_limit[]
                 t_params.dt[] = CFL_limit[]
             end
-        else
+        end
+        if !t_params.exact_output_times || !(t_params.write_moments_output[] || t_params.write_dfns_output[])
             # Adjust timestep according to Fehlberg's suggestion
             # (https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta%E2%80%93Fehlberg_method).
             # `step_update_prefactor` is a constant numerical factor to make the estimate
@@ -1337,7 +1370,9 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms,
         && (current_time + t_params.dt[] >= t_params.moments_output_times[t_params.moments_output_counter[]]))
 
         t_params.dt_before_output[] = current_dt
-        t_params.dt[] = t_params.moments_output_times[t_params.moments_output_counter[]] - current_time
+        if t_params.exact_output_times
+            t_params.dt[] = t_params.moments_output_times[t_params.moments_output_counter[]] - current_time
+        end
         t_params.step_to_moments_output[] = true
 
         if t_params.dt[] < 0.0
@@ -1352,7 +1387,9 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms,
         && (current_time + t_params.dt[] >= t_params.dfns_output_times[t_params.dfns_output_counter[]]))
 
         t_params.dt_before_output[] = current_dt
-        t_params.dt[] = t_params.dfns_output_times[t_params.dfns_output_counter[]] - current_time
+        if t_params.exact_output_times
+            t_params.dt[] = t_params.dfns_output_times[t_params.dfns_output_counter[]] - current_time
+        end
         t_params.step_to_dfns_output[] = true
 
         if t_params.dt[] < 0.0
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index 8f2d7095b..d2c0e295a 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -355,7 +355,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
 
     end_time = mk_float(code_time + t_input["dt"] * t_input["nstep"])
     epsilon = 1.e-11
-    if adaptive || t_input["write_after_fixed_step_count"]
+    if adaptive && !t_input["write_after_fixed_step_count"]
         if t_input["nwrite"] == 0
             moments_output_times = [end_time]
         else
@@ -483,11 +483,12 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
                      step_to_moments_output, step_to_dfns_output, write_moments_output,
                      write_dfns_output, Ref(0), Ref(0), Ref{mk_float}(0.0), Ref(0),
                      Ref(0), Ref(0), mk_int[], mk_int[], t_input["nwrite"],
-                     t_input["nwrite_dfns"], moments_output_times, dfns_output_times,
-                     t_input["type"], rk_coefs, rk_coefs_implicit,
-                     implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive,
-                     low_storage, mk_float(t_input["rtol"]), mk_float(t_input["atol"]),
-                     mk_float(t_input["atol_upar"]),
+                     t_input["nwrite_dfns"],
+                     electron !== nothing && t_input["exact_output_times"],
+                     moments_output_times, dfns_output_times, t_input["type"], rk_coefs,
+                     rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages,
+                     rk_order, adaptive, low_storage, mk_float(t_input["rtol"]),
+                     mk_float(t_input["atol"]), mk_float(t_input["atol_upar"]),
                      mk_float(t_input["step_update_prefactor"]),
                      mk_float(t_input["max_increase_factor"]),
                      mk_float(t_input["max_increase_factor_near_last_fail"]),
@@ -1878,9 +1879,21 @@ function  time_advance!(pdf, scratch, scratch_implicit, scratch_electron, t_para
             end
             if write_moments
                 t_params.moments_output_counter[] += 1
+                if !t_params.exact_output_times
+                    while (t_params.moments_output_counter[] ≤ length(t_params.moments_output_times)
+                           && t_params.moments_output_times[t_params.moments_output_counter[]] ≤ t_params.t[])
+                        t_params.moments_output_counter[] += 1
+                    end
+                end
             end
             if write_dfns
                 t_params.dfns_output_counter[] += 1
+                if !t_params.exact_output_times
+                    while (t_params.dfns_output_counter[] ≤ length(t_params.dfns_output_times)
+                           && t_params.dfns_output_times[t_params.dfns_output_counter[]] ≤ t_params.t[])
+                        t_params.dfns_output_counter[] += 1
+                    end
+                end
             end
 
             if write_moments || write_dfns || finish_now
diff --git a/moment_kinetics/test/braginskii_electrons_imex_tests.jl b/moment_kinetics/test/braginskii_electrons_imex_tests.jl
index 1104271f3..fb91dfc3c 100644
--- a/moment_kinetics/test/braginskii_electrons_imex_tests.jl
+++ b/moment_kinetics/test/braginskii_electrons_imex_tests.jl
@@ -68,6 +68,7 @@ test_input = OptionsDict( "composition" => OptionsDict("n_ion_species" => 1,
                                                      "minimum_dt" => 1.e-7,
                                                      "rtol" => 1.0e-7,
                                                      "nwrite" => 10000,
+                                                     "exact_output_times" => true,
                                                      "high_precision_error_sum" => true),
                   "nonlinear_solver" => OptionsDict("nonlinear_max_iterations" => 100),
                   "r" => OptionsDict("ngrid" => 1,
diff --git a/moment_kinetics/test/recycling_fraction_tests.jl b/moment_kinetics/test/recycling_fraction_tests.jl
index 04d289637..77b14fc2b 100644
--- a/moment_kinetics/test/recycling_fraction_tests.jl
+++ b/moment_kinetics/test/recycling_fraction_tests.jl
@@ -169,6 +169,11 @@ test_input_adaptive_split3["timestepping"] = recursive_merge(test_input_adaptive
                                                                          "minimum_dt" => 1.0e-7,
                                                                          "step_update_prefactor" => 0.064))
 
+# Test exact_output_times option in full-f/split1/split2 cases
+test_input_adaptive["timestepping"]["exact_output_times"] = true
+test_input_adaptive_split1["timestepping"]["exact_output_times"] = true
+test_input_adaptive_split2["timestepping"]["exact_output_times"] = true
+
 """
 Run a test for a single set of parameters
 """
@@ -341,14 +346,14 @@ function runtests()
         @testset "Adaptive timestep - split 3" begin
             test_input_adaptive_split3["output"]["base_directory"] = test_output_directory
             run_test(test_input_adaptive_split3,
-                     [-0.034623352735472034, -0.03200541773193755, -0.02714032291656093,
-                      -0.020924986472905527, -0.01015057042512689, 0.0027893133203071574,
-                      0.012837899470698978, 0.022096372980618853, 0.0330348469665054,
-                      0.041531828755231016, 0.045382106043818246, 0.046246244563868354,
-                      0.042551970615727366, 0.034815169767529956, 0.027080688565416164,
-                      0.017886490800418996, 0.004784403555306537, -0.007762152788142663,
-                      -0.01629330539573498, -0.02413421820486561, -0.0315621379076817,
-                      -0.03416924694766477], rtol=6.0e-4, atol=2.0e-12)
+                     [-0.0346196925024167, -0.03200201693849987, -0.02713764319615098,
+                      -0.02092311349672712, -0.010150026206894121, 0.0027883420935253572,
+                      0.012835791449600767, 0.02209326318113659, 0.03303078703903627,
+                      0.04152829640863164, 0.04538051487359227, 0.04624543438581702,
+                      0.04254876799453081, 0.03481104153755928, 0.027077084096581314,
+                      0.01788382934269672, 0.00478320487966262, -0.0077618876322877485,
+                      -0.016292009420807548, -0.024131976958124225, -0.031559093785483404,
+                      -0.0341657304695615], rtol=6.0e-4, atol=2.0e-12)
         end
 
         @long @testset "Check other timestep - $type" for

From ae1645870882cf1aa6e1b304f45ca222030db585 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Thu, 28 Nov 2024 13:55:35 +0000
Subject: [PATCH 40/43] Move exact_output_times to different position in
 time_info struct

This seems to avoid compiler errors on macOS. No idea why this should
help, or what the original problem was!
---
 moment_kinetics/src/input_structs.jl |  2 +-
 moment_kinetics/src/time_advance.jl  | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl
index dfc80b3f4..047167195 100644
--- a/moment_kinetics/src/input_structs.jl
+++ b/moment_kinetics/src/input_structs.jl
@@ -57,7 +57,6 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero
     limit_caused_by::Vector{mk_int}
     nwrite_moments::mk_int
     nwrite_dfns::mk_int
-    exact_output_times::Bool
     moments_output_times::Vector{mk_float}
     dfns_output_times::Vector{mk_float}
     type::String
@@ -66,6 +65,7 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero
     implicit_coefficient_is_zero::Timpzero
     n_rk_stages::mk_int
     rk_order::mk_int
+    exact_output_times::Bool
     adaptive::Bool
     low_storage::Bool
     rtol::mk_float
diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl
index d2c0e295a..d579c7461 100644
--- a/moment_kinetics/src/time_advance.jl
+++ b/moment_kinetics/src/time_advance.jl
@@ -483,12 +483,12 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload,
                      step_to_moments_output, step_to_dfns_output, write_moments_output,
                      write_dfns_output, Ref(0), Ref(0), Ref{mk_float}(0.0), Ref(0),
                      Ref(0), Ref(0), mk_int[], mk_int[], t_input["nwrite"],
-                     t_input["nwrite_dfns"],
-                     electron !== nothing && t_input["exact_output_times"],
-                     moments_output_times, dfns_output_times, t_input["type"], rk_coefs,
-                     rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages,
-                     rk_order, adaptive, low_storage, mk_float(t_input["rtol"]),
-                     mk_float(t_input["atol"]), mk_float(t_input["atol_upar"]),
+                     t_input["nwrite_dfns"], moments_output_times, dfns_output_times,
+                     t_input["type"], rk_coefs, rk_coefs_implicit,
+                     implicit_coefficient_is_zero, n_rk_stages, rk_order,
+                     electron !== nothing && t_input["exact_output_times"], adaptive,
+                     low_storage, mk_float(t_input["rtol"]), mk_float(t_input["atol"]),
+                     mk_float(t_input["atol_upar"]),
                      mk_float(t_input["step_update_prefactor"]),
                      mk_float(t_input["max_increase_factor"]),
                      mk_float(t_input["max_increase_factor_near_last_fail"]),

From 7ad038ef75f31626d2baedc21264e969275d1bf2 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Tue, 3 Dec 2024 12:18:33 +0000
Subject: [PATCH 41/43] Skip Jacobian matrix tests in macOS parallel tests CI
 job

This test is extremely slow when run in parallel on macOS (maybe the
macOS servers on Github Actions don't have enough memory?), so skip it
in this case to avoid test failures.
---
 moment_kinetics/test/jacobian_matrix_tests.jl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl
index 9bae8296c..b699aa563 100644
--- a/moment_kinetics/test/jacobian_matrix_tests.jl
+++ b/moment_kinetics/test/jacobian_matrix_tests.jl
@@ -10,6 +10,7 @@ using moment_kinetics.array_allocation: allocate_shared_float
 using moment_kinetics.boundary_conditions: enforce_v_boundary_condition_local!,
                                            enforce_vperp_boundary_condition!
 using moment_kinetics.calculus: derivative!
+using moment_kinetics.communication
 using moment_kinetics.derivatives: derivative_z!, derivative_z_pdf_vpavperpz!
 using moment_kinetics.electron_fluid_equations: calculate_electron_qpar_from_pdf_no_r!,
                                                 electron_energy_equation_no_r!,
@@ -3534,6 +3535,11 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2)
 end
 
 function runtests()
+    if Sys.isapple() && "CI" ∈ keys(ENV) && global_size[] > 1
+        # These tests are too slow in the parallel tests job on macOS, so skip in that
+        # case.
+        return nothing
+    end
     # Create a temporary directory for test output
     test_output_directory = get_MPI_tempdir()
     test_input["output"]["base_directory"] = test_output_directory

From 940ca4a8a89e99c6f4952e0125c8da24fa681fc5 Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Wed, 4 Dec 2024 20:21:19 +0000
Subject: [PATCH 42/43] Make kinetic electron test use shared-memory, and ADI,
 when possible

---
 .../test/kinetic_electron_tests.jl            | 216 ++++++------------
 1 file changed, 75 insertions(+), 141 deletions(-)

diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl
index 63c6b6ee2..0b0907c58 100644
--- a/moment_kinetics/test/kinetic_electron_tests.jl
+++ b/moment_kinetics/test/kinetic_electron_tests.jl
@@ -92,7 +92,14 @@ boltzmann_input = OptionsDict(
    )
 
 # Test use distributed-memory when possible
-boltzmann_input["z"]["nelement_local"] = boltzmann_input["z"]["nelement"] ÷ gcd(boltzmann_input["z"]["nelement"], global_size[])
+if global_size[] % 2 == 0
+    # Divide by 2 so that we use shared memory when running in parallel, and so test the
+    # ADI preconditioner.
+    procs_to_divide_by = global_size[] ÷ 2
+else
+    procs_to_divide_by = global_size[]
+end
+boltzmann_input["z"]["nelement_local"] = boltzmann_input["z"]["nelement"] ÷ gcd(boltzmann_input["z"]["nelement"], procs_to_divide_by)
 
 kinetic_input = deepcopy(boltzmann_input)
 kinetic_input["output"]["run_name"] = "kinetic_electron_test"
@@ -180,146 +187,73 @@ function run_test()
             close_run_info(run_info)
 
             # Regression test
-            # Benchmark data generated in serial on Linux
-            if global_size[] == 1
-                # Serial solves use LU preconditioner
-                expected_Ez = [-0.5990683230706185 -1.1053138725180998;
-                               -0.4944296396481284 -0.9819332128466166;
-                               -0.30889032954504736 -0.6745656961983237;
-                               -0.2064830747303776 -0.4459531272930669;
-                               -0.21232457328748663 -0.4253218487528007;
-                               -0.18233875912042674 -0.3596054334022437;
-                               -0.16711429522309232 -0.3021381799340685;
-                               -0.16920776495088916 -0.2784335484692499;
-                               -0.1629417555658927 -0.2612551389558109;
-                               -0.16619150334079993 -0.2574841927015592;
-                               -0.15918194883360942 -0.23740132549636406;
-                               -0.14034706409006803 -0.20534503972256973;
-                               -0.12602184032280567 -0.1827098539044343;
-                               -0.10928716440800472 -0.1582133200686042;
-                               -0.07053969674257217 -0.10145491369831482;
-                               -0.0249577746169536 -0.03585934915825971;
-                               -2.8327303308330514e-15 3.742211718942586e-14;
-                               0.024957774616960776 0.03585934915827381;
-                               0.07053969674257636 0.10145491369829167;
-                               0.10928716440799909 0.15821332006862954;
-                               0.1260218403227975 0.18270985390445083;
-                               0.1403470640900294 0.20534503972250218;
-                               0.1591819488336015 0.23740132549634094;
-                               0.16619150334082114 0.2574841927015898;
-                               0.16294175556587748 0.261255138955811;
-                               0.16920776495090983 0.2784335484692798;
-                               0.1671142952230893 0.3021381799340713;
-                               0.1823387591204167 0.3596054334022252;
-                               0.21232457328753865 0.4253218487528467;
-                               0.20648307473037922 0.44595312729305947;
-                               0.3088903295450278 0.6745656961983009;
-                               0.4944296396481271 0.9819332128466268;
-                               0.5990683230705801 1.1053138725180645]
-                expected_vthe = [22.654024448490784 22.494016350356883;
-                                 23.744503682730446 23.61361063067715;
-                                 25.26061134578617 25.173128418725682;
-                                 26.177253875120066 26.122412383901523;
-                                 26.510545637302872 26.47158368991228;
-                                 26.798827552847246 26.77429043464489;
-                                 27.202535498354287 27.2038739551587;
-                                 27.506373594650846 27.529813468465488;
-                                 27.631027625644876 27.664719606410365;
-                                 27.750902611036295 27.793759280909274;
-                                 27.935780521313532 27.992775960575692;
-                                 28.089380398280714 28.157198480516957;
-                                 28.15152314377127 28.223553488629253;
-                                 28.211115085781678 28.2870195116558;
-                                 28.28856778918977 28.369130039283018;
-                                 28.330972960680672 28.41411592647979;
-                                 28.33351348538364 28.416680586218863;
-                                 28.330972960680675 28.41411592647976;
-                                 28.288567789189763 28.369130039283064;
-                                 28.211115085781678 28.287019511655785;
-                                 28.15152314377127 28.223553488629236;
-                                 28.089380398280724 28.157198480516957;
-                                 27.93578052131354 27.992775960575713;
-                                 27.750902611036295 27.79375928090935;
-                                 27.63102762564488 27.664719606410383;
-                                 27.506373594650853 27.529813468465495;
-                                 27.202535498354287 27.2038739551587;
-                                 26.79882755284725 26.774290434644872;
-                                 26.510545637302886 26.471583689912283;
-                                 26.177253875120083 26.122412383901523;
-                                 25.26061134578619 25.173128418725696;
-                                 23.744503682730446 23.613610630677236;
-                                 22.65402444849082 22.494016350356937]
-            else
-                # Parallel solves, which here use only shared-memory parallelism, use the ADI
-                # preconditioner, which should be as accurate, but may give different results
-                # within Newton-Krylov tolerances.
-                expected_Ez = [-0.5990683230706185 -1.1053137071260657;
-                               -0.4944296396481284 -0.9819330928307715;
-                               -0.30889032954504736 -0.6745656725019216;
-                               -0.2064830747303776 -0.44595313784207047;
-                               -0.21232457328748663 -0.425321828548;
-                               -0.18233875912042674 -0.3596054340570364;
-                               -0.16711429522309232 -0.30213818089568956;
-                               -0.16920776495088916 -0.27843354821637;
-                               -0.1629417555658927 -0.2612551385019989;
-                               -0.16619150334079993 -0.2574841930766524;
-                               -0.15918194883360942 -0.23740132557788143;
-                               -0.14034706409006803 -0.20534504018275174;
-                               -0.12602184032280567 -0.18270985430997166;
-                               -0.10928716440800472 -0.1582133189704785;
-                               -0.07053969674257217 -0.101454914566153;
-                               -0.0249577746169536 -0.035859347929368034;
-                               -2.8327303308330514e-15 -4.536628997349189e-9;
-                               0.024957774616960776 0.035859348624052545;
-                               0.07053969674257636 0.10145491474282464;
-                               0.10928716440799909 0.15821331955573922;
-                               0.1260218403227975 0.18270985667178208;
-                               0.1403470640900294 0.2053450392202274;
-                               0.1591819488336015 0.23740132578753803;
-                               0.16619150334082114 0.25748419283426127;
-                               0.16294175556587748 0.2612551396310432;
-                               0.16920776495090983 0.2784335479625835;
-                               0.1671142952230893 0.3021381809909585;
-                               0.1823387591204167 0.35960543399747075;
-                               0.21232457328753865 0.4253218286915096;
-                               0.20648307473037922 0.44595313782295487;
-                               0.3088903295450278 0.6745656725300222;
-                               0.4944296396481271 0.9819330927685747;
-                               0.5990683230705801 1.1053137082172033]
-                expected_vthe = [22.654024454479018 22.494016869931663;
-                                 23.74450367962989 23.61361086266046;
-                                 25.260611341892094 25.173128419566062;
-                                 26.17725387357487 26.122412390676395;
-                                 26.510545632956767 26.47158369227529;
-                                 26.7988275507785 26.774290427357606;
-                                 27.20253549703805 27.20387395613098;
-                                 27.506373594719115 27.529813465559865;
-                                 27.63102762567087 27.6647196112545;
-                                 27.75090260968854 27.79375927764987;
-                                 27.935780521822277 27.992775962652605;
-                                 28.08938039775227 28.157198478502867;
-                                 28.151523156278788 28.223553495610926;
-                                 28.211115080270424 28.28701950947455;
-                                 28.288567793141777 28.369130040934596;
-                                 28.330972955353705 28.414115925374524;
-                                 28.333513456094945 28.41668058720323;
-                                 28.330972961606466 28.414115929999316;
-                                 28.288567792143006 28.369130041232697;
-                                 28.211115083430062 28.287019512466056;
-                                 28.15152314952673 28.223553491119628;
-                                 28.089380398299795 28.157198479157458;
-                                 27.93578052229754 27.99277596224337;
-                                 27.750902609816293 27.79375927871885;
-                                 27.631027625671482 27.664719609967122;
-                                 27.50637359506551 27.52981346582775;
-                                 27.20253549697429 27.203873955958308;
-                                 26.798827550864885 26.77429042759387;
-                                 26.510545632587316 26.471583691722795;
-                                 26.177253873758893 26.122412390844207;
-                                 25.26061134158348 25.17312841929966;
-                                 23.7445036798294 23.613610862832093;
-                                 22.654024453873603 22.494016869407307]
-            end
+            # Benchmark data generated in serial on Linux with the LU preconditioner
+            expected_Ez = [-0.5990683230706185 -1.1053138725180998;
+                           -0.4944296396481284 -0.9819332128466166;
+                           -0.30889032954504736 -0.6745656961983237;
+                           -0.2064830747303776 -0.4459531272930669;
+                           -0.21232457328748663 -0.4253218487528007;
+                           -0.18233875912042674 -0.3596054334022437;
+                           -0.16711429522309232 -0.3021381799340685;
+                           -0.16920776495088916 -0.2784335484692499;
+                           -0.1629417555658927 -0.2612551389558109;
+                           -0.16619150334079993 -0.2574841927015592;
+                           -0.15918194883360942 -0.23740132549636406;
+                           -0.14034706409006803 -0.20534503972256973;
+                           -0.12602184032280567 -0.1827098539044343;
+                           -0.10928716440800472 -0.1582133200686042;
+                           -0.07053969674257217 -0.10145491369831482;
+                           -0.0249577746169536 -0.03585934915825971;
+                           -2.8327303308330514e-15 3.742211718942586e-14;
+                           0.024957774616960776 0.03585934915827381;
+                           0.07053969674257636 0.10145491369829167;
+                           0.10928716440799909 0.15821332006862954;
+                           0.1260218403227975 0.18270985390445083;
+                           0.1403470640900294 0.20534503972250218;
+                           0.1591819488336015 0.23740132549634094;
+                           0.16619150334082114 0.2574841927015898;
+                           0.16294175556587748 0.261255138955811;
+                           0.16920776495090983 0.2784335484692798;
+                           0.1671142952230893 0.3021381799340713;
+                           0.1823387591204167 0.3596054334022252;
+                           0.21232457328753865 0.4253218487528467;
+                           0.20648307473037922 0.44595312729305947;
+                           0.3088903295450278 0.6745656961983009;
+                           0.4944296396481271 0.9819332128466268;
+                           0.5990683230705801 1.1053138725180645]
+            expected_vthe = [22.654024448490784 22.494016350356883;
+                             23.744503682730446 23.61361063067715;
+                             25.26061134578617 25.173128418725682;
+                             26.177253875120066 26.122412383901523;
+                             26.510545637302872 26.47158368991228;
+                             26.798827552847246 26.77429043464489;
+                             27.202535498354287 27.2038739551587;
+                             27.506373594650846 27.529813468465488;
+                             27.631027625644876 27.664719606410365;
+                             27.750902611036295 27.793759280909274;
+                             27.935780521313532 27.992775960575692;
+                             28.089380398280714 28.157198480516957;
+                             28.15152314377127 28.223553488629253;
+                             28.211115085781678 28.2870195116558;
+                             28.28856778918977 28.369130039283018;
+                             28.330972960680672 28.41411592647979;
+                             28.33351348538364 28.416680586218863;
+                             28.330972960680675 28.41411592647976;
+                             28.288567789189763 28.369130039283064;
+                             28.211115085781678 28.287019511655785;
+                             28.15152314377127 28.223553488629236;
+                             28.089380398280724 28.157198480516957;
+                             27.93578052131354 27.992775960575713;
+                             27.750902611036295 27.79375928090935;
+                             27.63102762564488 27.664719606410383;
+                             27.506373594650853 27.529813468465495;
+                             27.202535498354287 27.2038739551587;
+                             26.79882755284725 26.774290434644872;
+                             26.510545637302886 26.471583689912283;
+                             26.177253875120083 26.122412383901523;
+                             25.26061134578619 25.173128418725696;
+                             23.744503682730446 23.613610630677236;
+                             22.65402444849082 22.494016350356937]
 
             if expected_Ez == nothing
                 # Error: no expected input provided

From bda160b2dd7fcba9f29a42ff05136ba8411cb97a Mon Sep 17 00:00:00 2001
From: John Omotani <john.omotani@ukaea.uk>
Date: Wed, 4 Dec 2024 19:57:59 +0000
Subject: [PATCH 43/43] Communicate block boundary points after each ADI
 iteration

This might help to remove some of the performance loss due to inverting
the preconditioner separately in each distributed-MPI block.
---
 .../src/electron_kinetic_equation.jl          |  83 +++---
 .../test/kinetic_electron_tests.jl            | 252 ++++++++++--------
 2 files changed, 182 insertions(+), 153 deletions(-)

diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl
index fad344e69..49b5e81fb 100644
--- a/moment_kinetics/src/electron_kinetic_equation.jl
+++ b/moment_kinetics/src/electron_kinetic_equation.jl
@@ -1245,6 +1245,50 @@ global_rank[] == 0 && println("recalculating precon")
                     v_size = vperp.n * vpa.n
                     pdf_size = z.n * v_size
 
+                    # Use these views to communicate block-boundary points
+                    output_buffer_pdf_view = reshape(@view(this_output_buffer[1:pdf_size]), size(precon_f))
+                    output_buffer_ppar_view = @view(this_output_buffer[pdf_size+1:end])
+                    f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir]
+                    f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir]
+                    receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir]
+                    receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir]
+
+                    function adi_communicate_boundary_points()
+                        # Ensure values of precon_f and precon_ppar are consistent across
+                        # distributed-MPI block boundaries, by averaging the values from
+                        # either side of each boundary (see the note on upwinding below).
+                        begin_vperp_vpa_region()
+                        @loop_vperp_vpa ivperp ivpa begin
+                            f_lower_endpoints[ivpa,ivperp] = output_buffer_pdf_view[ivpa,ivperp,1]
+                            f_upper_endpoints[ivpa,ivperp] = output_buffer_pdf_view[ivpa,ivperp,end]
+                        end
+                        # We upwind the z-derivatives in `electron_z_advection!()`, so would
+                        # expect that upwinding the results here in z would make sense.
+                        # However, upwinding here makes convergence much slower (~10x),
+                        # compared to picking the values from one side or other of the block
+                        # boundary, or taking the average of the values on either side.
+                        # Neither direction is special, so taking the average seems most
+                        # sensible (although in an initial test it does not seem to converge
+                        # faster than just picking one or the other).
+                        # Maybe this could indicate that it is more important to have a fully
+                        # self-consistent Jacobian inversion for the
+                        # `electron_vpa_advection()` part rather than taking half(ish) of the
+                        # values from one block and the other half(ish) from the other.
+                        reconcile_element_boundaries_MPI_z_pdf_vpavperpz!(
+                            output_buffer_pdf_view, f_lower_endpoints, f_upper_endpoints, receive_buffer1,
+                            receive_buffer2, z)
+
+                        begin_serial_region()
+                        @serial_region begin
+                            buffer_1[] = output_buffer_ppar_view[1]
+                            buffer_2[] = output_buffer_ppar_view[end]
+                        end
+                        reconcile_element_boundaries_MPI!(
+                            output_buffer_ppar_view, buffer_1, buffer_2, buffer_3, buffer_4, z)
+
+                        return nothing
+                    end
+
                     begin_z_vperp_vpa_region()
                     @loop_z_vperp_vpa iz ivperp ivpa begin
                         row = (iz - 1)*v_size + (ivperp - 1)*vpa.n + ivpa
@@ -1325,12 +1369,15 @@ global_rank[] == 0 && println("recalculating precon")
                     first_adi_v_solve!()
                     fill_intermediate_buffer!()
                     adi_z_solve!()
+                    adi_communicate_boundary_points()
+
                     for n ∈ 1:n_extra_iterations
                         precon_iterations[] += 1
                         fill_intermediate_buffer!()
                         adi_v_solve!()
                         fill_intermediate_buffer!()
                         adi_z_solve!()
+                        adi_communicate_boundary_points()
                     end
 
                     # Unpack preconditioner solution
@@ -1345,42 +1392,6 @@ global_rank[] == 0 && println("recalculating precon")
                         precon_ppar[iz] = this_output_buffer[row]
                     end
 
-                    # Ensure values of precon_f and precon_ppar are consistent across
-                    # distributed-MPI block boundaries. For precon_f take the upwind
-                    # value, and for precon_ppar take the average.
-                    f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir]
-                    f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir]
-                    receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir]
-                    receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir]
-                    begin_vperp_vpa_region()
-                    @loop_vperp_vpa ivperp ivpa begin
-                        f_lower_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,1]
-                        f_upper_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,end]
-                    end
-                    # We upwind the z-derivatives in `electron_z_advection!()`, so would
-                    # expect that upwinding the results here in z would make sense.
-                    # However, upwinding here makes convergence much slower (~10x),
-                    # compared to picking the values from one side or other of the block
-                    # boundary, or taking the average of the values on either side.
-                    # Neither direction is special, so taking the average seems most
-                    # sensible (although in an intial test it does not seem to converge
-                    # faster than just picking one or the other).
-                    # Maybe this could indicate that it is more important to have a fully
-                    # self-consistent Jacobian inversion for the
-                    # `electron_vpa_advection()` part rather than taking half(ish) of the
-                    # values from one block and the other half(ish) from the other.
-                    reconcile_element_boundaries_MPI_z_pdf_vpavperpz!(
-                        precon_f, f_lower_endpoints, f_upper_endpoints, receive_buffer1,
-                        receive_buffer2, z)
-
-                    begin_serial_region()
-                    @serial_region begin
-                        buffer_1[] = precon_ppar[1]
-                        buffer_2[] = precon_ppar[end]
-                    end
-                    reconcile_element_boundaries_MPI!(
-                        precon_ppar, buffer_1, buffer_2, buffer_3, buffer_4, z)
-
                     return nothing
                 end
 
diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl
index 0b0907c58..60ef83ae6 100644
--- a/moment_kinetics/test/kinetic_electron_tests.jl
+++ b/moment_kinetics/test/kinetic_electron_tests.jl
@@ -153,130 +153,148 @@ function run_test()
         run_moment_kinetics(this_boltzmann_input)
     end
 
-    for (this_kinetic_input, label, tol) ∈ ((deepcopy(kinetic_input), "", 1.0e-6),
-                                             (deepcopy(kinetic_input_adaptive_timestep), "adaptive timestep", 1.0e-4))
-        # Provide some progress info
-        println("    - testing kinetic electrons $label")
+    if ("nelement_local" ∈ keys(kinetic_input["z"])
+        && kinetic_input["z"]["nelement"] ÷ kinetic_input["z"]["nelement_local"] < global_size[]
+       )
+        # Using shared-memory parallelism, so should be using ADI preconditioner
+        adi_precon_iterations_values = (1,2)
+    else
+        adi_precon_iterations_values = -1
+    end
 
+    for (this_kinetic_input, label, tol) ∈ ((deepcopy(kinetic_input), "", 1.0e-6),
+                                             (deepcopy(kinetic_input_adaptive_timestep), ", adaptive timestep", 1.0e-4))
         this_kinetic_input["output"]["base_directory"] = test_output_directory
 
-        # Suppress console output while running.
-        quietoutput() do
-            restart_from_directory = joinpath(this_boltzmann_input["output"]["base_directory"], this_boltzmann_input["output"]["run_name"])
-            restart_from_file_pattern = this_boltzmann_input["output"]["run_name"] * ".dfns*.h5"
-            restart_from_file = glob(restart_from_file_pattern, restart_from_directory)[1]
-
-            # run kinetic electron simulation
-            run_moment_kinetics(this_kinetic_input; restart=restart_from_file)
-        end
-
-        if global_rank[] == 0
-            # Load and analyse output
-            #########################
-
-            path = joinpath(realpath(this_kinetic_input["output"]["base_directory"]), this_kinetic_input["output"]["run_name"])
-
-            # open the output file(s)
-            run_info = get_run_info_no_setup(path, dfns=true)
-
-            # load fields data
-            Ez = postproc_load_variable(run_info, "Ez")[:,1,:]
-            vthe = postproc_load_variable(run_info, "electron_thermal_speed")[:,1,:]
-            electron_advance_linear_iterations = postproc_load_variable(run_info, "electron_advance_linear_iterations")[end]
-
-            close_run_info(run_info)
-
-            # Regression test
-            # Benchmark data generated in serial on Linux with the LU preconditioner
-            expected_Ez = [-0.5990683230706185 -1.1053138725180998;
-                           -0.4944296396481284 -0.9819332128466166;
-                           -0.30889032954504736 -0.6745656961983237;
-                           -0.2064830747303776 -0.4459531272930669;
-                           -0.21232457328748663 -0.4253218487528007;
-                           -0.18233875912042674 -0.3596054334022437;
-                           -0.16711429522309232 -0.3021381799340685;
-                           -0.16920776495088916 -0.2784335484692499;
-                           -0.1629417555658927 -0.2612551389558109;
-                           -0.16619150334079993 -0.2574841927015592;
-                           -0.15918194883360942 -0.23740132549636406;
-                           -0.14034706409006803 -0.20534503972256973;
-                           -0.12602184032280567 -0.1827098539044343;
-                           -0.10928716440800472 -0.1582133200686042;
-                           -0.07053969674257217 -0.10145491369831482;
-                           -0.0249577746169536 -0.03585934915825971;
-                           -2.8327303308330514e-15 3.742211718942586e-14;
-                           0.024957774616960776 0.03585934915827381;
-                           0.07053969674257636 0.10145491369829167;
-                           0.10928716440799909 0.15821332006862954;
-                           0.1260218403227975 0.18270985390445083;
-                           0.1403470640900294 0.20534503972250218;
-                           0.1591819488336015 0.23740132549634094;
-                           0.16619150334082114 0.2574841927015898;
-                           0.16294175556587748 0.261255138955811;
-                           0.16920776495090983 0.2784335484692798;
-                           0.1671142952230893 0.3021381799340713;
-                           0.1823387591204167 0.3596054334022252;
-                           0.21232457328753865 0.4253218487528467;
-                           0.20648307473037922 0.44595312729305947;
-                           0.3088903295450278 0.6745656961983009;
-                           0.4944296396481271 0.9819332128466268;
-                           0.5990683230705801 1.1053138725180645]
-            expected_vthe = [22.654024448490784 22.494016350356883;
-                             23.744503682730446 23.61361063067715;
-                             25.26061134578617 25.173128418725682;
-                             26.177253875120066 26.122412383901523;
-                             26.510545637302872 26.47158368991228;
-                             26.798827552847246 26.77429043464489;
-                             27.202535498354287 27.2038739551587;
-                             27.506373594650846 27.529813468465488;
-                             27.631027625644876 27.664719606410365;
-                             27.750902611036295 27.793759280909274;
-                             27.935780521313532 27.992775960575692;
-                             28.089380398280714 28.157198480516957;
-                             28.15152314377127 28.223553488629253;
-                             28.211115085781678 28.2870195116558;
-                             28.28856778918977 28.369130039283018;
-                             28.330972960680672 28.41411592647979;
-                             28.33351348538364 28.416680586218863;
-                             28.330972960680675 28.41411592647976;
-                             28.288567789189763 28.369130039283064;
-                             28.211115085781678 28.287019511655785;
-                             28.15152314377127 28.223553488629236;
-                             28.089380398280724 28.157198480516957;
-                             27.93578052131354 27.992775960575713;
-                             27.750902611036295 27.79375928090935;
-                             27.63102762564488 27.664719606410383;
-                             27.506373594650853 27.529813468465495;
-                             27.202535498354287 27.2038739551587;
-                             26.79882755284725 26.774290434644872;
-                             26.510545637302886 26.471583689912283;
-                             26.177253875120083 26.122412383901523;
-                             25.26061134578619 25.173128418725696;
-                             23.744503682730446 23.613610630677236;
-                             22.65402444849082 22.494016350356937]
-
-            if expected_Ez == nothing
-                # Error: no expected input provided
-                println("data tested would be: Ez=", Ez)
-                @test false
+        for adi_precon_iterations ∈ adi_precon_iterations_values
+            if adi_precon_iterations < 0
+                # Provide some progress info
+                println("    - testing kinetic electrons $label")
             else
-                @test elementwise_isapprox(Ez, expected_Ez, rtol=0.0, atol=2.0*tol)
+                this_kinetic_input["nonlinear_solver"]["adi_precon_iterations"] = adi_precon_iterations
+
+                # Provide some progress info
+                println("    - testing kinetic electrons $adi_precon_iterations ADI iterations$label")
             end
-            if expected_vthe == nothing
-                # Error: no expected input provided
-                println("data tested would be: vthe=", vthe)
-                @test false
-            else
-                @test elementwise_isapprox(vthe, expected_vthe, rtol=tol, atol=0.0)
+
+            # Suppress console output while running.
+            quietoutput() do
+                restart_from_directory = joinpath(this_boltzmann_input["output"]["base_directory"], this_boltzmann_input["output"]["run_name"])
+                restart_from_file_pattern = this_boltzmann_input["output"]["run_name"] * ".dfns*.h5"
+                restart_from_file = glob(restart_from_file_pattern, restart_from_directory)[1]
+
+                # run kinetic electron simulation
+                run_moment_kinetics(this_kinetic_input; restart=restart_from_file)
             end
 
-            # Iteration counts are fairly inconsistent, but it's good to check that they at
-            # least don't unexpectedly increase by an order of magnitude.
-            # Expected iteration count is from a serial run on Linux.
-            expected_electron_advance_linear_iterations = 48716
-            @test electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations
-            if !(electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations)
-                println("electron_advance_linear_iterations=$electron_advance_linear_iterations was greater than twice the expected $expected_electron_advance_linear_iterations.")
+            if global_rank[] == 0
+                # Load and analyse output
+                #########################
+
+                path = joinpath(realpath(this_kinetic_input["output"]["base_directory"]), this_kinetic_input["output"]["run_name"])
+
+                # open the output file(s)
+                run_info = get_run_info_no_setup(path, dfns=true)
+
+                # load fields data
+                Ez = postproc_load_variable(run_info, "Ez")[:,1,:]
+                vthe = postproc_load_variable(run_info, "electron_thermal_speed")[:,1,:]
+                electron_advance_linear_iterations = postproc_load_variable(run_info, "electron_advance_linear_iterations")[end]
+
+                close_run_info(run_info)
+
+                # Regression test
+                # Benchmark data generated in serial on Linux with the LU preconditioner
+                expected_Ez = [-0.5990683230706185 -1.1053138725180998;
+                               -0.4944296396481284 -0.9819332128466166;
+                               -0.30889032954504736 -0.6745656961983237;
+                               -0.2064830747303776 -0.4459531272930669;
+                               -0.21232457328748663 -0.4253218487528007;
+                               -0.18233875912042674 -0.3596054334022437;
+                               -0.16711429522309232 -0.3021381799340685;
+                               -0.16920776495088916 -0.2784335484692499;
+                               -0.1629417555658927 -0.2612551389558109;
+                               -0.16619150334079993 -0.2574841927015592;
+                               -0.15918194883360942 -0.23740132549636406;
+                               -0.14034706409006803 -0.20534503972256973;
+                               -0.12602184032280567 -0.1827098539044343;
+                               -0.10928716440800472 -0.1582133200686042;
+                               -0.07053969674257217 -0.10145491369831482;
+                               -0.0249577746169536 -0.03585934915825971;
+                               -2.8327303308330514e-15 3.742211718942586e-14;
+                               0.024957774616960776 0.03585934915827381;
+                               0.07053969674257636 0.10145491369829167;
+                               0.10928716440799909 0.15821332006862954;
+                               0.1260218403227975 0.18270985390445083;
+                               0.1403470640900294 0.20534503972250218;
+                               0.1591819488336015 0.23740132549634094;
+                               0.16619150334082114 0.2574841927015898;
+                               0.16294175556587748 0.261255138955811;
+                               0.16920776495090983 0.2784335484692798;
+                               0.1671142952230893 0.3021381799340713;
+                               0.1823387591204167 0.3596054334022252;
+                               0.21232457328753865 0.4253218487528467;
+                               0.20648307473037922 0.44595312729305947;
+                               0.3088903295450278 0.6745656961983009;
+                               0.4944296396481271 0.9819332128466268;
+                               0.5990683230705801 1.1053138725180645]
+                expected_vthe = [22.654024448490784 22.494016350356883;
+                                 23.744503682730446 23.61361063067715;
+                                 25.26061134578617 25.173128418725682;
+                                 26.177253875120066 26.122412383901523;
+                                 26.510545637302872 26.47158368991228;
+                                 26.798827552847246 26.77429043464489;
+                                 27.202535498354287 27.2038739551587;
+                                 27.506373594650846 27.529813468465488;
+                                 27.631027625644876 27.664719606410365;
+                                 27.750902611036295 27.793759280909274;
+                                 27.935780521313532 27.992775960575692;
+                                 28.089380398280714 28.157198480516957;
+                                 28.15152314377127 28.223553488629253;
+                                 28.211115085781678 28.2870195116558;
+                                 28.28856778918977 28.369130039283018;
+                                 28.330972960680672 28.41411592647979;
+                                 28.33351348538364 28.416680586218863;
+                                 28.330972960680675 28.41411592647976;
+                                 28.288567789189763 28.369130039283064;
+                                 28.211115085781678 28.287019511655785;
+                                 28.15152314377127 28.223553488629236;
+                                 28.089380398280724 28.157198480516957;
+                                 27.93578052131354 27.992775960575713;
+                                 27.750902611036295 27.79375928090935;
+                                 27.63102762564488 27.664719606410383;
+                                 27.506373594650853 27.529813468465495;
+                                 27.202535498354287 27.2038739551587;
+                                 26.79882755284725 26.774290434644872;
+                                 26.510545637302886 26.471583689912283;
+                                 26.177253875120083 26.122412383901523;
+                                 25.26061134578619 25.173128418725696;
+                                 23.744503682730446 23.613610630677236;
+                                 22.65402444849082 22.494016350356937]
+
+                if expected_Ez == nothing
+                    # Error: no expected input provided
+                    println("data tested would be: Ez=", Ez)
+                    @test false
+                else
+                    @test elementwise_isapprox(Ez, expected_Ez, rtol=0.0, atol=2.0*tol)
+                end
+                if expected_vthe == nothing
+                    # Error: no expected input provided
+                    println("data tested would be: vthe=", vthe)
+                    @test false
+                else
+                    @test elementwise_isapprox(vthe, expected_vthe, rtol=tol, atol=0.0)
+                end
+
+                # Iteration counts are fairly inconsistent, but it's good to check that they at
+                # least don't unexpectedly increase by an order of magnitude.
+                # Expected iteration count is from a serial run on Linux.
+                expected_electron_advance_linear_iterations = 48716
+                @test electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations
+                if !(electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations)
+                    println("electron_advance_linear_iterations=$electron_advance_linear_iterations was greater than twice the expected $expected_electron_advance_linear_iterations.")
+                end
             end
         end
     end