From 67518b0da3c0d86673de7828b98290c75300482d Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 23 Oct 2024 22:58:55 +0100 Subject: [PATCH 01/43] Separate Jacobians for "electron_split_lu" into separate function Cleaner, and might help compile-time. --- .../src/electron_kinetic_equation.jl | 354 ++++++++++-------- 1 file changed, 190 insertions(+), 164 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 7359576e2..83664c4cf 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -800,7 +800,6 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval nl_solver_params.solves_since_precon_update[] = 0 - dt = t_params.dt[] vth = @view moments.electron.vth[:,ir] me = composition.me_over_mi dens = @view moments.electron.dens[:,ir] @@ -825,171 +824,16 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos begin_vperp_vpa_region() update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) @loop_vperp_vpa ivperp ivpa begin - z_matrix = allocate_float(z.n, z.n) - z_matrix .= 0.0 - - z_speed = @view z_advect[1].speed[:,ivpa,ivperp,ir] - for ielement ∈ 1:z.nelement_local - imin = z.imin[ielement] - (ielement != 1) - imax = z.imax[ielement] - if ielement == 1 - z_matrix[imin,imin:imax] .+= z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement] - else - if z_speed[imin] < 0.0 - z_matrix[imin,imin:imax] .+= z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement] - elseif z_speed[imin] > 0.0 - # Do nothing - else - z_matrix[imin,imin:imax] .+= 0.5 .* z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement] - end - end - z_matrix[imin+1:imax-1,imin:imax] .+= z_spectral.lobatto.Dmat[2:end-1,:] ./ z.element_scale[ielement] - if ielement == z.nelement_local - 
z_matrix[imax,imin:imax] .+= z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement] - else - if z_speed[imax] < 0.0 - # Do nothing - elseif z_speed[imax] > 0.0 - z_matrix[imax,imin:imax] .+= z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement] - else - z_matrix[imax,imin:imax] .+= 0.5 .* z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement] - end - end - end - # Multiply by advection speed - for row ∈ 1:z.n - z_matrix[row,:] .*= dt * z_speed[row] - end - - # Diagonal entries - for row ∈ 1:z.n - z_matrix[row,row] += 1.0 - - # Terms from `add_contribution_from_pdf_term!()` - z_matrix[row,row] += dt * (0.5 * dqpar_dz[row] / ppar[row] - + vpa.grid[ivpa] * vth[row] * (ddens_dz[row] / dens[row] - - dvth_dz[row] / vth[row])) - end - if external_source_settings.electron.active - for row ∈ 1:z.n - # Source terms from `add_contribution_from_pdf_term!()` - z_matrix[row,row] += dt * (1.5 * source_density_amplitude[row] / dens[row] - - (0.5 * source_pressure_amplitude[row] - + source_momentum_amplitude[row]) / ppar[row] - ) - end - if external_source_settings.electron.source_type == "energy" - for row ∈ 1:z.n - # Contribution from `external_electron_source!()` - z_matrix[row,row] += dt * source_amplitude[row] - end - end - end - if collisions.krook.nuee0 > 0.0 || collisions.krook.nuei0 > 0.0 - for row ∈ 1:z.n - # Contribution from electron_krook_collisions!() - nu_ee = get_collision_frequency_ee(collisions, dens[row], vth[row]) - nu_ei = get_collision_frequency_ei(collisions, dens[row], vth[row]) - z_matrix[row,row] += dt * (nu_ee + nu_ei) - end - end - + z_matrix, ppar_matrix = get_electron_split_Jacobians!( + ivperp, ivpa, ppar, moments, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, + ir, evolve_ppar) @timeit_debug global_timer "lu" nl_solver_params.preconditioners.z[ivpa,ivperp,ir] = lu(sparse(z_matrix)) - end - - if 
z.irank == 0 - ppar_matrix = allocate_float(z.n, z.n) - ppar_matrix .= 0.0 - - if composition.electron_physics == kinetic_electrons_with_temperature_equation - error("kinetic_electrons_with_temperature_equation not " - * "supported yet in preconditioner") - elseif composition.electron_physics != kinetic_electrons - error("Unsupported electron_physics=$(composition.electron_physics) " - * "in electron_backward_euler!() preconditioner.") - end - - # Reconstruct w_∥^3 moment of g_e from already-calculated qpar - @views third_moment = @. 0.5 * moments.electron.qpar[:,ir] / electron_ppar_new / vth - - # Note that as - # qpar = 2 * ppar * vth * third_moment - # = 2 * ppar^(3/2) / dens^(1/2) / me^(1/2) * third_moment - # we have that - # d(qpar)/dz = 2 * ppar^(3/2) / dens^(1/2) / me^(1/2) * d(third_moment)/dz - # - ppar^(3/2) / dens^(3/2) / me^(1/2) * third_moment * d(dens)/dz - # + 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(ppar)/dz - # so for the Jacobian - # d[d(qpar)/dz)]/d[ppar] - # = 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * d(third_moment)/dz - # - 3/2 * ppar^(1/2) / dens^(3/2) / me^(1/2) * third_moment * d(dens)/dz - # + 3/2 / ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(ppar)/dz - # + 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(.)/dz - dthird_moment_dz = z.scratch2 - derivative_z!(z.scratch2, third_moment, buffer_1, buffer_2, - buffer_3, buffer_4, z_spectral, z) - - # Diagonal terms - for row ∈ 1:z.n - ppar_matrix[row,row] = 1.0 - - # 3*ppar*dupar_dz - ppar_matrix[row,row] += 3.0 * dt * dupar_dz[row] - - # terms from d(qpar)/dz - ppar_matrix[row,row] += - dt * (3.0 * sqrt(electron_ppar_new[row] / dens[row] / me) * dthird_moment_dz[row] - - 1.5 * sqrt(electron_ppar_new[row] / me) / dens[row]^1.5 * third_moment[row] * ddens_dz[row] - + 1.5 / sqrt(electron_ppar_new[row] / dens[row] / me) * third_moment[row] * dppar_dz[row]) - end - if ion_dt !== nothing - # Backward-Euler forcing term - for row ∈ 1:z.n - ppar_matrix[row,row] += 
dt / ion_dt - end - end - - - # d(.)/dz terms - # Note that the z-derivative matrix is local to this block, and - # for the preconditioner we do not include any distributed-MPI - # communication (we rely on the JFNK iteration to sort out the - # coupling between blocks). - if !isa(z_spectral, gausslegendre_info) - error("Only gausslegendre_pseudospectral coordinate type is " - * "supported by electron_backward_euler!() " - * "preconditioner because we need differentiation" - * "matrices.") - end - z_deriv_matrix = z_spectral.D_matrix - for row ∈ 1:z.n - @. ppar_matrix[row,:] += - dt * (upar[row] - + 3.0 * sqrt(electron_ppar_new[row] / dens[row] / me) * third_moment[row]) * - z_deriv_matrix[row,:] + if ivperp == 1 && ivpa == 1 + @timeit_debug global_timer "lu" nl_solver_params.preconditioners.ppar[ir] = lu(sparse(ppar_matrix)) end - - if num_diss_params.electron.moment_dissipation_coefficient > 0.0 - error("z-diffusion of electron_ppar not yet supported in " - * "preconditioner") - end - if collisions.nu_ei > 0.0 - error("electron-ion collision terms for electron_ppar not yet " - * "supported in preconditioner") - end - if composition.n_neutral_species > 0 && collisions.charge_exchange_electron > 0.0 - error("electron 'charge exchange' terms for electron_ppar not yet " - * "supported in preconditioner") - end - if composition.n_neutral_species > 0 && collisions.ionization_electron > 0.0 - error("electron ionization terms for electron_ppar not yet " - * "supported in preconditioner") - end - - @timeit_debug global_timer "lu" nl_solver_params.preconditioners.ppar[ir] = lu(sparse(ppar_matrix)) - else - ppar_matrix = allocate_float(0, 0) - ppar_matrix[] = 1.0 end end @@ -3040,6 +2884,188 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati return nothing end +""" + get_electron_split_Jacobians!(ivperp, ivpa, ppar, moments, collisions, composition, + z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, 
scratch_dummy, + external_source_settings, num_diss_params, t_params, + ion_dt, ir, evolve_ppar + +Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equation and (if +`evolve_ppar=true`) the electron energy equation. +""" +@timeit global_timer get_electron_split_Jacobians!( + ivperp, ivpa, ppar, moments, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, + num_diss_params, t_params, ion_dt, ir, evolve_ppar) = begin + + dt = t_params.dt[] + + z_matrix = allocate_float(z.n, z.n) + z_matrix .= 0.0 + + z_speed = @view z_advect[1].speed[:,ivpa,ivperp,ir] + for ielement ∈ 1:z.nelement_local + imin = z.imin[ielement] - (ielement != 1) + imax = z.imax[ielement] + if ielement == 1 + z_matrix[imin,imin:imax] .+= z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement] + else + if z_speed[imin] < 0.0 + z_matrix[imin,imin:imax] .+= z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement] + elseif z_speed[imin] > 0.0 + # Do nothing + else + z_matrix[imin,imin:imax] .+= 0.5 .* z_spectral.lobatto.Dmat[1,:] ./ z.element_scale[ielement] + end + end + z_matrix[imin+1:imax-1,imin:imax] .+= z_spectral.lobatto.Dmat[2:end-1,:] ./ z.element_scale[ielement] + if ielement == z.nelement_local + z_matrix[imax,imin:imax] .+= z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement] + else + if z_speed[imax] < 0.0 + # Do nothing + elseif z_speed[imax] > 0.0 + z_matrix[imax,imin:imax] .+= z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement] + else + z_matrix[imax,imin:imax] .+= 0.5 .* z_spectral.lobatto.Dmat[end,:] ./ z.element_scale[ielement] + end + end + end + # Multiply by advection speed + for row ∈ 1:z.n + z_matrix[row,:] .*= dt * z_speed[row] + end + + # Diagonal entries + for row ∈ 1:z.n + z_matrix[row,row] += 1.0 + + # Terms from `add_contribution_from_pdf_term!()` + z_matrix[row,row] += dt * (0.5 * dqpar_dz[row] / ppar[row] + + vpa.grid[ivpa] * vth[row] 
* (ddens_dz[row] / dens[row] + - dvth_dz[row] / vth[row])) + end + if external_source_settings.electron.active + for row ∈ 1:z.n + # Source terms from `add_contribution_from_pdf_term!()` + z_matrix[row,row] += dt * (1.5 * source_density_amplitude[row] / dens[row] + - (0.5 * source_pressure_amplitude[row] + + source_momentum_amplitude[row]) / ppar[row] + ) + end + if external_source_settings.electron.source_type == "energy" + for row ∈ 1:z.n + # Contribution from `external_electron_source!()` + z_matrix[row,row] += dt * source_amplitude[row] + end + end + end + if collisions.krook.nuee0 > 0.0 || collisions.krook.nuei0 > 0.0 + for row ∈ 1:z.n + # Contribution from electron_krook_collisions!() + nu_ee = get_collision_frequency_ee(collisions, dens[row], vth[row]) + nu_ei = get_collision_frequency_ei(collisions, dens[row], vth[row]) + z_matrix[row,row] += dt * (nu_ee + nu_ei) + end + end + + if z.irank == 0 && ivperp == 1 && ivpa == 1 + ppar_matrix = allocate_float(z.n, z.n) + ppar_matrix .= 0.0 + + if composition.electron_physics == kinetic_electrons_with_temperature_equation + error("kinetic_electrons_with_temperature_equation not " + * "supported yet in preconditioner") + elseif composition.electron_physics != kinetic_electrons + error("Unsupported electron_physics=$(composition.electron_physics) " + * "in electron_backward_euler!() preconditioner.") + end + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + @views third_moment = @. 
0.5 * moments.electron.qpar[:,ir] / electron_ppar_new / vth + + # Note that as + # qpar = 2 * ppar * vth * third_moment + # = 2 * ppar^(3/2) / dens^(1/2) / me^(1/2) * third_moment + # we have that + # d(qpar)/dz = 2 * ppar^(3/2) / dens^(1/2) / me^(1/2) * d(third_moment)/dz + # - ppar^(3/2) / dens^(3/2) / me^(1/2) * third_moment * d(dens)/dz + # + 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(ppar)/dz + # so for the Jacobian + # d[d(qpar)/dz)]/d[ppar] + # = 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * d(third_moment)/dz + # - 3/2 * ppar^(1/2) / dens^(3/2) / me^(1/2) * third_moment * d(dens)/dz + # + 3/2 / ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(ppar)/dz + # + 3 * ppar^(1/2) / dens^(1/2) / me^(1/2) * third_moment * d(.)/dz + dthird_moment_dz = z.scratch2 + derivative_z!(z.scratch2, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + # Diagonal terms + for row ∈ 1:z.n + ppar_matrix[row,row] = 1.0 + + # 3*ppar*dupar_dz + ppar_matrix[row,row] += 3.0 * dt * dupar_dz[row] + + # terms from d(qpar)/dz + ppar_matrix[row,row] += + dt * (3.0 * sqrt(electron_ppar_new[row] / dens[row] / me) * dthird_moment_dz[row] + - 1.5 * sqrt(electron_ppar_new[row] / me) / dens[row]^1.5 * third_moment[row] * ddens_dz[row] + + 1.5 / sqrt(electron_ppar_new[row] / dens[row] / me) * third_moment[row] * dppar_dz[row]) + end + if ion_dt !== nothing + # Backward-Euler forcing term + for row ∈ 1:z.n + ppar_matrix[row,row] += dt / ion_dt + end + end + + # d(.)/dz terms + # Note that the z-derivative matrix is local to this block, and + # for the preconditioner we do not include any distributed-MPI + # communication (we rely on the JFNK iteration to sort out the + # coupling between blocks). 
+ if !isa(z_spectral, gausslegendre_info) + error("Only gausslegendre_pseudospectral coordinate type is " + * "supported by electron_backward_euler!() " + * "preconditioner because we need differentiation" + * "matrices.") + end + z_deriv_matrix = z_spectral.D_matrix + for row ∈ 1:z.n + @. ppar_matrix[row,:] += + dt * (upar[row] + + 3.0 * sqrt(electron_ppar_new[row] / dens[row] / me) * third_moment[row]) * + z_deriv_matrix[row,:] + end + + if num_diss_params.electron.moment_dissipation_coefficient > 0.0 + error("z-diffusion of electron_ppar not yet supported in " + * "preconditioner") + end + if collisions.nu_ei > 0.0 + error("electron-ion collision terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.charge_exchange_electron > 0.0 + error("electron 'charge exchange' terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.ionization_electron > 0.0 + error("electron ionization terms for electron_ppar not yet " + * "supported in preconditioner") + end + else + ppar_matrix = allocate_float(0, 0) + ppar_matrix[] = 1.0 + end + + return z_matrix, ppar_matrix +end + #""" #electron_kinetic_equation_residual! calculates the residual of the (time-independent) electron kinetic equation #INPUTS: From 8be9e5aa4c6f800c85b681e191cf15ea524367ad Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 23 Oct 2024 23:11:52 +0100 Subject: [PATCH 02/43] Use Val() types for preconditioner_type Hopefully helps the compiler to remove unneeded branches at compile time. 
--- moment_kinetics/src/electron_kinetic_equation.jl | 6 +++--- moment_kinetics/src/nonlinear_solvers.jl | 14 +++++++------- moment_kinetics/src/time_advance.jl | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 83664c4cf..49645690e 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -796,7 +796,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos scratch_dummy, z, z_spectral, num_diss_params.electron.moment_dissipation_coefficient, ir) - if nl_solver_params.preconditioner_type == "electron_split_lu" + if nl_solver_params.preconditioner_type === Val(:electron_split_lu) if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval nl_solver_params.solves_since_precon_update[] = 0 @@ -863,7 +863,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos left_preconditioner = identity right_preconditioner = split_precon! - elseif nl_solver_params.preconditioner_type == "electron_lu" + elseif nl_solver_params.preconditioner_type === Val(:electron_lu) if t_params.dt[] > 1.5 * nl_solver_params.precon_dt[] || t_params.dt[] < 2.0/3.0 * nl_solver_params.precon_dt[] @@ -993,7 +993,7 @@ global_rank[] == 0 && println("recalculating precon") left_preconditioner = identity right_preconditioner = lu_precon! 
- elseif nl_solver_params.preconditioner_type == "none" + elseif nl_solver_params.preconditioner_type === Val(:none) left_preconditioner = identity right_preconditioner = identity else diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index fa330441c..0958070a8 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -42,7 +42,7 @@ using MPI using SparseArrays using StatsBase: mean -struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon} +struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon,Tpretype} rtol::mk_float atol::mk_float nonlinear_max_iterations::mk_int @@ -67,7 +67,7 @@ struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon} serial_solve::Bool max_nonlinear_iterations_this_step::Base.RefValue{mk_int} max_linear_iterations_this_step::Base.RefValue{mk_int} - preconditioner_type::String + preconditioner_type::Tpretype preconditioner_update_interval::mk_int preconditioners::Tprecon end @@ -83,7 +83,7 @@ for example a preconditioner object for each point in that outer loop. """ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); default_rtol=1.0e-5, default_atol=1.0e-12, serial_solve=false, - electron_ppar_pdf_solve=false, preconditioner_type="none") + electron_ppar_pdf_solve=false, preconditioner_type=Val(:none)) nl_solver_section = set_defaults_and_check_section!( input_dict, "nonlinear_solver"; rtol=default_rtol, @@ -157,12 +157,12 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa end end - if preconditioner_type == "lu" + if preconditioner_type === Val(:lu) # Create dummy LU solver objects so we can create an array for preconditioners. # These will be calculated properly within the time loop. 
preconditioners = fill(lu(sparse(1.0*I, total_size_coords, total_size_coords)), reverse(outer_coord_sizes)) - elseif preconditioner_type == "electron_split_lu" + elseif preconditioner_type === Val(:electron_split_lu) preconditioners = (z=fill(lu(sparse(1.0*I, coords.z.n, coords.z.n)), tuple(coords.vpa.n, reverse(outer_coord_sizes)...)), vpa=fill(lu(sparse(1.0*I, coords.vpa.n, coords.vpa.n)), @@ -170,7 +170,7 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa ppar=fill(lu(sparse(1.0*I, coords.z.n, coords.z.n)), reverse(outer_coord_sizes)), ) - elseif preconditioner_type == "electron_lu" + elseif preconditioner_type === Val(:electron_lu) pdf_plus_ppar_size = total_size_coords + coords.z.n preconditioners = fill((lu(sparse(1.0*I, 1, 1)), allocate_shared_float(pdf_plus_ppar_size, pdf_plus_ppar_size), @@ -178,7 +178,7 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa allocate_shared_float(pdf_plus_ppar_size), ), reverse(outer_coord_sizes)) - elseif preconditioner_type == "none" + elseif preconditioner_type === Val(:none) preconditioners = nothing else error("Unrecognised preconditioner_type=$preconditioner_type") diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 57ac3167e..30ec91cbd 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -671,7 +671,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0, electron_ppar_pdf_solve=true, - preconditioner_type="electron_lu") + preconditioner_type=Val(:electron_lu)) nl_solver_ion_advance_params = setup_nonlinear_solve(t_params.implicit_ion_advance, input_dict, (s=composition.n_ion_species, r=r, z=z, vperp=vperp, @@ -679,7 +679,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop (); default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 
10.0, - preconditioner_type="lu") + preconditioner_type=Val(:lu)) # Implicit solve for vpa_advection term should be done in serial, as it will be called # within a parallelised s_r_z_vperp loop. nl_solver_vpa_advection_params = @@ -687,7 +687,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop (composition.n_ion_species, r, z, vperp); default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0, - serial_solve=true, preconditioner_type="lu") + serial_solve=true, preconditioner_type=Val(:lu)) if nl_solver_ion_advance_params !== nothing && nl_solver_vpa_advection_params !== nothing error("Cannot use implicit_ion_advance and implicit_vpa_advection at the same " From 51c5180c413a03bf6d5def1f7e8e8078bfa724e0 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 24 Oct 2024 12:29:03 +0100 Subject: [PATCH 03/43] Improve type-stability in electron_backward_euler!() Hopefully improves compile/run time. --- .../src/electron_kinetic_equation.jl | 73 ++++++++++--------- moment_kinetics/src/time_advance.jl | 3 +- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 49645690e..1a7327edd 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -665,7 +665,9 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos reduced_by_ion_dt = false if ion_dt !== nothing - evolve_ppar = true + if !evolve_ppar + error("evolve_ppar must be `true` when `ion_dt` is passed. 
ion_dt=$ion_dt") + end # Use forward-Euler step (with `ion_dt` as the timestep) as initial guess for # updated electron_ppar @@ -742,7 +744,7 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos t_params.moments_output_counter[], r, z, vperp, vpa) end end - electron_pdf_converged = false + electron_pdf_converged = Ref(false) # No paralleism in r for now - will need to add a specially adapted shared-memory # parallelism scheme to allow it for 2D1V or 2D2V simulations. for ir ∈ 1:r.n @@ -753,11 +755,11 @@ function electron_backward_euler!(scratch, pdf, moments, phi, collisions, compos buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] # initialise the electron pdf convergence flag to false - electron_pdf_converged = false + electron_pdf_converged[] = false first_step = true # evolve (artificially) in time until the residual is less than the tolerance - while (!electron_pdf_converged + while (!electron_pdf_converged[] && ((max_electron_pdf_iterations !== nothing && t_params.step_counter[] - initial_step_counter < max_electron_pdf_iterations) || (max_electron_sim_time !== nothing && t_params.t[] - initial_time < max_electron_sim_time)) && t_params.dt[] > 0.0 && !isnan(t_params.dt[])) @@ -922,33 +924,33 @@ global_rank[] == 0 && println("recalculating precon") function lu_precon!(x) precon_ppar, precon_f = x - precon_lu, _, input_buffer, output_buffer = + precon_lu, _, this_input_buffer, this_output_buffer = nl_solver_params.preconditioners[ir] begin_serial_region() counter = 1 @loop_z_vperp_vpa iz ivperp ivpa begin - input_buffer[counter] = precon_f[ivpa,ivperp,iz] + this_input_buffer[counter] = precon_f[ivpa,ivperp,iz] counter += 1 end @loop_z iz begin - input_buffer[counter] = precon_ppar[iz] + this_input_buffer[counter] = precon_ppar[iz] counter += 1 end begin_serial_region() @serial_region begin - @timeit_debug global_timer "ldiv!" ldiv!(output_buffer, precon_lu, input_buffer) + @timeit_debug global_timer "ldiv!" 
ldiv!(this_output_buffer, precon_lu, this_input_buffer) end begin_serial_region() counter = 1 @loop_z_vperp_vpa iz ivperp ivpa begin - precon_f[ivpa,ivperp,iz] = output_buffer[counter] + precon_f[ivpa,ivperp,iz] = this_output_buffer[counter] counter += 1 end @loop_z iz begin - precon_ppar[iz] = output_buffer[counter] + precon_ppar[iz] = this_output_buffer[counter] counter += 1 end @@ -1003,8 +1005,8 @@ global_rank[] == 0 && println("recalculating precon") # Do a backward-Euler update of the electron pdf, and (if evove_ppar=true) the # electron parallel pressure. - function residual_func!(residual, new_variables) - electron_ppar_residual, f_electron_residual = residual + function residual_func!(this_residual, new_variables) + electron_ppar_residual, f_electron_residual = this_residual electron_ppar_newvar, f_electron_newvar = new_variables # enforce the boundary condition(s) on the electron pdf @@ -1259,48 +1261,53 @@ global_rank[] == 0 && println("recalculating precon") buffer_3, buffer_4, z_spectral, z) end - residual = -1.0 + residual_norm = -1.0 if newton_success # Calculate residuals to decide if iteration is converged. - # Might want an option to calculate the residual only after a certain number - # of iterations (especially during initialization when there are likely to be - # a large number of iterations required) to avoid the expense, and especially - # the global MPI.Bcast()? + # Might want an option to calculate the r_normesidual only after a certain + # number of iterations (especially during initialization when there are + # likely to be a large number of iterations required) to avoid the + # expense, and especially the global MPI.Bcast()? 
begin_z_vperp_vpa_region() - residual = steady_state_residuals(new_scratch.pdf_electron, - old_scratch.pdf_electron, - t_params.dt[]; use_mpi=true, - only_max_abs=true) if global_rank[] == 0 - residual = first(values(residual))[1] + ss_residual_norms = steady_state_residuals(new_scratch.pdf_electron, + old_scratch.pdf_electron, + t_params.dt[]; use_mpi=true, + only_max_abs=true) + residual_norm = first(values(ss_residual_norms))[1]::mk_float + else + ss_residual_norms = steady_state_residuals(new_scratch.pdf_electron, + old_scratch.pdf_electron, + t_params.dt[]; use_mpi=true, + only_max_abs=true) end if evolve_ppar - ppar_residual = + ss_ppar_residual_norms = steady_state_residuals(new_scratch.electron_ppar, old_scratch.electron_ppar, t_params.dt[]; use_mpi=true, only_max_abs=true) if global_rank[] == 0 - ppar_residual = first(values(ppar_residual))[1] - residual = max(residual, ppar_residual) + ppar_residual = first(values(ss_ppar_residual_norms))[1]::mk_float + residual_norm = max(residual_norm, ppar_residual) end end if global_rank[] == 0 if residual_tolerance === nothing residual_tolerance = t_params.converged_residual_value end - electron_pdf_converged = abs(residual) < residual_tolerance + electron_pdf_converged[] = abs(residual_norm) < residual_tolerance end - @timeit_debug global_timer "MPI.Bcast comm_world" electron_pdf_converged = MPI.Bcast(electron_pdf_converged, 0, comm_world) + @timeit_debug global_timer "MPI.Bcast! 
comm_world" MPI.Bcast!(electron_pdf_converged, 0, comm_world) end if (mod(t_params.step_counter[] - initial_step_counter,100) == 0) begin_serial_region() @serial_region begin if z.irank == 0 && z.irank == z.nrank - 1 - println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary: ", phi[[1,end],1], " residual: ", residual) + println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary: ", phi[[1,end],1], " residual_norm: ", residual_norm) elseif z.irank == 0 - println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary_lower: ", phi[1,1], " residual: ", residual) + println("iteration: ", t_params.step_counter[] - initial_step_counter, " time: ", t_params.t[], " dt_electron: ", t_params.dt[], " phi_boundary_lower: ", phi[1,1], " residual_norm: ", residual_norm) end end end @@ -1327,11 +1334,11 @@ global_rank[] == 0 && println("recalculating precon") reset_nonlinear_per_stage_counters!(nl_solver_params) t_params.step_counter[] += 1 - if electron_pdf_converged + if electron_pdf_converged[] break end end - if !electron_pdf_converged + if !electron_pdf_converged[] # If electron solve failed to converge for some `ir`, the failure will be # handled by restarting the ion timestep with a smaller dt, so no need to try # to solve for further `ir` values. 
@@ -1355,7 +1362,7 @@ global_rank[] == 0 && println("recalculating precon") end begin_serial_region() @serial_region begin - if !electron_pdf_converged || do_debug_io + if !electron_pdf_converged[] || do_debug_io if io_electron !== nothing && io_electron !== true t_params.moments_output_counter[] += 1 write_electron_state(scratch, moments, t_params, io_electron, @@ -1390,7 +1397,7 @@ global_rank[] == 0 && println("recalculating precon") # Reset dt in case it was reduced to be less than 0.5*ion_dt t_params.dt[] = t_params.previous_dt[] end - if !electron_pdf_converged + if !electron_pdf_converged[] success = "kinetic-electrons" else success = "" diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 30ec91cbd..15278f281 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -3529,7 +3529,8 @@ end external_source_settings, num_diss_params, nl_solver_params.electron_advance, max_electron_pdf_iterations, - max_electron_sim_time; ion_dt=dt) + max_electron_sim_time; evolve_ppar=true, + ion_dt=dt) # Update `fvec_out.electron_ppar` with the new electron pressure begin_r_z_region() From cfc36a42135f38d95ac2791f7b2f1a310e961c70 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 24 Oct 2024 12:31:46 +0100 Subject: [PATCH 04/43] Fix typo in `steady_state_square_residuals()` --- moment_kinetics/src/analysis.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/src/analysis.jl b/moment_kinetics/src/analysis.jl index 68e911efa..3f17645a9 100644 --- a/moment_kinetics/src/analysis.jl +++ b/moment_kinetics/src/analysis.jl @@ -817,7 +817,7 @@ function steady_state_square_residuals(variable, variable_at_previous_time, dt; if only_max_abs absolute_residual = - _steady_state_residual(variable, variable_at_previous_time, reshaped_dt) + _steady_state_absolute_residual(variable, variable_at_previous_time, reshaped_dt) # Need to wrap the maximum(...) in a call to vec(...) 
so that we return a # Vector, not an N-dimensional array where the first (N-1) dimensions all have # size 1. From 9c43c728d95115fdbacf3af89a5833405afc76e5 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 24 Oct 2024 12:48:11 +0100 Subject: [PATCH 05/43] Return `Vector` from `steady_state_residuals()` when `only_max_abs=true` Also provide positional-arguments-only form of `steady_state_residuals()`, which helps const-propagation and therefore type stability (the compiler knows that the function returns a Vector when `only_max_abs` is passed a const `true`). --- moment_kinetics/src/analysis.jl | 43 ++++++++++--------- .../src/electron_kinetic_equation.jl | 29 ++++++------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/moment_kinetics/src/analysis.jl b/moment_kinetics/src/analysis.jl index 3f17645a9..3fac86246 100644 --- a/moment_kinetics/src/analysis.jl +++ b/moment_kinetics/src/analysis.jl @@ -15,7 +15,7 @@ using ..interpolation: interpolate_to_grid_1d using ..load_data: open_readonly_output_file, get_nranks, load_pdf_data, load_rank_data using ..load_data: load_distributed_ion_pdf_slice using ..looping -using ..type_definitions: mk_int +using ..type_definitions: mk_int, mk_float using ..velocity_moments: integrate_over_vspace using FFTW @@ -595,7 +595,8 @@ const default_epsilon = 1.0e-4 """ steady_state_residuals(variable, variable_at_previous_time, dt; - epsilon=$default_epsilon, use_mpi=false) + epsilon=$default_epsilon, use_mpi=false, + only_max_abs=false) Calculate how close a variable is to steady state. @@ -630,23 +631,26 @@ initialised, and that `variable` has r and z dimensions but no species dimension distributed-memory MPI, this routine will double-count the points on block boundaries. If `only_max_abs=true` is passed, then only calculate the 'maxium absolute residual'. In -this case the OrderedDict returned will have only one entry, for `"max absolute -residual"`. 
+this case just returns the array of "max absolute residual" values, not an OrderedDict. """ function steady_state_residuals(variable, variable_at_previous_time, dt; epsilon=default_epsilon, use_mpi=false, only_max_abs=false) + return steady_state_residuals(variable, variable_at_previous_time, dt, use_mpi, + only_max_abs, epsilon) +end +function steady_state_residuals(variable, variable_at_previous_time, dt, use_mpi, + only_max_abs=false, epsilon=default_epsilon) square_residual_norms = - steady_state_square_residuals(variable, variable_at_previous_time, dt; - epsilon=epsilon, use_mpi=use_mpi, - only_max_abs=only_max_abs) + steady_state_square_residuals(variable, variable_at_previous_time, dt, nothing, + use_mpi, only_max_abs, epsilon) if global_rank[] == 0 if only_max_abs # In this case as an optimisation the residual was not squared, so do not need # to square-root here return square_residual_norms else - return OrderedDict(k=>sqrt.(v) for (k,v) ∈ square_residual_norms) + return OrderedDict{String,Vector{mk_float}}(k=>sqrt.(v) for (k,v) ∈ square_residual_norms) end else return nothing @@ -654,9 +658,9 @@ function steady_state_residuals(variable, variable_at_previous_time, dt; end """ - steady_state_square_residuals(variable, variable_at_previous_time, dt; - variable_max=nothing, epsilon=1.0e-4, - use_mpi=false, only_max_abs=false) + steady_state_square_residuals(variable, variable_at_previous_time, dt, + variable_max=nothing, use_mpi=false, + only_max_abs=false, epsilon=$default_epsilon) Used to calculate the mean square residual for [`steady_state_residuals`](@ref). @@ -668,9 +672,9 @@ See [`steady_state_residuals`](@ref) for documenation of the other arguments. Th values of [`steady_state_residuals`](@ref) are the square-root of the return values of this function. 
""" -function steady_state_square_residuals(variable, variable_at_previous_time, dt; - variable_max=nothing, epsilon=default_epsilon, - use_mpi=false, only_max_abs=false) +function steady_state_square_residuals(variable, variable_at_previous_time, dt, + variable_max=nothing, use_mpi=false, + only_max_abs=false, epsilon=default_epsilon) if ndims(dt) == 0 t_dim = ndims(variable) + 1 else @@ -797,10 +801,9 @@ function steady_state_square_residuals(variable, variable_at_previous_time, dt; (size(packed_results)..., n_blocks[])) if only_max_abs - return OrderedDict( - "max absolute residual"=>maximum(gathered_block_results, dims=2)) + return maximum(gathered_block_results, dims=2) else - return OrderedDict( + return OrderedDict{String,mk_float}( "RMS absolute residual"=>mean(@view(gathered_block_results[:,1,:]), dims=2), "max absolute residual"=>maximum(@view(gathered_block_results[:,2,:]), dims=2), "RMS relative residual"=>mean(@view(gathered_block_results[:,3,:]), dims=2), @@ -821,9 +824,7 @@ function steady_state_square_residuals(variable, variable_at_previous_time, dt; # Need to wrap the maximum(...) in a call to vec(...) so that we return a # Vector, not an N-dimensional array where the first (N-1) dimensions all have # size 1. - return OrderedDict( - "max absolute residual"=>vec(maximum(absolute_residual; - dims=tuple((1:t_dim-1)...)))) + return vec(maximum(absolute_residual; dims=tuple((1:t_dim-1)...))) else absolute_square_residual, relative_square_residual = _steady_state_square_residual(variable, variable_at_previous_time, @@ -831,7 +832,7 @@ function steady_state_square_residuals(variable, variable_at_previous_time, dt; # Need to wrap the mean(...) or maximum(...) in a call to vec(...) so that we # return a Vector, not an N-dimensional array where the first (N-1) dimensions all # have size 1. 
- return OrderedDict( + return OrderedDict{String,Vector{mk_float}}( "RMS absolute residual"=>vec(mean(absolute_square_residual; dims=tuple((1:t_dim-1)...))), "max absolute residual"=>vec(maximum(absolute_square_residual; diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 1a7327edd..513fc1264 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1270,26 +1270,25 @@ global_rank[] == 0 && println("recalculating precon") # expense, and especially the global MPI.Bcast()? begin_z_vperp_vpa_region() if global_rank[] == 0 - ss_residual_norms = steady_state_residuals(new_scratch.pdf_electron, - old_scratch.pdf_electron, - t_params.dt[]; use_mpi=true, - only_max_abs=true) - residual_norm = first(values(ss_residual_norms))[1]::mk_float + residual_norm = steady_state_residuals(new_scratch.pdf_electron, + old_scratch.pdf_electron, + t_params.dt[], true, true)[1] else - ss_residual_norms = steady_state_residuals(new_scratch.pdf_electron, - old_scratch.pdf_electron, - t_params.dt[]; use_mpi=true, - only_max_abs=true) + steady_state_residuals(new_scratch.pdf_electron, + old_scratch.pdf_electron, t_params.dt[], true, + true) end if evolve_ppar - ss_ppar_residual_norms = - steady_state_residuals(new_scratch.electron_ppar, - old_scratch.electron_ppar, - t_params.dt[]; use_mpi=true, - only_max_abs=true) if global_rank[] == 0 - ppar_residual = first(values(ss_ppar_residual_norms))[1]::mk_float + ppar_residual = + steady_state_residuals(new_scratch.electron_ppar, + old_scratch.electron_ppar, + t_params.dt[], true, true)[1] residual_norm = max(residual_norm, ppar_residual) + else + steady_state_residuals(new_scratch.electron_ppar, + old_scratch.electron_ppar, + t_params.dt[], true, true) end end if global_rank[] == 0 From 19309298be05f1b8641370f0b4d53e2aa210a4ca Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 24 Oct 2024 12:50:44 +0100 Subject: 
[PATCH 06/43] Slightly improve type stability in `setup_moment_kinetics()` Declare the type of the thing returned by `MPI.Bcast()` to avoid type instability. --- moment_kinetics/src/moment_kinetics.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index 3309ca8ed..51ffad3af 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -101,7 +101,7 @@ using .looping: debug_setup_loop_ranges_split_one_combination! using .moment_kinetics_input: mk_input, read_input_file using .time_advance: setup_time_advance!, time_advance! using .timer_utils -using .type_definitions: mk_int, OptionsDict +using .type_definitions: mk_float, mk_int, OptionsDict using .utils: to_minutes, get_default_restart_filename, get_prefix_iblock_and_move_existing_file using .em_fields: setup_em_fields @@ -327,7 +327,7 @@ parallel loop ranges, and are only used by the tests in `debug_test/`. # Broadcast code_time from the root process of each shared-memory block (on which it # might have been loaded from a restart file). - code_time = MPI.Bcast(code_time, 0, comm_block[]) + code_time = MPI.Bcast(code_time, 0, comm_block[])::mk_float # create arrays and do other work needed to setup # the main time advance loop -- including normalisation of f by density if requested From effb76821192656233c979244111e7c0071d706a Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 24 Oct 2024 21:01:57 +0100 Subject: [PATCH 07/43] Fix bounds checks in Jacobian functions After @boundscheck, need to actually throw an error. 
--- moment_kinetics/src/electron_fluid_equations.jl | 6 +++--- moment_kinetics/src/electron_kinetic_equation.jl | 14 +++++++------- moment_kinetics/src/electron_vpa_advection.jl | 6 +++--- moment_kinetics/src/electron_z_advection.jl | 6 +++--- moment_kinetics/src/external_sources.jl | 6 +++--- moment_kinetics/src/krook_collisions.jl | 4 ++-- moment_kinetics/src/moment_constraints.jl | 3 +++ 7 files changed, 24 insertions(+), 21 deletions(-) diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl index 4fe7f2d24..9268622a1 100644 --- a/moment_kinetics/src/electron_fluid_equations.jl +++ b/moment_kinetics/src/electron_fluid_equations.jl @@ -364,9 +364,9 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar " * "cannot be in same place in state vector.") end - @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) - @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n - @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") + @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") if composition.electron_physics == kinetic_electrons_with_temperature_equation error("kinetic_electrons_with_temperature_equation not " diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 513fc1264..47033e875 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -3257,8 +3257,8 @@ end function add_electron_dissipation_term_to_Jacobian!(jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, 
z_speed, dt, ir; f_offset=0) - @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) - @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") vpa_dissipation_coefficient = num_diss_params.electron.vpa_dissipation_coefficient @@ -3537,9 +3537,9 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!( error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar " * "cannot be in same place in state vector.") end - @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) - @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n - @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") + @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") source_density_amplitude = moments.electron.external_source_density_amplitude source_momentum_amplitude = moments.electron.external_source_momentum_amplitude @@ -3641,8 +3641,8 @@ end function add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(jacobian_matrix, z, dt, ion_dt, ir; ppar_offset=0) - @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) - @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") begin_z_region() @loop_z iz begin diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl 
index 05527b05a..8e0c92ba4 100644 --- a/moment_kinetics/src/electron_vpa_advection.jl +++ b/moment_kinetics/src/electron_vpa_advection.jl @@ -105,9 +105,9 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar " * "cannot be in same place in state vector.") end - @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) - @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n - @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") + @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") v_size = vperp.n * vpa.n source_density_amplitude = @view moments.electron.external_source_density_amplitude[:,ir,:] diff --git a/moment_kinetics/src/electron_z_advection.jl b/moment_kinetics/src/electron_z_advection.jl index 3cb637bee..7685971a5 100644 --- a/moment_kinetics/src/electron_z_advection.jl +++ b/moment_kinetics/src/electron_z_advection.jl @@ -82,9 +82,9 @@ function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, p error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. 
f and ppar " * "cannot be in same place in state vector.") end - @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) - @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n - @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") + @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") v_size = vperp.n * vpa.n diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl index 2eeeb3ac1..0166b307f 100644 --- a/moment_kinetics/src/external_sources.jl +++ b/moment_kinetics/src/external_sources.jl @@ -1031,9 +1031,9 @@ function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. 
f and ppar " * "cannot be in same place in state vector.") end - @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) - @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n - @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") + @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") if !electron_source.active return nothing diff --git a/moment_kinetics/src/krook_collisions.jl b/moment_kinetics/src/krook_collisions.jl index f0a99404d..77684580c 100644 --- a/moment_kinetics/src/krook_collisions.jl +++ b/moment_kinetics/src/krook_collisions.jl @@ -434,8 +434,8 @@ function add_electron_krook_collisions_to_Jacobian!(jacobian_matrix, f, dens, up vth, upar_ion, collisions, z, vperp, vpa, z_speed, dt, ir; f_offset=0, ppar_offset) - @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) - @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") if collisions.krook.nuee0 ≤ 0.0 && collisions.krook.nuei0 ≤ 0.0 return nothing diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl index ac91dd321..e880b5e3a 100644 --- a/moment_kinetics/src/moment_constraints.jl +++ b/moment_kinetics/src/moment_constraints.jl @@ -288,6 +288,9 @@ function add_electron_implicit_constraint_forcing_to_Jacobian!(jacobian_matrix, z_speed, z, vperp, vpa, constraint_forcing_rate, dt, ir; f_offset=0) + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck 
size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") + vpa_grid = vpa.grid vpa_wgts = vpa.wgts v_size = vperp.n * vpa.n From 5f6caa62d99e223aae146591dcc2b3d0c1633eda Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 26 Oct 2024 13:06:35 +0100 Subject: [PATCH 08/43] Use mk_float explicitly in a few more places Means less hacking to do if we ever do want to change (temporarily or permanently) `mk_float` to a different type. --- moment_kinetics/src/coordinates.jl | 6 ++-- moment_kinetics/src/gauss_legendre.jl | 8 ++++-- moment_kinetics/src/moment_kinetics.jl | 2 +- moment_kinetics/src/nonlinear_solvers.jl | 12 ++++---- moment_kinetics/src/time_advance.jl | 36 +++++++++++++----------- 5 files changed, 37 insertions(+), 27 deletions(-) diff --git a/moment_kinetics/src/coordinates.jl b/moment_kinetics/src/coordinates.jl index bf1c45416..373d7c8f8 100644 --- a/moment_kinetics/src/coordinates.jl +++ b/moment_kinetics/src/coordinates.jl @@ -381,9 +381,9 @@ function define_coordinate(coord_input::NamedTuple; parallel_io::Bool=false, end coord = coordinate(coord_input.name, n_global, n_local, coord_input.ngrid, - coord_input.nelement, coord_input.nelement_local, nrank, irank, coord_input.L, - grid, cell_width, igrid, ielement, imin, imax, igrid_full, - coord_input.discretization, coord_input.finite_difference_option, + coord_input.nelement, coord_input.nelement_local, nrank, irank, + mk_float(coord_input.L), grid, cell_width, igrid, ielement, imin, imax, + igrid_full, coord_input.discretization, coord_input.finite_difference_option, coord_input.cheb_option, coord_input.bc, coord_input.boundary_parameters, wgts, uniform_grid, duniform_dgrid, scratch, copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), copy(scratch), diff --git a/moment_kinetics/src/gauss_legendre.jl b/moment_kinetics/src/gauss_legendre.jl index 97c31d54e..ee9b706e4 100644 --- a/moment_kinetics/src/gauss_legendre.jl +++ 
b/moment_kinetics/src/gauss_legendre.jl @@ -163,6 +163,8 @@ end function setup_gausslegendre_pseudospectral_lobatto(coord; collision_operator_dim=true) x, w = gausslobatto(coord.ngrid) + x = mk_float.(x) + w = mk_float.(w) Dmat = allocate_float(coord.ngrid, coord.ngrid) gausslobattolegendre_differentiation_matrix!(Dmat,x,coord.ngrid) @@ -234,6 +236,8 @@ end function setup_gausslegendre_pseudospectral_radau(coord; collision_operator_dim=true) # Gauss-Radau points on [-1,1) x, w = gaussradau(coord.ngrid) + x = mk_float.(x) + w = mk_float.(w) # Gauss-Radau points on (-1,1] xreverse, wreverse = -reverse(x), reverse(w) # elemental differentiation matrix @@ -387,7 +391,7 @@ ngrid -- number of points per element (incl. boundary points) Note that D has does not include a scaling factor """ -function gausslobattolegendre_differentiation_matrix!(D::Array{Float64,2},x::Array{Float64,1},ngrid::Int64) +function gausslobattolegendre_differentiation_matrix!(D::Array{mk_float,2},x::Array{mk_float,1},ngrid::mk_int) D[:,:] .= 0.0 for ix in 1:ngrid for ixp in 1:ngrid @@ -418,7 +422,7 @@ ngrid -- number of points per element (incl. boundary points) Note that D has does not include a scaling factor """ -function gaussradaulegendre_differentiation_matrix!(D::Array{Float64,2},x::Array{Float64,1},ngrid::Int64) +function gaussradaulegendre_differentiation_matrix!(D::Array{mk_float,2},x::Array{mk_float,1},ngrid::Int64) D[:,:] .= 0.0 for ix in 1:ngrid for ixp in 1:ngrid diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index 51ffad3af..5d5e9d9f5 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -275,7 +275,7 @@ parallel loop ranges, and are only used by the tests in `debug_test/`. manufactured_solns_input, t_input, num_diss_params, advection_structs, io_input, input_dict) # initialize time variable - code_time = 0. 
+ code_time = mk_float(0.0) dt = nothing dt_before_last_fail = nothing electron_dt = nothing diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 0958070a8..a94e2314c 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -186,13 +186,15 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa linear_initial_guess = zeros(linear_restart) - return nl_solver_info(nl_solver_input.rtol, nl_solver_input.atol, + return nl_solver_info(mk_float(nl_solver_input.rtol), mk_float(nl_solver_input.atol), nl_solver_input.nonlinear_max_iterations, - nl_solver_input.linear_rtol, nl_solver_input.linear_atol, - linear_restart, nl_solver_input.linear_max_restarts, H, c, s, g, - V, linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), + mk_float(nl_solver_input.linear_rtol), + mk_float(nl_solver_input.linear_atol), linear_restart, + nl_solver_input.linear_max_restarts, H, c, s, g, V, + linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), Ref(nl_solver_input.preconditioner_update_interval), - Ref(0.0), serial_solve, Ref(0), Ref(0), preconditioner_type, + Ref(mk_float(0.0)), serial_solve, Ref(0), Ref(0), + preconditioner_type, nl_solver_input.preconditioner_update_interval, preconditioners) end diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 15278f281..cf07371d2 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -313,10 +313,11 @@ the returned `time_info`. 
function setup_time_info(t_input, n_variables, code_time, dt_reload, dt_before_last_fail_reload, composition, manufactured_solns_input, io_input, input_dict; electron=nothing) + code_time = mk_float(code_time) rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive, low_storage, CFL_prefactor = setup_runge_kutta_coefficients!(t_input["type"], - t_input["CFL_prefactor"], + mk_float(t_input["CFL_prefactor"]), t_input["split_operators"]) if !adaptive @@ -343,16 +344,16 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, end t = Ref(code_time) - dt = Ref(dt_reload === nothing ? t_input["dt"] : dt_reload) + dt = Ref(dt_reload === nothing ? mk_float(t_input["dt"]) : dt_reload) previous_dt = Ref(dt[]) dt_before_output = Ref(dt[]) - dt_before_last_fail = Ref(dt_before_last_fail_reload === nothing ? Inf : dt_before_last_fail_reload) + dt_before_last_fail = Ref(dt_before_last_fail_reload === nothing ? mk_float(Inf) : dt_before_last_fail_reload) step_to_moments_output = Ref(false) step_to_dfns_output = Ref(false) write_moments_output = Ref(false) write_dfns_output = Ref(false) - end_time = code_time + t_input["dt"] * t_input["nstep"] + end_time = mk_float(code_time + t_input["dt"] * t_input["nstep"]) epsilon = 1.e-11 if adaptive || t_input["write_after_fixed_step_count"] if t_input["nwrite"] == 0 @@ -423,7 +424,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, end decrease_dt_iteration_threshold = t_input["decrease_dt_iteration_threshold"] increase_dt_iteration_threshold = t_input["increase_dt_iteration_threshold"] - cap_factor_ion_dt = t_input["cap_factor_ion_dt"] + cap_factor_ion_dt = mk_float(t_input["cap_factor_ion_dt"]) electron_t_params = nothing elseif electron === false debug_io = nothing @@ -439,28 +440,31 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, electron_t_params = electron end return time_info(n_variables, t_input["nstep"], end_time, t, dt, previous_dt, - 
dt_before_output, dt_before_last_fail, CFL_prefactor, + dt_before_output, dt_before_last_fail, mk_float(CFL_prefactor), step_to_moments_output, step_to_dfns_output, write_moments_output, write_dfns_output, Ref(0), Ref(0), Ref{mk_float}(0.0), Ref(0), Ref(0), Ref(0), mk_int[], mk_int[], t_input["nwrite"], t_input["nwrite_dfns"], moments_output_times, dfns_output_times, t_input["type"], rk_coefs, rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive, - low_storage, t_input["rtol"], t_input["atol"], t_input["atol_upar"], - t_input["step_update_prefactor"], t_input["max_increase_factor"], - t_input["max_increase_factor_near_last_fail"], - t_input["last_fail_proximity_factor"], t_input["minimum_dt"], - t_input["maximum_dt"], + low_storage, mk_float(t_input["rtol"]), mk_float(t_input["atol"]), + mk_float(t_input["atol_upar"]), + mk_float(t_input["step_update_prefactor"]), + mk_float(t_input["max_increase_factor"]), + mk_float(t_input["max_increase_factor_near_last_fail"]), + mk_float(t_input["last_fail_proximity_factor"]), + mk_float(t_input["minimum_dt"]), mk_float(t_input["maximum_dt"]), electron !== nothing && t_input["implicit_braginskii_conduction"], electron !== nothing && t_input["implicit_electron_advance"], electron !== nothing && t_input["implicit_ion_advance"], electron !== nothing && t_input["implicit_vpa_advection"], electron !== nothing && t_input["implicit_electron_ppar"], - t_input["constraint_forcing_rate"], decrease_dt_iteration_threshold, - increase_dt_iteration_threshold, cap_factor_ion_dt, - t_input["write_after_fixed_step_count"], error_sum_zero, - t_input["split_operators"], t_input["steady_state_residual"], - t_input["converged_residual_value"], + mk_float(t_input["constraint_forcing_rate"]), + decrease_dt_iteration_threshold, increase_dt_iteration_threshold, + mk_float(cap_factor_ion_dt), t_input["write_after_fixed_step_count"], + error_sum_zero, t_input["split_operators"], + t_input["steady_state_residual"], + 
mk_float(t_input["converged_residual_value"]), manufactured_solns_input.use_for_advance, t_input["stopfile_name"], debug_io, electron_t_params) end From 692e8debf929f07c3c3bba4f7462d975a0ff0333 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 24 Oct 2024 21:38:42 +0100 Subject: [PATCH 09/43] Functions to calculate Jacobians for ADI preconditioner A variation on the 'alternating direction implicit' (ADI) method might be useful as a preconditioner. It will require split Jacobians where some 'implicit' parts only couple a subset of dimensions (i.e. z- or velocity-dimensions), while 'explicit' parts may couple all dimensions (but may also have some terms removed to make the matrix more sparse for numerical efficiency). --- .../src/electron_fluid_equations.jl | 159 +++++- .../src/electron_kinetic_equation.jl | 523 ++++++++++++++++-- moment_kinetics/src/electron_vpa_advection.jl | 211 +++++-- moment_kinetics/src/electron_z_advection.jl | 159 ++++-- moment_kinetics/src/external_sources.jl | 153 ++++- moment_kinetics/src/krook_collisions.jl | 108 +++- moment_kinetics/src/moment_constraints.jl | 98 +++- 7 files changed, 1186 insertions(+), 225 deletions(-) diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl index 9268622a1..dec0aff8f 100644 --- a/moment_kinetics/src/electron_fluid_equations.jl +++ b/moment_kinetics/src/electron_fluid_equations.jl @@ -358,8 +358,8 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa vth, third_moment, ddens_dz, dupar_dz, dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, - num_diss_params, dt, ir; f_offset=0, - ppar_offset=0) + num_diss_params, dt, ir, include=:all; + f_offset=0, ppar_offset=0) if f_offset == ppar_offset error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. 
f and ppar " * "cannot be in same place in state vector.") @@ -367,6 +367,7 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") + @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include") if composition.electron_physics == kinetic_electrons_with_temperature_equation error("kinetic_electrons_with_temperature_equation not " @@ -418,13 +419,102 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa # = (2 * sqrt(2) * p^(3/2) / n^(1/2) / me^(1/2))[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_matrix[irowz,icolz] # + sqrt(2) * (-p^(3/2) / n^(3/2) / me^(1/2) * dn/dz + 3.0 * p^(1/2) / n^(1/2) / me^(1/2) * dp/dz)[irowz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * delta[irowz,icolz] + # upar*dppar_dz + z_deriv_row_startind = z_deriv_matrix.rowptr[iz] + z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1 + z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind] + z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind] + if include ∈ (:all, :explicit_z) + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) + col = ppar_offset + icolz + jacobian_matrix[row,col] += + dt * upar[iz] * z_deriv_entry + end + end + + # 3*ppar*dupar_dz + if include === :all + jacobian_matrix[row,row] += 3.0 * dt * dupar_dz[iz] + end + + # terms from d(qpar)/dz + if include === :all + jacobian_matrix[row,row] += + dt * (3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * dthird_moment_dz[iz] + - 1.5 * sqrt(2.0 * ppar[iz] / me) / dens[iz]^1.5 * third_moment[iz] * 
ddens_dz[iz] + + 1.5 * sqrt(2.0 / ppar[iz] / dens[iz] / me) * third_moment[iz] * dppar_dz[iz]) + end + if include ∈ (:all, :explicit_z) + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) + col = ppar_offset + icolz + jacobian_matrix[row,col] += dt * 3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * third_moment[iz] * z_deriv_entry + end + end + if include ∈ (:all, :explicit_v) + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += dt * (-(ppar[iz]/dens[iz])^1.5*sqrt(2.0/me)*ddens_dz[iz] + + 3.0*sqrt(2.0*ppar[iz]/dens[iz]/me)*dppar_dz[iz]) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + end + end + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += dt * 2.0*ppar[iz]^1.5*sqrt(2.0/dens[iz]/me) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry + end + end + + return nothing +end + +function add_electron_energy_equation_to_z_only_Jacobian!( + jacobian_matrix, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz, + dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, + num_diss_params, dt, ir) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong") + + if composition.electron_physics == kinetic_electrons_with_temperature_equation + error("kinetic_electrons_with_temperature_equation not " + * "supported yet in preconditioner") + elseif composition.electron_physics != kinetic_electrons + error("Unsupported electron_physics=$(composition.electron_physics) " + * "in electron_backward_euler!() preconditioner.") + end + if num_diss_params.electron.moment_dissipation_coefficient > 0.0 + error("z-diffusion of 
electron_ppar not yet supported in " + * "preconditioner") + end + if collisions.electron_fluid.nu_ei > 0.0 + error("electron-ion collision terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.reactions.electron_charge_exchange_frequency > 0.0 + error("electron 'charge exchange' terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.reactions.electron_ionization_frequency > 0.0 + error("electron ionization terms for electron_ppar not yet " + * "supported in preconditioner") + end + + me = composition.me_over_mi + z_deriv_matrix = z_spectral.D_matrix_csr + v_size = vperp.n * vpa.n + + @loop_z iz begin + # Rows corresponding to electron_ppar + row = iz + # upar*dppar_dz z_deriv_row_startind = z_deriv_matrix.rowptr[iz] z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1 z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind] z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind] for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) - col = ppar_offset + icolz + col = icolz jacobian_matrix[row,col] += dt * upar[iz] * z_deriv_entry end @@ -438,20 +528,59 @@ function add_electron_energy_equation_to_Jacobian!(jacobian_matrix, f, dens, upa - 1.5 * sqrt(2.0 * ppar[iz] / me) / dens[iz]^1.5 * third_moment[iz] * ddens_dz[iz] + 1.5 * sqrt(2.0 / ppar[iz] / dens[iz] / me) * third_moment[iz] * dppar_dz[iz]) for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) - col = ppar_offset + icolz + col = icolz jacobian_matrix[row,col] += dt * 3.0 * sqrt(2.0 * ppar[iz] / dens[iz] / me) * third_moment[iz] * z_deriv_entry end - for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset - jacobian_matrix[row,col] += dt * (-(ppar[iz]/dens[iz])^1.5*sqrt(2.0/me)*ddens_dz[iz] - + 
3.0*sqrt(2.0*ppar[iz]/dens[iz]/me)*dppar_dz[iz]) * - vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 - end - for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset - jacobian_matrix[row,col] += dt * 2.0*ppar[iz]^1.5*sqrt(2.0/dens[iz]/me) * - vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry - end + end + + return nothing +end + +function add_electron_energy_equation_to_v_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz, + dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, + num_diss_params, dt, ir, iz) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong") + + if composition.electron_physics == kinetic_electrons_with_temperature_equation + error("kinetic_electrons_with_temperature_equation not " + * "supported yet in preconditioner") + elseif composition.electron_physics != kinetic_electrons + error("Unsupported electron_physics=$(composition.electron_physics) " + * "in electron_backward_euler!() preconditioner.") + end + if num_diss_params.electron.moment_dissipation_coefficient > 0.0 + error("z-diffusion of electron_ppar not yet supported in " + * "preconditioner") + end + if collisions.electron_fluid.nu_ei > 0.0 + error("electron-ion collision terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.reactions.electron_charge_exchange_frequency > 0.0 + error("electron 'charge exchange' terms for electron_ppar not yet " + * "supported in preconditioner") + end + if composition.n_neutral_species > 0 && collisions.reactions.electron_ionization_frequency > 0.0 + error("electron ionization terms for electron_ppar not yet " + * 
"supported in preconditioner") + end + + me = composition.me_over_mi + + jacobian_matrix[end,end] += 3.0 * dt * dupar_dz + + jacobian_matrix[end,end] += + dt * (3.0 * sqrt(2.0 * ppar / dens / me) * dthird_moment_dz + - 1.5 * sqrt(2.0 * ppar / me) / dens^1.5 * third_moment * ddens_dz + + 1.5 * sqrt(2.0 / ppar / dens / me) * third_moment * dppar_dz) + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolvperp - 1) * vpa.n + icolvpa + jacobian_matrix[end,col] += dt * (-(ppar/dens)^1.5*sqrt(2.0/me)*ddens_dz + + 3.0*sqrt(2.0*ppar/dens/me)*dppar_dz) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 end return nothing diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 47033e875..54021ced6 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -29,23 +29,34 @@ using ..electron_fluid_equations: calculate_electron_moments!, using ..electron_fluid_equations: electron_energy_equation!, electron_energy_equation_no_r!, add_electron_energy_equation_to_Jacobian!, + add_electron_energy_equation_to_v_only_Jacobian!, + add_electron_energy_equation_to_z_only_Jacobian!, electron_energy_residual! using ..electron_z_advection: electron_z_advection!, update_electron_speed_z!, - add_electron_z_advection_to_Jacobian! + add_electron_z_advection_to_Jacobian!, + add_electron_z_advection_to_v_only_Jacobian!, + add_electron_z_advection_to_z_only_Jacobian! using ..electron_vpa_advection: electron_vpa_advection!, update_electron_speed_vpa!, - add_electron_vpa_advection_to_Jacobian! + add_electron_vpa_advection_to_Jacobian!, + add_electron_vpa_advection_to_v_only_Jacobian! using ..em_fields: update_phi! using ..external_sources: total_external_electron_sources!, - add_total_external_electron_source_to_Jacobian! 
+ add_total_external_electron_source_to_Jacobian!, + add_total_external_electron_source_to_v_only_Jacobian!, + add_total_external_electron_source_to_z_only_Jacobian! using ..file_io: get_electron_io_info, write_electron_state, finish_electron_io using ..krook_collisions: electron_krook_collisions!, get_collision_frequency_ee, get_collision_frequency_ei, - add_electron_krook_collisions_to_Jacobian! + add_electron_krook_collisions_to_Jacobian!, + add_electron_krook_collisions_to_v_only_Jacobian!, + add_electron_krook_collisions_to_z_only_Jacobian! using ..timer_utils using ..moment_constraints: hard_force_moment_constraints!, moment_constraints_on_residual!, electron_implicit_constraint_forcing!, - add_electron_implicit_constraint_forcing_to_Jacobian! + add_electron_implicit_constraint_forcing_to_Jacobian!, + add_electron_implicit_constraint_forcing_to_v_only_Jacobian!, + add_electron_implicit_constraint_forcing_to_z_only_Jacobian! using ..moment_kinetics_structs: scratch_pdf, scratch_electron_pdf, electron_pdf_substruct using ..nonlinear_solvers using ..runge_kutta: rk_update_variable!, rk_loworder_solution!, local_error_norm, @@ -2788,7 +2799,7 @@ end vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, - ir, evolve_ppar) + ir, evolve_ppar, include=:all) Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equation and (if `evolve_ppar=true`) the electron energy equation. 
@@ -2797,7 +2808,8 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, - num_diss_params, t_params, ion_dt, ir, evolve_ppar) = begin + num_diss_params, t_params, ion_dt, ir, evolve_ppar, + include=:all) = begin dt = t_params.dt[] buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] @@ -2836,10 +2848,11 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati @loop_z_vperp_vpa iz ivperp ivpa begin # Rows corresponding to pdf_electron row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa - v_remainder = (ivperp - 1) * vpa.n + ivpa jacobian_matrix[row,:] .= 0.0 - jacobian_matrix[row,row] += 1.0 + if include === :all + jacobian_matrix[row,row] += 1.0 + end end begin_z_region() @loop_z iz begin @@ -2847,44 +2860,286 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati row = pdf_size + iz jacobian_matrix[row,:] .= 0.0 - jacobian_matrix[row,row] += 1.0 + if include === :all + jacobian_matrix[row,row] += 1.0 + end end z_speed = @view z_advect[1].speed[:,:,:,ir] + if include ∈ (:all, :explicit_v) + dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + @loop_vperp_vpa ivperp ivpa begin + @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed[:,ivpa,ivperp] + end + #calculate the upwind derivative + @views derivative_z_pdf_vpavperpz!(dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir], + scratch_dummy.buffer_vpavperpr_1[:,:,ir], + scratch_dummy.buffer_vpavperpr_2[:,:,ir], + scratch_dummy.buffer_vpavperpr_3[:,:,ir], + scratch_dummy.buffer_vpavperpr_4[:,:,ir], + scratch_dummy.buffer_vpavperpr_5[:,:,ir], + scratch_dummy.buffer_vpavperpr_6[:,:,ir], + z_spectral, z) + else + dpdf_dz = nothing + end + + dpdf_dvpa = @view 
scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] + begin_z_vperp_region() + update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid, + external_source_settings.electron, ir) + @loop_z_vperp iz ivperp begin + @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir] + end + #calculate the upwind derivative of the electron pdf w.r.t. wpa + @loop_z_vperp iz ivperp begin + @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa, + vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral) + end + + zeroth_moment = z.scratch_shared + first_moment = z.scratch_shared2 + second_moment = z.scratch_shared3 + begin_z_region() + vpa_grid = vpa.grid + vpa_wgts = vpa.wgts + @loop_z iz begin + @views zeroth_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_wgts) + @views first_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, vpa_wgts) + @views second_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, 2, vpa_wgts) + end + add_electron_z_advection_to_Jacobian!( - jacobian_matrix, f, dens, upar, ppar, vth, me, z, vperp, vpa, z_spectral, - z_advect, scratch_dummy, dt, ir; ppar_offset=pdf_size) + jacobian_matrix, f, dens, upar, ppar, vth, dpdf_dz, me, z, vperp, vpa, z_spectral, + z_advect, z_speed, scratch_dummy, dt, ir, include; ppar_offset=pdf_size) add_electron_vpa_advection_to_Jacobian!( - jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, - dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral, - vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir; + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, dpdf_dvpa, ddens_dz, + dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral, + vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir, include; ppar_offset=pdf_size) add_contribution_from_electron_pdf_term_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, dvth_dz, dqpar_dz, 
dthird_moment_dz, moments, me, external_source_settings, z, - vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir; ppar_offset=pdf_size) + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, include; + ppar_offset=pdf_size) add_electron_dissipation_term_to_Jacobian!( - jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir) + jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir, + include) add_electron_krook_collisions_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa, - z_speed, dt, ir; ppar_offset=pdf_size) + z_speed, dt, ir, include; ppar_offset=pdf_size) add_total_external_electron_source_to_Jacobian!( jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron, z, - vperp, vpa, dt, ir; ppar_offset=pdf_size) + vperp, vpa, dt, ir, include; ppar_offset=pdf_size) add_electron_implicit_constraint_forcing_to_Jacobian!( - jacobian_matrix, f, z_speed, z, vperp, vpa, t_params.constraint_forcing_rate, dt, - ir) + jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp, + vpa, t_params.constraint_forcing_rate, dt, ir, include) # Always add the electron energy equation term, even if evolve_ppar=false, so that the # Jacobian matrix always has the same shape, meaning that we can always reuse the LU # factorization struct. 
add_electron_energy_equation_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz, dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, - num_diss_params, dt, ir; ppar_offset=pdf_size) + num_diss_params, dt, ir, include; ppar_offset=pdf_size) if ion_dt !== nothing add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( - jacobian_matrix, z, dt, ion_dt, ir; ppar_offset=pdf_size) + jacobian_matrix, z, dt, ion_dt, ir, include; ppar_offset=pdf_size) + end + + return nothing +end + +""" + fill_electron_kinetic_equation_v_only_Jacobian!(jacobian_matrix, f, ppar, moments, + collisions, composition, z, vperp, + vpa, z_spectral, vperp_specral, + vpa_spectral, z_advect, vpa_advect, + scratch_dummy, + external_source_settings, + num_diss_params, t_params, ion_dt, ir, + iz, evolve_ppar, include=:all) + +Fill a pre-allocated matrix with the Jacobian matrix for a velocity-space solve part of +the ADI method for electron kinetic equation and (if `evolve_ppar=true`) the electron +energy equation. 
+""" +@timeit global_timer fill_electron_kinetic_equation_v_only_Jacobian!( + jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments, + zeroth_moment, first_moment, second_moment, third_moment, + dthird_moment_dz, collisions, composition, z, vperp, vpa, + z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, + scratch_dummy, external_source_settings, num_diss_params, + t_params, ion_dt, ir, iz, evolve_ppar) = begin + dt = t_params.dt[] + + vth = moments.electron.vth[iz,ir] + me = composition.me_over_mi + dens = moments.electron.dens[iz,ir] + upar = moments.electron.upar[iz,ir] + qpar = moments.electron.qpar[iz,ir] + ddens_dz = moments.electron.ddens_dz[iz,ir] + dupar_dz = moments.electron.dupar_dz[iz,ir] + dppar_dz = moments.electron.dppar_dz[iz,ir] + dvth_dz = moments.electron.dvth_dz[iz,ir] + dqpar_dz = moments.electron.dqpar_dz[iz,ir] + + upar_ion = moments.ion.upar[iz,ir,1] + + pdf_size = z.n * vperp.n * vpa.n + v_size = vperp.n * vpa.n + + # Initialise jacobian_matrix to the identity + for row ∈ 1:size(jacobian_matrix, 1) + jacobian_matrix[row,:] .= 0.0 + jacobian_matrix[row,row] += 1.0 + end + + add_electron_z_advection_to_v_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, dpdf_dz, me, z, vperp, vpa, z_spectral, + z_advect, z_speed, scratch_dummy, dt, ir, iz) + add_electron_vpa_advection_to_v_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, dpdf_dvpa, ddens_dz, + dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral, + vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir, iz) + add_contribution_from_electron_pdf_term_to_v_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, + dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, iz) + add_electron_dissipation_term_to_v_only_Jacobian!( + jacobian_matrix, f, num_diss_params, z, vperp, vpa, 
vpa_spectral, z_speed, dt, ir, + iz) + add_electron_krook_collisions_to_v_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa, + z_speed, dt, ir, iz) + add_total_external_electron_source_to_v_only_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron, z, + vperp, vpa, dt, ir, iz) + add_electron_implicit_constraint_forcing_to_v_only_Jacobian!( + jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp, + vpa, t_params.constraint_forcing_rate, dt, ir, iz) + # Always add the electron energy equation term, even if evolve_ppar=false, so that the + # Jacobian matrix always has the same shape, meaning that we can always reuse the LU + # factorization struct. + add_electron_energy_equation_to_v_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz, + dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, + num_diss_params, dt, ir, iz) + if ion_dt !== nothing + add_ion_dt_forcing_of_electron_ppar_to_v_only_Jacobian!( + jacobian_matrix, z, dt, ion_dt, ir, iz) + end + + return nothing +end + +""" + fill_electron_kinetic_equation_z_only_Jacobian_f!( + jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments, zeroth_moment, + first_moment, second_moment, third_moment, dthird_moment_dz, collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, + ion_dt, ir, ivperp, ivpa, evolve_ppar) + +Fill a pre-allocated matrix with the Jacobian matrix for a z-direction solve part of the +ADI method for electron kinetic equation and (if `evolve_ppar=true`) the electron energy +equation. 
+""" +@timeit global_timer fill_electron_kinetic_equation_z_only_Jacobian_f!( + jacobian_matrix, f, ppar, dpdf_dz, dpdf_dvpa, z_speed, moments, + zeroth_moment, first_moment, second_moment, third_moment, + dthird_moment_dz, collisions, composition, z, vperp, vpa, + z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, + scratch_dummy, external_source_settings, num_diss_params, + t_params, ion_dt, ir, ivperp, ivpa, evolve_ppar) = begin + dt = t_params.dt[] + + vth = @view moments.electron.vth[:,ir] + me = composition.me_over_mi + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + qpar = @view moments.electron.qpar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dupar_dz = @view moments.electron.dupar_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + dvth_dz = @view moments.electron.dvth_dz[:,ir] + dqpar_dz = @view moments.electron.dqpar_dz[:,ir] + + upar_ion = @view moments.ion.upar[:,ir,1] + + pdf_size = z.n * vperp.n * vpa.n + v_size = vperp.n * vpa.n + + # Initialise jacobian_matrix to the identity + for row ∈ 1:size(jacobian_matrix, 1) + jacobian_matrix[row,:] .= 0.0 + jacobian_matrix[row,row] += 1.0 + end + + add_electron_z_advection_to_z_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, dpdf_dz, me, z, vperp, vpa, z_spectral, + z_advect, z_speed, scratch_dummy, dt, ir, ivperp, ivpa) + add_contribution_from_electron_pdf_term_to_z_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, + dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, ivperp, ivpa) + add_electron_krook_collisions_to_z_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa, + z_speed, dt, ir, ivperp, ivpa) + add_total_external_electron_source_to_z_only_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron, z, + vperp, 
vpa, dt, ir, ivperp, ivpa) + add_electron_implicit_constraint_forcing_to_z_only_Jacobian!( + jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp, + vpa, t_params.constraint_forcing_rate, dt, ir, ivperp, ivpa) + + return nothing +end + +""" + fill_electron_kinetic_equation_z_only_Jacobian_ppar!( + jacobian_matrix, ppar, moments, zeroth_moment, first_moment, second_moment, + third_moment, dthird_moment_dz, collisions, composition, z, vperp, vpa, + z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, ir, evolve_ppar) + +Fill a pre-allocated matrix with the Jacobian matrix for a z-direction solve part of the +ADI method for electron kinetic equation and (if `evolve_ppar=true`) the electron energy +equation. +""" +@timeit global_timer fill_electron_kinetic_equation_z_only_Jacobian_ppar!( + jacobian_matrix, ppar, moments, zeroth_moment, first_moment, + second_moment, third_moment, dthird_moment_dz, collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, ir, + evolve_ppar) = begin + dt = t_params.dt[] + + vth = @view moments.electron.vth[:,ir] + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + ddens_dz = @view moments.electron.ddens_dz[:,ir] + dupar_dz = @view moments.electron.dupar_dz[:,ir] + dppar_dz = @view moments.electron.dppar_dz[:,ir] + + pdf_size = z.n * vperp.n * vpa.n + + # Initialise jacobian_matrix to the identity + for row ∈ 1:size(jacobian_matrix, 1) + jacobian_matrix[row,:] .= 0.0 + jacobian_matrix[row,row] += 1.0 + end + + add_electron_energy_equation_to_z_only_Jacobian!( + jacobian_matrix, dens, upar, ppar, vth, third_moment, ddens_dz, dupar_dz, + dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, + num_diss_params, dt, ir) + if ion_dt !== 
nothing + add_ion_dt_forcing_of_electron_ppar_to_z_only_Jacobian!( + jacobian_matrix, z, dt, ion_dt, ir) end return nothing @@ -3256,9 +3511,10 @@ end function add_electron_dissipation_term_to_Jacobian!(jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, - dt, ir; f_offset=0) + dt, ir, include=:all; f_offset=0) @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") + @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include") vpa_dissipation_coefficient = num_diss_params.electron.vpa_dissipation_coefficient @@ -3278,9 +3534,44 @@ function add_electron_dissipation_term_to_Jacobian!(jacobian_matrix, f, num_diss # Rows corresponding to pdf_electron row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset + # Terms from add_dissipation_term!() + if include ∈ (:all, :explicit_v) + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] -= dt * vpa_dissipation_coefficient * vpa_dense_second_deriv_matrix[ivpa,icolvpa] + end + end + end + + return nothing +end + +function add_electron_dissipation_term_to_v_only_Jacobian!( + jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir, + iz) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong") + + vpa_dissipation_coefficient = num_diss_params.electron.vpa_dissipation_coefficient + + if vpa_dissipation_coefficient ≤ 0.0 + return nothing + end + + vpa_dense_second_deriv_matrix = vpa_spectral.dense_second_deriv_matrix + + @loop_vperp_vpa ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, 
z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (ivperp - 1) * vpa.n + ivpa + # Terms from add_dissipation_term!() for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + col = (icolvperp - 1) * vpa.n + icolvpa jacobian_matrix[row,col] -= dt * vpa_dissipation_coefficient * vpa_dense_second_deriv_matrix[ivpa,icolvpa] end end @@ -3531,7 +3822,8 @@ end function add_contribution_from_electron_pdf_term_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, - vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir; f_offset=0, ppar_offset=0) + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, include=:all; f_offset=0, + ppar_offset=0) if f_offset == ppar_offset error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar " @@ -3540,6 +3832,7 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!( @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") + @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include") source_density_amplitude = moments.electron.external_source_density_amplitude source_momentum_amplitude = moments.electron.external_source_momentum_amplitude @@ -3591,15 +3884,19 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!( # (3/4*sqrt(2/me)/p^(1/2)/n^(3/2)*dn/dz + 1/2/sqrt(2*n*me)/p^(3/2)*dp/dz)[irowz] * delta(irowz,icolz) # -1/sqrt(2*p*n*me)[irowz] * z_deriv_matrix[irowz,icolz] # - jacobian_matrix[row,row] += dt * (0.5 * dqpar_dz[iz] / ppar[iz] - + vpa.grid[ivpa] * vth[iz] * (ddens_dz[iz] / dens[iz] 
- - dvth_dz[iz] / vth[iz])) - for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset - jacobian_matrix[row,col] += - dt * f[ivpa,ivperp,iz] * - (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) * - vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + if include === :all + jacobian_matrix[row,row] += dt * (0.5 * dqpar_dz[iz] / ppar[iz] + + vpa.grid[ivpa] * vth[iz] * (ddens_dz[iz] / dens[iz] + - dvth_dz[iz] / vth[iz])) + end + if include ∈ (:all, :explicit_v) + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += + dt * f[ivpa,ivperp,iz] * + (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + end end z_deriv_row_startind = z_deriv_matrix.rowptr[iz] z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1 @@ -3611,6 +3908,61 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!( dt * f[ivpa,ivperp,iz] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry end + if include === :all + for index ∈ eachindex(external_source_settings.electron) + electron_source = external_source_settings.electron[index] + if electron_source.active + # Source terms from `add_contribution_from_pdf_term!()` + jacobian_matrix[row,row] += dt * (1.5 * source_density_amplitude[iz,ir,index] / dens[iz] + - (0.5 * source_pressure_amplitude[iz,ir,index] + + source_momentum_amplitude[iz,ir,index]) / ppar[iz] + ) + end + end + end + if include ∈ (:all, :explicit_v) + jacobian_matrix[row,ppar_offset+iz] += + dt * f[ivpa,ivperp,iz] * + (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz] + - 0.25*sqrt(2.0/ppar[iz]/me)/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz] + + 0.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dthird_moment_dz[iz] + + vpa.grid[ivpa] * 
(0.75*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz] + + 0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz])) + end + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) + col = ppar_offset + icolz + jacobian_matrix[row,col] += dt * f[ivpa,ivperp,iz] * + (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] + - vpa.grid[ivpa]/sqrt(2.0*ppar[iz]*dens[iz]*me)) * z_deriv_entry + end + end + + return nothing +end + +function add_contribution_from_electron_pdf_term_to_z_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, + dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, ivperp, ivpa) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong") + + source_density_amplitude = moments.electron.external_source_density_amplitude + source_momentum_amplitude = moments.electron.external_source_momentum_amplitude + source_pressure_amplitude = moments.electron.external_source_pressure_amplitude + + @loop_z iz begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = iz + + jacobian_matrix[row,row] += dt * (0.5 * dqpar_dz[iz] / ppar[iz] + + vpa.grid[ivpa] * vth[iz] * (ddens_dz[iz] / dens[iz] + - dvth_dz[iz] / vth[iz])) for index ∈ eachindex(external_source_settings.electron) electron_source = external_source_settings.electron[index] if electron_source.active @@ -3621,33 +3973,93 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!( ) end end - jacobian_matrix[row,ppar_offset+iz] += - dt * f[ivpa,ivperp,iz] * - (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz] - - 0.25*sqrt(2.0/ppar[iz]/me)/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz] - + 
0.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dthird_moment_dz[iz] - + vpa.grid[ivpa] * (0.75*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz] - + 0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz])) - for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) - col = ppar_offset + icolz - jacobian_matrix[row,col] += dt * f[ivpa,ivperp,iz] * - (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] - - vpa.grid[ivpa]/sqrt(2.0*ppar[iz]*dens[iz]*me)) * z_deriv_entry + end + + return nothing +end + +function add_contribution_from_electron_pdf_term_to_v_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, + dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, iz) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong") + + source_density_amplitude = moments.electron.external_source_density_amplitude + source_momentum_amplitude = moments.electron.external_source_momentum_amplitude + source_pressure_amplitude = moments.electron.external_source_pressure_amplitude + z_deriv_matrix = z_spectral.D_matrix_csr + v_size = vperp.n * vpa.n + + @loop_vperp_vpa ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue end + + # Rows corresponding to pdf_electron + row = (ivperp - 1) * vpa.n + ivpa + + jacobian_matrix[row,row] += dt * (0.5 * dqpar_dz / ppar + + vpa.grid[ivpa] * vth * (ddens_dz / dens + - dvth_dz / vth)) + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolvperp - 1) * vpa.n + icolvpa + jacobian_matrix[row,col] += + dt * f[ivpa,ivperp] * + (1.5*sqrt(2.0/ppar/dens/me)*dppar_dz - 0.5*sqrt(2.0*ppar/me)/dens^1.5*ddens_dz) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + end + for index ∈ eachindex(external_source_settings.electron) 
+ electron_source = external_source_settings.electron[index] + if electron_source.active + # Source terms from `add_contribution_from_pdf_term!()` + jacobian_matrix[row,row] += dt * (1.5 * source_density_amplitude[iz,ir,index] / dens + - (0.5 * source_pressure_amplitude[iz,ir,index] + + source_momentum_amplitude[iz,ir,index]) / ppar + ) + end + end + jacobian_matrix[row,end] += + dt * f[ivpa,ivperp] * + (-0.75*sqrt(2.0/dens/me)/ppar^1.5*third_moment*dppar_dz + - 0.25*sqrt(2.0/ppar/me)/dens^1.5*third_moment*ddens_dz + + 0.5*sqrt(2.0/ppar/dens/me)*dthird_moment_dz + + vpa.grid[ivpa] * (0.75*sqrt(2.0/me/ppar)/dens^1.5*ddens_dz + + 0.5/sqrt(2.0*dens*me)/ppar^1.5*dppar_dz)) end return nothing end function add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(jacobian_matrix, z, dt, ion_dt, - ir; ppar_offset=0) + ir, include=:all; ppar_offset=0) @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") + @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include") + + if include === :all + begin_z_region() + @loop_z iz begin + # Rows corresponding to electron_ppar + row = ppar_offset + iz + + # Backward-Euler forcing term + jacobian_matrix[row,row] += dt / ion_dt + end + end + + return nothing +end + +function add_ion_dt_forcing_of_electron_ppar_to_z_only_Jacobian!(jacobian_matrix, z, dt, + ion_dt, ir) + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong") - begin_z_region() @loop_z iz begin # Rows corresponding to electron_ppar - row = ppar_offset + iz + row = iz # Backward-Euler forcing term jacobian_matrix[row,row] += dt / ion_dt @@ -3656,6 +4068,17 @@ function add_ion_dt_forcing_of_electron_ppar_to_Jacobian!(jacobian_matrix, z, dt return nothing 
end +function add_ion_dt_forcing_of_electron_ppar_to_v_only_Jacobian!(jacobian_matrix, z, dt, + ion_dt, ir, iz) + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + #@boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong") + + # Backward-Euler forcing term + jacobian_matrix[end,end] += dt / ion_dt + + return nothing +end + # function check_electron_pdf_convergence!(electron_pdf_converged, pdf_new, pdf) # # check to see if the electron pdf has converged to within the specified tolerance # # NB: the convergence criterion is based on the average relative difference between the diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl index 8e0c92ba4..a9e0fd383 100644 --- a/moment_kinetics/src/electron_vpa_advection.jl +++ b/moment_kinetics/src/electron_vpa_advection.jl @@ -95,12 +95,12 @@ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa, end function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, - vth, third_moment, ddens_dz, dppar_dz, - dthird_moment_dz, moments, me, z, vperp, - vpa, z_spectral, vpa_spectral, + vth, third_moment, dpdf_dvpa, ddens_dz, + dppar_dz, dthird_moment_dz, moments, me, + z, vperp, vpa, z_spectral, vpa_spectral, vpa_advect, z_speed, scratch_dummy, - external_source_settings, dt, ir; - f_offset=0, ppar_offset=0) + external_source_settings, dt, ir, + include=:all; f_offset=0, ppar_offset=0) if f_offset == ppar_offset error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. 
f and ppar " * "cannot be in same place in state vector.") @@ -108,25 +108,13 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") + @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include") v_size = vperp.n * vpa.n source_density_amplitude = @view moments.electron.external_source_density_amplitude[:,ir,:] source_momentum_amplitude = @view moments.electron.external_source_momentum_amplitude[:,ir,:] source_pressure_amplitude = @view moments.electron.external_source_pressure_amplitude[:,ir,:] - dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] - begin_z_vperp_region() - update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid, - external_source_settings.electron, ir) - @loop_z_vperp iz ivperp begin - @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir] - end - #calculate the upwind derivative of the electron pdf w.r.t. 
wpa - @loop_z_vperp iz ivperp begin - @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa, - vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral) - end - if !isa(vpa_spectral, gausslegendre_info) error("Only gausslegendre_pseudospectral vpa-coordinate type is supported by " * "add_electron_vpa_advection_to_Jacobian!() preconditioner because we " @@ -158,32 +146,34 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, # + source_density_amplitude*u/n/vth # - w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p # + w_∥*1/2*source_density_amplitude/n) * dg/dw_∥ - if ielement_vpa == 1 && igrid_vpa == 1 - jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa] - elseif ielement_vpa == vpa.nelement_local && igrid_vpa == vpa.ngrid - jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] - elseif igrid_vpa == vpa.ngrid - # Note igrid_vpa is only ever 1 when ielement_vpa==1, because - # of the way element boundaries are counted. 
- icolumn_min_vpa_next = vpa.imin[ielement_vpa+1] - 1 - icolumn_max_vpa_next = vpa.imax[ielement_vpa+1] - if vpa_speed < 0.0 - jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= - dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1] - elseif vpa_speed > 0.0 + if include ∈ (:all, :explicit_v) + if ielement_vpa == 1 && igrid_vpa == 1 + jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa] + elseif ielement_vpa == vpa.nelement_local && igrid_vpa == vpa.ngrid jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] + elseif igrid_vpa == vpa.ngrid + # Note igrid_vpa is only ever 1 when ielement_vpa==1, because + # of the way element boundaries are counted. 
+ icolumn_min_vpa_next = vpa.imin[ielement_vpa+1] - 1 + icolumn_max_vpa_next = vpa.imax[ielement_vpa+1] + if vpa_speed < 0.0 + jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= + dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1] + elseif vpa_speed > 0.0 + jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] + else + jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * 0.5 * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] + jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= + dt * vpa_speed * 0.5 * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1] + end else jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * 0.5 * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] - jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa_next:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= - dt * vpa_speed * 0.5 * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1] + dt * vpa_speed * vpa_Dmat[igrid_vpa,:] ./ vpa_element_scale[ielement_vpa] end - else - jacobian_matrix[row,(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_min_vpa:(iz-1)*v_size+(ivperp-1)*vpa.n+icolumn_max_vpa] .+= - dt * vpa_speed * vpa_Dmat[igrid_vpa,:] ./ vpa_element_scale[ielement_vpa] end # q = 2*p*vth*∫dw_∥ w_∥^3 g # = 2*p^(3/2)*sqrt(2/n/me)*∫dw_∥ w_∥^3 g @@ -202,12 +192,14 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, # d(w_∥*0.5/p*dq/dz[irowz])/d(p[icolz]) = # (-w_∥*3/4*sqrt(2/n/me)/p^(3/2)*∫dw_∥ w_∥^3 g * dp/dz - w_∥*1/4*sqrt(2/me)/sqrt(p)/n^(3/2)*∫dw_∥ w_∥^3 g * dn/dz + 
w_∥*1/2*sqrt(2/n/me)/sqrt(p)*∫dw_∥ w_∥^3 dg/dz)[irowz] * delta(irowz,icolz) # + w_∥*(1.5*sqrt(2/p/n/me)*∫dw_∥ w_∥^3 g)[irowz] * z_deriv_matrix[irowz,icolz] - for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset - jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * - vpa.grid[ivpa] * (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] - - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) * - vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + if include ∈ (:all, :explicit_v) + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * + vpa.grid[ivpa] * (1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*dppar_dz[iz] + - 0.5*sqrt(2.0*ppar[iz]/me)/dens[iz]^1.5*ddens_dz[iz]) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + end end z_deriv_row_startind = z_deriv_matrix.rowptr[iz] z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1 @@ -218,10 +210,12 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry end - jacobian_matrix[row,ppar_offset+iz] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * - (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz] - - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz] - + 0.5*sqrt(2.0/dens[iz]/me/ppar[iz])*dthird_moment_dz[iz]) + if include ∈ (:all, :explicit_v) + jacobian_matrix[row,ppar_offset+iz] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * + (-0.75*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*third_moment[iz]*dppar_dz[iz] + - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*third_moment[iz]*ddens_dz[iz] + + 0.5*sqrt(2.0/dens[iz]/me/ppar[iz])*dthird_moment_dz[iz]) + end for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros) col 
= ppar_offset + icolz jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * 1.5*sqrt(2.0/ppar[iz]/dens[iz]/me)*third_moment[iz] * z_deriv_entry @@ -251,18 +245,20 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, # - 1/2*source_density_amplitude*u/sqrt(2*n)/p^(3/2) # + w_∥*1/2*(source_pressure_amplitude + 2*u*source_momentum_amplitude)/p^2)[irowz] * delta(irowz,icolz) # + (1/2*sqrt(2/p/n/me) - w_∥^2/sqrt(2*p*n*me))[irowz] * z_deriv_matrix[irowz,icolz] - jacobian_matrix[row,ppar_offset+iz] += dt * ( - -0.25*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*dppar_dz[iz] - - vpa.grid[ivpa]^2*(-0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz] - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz]) - ) * dpdf_dvpa[ivpa,ivperp,iz] - for index ∈ eachindex(external_source_settings.electron) - electron_source = external_source_settings.electron[index] - if electron_source.active - jacobian_matrix[row,ppar_offset+iz] += dt * ( - -0.5*source_density_amplitude[iz,index]*upar[iz]/sqrt(2.0*dens[iz])/ppar[iz]^1.5 - + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz,index] - + 2.0*upar[iz]*source_momentum_amplitude[iz,index])/ppar[iz]^2 - ) * dpdf_dvpa[ivpa,ivperp,iz] + if include ∈ (:all, :explicit_v) + jacobian_matrix[row,ppar_offset+iz] += dt * ( + -0.25*sqrt(2.0/dens[iz]/me)/ppar[iz]^1.5*dppar_dz[iz] + - vpa.grid[ivpa]^2*(-0.5/sqrt(2.0*dens[iz]*me)/ppar[iz]^1.5*dppar_dz[iz] - 0.25*sqrt(2.0/me/ppar[iz])/dens[iz]^1.5*ddens_dz[iz]) + ) * dpdf_dvpa[ivpa,ivperp,iz] + for index ∈ eachindex(external_source_settings.electron) + electron_source = external_source_settings.electron[index] + if electron_source.active + jacobian_matrix[row,ppar_offset+iz] += dt * ( + -0.5*source_density_amplitude[iz,index]*upar[iz]/sqrt(2.0*dens[iz])/ppar[iz]^1.5 + + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[iz,index] + + 2.0*upar[iz]*source_momentum_amplitude[iz,index])/ppar[iz]^2 + ) * dpdf_dvpa[ivpa,ivperp,iz] + end end end for (icolz, z_deriv_entry) ∈ 
zip(z_deriv_colinds, z_deriv_row_nonzeros) @@ -277,4 +273,97 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, return nothing end +function add_electron_vpa_advection_to_v_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, dpdf_dvpa, ddens_dz, + dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral, + vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir, iz) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong") + + source_density_amplitude = @view moments.electron.external_source_density_amplitude[iz,ir,:] + source_momentum_amplitude = @view moments.electron.external_source_momentum_amplitude[iz,ir,:] + source_pressure_amplitude = @view moments.electron.external_source_pressure_amplitude[iz,ir,:] + + if !isa(vpa_spectral, gausslegendre_info) + error("Only gausslegendre_pseudospectral vpa-coordinate type is supported by " + * "add_electron_vpa_advection_to_Jacobian!() preconditioner because we " + * "need differentiation matrices.") + end + + vpa_Dmat = vpa_spectral.lobatto.Dmat + vpa_element_scale = vpa.element_scale + + @loop_vperp_vpa ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (ivperp - 1) * vpa.n + ivpa + + ielement_vpa = vpa.ielement[ivpa] + igrid_vpa = vpa.igrid[ivpa] + icolumn_min_vpa = vpa.imin[ielement_vpa] - (ielement_vpa != 1) + icolumn_max_vpa = vpa.imax[ielement_vpa] + + vpa_speed = vpa_advect[1].speed[ivpa,ivperp,iz,ir] + + if ielement_vpa == 1 && igrid_vpa == 1 + jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa:(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa] + elseif ielement_vpa == vpa.nelement_local && igrid_vpa 
== vpa.ngrid + jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa:(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] + elseif igrid_vpa == vpa.ngrid + # Note igrid_vpa is only ever 1 when ielement_vpa==1, because + # of the way element boundaries are counted. + icolumn_min_vpa_next = vpa.imin[ielement_vpa+1] - 1 + icolumn_max_vpa_next = vpa.imax[ielement_vpa+1] + if vpa_speed < 0.0 + jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa_next:(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= + dt * vpa_speed * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1] + elseif vpa_speed > 0.0 + jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa:(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] + else + jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa:(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * 0.5 * vpa_Dmat[end,:] ./ vpa_element_scale[ielement_vpa] + jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa_next:(ivperp-1)*vpa.n+icolumn_max_vpa_next] .+= + dt * vpa_speed * 0.5 * vpa_Dmat[1,:] ./ vpa_element_scale[ielement_vpa+1] + end + else + jacobian_matrix[row,(ivperp-1)*vpa.n+icolumn_min_vpa:(ivperp-1)*vpa.n+icolumn_max_vpa] .+= + dt * vpa_speed * vpa_Dmat[igrid_vpa,:] ./ vpa_element_scale[ielement_vpa] + end + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolvperp - 1) * vpa.n + icolvpa + jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp] * + vpa.grid[ivpa] * (1.5*sqrt(2.0/ppar/dens/me)*dppar_dz + - 0.5*sqrt(2.0*ppar/me)/dens^1.5*ddens_dz) * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 + end + jacobian_matrix[row,end] += dt * dpdf_dvpa[ivpa,ivperp] * vpa.grid[ivpa] * + (-0.75*sqrt(2.0/dens/me)/ppar^1.5*third_moment*dppar_dz + - 0.25*sqrt(2.0/me/ppar)/dens^1.5*third_moment*ddens_dz + + 0.5*sqrt(2.0/dens/me/ppar)*dthird_moment_dz) + jacobian_matrix[row,end] += dt * ( + -0.25*sqrt(2.0/dens/me)/ppar^1.5*dppar_dz + - 
vpa.grid[ivpa]^2*(-0.5/sqrt(2.0*dens*me)/ppar^1.5*dppar_dz - 0.25*sqrt(2.0/me/ppar)/dens^1.5*ddens_dz) + ) * dpdf_dvpa[ivpa,ivperp] + for index ∈ eachindex(external_source_settings.electron) + electron_source = external_source_settings.electron[index] + if electron_source.active + jacobian_matrix[row,end] += dt * ( + -0.5*source_density_amplitude[index]*upar/sqrt(2.0*dens)/ppar^1.5 + + vpa.grid[ivpa]*0.5*(source_pressure_amplitude[index] + + 2.0*upar*source_momentum_amplitude[index])/ppar^2 + ) * dpdf_dvpa[ivpa,ivperp] + end + end + end + + return nothing +end + end diff --git a/moment_kinetics/src/electron_z_advection.jl b/moment_kinetics/src/electron_z_advection.jl index 7685971a5..8c78e58ab 100644 --- a/moment_kinetics/src/electron_z_advection.jl +++ b/moment_kinetics/src/electron_z_advection.jl @@ -75,9 +75,9 @@ function update_electron_speed_z!(advect, upar, vth, vpa) end function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, vth, - me, z, vperp, vpa, z_spectral, z_advect, - scratch_dummy, dt, ir; f_offset=0, - ppar_offset=0) + dpdf_dz, me, z, vperp, vpa, z_spectral, + z_advect, z_speed, scratch_dummy, dt, ir, + include=:all; f_offset=0, ppar_offset=0) if f_offset == ppar_offset error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. 
f and ppar " * "cannot be in same place in state vector.") @@ -85,27 +85,82 @@ function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, p @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") + @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include") v_size = vperp.n * vpa.n - dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] + if !isa(z_spectral, gausslegendre_info) + error("Only gausslegendre_pseudospectral z-coordinate type is supported by " + * "add_electron_z_advection_to_Jacobian!() preconditioner because we need " + * "differentiation matrices.") + end + z_Dmat = z_spectral.lobatto.Dmat + z_element_scale = z.element_scale - begin_vperp_vpa_region() - update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) - z_speed_array = @view z_advect[1].speed[:,:,:,1] + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end - @loop_vperp_vpa ivperp ivpa begin - @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed_array[:,ivpa,ivperp] + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset + v_remainder = (ivperp - 1) * vpa.n + ivpa + f_offset + + ielement_z = z.ielement[iz] + igrid_z = z.igrid[iz] + icolumn_min_z = z.imin[ielement_z] - (ielement_z != 1) + icolumn_max_z = z.imax[ielement_z] + + this_z_speed = z_speed[iz,ivpa,ivperp] + + # Contributions from (w_∥*vth + upar)*dg/dz + if include ∈ (:all, :explicit_z) + if ielement_z == 1 && igrid_z == 1 + jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + 
dt * this_z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z] + elseif ielement_z == z.nelement_local && igrid_z == z.ngrid + jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * this_z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z] + elseif igrid_z == z.ngrid + # Note igrid_z is only ever 1 when ielement_z==1, because + # of the way element boundaries are counted. + icolumn_min_z_next = z.imin[ielement_z+1] - 1 + icolumn_max_z_next = z.imax[ielement_z+1] + if this_z_speed < 0.0 + jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+= + dt * this_z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z+1] + elseif this_z_speed > 0.0 + jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * this_z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z] + else + jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * this_z_speed * 0.5 * z_Dmat[end,:] ./ z_element_scale[ielement_z] + jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+= + dt * this_z_speed * 0.5 * z_Dmat[1,:] ./ z_element_scale[ielement_z+1] + end + else + jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= + dt * this_z_speed * z_Dmat[igrid_z,:] ./ z_element_scale[ielement_z] + end + end + # vth = sqrt(2*p/n/me) + # so d(vth)/d(ppar) = 1/n/me/sqrt(2*p/n/me) = 1/n/me/vth + # and d(w_∥*vth*dg/dz)/d(ppar) = 1/n/me/vth*w_∥*dg/dz + if include ∈ (:all, :explicit_v) + jacobian_matrix[row,ppar_offset+iz] += dt / dens[iz] / me / vth[iz] * vpa.grid[ivpa] * dpdf_dz[ivpa,ivperp,iz] + end end - #calculate the upwind derivative - @views derivative_z_pdf_vpavperpz!(dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir], - scratch_dummy.buffer_vpavperpr_1[:,:,ir], - 
scratch_dummy.buffer_vpavperpr_2[:,:,ir], - scratch_dummy.buffer_vpavperpr_3[:,:,ir], - scratch_dummy.buffer_vpavperpr_4[:,:,ir], - scratch_dummy.buffer_vpavperpr_5[:,:,ir], - scratch_dummy.buffer_vpavperpr_6[:,:,ir], - z_spectral, z) + + return nothing +end + +function add_electron_z_advection_to_z_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, dpdf_dz, me, z, vperp, vpa, z_spectral, + z_advect, z_speed, scratch_dummy, dt, ir, ivperp, ivpa) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong") if !isa(z_spectral, gausslegendre_info) error("Only gausslegendre_pseudospectral z-coordinate type is supported by " @@ -115,56 +170,72 @@ function add_electron_z_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, p z_Dmat = z_spectral.lobatto.Dmat z_element_scale = z.element_scale - begin_z_vperp_vpa_region() - @loop_z_vperp_vpa iz ivperp ivpa begin + @loop_z iz begin if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, - z_speed_array) + z_speed) continue end # Rows corresponding to pdf_electron - row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset - v_remainder = (ivperp - 1) * vpa.n + ivpa + f_offset + row = iz ielement_z = z.ielement[iz] igrid_z = z.igrid[iz] icolumn_min_z = z.imin[ielement_z] - (ielement_z != 1) icolumn_max_z = z.imax[ielement_z] - z_speed = z_speed_array[iz,ivpa,ivperp] + this_z_speed = z_speed[iz,ivpa,ivperp] # Contributions from (w_∥*vth + upar)*dg/dz if ielement_z == 1 && igrid_z == 1 - jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z] + jacobian_matrix[row,icolumn_min_z:icolumn_max_z] .+= + dt * this_z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z] elseif ielement_z == z.nelement_local && igrid_z == z.ngrid - 
jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z] + jacobian_matrix[row,icolumn_min_z:icolumn_max_z] .+= + dt * this_z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z] elseif igrid_z == z.ngrid # Note igrid_z is only ever 1 when ielement_z==1, because # of the way element boundaries are counted. icolumn_min_z_next = z.imin[ielement_z+1] - 1 icolumn_max_z_next = z.imax[ielement_z+1] - if z_speed < 0.0 - jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+= - dt * z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z+1] - elseif z_speed > 0.0 - jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z] + if this_z_speed < 0.0 + jacobian_matrix[row,icolumn_min_z_next:icolumn_max_z_next] .+= + dt * this_z_speed * z_Dmat[1,:] ./ z_element_scale[ielement_z+1] + elseif this_z_speed > 0.0 + jacobian_matrix[row,icolumn_min_z:icolumn_max_z] .+= + dt * this_z_speed * z_Dmat[end,:] ./ z_element_scale[ielement_z] else - jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * 0.5 * z_Dmat[end,:] ./ z_element_scale[ielement_z] - jacobian_matrix[row,(icolumn_min_z_next-1)*v_size+v_remainder:v_size:(icolumn_max_z_next-1)*v_size+v_remainder] .+= - dt * z_speed * 0.5 * z_Dmat[1,:] ./ z_element_scale[ielement_z+1] + jacobian_matrix[row,icolumn_min_z:icolumn_max_z] .+= + dt * this_z_speed * 0.5 * z_Dmat[end,:] ./ z_element_scale[ielement_z] + jacobian_matrix[row,icolumn_min_z_next:icolumn_max_z_next] .+= + dt * this_z_speed * 0.5 * z_Dmat[1,:] ./ z_element_scale[ielement_z+1] end else - jacobian_matrix[row,(icolumn_min_z-1)*v_size+v_remainder:v_size:(icolumn_max_z-1)*v_size+v_remainder] .+= - dt * z_speed * z_Dmat[igrid_z,:] ./ 
z_element_scale[ielement_z] + jacobian_matrix[row,icolumn_min_z:icolumn_max_z] .+= + dt * this_z_speed * z_Dmat[igrid_z,:] ./ z_element_scale[ielement_z] end - # vth = sqrt(2*p/n/me) - # so d(vth)/d(ppar) = 1/n/me/sqrt(2*p/n/me) = 1/n/me/vth - # and d(w_∥*vth*dg/dz)/d(ppar) = 1/n/me/vth*w_∥*dg/dz - jacobian_matrix[row,ppar_offset+iz] += dt / dens[iz] / me / vth[iz] * vpa.grid[ivpa] * dpdf_dz[ivpa,ivperp,iz] + end + + return nothing +end + +function add_electron_z_advection_to_v_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, dpdf_dz, me, z, vperp, vpa, z_spectral, + z_advect, z_speed, scratch_dummy, dt, ir, iz) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong") + + @loop_vperp_vpa ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, + z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (ivperp - 1) * vpa.n + ivpa + + jacobian_matrix[row,end] += dt / dens / me / vth * vpa.grid[ivpa] * dpdf_dz[ivpa,ivperp] end return nothing diff --git a/moment_kinetics/src/external_sources.jl b/moment_kinetics/src/external_sources.jl index 0166b307f..87344cdea 100644 --- a/moment_kinetics/src/external_sources.jl +++ b/moment_kinetics/src/external_sources.jl @@ -1012,12 +1012,12 @@ Note that this function operates on a single point in `r`, given by `ir`, and `p end function add_total_external_electron_source_to_Jacobian!( - jacobian_matrix, f, moments, me, z_speed, electron_sources, z, vperp, vpa, dt, ir; - f_offset=0, ppar_offset=0) + jacobian_matrix, f, moments, me, z_speed, electron_sources, z, vperp, vpa, dt, ir, + include=:all; f_offset=0, ppar_offset=0) for index ∈ eachindex(electron_sources) add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, me, z_speed, electron_sources[index], index, - z, vperp, vpa, dt, ir; + z, vperp, 
vpa, dt, ir, include; f_offset=f_offset, ppar_offset=ppar_offset) end @@ -1025,8 +1025,8 @@ end function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, me, z_speed, electron_source, index, z, - vperp, vpa, dt, ir; f_offset=0, - ppar_offset=0) + vperp, vpa, dt, ir, include=:all; + f_offset=0, ppar_offset=0) if f_offset == ppar_offset error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar " * "cannot be in same place in state vector.") @@ -1034,6 +1034,7 @@ function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") @boundscheck size(jacobian_matrix, 1) ≥ ppar_offset + z.n || error("ppar_offset=$ppar_offset is too big") + @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include") if !electron_source.active return nothing @@ -1055,7 +1056,7 @@ function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, v_size = vperp.n * vpa.n begin_z_vperp_vpa_region() - if electron_source.source_type == "energy" + if electron_source.source_type == "energy" && include === :all @loop_z_vperp_vpa iz ivperp ivpa begin if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) @@ -1069,26 +1070,136 @@ function add_external_electron_source_to_Jacobian!(jacobian_matrix, f, moments, jacobian_matrix[row,row] += dt * source_amplitude[iz] end end - @loop_z_vperp_vpa iz ivperp ivpa begin + if include ∈ (:all, :explicit_v) + @loop_z_vperp_vpa iz ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset + + # Contributions from + # 
-vth/n*vth_factor*source_amplitude*exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) + # Using + # d(vth[irowz])/d(ppar[icolz]) = 1/2*vth/ppar * delta(irowz,icolz) + # + # d(exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T)[irowz])/d(ppar[icolz]) + # = -2*(w_⟂^2+(w_∥*vth+u)*w_∥)*me/source_T * 1/2*vth/ppar * exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) * delta(irowz,icolz) + # = -(w_⟂^2+(w_∥*vth+u)*w_∥)*me/source_T * vth/ppar * exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) * delta(irowz,icolz) + jacobian_matrix[row,ppar_offset+iz] += + -dt * vth[iz] / dens[iz] * vth_factor * source_amplitude[iz] * + (0.5/ppar[iz] - (vperp_grid[ivperp]^2 + (vpa_grid[ivpa]*vth[iz] + upar[iz])*vpa_grid[ivpa])*me/source_T*vth[iz]/ppar[iz]) * + exp(-((vperp_grid[ivperp]*vth[iz])^2 + (vpa_grid[ivpa]*vth[iz] + upar[iz])^2) * me / source_T) + end + end + + return nothing +end + +function add_total_external_electron_source_to_z_only_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, electron_sources, z, vperp, vpa, dt, ir, + ivperp, ivpa) + for index ∈ eachindex(electron_sources) + add_external_electron_source_to_z_only_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, electron_sources[index], index, z, + vperp, vpa, dt, ir, ivperp, ivpa) + end +end + +function add_external_electron_source_to_z_only_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, electron_source, index, z, vperp, vpa, + dt, ir, ivperp, ivpa) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong") + + if !electron_source.active + return nothing + end + + if electron_source.source_type == "energy" + source_amplitude = @view moments.electron.external_source_amplitude[:,ir,index] + + @loop_z iz begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, + z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = iz + + # Contribution from 
`external_electron_source!()` + jacobian_matrix[row,row] += dt * source_amplitude[iz] + end + end + + return nothing +end + +function add_total_external_electron_source_to_v_only_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, electron_sources, z, vperp, vpa, dt, ir, + iz) + for index ∈ eachindex(electron_sources) + add_external_electron_source_to_v_only_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, electron_sources[index], index, z, + vperp, vpa, dt, ir, iz) + end +end + +function add_external_electron_source_to_v_only_Jacobian!( + jacobian_matrix, f, moments, me, z_speed, electron_source, index, z, vperp, vpa, + dt, ir, iz) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong") + + if !electron_source.active + return nothing + end + + source_amplitude = moments.electron.external_source_amplitude[iz,ir,index] + source_T = electron_source.source_T + dens = moments.electron.dens[iz,ir] + upar = moments.electron.upar[iz,ir] + ppar = moments.electron.ppar[iz,ir] + vth = moments.electron.vth[iz,ir] + if vperp.n == 1 + vth_factor = 1.0 / sqrt(source_T / me) + else + vth_factor = 1.0 / sqrt(source_T / me)^1.5 + end + vperp_grid = vperp.grid + vpa_grid = vpa.grid + v_size = vperp.n * vpa.n + + if electron_source.source_type == "energy" + @loop_vperp_vpa ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, + z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (ivperp - 1) * vpa.n + ivpa + + # Contribution from `external_electron_source!()` + jacobian_matrix[row,row] += dt * source_amplitude + end + end + @loop_vperp_vpa ivperp ivpa begin if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) continue end # Rows corresponding to pdf_electron - row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset - - # 
Contributions from - # -vth/n*vth_factor*source_amplitude*exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) - # Using - # d(vth[irowz])/d(ppar[icolz]) = 1/2*vth/ppar * delta(irowz,icolz) - # - # d(exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T)[irowz])/d(ppar[icolz]) - # = -2*(w_⟂^2+(w_∥*vth+u)*w_∥)*me/source_T * 1/2*vth/ppar * exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) * delta(irowz,icolz) - # = -(w_⟂^2+(w_∥*vth+u)*w_∥)*me/source_T * vth/ppar * exp(-((w_⟂*vth)^2+(w_∥*vth+u)^2)*me/source_T) * delta(irowz,icolz) - jacobian_matrix[row,ppar_offset+iz] += - -dt * vth[iz] / dens[iz] * vth_factor * source_amplitude[iz] * - (0.5/ppar[iz] - (vperp_grid[ivperp]^2 + (vpa_grid[ivpa]*vth[iz] + upar[iz])*vpa_grid[ivpa])*me/source_T*vth[iz]/ppar[iz]) * - exp(-((vperp_grid[ivperp]*vth[iz])^2 + (vpa_grid[ivpa]*vth[iz] + upar[iz])^2) * me / source_T) + row = (ivperp - 1) * vpa.n + ivpa + + jacobian_matrix[row,end] += + -dt * vth / dens * vth_factor * source_amplitude * + (0.5/ppar - (vperp_grid[ivperp]^2 + (vpa_grid[ivpa]*vth + upar)*vpa_grid[ivpa])*me/source_T*vth/ppar) * + exp(-((vperp_grid[ivperp]*vth)^2 + (vpa_grid[ivpa]*vth + upar)^2) * me / source_T) end return nothing diff --git a/moment_kinetics/src/krook_collisions.jl b/moment_kinetics/src/krook_collisions.jl index 77684580c..5f0221261 100644 --- a/moment_kinetics/src/krook_collisions.jl +++ b/moment_kinetics/src/krook_collisions.jl @@ -432,10 +432,11 @@ end function add_electron_krook_collisions_to_Jacobian!(jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, - vpa, z_speed, dt, ir; f_offset=0, - ppar_offset) + vpa, z_speed, dt, ir, include=:all; + f_offset=0, ppar_offset) @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") + @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include") if 
collisions.krook.nuee0 ≤ 0.0 && collisions.krook.nuei0 ≤ 0.0 return nothing @@ -457,25 +458,98 @@ function add_electron_krook_collisions_to_Jacobian!(jacobian_matrix, f, dens, up # Contribution from electron_krook_collisions!() nu_ee = get_collision_frequency_ee(collisions, dens[iz], vth[iz]) nu_ei = get_collision_frequency_ei(collisions, dens[iz], vth[iz]) + if include === :all + jacobian_matrix[row,row] += dt * (nu_ee + nu_ei) + end + + if include ∈ (:all, :explicit_v) + fM_i = exp(-(vpa.grid[ivpa] + (upar_ion[iz] - upar[iz])/vth[iz])^2 - vperp.grid[ivperp]^2) + # d(f_M(u_i)[irowz])/d(ppar[icolz]) + # = -2*(vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)*(-1/2/vth/ppar)*f_M(u_i) * delta(irow,icolz) + # = (vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar*f_M(u_i) * delta(irow,icolz) + jacobian_matrix[row,ppar_offset+iz] += + -dt * nu_ei * (vpa.grid[ivpa]+(upar_ion[iz]-upar[iz])/vth[iz])*(upar_ion[iz]-upar[iz])/vth[iz]/ppar[iz]*fM_i + + if using_reference_parameters + # Both collision frequencies are proportional to n/vth^3=n^(5/2)*(me/2/p)^3/2, + # so + # d(nu[irowz])/d(ppar[icolz]) = -3/2*nu/ppar * delta(irowz,icolz) + # d(-(vpa.grid+(upar_ion-upar)/vth)^2[irowz])/d(ppar[icoliz] + # = -(vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar * delta(irow,icolz) + jacobian_matrix[row,ppar_offset+iz] += + -dt * 1.5 / ppar[iz] * + (nu_ee * (f[ivpa,ivperp,iz] - exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)) + + nu_ei * (f[ivpa,ivperp,iz] - fM_i)) + end + end + end + + return nothing +end + +function add_electron_krook_collisions_to_z_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa, + z_speed, dt, ir, ivperp, ivpa) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong") + + if collisions.krook.nuee0 ≤ 0.0 && collisions.krook.nuei0 ≤ 0.0 + return nothing + end + + @loop_z iz begin + 
if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = iz + + # Contribution from electron_krook_collisions!() + nu_ee = get_collision_frequency_ee(collisions, dens[iz], vth[iz]) + nu_ei = get_collision_frequency_ei(collisions, dens[iz], vth[iz]) + jacobian_matrix[row,row] += dt * (nu_ee + nu_ei) + end + + return nothing +end + +function add_electron_krook_collisions_to_v_only_Jacobian!( + jacobian_matrix, f, dens, upar, ppar, vth, upar_ion, collisions, z, vperp, vpa, + z_speed, dt, ir, iz) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong") + + if collisions.krook.nuee0 ≤ 0.0 && collisions.krook.nuei0 ≤ 0.0 + return nothing + end + + using_reference_parameters = (collisions.krook.frequency_option == "reference_parameters") + + @loop_vperp_vpa ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (ivperp - 1) * vpa.n + ivpa + + # Contribution from electron_krook_collisions!() + nu_ee = get_collision_frequency_ee(collisions, dens, vth) + nu_ei = get_collision_frequency_ei(collisions, dens, vth) jacobian_matrix[row,row] += dt * (nu_ee + nu_ei) - fM_i = exp(-(vpa.grid[ivpa] + (upar_ion[iz] - upar[iz])/vth[iz])^2 - vperp.grid[ivperp]^2) - # d(f_M(u_i)[irowz])/d(ppar[icolz]) - # = -2*(vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)*(-1/2/vth/ppar)*f_M(u_i) * delta(irow,icolz) - # = (vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar*f_M(u_i) * delta(irow,icolz) - jacobian_matrix[row,ppar_offset+iz] += - -dt * nu_ei * (vpa.grid[ivpa]+(upar_ion[iz]-upar[iz])/vth[iz])*(upar_ion[iz]-upar[iz])/vth[iz]/ppar[iz]*fM_i + fM_i = exp(-(vpa.grid[ivpa] + (upar_ion - upar)/vth)^2 - vperp.grid[ivperp]^2) + 
jacobian_matrix[row,end] += + -dt * nu_ei * (vpa.grid[ivpa]+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar*fM_i if using_reference_parameters - # Both collision frequencies are proportional to n/vth^3=n^(5/2)*(me/2/p)^3/2, - # so - # d(nu[irowz])/d(ppar[icolz]) = -3/2*nu/ppar * delta(irowz,icolz) - # d(-(vpa.grid+(upar_ion-upar)/vth)^2[irowz])/d(ppar[icoliz] - # = -(vpa.grid+(upar_ion-upar)/vth)*(upar_ion-upar)/vth/ppar * delta(irow,icolz) - jacobian_matrix[row,ppar_offset+iz] += - -dt * 1.5 / ppar[iz] * - (nu_ee * (f[ivpa,ivperp,iz] - exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)) - + nu_ei * (f[ivpa,ivperp,iz] - fM_i)) + jacobian_matrix[row,end] += + -dt * 1.5 / ppar * + (nu_ee * (f[ivpa,ivperp] - exp(-vpa.grid[ivpa]^2 - vperp.grid[ivperp]^2)) + + nu_ei * (f[ivpa,ivperp] - fM_i)) end end diff --git a/moment_kinetics/src/moment_constraints.jl b/moment_kinetics/src/moment_constraints.jl index e880b5e3a..33d93b439 100644 --- a/moment_kinetics/src/moment_constraints.jl +++ b/moment_kinetics/src/moment_constraints.jl @@ -279,32 +279,24 @@ end add_electron_implicit_constraint_forcing_to_Jacobian!(jacobian_matrix, f, z_speed, z, vperp, vpa, constraint_forcing_rate, - dt, ir; f_offset=0) + dt, ir, include=:all; + f_offset=0) Add the contributions corresponding to [`electron_implicit_constraint_forcing!`](@ref) to `jacobian_matrix`. 
""" -function add_electron_implicit_constraint_forcing_to_Jacobian!(jacobian_matrix, f, - z_speed, z, vperp, vpa, - constraint_forcing_rate, - dt, ir; f_offset=0) +function add_electron_implicit_constraint_forcing_to_Jacobian!( + jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp, + vpa, constraint_forcing_rate, dt, ir, include=:all; f_offset=0) + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") @boundscheck size(jacobian_matrix, 1) ≥ f_offset + z.n * vperp.n * vpa.n || error("f_offset=$f_offset is too big") + @boundscheck include ∈ (:all, :explicit_z, :explicit_v) || error("Unexpected value for include=$include") vpa_grid = vpa.grid vpa_wgts = vpa.wgts v_size = vperp.n * vpa.n - zeroth_moment = z.scratch_shared - first_moment = z.scratch_shared2 - second_moment = z.scratch_shared3 - begin_z_region() - @loop_z iz begin - @views zeroth_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_wgts) - @views first_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, vpa_wgts) - @views second_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, 2, vpa_wgts) - end - begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) @@ -314,21 +306,93 @@ function add_electron_implicit_constraint_forcing_to_Jacobian!(jacobian_matrix, # Rows corresponding to pdf_electron row = (iz - 1) * v_size + (ivperp - 1) * vpa.n + ivpa + f_offset + # Diagonal terms + if include === :all + jacobian_matrix[row,row] += -dt * constraint_forcing_rate * + ((1.0 - zeroth_moment[iz]) + - first_moment[iz]*vpa_grid[ivpa] + + (0.5 - second_moment[iz])*vpa_grid[ivpa]^2) + end + + if include ∈ (:all, :explicit_v) + # Integral terms + # d(∫dw_∥ w_∥^n g[irow])/d(g[icol]) = vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^n + for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset 
+ jacobian_matrix[row,col] += dt * constraint_forcing_rate * + (1.0 + + vpa_grid[icolvpa]*vpa_grid[ivpa] + + vpa_grid[icolvpa]^2*vpa_grid[ivpa]^2) * + vpa_wgts[icolvpa]/sqrt(π) * f[ivpa,ivperp,iz] + end + end + end + + return nothing +end + +function add_electron_implicit_constraint_forcing_to_z_only_Jacobian!( + jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp, + vpa, constraint_forcing_rate, dt, ir, ivperp, ivpa) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == z.n || error("Jacobian matrix size is wrong") + + vpa_grid = vpa.grid + vpa_wgts = vpa.wgts + + @loop_z iz begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = iz + # Diagonal terms jacobian_matrix[row,row] += -dt * constraint_forcing_rate * ((1.0 - zeroth_moment[iz]) - first_moment[iz]*vpa_grid[ivpa] + (0.5 - second_moment[iz])*vpa_grid[ivpa]^2) + end + + return nothing +end + +function add_electron_implicit_constraint_forcing_to_v_only_Jacobian!( + jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp, + vpa, constraint_forcing_rate, dt, ir, iz) + + @boundscheck size(jacobian_matrix, 1) == size(jacobian_matrix, 2) || error("Jacobian is not square") + @boundscheck size(jacobian_matrix, 1) == vperp.n * vpa.n + 1 || error("Jacobian matrix size is wrong") + + vpa_grid = vpa.grid + vpa_wgts = vpa.wgts + v_size = vperp.n * vpa.n + + @loop_vperp_vpa ivperp ivpa begin + if skip_f_electron_bc_points_in_Jacobian(iz, ivperp, ivpa, z, vperp, vpa, z_speed) + continue + end + + # Rows corresponding to pdf_electron + row = (ivperp - 1) * vpa.n + ivpa + + # Diagonal terms + jacobian_matrix[row,row] += -dt * constraint_forcing_rate * + ((1.0 - zeroth_moment) + - first_moment*vpa_grid[ivpa] + + (0.5 - second_moment)*vpa_grid[ivpa]^2) # Integral terms # d(∫dw_∥ 
w_∥^n g[irow])/d(g[icol]) = vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^n for icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (iz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + col = (icolvperp - 1) * vpa.n + icolvpa jacobian_matrix[row,col] += dt * constraint_forcing_rate * (1.0 + vpa_grid[icolvpa]*vpa_grid[ivpa] + vpa_grid[icolvpa]^2*vpa_grid[ivpa]^2) * - vpa_wgts[icolvpa]/sqrt(π) * f[ivpa,ivperp,iz] + vpa_wgts[icolvpa]/sqrt(π) * f[ivpa,ivperp] end end From 3c6929462e222966c6dc7b55d69b5556bf900c0e Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 25 Oct 2024 11:46:46 +0100 Subject: [PATCH 10/43] Tests for ADI Jacobians --- moment_kinetics/test/jacobian_matrix_tests.jl | 921 +++++++++++++++++- 1 file changed, 905 insertions(+), 16 deletions(-) diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index e61ae7d33..e04a60d33 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -9,32 +9,54 @@ using moment_kinetics.analysis: vpagrid_to_dzdt using moment_kinetics.array_allocation: allocate_shared_float using moment_kinetics.boundary_conditions: enforce_v_boundary_condition_local!, enforce_vperp_boundary_condition! -using moment_kinetics.derivatives: derivative_z! +using moment_kinetics.calculus: derivative! +using moment_kinetics.derivatives: derivative_z!, derivative_z_pdf_vpavperpz! using moment_kinetics.electron_fluid_equations: calculate_electron_qpar_from_pdf_no_r!, electron_energy_equation_no_r!, - add_electron_energy_equation_to_Jacobian! + add_electron_energy_equation_to_Jacobian!, + add_electron_energy_equation_to_z_only_Jacobian!, + add_electron_energy_equation_to_v_only_Jacobian! 
using moment_kinetics.electron_kinetic_equation: add_contribution_from_pdf_term!, add_contribution_from_electron_pdf_term_to_Jacobian!, + add_contribution_from_electron_pdf_term_to_z_only_Jacobian!, + add_contribution_from_electron_pdf_term_to_v_only_Jacobian!, add_dissipation_term!, add_electron_dissipation_term_to_Jacobian!, + add_electron_dissipation_term_to_v_only_Jacobian!, add_ion_dt_forcing_of_electron_ppar_to_Jacobian!, + add_ion_dt_forcing_of_electron_ppar_to_z_only_Jacobian!, + add_ion_dt_forcing_of_electron_ppar_to_v_only_Jacobian!, electron_kinetic_equation_euler_update!, - fill_electron_kinetic_equation_Jacobian! + fill_electron_kinetic_equation_Jacobian!, + fill_electron_kinetic_equation_v_only_Jacobian!, + fill_electron_kinetic_equation_z_only_Jacobian_f!, + fill_electron_kinetic_equation_z_only_Jacobian_ppar! using moment_kinetics.electron_vpa_advection: electron_vpa_advection!, - add_electron_vpa_advection_to_Jacobian! + update_electron_speed_vpa!, + add_electron_vpa_advection_to_Jacobian!, + add_electron_vpa_advection_to_v_only_Jacobian! using moment_kinetics.electron_z_advection: electron_z_advection!, update_electron_speed_z!, - add_electron_z_advection_to_Jacobian! + add_electron_z_advection_to_Jacobian!, + add_electron_z_advection_to_z_only_Jacobian!, + add_electron_z_advection_to_v_only_Jacobian! using moment_kinetics.external_sources: total_external_electron_sources!, - add_total_external_electron_source_to_Jacobian! + add_total_external_electron_source_to_Jacobian!, + add_total_external_electron_source_to_z_only_Jacobian!, + add_total_external_electron_source_to_v_only_Jacobian! using moment_kinetics.krook_collisions: electron_krook_collisions!, - add_electron_krook_collisions_to_Jacobian! + add_electron_krook_collisions_to_Jacobian!, + add_electron_krook_collisions_to_z_only_Jacobian!, + add_electron_krook_collisions_to_v_only_Jacobian! 
using moment_kinetics.looping using moment_kinetics.moment_constraints: electron_implicit_constraint_forcing!, add_electron_implicit_constraint_forcing_to_Jacobian!, + add_electron_implicit_constraint_forcing_to_z_only_Jacobian!, + add_electron_implicit_constraint_forcing_to_v_only_Jacobian!, hard_force_moment_constraints! using moment_kinetics.type_definitions: mk_float -using moment_kinetics.velocity_moments: calculate_electron_moment_derivatives_no_r! +using moment_kinetics.velocity_moments: calculate_electron_moment_derivatives_no_r!, + integrate_over_vspace using StatsBase @@ -276,8 +298,27 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2) p_size = length(ppar) total_size = pdf_size + p_size + z_speed = @view z_advect[1].speed[:,:,:,ir] + + dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + @loop_vperp_vpa ivperp ivpa begin + @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed[:,ivpa,ivperp] + end + #calculate the upwind derivative + @views derivative_z_pdf_vpavperpz!(dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir], + scratch_dummy.buffer_vpavperpr_1[:,:,ir], + scratch_dummy.buffer_vpavperpr_2[:,:,ir], + scratch_dummy.buffer_vpavperpr_3[:,:,ir], + scratch_dummy.buffer_vpavperpr_4[:,:,ir], + scratch_dummy.buffer_vpavperpr_5[:,:,ir], + scratch_dummy.buffer_vpavperpr_6[:,:,ir], + z_spectral, z) + jacobian_matrix = allocate_shared_float(total_size, total_size) @serial_region begin + jacobian_matrix .= 0.0 for row ∈ 1:total_size # Initialise identity matrix jacobian_matrix[row,row] = 1.0 @@ -285,8 +326,85 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2) end add_electron_z_advection_to_Jacobian!( - jacobian_matrix, f, dens, upar, ppar, vth, me, z, vperp, vpa, z_spectral, - z_advect, scratch_dummy, dt, ir; ppar_offset=pdf_size) + jacobian_matrix, f, dens, upar, ppar, vth, dpdf_dz, me, z, vperp, vpa, + z_spectral, 
z_advect, z_speed, scratch_dummy, dt, ir; ppar_offset=pdf_size) + + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). + + @testset "ADI Jacobians - implicit z" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + this_slice = (ivperp - 1)*vpa.n + ivpa:v_size:(z.n - 1)*v_size + (ivperp - 1)*vpa.n + ivpa + @views add_electron_z_advection_to_z_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[ivpa,ivperp,:], + dens, upar, ppar, vth, dpdf_dz[ivpa,ivperp,:], me, z, vperp, vpa, + z_spectral, z_advect, z_speed, scratch_dummy, dt, ir, + ivperp, ivpa) + end + + # Add 'explicit' contribution + add_electron_z_advection_to_Jacobian!( + jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, dpdf_dz, me, z, + vperp, vpa, z_spectral, z_advect, z_speed, scratch_dummy, dt, ir, + :explicit_v; ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + + @testset "ADI Jacobians - implicit v" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
+ jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_z_region() + @loop_z iz begin + this_slice = collect((iz - 1)*v_size + 1:iz*v_size) + push!(this_slice, iz + pdf_size) + @views add_electron_z_advection_to_v_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], + dens[iz], upar[iz], ppar[iz], vth[iz], dpdf_dz[:,:,iz], me, z, vperp, + vpa, z_spectral, z_advect, z_speed, scratch_dummy, dt, ir, iz) + end + + # Add 'explicit' contribution + add_electron_z_advection_to_Jacobian!( + jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, dpdf_dz, me, z, + vperp, vpa, z_spectral, z_advect, z_speed, scratch_dummy, dt, ir, + :explicit_z; ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end function residual_func!(residual, this_f, this_p) begin_z_region() @@ -525,9 +643,23 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2) p_size = length(ppar) total_size = pdf_size + p_size + dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] + begin_z_vperp_region() + update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid, + external_source_settings.electron, ir) + @loop_z_vperp iz ivperp begin + @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir] + end + #calculate the upwind derivative of the electron pdf w.r.t. 
wpa + @loop_z_vperp iz ivperp begin + @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa, + vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral) + end + jacobian_matrix = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin + jacobian_matrix .= 0.0 for row ∈ 1:total_size # Initialise identity matrix jacobian_matrix[row,row] = 1.0 @@ -535,10 +667,80 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2) end add_electron_vpa_advection_to_Jacobian!( - jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, - dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral, - vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir; - ppar_offset=pdf_size) + jacobian_matrix, f, dens, upar, ppar, vth, third_moment, dpdf_dvpa, ddens_dz, + dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, + vpa_spectral, vpa_advect, z_speed, scratch_dummy, external_source_settings, + dt, ir; ppar_offset=pdf_size) + + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). + + @testset "ADI Jacobians - implicit z" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
+ jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + # There is no 'implicit z' contribution for vpa advection + + # Add 'explicit' contribution + add_electron_vpa_advection_to_Jacobian!( + jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, third_moment, + dpdf_dvpa, ddens_dz, dppar_dz, dthird_moment_dz, moments, me, z, vperp, + vpa, z_spectral, vpa_spectral, vpa_advect, z_speed, scratch_dummy, + external_source_settings, dt, ir, :explicit_v; ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + + @testset "ADI Jacobians - implicit v" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_z_region() + @loop_z iz begin + this_slice = collect((iz - 1)*v_size + 1:iz*v_size) + push!(this_slice, iz + pdf_size) + @views add_electron_vpa_advection_to_v_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], dens[iz], + upar[iz], ppar[iz], vth[iz], third_moment[iz], dpdf_dvpa[:,:,iz], + ddens_dz[iz], dppar_dz[iz], dthird_moment_dz[iz], moments, me, z, + vperp, vpa, z_spectral, vpa_spectral, vpa_advect, z_speed, + scratch_dummy, external_source_settings, dt, ir, iz) + end + + # Add 'explicit' contribution + add_electron_vpa_advection_to_Jacobian!( + jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, third_moment, + dpdf_dvpa, ddens_dz, 
dppar_dz, dthird_moment_dz, moments, me, z, vperp, + vpa, z_spectral, vpa_spectral, vpa_advect, z_speed, scratch_dummy, + external_source_settings, dt, ir, :explicit_z; ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end function residual_func!(residual, this_f, this_p) begin_z_region() @@ -798,6 +1000,7 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo jacobian_matrix = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin + jacobian_matrix .= 0.0 for row ∈ 1:total_size # Initialise identity matrix jacobian_matrix[row,row] = 1.0 @@ -809,6 +1012,87 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir; ppar_offset=pdf_size) + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). + + @testset "ADI Jacobians - implicit z" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
+ jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + this_slice = (ivperp - 1)*vpa.n + ivpa:v_size:(z.n - 1)*v_size + (ivperp - 1)*vpa.n + ivpa + @views add_contribution_from_electron_pdf_term_to_z_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[ivpa,ivperp,:], + dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, dvth_dz, + dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, ivperp, ivpa) + end + + # Add 'explicit' contribution + add_contribution_from_electron_pdf_term_to_Jacobian!( + jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, third_moment, + ddens_dz, dppar_dz, dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, + external_source_settings, z, vperp, vpa, z_spectral, z_speed, + scratch_dummy, dt, ir, :explicit_v; ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-13) + end + end + + @testset "ADI Jacobians - implicit v" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
+ jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_z_region() + @loop_z iz begin + this_slice = collect((iz - 1)*v_size + 1:iz*v_size) + push!(this_slice, iz + pdf_size) + @views add_contribution_from_electron_pdf_term_to_v_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], dens[iz], + upar[iz], ppar[iz], vth[iz], third_moment[iz], ddens_dz[iz], + dppar_dz[iz], dvth_dz[iz], dqpar_dz[iz], dthird_moment_dz[iz], + moments, me, external_source_settings, z, vperp, vpa, z_spectral, + z_speed, scratch_dummy, dt, ir, iz) + end + + # Add 'explicit' contribution + add_contribution_from_electron_pdf_term_to_Jacobian!( + jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, third_moment, + ddens_dz, dppar_dz, dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, + external_source_settings, z, vperp, vpa, z_spectral, z_speed, + scratch_dummy, dt, ir, :explicit_z; ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-13) + end + end + function residual_func!(residual, this_f, this_p) begin_z_region() @loop_z iz begin @@ -1032,6 +1316,7 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) jacobian_matrix = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin + jacobian_matrix .= 0.0 for row ∈ 1:total_size # Initialise identity matrix jacobian_matrix[row,row] = 1.0 @@ -1042,6 +1327,66 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir) + @testset "ADI Jacobians - implicit z" begin + # 'Implicit' 
and 'explicit' parts of Jacobian should add up to full Jacobian. + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + # There is no 'implicit z' contribution for electron dissipation + + # Add 'explicit' contribution + add_electron_dissipation_term_to_Jacobian!( + jacobian_matrix_ADI_check, f, num_diss_params, z, vperp, vpa, + vpa_spectral, z_speed, dt, ir, :explicit_v) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + + @testset "ADI Jacobians - implicit v" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_z_region() + @loop_z iz begin + this_slice = collect((iz - 1)*v_size + 1:iz*v_size) + push!(this_slice, iz + pdf_size) + @views add_electron_dissipation_term_to_v_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], + num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir, iz) + end + + # Add 'explicit' contribution + add_electron_dissipation_term_to_Jacobian!( + jacobian_matrix_ADI_check, f, num_diss_params, z, vperp, vpa, + vpa_spectral, z_speed, dt, ir, :explicit_z) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + function residual_func!(residual, this_f, this_p) begin_z_region() @loop_z iz begin @@ -1269,6 +1614,7 @@ 
function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) jacobian_matrix = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin + jacobian_matrix .= 0.0 for row ∈ 1:total_size # Initialise identity matrix jacobian_matrix[row,row] = 1.0 @@ -1279,6 +1625,79 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) jacobian_matrix, f, dens, upar, ppar, vth, @view(moments.ion.upar[:,ir]), collisions, z, vperp, vpa, z_speed, dt, ir; ppar_offset=pdf_size) + @testset "ADI Jacobians - implicit z" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + this_slice = (ivperp - 1)*vpa.n + ivpa:v_size:(z.n - 1)*v_size + (ivperp - 1)*vpa.n + ivpa + @views add_electron_krook_collisions_to_z_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[ivpa,ivperp,:], + dens, upar, ppar, vth, moments.ion.upar[:,ir], collisions, z, vperp, + vpa, z_speed, dt, ir, ivperp, ivpa) + end + + # Add 'explicit' contribution + add_electron_krook_collisions_to_Jacobian!( + jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, + @view(moments.ion.upar[:,ir]), collisions, z, vperp, vpa, z_speed, dt, ir, + :explicit_v; ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + + @testset "ADI Jacobians - implicit v" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
+ jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_z_region() + @loop_z iz begin + this_slice = collect((iz - 1)*v_size + 1:iz*v_size) + push!(this_slice, iz + pdf_size) + @views add_electron_krook_collisions_to_v_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], dens[iz], + upar[iz], ppar[iz], vth[iz], moments.ion.upar[iz,ir], collisions, z, + vperp, vpa, z_speed, dt, ir, iz) + end + + # Add 'explicit' contribution + add_electron_krook_collisions_to_Jacobian!( + jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, + @view(moments.ion.upar[:,ir]), collisions, z, vperp, vpa, z_speed, dt, ir, + :explicit_z; ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + function residual_func!(residual, this_f, this_p) begin_z_region() @loop_z iz begin @@ -1520,6 +1939,7 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) jacobian_matrix = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin + jacobian_matrix .= 0.0 for row ∈ 1:total_size # Initialise identity matrix jacobian_matrix[row,row] = 1.0 @@ -1530,6 +1950,79 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron, z, vperp, vpa, dt, ir; ppar_offset=pdf_size) + @testset "ADI Jacobians - implicit z" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
+ jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + this_slice = (ivperp - 1)*vpa.n + ivpa:v_size:(z.n - 1)*v_size + (ivperp - 1)*vpa.n + ivpa + @views add_total_external_electron_source_to_z_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[ivpa,ivperp,:], + moments, me, z_speed, external_source_settings.electron, z, vperp, + vpa, dt, ir, ivperp, ivpa) + end + + # Add 'explicit' contribution + add_total_external_electron_source_to_Jacobian!( + jacobian_matrix_ADI_check, f, moments, me, z_speed, + external_source_settings.electron, z, vperp, vpa, dt, ir, :explicit_v; + ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + + @testset "ADI Jacobians - implicit v" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
+ jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_z_region() + @loop_z iz begin + this_slice = collect((iz - 1)*v_size + 1:iz*v_size) + push!(this_slice, iz + pdf_size) + @views add_total_external_electron_source_to_v_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], + moments, me, z_speed, external_source_settings.electron, z, vperp, + vpa, dt, ir, iz) + end + + # Add 'explicit' contribution + add_total_external_electron_source_to_Jacobian!( + jacobian_matrix_ADI_check, f, moments, me, z_speed, + external_source_settings.electron, z, vperp, vpa, dt, ir, :explicit_z; + ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + function residual_func!(residual, this_f, this_p) begin_z_region() @loop_z iz begin @@ -1772,9 +2265,22 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil p_size = length(ppar) total_size = pdf_size + p_size + zeroth_moment = z.scratch_shared + first_moment = z.scratch_shared2 + second_moment = z.scratch_shared3 + begin_z_region() + vpa_grid = vpa.grid + vpa_wgts = vpa.wgts + @loop_z iz begin + @views zeroth_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_wgts) + @views first_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, vpa_wgts) + @views second_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, 2, vpa_wgts) + end + jacobian_matrix = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin + jacobian_matrix .= 0.0 for row ∈ 1:total_size # Initialise identity matrix jacobian_matrix[row,row] = 1.0 @@ -1782,8 +2288,81 @@ function 
test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil end add_electron_implicit_constraint_forcing_to_Jacobian!( - jacobian_matrix, f, z_speed, z, vperp, vpa, - t_params.electron.constraint_forcing_rate, dt, ir) + jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, + vperp, vpa, t_params.electron.constraint_forcing_rate, dt, ir) + + @testset "ADI Jacobians - implicit z" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + this_slice = (ivperp - 1)*vpa.n + ivpa:v_size:(z.n - 1)*v_size + (ivperp - 1)*vpa.n + ivpa + @views add_electron_implicit_constraint_forcing_to_z_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[ivpa,ivperp,:], + zeroth_moment, first_moment, second_moment, z_speed, z, vperp, vpa, + t_params.electron.constraint_forcing_rate, dt, ir, ivperp, ivpa) + end + + # Add 'explicit' contribution + add_electron_implicit_constraint_forcing_to_Jacobian!( + jacobian_matrix_ADI_check, f, zeroth_moment, first_moment, second_moment, + z_speed, z, vperp, vpa, t_params.electron.constraint_forcing_rate, dt, ir, + :explicit_v) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + + @testset "ADI Jacobians - implicit v" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
+ jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_z_region() + @loop_z iz begin + this_slice = collect((iz - 1)*v_size + 1:iz*v_size) + push!(this_slice, iz + pdf_size) + @views add_electron_implicit_constraint_forcing_to_v_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], + zeroth_moment[iz], first_moment[iz], second_moment[iz], z_speed, z, + vperp, vpa, t_params.electron.constraint_forcing_rate, dt, ir, iz) + end + + # Add 'explicit' contribution + add_electron_implicit_constraint_forcing_to_Jacobian!( + jacobian_matrix_ADI_check, f, zeroth_moment, first_moment, second_moment, + z_speed, z, vperp, vpa, t_params.electron.constraint_forcing_rate, dt, ir, + :explicit_z) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end function residual_func!(residual, this_f, this_p) begin_z_region() @@ -2029,6 +2608,7 @@ function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2) jacobian_matrix = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin + jacobian_matrix .= 0.0 for row ∈ 1:total_size # Initialise identity matrix jacobian_matrix[row,row] = 1.0 @@ -2040,6 +2620,82 @@ function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2) dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, num_diss_params, dt, ir; ppar_offset=pdf_size) + @testset "ADI Jacobians - implicit z" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
+ jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + @serial_region begin + # Add 'implicit' contribution + this_slice = total_size - z.n + 1:total_size + @views add_electron_energy_equation_to_z_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], dens, upar, ppar, + vth, third_moment, ddens_dz, dupar_dz, dppar_dz, dthird_moment_dz, + collisions, composition, z, vperp, vpa, z_spectral, num_diss_params, + dt, ir) + end + + # Add 'explicit' contribution + add_electron_energy_equation_to_Jacobian!( + jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, third_moment, + ddens_dz, dupar_dz, dppar_dz, dthird_moment_dz, collisions, composition, + z, vperp, vpa, z_spectral, num_diss_params, dt, ir, :explicit_v; + ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + + @testset "ADI Jacobians - implicit v" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
+ jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_z_region() + @loop_z iz begin + this_slice = collect((iz - 1)*v_size + 1:iz*v_size) + push!(this_slice, iz + pdf_size) + @views add_electron_energy_equation_to_v_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], dens[iz], + upar[iz], ppar[iz], vth[iz], third_moment[iz], ddens_dz[iz], + dupar_dz[iz], dppar_dz[iz], dthird_moment_dz[iz], collisions, + composition, z, vperp, vpa, z_spectral, num_diss_params, dt, ir, iz) + end + + # Add 'explicit' contribution + add_electron_energy_equation_to_Jacobian!( + jacobian_matrix_ADI_check, f, dens, upar, ppar, vth, third_moment, + ddens_dz, dupar_dz, dppar_dz, dthird_moment_dz, collisions, composition, + z, vperp, vpa, z_spectral, num_diss_params, dt, ir, :explicit_z; + ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + function residual_func!(residual, this_f, this_p) begin_z_region() @loop_z iz begin @@ -2219,6 +2875,7 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2 jacobian_matrix = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin + jacobian_matrix .= 0.0 for row ∈ 1:total_size # Initialise identity matrix jacobian_matrix[row,row] = 1.0 @@ -2228,6 +2885,73 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2 add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( jacobian_matrix, z, dt, ion_dt, ir; ppar_offset=pdf_size) + @testset "ADI Jacobians - implicit z" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full 
Jacobian. + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + @serial_region begin + # Add 'implicit' contribution + this_slice = total_size - z.n + 1:total_size + @views add_ion_dt_forcing_of_electron_ppar_to_z_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], z, dt, ion_dt, ir) + end + + # Add 'explicit' contribution + add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( + jacobian_matrix_ADI_check, z, dt, ion_dt, ir, :explicit_v; + ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + + @testset "ADI Jacobians - implicit v" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
+ jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .= 0.0 + for row ∈ 1:total_size + # Initialise identity matrix + jacobian_matrix_ADI_check[row,row] = 1.0 + end + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_z_region() + @loop_z iz begin + this_slice = collect((iz - 1)*v_size + 1:iz*v_size) + push!(this_slice, iz + pdf_size) + @views add_ion_dt_forcing_of_electron_ppar_to_v_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], z, dt, ion_dt, ir, + iz) + end + + # Add 'explicit' contribution + add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( + jacobian_matrix_ADI_check, z, dt, ion_dt, ir, :explicit_z; + ppar_offset=pdf_size) + + begin_serial_region() + @serial_region begin + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=0.0, atol=1.0e-15) + end + end + function residual_func!(residual, this_f, this_p) begin_z_region() @loop_z iz begin @@ -2411,6 +3135,7 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) jacobian_matrix = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin + jacobian_matrix .= 0.0 for row ∈ 1:total_size # Initialise identity matrix jacobian_matrix[row,row] = 1.0 @@ -2423,6 +3148,170 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, true) + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). 
+ + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / ppar[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, buffer_3, + buffer_4, z_spectral, z) + + z_speed = @view z_advect[1].speed[:,:,:,ir] + + dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + @loop_vperp_vpa ivperp ivpa begin + @views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed[:,ivpa,ivperp] + end + #calculate the upwind derivative + @views derivative_z_pdf_vpavperpz!(dpdf_dz, f, z_advect[1].adv_fac[:,:,:,ir], + scratch_dummy.buffer_vpavperpr_1[:,:,ir], + scratch_dummy.buffer_vpavperpr_2[:,:,ir], + scratch_dummy.buffer_vpavperpr_3[:,:,ir], + scratch_dummy.buffer_vpavperpr_4[:,:,ir], + scratch_dummy.buffer_vpavperpr_5[:,:,ir], + scratch_dummy.buffer_vpavperpr_6[:,:,ir], + z_spectral, z) + + dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] + begin_z_vperp_region() + update_electron_speed_vpa!(vpa_advect[1], dens, upar, ppar, moments, vpa.grid, + external_source_settings.electron, ir) + @loop_z_vperp iz ivperp begin + @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir] + end + #calculate the upwind derivative of the electron pdf w.r.t. 
wpa + @loop_z_vperp iz ivperp begin + @views derivative!(dpdf_dvpa[:,ivperp,iz], f[:,ivperp,iz], vpa, + vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral) + end + + zeroth_moment = z.scratch_shared + first_moment = z.scratch_shared2 + second_moment = z.scratch_shared3 + begin_z_region() + vpa_grid = vpa.grid + vpa_wgts = vpa.wgts + @loop_z iz begin + @views zeroth_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_wgts) + @views first_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, vpa_wgts) + @views second_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, 2, vpa_wgts) + end + + @testset "ADI Jacobians - implicit z" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + + begin_serial_region() + @serial_region begin + # Need to explicitly initialise because + # fill_electron_kinetic_equation_z_only_Jacobian_f!() and + # fill_electron_kinetic_equation_z_only_Jacobian_ppar!() + # only fill the diagonal-in-velocity-indices elements, so when applied to + # a full matrix they would not initialise every element. 
+ jacobian_matrix_ADI_check .= 0.0 + end + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + this_slice = (ivperp - 1)*vpa.n + ivpa:v_size:(z.n - 1)*v_size + (ivperp - 1)*vpa.n + ivpa + @views fill_electron_kinetic_equation_z_only_Jacobian_f!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[ivpa,ivperp,:], + ppar, dpdf_dz[ivpa,ivperp,:], dpdf_dvpa[ivpa,ivperp,:], z_speed, + moments, zeroth_moment, first_moment, second_moment, third_moment, + dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, + vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params.electron, ion_dt, + ir, ivperp, ivpa, true) + end + + @serial_region begin + # Add 'implicit' contribution + this_slice = (pdf_size + 1):total_size + @views fill_electron_kinetic_equation_z_only_Jacobian_ppar!( + jacobian_matrix_ADI_check[this_slice,this_slice], ppar, moments, + zeroth_moment, first_moment, second_moment, third_moment, + dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, + vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params.electron, ion_dt, + ir, true) + end + + # Add 'explicit' contribution + jacobian_matrix_ADI_check_explicit = allocate_shared_float(total_size, total_size) + fill_electron_kinetic_equation_Jacobian!( + jacobian_matrix_ADI_check_explicit, f, ppar, moments, collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, external_source_settings, + num_diss_params, t_params.electron, ion_dt, ir, true, :explicit_v) + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .+= jacobian_matrix_ADI_check_explicit + + # The settings for this test are a bit strange, due to trying to get the + # finite-difference approximation to the Jacobian to agree with the + # 
Jacobian matrix functions without being too messed up by floating-point + # rounding errors. The result is that some entries in the Jacobian matrix + # here are O(1.0e5), so it is important to use `rtol` here. + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=1.0e-15, atol=1.0e-15) + end + end + + @testset "ADI Jacobians - implicit v" begin + # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + + v_size = vperp.n * vpa.n + + # Add 'implicit' contribution + begin_z_region() + @loop_z iz begin + this_slice = collect((iz - 1)*v_size + 1:iz*v_size) + push!(this_slice, iz + pdf_size) + @views fill_electron_kinetic_equation_v_only_Jacobian!( + jacobian_matrix_ADI_check[this_slice,this_slice], f[:,:,iz], ppar[iz], + dpdf_dz[:,:,iz], dpdf_dvpa[:,:,iz], z_speed, moments, + zeroth_moment[iz], first_moment[iz], second_moment[iz], + third_moment[iz], dthird_moment_dz[iz], collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, + vpa_advect, scratch_dummy, external_source_settings, num_diss_params, + t_params.electron, ion_dt, ir, iz, true) + end + + # Add 'explicit' contribution + jacobian_matrix_ADI_check_explicit = allocate_shared_float(total_size, total_size) + fill_electron_kinetic_equation_Jacobian!( + jacobian_matrix_ADI_check_explicit, f, ppar, moments, collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, external_source_settings, + num_diss_params, t_params.electron, ion_dt, ir, true, :explicit_z) + + begin_serial_region() + @serial_region begin + jacobian_matrix_ADI_check .+= jacobian_matrix_ADI_check_explicit + + # The settings for this test are a bit strange, due to trying to get the + # finite-difference approximation to the Jacobian to agree with the + # Jacobian matrix functions without being too messed up by floating-point + # 
rounding errors. The result is that some entries in the Jacobian matrix + # here are O(1.0e5), so it is important to use `rtol` here. + @test elementwise_isapprox(jacobian_matrix_ADI_check, jacobian_matrix; rtol=1.0e-13, atol=1.0e-13) + end + end + function residual_func!(residual_f, residual_p, this_f, this_p) begin_z_region() @loop_z iz begin From c4fa0218e28cb1ef3437c9e4caf6f967e4abcbd6 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 28 Oct 2024 13:21:05 +0000 Subject: [PATCH 11/43] Reduce memory usage in Jacobian matrix tests Avoid using more than two Jacobian-sized buffer arrays at any time in the Jacobian matrix tests. Using more than this would cause the Github Actions CI servers to run out of shared memory, causing an error. --- moment_kinetics/test/jacobian_matrix_tests.jl | 153 +++++++++++------- 1 file changed, 98 insertions(+), 55 deletions(-) diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index e04a60d33..9bae8296c 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -332,9 +332,10 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2) # Test 'ADI Jacobians' before other tests, because residual_func() may modify some # variables (vth, etc.). + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + @testset "ADI Jacobians - implicit z" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -371,7 +372,6 @@ function test_electron_z_advection(test_input; rtol=(2.5e2*epsilon)^2) @testset "ADI Jacobians - implicit v" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
- jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -675,9 +675,10 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2) # Test 'ADI Jacobians' before other tests, because residual_func() may modify some # variables (vth, etc.). + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + @testset "ADI Jacobians - implicit z" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -704,7 +705,6 @@ function test_electron_vpa_advection(test_input; rtol=(3.0e2*epsilon)^2) @testset "ADI Jacobians - implicit v" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -1015,9 +1015,10 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo # Test 'ADI Jacobians' before other tests, because residual_func() may modify some # variables (vth, etc.). + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + @testset "ADI Jacobians - implicit z" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -1055,7 +1056,6 @@ function test_contribution_from_electron_pdf_term(test_input; rtol=(4.0e2*epsilo @testset "ADI Jacobians - implicit v" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
- jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -1327,9 +1327,13 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir) + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). + + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + @testset "ADI Jacobians - implicit z" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -1354,7 +1358,6 @@ function test_electron_dissipation_term(test_input; rtol=(3.0e0*epsilon)^2) @testset "ADI Jacobians - implicit v" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -1625,9 +1628,13 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) jacobian_matrix, f, dens, upar, ppar, vth, @view(moments.ion.upar[:,ir]), collisions, z, vperp, vpa, z_speed, dt, ir; ppar_offset=pdf_size) + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). + + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + @testset "ADI Jacobians - implicit z" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
- jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -1663,7 +1670,6 @@ function test_electron_krook_collisions(test_input; rtol=(2.0e1*epsilon)^2) @testset "ADI Jacobians - implicit v" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -1950,9 +1956,13 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) jacobian_matrix, f, moments, me, z_speed, external_source_settings.electron, z, vperp, vpa, dt, ir; ppar_offset=pdf_size) + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). + + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + @testset "ADI Jacobians - implicit z" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -1988,7 +1998,6 @@ function test_external_electron_source(test_input; rtol=(3.0e1*epsilon)^2) @testset "ADI Jacobians - implicit v" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -2291,9 +2300,13 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil jacobian_matrix, f, zeroth_moment, first_moment, second_moment, z_speed, z, vperp, vpa, t_params.electron.constraint_forcing_rate, dt, ir) + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). 
+ + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + @testset "ADI Jacobians - implicit z" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -2329,7 +2342,6 @@ function test_electron_implicit_constraint_forcing(test_input; rtol=(1.5e0*epsil @testset "ADI Jacobians - implicit v" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -2620,9 +2632,13 @@ function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2) dppar_dz, dthird_moment_dz, collisions, composition, z, vperp, vpa, z_spectral, num_diss_params, dt, ir; ppar_offset=pdf_size) + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). + + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + @testset "ADI Jacobians - implicit z" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -2659,7 +2675,6 @@ function test_electron_energy_equation(test_input; rtol=(6.0e2*epsilon)^2) @testset "ADI Jacobians - implicit v" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
- jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -2885,9 +2900,13 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2 add_ion_dt_forcing_of_electron_ppar_to_Jacobian!( jacobian_matrix, z, dt, ion_dt, ir; ppar_offset=pdf_size) + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). + + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + @testset "ADI Jacobians - implicit z" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -2919,7 +2938,6 @@ function test_ion_dt_forcing_of_electron_ppar(test_input; rtol=(1.5e1*epsilon)^2 @testset "ADI Jacobians - implicit v" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
- jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) begin_serial_region() @serial_region begin jacobian_matrix_ADI_check .= 0.0 @@ -3133,20 +3151,15 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) total_size = pdf_size + p_size jacobian_matrix = allocate_shared_float(total_size, total_size) - begin_serial_region() - @serial_region begin - jacobian_matrix .= 0.0 - for row ∈ 1:total_size - # Initialise identity matrix - jacobian_matrix[row,row] = 1.0 - end - end - fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, - z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, - true) + # Calculate this later, so that we can use `jacobian_matrix` as a temporary + # buffer, to avoid allocating too much shared memory for the Github Actions CI + # servers. + #fill_electron_kinetic_equation_Jacobian!( + # jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, + # z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + # external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, + # true) # Test 'ADI Jacobians' before other tests, because residual_func() may modify some # variables (vth, etc.). @@ -3208,19 +3221,22 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) @views second_moment[iz] = integrate_over_vspace(f[:,1,iz], vpa_grid, 2, vpa_wgts) end + # Test 'ADI Jacobians' before other tests, because residual_func() may modify some + # variables (vth, etc.). 
+ + jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) + begin_serial_region() + @serial_region begin + # Need to explicitly initialise because + # fill_electron_kinetic_equation_z_only_Jacobian_f!() and + # fill_electron_kinetic_equation_z_only_Jacobian_ppar!() + # only fill the diagonal-in-velocity-indices elements, so when applied to + # a full matrix they would not initialise every element. + jacobian_matrix_ADI_check .= 0.0 + end + @testset "ADI Jacobians - implicit z" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. - jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) - - begin_serial_region() - @serial_region begin - # Need to explicitly initialise because - # fill_electron_kinetic_equation_z_only_Jacobian_f!() and - # fill_electron_kinetic_equation_z_only_Jacobian_ppar!() - # only fill the diagonal-in-velocity-indices elements, so when applied to - # a full matrix they would not initialise every element. - jacobian_matrix_ADI_check .= 0.0 - end v_size = vperp.n * vpa.n @@ -3251,16 +3267,25 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) end # Add 'explicit' contribution - jacobian_matrix_ADI_check_explicit = allocate_shared_float(total_size, total_size) + # Use jacobian_matrix as a temporary buffer here. 
fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix_ADI_check_explicit, f, ppar, moments, collisions, - composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, - z_advect, vpa_advect, scratch_dummy, external_source_settings, - num_diss_params, t_params.electron, ion_dt, ir, true, :explicit_v) + jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, + z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, + scratch_dummy, external_source_settings, num_diss_params, + t_params.electron, ion_dt, ir, true, :explicit_v) begin_serial_region() @serial_region begin - jacobian_matrix_ADI_check .+= jacobian_matrix_ADI_check_explicit + jacobian_matrix_ADI_check .+= jacobian_matrix + end + + fill_electron_kinetic_equation_Jacobian!( + jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, + z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, + true) + begin_serial_region() + @serial_region begin # The settings for this test are a bit strange, due to trying to get the # finite-difference approximation to the Jacobian to agree with the # Jacobian matrix functions without being too messed up by floating-point @@ -3270,9 +3295,18 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) end end + begin_serial_region() + @serial_region begin + # Need to explicitly initialise because + # fill_electron_kinetic_equation_z_only_Jacobian_f!() and + # fill_electron_kinetic_equation_z_only_Jacobian_ppar!() + # only fill the diagonal-in-velocity-indices elements, so when applied to + # a full matrix they would not initialise every element. + jacobian_matrix_ADI_check .= 0.0 + end + @testset "ADI Jacobians - implicit v" begin # 'Implicit' and 'explicit' parts of Jacobian should add up to full Jacobian. 
- jacobian_matrix_ADI_check = allocate_shared_float(total_size, total_size) v_size = vperp.n * vpa.n @@ -3292,17 +3326,26 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) end # Add 'explicit' contribution - jacobian_matrix_ADI_check_explicit = allocate_shared_float(total_size, total_size) + # Use jacobian_matrix as a temporary buffer here. fill_electron_kinetic_equation_Jacobian!( - jacobian_matrix_ADI_check_explicit, f, ppar, moments, collisions, - composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, - z_advect, vpa_advect, scratch_dummy, external_source_settings, - num_diss_params, t_params.electron, ion_dt, ir, true, :explicit_z) + jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, + z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, + scratch_dummy, external_source_settings, num_diss_params, + t_params.electron, ion_dt, ir, true, :explicit_z) begin_serial_region() @serial_region begin - jacobian_matrix_ADI_check .+= jacobian_matrix_ADI_check_explicit + jacobian_matrix_ADI_check .+= jacobian_matrix + end + fill_electron_kinetic_equation_Jacobian!( + jacobian_matrix, f, ppar, moments, collisions, composition, z, vperp, vpa, + z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params.electron, ion_dt, ir, + true) + + begin_serial_region() + @serial_region begin # The settings for this test are a bit strange, due to trying to get the # finite-difference approximation to the Jacobian to agree with the # Jacobian matrix functions without being too messed up by floating-point From 12cc4d465f4fcc07786b73733ff249ef454d1c49 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 26 Oct 2024 17:34:48 +0100 Subject: [PATCH 12/43] ADI preconditioner Add a preconditioner for kinetic electrons using a variation on the 'alternating direction implicit' (ADI) method. 
Terms that couple velocity space are first solved implicitly (over vpa only for the 1V case), with terms with z-coupling treated 'explicitly' by being subtracted from the right-hand-side; then terms that couple in z are solved implicitly, with terms with v-coupling treated 'explicitly'. The two steps may be iterated more times if this is needed. --- .../src/makie_post_processing.jl | 2 + .../src/electron_kinetic_equation.jl | 374 +++++++++++++++++- moment_kinetics/src/file_io.jl | 12 +- moment_kinetics/src/initial_conditions.jl | 2 + moment_kinetics/src/load_data.jl | 7 + moment_kinetics/src/nonlinear_solvers.jl | 78 +++- moment_kinetics/src/time_advance.jl | 3 +- 7 files changed, 473 insertions(+), 5 deletions(-) diff --git a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl index 0652d06c6..78ea8b827 100644 --- a/makie_post_processing/makie_post_processing/src/makie_post_processing.jl +++ b/makie_post_processing/makie_post_processing/src/makie_post_processing.jl @@ -8079,8 +8079,10 @@ function timestep_diagnostics(run_info, run_info_dfns; plot_prefix=nothing, it=n for p ∈ nl_prefixes nonlinear_iterations = get_variable(ri, "$(p)_nonlinear_iterations_per_solve") linear_iterations = get_variable(ri, "$(p)_linear_iterations_per_nonlinear_iteration") + precon_iterations = get_variable(ri, "$(p)_precon_iterations_per_linear_iteration") plot_1d(time, nonlinear_iterations, label=prefix * " " * p * " NL per solve", ax=ax) plot_1d(time, linear_iterations, label=prefix * " " * p * " L per NL", ax=ax) + plot_1d(time, precon_iterations, label=prefix * " " * p * " P per L", ax=ax) end end end diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 54021ced6..9b5fe83aa 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -932,7 +932,7 @@ global_rank[] 
== 0 && println("recalculating precon") end - function lu_precon!(x) + @timeit_debug global_timer lu_precon!(x) = begin precon_ppar, precon_f = x precon_lu, _, this_input_buffer, this_output_buffer = @@ -1006,6 +1006,378 @@ global_rank[] == 0 && println("recalculating precon") left_preconditioner = identity right_preconditioner = lu_precon! + elseif nl_solver_params.preconditioner_type === Val(:electron_adi) + + if t_params.dt[] > 1.5 * nl_solver_params.precon_dt[] || + t_params.dt[] < 2.0/3.0 * nl_solver_params.precon_dt[] + + # dt has changed significantly, so update the preconditioner + nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval + end + + if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval +global_rank[] == 0 && println("recalculating precon") + nl_solver_params.solves_since_precon_update[] = 0 + nl_solver_params.precon_dt[] = t_params.dt[] + + adi_info = nl_solver_params.preconditioners[ir] + + dens = @view moments.electron.dens[:,ir] + upar = @view moments.electron.upar[:,ir] + vth = @view moments.electron.vth[:,ir] + qpar = @view moments.electron.qpar[:,ir] + + # Reconstruct w_∥^3 moment of g_e from already-calculated qpar + buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] + buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] + buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] + buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] + third_moment = scratch_dummy.buffer_z_1 + dthird_moment_dz = scratch_dummy.buffer_z_2 + begin_z_region() + @loop_z iz begin + third_moment[iz] = 0.5 * qpar[iz] / electron_ppar_new[iz] / vth[iz] + end + derivative_z!(dthird_moment_dz, third_moment, buffer_1, buffer_2, + buffer_3, buffer_4, z_spectral, z) + + z_speed = @view z_advect[1].speed[:,:,:,ir] + + dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] + begin_vperp_vpa_region() + update_electron_speed_z!(z_advect[1], upar, vth, vpa.grid, ir) + @loop_vperp_vpa ivperp ivpa begin + 
@views z_advect[1].adv_fac[:,ivpa,ivperp,ir] = -z_speed[:,ivpa,ivperp] + end + #calculate the upwind derivative + @views derivative_z_pdf_vpavperpz!(dpdf_dz, f_electron_new, + z_advect[1].adv_fac[:,:,:,ir], + scratch_dummy.buffer_vpavperpr_1[:,:,ir], + scratch_dummy.buffer_vpavperpr_2[:,:,ir], + scratch_dummy.buffer_vpavperpr_3[:,:,ir], + scratch_dummy.buffer_vpavperpr_4[:,:,ir], + scratch_dummy.buffer_vpavperpr_5[:,:,ir], + scratch_dummy.buffer_vpavperpr_6[:,:,ir], + z_spectral, z) + + dpdf_dvpa = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] + begin_z_vperp_region() + update_electron_speed_vpa!(vpa_advect[1], dens, upar, + electron_ppar_new, moments, vpa.grid, + external_source_settings.electron, ir) + @loop_z_vperp iz ivperp begin + @views @. vpa_advect[1].adv_fac[:,ivperp,iz,ir] = -vpa_advect[1].speed[:,ivperp,iz,ir] + end + #calculate the upwind derivative of the electron pdf w.r.t. wpa + @loop_z_vperp iz ivperp begin + @views derivative!(dpdf_dvpa[:,ivperp,iz], f_electron_new[:,ivperp,iz], vpa, + vpa_advect[1].adv_fac[:,ivperp,iz,ir], vpa_spectral) + end + + zeroth_moment = z.scratch_shared + first_moment = z.scratch_shared2 + second_moment = z.scratch_shared3 + begin_z_region() + vpa_grid = vpa.grid + vpa_wgts = vpa.wgts + @loop_z iz begin + @views zeroth_moment[iz] = integrate_over_vspace(f_electron_new[:,1,iz], vpa_wgts) + @views first_moment[iz] = integrate_over_vspace(f_electron_new[:,1,iz], vpa_grid, vpa_wgts) + @views second_moment[iz] = integrate_over_vspace(f_electron_new[:,1,iz], vpa_grid, 2, vpa_wgts) + end + + v_size = vperp.n * vpa.n + + # Do setup for 'v solves' + v_solve_counter = 0 + A = adi_info.v_solve_matrix_buffer + explicit_J = adi_info.J_buffer + # Get sparse matrix for explicit, right-hand-side part of the + # solve. 
+ fill_electron_kinetic_equation_Jacobian!( + explicit_J, f_electron_new, electron_ppar_new, moments, + collisions, composition, z, vperp, vpa, z_spectral, + vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, ir, + evolve_ppar, :explicit_z) + begin_z_region() + @loop_z iz begin + v_solve_counter += 1 + # Get LU-factorized matrix for implicit part of the solve + @views fill_electron_kinetic_equation_v_only_Jacobian!( + A, f_electron_new[:,:,iz], electron_ppar_new[iz], + dpdf_dz[:,:,iz], dpdf_dvpa[:,:,iz], z_speed, moments, + zeroth_moment[iz], first_moment[iz], second_moment[iz], + third_moment[iz], dthird_moment_dz[iz], collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, + ir, iz, evolve_ppar) + A_sparse = sparse(A) + if !isassigned(adi_info.v_solve_implicit_lus, v_solve_counter) + @timeit_debug global_timer "lu" adi_info.v_solve_implicit_lus[v_solve_counter] = lu(A_sparse) + else + # LU decomposition was previously created. The Jacobian always + # has the same sparsity pattern, so by using `lu!()` we can + # reuse some setup. + try + @timeit_debug global_timer "lu!" 
lu!(adi_info.v_solve_implicit_lus[v_solve_counter], A_sparse; check=false) + catch e + if !isa(e, ArgumentError) + rethrow(e) + end + println("Sparsity pattern of matrix changed, rebuilding " + * " LU from scratch ir=$ir, iz=$iz") + @timeit_debug global_timer "lu" adi_info.v_solve_implicit_lus[v_solve_counter] = lu(A_sparse) + end + end + + adi_info.v_solve_explicit_matrices[v_solve_counter] = sparse(@view(explicit_J[adi_info.v_solve_global_inds[v_solve_counter],:])) + end + @boundscheck v_solve_counter == adi_info.v_solve_nsolve || error("v_solve_counter($v_solve_counter) != v_solve_nsolve($(adi_info.v_solve_nsolve))") + + # Do setup for 'z solves' + z_solve_counter = 0 + A = adi_info.z_solve_matrix_buffer + explicit_J = adi_info.J_buffer + # Get sparse matrix for explicit, right-hand-side part of the + # solve. + fill_electron_kinetic_equation_Jacobian!( + explicit_J, f_electron_new, electron_ppar_new, moments, + collisions, composition, z, vperp, vpa, z_spectral, + vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, ir, + evolve_ppar, :explicit_v) + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + z_solve_counter += 1 + + # Get LU-factorized matrix for implicit part of the solve + @views fill_electron_kinetic_equation_z_only_Jacobian_f!( + A, f_electron_new[ivpa,ivperp,:], electron_ppar_new, + dpdf_dz[ivpa,ivperp,:], dpdf_dvpa[ivpa,ivperp,:], z_speed, + moments, zeroth_moment, first_moment, second_moment, + third_moment, dthird_moment_dz, collisions, composition, z, + vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, + z_advect, vpa_advect, scratch_dummy, external_source_settings, + num_diss_params, t_params, ion_dt, ir, ivperp, ivpa, + evolve_ppar) + + A_sparse = sparse(A) + if !isassigned(adi_info.z_solve_implicit_lus, z_solve_counter) + @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse) + else + # LU decomposition was 
previously created. The Jacobian always + # has the same sparsity pattern, so by using `lu!()` we can + # reuse some setup. + try + @timeit_debug global_timer "lu!" lu!(adi_info.z_solve_implicit_lus[z_solve_counter], A_sparse; check=false) + catch e + if !isa(e, ArgumentError) + rethrow(e) + end + println("Sparsity pattern of matrix changed, rebuilding " + * " LU from scratch ir=$ir, ivperp=$ivperp, ivpa=$ivpa") + @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse) + end + end + + adi_info.z_solve_explicit_matrices[z_solve_counter] = sparse(@view(explicit_J[adi_info.z_solve_global_inds[z_solve_counter],:])) + end + begin_serial_region(; no_synchronize=true) + @serial_region begin + # Do the solve for ppar on the rank-0 process, which has the + # fewest grid points to handle if there are not an exactly equal + # number of points for each process. + z_solve_counter += 1 + + # Get LU-factorized matrix for implicit part of the solve + @views fill_electron_kinetic_equation_z_only_Jacobian_ppar!( + A, electron_ppar_new, moments, zeroth_moment, first_moment, + second_moment, third_moment, dthird_moment_dz, collisions, + composition, z, vperp, vpa, z_spectral, vperp_spectral, + vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, + ir, evolve_ppar) + + A_sparse = sparse(A) + if !isassigned(adi_info.z_solve_implicit_lus, z_solve_counter) + @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse) + else + # LU decomposition was previously created. The Jacobian always + # has the same sparsity pattern, so by using `lu!()` we can + # reuse some setup. + try + @timeit_debug global_timer "lu!" 
lu!(adi_info.z_solve_implicit_lus[z_solve_counter], A_sparse; check=false) + catch e + if !isa(e, ArgumentError) + rethrow(e) + end + println("Sparsity pattern of matrix changed, rebuilding " + * " LU from scratch ir=$ir, ppar z-solve") + @timeit_debug global_timer "lu" adi_info.z_solve_implicit_lus[z_solve_counter] = lu(A_sparse) + end + end + + adi_info.z_solve_explicit_matrices[z_solve_counter] = sparse(@view(explicit_J[adi_info.z_solve_global_inds[z_solve_counter],:])) + end + @boundscheck z_solve_counter == adi_info.z_solve_nsolve || error("z_solve_counter($z_solve_counter) != z_solve_nsolve($(adi_info.z_solve_nsolve))") + end + + @timeit_debug global_timer adi_precon!(x) = begin + precon_ppar, precon_f = x + + adi_info = nl_solver_params.preconditioners[ir] + precon_iterations = nl_solver_params.precon_iterations + this_input_buffer = adi_info.input_buffer + this_intermediate_buffer = adi_info.intermediate_buffer + this_output_buffer = adi_info.output_buffer + global_index_subrange = adi_info.global_index_subrange + + v_size = vperp.n * vpa.n + pdf_size = z.n * v_size + + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + row = (iz - 1)*v_size + (ivperp - 1)*vpa.n + ivpa + this_input_buffer[row] = precon_f[ivpa,ivperp,iz] + end + begin_z_region() + @loop_z iz begin + row = pdf_size + iz + this_input_buffer[row] = precon_ppar[iz] + end + _block_synchronize() + + # Use this to copy current guess from output_buffer to + # intermediate_buffer, to avoid race conditions as new guess is + # written into output_buffer. 
+ function fill_intermediate_buffer!() + _block_synchronize() + for i ∈ global_index_subrange + this_intermediate_buffer[i] = this_output_buffer[i] + end + _block_synchronize() + end + + v_solve_global_inds = adi_info.v_solve_global_inds + v_solve_nsolve = adi_info.v_solve_nsolve + v_solve_implicit_lus = adi_info.v_solve_implicit_lus + v_solve_explicit_matrices = adi_info.v_solve_explicit_matrices + v_solve_buffer = adi_info.v_solve_buffer + v_solve_buffer2 = adi_info.v_solve_buffer2 + function first_adi_v_solve!() + # The initial guess is all-zero, so for the first solve there is + # no need to multiply by the 'explicit matrix' as x==0, so E.x==0 + for isolve ∈ 1:v_solve_nsolve + this_inds = v_solve_global_inds[isolve] + v_solve_buffer .= this_input_buffer[this_inds] + @timeit_debug global_timer "ldiv!" ldiv!(v_solve_buffer2, v_solve_implicit_lus[isolve], v_solve_buffer) + this_output_buffer[this_inds] .= v_solve_buffer2 + end + end + function adi_v_solve!() + for isolve ∈ 1:v_solve_nsolve + this_inds = v_solve_global_inds[isolve] + v_solve_buffer .= @view this_input_buffer[this_inds] + # Need to multiply the 'explicit matrix' by -1, because all + # the Jacobian-calculation functions are defined as if the + # terms are being added to the left-hand-side preconditioner + # matrix, but here the 'explicit matrix' terms are added on + # the right-hand-side. + @timeit_debug global_timer "mul!" mul!(v_solve_buffer, v_solve_explicit_matrices[isolve], + this_intermediate_buffer, -1.0, 1.0) + @timeit_debug global_timer "ldiv!" 
ldiv!(v_solve_buffer2, v_solve_implicit_lus[isolve], v_solve_buffer) + this_output_buffer[this_inds] .= v_solve_buffer2 + end + end + + z_solve_global_inds = adi_info.z_solve_global_inds + z_solve_nsolve = adi_info.z_solve_nsolve + z_solve_implicit_lus = adi_info.z_solve_implicit_lus + z_solve_explicit_matrices = adi_info.z_solve_explicit_matrices + z_solve_buffer = adi_info.z_solve_buffer + z_solve_buffer2 = adi_info.z_solve_buffer2 + function adi_z_solve!() + for isolve ∈ 1:z_solve_nsolve + this_inds = z_solve_global_inds[isolve] + z_solve_buffer .= @view this_input_buffer[this_inds] + # Need to multiply the 'explicit matrix' by -1, because all + # the Jacobian-calculation functions are defined as if the + # terms are being added to the left-hand-side preconditioner + # matrix, but here the 'explicit matrix' terms are added on + # the right-hand-side. + @timeit_debug global_timer "mul!" mul!(z_solve_buffer, z_solve_explicit_matrices[isolve], this_intermediate_buffer, -1.0, 1.0) + @timeit_debug global_timer "ldiv!" ldiv!(z_solve_buffer2, z_solve_implicit_lus[isolve], z_solve_buffer) + this_output_buffer[this_inds] .= z_solve_buffer2 + end + end + + precon_iterations[] += 1 + first_adi_v_solve!() + fill_intermediate_buffer!() + adi_z_solve!() + for n ∈ 1:1 + precon_iterations[] += 1 + fill_intermediate_buffer!() + adi_v_solve!() + fill_intermediate_buffer!() + adi_z_solve!() + end + + # Unpack preconditioner solution + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + row = (iz - 1)*v_size + (ivperp - 1)*vpa.n + ivpa + precon_f[ivpa,ivperp,iz] = this_output_buffer[row] + end + begin_z_region() + @loop_z iz begin + row = pdf_size + iz + precon_ppar[iz] = this_output_buffer[row] + end + + # Ensure values of precon_f and precon_ppar are consistent across + # distributed-MPI block boundaries. For precon_f take the upwind + # value, and for precon_ppar take the average. 
+ f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir] + f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir] + receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir] + receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir] + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + f_lower_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,1] + f_upper_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,end] + end + # We upwind the z-derivatives in `electron_z_advection!()`, so would + # expect that upwinding the results here in z would make sense. + # However, upwinding here makes convergence much slower (~10x), + # compared to picking the values from one side or other of the block + # boundary, or taking the average of the values on either side. + # Neither direction is special, so taking the average seems most + # sensible (although in an initial test it does not seem to converge + # faster than just picking one or the other). + # Maybe this could indicate that it is more important to have a fully + # self-consistent Jacobian inversion for the + # `electron_vpa_advection()` part rather than taking half(ish) of the + # values from one block and the other half(ish) from the other. + reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( + precon_f, f_lower_endpoints, f_upper_endpoints, receive_buffer1, + receive_buffer2, z) + + begin_serial_region() + @serial_region begin + buffer_1[] = precon_ppar[1] + buffer_2[] = precon_ppar[end] + end + reconcile_element_boundaries_MPI!( + precon_ppar, buffer_1, buffer_2, buffer_3, buffer_4, z) + + return nothing + end + + left_preconditioner = identity + right_preconditioner = adi_precon! 
elseif nl_solver_params.preconditioner_type === Val(:none) left_preconditioner = identity right_preconditioner = identity diff --git a/moment_kinetics/src/file_io.jl b/moment_kinetics/src/file_io.jl index 32b225754..c61cf9458 100644 --- a/moment_kinetics/src/file_io.jl +++ b/moment_kinetics/src/file_io.jl @@ -1203,6 +1203,10 @@ function define_dynamic_moment_variables!(fid, n_ion_species, n_neutral_species, dynamic, "$(term)_linear_iterations", mk_int; parallel_io=parallel_io, description="Number of linear iterations for $term"), + precon_iterations=create_dynamic_variable!( + dynamic, "$(term)_precon_iterations", mk_int; + parallel_io=parallel_io, + description="Number of preconditioner iterations for $term"), ) for (term, params) ∈ pairs(nl_solver_params) if params !== nothing) @@ -2152,7 +2156,8 @@ function reopen_moments_io(file_info) for name ∈ nl_names) return NamedTuple(Symbol(term)=>(n_solves=dyn["$(term)_n_solves"], nonlinear_iterations=dyn["$(term)_nonlinear_iterations"], - linear_iterations=dyn["$(term)_linear_iterations"]) + linear_iterations=dyn["$(term)_linear_iterations"], + precon_iterations=dyn["$(term)_precon_iterations"]) for term ∈ nl_prefixes) else return nothing @@ -2313,7 +2318,8 @@ function reopen_dfns_io(file_info) for name ∈ nl_names) return NamedTuple(Symbol(term)=>(n_solves=dyn["$(term)_n_solves"], nonlinear_iterations=dyn["$(term)_nonlinear_iterations"], - linear_iterations=dyn["$(term)_linear_iterations"]) + linear_iterations=dyn["$(term)_linear_iterations"], + precon_iterations=dyn["$(term)_precon_iterations"]) for term ∈ nl_prefixes) else return nothing @@ -2495,6 +2501,8 @@ file v.nonlinear_iterations[], t_idx, parallel_io) append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].linear_iterations, v.linear_iterations[], t_idx, parallel_io) + append_to_dynamic_var(io_moments.nl_solver_diagnostics[k].precon_iterations, + v.precon_iterations[], t_idx, parallel_io) end end diff --git a/moment_kinetics/src/initial_conditions.jl 
b/moment_kinetics/src/initial_conditions.jl index 2ba8ccd4f..3102e137c 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -754,9 +754,11 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field nl_solver_params.electron_advance.n_solves, nl_solver_params.electron_advance.nonlinear_iterations, nl_solver_params.electron_advance.linear_iterations, + nl_solver_params.electron_advance.precon_iterations, nl_solver_params.electron_advance.global_n_solves, nl_solver_params.electron_advance.global_nonlinear_iterations, nl_solver_params.electron_advance.global_linear_iterations, + nl_solver_params.electron_advance.global_precon_iterations, nl_solver_params.electron_advance.solves_since_precon_update, nl_solver_params.electron_advance.precon_dt, nl_solver_params.electron_advance.serial_solve, diff --git a/moment_kinetics/src/load_data.jl b/moment_kinetics/src/load_data.jl index 35e427319..b5150c675 100644 --- a/moment_kinetics/src/load_data.jl +++ b/moment_kinetics/src/load_data.jl @@ -5033,6 +5033,13 @@ function get_variable(run_info, variable_name; normalize_advection_speed_shape=t nl_linear_iterations = get_per_step_from_cumulative_variable( run_info, "$(prefix)_linear_iterations"; kwargs...) variable = nl_linear_iterations ./ nl_iterations + elseif occursin("_precon_iterations_per_linear_iteration", variable_name) + prefix = split(variable_name, "_precon_iterations_per_linear_iteration")[1] + nl_linear_iterations = get_per_step_from_cumulative_variable( + run_info, "$(prefix)_linear_iterations"; kwargs...) + nl_precon_iterations = get_per_step_from_cumulative_variable( + run_info, "$(prefix)_precon_iterations"; kwargs...) + variable = nl_precon_iterations ./ nl_linear_iterations elseif endswith(variable_name, "_per_step") && variable_name ∉ run_info.variable_names # If "_per_step" is appended to a variable name, assume it is a cumulative # variable, and get the per-step version. 
diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index a94e2314c..3789ab0c6 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -59,9 +59,11 @@ struct nl_solver_info{TH,TV,Tcsg,Tlig,Tprecon,Tpretype} n_solves::Base.RefValue{mk_int} nonlinear_iterations::Base.RefValue{mk_int} linear_iterations::Base.RefValue{mk_int} + precon_iterations::Base.RefValue{mk_int} global_n_solves::Base.RefValue{mk_int} global_nonlinear_iterations::Base.RefValue{mk_int} global_linear_iterations::Base.RefValue{mk_int} + global_precon_iterations::Base.RefValue{mk_int} solves_since_precon_update::Base.RefValue{mk_int} precon_dt::Base.RefValue{mk_float} serial_solve::Bool @@ -178,6 +180,73 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa allocate_shared_float(pdf_plus_ppar_size), ), reverse(outer_coord_sizes)) + elseif preconditioner_type === Val(:electron_adi) + nz = coords.z.n + pdf_plus_ppar_size = total_size_coords + nz + nvperp = coords.vperp.n + nvpa = coords.vpa.n + v_size = nvperp * nvpa + + function get_adi_precon_buffers() + v_solve_z_range = looping.loop_ranges_store[(:z,)].z + v_solve_global_inds = [[((iz - 1)*v_size+1 : iz*v_size)..., total_size_coords+iz] for iz ∈ v_solve_z_range] + v_solve_nsolve = length(v_solve_z_range) + # Plus one for the one point of ppar that is included in the 'v solve'. + v_solve_n = nvperp * nvpa + 1 + v_solve_implicit_lus = Vector{SparseArrays.UMFPACK.UmfpackLU{mk_float, mk_int}}(undef, v_solve_nsolve) + v_solve_explicit_matrices = Vector{SparseMatrixCSC{mk_float, mk_int}}(undef, v_solve_nsolve) + # This buffer is not shared-memory, because it will be used for a serial LU solve. 
+ v_solve_buffer = allocate_float(v_solve_n) + v_solve_buffer2 = allocate_float(v_solve_n) + v_solve_matrix_buffer = allocate_float(v_solve_n, v_solve_n) + + z_solve_vperp_range = looping.loop_ranges_store[(:vperp,:vpa)].vperp + z_solve_vpa_range = looping.loop_ranges_store[(:vperp,:vpa)].vpa + z_solve_global_inds = vec([(ivperp-1)*nvpa+ivpa:v_size:(nz-1)*v_size+(ivperp-1)*nvpa+ivpa for ivperp ∈ z_solve_vperp_range, ivpa ∈ z_solve_vpa_range]) + z_solve_nsolve = length(z_solve_vperp_range) * length(z_solve_vpa_range) + @serial_region begin + # Do the solve for ppar on the rank-0 process, which has the fewest grid + # points to handle if there are not an exactly equal number of points for each + # process. + push!(z_solve_global_inds, total_size_coords+1 : total_size_coords+nz) + z_solve_nsolve += 1 + end + z_solve_n = nz + z_solve_implicit_lus = Vector{SparseArrays.UMFPACK.UmfpackLU{mk_float, mk_int}}(undef, z_solve_nsolve) + z_solve_explicit_matrices = Vector{SparseMatrixCSC{mk_float, mk_int}}(undef, z_solve_nsolve) + # This buffer is not shared-memory, because it will be used for a serial LU solve. + z_solve_buffer = allocate_float(z_solve_n) + z_solve_buffer2 = allocate_float(z_solve_n) + z_solve_matrix_buffer = allocate_float(z_solve_n, z_solve_n) + + J_buffer = allocate_shared_float(pdf_plus_ppar_size, pdf_plus_ppar_size) + input_buffer = allocate_shared_float(pdf_plus_ppar_size) + intermediate_buffer = allocate_shared_float(pdf_plus_ppar_size) + output_buffer = allocate_shared_float(pdf_plus_ppar_size) + error_buffer = allocate_shared_float(pdf_plus_ppar_size) + + chunk_size = (pdf_plus_ppar_size + block_size[] - 1) ÷ block_size[] + # Set up so root process has fewest points, as root may have other work to do. 
+ global_index_subrange = max(1, pdf_plus_ppar_size - (block_size[] - block_rank[]) * chunk_size + 1):(pdf_plus_ppar_size - (block_size[] - block_rank[] - 1) * chunk_size) + + return (v_solve_global_inds=v_solve_global_inds, + v_solve_nsolve=v_solve_nsolve, + v_solve_implicit_lus=v_solve_implicit_lus, + v_solve_explicit_matrices=v_solve_explicit_matrices, + v_solve_buffer=v_solve_buffer, v_solve_buffer2=v_solve_buffer2, + v_solve_matrix_buffer=v_solve_matrix_buffer, + z_solve_global_inds=z_solve_global_inds, + z_solve_nsolve=z_solve_nsolve, + z_solve_implicit_lus=z_solve_implicit_lus, + z_solve_explicit_matrices=z_solve_explicit_matrices, + z_solve_buffer=z_solve_buffer, z_solve_buffer2=z_solve_buffer2, + z_solve_matrix_buffer=z_solve_matrix_buffer, J_buffer=J_buffer, + input_buffer=input_buffer, intermediate_buffer=intermediate_buffer, + output_buffer=output_buffer, + global_index_subrange=global_index_subrange) + end + + preconditioners = fill(get_adi_precon_buffers(), reverse(outer_coord_sizes)) elseif preconditioner_type === Val(:none) preconditioners = nothing else @@ -192,7 +261,8 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa mk_float(nl_solver_input.linear_atol), linear_restart, nl_solver_input.linear_max_restarts, H, c, s, g, V, linear_initial_guess, Ref(0), Ref(0), Ref(0), Ref(0), Ref(0), - Ref(0), Ref(nl_solver_input.preconditioner_update_interval), + Ref(0), Ref(0), Ref(0), + Ref(nl_solver_input.preconditioner_update_interval), Ref(mk_float(0.0)), serial_solve, Ref(0), Ref(0), preconditioner_type, nl_solver_input.preconditioner_update_interval, preconditioners) @@ -235,12 +305,14 @@ total. 
nl_solver_params.ion_advance.global_n_solves[] = nl_solver_params.ion_advance.n_solves[] nl_solver_params.ion_advance.global_nonlinear_iterations[] = nl_solver_params.ion_advance.nonlinear_iterations[] nl_solver_params.ion_advance.global_linear_iterations[] = nl_solver_params.ion_advance.linear_iterations[] + nl_solver_params.ion_advance.global_precon_iterations[] = nl_solver_params.ion_advance.precon_iterations[] end if nl_solver_params.vpa_advection !== nothing # Solves are run in serial on separate processes, so need a global Allreduce @timeit_debug global_timer "MPI.Allreduce! comm_world" MPI.Allreduce!(nl_solver_params.vpa_advection.n_solves[], +, comm_world) @timeit_debug global_timer "MPI.Allreduce! comm_world" MPI.Allreduce!(nl_solver_params.vpa_advection.nonlinear_iterations[], +, comm_world) @timeit_debug global_timer "MPI.Allreduce! comm_world" MPI.Allreduce!(nl_solver_params.vpa_advection.linear_iterations[], +, comm_world) + @timeit_debug global_timer "MPI.Allreduce! comm_world" MPI.Allreduce!(nl_solver_params.vpa_advection.precon_iterations[], +, comm_world) end end @@ -342,6 +414,7 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, close_linear_counter = -1 success = true previous_residual_norm = residual_norm +old_precon_iterations = nl_solver_params.precon_iterations[] while (counter < 1 && residual_norm > 1.0e-8) || residual_norm > 1.0 counter += 1 #println("\nNewton ", counter) @@ -446,6 +519,9 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, # println("Final residual: ", residual_norm) # println("Total linear iterations: ", linear_counter) # println("Linear iterations per Newton: ", linear_counter / counter) +# precon_count = nl_solver_params.precon_iterations[] - old_precon_iterations +# println("Total precon iterations: ", precon_count) +# println("Precon iterations per linear: ", precon_count / linear_counter) # # println("Newton iterations after close: ", counter - close_counter) 
# println("Total linear iterations after close: ", linear_counter - close_linear_counter) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index cf07371d2..19db3dc94 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -675,7 +675,8 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0, electron_ppar_pdf_solve=true, - preconditioner_type=Val(:electron_lu)) + #preconditioner_type=Val(:electron_lu)) + preconditioner_type=Val(:electron_adi)) nl_solver_ion_advance_params = setup_nonlinear_solve(t_params.implicit_ion_advance, input_dict, (s=composition.n_ion_species, r=r, z=z, vperp=vperp, From ac026051a1ea6ace2c049a6967c456b0f9cab5fb Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 27 Oct 2024 15:36:07 +0000 Subject: [PATCH 13/43] Skip qpar integral terms to make explicit matrix sparser for ADI precon --- .../src/electron_kinetic_equation.jl | 30 ++++++++++--------- moment_kinetics/src/electron_vpa_advection.jl | 14 +++++---- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 9b5fe83aa..118eec885 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1099,7 +1099,7 @@ global_rank[] == 0 && println("recalculating precon") collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, ir, - evolve_ppar, :explicit_z) + evolve_ppar, :explicit_z, false) begin_z_region() @loop_z iz begin v_solve_counter += 1 @@ -1147,7 +1147,7 @@ global_rank[] == 0 && println("recalculating precon") collisions, composition, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, 
scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, ir, - evolve_ppar, :explicit_v) + evolve_ppar, :explicit_v, false) begin_vperp_vpa_region() @loop_vperp_vpa ivperp ivpa begin z_solve_counter += 1 @@ -3181,7 +3181,7 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, external_source_settings, num_diss_params, t_params, ion_dt, ir, evolve_ppar, - include=:all) = begin + include=:all, include_qpar_integral_terms=true) = begin dt = t_params.dt[] buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] @@ -3290,13 +3290,13 @@ Fill a pre-allocated matrix with the Jacobian matrix for electron kinetic equati add_electron_vpa_advection_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, third_moment, dpdf_dvpa, ddens_dz, dppar_dz, dthird_moment_dz, moments, me, z, vperp, vpa, z_spectral, vpa_spectral, - vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir, include; - ppar_offset=pdf_size) + vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir, include, + include_qpar_integral_terms; ppar_offset=pdf_size) add_contribution_from_electron_pdf_term_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, - vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, include; - ppar_offset=pdf_size) + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, include, + include_qpar_integral_terms; ppar_offset=pdf_size) add_electron_dissipation_term_to_Jacobian!( jacobian_matrix, f, num_diss_params, z, vperp, vpa, vpa_spectral, z_speed, dt, ir, include) @@ -4194,8 +4194,8 @@ end function add_contribution_from_electron_pdf_term_to_Jacobian!( jacobian_matrix, f, dens, upar, ppar, vth, third_moment, ddens_dz, dppar_dz, dvth_dz, dqpar_dz, dthird_moment_dz, moments, me, external_source_settings, z, - 
vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, include=:all; f_offset=0, - ppar_offset=0) + vperp, vpa, z_spectral, z_speed, scratch_dummy, dt, ir, include=:all, + include_qpar_integral_terms=true; f_offset=0, ppar_offset=0) if f_offset == ppar_offset error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar " @@ -4274,11 +4274,13 @@ function add_contribution_from_electron_pdf_term_to_Jacobian!( z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1 z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind] z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind] - for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset - jacobian_matrix[row,col] += - dt * f[ivpa,ivperp,iz] * vth[iz] * - vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry + if include_qpar_integral_terms + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += + dt * f[ivpa,ivperp,iz] * vth[iz] * + vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry + end end if include === :all for index ∈ eachindex(external_source_settings.electron) diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl index a9e0fd383..0c2f7d02a 100644 --- a/moment_kinetics/src/electron_vpa_advection.jl +++ b/moment_kinetics/src/electron_vpa_advection.jl @@ -100,7 +100,9 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, z, vperp, vpa, z_spectral, vpa_spectral, vpa_advect, z_speed, scratch_dummy, external_source_settings, dt, ir, - include=:all; f_offset=0, ppar_offset=0) + include=:all, + include_qpar_integral_terms=true; + f_offset=0, 
ppar_offset=0) if f_offset == ppar_offset error("Got f_offset=$f_offset the same as ppar_offset=$ppar_offset. f and ppar " * "cannot be in same place in state vector.") @@ -205,10 +207,12 @@ function add_electron_vpa_advection_to_Jacobian!(jacobian_matrix, f, dens, upar, z_deriv_row_endind = z_deriv_matrix.rowptr[iz+1] - 1 z_deriv_colinds = @view z_deriv_matrix.colval[z_deriv_row_startind:z_deriv_row_endind] z_deriv_row_nonzeros = @view z_deriv_matrix.nzval[z_deriv_row_startind:z_deriv_row_endind] - for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n - col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset - jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * - vpa.grid[ivpa] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry + if include_qpar_integral_terms + for (icolz, z_deriv_entry) ∈ zip(z_deriv_colinds, z_deriv_row_nonzeros), icolvperp ∈ 1:vperp.n, icolvpa ∈ 1:vpa.n + col = (icolz - 1) * v_size + (icolvperp - 1) * vpa.n + icolvpa + f_offset + jacobian_matrix[row,col] += dt * dpdf_dvpa[ivpa,ivperp,iz] * + vpa.grid[ivpa] * vth[iz] * vpa.wgts[icolvpa]/sqrt(π) * vpa.grid[icolvpa]^3 * z_deriv_entry + end end if include ∈ (:all, :explicit_v) jacobian_matrix[row,ppar_offset+iz] += dt * dpdf_dvpa[ivpa,ivperp,iz] * vpa.grid[ivpa] * From e811cb0d5c6410bfa3555454532ef5c9bee4e0d1 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 27 Oct 2024 09:36:35 +0000 Subject: [PATCH 14/43] Skip right-preconditioner evaluation when initial guess is all-zeros If delta_x is all-zero, then P^-1.delta_x is also all-zero, so no need to evaluate it. 
--- moment_kinetics/src/nonlinear_solvers.jl | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 3789ab0c6..4a574b171 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -408,7 +408,9 @@ function newton_solve!(x, residual_func!, residual, delta_x, rhs_delta, v, w, counter = 0 linear_counter = 0 - parallel_map(solver_type, ()->0.0, delta_x) + # Would need this if delta_x was not set to zero within the Newton iteration loop + # below. + #parallel_map(solver_type, ()->0.0, delta_x) close_counter = -1 close_linear_counter = -1 @@ -434,7 +436,8 @@ old_precon_iterations = nl_solver_params.precon_iterations[] s=nl_solver_params.s, g=nl_solver_params.g, V=nl_solver_params.V, rhs_delta=rhs_delta, initial_guess=nl_solver_params.linear_initial_guess, - serial_solve=nl_solver_params.serial_solve) + serial_solve=nl_solver_params.serial_solve, + initial_delta_x_is_zero=true) linear_counter += linear_its # If the residual does not decrease, we will do a line search to find an update @@ -1217,7 +1220,8 @@ MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869]. x, residual_func!, residual0, delta_x, v, w, solver_type::Val, norm_params; coords, rtol, atol, restart, max_restarts, left_preconditioner, right_preconditioner, H, c, s, g, V, - rhs_delta, initial_guess, serial_solve) = begin + rhs_delta, initial_guess, serial_solve, + initial_delta_x_is_zero) = begin # Solve (approximately?): # J δx = residual0 @@ -1234,8 +1238,10 @@ MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869]. # by a large number `Jv_scale_factor` (in constrast to the small `epsilon` in the # 'usual' case where the norm does not include either reative or absolute tolerance) # to ensure that we get a reasonable estimate of J.v. 
- function approximate_Jacobian_vector_product!(v) - right_preconditioner(v) + function approximate_Jacobian_vector_product!(v, skip_first_precon::Bool=false) + if !skip_first_precon + right_preconditioner(v) + end parallel_map(solver_type, (x,v) -> x + Jv_scale_factor * v, v, x, v) residual_func!(rhs_delta, v) @@ -1249,8 +1255,12 @@ MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869]. # the left-preconditioner. parallel_map(solver_type, (delta_x) -> delta_x, v, delta_x) left_preconditioner(residual0) + # This function transforms the data stored in 'v' from δx to ≈J.δx - approximate_Jacobian_vector_product!(v) + # If initial δx is all-zero, we can skip a right-preconditioner evaluation because it + # would just transform all-zero to all-zero. + approximate_Jacobian_vector_product!(v, initial_delta_x_is_zero) + # Now we actually set 'w' as the first Krylov vector, and normalise it. parallel_map(solver_type, (residual0, v) -> -residual0 - v, w, residual0, v) beta = distributed_norm(solver_type, w, norm_params...) From de9822c8e8056c019129b2a251d03f31d5e63cec Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 28 Oct 2024 09:28:31 +0000 Subject: [PATCH 15/43] Make number of ADI iterations settable, and default to 1 One iteration of ADI preconditioning seems to be enough to make the JFNK solver for kinetic electrons converge. The number of linear (Krylov) iterations required increases slightly, but overall this should reduce the computational cost. 
--- .../src/electron_kinetic_equation.jl | 27 +++++++++++++------ moment_kinetics/src/nonlinear_solvers.jl | 10 ++++++- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 118eec885..05fae2358 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1094,12 +1094,17 @@ global_rank[] == 0 && println("recalculating precon") explicit_J = adi_info.J_buffer # Get sparse matrix for explicit, right-hand-side part of the # solve. - fill_electron_kinetic_equation_Jacobian!( - explicit_J, f_electron_new, electron_ppar_new, moments, - collisions, composition, z, vperp, vpa, z_spectral, - vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, - external_source_settings, num_diss_params, t_params, ion_dt, ir, - evolve_ppar, :explicit_z, false) + if adi_info.n_extra_iterations > 0 + # If we only do one 'iteration' we don't need the 'explicit + # matrix' for the first solve (the v-solve), because the initial + # guess is zero, + fill_electron_kinetic_equation_Jacobian!( + explicit_J, f_electron_new, electron_ppar_new, moments, + collisions, composition, z, vperp, vpa, z_spectral, + vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy, + external_source_settings, num_diss_params, t_params, ion_dt, ir, + evolve_ppar, :explicit_z, false) + end begin_z_region() @loop_z iz begin v_solve_counter += 1 @@ -1132,7 +1137,12 @@ global_rank[] == 0 && println("recalculating precon") end end - adi_info.v_solve_explicit_matrices[v_solve_counter] = sparse(@view(explicit_J[adi_info.v_solve_global_inds[v_solve_counter],:])) + if adi_info.n_extra_iterations > 0 + # If we only do one 'iteration' we don't need the 'explicit + # matrix' for the first solve (the v-solve), because the + # initial guess is zero, + adi_info.v_solve_explicit_matrices[v_solve_counter] = 
sparse(@view(explicit_J[adi_info.v_solve_global_inds[v_solve_counter],:])) + end end @boundscheck v_solve_counter == adi_info.v_solve_nsolve || error("v_solve_counter($v_solve_counter) != v_solve_nsolve($(adi_info.v_solve_nsolve))") @@ -1233,6 +1243,7 @@ global_rank[] == 0 && println("recalculating precon") this_intermediate_buffer = adi_info.intermediate_buffer this_output_buffer = adi_info.output_buffer global_index_subrange = adi_info.global_index_subrange + n_extra_iterations = adi_info.n_extra_iterations v_size = vperp.n * vpa.n pdf_size = z.n * v_size @@ -1317,7 +1328,7 @@ global_rank[] == 0 && println("recalculating precon") first_adi_v_solve!() fill_intermediate_buffer!() adi_z_solve!() - for n ∈ 1:1 + for n ∈ 1:n_extra_iterations precon_iterations[] += 1 fill_intermediate_buffer!() adi_v_solve!() diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 4a574b171..5fed3dc4c 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -96,6 +96,7 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa linear_restart=10, linear_max_restarts=0, preconditioner_update_interval=300, + adi_precon_iterations=1, ) if !active @@ -229,6 +230,12 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa # Set up so root process has fewest points, as root may have other work to do. 
global_index_subrange = max(1, pdf_plus_ppar_size - (block_size[] - block_rank[]) * chunk_size + 1):(pdf_plus_ppar_size - (block_size[] - block_rank[] - 1) * chunk_size) + if nl_solver_input.adi_precon_iterations < 1 + error("Setting adi_precon_iterations=$(nl_solver_input.adi_precon_iterations) " + * "would mean the preconditioner does nothing.") + end + n_extra_iterations = nl_solver_input.adi_precon_iterations - 1 + return (v_solve_global_inds=v_solve_global_inds, v_solve_nsolve=v_solve_nsolve, v_solve_implicit_lus=v_solve_implicit_lus, @@ -243,7 +250,8 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa z_solve_matrix_buffer=z_solve_matrix_buffer, J_buffer=J_buffer, input_buffer=input_buffer, intermediate_buffer=intermediate_buffer, output_buffer=output_buffer, - global_index_subrange=global_index_subrange) + global_index_subrange=global_index_subrange, + n_extra_iterations=n_extra_iterations) end preconditioners = fill(get_adi_precon_buffers(), reverse(outer_coord_sizes)) From 0d05d9fb3594993cc51032654ffd7e2ef2ae5f44 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 28 Oct 2024 11:02:57 +0000 Subject: [PATCH 16/43] For kinetic electrons, use LU when no shared-mem, ADI with shared-mem When not parallelising using shared memory, there is no need to split the preconditioner and the LU preconditioner should be the most efficient. Therefore use the LU precon in serial, and use ADI only when `block_size[] > 1`. 
--- moment_kinetics/src/time_advance.jl | 10 +- .../test/kinetic_electron_tests.jl | 205 ++++++++++++------ 2 files changed, 147 insertions(+), 68 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 19db3dc94..219fd0ef9 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -667,6 +667,13 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop input_dict, (z=z,); default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0) + if block_size[] == 1 + # No need to parallelise, so un-split LU solver should be most efficient. + electron_preconditioner_type = Val(:electron_lu) + else + # Want to parallelise preconditioner, so use ADI method. + electron_preconditioner_type = Val(:electron_adi) + end nl_solver_electron_advance_params = setup_nonlinear_solve(t_params.implicit_electron_advance || composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation), input_dict, @@ -675,8 +682,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0, electron_ppar_pdf_solve=true, - #preconditioner_type=Val(:electron_lu)) - preconditioner_type=Val(:electron_adi)) + preconditioner_type=electron_preconditioner_type) nl_solver_ion_advance_params = setup_nonlinear_solve(t_params.implicit_ion_advance, input_dict, (s=composition.n_ion_species, r=r, z=z, vperp=vperp, diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl index 6a41a2c2f..33738748d 100644 --- a/moment_kinetics/test/kinetic_electron_tests.jl +++ b/moment_kinetics/test/kinetic_electron_tests.jl @@ -6,6 +6,7 @@ module KineticElectronsTests include("setup.jl") +using moment_kinetics.communication using moment_kinetics.load_data: get_run_info_no_setup, close_run_info, postproc_load_variable using moment_kinetics.looping 
@@ -172,72 +173,144 @@ function run_test() # Regression test # Benchmark data generated in serial on Linux - expected_Ez = [-0.5990683230706185 -1.136483186157602; - -0.4944296396481284 -0.9873296990705788; - -0.30889032954504736 -0.6694380824928302; - -0.2064830747303776 -0.4471331690708596; - -0.21232457328748663 -0.423069171542538; - -0.18233875912042674 -0.3586467595624931; - -0.16711429522309232 -0.3018272987758344; - -0.16920776495088916 -0.27814384649305496; - -0.1629417555658927 -0.26124630661090814; - -0.16619150334079993 -0.2572789330163811; - -0.15918194883360942 -0.23720078037362732; - -0.14034706409006803 -0.20520396656341475; - -0.12602184032280567 -0.1827016549071128; - -0.10928716440800472 -0.15808919669899502; - -0.07053969674257217 -0.10137753767917096; - -0.0249577746169536 -0.0358411459260082; - -2.8327303308330514e-15 -2.0803303361189427e-5; - 0.024957774616960776 0.03584490974053962; - 0.07053969674257636 0.1013692898656727; - 0.10928716440799909 0.15807862358546687; - 0.1260218403227975 0.18263049748179466; - 0.1403470640900294 0.20516566362571026; - 0.1591819488336015 0.23711236692241613; - 0.16619150334082114 0.257126146434857; - 0.16294175556587748 0.2609881259705107; - 0.16920776495090983 0.2778978154805798; - 0.1671142952230893 0.3015349192528757; - 0.1823387591204167 0.3585291689672981; - 0.21232457328753865 0.4231179549656996; - 0.20648307473037922 0.44816400221269476; - 0.3088903295450278 0.6716787105435247; - 0.4944296396481271 0.9861165590258743; - 0.5990683230705801 1.1300034111861956] - expected_vthe = [22.64555285302391 22.485481713141688; - 23.763411647653097 23.63281883616836; - 25.26907160117684 25.181703459470448; - 26.17920352818247 26.12461016686916; - 26.514772631426933 26.476018852279974; - 26.798783188585713 26.774387562937218; - 27.202255545479264 27.203662204308202; - 27.50424749120107 27.527732850637264; - 27.630498656270504 27.6642323848215; - 27.748483758260697 27.79134809261204; - 27.933760382468346 
27.990808336620802; - 28.08611508251559 28.153978618442775; - 28.14959662643782 28.221734439130564; - 28.207730844115044 28.283677711828023; - 28.28567669896009 28.36634261525836; - 28.32728392065335 28.410489883644782; - 28.331064506972027 28.41437629072209; - 28.32729968986601 28.41050992096321; - 28.285678151542136 28.366352683865195; - 28.207765527709956 28.28373408727703; - 28.149604559462947 28.221771261090687; - 28.086248527111163 28.154158507899695; - 27.933979289064936 27.991103719847732; - 27.74906125092813 27.792046191405188; - 27.631210333523736 27.66508092926101; - 27.505479130159543 27.529115937508752; - 27.20422756527604 27.20578114592589; - 26.801712351383053 26.77740066591359; - 26.517644511297203 26.478915386575462; - 26.18176436913143 26.127099000267552; - 25.26635932097994 25.178676836919877; - 23.756593489029708 23.625697695979085; - 22.64390166090378 22.48400980852866] + if global_size[] == 1 + # Serial solves use LU preconditioner + expected_Ez = [-0.5990683230706185 -1.136483186157602; + -0.4944296396481284 -0.9873296990705788; + -0.30889032954504736 -0.6694380824928302; + -0.2064830747303776 -0.4471331690708596; + -0.21232457328748663 -0.423069171542538; + -0.18233875912042674 -0.3586467595624931; + -0.16711429522309232 -0.3018272987758344; + -0.16920776495088916 -0.27814384649305496; + -0.1629417555658927 -0.26124630661090814; + -0.16619150334079993 -0.2572789330163811; + -0.15918194883360942 -0.23720078037362732; + -0.14034706409006803 -0.20520396656341475; + -0.12602184032280567 -0.1827016549071128; + -0.10928716440800472 -0.15808919669899502; + -0.07053969674257217 -0.10137753767917096; + -0.0249577746169536 -0.0358411459260082; + -2.8327303308330514e-15 -2.0803303361189427e-5; + 0.024957774616960776 0.03584490974053962; + 0.07053969674257636 0.1013692898656727; + 0.10928716440799909 0.15807862358546687; + 0.1260218403227975 0.18263049748179466; + 0.1403470640900294 0.20516566362571026; + 0.1591819488336015 0.23711236692241613; + 
0.16619150334082114 0.257126146434857; + 0.16294175556587748 0.2609881259705107; + 0.16920776495090983 0.2778978154805798; + 0.1671142952230893 0.3015349192528757; + 0.1823387591204167 0.3585291689672981; + 0.21232457328753865 0.4231179549656996; + 0.20648307473037922 0.44816400221269476; + 0.3088903295450278 0.6716787105435247; + 0.4944296396481271 0.9861165590258743; + 0.5990683230705801 1.1300034111861956] + expected_vthe = [22.64555285302391 22.485481713141688; + 23.763411647653097 23.63281883616836; + 25.26907160117684 25.181703459470448; + 26.17920352818247 26.12461016686916; + 26.514772631426933 26.476018852279974; + 26.798783188585713 26.774387562937218; + 27.202255545479264 27.203662204308202; + 27.50424749120107 27.527732850637264; + 27.630498656270504 27.6642323848215; + 27.748483758260697 27.79134809261204; + 27.933760382468346 27.990808336620802; + 28.08611508251559 28.153978618442775; + 28.14959662643782 28.221734439130564; + 28.207730844115044 28.283677711828023; + 28.28567669896009 28.36634261525836; + 28.32728392065335 28.410489883644782; + 28.331064506972027 28.41437629072209; + 28.32729968986601 28.41050992096321; + 28.285678151542136 28.366352683865195; + 28.207765527709956 28.28373408727703; + 28.149604559462947 28.221771261090687; + 28.086248527111163 28.154158507899695; + 27.933979289064936 27.991103719847732; + 27.74906125092813 27.792046191405188; + 27.631210333523736 27.66508092926101; + 27.505479130159543 27.529115937508752; + 27.20422756527604 27.20578114592589; + 26.801712351383053 26.77740066591359; + 26.517644511297203 26.478915386575462; + 26.18176436913143 26.127099000267552; + 25.26635932097994 25.178676836919877; + 23.756593489029708 23.625697695979085; + 22.64390166090378 22.48400980852866] + else + # Parallel solves, which here use only shared-memory parallelism, use the ADI + # preconditioner, which should be as accurate, but may give different results + # within Newton-Krylov tolerances. 
+ expected_Ez = [-0.5990683230706185 -1.136484793603861; + -0.4944296396481284 -0.9873300031440772; + -0.30889032954504736 -0.6694378168618197; + -0.2064830747303776 -0.447133132132065; + -0.21232457328748663 -0.42306913446372424; + -0.18233875912042674 -0.3586467771727455; + -0.16711429522309232 -0.30182728110160495; + -0.16920776495088916 -0.27814382747995164; + -0.1629417555658927 -0.2612463784138094; + -0.16619150334079993 -0.25727894258000966; + -0.15918194883360942 -0.23720078814350573; + -0.14034706409006803 -0.20520397188041256; + -0.12602184032280567 -0.18270162474892546; + -0.10928716440800472 -0.1580892035790512; + -0.07053969674257217 -0.10137753682381391; + -0.0249577746169536 -0.03584114725793184; + -2.8327303308330514e-15 -2.0802378395589373e-5; + 0.024957774616960776 0.0358449101669449; + 0.07053969674257636 0.10136928934666747; + 0.10928716440799909 0.15807862867071673; + 0.1260218403227975 0.18263047522175488; + 0.1403470640900294 0.20516566756031385; 0.1591819488336015 0.2371123741024713; + 0.16619150334082114 0.2571261543920033; + 0.16294175556587748 0.2609882062708652; + 0.16920776495090983 0.27789779494370415; + 0.1671142952230893 0.30153489797658445; + 0.1823387591204167 0.35852918516786003; + 0.21232457328753865 0.42311789840457864; + 0.20648307473037922 0.44816400062147066; + 0.3088903295450278 0.6716785459169026; + 0.4944296396481271 0.9861167610959626; + 0.5990683230705801 1.1300045383907789] + expected_vthe = [22.64555338227396 22.48548119549829; + 23.76341164436594 23.632819782771243; + 25.26907163394297 25.18170391887767; + 26.179203467285365 26.12461016927763; + 26.514772629327332 26.47601877788725; + 26.79878318858447 26.774387534342114; + 27.20225551034186 27.20366217166485; + 27.504247525601926 27.527732760234755; + 27.630498605068166 27.66423228184859; + 27.748483763235846 27.791348082529804; + 27.933760371994826 27.990808308571204; + 28.08611509938479 28.153978648601132; + 28.149596610550738 28.221734405417436; + 
28.207730848524463 28.28367771694209; + 28.28567670146647 28.366342613061416; + 28.32728392764203 28.410489892675102; + 28.331064498175866 28.414376282256146; + 28.327299695349158 28.41050992979778; + 28.285678155424083 28.366352683054103; + 28.207765532359442 28.28373409338897; + 28.149604554344048 28.22177123547944; + 28.086248537316628 28.154158532699547; + 27.933979285563435 27.991103698041254; + 27.749061255285646 27.79204618050744; + 27.63121031067771 27.665080846653012; + 27.505479148983177 27.529115838548574; + 27.204227550854288 27.205781129997607; + 26.801712356957204 26.777400644678224; + 26.517644516966772 26.478915353716097; + 26.181764354679014 26.12709901369174; + 25.266359355820907 25.178677080491074; + 23.756593465755735 23.625698257711747; + 22.64390180335094 22.48400934735562] + end if expected_Ez == nothing # Error: no expected input provided From c68ca473ef4bac536b0f91ffdfe8057c5b2feb29 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 10 Nov 2024 20:31:38 +0000 Subject: [PATCH 17/43] Fix electron bc cutoff at lower boundary Indexing error meant that half the time the cutoff did not interpolate smoothly between grid points. 
--- moment_kinetics/src/electron_kinetic_equation.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 05fae2358..d8d2fe2d6 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2394,7 +2394,7 @@ end pdf[plus_vcut_ind+1,1,1,ir] *= vcut_fraction - 0.5 else pdf[plus_vcut_ind+1,1,1,ir] = 0.0 - pdf[plus_vcut_ind+1,1,1,ir] *= vcut_fraction + 0.5 + pdf[plus_vcut_ind,1,1,ir] *= vcut_fraction + 0.5 end # update the electrostatic potential at the boundary to be the value corresponding to the updated cutoff velocity From f2d0115767dffe61159ee61ad49446b6b9a78c0b Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 14 Nov 2024 09:48:22 +0000 Subject: [PATCH 18/43] Recalculate vth before bc in electron_backward_euler!() residual_func!() Bugfix: the electron boundary condition takes moments.electron.vth as an input, so when ppar is evolved vth has to be recalculated from the new ppar before the boundary condition is enforced, not after. --- .../src/electron_kinetic_equation.jl | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index d8d2fe2d6..8a3d62d1f 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1403,14 +1403,6 @@ global_rank[] == 0
&& println("recalculating precon") (this_dens[iz,ir] * composition.me_over_mi))) end + end + + # enforce the boundary condition(s) on the electron pdf + @views enforce_boundary_condition_on_electron_pdf!( + f_electron_newvar, phi, moments.electron.vth[:,ir], + moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + composition.me_over_mi; bc_constraints=false) + + if evolve_ppar # Calculate heat flux and derivatives using new_variables @views calculate_electron_qpar_from_pdf_no_r!(moments.electron.qpar[:,ir], electron_ppar_newvar, From 48e992ed3854495627191cae3f573b54d3cb38ec Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 17 Nov 2024 17:15:34 +0000 Subject: [PATCH 19/43] Move cutoff parameters for kinetic electron bc to separate function Will allow the calculation of these parameters to be reused when calculating a Jacobian matrix for the wall bc. --- .../src/electron_kinetic_equation.jl | 218 ++++++++++-------- 1 file changed, 118 insertions(+), 100 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 8a3d62d1f..d6eece8a9 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2136,6 +2136,118 @@ function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vp end end +function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) + # Delete the upar contribution here if ignoring the 'upar shift' + vpa_unnorm = @. vpa.scratch2 = vthe[1,ir] * vpa.grid + upar[1,ir] + + u_over_vt = upar[1,ir] / vthe[1,ir] + + # Initial guess for cut-off velocity is result from previous RK stage (which + # might be the previous timestep if this is the first stage). Recalculate this + # value from phi. 
+ vcut = sqrt(phi[1,ir] / me_over_mi) + + # -vcut is between minus_vcut_ind-1 and minus_vcut_ind + minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) + if minus_vcut_ind < 2 + error("In lower-z electron bc, failed to find vpa=-vcut point, minus_vcut_ind=$minus_vcut_ind") + end + if minus_vcut_ind > vpa.n + error("In lower-z electron bc, failed to find vpa=-vcut point, minus_vcut_ind=$minus_vcut_ind") + end + + # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar + # shift' + sigma = -u_over_vt + + # sigma is between sigma_ind-1 and sigma_ind + sigma_ind = searchsortedfirst(vpa_unnorm, 0.0) + if sigma_ind < 2 + error("In lower-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind") + end + if sigma_ind > vpa.n + error("In lower-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind") + end + + # sigma_fraction is the fraction of the distance between sigma_ind-1 and + # sigma_ind where sigma is. + sigma_fraction = (sigma - vpa_unnorm[sigma_ind-1]) / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind-1]) + + # Want to construct the w-grid corresponding to -vpa. + # wpa(vpa) = (vpa - upar)/vth + # ⇒ vpa = vth*wpa(vpa) + upar + # wpa(-vpa) = (-vpa - upar)/vth + # = (-(vth*wpa(vpa) + upar) - upar)/vth + # = (-vth*wpa - 2*upar)/vth + # = -wpa - 2*upar/vth + # [Note that `vpa.grid` is slightly mis-named here - it contains the values of + # wpa(+vpa) as we are using a 'moment kinetic' approach.] + # Need to reverse vpa.grid because the grid passed as the second argument of + # interpolate_to_grid_1d!() needs to be sorted in increasing order. + reversed_wpa_of_minus_vpa = @. 
vpa.scratch3 = -vpa.grid + 2.0 * sigma + #reversed_wpa_of_minus_vpa = vpa.scratch3 .= .-vpa.grid + reverse!(reversed_wpa_of_minus_vpa) + + return vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction, + reversed_wpa_of_minus_vpa +end + +function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir) + # Delete the upar contribution here if ignoring the 'upar shift' + vpa_unnorm = @. vpa.scratch2 = vthe[end,ir] * vpa.grid + upar[end,ir] + + u_over_vt = upar[end,ir] / vthe[end,ir] + + # Initial guess for cut-off velocity is result from previous RK stage (which + # might be the previous timestep if this is the first stage). Recalculate this + # value from phi. + vcut = sqrt(phi[end,ir] / me_over_mi) + + # vcut is between plus_vcut_ind and plus_vcut_ind+1 + plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) + if plus_vcut_ind < 1 + error("In upper-z electron bc, failed to find vpa=vcut point, plus_vcut_ind=$plus_vcut_ind") + end + if plus_vcut_ind > vpa.n - 1 + error("In upper-z electron bc, failed to find vpa=vcut point, plus_vcut_ind=$plus_vcut_ind") + end + + # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar + # shift' + sigma = -u_over_vt + + # sigma is between sigma_ind and sigma_ind+1 + sigma_ind = searchsortedlast(vpa_unnorm, 0.0) + if sigma_ind < 1 + error("In upper-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind") + end + if sigma_ind > vpa.n - 1 + error("In upper-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind") + end + + # sigma_fraction is the fraction of the distance between sigma_ind+1 and + # sigma_ind where sigma is. + sigma_fraction = (sigma - vpa_unnorm[sigma_ind+1]) / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind+1]) + + # Want to construct the w-grid corresponding to -vpa. 
+ # wpa(vpa) = (vpa - upar)/vth + # ⇒ vpa = vth*wpa(vpa) + upar + # wpa(-vpa) = (-vpa - upar)/vth + # = (-(vth*wpa(vpa) + upar) - upar)/vth + # = (-vth*wpa - 2*upar)/vth + # = -wpa - 2*upar/vth + # [Note that `vpa.grid` is slightly mis-named here - it contains the values of + # wpa(+vpa) as we are using a 'moment kinetic' approach.] + # Need to reverse vpa.grid because the grid passed as the second argument of + # interpolate_to_grid_1d!() needs to be sorted in increasing order. + reversed_wpa_of_minus_vpa = @. vpa.scratch3 = -vpa.grid + 2.0 * sigma + #reversed_wpa_of_minus_vpa = vpa.scratch3 .= .-vpa.grid + reverse!(reversed_wpa_of_minus_vpa) + + return vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction, + reversed_wpa_of_minus_vpa +end + @timeit global_timer enforce_boundary_condition_on_electron_pdf!( pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi; @@ -2241,56 +2353,9 @@ end # constraints and determining the cut-off velocity (and therefore the sheath # potential). - # Delete the upar contribution here if ignoring the 'upar shift' - vpa_unnorm = @. vpa.scratch2 = vthe[1,ir] * vpa.grid + upar[1,ir] - - u_over_vt = upar[1,ir] / vthe[1,ir] - - # Initial guess for cut-off velocity is result from previous RK stage (which - # might be the previous timestep if this is the first stage). Recalculate this - # value from phi. 
- vcut = sqrt(phi[1,ir] / me_over_mi) - - # -vcut is between minus_vcut_ind-1 and minus_vcut_ind - minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) - if minus_vcut_ind < 2 - error("In lower-z electron bc, failed to find vpa=-vcut point, minus_vcut_ind=$minus_vcut_ind") - end - if minus_vcut_ind > vpa.n - error("In lower-z electron bc, failed to find vpa=-vcut point, minus_vcut_ind=$minus_vcut_ind") - end - - # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar - # shift' - sigma = -u_over_vt - - # sigma is between sigma_ind-1 and sigma_ind - sigma_ind = searchsortedfirst(vpa_unnorm, 0.0) - if sigma_ind < 2 - error("In lower-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind") - end - if sigma_ind > vpa.n - error("In lower-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind") - end - - # sigma_fraction is the fraction of the distance between sigma_ind-1 and - # sigma_ind where sigma is. - sigma_fraction = (sigma - vpa_unnorm[sigma_ind-1]) / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind-1]) - - # Want to construct the w-grid corresponding to -vpa. - # wpa(vpa) = (vpa - upar)/vth - # ⇒ vpa = vth*wpa(vpa) + upar - # wpa(-vpa) = (-vpa - upar)/vth - # = (-(vth*wpa(vpa) + upar) - upar)/vth - # = (-vth*wpa - 2*upar)/vth - # = -wpa - 2*upar/vth - # [Note that `vpa.grid` is slightly mis-named here - it contains the values of - # wpa(+vpa) as we are using a 'moment kinetic' approach.] - # Need to reverse vpa.grid because the grid passed as the second argument of - # interpolate_to_grid_1d!() needs to be sorted in increasing order. - reversed_wpa_of_minus_vpa = @. 
vpa.scratch3 = -vpa.grid + 2.0 * sigma - #reversed_wpa_of_minus_vpa = vpa.scratch3 .= .-vpa.grid - reverse!(reversed_wpa_of_minus_vpa) + vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction, + reversed_wpa_of_minus_vpa = get_cutoff_params_lower(upar, vthe, phi, + me_over_mi, vpa, ir) # interpolate the pdf onto this grid #@views interpolate_to_grid_1d!(interpolated_pdf, wpa_values, pdf[:,1,1,ir], vpa, vpa_spectral) @@ -2510,56 +2575,9 @@ end # constraints and determining the cut-off velocity (and therefore the sheath # potential). - # Delete the upar contribution here if ignoring the 'upar shift' - vpa_unnorm = @. vpa.scratch2 = vthe[end,ir] * vpa.grid + upar[end,ir] - - u_over_vt = upar[end,ir] / vthe[end,ir] - - # Initial guess for cut-off velocity is result from previous RK stage (which - # might be the previous timestep if this is the first stage). Recalculate this - # value from phi. - vcut = sqrt(phi[end,ir] / me_over_mi) - - # vcut is between plus_vcut_ind and plus_vcut_ind+1 - plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) - if plus_vcut_ind < 1 - error("In upper-z electron bc, failed to find vpa=vcut point, plus_vcut_ind=$plus_vcut_ind") - end - if plus_vcut_ind > vpa.n - 1 - error("In upper-z electron bc, failed to find vpa=vcut point, plus_vcut_ind=$plus_vcut_ind") - end - - # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar - # shift' - sigma = -u_over_vt - - # sigma is between sigma_ind and sigma_ind+1 - sigma_ind = searchsortedlast(vpa_unnorm, 0.0) - if sigma_ind < 1 - error("In upper-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind") - end - if sigma_ind > vpa.n - 1 - error("In upper-z electron bc, failed to find vpa=0 point, sigma_ind=$sigma_ind") - end - - # sigma_fraction is the fraction of the distance between sigma_ind+1 and - # sigma_ind where sigma is. 
- sigma_fraction = (sigma - vpa_unnorm[sigma_ind+1]) / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind+1]) - - # Want to construct the w-grid corresponding to -vpa. - # wpa(vpa) = (vpa - upar)/vth - # ⇒ vpa = vth*wpa(vpa) + upar - # wpa(-vpa) = (-vpa - upar)/vth - # = (-(vth*wpa(vpa) + upar) - upar)/vth - # = (-vth*wpa - 2*upar)/vth - # = -wpa - 2*upar/vth - # [Note that `vpa.grid` is slightly mis-named here - it contains the values of - # wpa(+vpa) as we are using a 'moment kinetic' approach.] - # Need to reverse vpa.grid because the grid passed as the second argument of - # interpolate_to_grid_1d!() needs to be sorted in increasing order. - reversed_wpa_of_minus_vpa = @. vpa.scratch3 = -vpa.grid + 2.0 * sigma - #reversed_wpa_of_minus_vpa = vpa.scratch3 .= .-vpa.grid - reverse!(reversed_wpa_of_minus_vpa) + vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction, + reversed_wpa_of_minus_vpa = get_cutoff_params_upper(upar, vthe, phi, + me_over_mi, vpa, ir) # interpolate the pdf onto this grid #@views interpolate_to_grid_1d!(interpolated_pdf, wpa_values, pdf[:,1,1,ir], vpa, vpa_spectral) From 67b546dfe796a29b41951cfa54e6b716e6917549 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 19 Nov 2024 10:23:40 +0000 Subject: [PATCH 20/43] Fix calculation of sigma_fraction in kinetic electron bc --- .../src/electron_kinetic_equation.jl | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index d6eece8a9..dcb14db2e 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2137,11 +2137,14 @@ function apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vp end function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) - # Delete the upar contribution here if ignoring the 'upar shift' - vpa_unnorm = @. 
vpa.scratch2 = vthe[1,ir] * vpa.grid + upar[1,ir] - u_over_vt = upar[1,ir] / vthe[1,ir] + # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar + # shift' + sigma = -u_over_vt + + vpa_unnorm = @. vpa.scratch2 = vthe[1,ir] * (vpa.grid - sigma) + # Initial guess for cut-off velocity is result from previous RK stage (which # might be the previous timestep if this is the first stage). Recalculate this # value from phi. @@ -2156,10 +2159,6 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) error("In lower-z electron bc, failed to find vpa=-vcut point, minus_vcut_ind=$minus_vcut_ind") end - # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar - # shift' - sigma = -u_over_vt - # sigma is between sigma_ind-1 and sigma_ind sigma_ind = searchsortedfirst(vpa_unnorm, 0.0) if sigma_ind < 2 @@ -2171,7 +2170,7 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) # sigma_fraction is the fraction of the distance between sigma_ind-1 and # sigma_ind where sigma is. - sigma_fraction = (sigma - vpa_unnorm[sigma_ind-1]) / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind-1]) + sigma_fraction = -vpa_unnorm[sigma_ind-1] / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind-1]) # Want to construct the w-grid corresponding to -vpa. # wpa(vpa) = (vpa - upar)/vth @@ -2193,11 +2192,15 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) end function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir) - # Delete the upar contribution here if ignoring the 'upar shift' - vpa_unnorm = @. vpa.scratch2 = vthe[end,ir] * vpa.grid + upar[end,ir] - u_over_vt = upar[end,ir] / vthe[end,ir] + # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar + # shift' + sigma = -u_over_vt + + # Delete the upar contribution here if ignoring the 'upar shift' + vpa_unnorm = @. 
vpa.scratch2 = vthe[end,ir] * (vpa.grid - sigma) + # Initial guess for cut-off velocity is result from previous RK stage (which # might be the previous timestep if this is the first stage). Recalculate this # value from phi. @@ -2212,10 +2215,6 @@ function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir) error("In upper-z electron bc, failed to find vpa=vcut point, plus_vcut_ind=$plus_vcut_ind") end - # sigma is the location we use for w_∥(v_∥=0) - set to 0 to ignore the 'upar - # shift' - sigma = -u_over_vt - # sigma is between sigma_ind and sigma_ind+1 sigma_ind = searchsortedlast(vpa_unnorm, 0.0) if sigma_ind < 1 @@ -2227,7 +2226,7 @@ function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir) # sigma_fraction is the fraction of the distance between sigma_ind+1 and # sigma_ind where sigma is. - sigma_fraction = (sigma - vpa_unnorm[sigma_ind+1]) / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind+1]) + sigma_fraction = -vpa_unnorm[sigma_ind+1] / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind+1]) # Want to construct the w-grid corresponding to -vpa. # wpa(vpa) = (vpa - upar)/vth From c91a2bfa2f6a83422ead137548374480ff377740 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 19 Nov 2024 12:55:17 +0000 Subject: [PATCH 21/43] Make split of integrals at +/-vcut more consistent Where integrals over the into-the-sheath part of the distribution function are split into two parts at +/- vcut, modify the way that the split is done so that the 'part 2' integral between 0 and +/-vcut is calculated the same way as an integral over the out-from-the-sheath part of the distribution which is cut off at -/+vcut. Not sure if this is necessary, but seems nicer to be more consistent. 
--- .../src/electron_kinetic_equation.jl | 61 ++++++++++++++----- 1 file changed, 47 insertions(+), 14 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index dcb14db2e..30064571d 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2378,19 +2378,36 @@ end vcut_fraction = (-vcut - vpa_unnorm[minus_vcut_ind-1]) / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) function get_for_one_moment(integral_pieces) - # Integral contribution from the cell containing vcut - integral_vcut_cell = (0.5 * integral_pieces[minus_vcut_ind-1] + 0.5 * integral_pieces[minus_vcut_ind]) + # Integral contributions from the cell containing vcut. + # Define these as follows to be consistent with the way the cutoff is + # applied around plus_vcut_ind below. + # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2` + # include all the contributions from the grid points + # `minus_vcut_ind-1` and `minus_vcut_ind`, not just those from + # 'inside' the grid cell. 
+ if vcut_fraction < 0.5 + integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind-1] * (0.5 - vcut_fraction) + + integral_pieces[minus_vcut_ind] + integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] * (0.5 + vcut_fraction) + + # part1prime is d(part1)/d(vcut) + part1prime = -integral_pieces[minus_vcut_ind-1] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) + else + integral_vcut_cell_part2 = integral_pieces[minus_vcut_ind] * (1.5 - vcut_fraction) + integral_vcut_cell_part1 = integral_pieces[minus_vcut_ind-1] + + integral_pieces[minus_vcut_ind] * (vcut_fraction - 0.5) + + # part1prime is d(part1)/d(vcut) + part1prime = -integral_pieces[minus_vcut_ind] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) + end - part1 = sum(integral_pieces[1:minus_vcut_ind-2]) - part1 += 0.5 * integral_pieces[minus_vcut_ind-1] + vcut_fraction * integral_vcut_cell - # part1prime is d(part1)/d(vcut) - part1prime = -integral_vcut_cell / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) + part1 = sum(integral_pieces[1:minus_vcut_ind-2]) + integral_vcut_cell_part1 # Integral contribution from the cell containing sigma integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind]) part2 = sum(integral_pieces[minus_vcut_ind+1:sigma_ind-2]) - part2 += (1.0 - vcut_fraction) * integral_vcut_cell + 0.5 * integral_pieces[minus_vcut_ind] + 0.5 * integral_pieces[sigma_ind-1] + sigma_fraction * integral_sigma_cell + part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind-1] + sigma_fraction * integral_sigma_cell # part2prime is d(part2)/d(vcut) part2prime = -part1prime @@ -2597,22 +2614,38 @@ end function get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) # vcut_fraction is the fraction of the distance between plus_vcut_ind and # plus_vcut_ind+1 where vcut is. 
- vcut_fraction = (vcut - vpa_unnorm[plus_vcut_ind+1]) / (vpa_unnorm[plus_vcut_ind] - vpa_unnorm[plus_vcut_ind+1]) + vcut_fraction = (vcut - vpa_unnorm[plus_vcut_ind]) / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) function get_for_one_moment(integral_pieces) # Integral contribution from the cell containing vcut - integral_vcut_cell = (0.5 * integral_pieces[plus_vcut_ind] + 0.5 * integral_pieces[plus_vcut_ind+1]) + # Define these as follows to be consistent with the way the cutoff is + # applied around plus_vcut_ind below. + # Note that `integral_vcut_cell_part1` and `integral_vcut_cell_part2` + # include all the contributions from the grid points `plus_vcut_ind` + # and `plus_vcut_ind+1`, not just those from 'inside' the grid cell. + if vcut_fraction > 0.5 + integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] + + integral_pieces[plus_vcut_ind+1] * (vcut_fraction - 0.5) + integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind+1] * (1.5 - vcut_fraction) + + # part1prime is d(part1)/d(vcut) + part1prime = -integral_pieces[plus_vcut_ind+1] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) + else + integral_vcut_cell_part2 = integral_pieces[plus_vcut_ind] * (0.5 + vcut_fraction) + integral_vcut_cell_part1 = integral_pieces[plus_vcut_ind] * (0.5 - vcut_fraction) + + integral_pieces[plus_vcut_ind+1] + + # part1prime is d(part1)/d(vcut) + part1prime = -integral_pieces[plus_vcut_ind] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) + end - part1 = sum(integral_pieces[plus_vcut_ind+2:end]) - part1 += 0.5 * integral_pieces[plus_vcut_ind+1] + vcut_fraction * integral_vcut_cell - # part1prime is d(part1)/d(vcut) - part1prime = integral_vcut_cell / (vpa_unnorm[plus_vcut_ind] - vpa_unnorm[plus_vcut_ind+1]) + part1 = sum(integral_pieces[plus_vcut_ind+2:end]) + integral_vcut_cell_part1 # Integral contribution from the cell containing sigma integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1]) part2 = 
sum(integral_pieces[sigma_ind+2:plus_vcut_ind-1]) - part2 += (1.0 - vcut_fraction) * integral_vcut_cell + 0.5 * integral_pieces[plus_vcut_ind] + 0.5 * integral_pieces[sigma_ind+1] + sigma_fraction * integral_sigma_cell + part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind+1] + sigma_fraction * integral_sigma_cell # part2prime is d(part2)/d(vcut) part2prime = -part1prime From 9b7e886f3e86208283edb16113942dee27595ce5 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 19 Nov 2024 13:05:58 +0000 Subject: [PATCH 22/43] Narrower cutoff near zero of correction terms for electron bc integral If the prefactor that sets the correction terms to be proportional to vpa^2 near vpa=0 is too broad, then it is hard for the correction terms to fix errors in low moments (e.g. density moment), so making it a bit narrower reduces the size of the coefficients of the correction terms. --- moment_kinetics/src/electron_kinetic_equation.jl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 30064571d..690409c0f 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2518,7 +2518,9 @@ end c3 = get_part3_for_one_moment_lower(energy_integral_pieces) d3 = get_part3_for_one_moment_lower(cubic_integral_pieces) - correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) * vpa_unnorm^2 / vthe[1,ir]^2 / (1.0 + vpa_unnorm^2 / vthe[1,ir]^2) + # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. + sharpness = 4.0 + correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[1,ir]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[1,ir]^2) for ivpa ∈ 1:sigma_ind # We only add the corrections to 'part3', so zero them out for negative v_∥. 
# I think this is only actually significant for `sigma_ind-1` and @@ -2568,7 +2570,7 @@ end + B * v_over_vth + C * v_over_vth^2 + D * v_over_vth^3) * - v_over_vth^2 / (1.0 + v_over_vth^2) * + sharpness * v_over_vth^2 / (1.0 + sharpness * v_over_vth^2) * pdf[ivpa,1,1,ir] end end @@ -2754,7 +2756,9 @@ end c3 = get_part3_for_one_moment_upper(energy_integral_pieces) d3 = get_part3_for_one_moment_upper(cubic_integral_pieces) - correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) * vpa_unnorm^2 / vthe[end,ir]^2 / (1.0 + vpa_unnorm^2 / vthe[end,ir]^2) + # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. + sharpness = 4.0 + correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[end,ir]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[end,ir]^2) for ivpa ∈ sigma_ind:vpa.n # We only add the corrections to 'part3', so zero them out for positive v_∥. # I think this is only actually significant for `sigma_ind` and @@ -2804,7 +2808,7 @@ end + B * v_over_vth + C * v_over_vth^2 + D * v_over_vth^3) * - v_over_vth^2 / (1.0 + v_over_vth^2) * + sharpness * v_over_vth^2 / (1.0 + sharpness * v_over_vth^2) * pdf[ivpa,1,end,ir] end end From b6474d0b3b70b0ac08e2d271c091f9aac3ea2492 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 20 Nov 2024 11:50:13 +0000 Subject: [PATCH 23/43] Make kinetic electron bc more robust if a guess for phi_wall is 0 If phi_wall=0, giving vcut=0, then it is not possible to apply moment constraints, which causes an error due to a singular matrix. This case is unphysical, so should not ever be a converged solution (vcut=0 corresponds to a fully electron-absorbing sheath). To avoid the problem, check if vcut==0, and if so set vcut to some small value (we choose the value at the next grid point after the one closest to 0). 
[skip ci] --- moment_kinetics/src/electron_kinetic_equation.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 690409c0f..94e0a11dc 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2152,6 +2152,12 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) # -vcut is between minus_vcut_ind-1 and minus_vcut_ind minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) + if vcut == 0.0 + # Force a non-zero initial guess, as zero makes no sense - that would mean all + # electrons are absorbed, i.e. there is no sheath. + minus_vcut_ind -= 1 + vcut = -vpa_unnorm[minus_vcut_ind] + end if minus_vcut_ind < 2 error("In lower-z electron bc, failed to find vpa=-vcut point, minus_vcut_ind=$minus_vcut_ind") end @@ -2208,6 +2214,12 @@ function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir) # vcut is between plus_vcut_ind and plus_vcut_ind+1 plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) + if vcut == 0.0 + # Force a non-zero initial guess, as zero makes no sense - that would mean all + # electrons are absorbed, i.e. there is no sheath. + plus_vcut_ind += 1 + vcut = vpa_unnorm[plus_vcut_ind] + end if plus_vcut_ind < 1 error("In upper-z electron bc, failed to find vpa=vcut point, plus_vcut_ind=$plus_vcut_ind") end From b36758394b7bd5c1e4ff3138025d14579e21fe84 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Fri, 22 Nov 2024 11:58:55 +0000 Subject: [PATCH 24/43] Make it possible to pick the kinetic electron preconditioner from input By default (if `implicit_electron_ppar = true`), use LU when `block_size[] == 1` or ADI otherwise, but now can pass `implicit_electron_ppar = "lu"` or `implicit_electron_ppar = "adi"` to pick the preconditioner type explicitly.
--- moment_kinetics/src/input_structs.jl | 3 +- moment_kinetics/src/time_advance.jl | 45 ++++++++++++++++++++++------ 2 files changed, 38 insertions(+), 10 deletions(-) diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index edcdb9a8a..2bf4cf57c 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -33,7 +33,7 @@ using TOML an option but known at compile time when a `time_info` struct is passed as a function argument. """ -struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero} +struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero, Telectronprecon} n_variables::mk_int nstep::mk_int end_time::mk_float @@ -81,6 +81,7 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero implicit_ion_advance::Bool implicit_vpa_advection::Bool implicit_electron_ppar::Bool + electron_preconditioner_type::Telectronprecon constraint_forcing_rate::mk_float decrease_dt_iteration_threshold::mk_int increase_dt_iteration_threshold::mk_int diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 219fd0ef9..ce033a06a 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -409,6 +409,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, else error_sum_zero = 0.0 end + if electron === nothing # Setting up time_info for electrons. 
# Store io_input as the debug_io variable so we can use it to open the debug @@ -422,18 +423,50 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, else debug_io = nothing end + + implicit_electron_ppar = false + electron_preconditioner_type = nothing decrease_dt_iteration_threshold = t_input["decrease_dt_iteration_threshold"] increase_dt_iteration_threshold = t_input["increase_dt_iteration_threshold"] cap_factor_ion_dt = mk_float(t_input["cap_factor_ion_dt"]) electron_t_params = nothing elseif electron === false debug_io = nothing + implicit_electron_ppar = false + electron_preconditioner_type = nothing decrease_dt_iteration_threshold = -1 increase_dt_iteration_threshold = typemax(mk_int) cap_factor_ion_dt = Inf electron_t_params = nothing else debug_io = nothing + + implicit_electron_ppar = (t_input["implicit_electron_ppar"] !== false) + if implicit_electron_ppar + if t_input["implicit_electron_ppar"] === true + if block_size[] == 1 + # No need to parallelise, so un-split LU solver should be most efficient. + electron_preconditioner_type = Val(:electron_lu) + else + # Want to parallelise preconditioner, so use ADI method. 
+ electron_preconditioner_type = Val(:electron_adi) + end + else + electron_precon_types = Dict("lu" => :electron_lu, "adi" => :electron_adi) + if t_input["implicit_electron_ppar"] ∈ keys(electron_precon_types) + electron_preconditioner_type = Val(electron_precon_types[t_input["implicit_electron_ppar"]]) + else + precon_keys = collect(keys(electron_precon_types)) + error("Unrecognised option implicit_electron_ppar=" + * "\"$(t_input["implicit_electron_ppar"])\" which should be " + * "either false/true or a string giving the type of " + * "preconditioner to use - one of $precon_keys.") + end + end + else + electron_preconditioner_type = Val(:none) + end + decrease_dt_iteration_threshold = -1 increase_dt_iteration_threshold = typemax(mk_int) cap_factor_ion_dt = Inf @@ -458,7 +491,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, electron !== nothing && t_input["implicit_electron_advance"], electron !== nothing && t_input["implicit_ion_advance"], electron !== nothing && t_input["implicit_vpa_advection"], - electron !== nothing && t_input["implicit_electron_ppar"], + electron !== nothing && implicit_electron_ppar, + electron_preconditioner_type, mk_float(t_input["constraint_forcing_rate"]), decrease_dt_iteration_threshold, increase_dt_iteration_threshold, mk_float(cap_factor_ion_dt), t_input["write_after_fixed_step_count"], @@ -667,13 +701,6 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop input_dict, (z=z,); default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0) - if block_size[] == 1 - # No need to parallelise, so un-split LU solver should be most efficient. - electron_preconditioner_type = Val(:electron_lu) - else - # Want to parallelise preconditioner, so use ADI method. 
- electron_preconditioner_type = Val(:electron_adi) - end nl_solver_electron_advance_params = setup_nonlinear_solve(t_params.implicit_electron_advance || composition.electron_physics ∈ (kinetic_electrons, kinetic_electrons_with_temperature_equation), input_dict, @@ -682,7 +709,7 @@ function setup_time_advance!(pdf, fields, vz, vr, vzeta, vpa, vperp, z, r, gyrop default_rtol=t_params.rtol / 10.0, default_atol=t_params.atol / 10.0, electron_ppar_pdf_solve=true, - preconditioner_type=electron_preconditioner_type) + preconditioner_type=t_params.electron_preconditioner_type) nl_solver_ion_advance_params = setup_nonlinear_solve(t_params.implicit_ion_advance, input_dict, (s=composition.n_ion_species, r=r, z=z, vperp=vperp, From f69b3142d9911f2d8d2f7c32a09925e593fa553d Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 21 Nov 2024 21:08:54 +0000 Subject: [PATCH 25/43] Update downloaded HDF5 to 1.14.5 Hoped this might help find a bug, but did not help with that. Do not know any particular reason to update, but might as well keep up to date. 
--- machines/generic-batch-template/compile_dependencies.sh | 6 +++--- machines/generic-pc/compile_dependencies.sh | 6 +++--- machines/marconi/compile_dependencies.sh | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/machines/generic-batch-template/compile_dependencies.sh b/machines/generic-batch-template/compile_dependencies.sh index 2f333f12b..966ef12d9 100755 --- a/machines/generic-batch-template/compile_dependencies.sh +++ b/machines/generic-batch-template/compile_dependencies.sh @@ -77,10 +77,10 @@ if [[ $BUILDHDF5 == "y" && -d hdf5-build ]]; then fi if [[ $BUILDHDF5 == "y" ]]; then - HDF5=hdf5-1.14.3 + HDF5=hdf5-1.14.5 # Download and extract the source - wget -O ${HDF5}.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.3/src/hdf5-1.14.3.tar.bz2 - tar xjf ${HDF5}.tar.bz2 + wget -O ${HDF5}.tar.gz https://support.hdfgroup.org/releases/hdf5/v1_14/v1_14_5/downloads/hdf5-1.14.5.tar.gz + tar xzf ${HDF5}.tar.gz cd $HDF5 diff --git a/machines/generic-pc/compile_dependencies.sh b/machines/generic-pc/compile_dependencies.sh index ae70bd6b9..476c0ed0d 100755 --- a/machines/generic-pc/compile_dependencies.sh +++ b/machines/generic-pc/compile_dependencies.sh @@ -77,10 +77,10 @@ else fi if [[ $BUILDHDF5 == "y" ]]; then - HDF5=hdf5-1.14.3 + HDF5=hdf5-1.14.5 # Download and extract the source - wget -O ${HDF5}.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.3/src/hdf5-1.14.3.tar.bz2 - tar xjf ${HDF5}.tar.bz2 + wget -O ${HDF5}.tar.gz https://support.hdfgroup.org/releases/hdf5/v1_14/v1_14_5/downloads/hdf5-1.14.5.tar.gz + tar xzf ${HDF5}.tar.gz cd $HDF5 diff --git a/machines/marconi/compile_dependencies.sh b/machines/marconi/compile_dependencies.sh index e18a41e25..70aae5b49 100755 --- a/machines/marconi/compile_dependencies.sh +++ b/machines/marconi/compile_dependencies.sh @@ -30,10 +30,10 @@ if [ -d hdf5-build ]; then fi if [ $BUILDHDF5 -eq 0 ]; then - HDF5=hdf5-1.14.3 + HDF5=hdf5-1.14.5 # Download and
extract the source - wget -O ${HDF5}.tar.bz2 https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.3/src/hdf5-1.14.3.tar.bz2 - tar xjf ${HDF5}.tar.bz2 + wget -O ${HDF5}.tar.gz https://support.hdfgroup.org/releases/hdf5/v1_14/v1_14_5/downloads/hdf5-1.14.5.tar.gz + tar xzf ${HDF5}.tar.gz cd $HDF5 From 2e40ee29e1f8f1b2e4133edc447606faee3cf8f9 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 17 Nov 2024 16:18:57 +0000 Subject: [PATCH 26/43] Function to interpolate a function symmetrically around x=0 --- moment_kinetics/src/interpolation.jl | 41 ++++++++++++++++++++- moment_kinetics/test/interpolation_tests.jl | 40 +++++++++++++++++++- 2 files changed, 79 insertions(+), 2 deletions(-) diff --git a/moment_kinetics/src/interpolation.jl b/moment_kinetics/src/interpolation.jl index af1ae1514..69041a463 100644 --- a/moment_kinetics/src/interpolation.jl +++ b/moment_kinetics/src/interpolation.jl @@ -5,7 +5,7 @@ Note these are not guaranteed to be highly optimized! """ module interpolation -export interpolate_to_grid_z +export interpolate_to_grid_z, interpolate_to_grid_1d!, interpolate_symmetric! using ..array_allocation: allocate_float using ..moment_kinetics_structs: null_spatial_dimension_info, null_velocity_dimension_info @@ -275,4 +275,43 @@ function interpolate_to_grid_vpa(newgrid, f::AbstractVector{mk_float}, vpa, spec return interpolate_to_grid_1d(newgrid, f, vpa, spectral) end +""" + interpolate_symmetric!(result, newgrid, f, oldgrid) + +Interpolate f from oldgrid to newgrid, imposing that `f(x)` is symmetric around `x=0`, so +the interpolation is done by fitting a polynomial in `x^2` to the values of `f` given on +`oldgrid`, and evaluating on `newgrid`. Since interpolation is done in a polynomial of +`x^2`, the signs of the points on `newgrid` and `oldgrid` do not matter, and are ignored.
+""" +function interpolate_symmetric!(result, newgrid, f, oldgrid) + nnew = length(newgrid) + nold = length(oldgrid) + + if nnew == 0 + return nothing + end + + # Check all points in newgrid are covered by oldgrid (i.e. between zero and the + # maximum of oldgrid) + @boundscheck maximum(abs.(newgrid)) ≤ maximum(abs.(oldgrid)) || error("newgrid bigger ($(maximum(abs.(newgrid)))) than oldgrid ($(maximum(abs.(oldgrid)))).") + @boundscheck size(result) == size(newgrid) || error("Size of result ($(size(result))) is not the same as size of newgrid ($(size(newgrid))).") + @boundscheck size(f) == size(oldgrid) || error("Size of f ($(size(f))) is not the same as size of oldgrid ($(size(oldgrid))).") + + if nold == 1 + # Interpolating 'polynomial' is just a constant + result .= f[1] + else + result .= 0.0 + for j ∈ 1:nold + one_over_denominator = 1.0 / prod((oldgrid[j]^2 - oldgrid[k]^2) for k ∈ 1:nold if k ≠ j) + this_f = f[j] + for i ∈ 1:nnew + result[i] += this_f * prod((newgrid[i]^2 - oldgrid[k]^2) for k ∈ 1:nold if k ≠ j) * one_over_denominator + end + end + end + + return nothing end + +end # interpolation diff --git a/moment_kinetics/test/interpolation_tests.jl b/moment_kinetics/test/interpolation_tests.jl index a38506a2f..bd4a1348f 100644 --- a/moment_kinetics/test/interpolation_tests.jl +++ b/moment_kinetics/test/interpolation_tests.jl @@ -4,7 +4,7 @@ include("setup.jl") using moment_kinetics.coordinates: define_test_coordinate using moment_kinetics.interpolation: - interpolate_to_grid_1d, interpolate_to_grid_z, interpolate_to_grid_vpa + interpolate_to_grid_1d, interpolate_to_grid_z, interpolate_to_grid_vpa, interpolate_symmetric! using MPI @@ -93,6 +93,44 @@ function runtests() expected, rtol=rtol, atol=1.e-14) end end + + @testset "symmetric interpolation" begin + @testset "lower to upper $nx" for nx ∈ 4:10 + rtol = 0.2 ^ nx + + ix = collect(1:nx) + x = @. 
1.8 * (ix - 1) / (nx - 1) - 1.23 + first_positive_ind = searchsortedlast(x, 0.0) + 1 + f = cos.(x) + + expected = f[first_positive_ind:end] + + result = zeros(nx - first_positive_ind + 1) + @views interpolate_symmetric!(result, x[first_positive_ind:end], + f[1:first_positive_ind-1], + x[1:first_positive_ind-1]) + + @test isapprox(result, expected; rtol=rtol, atol=1.0e-14) + end + + @testset "upper to lower $nx" for nx ∈ 4:10 + rtol = 0.2 ^ nx + + ix = collect(1:nx) + x = @. 1.8 * (ix - 1) / (nx - 1) - 0.57 + first_positive_ind = searchsortedlast(x, 0.0) + 1 + f = cos.(x) + + expected = f[1:first_positive_ind-1] + + result = zeros(first_positive_ind-1) + @views interpolate_symmetric!(result, x[1:first_positive_ind-1], + f[first_positive_ind:end], + x[first_positive_ind:end]) + + @test isapprox(result, expected; rtol=rtol, atol=1.0e-14) + end + end end end From a750ad760e885c45fa09916ca2ce249ceb72e0a1 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 17 Nov 2024 17:04:08 +0000 Subject: [PATCH 27/43] Improve interpolation for kinetic electron bc in element containing zero Previous scheme just interpolated using existing values in whole element containing zero. This meant that the result of the boundary condition depended on some points that are overwritten by the boundary condition. Improve on this by doing special interpolation in the element containing zero. Instead of using the usual `interpolate_to_grid_1d!()` function, use `interpolate_symmetric!()` which does an interpolation that is forced to be symmetric around v_parallel=0, as the interpolation polynomial is a polynomial in `v_parallel^2` (the interpolating polynomial is constructed using a Lagrange polynomial method). The inputs to the interpolation are now just the function values on grid points (within the element containing zero) that are not set by the boundary condition. 
Also optimises the interpolation of the points in the elements not containing zero by restricting the interpolation to just the points needed for output, instead of interpolating to the full reversed grid. --- .../src/electron_kinetic_equation.jl | 70 ++++++++++++++++--- 1 file changed, 61 insertions(+), 9 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 94e0a11dc..fea5179d7 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -18,7 +18,8 @@ using ..calculus: derivative!, second_derivative!, integral, using ..communication using ..gauss_legendre: gausslegendre_info using ..input_structs -using ..interpolation: interpolate_to_grid_1d! +using ..interpolation: interpolate_to_grid_1d!, + interpolate_symmetric! using ..type_definitions: mk_float, mk_int using ..array_allocation: allocate_float using ..electron_fluid_equations: calculate_electron_moments!, @@ -2178,6 +2179,18 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) # sigma_ind where sigma is. sigma_fraction = -vpa_unnorm[sigma_ind-1] / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind-1]) + # Want the element that contains the interval on the lower side of sigma_ind. For + # points on element boundaries, the `ielement` array contains the element on the lower + # side of the grid point, so just looking up the `ielement` of `sigma_ind` is what we + # want here. + element_with_zero = vpa.ielement[sigma_ind] + element_with_zero_boundary = element_with_zero == 1 ? vpa.imin[element_with_zero] : + vpa.imin[element_with_zero] - 1 + # This searchsortedlast() call finds the last point ≤ to the negative of v_∥ + # at the lower boundary of the element containing zero. + last_point_near_zero = searchsortedlast(vpa_unnorm, + -vpa_unnorm[element_with_zero_boundary]) + # Want to construct the w-grid corresponding to -vpa. 
# wpa(vpa) = (vpa - upar)/vth # ⇒ vpa = vth*wpa(vpa) + upar @@ -2194,6 +2207,7 @@ function get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) reverse!(reversed_wpa_of_minus_vpa) return vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction, + element_with_zero, element_with_zero_boundary, last_point_near_zero, reversed_wpa_of_minus_vpa end @@ -2240,6 +2254,16 @@ function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir) # sigma_ind where sigma is. sigma_fraction = -vpa_unnorm[sigma_ind+1] / (vpa_unnorm[sigma_ind] - vpa_unnorm[sigma_ind+1]) + # Want the element that contains the interval on the upper side of sigma_ind. For + # points on element boundaries, the `ielement` array contains the element on the lower + # side of the grid point, we need the `ielement` of `sigma_ind+1` here. + element_with_zero = vpa.ielement[sigma_ind+1] + element_with_zero_boundary = vpa.imax[element_with_zero] + # This searchsortedfirst() call finds the first point ≥ to the negative of v_∥ at the + # upper boundary of the element containing zero. + first_point_near_zero = searchsortedfirst(vpa_unnorm, + -vpa_unnorm[element_with_zero_boundary]) + # Want to construct the w-grid corresponding to -vpa. # wpa(vpa) = (vpa - upar)/vth # ⇒ vpa = vth*wpa(vpa) + upar @@ -2256,6 +2280,7 @@ function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir) reverse!(reversed_wpa_of_minus_vpa) return vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction, + element_with_zero, element_with_zero_boundary, first_point_near_zero, reversed_wpa_of_minus_vpa end @@ -2320,7 +2345,6 @@ end begin_r_region() newton_max_its = 100 - reversed_pdf = vpa.scratch function get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1, b1prime, c1, c1prime, c2, c2prime, d1, d1prime, @@ -2365,14 +2389,28 @@ end # potential). 
vpa_unnorm, u_over_vt, vcut, minus_vcut_ind, sigma, sigma_ind, sigma_fraction, + element_with_zero, element_with_zero_boundary, last_point_near_zero, reversed_wpa_of_minus_vpa = get_cutoff_params_lower(upar, vthe, phi, me_over_mi, vpa, ir) # interpolate the pdf onto this grid - #@views interpolate_to_grid_1d!(interpolated_pdf, wpa_values, pdf[:,1,1,ir], vpa, vpa_spectral) - @views interpolate_to_grid_1d!(reversed_pdf, reversed_wpa_of_minus_vpa, pdf[:,1,1,ir], vpa, vpa_spectral) # Could make this more efficient by only interpolating to the points needed below, by taking an appropriate view of wpa_of_minus_vpa. Also, in the element containing vpa=0, this interpolation depends on the values that will be replaced by the reflected, interpolated values, which is not ideal (maybe this element should be treated specially first?). - reverse!(reversed_pdf) - pdf[sigma_ind:end,1,1,ir] .= reversed_pdf[sigma_ind:end] + # 'near zero' means in the range where + # abs(v_∥)≤abs(lower boundary of element including v_∥=0) + # 'far from zero' means larger values of v_∥. + + # Interpolate to the 'near zero' points + @views interpolate_symmetric!(pdf[sigma_ind:last_point_near_zero,1,1,ir], + vpa_unnorm[sigma_ind:last_point_near_zero], + pdf[element_with_zero_boundary:sigma_ind-1,1,1,ir], + vpa_unnorm[element_with_zero_boundary:sigma_ind-1]) + + # Interpolate to the 'far from zero' points + reversed_pdf_far_from_zero = vpa.scratch[last_point_near_zero+1:end] + @views interpolate_to_grid_1d!(reversed_pdf_far_from_zero, + reversed_wpa_of_minus_vpa[1:vpa.n-last_point_near_zero], + pdf[:,1,1,ir], vpa, vpa_spectral) + reverse!(reversed_pdf_far_from_zero) + pdf[last_point_near_zero+1:end,1,1,ir] .= reversed_pdf_far_from_zero # Per-grid-point contributions to moment integrals # Note that we need to include the normalisation factor of 1/sqrt(pi) that @@ -2606,14 +2644,28 @@ end # potential). 
vpa_unnorm, u_over_vt, vcut, plus_vcut_ind, sigma, sigma_ind, sigma_fraction, + element_with_zero, element_with_zero_boundary, first_point_near_zero, reversed_wpa_of_minus_vpa = get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir) # interpolate the pdf onto this grid - #@views interpolate_to_grid_1d!(interpolated_pdf, wpa_values, pdf[:,1,1,ir], vpa, vpa_spectral) - @views interpolate_to_grid_1d!(reversed_pdf, reversed_wpa_of_minus_vpa, pdf[:,1,end,ir], vpa, vpa_spectral) # Could make this more efficient by only interpolating to the points needed below, by taking an appropriate view of wpa_of_minus_vpa. Also, in the element containing vpa=0, this interpolation depends on the values that will be replaced by the reflected, interpolated values, which is not ideal (maybe this element should be treated specially first?). + # 'near zero' means in the range where + # abs(v_∥)≤abs(upper boundary of element including v_∥=0) + # 'far from zero' means more negative values of v_∥. + + # Interpolate to the 'near zero' points + @views interpolate_symmetric!(pdf[first_point_near_zero:sigma_ind,1,end,ir], + vpa_unnorm[first_point_near_zero:sigma_ind], + pdf[sigma_ind+1:element_with_zero_boundary,1,end,ir], + vpa_unnorm[sigma_ind+1:element_with_zero_boundary]) + + # Interpolate to the 'far from zero' points + reversed_pdf = vpa.scratch[1:first_point_near_zero-1] + @views interpolate_to_grid_1d!(reversed_pdf, + reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:end], + pdf[:,1,end,ir], vpa, vpa_spectral) reverse!(reversed_pdf) - pdf[1:sigma_ind,1,end,ir] .= reversed_pdf[1:sigma_ind] + pdf[1:first_point_near_zero-1,1,end,ir] .= reversed_pdf[1:first_point_near_zero-1] # Per-grid-point contributions to moment integrals # Note that we need to include the normalisation factor of 1/sqrt(pi) that From 8c028f17879844904e67d1825c9c7eb1b7aa4fed Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 19 Nov 2024 12:59:12 +0000 Subject: [PATCH 28/43] Don't do single update of vcut 
when bc_constraints=false Skipping the single update of vcut, which was previously done even when `bc_constraints=false` was passed to `enforce_boundary_condition_on_electron_pdf!()` makes it possible to match the result with an interpolation matrix that does not couple (due to the integral nature of the update of vcut) every point in the vpa grid. --- .../src/electron_kinetic_equation.jl | 104 +++++++++--------- 1 file changed, 54 insertions(+), 50 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index fea5179d7..9ae24a3a7 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2482,37 +2482,39 @@ end C = 0.0 # Always do at least one update of vcut epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) - while true - # Newton iteration update. Note that primes denote derivatives with - # respect to vcut - delta_v = - epsilon / epsilonprime - - if vcut > vthe[1,ir] && epsilonprime < 0.0 - # epsilon should be increasing with vcut at epsilon=0, so if - # epsilonprime is negative, the solution is actually at a lower vcut - - # at larger vcut, epsilon will just tend to 0 but never reach it. - delta_v = -0.1 * vthe[1,ir] - end + if bc_constraints + while true + # Newton iteration update. Note that primes denote derivatives with + # respect to vcut + delta_v = - epsilon / epsilonprime + + if vcut > vthe[1,ir] && epsilonprime < 0.0 + # epsilon should be increasing with vcut at epsilon=0, so if + # epsilonprime is negative, the solution is actually at a lower vcut - + # at larger vcut, epsilon will just tend to 0 but never reach it. + delta_v = -0.1 * vthe[1,ir] + end - # Prevent the step size from getting too big, to make Newton iteration - # more robust. 
- delta_v = min(delta_v, 0.1 * vthe[1,ir]) - delta_v = max(delta_v, -0.1 * vthe[1,ir]) + # Prevent the step size from getting too big, to make Newton iteration + # more robust. + delta_v = min(delta_v, 0.1 * vthe[1,ir]) + delta_v = max(delta_v, -0.1 * vthe[1,ir]) - vcut = vcut + delta_v - minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) + vcut = vcut + delta_v + minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) - epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) + epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) - if abs(epsilon) < newton_tol - break - end + if abs(epsilon) < newton_tol + break + end - if counter ≥ newton_max_its - error("Newton iteration for electron lower-z boundary failed to " - * "converge after $counter iterations") + if counter ≥ newton_max_its + error("Newton iteration for electron lower-z boundary failed to " + * "converge after $counter iterations") + end + counter += 1 end - counter += 1 end # Adjust pdf so that after reflecting and cutting off tail, it will obey the @@ -2734,37 +2736,39 @@ end counter = 1 # Always do at least one update of vcut epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) - while true - # Newton iteration update. Note that primes denote derivatives with - # respect to vcut - delta_v = - epsilon / epsilonprime - - if vcut > vthe[1,ir] && epsilonprime > 0.0 - # epsilon should be decreasing with vcut at epsilon=0, so if - # epsilonprime is positive, the solution is actually at a lower vcut - - # at larger vcut, epsilon will just tend to 0 but never reach it. - delta_v = -0.1 * vthe[1,ir] - end + if bc_constraints + while true + # Newton iteration update. 
Note that primes denote derivatives with + # respect to vcut + delta_v = - epsilon / epsilonprime + + if vcut > vthe[1,ir] && epsilonprime > 0.0 + # epsilon should be decreasing with vcut at epsilon=0, so if + # epsilonprime is positive, the solution is actually at a lower vcut - + # at larger vcut, epsilon will just tend to 0 but never reach it. + delta_v = -0.1 * vthe[1,ir] + end - # Prevent the step size from getting too big, to make Newton iteration - # more robust. - delta_v = min(delta_v, 0.1 * vthe[end,ir]) - delta_v = max(delta_v, -0.1 * vthe[end,ir]) + # Prevent the step size from getting too big, to make Newton iteration + # more robust. + delta_v = min(delta_v, 0.1 * vthe[end,ir]) + delta_v = max(delta_v, -0.1 * vthe[end,ir]) - vcut = vcut + delta_v - plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) + vcut = vcut + delta_v + plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) - epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) + epsilon, epsilonprime, A, C, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) - if abs(epsilon) < newton_tol - break - end + if abs(epsilon) < newton_tol + break + end - if counter ≥ newton_max_its - error("Newton iteration for electron upper-z boundary failed to " - * "converge after $counter iterations") + if counter ≥ newton_max_its + error("Newton iteration for electron upper-z boundary failed to " + * "converge after $counter iterations") + end + counter += 1 end - counter += 1 end # Adjust pdf so that after reflecting and cutting off tail, it will obey the From 68a6ccee10b0a7d4faf0aace0361d818a4baf7c0 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 20 Nov 2024 21:29:24 +0000 Subject: [PATCH 29/43] Calculate vcut_fraction in utility functions Reduces code duplication. 
--- .../src/electron_kinetic_equation.jl | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 9ae24a3a7..246da0fa1 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2284,6 +2284,16 @@ function get_cutoff_params_upper(upar, vthe, phi, me_over_mi, vpa, ir) reversed_wpa_of_minus_vpa end +function get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) + return (-vcut - vpa_unnorm[minus_vcut_ind-1]) / + (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) +end + +function get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) + return (vcut - vpa_unnorm[plus_vcut_ind]) / + (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) +end + @timeit global_timer enforce_boundary_condition_on_electron_pdf!( pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi; @@ -2425,7 +2435,7 @@ end function get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and # minus_vcut_ind where -vcut is. - vcut_fraction = (-vcut - vpa_unnorm[minus_vcut_ind-1]) / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) + vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) function get_for_one_moment(integral_pieces) # Integral contributions from the cell containing vcut. @@ -2525,7 +2535,7 @@ end pdf[plus_vcut_ind+2:end,1,1,ir] .= 0.0 # vcut_fraction is the fraction of the distance between plus_vcut_ind and # plus_vcut_ind+1 where vcut is. 
- vcut_fraction = (vcut - vpa_unnorm[plus_vcut_ind]) / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) + vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) if vcut_fraction > 0.5 pdf[plus_vcut_ind+1,1,1,ir] *= vcut_fraction - 0.5 else @@ -2682,7 +2692,7 @@ end function get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) # vcut_fraction is the fraction of the distance between plus_vcut_ind and # plus_vcut_ind+1 where vcut is. - vcut_fraction = (vcut - vpa_unnorm[plus_vcut_ind]) / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) + vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) function get_for_one_moment(integral_pieces) # Integral contribution from the cell containing vcut @@ -2777,14 +2787,14 @@ end minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) pdf[1:minus_vcut_ind-2,1,end,ir] .= 0.0 - # vcut_fraction is the fraction of the distance between minus_vcut_ind and - # minus_vcut_ind-1 where -vcut is. - vcut_fraction = (-vcut - vpa_unnorm[minus_vcut_ind]) / (vpa_unnorm[minus_vcut_ind-1] - vpa_unnorm[minus_vcut_ind]) - if vcut_fraction > 0.5 - pdf[minus_vcut_ind-1,1,end,ir] *= vcut_fraction - 0.5 + # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and + # minus_vcut_ind where -vcut is. 
+ vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) + if vcut_fraction < 0.5 + pdf[minus_vcut_ind-1,1,end,ir] *= 0.5 - vcut_fraction else pdf[minus_vcut_ind-1,1,end,ir] = 0.0 - pdf[minus_vcut_ind,1,end,ir] *= vcut_fraction + 0.5 + pdf[minus_vcut_ind,1,end,ir] *= 1.5 - vcut_fraction end # update the electrostatic potential at the boundary to be the value corresponding to the updated cutoff velocity From 45b8466cf7ad984cb1a8fb684ecb9d3105bfec1a Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 20 Nov 2024 22:16:56 +0000 Subject: [PATCH 30/43] Update vcut for kinetic electron bc at each Newton iteration Improves convergence a bit, allowing electron solver to take larger pseudo-timesteps, in at least one case. --- .../src/electron_fluid_equations.jl | 2 +- .../src/electron_kinetic_equation.jl | 45 +++++++++++++++++-- moment_kinetics/src/nonlinear_solvers.jl | 2 +- moment_kinetics/src/time_advance.jl | 2 +- moment_kinetics/src/vpa_advection.jl | 2 +- .../test/nonlinear_solver_tests.jl | 4 +- 6 files changed, 47 insertions(+), 10 deletions(-) diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl index dec0aff8f..d62ed7a73 100644 --- a/moment_kinetics/src/electron_fluid_equations.jl +++ b/moment_kinetics/src/electron_fluid_equations.jl @@ -691,7 +691,7 @@ end # `residual` is zero, electron_ppar is the result of a backward-Euler timestep: # (f_new - f_old) / dt = RHS(f_new) # ⇒ (f_new - f_old)/dt - RHS(f_new) = 0 - function residual_func!(residual, electron_ppar) + function residual_func!(residual, electron_ppar; krylov=false) begin_z_region() @loop_z iz begin residual[iz] = ppar_in[iz] diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index 246da0fa1..cfc88e187 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1400,7 +1400,7 @@ global_rank[] == 0 && 
println("recalculating precon") # Do a backward-Euler update of the electron pdf, and (if evove_ppar=true) the # electron parallel pressure. - function residual_func!(this_residual, new_variables) + function residual_func!(this_residual, new_variables; krylov=false) electron_ppar_residual, f_electron_residual = this_residual electron_ppar_newvar, f_electron_newvar = new_variables @@ -1424,7 +1424,8 @@ global_rank[] == 0 && println("recalculating precon") moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - composition.me_over_mi; bc_constraints=false) + composition.me_over_mi; bc_constraints=false, + update_vcut=!krylov) if evolve_ppar # Calculate heat flux and derivatives using new_variables @@ -1852,7 +1853,7 @@ to allow the outer r-loop to be parallelised. newton_success = false for ir ∈ 1:r.n - function residual_func!(residual, new_variables; debug=false) + function residual_func!(residual, new_variables; debug=false, krylov=false) electron_ppar_residual, f_electron_residual = residual electron_ppar_new, f_electron_new = new_variables @@ -2297,7 +2298,9 @@ end @timeit global_timer enforce_boundary_condition_on_electron_pdf!( pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi; - bc_constraints=true) = begin + bc_constraints=true, update_vcut=true) = begin + + @boundscheck bc_constraints && !update_vcut && error("update_vcut is not used when bc_constraints=true, but update_vcut has non-default value") newton_tol = 1.0e-13 @@ -2525,6 +2528,23 @@ end end counter += 1 end + elseif update_vcut + # When bc_constraints=false, no constraints are applied in + # get_integrals_and_derivatives_lowerz(), so updating vcut is usually just + # solving a linear equation, not doing a Newton iteration. The exception + # is if minus_vcut_ind changes, in which case we have to re-do the update. 
+ while true + vcut = vcut - epsilon / epsilonprime + minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) + + vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) + + if 0.0 ≤ vcut_fraction ≤ 1.0 + break + end + + epsilon, epsilonprime, _, _, _, _, _, _ = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) + end end # Adjust pdf so that after reflecting and cutting off tail, it will obey the @@ -2779,6 +2799,23 @@ end end counter += 1 end + elseif update_vcut + # When bc_constraints=false, no constraints are applied in + # get_integrals_and_derivatives_upperz(), so updating vcut is usually just + # solving a linear equation, not doing a Newton iteration. The exception + # is if plus_vcut_ind changes, in which case we have to re-do the update. + while true + vcut = vcut - epsilon / epsilonprime + plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) + + vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) + + if 0.0 ≤ vcut_fraction ≤ 1.0 + break + end + + epsilon, epsilonprime, _, _, _, _, _, _ = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) + end end # Adjust pdf so that after reflecting and cutting off tail, it will obey the diff --git a/moment_kinetics/src/nonlinear_solvers.jl b/moment_kinetics/src/nonlinear_solvers.jl index 5fed3dc4c..7b2707d69 100644 --- a/moment_kinetics/src/nonlinear_solvers.jl +++ b/moment_kinetics/src/nonlinear_solvers.jl @@ -1252,7 +1252,7 @@ MGS-GMRES' in Zou (2023) [https://doi.org/10.1016/j.amc.2023.127869].
end parallel_map(solver_type, (x,v) -> x + Jv_scale_factor * v, v, x, v) - residual_func!(rhs_delta, v) + residual_func!(rhs_delta, v; krylov=true) parallel_map(solver_type, (rhs_delta, residual0) -> (rhs_delta - residual0) * inv_Jv_scale_factor, v, rhs_delta, residual0) left_preconditioner(v) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index ce033a06a..f3592dc80 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -3783,7 +3783,7 @@ Do a backward-Euler timestep for all terms in the ion kinetic equation. # `residual` is zero, f_new is the result of a backward-Euler timestep: # (f_new - f_old) / dt = RHS(f_new) # ⇒ f_new - f_old - dt*RHS(f_new) = 0 - function residual_func!(residual, f_new) + function residual_func!(residual, f_new; krylov=false) begin_s_r_z_vperp_vpa_region() @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin residual[ivpa,ivperp,iz,ir,is] = f_old[ivpa,ivperp,iz,ir,is] diff --git a/moment_kinetics/src/vpa_advection.jl b/moment_kinetics/src/vpa_advection.jl index 8a04e4936..8abfe38e7 100644 --- a/moment_kinetics/src/vpa_advection.jl +++ b/moment_kinetics/src/vpa_advection.jl @@ -267,7 +267,7 @@ end # `residual` is zero, f_new is the result of a backward-Euler timestep: # (f_new - f_old) / dt = RHS(f_new) # ⇒ f_new - f_old - dt*RHS(f_new) = 0 - function residual_func!(residual, f_new) + function residual_func!(residual, f_new; krylov=false) apply_bc!(f_new) residual .= f_old advance_f_local!(residual, f_new, vpa_advect[is], ivperp, iz, ir, vpa, dt, diff --git a/moment_kinetics/test/nonlinear_solver_tests.jl b/moment_kinetics/test/nonlinear_solver_tests.jl index ab68389f4..36c74eb21 100644 --- a/moment_kinetics/test/nonlinear_solver_tests.jl +++ b/moment_kinetics/test/nonlinear_solver_tests.jl @@ -67,7 +67,7 @@ function linear_test() zeros(mk_float, 0, 0)) coords = NamedTuple(c => the_coord for c ∈ coord_names) - function rhs_func!(residual, x) + function 
rhs_func!(residual, x; krylov=false) if serial_solve residual .= A * x - b else @@ -180,7 +180,7 @@ function nonlinear_test() zeros(mk_float, 0, 0)) coords = NamedTuple(c => the_coord for c ∈ coord_names) - function rhs_func!(residual, x) + function rhs_func!(residual, x; krylov=false) if serial_solve i = 1 D = abs(x[i])^2.5 From d3c3e8e654d547e4e7640ab652a45ed87b1f4f37 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 21 Nov 2024 09:27:49 +0000 Subject: [PATCH 31/43] Add missing @views, move struct field lookups out of loops for electrons Should reduce allocations. --- moment_kinetics/src/electron_fluid_equations.jl | 2 +- moment_kinetics/src/electron_vpa_advection.jl | 14 +++++++++----- moment_kinetics/src/electron_z_advection.jl | 14 +++++++++----- 3 files changed, 19 insertions(+), 11 deletions(-) diff --git a/moment_kinetics/src/electron_fluid_equations.jl b/moment_kinetics/src/electron_fluid_equations.jl index d62ed7a73..bd2f1cdbb 100644 --- a/moment_kinetics/src/electron_fluid_equations.jl +++ b/moment_kinetics/src/electron_fluid_equations.jl @@ -878,7 +878,7 @@ function calculate_electron_qpar_from_pdf_no_r!(qpar, ppar, vth, pdf, vpa, ir) begin_z_region() ivperp = 1 @loop_z iz begin - @views qpar[iz] = 2*ppar[iz]*vth[iz]*integrate_over_vspace(pdf[:, ivperp, iz], vpa.grid.^3, vpa.wgts) + @views qpar[iz] = 2*ppar[iz]*vth[iz]*integrate_over_vspace(pdf[:, ivperp, iz], vpa.grid, 3, vpa.wgts) end end diff --git a/moment_kinetics/src/electron_vpa_advection.jl b/moment_kinetics/src/electron_vpa_advection.jl index 0c2f7d02a..2ffcc3298 100644 --- a/moment_kinetics/src/electron_vpa_advection.jl +++ b/moment_kinetics/src/electron_vpa_advection.jl @@ -22,6 +22,9 @@ calculate the wpa-advection term for the electron kinetic equation ir) = begin begin_z_vperp_region() + adv_fac = advect[1].adv_fac + speed = advect[1].speed + # create a reference to a scratch_dummy array to store the wpa-derivative of the electron pdf dpdf_dvpa = @view 
scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] #d2pdf_dvpa2 = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] @@ -31,7 +34,7 @@ calculate the wpa-advection term for the electron kinetic equation electron_source_settings, ir) # update adv_fac @loop_z_vperp iz ivperp begin - @views @. advect[1].adv_fac[:,ivperp,iz,ir] = -advect[1].speed[:,ivperp,iz,ir] + @views @. adv_fac[:,ivperp,iz,ir] = -speed[:,ivperp,iz,ir] end #calculate the upwind derivative of the electron pdf w.r.t. wpa @loop_z_vperp iz ivperp begin @@ -43,7 +46,7 @@ calculate the wpa-advection term for the electron kinetic equation #end # calculate the advection term @loop_z_vperp iz ivperp begin - @. pdf_out[:,ivperp,iz] += dt * advect[1].adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz] + @views @. pdf_out[:,ivperp,iz] += dt * adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz] #@. pdf_out[:,ivperp,iz] -= advect[1].adv_fac[:,ivperp,iz,ir] * dpdf_dvpa[:,ivperp,iz] + 0.0001*d2pdf_dvpa2[:,ivperp,iz] end return nothing @@ -58,10 +61,11 @@ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa, dppar_dz = @view moments.electron.dppar_dz[:,ir] dqpar_dz = @view moments.electron.dqpar_dz[:,ir] dvth_dz = @view moments.electron.dvth_dz[:,ir] + speed = advect.speed # calculate the advection speed in wpa @loop_z_vperp_vpa iz ivperp ivpa begin - advect.speed[ivpa,ivperp,iz,ir] = ((vth[iz] * dppar_dz[iz] + vpa[ivpa] * dqpar_dz[iz]) - / (2 * ppar[iz]) - vpa[ivpa]^2 * dvth_dz[iz]) + speed[ivpa,ivperp,iz,ir] = ((vth[iz] * dppar_dz[iz] + vpa[ivpa] * dqpar_dz[iz]) + / (2 * ppar[iz]) - vpa[ivpa]^2 * dvth_dz[iz]) end for index ∈ eachindex(electron_source_settings) @@ -77,7 +81,7 @@ function update_electron_speed_vpa!(advect, density, upar, ppar, moments, vpa, ppar[iz] + 0.5 * source_density_amplitude[iz] / density[iz] @loop_vperp_vpa ivperp ivpa begin - advect.speed[ivpa,ivperp,iz,ir] += term1 + vpa[ivpa] * term2_over_vpa + speed[ivpa,ivperp,iz,ir] += term1 + vpa[ivpa] * term2_over_vpa end end end diff --git 
a/moment_kinetics/src/electron_z_advection.jl b/moment_kinetics/src/electron_z_advection.jl index 8c78e58ab..ddb530c1b 100644 --- a/moment_kinetics/src/electron_z_advection.jl +++ b/moment_kinetics/src/electron_z_advection.jl @@ -23,6 +23,9 @@ calculate the z-advection term for the electron kinetic equation = wpa * vthe * scratch_dummy, dt, ir) = begin begin_vperp_vpa_region() + adv_fac = advect[1].adv_fac + speed = advect[1].speed + # create a pointer to a scratch_dummy array to store the z-derivative of the electron pdf dpdf_dz = @view scratch_dummy.buffer_vpavperpzr_1[:,:,:,ir] d2pdf_dz2 = @view scratch_dummy.buffer_vpavperpzr_2[:,:,:,ir] @@ -32,11 +35,11 @@ calculate the z-advection term for the electron kinetic equation = wpa * vthe * # update adv_fac -- note that there is no factor of dt here because # in some cases the electron kinetic equation is solved as a steady-state equation iteratively @loop_vperp_vpa ivperp ivpa begin - @views advect[1].adv_fac[:,ivpa,ivperp,ir] = -advect[1].speed[:,ivpa,ivperp,ir] + @views @. 
adv_fac[:,ivpa,ivperp,ir] = -speed[:,ivpa,ivperp,ir] end #calculate the upwind derivative @views derivative_z_pdf_vpavperpz!( - dpdf_dz, pdf_in, advect[1].adv_fac[:,:,:,ir], + dpdf_dz, pdf_in, adv_fac[:,:,:,ir], scratch_dummy.buffer_vpavperpr_1[:,:,ir], scratch_dummy.buffer_vpavperpr_2[:,:,ir], scratch_dummy.buffer_vpavperpr_3[:,:,ir], @@ -49,8 +52,8 @@ calculate the z-advection term for the electron kinetic equation = wpa * vthe * # calculate the advection term begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin - pdf_out[ivpa,ivperp,iz] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz] - #pdf_out[ivpa,ivperp,iz] += dt * advect[1].adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz] + 0.0001*d2pdf_dz2[ivpa,ivperp,iz] + pdf_out[ivpa,ivperp,iz] += dt * adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz] + #pdf_out[ivpa,ivperp,iz] += dt * adv_fac[iz,ivpa,ivperp,ir] * dpdf_dz[ivpa,ivperp,iz] + 0.0001*d2pdf_dz2[ivpa,ivperp,iz] end return nothing end @@ -60,9 +63,10 @@ calculate the electron advection speed in the z-direction at each grid point """ function update_electron_speed_z!(advect, upar, vth, vpa, ir) # the electron advection speed in z is v_par = w_par * v_the + speed = advect.speed @loop_vperp_vpa ivperp ivpa begin #@. @views advect.speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth - @. @views advect.speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth + upar + @. speed[:,ivpa,ivperp,ir] = vpa[ivpa] * vth + upar end return nothing end From 651f141b72c006d72a41dcb2d4fbc54777d1db23 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 21 Nov 2024 09:44:24 +0000 Subject: [PATCH 32/43] Clean up @views in enforce_boundary_condition_on_electron_pdf!() Hopefully reduce allocations. Maybe save a little compilation time by removing some unnecessary `@views`. 
--- .../src/electron_kinetic_equation.jl | 76 +++++++++---------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index cfc88e187..d6b2c1a4a 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -2317,7 +2317,7 @@ end end if vperp.n > 1 begin_r_z_vpa_region() - @views enforce_vperp_boundary_condition!(pdf, vperp.bc, vperp, vperp_spectral) + enforce_vperp_boundary_condition!(pdf, vperp.bc, vperp, vperp_spectral) end if z.bc == "periodic" @@ -2418,7 +2418,7 @@ end vpa_unnorm[element_with_zero_boundary:sigma_ind-1]) # Interpolate to the 'far from zero' points - reversed_pdf_far_from_zero = vpa.scratch[last_point_near_zero+1:end] + reversed_pdf_far_from_zero = @view vpa.scratch[last_point_near_zero+1:end] @views interpolate_to_grid_1d!(reversed_pdf_far_from_zero, reversed_wpa_of_minus_vpa[1:vpa.n-last_point_near_zero], pdf[:,1,1,ir], vpa, vpa_spectral) @@ -2430,10 +2430,10 @@ end # would be factored in by integrate_over_vspace(). This will need to # change/adapt when we support 2V as well as 1V. density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) - flow_integral_pieces = @views @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[1,ir] - energy_integral_pieces = @views @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[1,ir] - cubic_integral_pieces = @views @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[1,ir] - quartic_integral_pieces = @views @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[1,ir] + flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[1,ir] + energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[1,ir] + cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[1,ir] + quartic_integral_pieces = @. 
vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[1,ir] function get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and @@ -2464,12 +2464,12 @@ end part1prime = -integral_pieces[minus_vcut_ind] / (vpa_unnorm[minus_vcut_ind] - vpa_unnorm[minus_vcut_ind-1]) end - part1 = sum(integral_pieces[1:minus_vcut_ind-2]) + integral_vcut_cell_part1 + part1 = sum(@view integral_pieces[1:minus_vcut_ind-2]) + integral_vcut_cell_part1 # Integral contribution from the cell containing sigma integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind]) - part2 = sum(integral_pieces[minus_vcut_ind+1:sigma_ind-2]) + part2 = sum(@view integral_pieces[minus_vcut_ind+1:sigma_ind-2]) part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind-1] + sigma_fraction * integral_sigma_cell # part2prime is d(part2)/d(vcut) part2prime = -part1prime @@ -2578,10 +2578,10 @@ end # Need to recalculate these with the updated distribution function density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) - flow_integral_pieces = @views @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[1,ir] - energy_integral_pieces = @views @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[1,ir] - cubic_integral_pieces = @views @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[1,ir] - quartic_integral_pieces = @views @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[1,ir] + flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[1,ir] + energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[1,ir] + cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[1,ir] + quartic_integral_pieces = @. 
vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[1,ir] # Update the part2 integrals since we've applied the A and C factors _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) @@ -2590,7 +2590,7 @@ end # Integral contribution from the cell containing sigma integral_sigma_cell = (0.5 * integral_pieces[sigma_ind-1] + 0.5 * integral_pieces[sigma_ind]) - @views part3 = sum(integral_pieces[sigma_ind+1:plus_vcut_ind+1]) + part3 = sum(@view integral_pieces[sigma_ind+1:plus_vcut_ind+1]) part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell return part3 @@ -2616,12 +2616,12 @@ end # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. correction0_integral_pieces[ivpa] = 0.0 end - correction1_integral_pieces = @views @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1,ir] - correction2_integral_pieces = @views @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1,ir] - correction3_integral_pieces = @views @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1,ir] - correction4_integral_pieces = @views @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1,ir] - correction5_integral_pieces = @views @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1,ir] - correction6_integral_pieces = @views @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1,ir] + correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1,ir] + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1,ir] + correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1,ir] + correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1,ir] + correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1,ir] + correction6_integral_pieces = @. 
vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1,ir] alpha = get_part3_for_one_moment_lower(correction0_integral_pieces) beta = get_part3_for_one_moment_lower(correction1_integral_pieces) @@ -2692,22 +2692,22 @@ end vpa_unnorm[sigma_ind+1:element_with_zero_boundary]) # Interpolate to the 'far from zero' points - reversed_pdf = vpa.scratch[1:first_point_near_zero-1] + reversed_pdf = @view vpa.scratch[1:first_point_near_zero-1] @views interpolate_to_grid_1d!(reversed_pdf, reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:end], pdf[:,1,end,ir], vpa, vpa_spectral) reverse!(reversed_pdf) - pdf[1:first_point_near_zero-1,1,end,ir] .= reversed_pdf[1:first_point_near_zero-1] + pdf[1:first_point_near_zero-1,1,end,ir] .= reversed_pdf # Per-grid-point contributions to moment integrals # Note that we need to include the normalisation factor of 1/sqrt(pi) that # would be factored in by integrate_over_vspace(). This will need to # change/adapt when we support 2V as well as 1V. density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) - flow_integral_pieces = @views @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[end,ir] - energy_integral_pieces = @views @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[end,ir] - cubic_integral_pieces = @views @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[end,ir] - quartic_integral_pieces = @views @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[end,ir] + flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[end,ir] + energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[end,ir] + cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[end,ir] + quartic_integral_pieces = @. 
vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[end,ir] function get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) # vcut_fraction is the fraction of the distance between plus_vcut_ind and @@ -2737,12 +2737,12 @@ end part1prime = -integral_pieces[plus_vcut_ind] / (vpa_unnorm[plus_vcut_ind+1] - vpa_unnorm[plus_vcut_ind]) end - part1 = sum(integral_pieces[plus_vcut_ind+2:end]) + integral_vcut_cell_part1 + part1 = sum(@view integral_pieces[plus_vcut_ind+2:end]) + integral_vcut_cell_part1 # Integral contribution from the cell containing sigma integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1]) - part2 = sum(integral_pieces[sigma_ind+2:plus_vcut_ind-1]) + part2 = sum(@view integral_pieces[sigma_ind+2:plus_vcut_ind-1]) part2 += integral_vcut_cell_part2 + 0.5 * integral_pieces[sigma_ind+1] + sigma_fraction * integral_sigma_cell # part2prime is d(part2)/d(vcut) part2prime = -part1prime @@ -2849,10 +2849,10 @@ end # Need to recalculate these with the updated distribution function density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) - flow_integral_pieces = @views @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[end,ir] - energy_integral_pieces = @views @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[end,ir] - cubic_integral_pieces = @views @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[end,ir] - quartic_integral_pieces = @views @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[end,ir] + flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[end,ir] + energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[end,ir] + cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[end,ir] + quartic_integral_pieces = @. 
vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[end,ir] # Update the part2 integrals since we've applied the A and C factors _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) @@ -2861,7 +2861,7 @@ end # Integral contribution from the cell containing sigma integral_sigma_cell = (0.5 * integral_pieces[sigma_ind] + 0.5 * integral_pieces[sigma_ind+1]) - @views part3 = sum(integral_pieces[minus_vcut_ind-1:sigma_ind-1]) + part3 = sum(@view integral_pieces[minus_vcut_ind-1:sigma_ind-1]) part3 += 0.5 * integral_pieces[sigma_ind] + (1.0 - sigma_fraction) * integral_sigma_cell return part3 @@ -2887,12 +2887,12 @@ end # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. correction0_integral_pieces[ivpa] = 0.0 end - correction1_integral_pieces = @views @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end,ir] - correction2_integral_pieces = @views @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end,ir] - correction3_integral_pieces = @views @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end,ir] - correction4_integral_pieces = @views @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end,ir] - correction5_integral_pieces = @views @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end,ir] - correction6_integral_pieces = @views @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end,ir] + correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end,ir] + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end,ir] + correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end,ir] + correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end,ir] + correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end,ir] + correction6_integral_pieces = @. 
vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end,ir] alpha = get_part3_for_one_moment_upper(correction0_integral_pieces) beta = get_part3_for_one_moment_upper(correction1_integral_pieces) From 18f5f56fc482c03ba579bf25cc62b759a23637d9 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 21 Nov 2024 15:31:34 +0000 Subject: [PATCH 33/43] Improve type stability in enforce_boundary_condition_on_electron_pdf!() --- moment_kinetics/src/boundary_conditions.jl | 3 +- .../src/electron_kinetic_equation.jl | 168 +++++++++--------- moment_kinetics/src/initial_conditions.jl | 18 +- 3 files changed, 99 insertions(+), 90 deletions(-) diff --git a/moment_kinetics/src/boundary_conditions.jl b/moment_kinetics/src/boundary_conditions.jl index 2bf467e74..a6fcbdc89 100644 --- a/moment_kinetics/src/boundary_conditions.jl +++ b/moment_kinetics/src/boundary_conditions.jl @@ -1029,7 +1029,7 @@ function enforce_v_boundary_condition_local!(f, bc, speed, v_diffusion, v, v_spe D0 = v_spectral.lobatto.Dmat[end,:] # adjust F(vpa = L/2) so that d F / d vpa = 0 at vpa = L/2 - f[end] = -sum(D0[1:ngrid-1].*f[end-v.ngrid+1:end-1])/D0[v.ngrid] + f[end] = -sum(D0[1:v.ngrid-1].*f[end-v.ngrid+1:end-1])/D0[v.ngrid] elseif bc == "periodic" f[1] = 0.5*(f[1]+f[end]) f[end] = f[1] @@ -1038,6 +1038,7 @@ function enforce_v_boundary_condition_local!(f, bc, speed, v_diffusion, v, v_spe else error("Unsupported boundary condition option '$bc' for $(v.name)") end + return nothing end """ diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index d6b2c1a4a..b2eff367a 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -404,8 +404,8 @@ function update_electron_pdf_with_time_advance!(scratch, pdf, moments, phi, coll moments.electron.dens, composition) end - apply_electron_bc_and_constraints!(scratch[istage+1], phi, moments, z, vperp, - vpa, vperp_spectral, vpa_spectral, + 
apply_electron_bc_and_constraints!(scratch[istage+1], phi, moments, r, z, + vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, num_diss_params, composition) latest_pdf = scratch[istage+1].pdf_electron @@ -1424,7 +1424,7 @@ global_rank[] == 0 && println("recalculating precon") moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - composition.me_over_mi; bc_constraints=false, + composition.me_over_mi, ir; bc_constraints=false, update_vcut=!krylov) if evolve_ppar @@ -2068,7 +2068,7 @@ function speedup_hack!(fvec_out, fvec_in, z_speedup_fac, z, vpa; evolve_ppar=fal return nothing end -function apply_electron_bc_and_constraints!(this_scratch, phi, moments, z, vperp, vpa, +function apply_electron_bc_and_constraints!(this_scratch, phi, moments, r, z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, num_diss_params, composition) latest_pdf = this_scratch.pdf_electron @@ -2078,13 +2078,15 @@ function apply_electron_bc_and_constraints!(this_scratch, phi, moments, z, vperp latest_pdf[ivpa,ivperp,iz,ir] = max(latest_pdf[ivpa,ivperp,iz,ir], 0.0) end - # enforce the boundary condition(s) on the electron pdf - enforce_boundary_condition_on_electron_pdf!(latest_pdf, phi, moments.electron.vth, - moments.electron.upar, z, vperp, vpa, - vperp_spectral, vpa_spectral, vpa_advect, - moments, - num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - composition.me_over_mi) + for ir ∈ 1:r.n + # enforce the boundary condition(s) on the electron pdf + @views enforce_boundary_condition_on_electron_pdf!( + latest_pdf[:,:,:,ir], phi[:,ir], moments.electron.vth[:,ir], + moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, + vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + composition.me_over_mi, ir) + end begin_r_z_region() A = moments.electron.constraints_A_coefficient @@ -2118,7 +2120,7 @@ function 
apply_electron_bc_and_constraints_no_r!(f_electron, phi, moments, z, vp f_electron, phi, moments.electron.vth[:,ir], moments.electron.upar[:,ir], z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - composition.me_over_mi) + composition.me_over_mi, ir) begin_z_region() A = moments.electron.constraints_A_coefficient @@ -2297,7 +2299,7 @@ end @timeit global_timer enforce_boundary_condition_on_electron_pdf!( pdf, phi, vthe, upar, z, vperp, vpa, vperp_spectral, - vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi; + vpa_spectral, vpa_adv, moments, vpa_diffusion, me_over_mi, ir; bc_constraints=true, update_vcut=true) = begin @boundscheck bc_constraints && !update_vcut && error("update_vcut is not used when bc_constraints=true, but update_vcut has non-default value") @@ -2306,18 +2308,18 @@ end # Enforce velocity-space boundary conditions if vpa.n > 1 - begin_r_z_vperp_region() - @loop_r_z_vperp ir iz ivperp begin + begin_z_vperp_region() + @loop_z_vperp iz ivperp begin # enforce the vpa BC # use that adv.speed independent of vpa - @views enforce_v_boundary_condition_local!(pdf[:,ivperp,iz,ir], vpa.bc, + @views enforce_v_boundary_condition_local!(pdf[:,ivperp,iz], vpa.bc, vpa_adv[1].speed[:,ivperp,iz,ir], vpa_diffusion, vpa, vpa_spectral) end end if vperp.n > 1 - begin_r_z_vpa_region() - enforce_vperp_boundary_condition!(pdf, vperp.bc, vperp, vperp_spectral) + begin_z_vpa_region() + enforce_vperp_boundary_condition!(pdf, vperp.bc, vperp, vperp_spectral, ir) end if z.bc == "periodic" @@ -2326,21 +2328,25 @@ end elseif z.bc == "constant" begin_r_vperp_vpa_region() density_offset = 1.0 - vwidth = 1.0/sqrt(composition.me_over_mi) + vwidth = 1.0/sqrt(me_over_mi) dens = moments.electron.dens if z.irank == 0 - speed = z_adv[1].speed @loop_r_vperp_vpa ir ivperp ivpa begin - if speed[1,ivpa,ivperp,ir] > 0.0 - pdf[ivpa,ivperp,1,ir,is] = density_offset / dens[1,ir] * vthe[1,ir] * 
exp(-(speed[1,ivpa,ivperp,ir]^2 + vperp.grid[ivperp]^2)/vwidth^2) + u = moments.electron.upar[1,ir] + vthe = moments.electron.vth[1,ir] + speed = vpa.grid[ivpa] * vthe + u + if speed > 0.0 + pdf[ivpa,ivperp,1,ir] = density_offset / dens[1,ir] * vthe[1,ir] * exp(-(speed^2 + vperp.grid[ivperp]^2)/vwidth^2) end end end if z.irank == z.nrank - 1 - speed = z_adv[is].speed @loop_r_vperp_vpa ir ivperp ivpa begin - if speed[end,ivpa,ivperp,ir] > 0.0 - pdf[ivpa,ivperp,end,ir,is] = density_offset / dens[end,ir] * vthe[end,ir] * exp(-(speed[end,ivpa,ivperp,ir]^2 + vperp.grid[ivperp]^2)/vwidth^2) + u = moments.electron.upar[end,ir] + vthe = moments.electron.vth[end,ir] + speed = vpa.grid[ivpa] * vthe + u + if speed > 0.0 + pdf[ivpa,ivperp,end,ir] = density_offset / dens[end,ir] * vthe[end,ir] * exp(-(speed^2 + vperp.grid[ivperp]^2)/vwidth^2) end end end @@ -2429,16 +2435,16 @@ end # Note that we need to include the normalisation factor of 1/sqrt(pi) that # would be factored in by integrate_over_vspace(). This will need to # change/adapt when we support 2V as well as 1V. - density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) - flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[1,ir] - energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[1,ir] - cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[1,ir] - quartic_integral_pieces = @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[1,ir] + density_integral_pieces_lowerz = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) + flow_integral_pieces_lowerz = @. vpa.scratch4 = density_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] + energy_integral_pieces_lowerz = @. vpa.scratch5 = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] + cubic_integral_pieces_lowerz = @. vpa.scratch6 = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] + quartic_integral_pieces_lowerz = @. 
vpa.scratch7 = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] function get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and # minus_vcut_ind where -vcut is. - vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) + local vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) function get_for_one_moment(integral_pieces) # Integral contributions from the cell containing vcut. @@ -2476,18 +2482,18 @@ end return part1, part1prime, part2, part2prime end - a1, a1prime, a2, a2prime = get_for_one_moment(density_integral_pieces) - b1, b1prime, b2, _ = get_for_one_moment(flow_integral_pieces) - c1, c1prime, c2, c2prime = get_for_one_moment(energy_integral_pieces) - d1, d1prime, d2, _ = get_for_one_moment(cubic_integral_pieces) - e1, e1prime, e2, e2prime = get_for_one_moment(quartic_integral_pieces) - - return get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1, - b1prime, c1, c1prime, c2, - c2prime, d1, d1prime, e1, - e1prime, e2, e2prime, - u_over_vt)..., - a2, b2, c2, d2 + this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_lowerz) + this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_lowerz) + this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_lowerz) + this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_lowerz) + this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_lowerz) + + return get_residual_and_coefficients_for_bc( + this_a1, this_a1prime, this_a2, this_a2prime, this_b1, + this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime, + this_d1, this_d1prime, this_e1, this_e1prime, this_e2, + this_e2prime, u_over_vt)..., + this_a2, this_b2, this_c2, this_d2 end counter = 1 @@ -2577,11 +2583,11 @@ end # interpolation. 
# Need to recalculate these with the updated distribution function - density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) - flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[1,ir] - energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[1,ir] - cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[1,ir] - quartic_integral_pieces = @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[1,ir] + @views @. density_integral_pieces_lowerz = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) + @. flow_integral_pieces_lowerz = density_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] + @. energy_integral_pieces_lowerz = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] + @. cubic_integral_pieces_lowerz = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] + @. quartic_integral_pieces_lowerz = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] # Update the part2 integrals since we've applied the A and C factors _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) @@ -2595,10 +2601,10 @@ end return part3 end - a3 = get_part3_for_one_moment_lower(density_integral_pieces) - b3 = get_part3_for_one_moment_lower(flow_integral_pieces) - c3 = get_part3_for_one_moment_lower(energy_integral_pieces) - d3 = get_part3_for_one_moment_lower(cubic_integral_pieces) + a3 = get_part3_for_one_moment_lower(density_integral_pieces_lowerz) + b3 = get_part3_for_one_moment_lower(flow_integral_pieces_lowerz) + c3 = get_part3_for_one_moment_lower(energy_integral_pieces_lowerz) + d3 = get_part3_for_one_moment_lower(cubic_integral_pieces_lowerz) # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. sharpness = 4.0 @@ -2703,16 +2709,16 @@ end # Note that we need to include the normalisation factor of 1/sqrt(pi) that # would be factored in by integrate_over_vspace(). 
This will need to # change/adapt when we support 2V as well as 1V. - density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) - flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[end,ir] - energy_integral_pieces = @. vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[end,ir] - cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[end,ir] - quartic_integral_pieces = @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[end,ir] + density_integral_pieces_upperz = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) + flow_integral_pieces_upperz = @. vpa.scratch4 = density_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] + energy_integral_pieces_upperz = @. vpa.scratch5 = flow_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] + cubic_integral_pieces_upperz = @. vpa.scratch6 = energy_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] + quartic_integral_pieces_upperz = @. vpa.scratch7 = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] function get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) # vcut_fraction is the fraction of the distance between plus_vcut_ind and # plus_vcut_ind+1 where vcut is. 
- vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) + local vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) function get_for_one_moment(integral_pieces) # Integral contribution from the cell containing vcut @@ -2749,18 +2755,18 @@ end return part1, part1prime, part2, part2prime end - a1, a1prime, a2, a2prime = get_for_one_moment(density_integral_pieces) - b1, b1prime, b2, _ = get_for_one_moment(flow_integral_pieces) - c1, c1prime, c2, c2prime = get_for_one_moment(energy_integral_pieces) - d1, d1prime, d2, _ = get_for_one_moment(cubic_integral_pieces) - e1, e1prime, e2, e2prime = get_for_one_moment(quartic_integral_pieces) - - return get_residual_and_coefficients_for_bc(a1, a1prime, a2, a2prime, b1, - b1prime, c1, c1prime, c2, - c2prime, d1, d1prime, e1, - e1prime, e2, e2prime, - u_over_vt)..., - a2, b2, c2, d2 + this_a1, this_a1prime, this_a2, this_a2prime = get_for_one_moment(density_integral_pieces_upperz) + this_b1, this_b1prime, this_b2, _ = get_for_one_moment(flow_integral_pieces_upperz) + this_c1, this_c1prime, this_c2, this_c2prime = get_for_one_moment(energy_integral_pieces_upperz) + this_d1, this_d1prime, this_d2, _ = get_for_one_moment(cubic_integral_pieces_upperz) + this_e1, this_e1prime, this_e2, this_e2prime = get_for_one_moment(quartic_integral_pieces_upperz) + + return get_residual_and_coefficients_for_bc( + this_a1, this_a1prime, this_a2, this_a2prime, this_b1, + this_b1prime, this_c1, this_c1prime, this_c2, this_c2prime, + this_d1, this_d1prime, this_e1, this_e1prime, this_e2, + this_e2prime, u_over_vt)..., + this_a2, this_b2, this_c2, this_d2 end counter = 1 @@ -2848,11 +2854,11 @@ end # interpolation. # Need to recalculate these with the updated distribution function - density_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) - flow_integral_pieces = @. vpa.scratch4 = density_integral_pieces * vpa_unnorm / vthe[end,ir] - energy_integral_pieces = @. 
vpa.scratch5 = flow_integral_pieces * vpa_unnorm / vthe[end,ir] - cubic_integral_pieces = @. vpa.scratch6 = energy_integral_pieces * vpa_unnorm / vthe[end,ir] - quartic_integral_pieces = @. vpa.scratch7 = cubic_integral_pieces * vpa_unnorm / vthe[end,ir] + @views @. density_integral_pieces_upperz = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) + @. flow_integral_pieces_upperz = density_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] + @. energy_integral_pieces_upperz = flow_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] + @. cubic_integral_pieces_upperz = energy_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] + @. quartic_integral_pieces_upperz = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] # Update the part2 integrals since we've applied the A and C factors _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) @@ -2866,10 +2872,10 @@ end return part3 end - a3 = get_part3_for_one_moment_upper(density_integral_pieces) - b3 = get_part3_for_one_moment_upper(flow_integral_pieces) - c3 = get_part3_for_one_moment_upper(energy_integral_pieces) - d3 = get_part3_for_one_moment_upper(cubic_integral_pieces) + a3 = get_part3_for_one_moment_upper(density_integral_pieces_upperz) + b3 = get_part3_for_one_moment_upper(flow_integral_pieces_upperz) + c3 = get_part3_for_one_moment_upper(energy_integral_pieces_upperz) + d3 = get_part3_for_one_moment_upper(cubic_integral_pieces_upperz) # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. sharpness = 4.0 @@ -3008,8 +3014,8 @@ appropriate. 
update_electron_vth_temperature!(moments, scratch[2].electron_ppar, moments.electron.dens, composition) end - apply_electron_bc_and_constraints!(scratch[t_params.n_rk_stages+1], phi, moments, z, - vperp, vpa, vperp_spectral, vpa_spectral, + apply_electron_bc_and_constraints!(scratch[t_params.n_rk_stages+1], phi, moments, r, + z, vperp, vpa, vperp_spectral, vpa_spectral, vpa_advect, num_diss_params, composition) if evolve_ppar # Reset vth in the `moments` struct to the result consistent with full-accuracy RK diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index 3102e137c..522710641 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -339,7 +339,7 @@ function initialize_electrons!(pdf, moments, fields, geometry, composition, r, z end init_electron_pdf_over_density_and_boundary_phi!( pdf.electron.norm, fields.phi, moments.electron.dens, moments.electron.upar, - moments.electron.vth, z, vpa, vperp, vperp_spectral, vpa_spectral, + moments.electron.vth, r, z, vpa, vperp, vperp_spectral, vpa_spectral, [(speed=speed,)], moments, num_diss_params, composition.me_over_mi, scratch_dummy) end @@ -1564,8 +1564,8 @@ care is taken to ensure that the parallel boundary condition is satisfied; NB: as the electron pdf is obtained via a time-independent equation, this 'initital' value for the electron will just be the first guess in an iterative solution """ -function init_electron_pdf_over_density_and_boundary_phi!(pdf, phi, density, upar, vth, z, - vpa, vperp, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params, +function init_electron_pdf_over_density_and_boundary_phi!(pdf, phi, density, upar, vth, r, + z, vpa, vperp, vperp_spectral, vpa_spectral, vpa_advect, moments, num_diss_params, me_over_mi, scratch_dummy; restart_from_boltzmann=false) if z.bc == "wall" @@ -1581,11 +1581,13 @@ function init_electron_pdf_over_density_and_boundary_phi!(pdf, phi, density, upa end 
# Apply the sheath boundary condition to get cut-off boundary distribution # functions and boundary values of phi - enforce_boundary_condition_on_electron_pdf!(pdf, phi, vth, upar, z, vperp, vpa, - vperp_spectral, vpa_spectral, - vpa_advect, moments, - num_diss_params.electron.vpa_dissipation_coefficient > 0.0, - me_over_mi) + for ir ∈ 1:r.n + @views enforce_boundary_condition_on_electron_pdf!( + pdf[:,:,:,ir], phi[:,ir], vth[:,ir], upar[:,ir], z, vperp, vpa, + vperp_spectral, vpa_spectral, vpa_advect, moments, + num_diss_params.electron.vpa_dissipation_coefficient > 0.0, + me_over_mi, ir) + end # Distribute the z-boundary pdf values to every process begin_serial_region() From 3f3d7b553d0bb59fe6b22c804c6a0b6e9ef99378 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 21 Nov 2024 21:02:15 +0000 Subject: [PATCH 34/43] Improve type stability in electron_backward_euler!() Several small updates. In particular need to be careful with variables that are captured by a locally-defined function like `residual_func!()`. 
--- .../src/electron_kinetic_equation.jl | 39 +++++++++---------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index b2eff367a..e7d41d820 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1029,10 +1029,6 @@ global_rank[] == 0 && println("recalculating precon") qpar = @view moments.electron.qpar[:,ir] # Reconstruct w_∥^3 moment of g_e from already-calculated qpar - buffer_1 = @view scratch_dummy.buffer_rs_1[ir,1] - buffer_2 = @view scratch_dummy.buffer_rs_2[ir,1] - buffer_3 = @view scratch_dummy.buffer_rs_3[ir,1] - buffer_4 = @view scratch_dummy.buffer_rs_4[ir,1] third_moment = scratch_dummy.buffer_z_1 dthird_moment_dz = scratch_dummy.buffer_z_2 begin_z_region() @@ -1525,22 +1521,20 @@ global_rank[] == 0 && println("recalculating precon") v_unnorm = vpa.scratch zero = 1.0e-14 if z.irank == 0 - iz = 1 - v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], - moments.electron.upar[iz,ir], true, true) + v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[1,ir], + moments.electron.upar[1,ir], true, true) @loop_vperp_vpa ivperp ivpa begin if v_unnorm[ivpa] > -zero - f_electron_residual[ivpa,ivperp,iz] = 0.0 + f_electron_residual[ivpa,ivperp,1] = 0.0 end end end if z.irank == z.nrank - 1 - iz = z.n - v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[iz,ir], - moments.electron.upar[iz,ir], true, true) + v_unnorm .= vpagrid_to_dzdt(vpa.grid, moments.electron.vth[end,ir], + moments.electron.upar[end,ir], true, true) @loop_vperp_vpa ivperp ivpa begin if v_unnorm[ivpa] < zero - f_electron_residual[ivpa,ivperp,iz] = 0.0 + f_electron_residual[ivpa,ivperp,end] = 0.0 end end end @@ -1630,15 +1624,18 @@ global_rank[] == 0 && println("recalculating precon") nl_solver_params.solves_since_precon_update[] = nl_solver_params.preconditioner_update_interval # Swap 
old_scratch and new_scratch so that the next step restarts from the - # same state - scratch[1] = new_scratch - scratch[t_params.n_rk_stages+1] = old_scratch - old_scratch = scratch[1] - new_scratch = scratch[t_params.n_rk_stages+1] - f_electron_old = @view old_scratch.pdf_electron[:,:,:,ir] - f_electron_new = @view new_scratch.pdf_electron[:,:,:,ir] - electron_ppar_old = @view old_scratch.electron_ppar[:,ir] - electron_ppar_new = @view new_scratch.electron_ppar[:,ir] + # same state. Copy values over here rather than just swapping references + # to arrays, because f_electron_old and electron_ppar_old are captured by + # residual_func!() above, so any change in the things they refer to will + # cause type instability in residual_func!(). + begin_z_vperp_vpa_region() + @loop_z_vperp_vpa iz ivperp ivpa begin + f_electron_new[ivpa,ivperp,iz] = f_electron_old[ivpa,ivperp,iz] + end + begin_z_region() + @loop_z iz begin + electron_ppar_new[iz] = electron_ppar_old[iz] + end end apply_electron_bc_and_constraints_no_r!(f_electron_new, phi, moments, z, From 3184e4aea5ffbd1695ca4ba8a0665ad61c996e1c Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 25 Nov 2024 11:11:55 +0000 Subject: [PATCH 35/43] Make maximum number of pseudotimesteps and maximum pseudotime settable Input parameters that control maximum number of pseudotimesteps and maximum total pseudotime for each kinetic electron pseudotimestepping loop. 
--- moment_kinetics/src/initial_conditions.jl | 2 +- moment_kinetics/src/input_structs.jl | 2 ++ moment_kinetics/src/moment_kinetics_input.jl | 2 ++ moment_kinetics/src/time_advance.jl | 22 +++++++++++++++----- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/src/initial_conditions.jl b/moment_kinetics/src/initial_conditions.jl index 522710641..df191e544 100644 --- a/moment_kinetics/src/initial_conditions.jl +++ b/moment_kinetics/src/initial_conditions.jl @@ -668,7 +668,7 @@ function initialize_electron_pdf!(scratch, scratch_electron, pdf, moments, field ##max_electron_pdf_iterations = 10000 #max_electron_sim_time = nothing max_electron_pdf_iterations = nothing - max_electron_sim_time = 2.0 + max_electron_sim_time = max(2.0, t_params.electron.max_pseudotime) if t_params.electron.debug_io !== nothing io_electron = setup_electron_io(t_params.electron.debug_io[1], vpa, vperp, z, r, composition, collisions, diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 2bf4cf57c..682830365 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -86,6 +86,8 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero decrease_dt_iteration_threshold::mk_int increase_dt_iteration_threshold::mk_int cap_factor_ion_dt::mk_float + max_pseudotimesteps::mk_int + max_pseudotime::mk_float write_after_fixed_step_count::Bool error_sum_zero::Terrorsum split_operators::Bool diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index 18e78aeb6..e1008bf0e 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -210,6 +210,8 @@ function mk_input(input_dict=OptionsDict(); save_inputs_to_txt=false, ignore_MPI decrease_dt_iteration_threshold=100, increase_dt_iteration_threshold=20, cap_factor_ion_dt=10.0, + max_pseudotimesteps=1000, + max_pseudotime=1.0e-2, 
no_restart=false, debug_io=false, ) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index f3592dc80..0cd59185d 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -429,6 +429,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, decrease_dt_iteration_threshold = t_input["decrease_dt_iteration_threshold"] increase_dt_iteration_threshold = t_input["increase_dt_iteration_threshold"] cap_factor_ion_dt = mk_float(t_input["cap_factor_ion_dt"]) + max_pseudotimesteps = t_input["max_pseudotimesteps"] + max_pseudotime = t_input["max_pseudotime"] electron_t_params = nothing elseif electron === false debug_io = nothing @@ -437,6 +439,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, decrease_dt_iteration_threshold = -1 increase_dt_iteration_threshold = typemax(mk_int) cap_factor_ion_dt = Inf + max_pseudotimesteps = -1 + max_pseudotime = Inf electron_t_params = nothing else debug_io = nothing @@ -470,6 +474,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, decrease_dt_iteration_threshold = -1 increase_dt_iteration_threshold = typemax(mk_int) cap_factor_ion_dt = Inf + max_pseudotimesteps = -1 + max_pseudotime = Inf electron_t_params = electron end return time_info(n_variables, t_input["nstep"], end_time, t, dt, previous_dt, @@ -495,7 +501,8 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, electron_preconditioner_type, mk_float(t_input["constraint_forcing_rate"]), decrease_dt_iteration_threshold, increase_dt_iteration_threshold, - mk_float(cap_factor_ion_dt), t_input["write_after_fixed_step_count"], + mk_float(cap_factor_ion_dt), mk_int(max_pseudotimesteps), + mk_float(max_pseudotime), t_input["write_after_fixed_step_count"], error_sum_zero, t_input["split_operators"], t_input["steady_state_residual"], mk_float(t_input["converged_residual_value"]), @@ -2962,8 +2969,13 @@ end n_rk_stages = t_params.n_rk_stages - 
max_electron_pdf_iterations = 1000 - max_electron_sim_time = 1.0e-3 + if t_params.electron !== nothing + max_electron_pdf_iterations = t_params.electron.max_pseudotimesteps + max_electron_sim_time = t_params.electron.max_pseudotime + else + max_electron_pdf_iterations = nothing + max_electron_sim_time = nothing + end first_scratch = scratch[1] @loop_s_r_z_vperp_vpa is ir iz ivperp ivpa begin @@ -3556,8 +3568,8 @@ end t_params.electron, t_params.dt[], nl_solver_params.electron_advance) elseif t_params.implicit_electron_ppar - max_electron_pdf_iterations = 1000 - max_electron_sim_time = 1.0e-3 + max_electron_pdf_iterations = t_params.electron.max_pseudotimesteps + max_electron_sim_time = t_params.electron.max_pseudotime electron_success = update_electron_pdf!(scratch_electron, pdf.electron.norm, moments, fields.phi, r, z, vperp, vpa, z_spectral, vperp_spectral, vpa_spectral, From cc9f967161974d04b059e2d49be10ac12515a776 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 25 Nov 2024 21:27:30 +0000 Subject: [PATCH 36/43] Reset timers at beginning of run_moment_kinetics() Helps if something didn't previously clean up the timers. --- moment_kinetics/src/moment_kinetics.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/moment_kinetics/src/moment_kinetics.jl b/moment_kinetics/src/moment_kinetics.jl index 5d5e9d9f5..10cdbbfca 100644 --- a/moment_kinetics/src/moment_kinetics.jl +++ b/moment_kinetics/src/moment_kinetics.jl @@ -123,6 +123,9 @@ function run_moment_kinetics(input_dict::OptionsDict; restart=false, restart_tim check_so_newer_than_code() end + # Reset timers in case there was a previous run which did not clean them up. 
+ reset_mk_timers!() + mk_state = nothing try @timeit global_timer "moment_kinetics" begin From 63d8808a88183d2c1da76b6d68c5bb9b64d881a8 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 27 Nov 2024 11:14:18 +0000 Subject: [PATCH 37/43] Remove loop over ir in enforce_boundary_condition_on_electron_pdf!() This boundary condition function should act only at one `ir`, which is passed as an argument, so there should be no loop over `ir`. This bug has not impacted simulations so far because we have only used r.n=1 for kinetic electrons. --- .../src/electron_kinetic_equation.jl | 194 +++++++++--------- 1 file changed, 97 insertions(+), 97 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index e7d41d820..fad344e69 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1408,7 +1408,7 @@ global_rank[] == 0 && println("recalculating precon") @loop_z iz begin # update the electron thermal speed using the updated electron # parallel pressure - this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_newvar[iz,ir] / + this_vth[iz,ir] = sqrt(abs(2.0 * electron_ppar_newvar[iz] / (this_dens[iz,ir] * composition.me_over_mi))) end @@ -2328,22 +2328,22 @@ end vwidth = 1.0/sqrt(me_over_mi) dens = moments.electron.dens if z.irank == 0 - @loop_r_vperp_vpa ir ivperp ivpa begin - u = moments.electron.upar[1,ir] - vthe = moments.electron.vth[1,ir] + @loop_vperp_vpa ivperp ivpa begin + u = moments.electron.upar[1] + vthe = moments.electron.vth[1] speed = vpa.grid[ivpa] * vthe + u if speed > 0.0 - pdf[ivpa,ivperp,1,ir] = density_offset / dens[1,ir] * vthe[1,ir] * exp(-(speed^2 + vperp.grid[ivperp]^2)/vwidth^2) + pdf[ivpa,ivperp,1] = density_offset / dens[1] * vthe[1] * exp(-(speed^2 + vperp.grid[ivperp]^2)/vwidth^2) end end end if z.irank == z.nrank - 1 - @loop_r_vperp_vpa ir ivperp ivpa begin - u = moments.electron.upar[end,ir] - vthe = 
moments.electron.vth[end,ir] + @loop_vperp_vpa ivperp ivpa begin + u = moments.electron.upar[end] + vthe = moments.electron.vth[end] speed = vpa.grid[ivpa] * vthe + u if speed > 0.0 - pdf[ivpa,ivperp,end,ir] = density_offset / dens[end,ir] * vthe[end,ir] * exp(-(speed^2 + vperp.grid[ivperp]^2)/vwidth^2) + pdf[ivpa,ivperp,end] = density_offset / dens[end] * vthe[end] * exp(-(speed^2 + vperp.grid[ivperp]^2)/vwidth^2) end end end @@ -2358,7 +2358,7 @@ end # the electrostatic potential at the boundary, which determines the critical speed, is unknown a priori; # use the constraint that the first moment of the normalised pdf be zero to choose the potential. - begin_r_region() + begin_serial_region() newton_max_its = 100 @@ -2395,11 +2395,12 @@ end return epsilon, epsilonprime, A, C end - if z.irank == 0 - if z.bc != "wall" - error("Options other than wall, constant or z-periodic bc not implemented yet for electrons") - end - @loop_r ir begin + @serial_region begin + if z.irank == 0 + if z.bc != "wall" + error("Options other than wall, constant or z-periodic bc not implemented yet for electrons") + end + # Impose sheath-edge boundary condition, while also imposing moment # constraints and determining the cut-off velocity (and therefore the sheath # potential). @@ -2415,28 +2416,28 @@ end # 'far from zero' means larger values of v_∥. 
# Interpolate to the 'near zero' points - @views interpolate_symmetric!(pdf[sigma_ind:last_point_near_zero,1,1,ir], + @views interpolate_symmetric!(pdf[sigma_ind:last_point_near_zero,1,1], vpa_unnorm[sigma_ind:last_point_near_zero], - pdf[element_with_zero_boundary:sigma_ind-1,1,1,ir], + pdf[element_with_zero_boundary:sigma_ind-1,1,1], vpa_unnorm[element_with_zero_boundary:sigma_ind-1]) # Interpolate to the 'far from zero' points reversed_pdf_far_from_zero = @view vpa.scratch[last_point_near_zero+1:end] @views interpolate_to_grid_1d!(reversed_pdf_far_from_zero, reversed_wpa_of_minus_vpa[1:vpa.n-last_point_near_zero], - pdf[:,1,1,ir], vpa, vpa_spectral) + pdf[:,1,1], vpa, vpa_spectral) reverse!(reversed_pdf_far_from_zero) - pdf[last_point_near_zero+1:end,1,1,ir] .= reversed_pdf_far_from_zero + pdf[last_point_near_zero+1:end,1,1] .= reversed_pdf_far_from_zero # Per-grid-point contributions to moment integrals # Note that we need to include the normalisation factor of 1/sqrt(pi) that # would be factored in by integrate_over_vspace(). This will need to # change/adapt when we support 2V as well as 1V. - density_integral_pieces_lowerz = @views @. vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) - flow_integral_pieces_lowerz = @. vpa.scratch4 = density_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] - energy_integral_pieces_lowerz = @. vpa.scratch5 = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] - cubic_integral_pieces_lowerz = @. vpa.scratch6 = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] - quartic_integral_pieces_lowerz = @. vpa.scratch7 = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] + density_integral_pieces_lowerz = @views @. vpa.scratch3 = pdf[:,1,1] * vpa.wgts / sqrt(pi) + flow_integral_pieces_lowerz = @. vpa.scratch4 = density_integral_pieces_lowerz * vpa_unnorm / vthe[1] + energy_integral_pieces_lowerz = @. vpa.scratch5 = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1] + cubic_integral_pieces_lowerz = @. 
vpa.scratch6 = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1] + quartic_integral_pieces_lowerz = @. vpa.scratch7 = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1] function get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and @@ -2504,17 +2505,17 @@ end # respect to vcut delta_v = - epsilon / epsilonprime - if vcut > vthe[1,ir] && epsilonprime < 0.0 + if vcut > vthe[1] && epsilonprime < 0.0 # epsilon should be increasing with vcut at epsilon=0, so if # epsilonprime is negative, the solution is actually at a lower vcut - # at larger vcut, epsilon will just tend to 0 but never reach it. - delta_v = -0.1 * vthe[1,ir] + delta_v = -0.1 * vthe[1] end # Prevent the step size from getting too big, to make Newton iteration # more robust. - delta_v = min(delta_v, 0.1 * vthe[1,ir]) - delta_v = max(delta_v, -0.1 * vthe[1,ir]) + delta_v = min(delta_v, 0.1 * vthe[1]) + delta_v = max(delta_v, -0.1 * vthe[1]) vcut = vcut + delta_v minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) @@ -2552,22 +2553,22 @@ end # Adjust pdf so that after reflecting and cutting off tail, it will obey the # constraints. - @. pdf[:,1,1,ir] *= A + C * vpa_unnorm^2 / vthe[1,ir]^2 + @. pdf[:,1,1] *= A + C * vpa_unnorm^2 / vthe[1]^2 plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) - pdf[plus_vcut_ind+2:end,1,1,ir] .= 0.0 + pdf[plus_vcut_ind+2:end,1,1] .= 0.0 # vcut_fraction is the fraction of the distance between plus_vcut_ind and # plus_vcut_ind+1 where vcut is. 
vcut_fraction = get_plus_vcut_fraction(vcut, plus_vcut_ind, vpa_unnorm) if vcut_fraction > 0.5 - pdf[plus_vcut_ind+1,1,1,ir] *= vcut_fraction - 0.5 + pdf[plus_vcut_ind+1,1,1] *= vcut_fraction - 0.5 else - pdf[plus_vcut_ind+1,1,1,ir] = 0.0 - pdf[plus_vcut_ind,1,1,ir] *= vcut_fraction + 0.5 + pdf[plus_vcut_ind+1,1,1] = 0.0 + pdf[plus_vcut_ind,1,1] *= vcut_fraction + 0.5 end # update the electrostatic potential at the boundary to be the value corresponding to the updated cutoff velocity - phi[1,ir] = me_over_mi * vcut^2 + phi[1] = me_over_mi * vcut^2 moments.electron.constraints_A_coefficient[1,ir] = A moments.electron.constraints_B_coefficient[1,ir] = 0.0 @@ -2580,11 +2581,11 @@ end # interpolation. # Need to recalculate these with the updated distribution function - @views @. density_integral_pieces_lowerz = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) - @. flow_integral_pieces_lowerz = density_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] - @. energy_integral_pieces_lowerz = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] - @. cubic_integral_pieces_lowerz = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] - @. quartic_integral_pieces_lowerz = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1,ir] + @views @. density_integral_pieces_lowerz = pdf[:,1,1] * vpa.wgts / sqrt(pi) + @. flow_integral_pieces_lowerz = density_integral_pieces_lowerz * vpa_unnorm / vthe[1] + @. energy_integral_pieces_lowerz = flow_integral_pieces_lowerz * vpa_unnorm / vthe[1] + @. cubic_integral_pieces_lowerz = energy_integral_pieces_lowerz * vpa_unnorm / vthe[1] + @. quartic_integral_pieces_lowerz = cubic_integral_pieces_lowerz * vpa_unnorm / vthe[1] # Update the part2 integrals since we've applied the A and C factors _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_lowerz(vcut, minus_vcut_ind) @@ -2605,7 +2606,7 @@ end # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. sharpness = 4.0 - correction0_integral_pieces = @views @. 
vpa.scratch3 = pdf[:,1,1,ir] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[1,ir]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[1,ir]^2) + correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,1] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[1]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[1]^2) for ivpa ∈ 1:sigma_ind # We only add the corrections to 'part3', so zero them out for negative v_∥. # I think this is only actually significant for `sigma_ind-1` and @@ -2619,12 +2620,12 @@ end # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. correction0_integral_pieces[ivpa] = 0.0 end - correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1,ir] - correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1,ir] - correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1,ir] - correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1,ir] - correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1,ir] - correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1,ir] + correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[1] + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[1] + correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[1] + correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[1] + correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[1] + correction6_integral_pieces = @. 
vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[1] alpha = get_part3_for_one_moment_lower(correction0_integral_pieces) beta = get_part3_for_one_moment_lower(correction1_integral_pieces) @@ -2649,31 +2650,30 @@ end ] \ [a2-a3, -b2-b3, c2-c3, -d2-d3] A, B, C, D = solution for ivpa ∈ sigma_ind+1:plus_vcut_ind+1 - v_over_vth = vpa_unnorm[ivpa]/vthe[1,ir] - pdf[ivpa,1,1,ir] = pdf[ivpa,1,1,ir] + + v_over_vth = vpa_unnorm[ivpa]/vthe[1] + pdf[ivpa,1,1] = pdf[ivpa,1,1] + (A + B * v_over_vth + C * v_over_vth^2 + D * v_over_vth^3) * sharpness * v_over_vth^2 / (1.0 + sharpness * v_over_vth^2) * - pdf[ivpa,1,1,ir] + pdf[ivpa,1,1] end end - end - # next enforce the boundary condition at z_max. - # this involves forcing the pdf to be zero for electrons travelling faster than the max speed - # they could attain by accelerating in the electric field between the wall and the simulation boundary; - # for electrons with negative velocities less than this critical value, they must have the same - # pdf as electrons with positive velocities of the same magnitude. - # the electrostatic potential at the boundary, which determines the critical speed, is unknown a priori; - # use the constraint that the first moment of the normalised pdf be zero to choose the potential. - - if z.irank == z.nrank - 1 - if z.bc != "wall" - error("Options other than wall or z-periodic bc not implemented yet for electrons") - end - @loop_r ir begin + # next enforce the boundary condition at z_max. + # this involves forcing the pdf to be zero for electrons travelling faster than the max speed + # they could attain by accelerating in the electric field between the wall and the simulation boundary; + # for electrons with negative velocities less than this critical value, they must have the same + # pdf as electrons with positive velocities of the same magnitude. 
+ # the electrostatic potential at the boundary, which determines the critical speed, is unknown a priori; + # use the constraint that the first moment of the normalised pdf be zero to choose the potential. + + if z.irank == z.nrank - 1 + if z.bc != "wall" + error("Options other than wall or z-periodic bc not implemented yet for electrons") + end + # Impose sheath-edge boundary condition, while also imposing moment # constraints and determining the cut-off velocity (and therefore the sheath # potential). @@ -2689,28 +2689,28 @@ end # 'far from zero' means more negative values of v_∥. # Interpolate to the 'near zero' points - @views interpolate_symmetric!(pdf[first_point_near_zero:sigma_ind,1,end,ir], + @views interpolate_symmetric!(pdf[first_point_near_zero:sigma_ind,1,end], vpa_unnorm[first_point_near_zero:sigma_ind], - pdf[sigma_ind+1:element_with_zero_boundary,1,end,ir], + pdf[sigma_ind+1:element_with_zero_boundary,1,end], vpa_unnorm[sigma_ind+1:element_with_zero_boundary]) # Interpolate to the 'far from zero' points reversed_pdf = @view vpa.scratch[1:first_point_near_zero-1] @views interpolate_to_grid_1d!(reversed_pdf, reversed_wpa_of_minus_vpa[vpa.n-first_point_near_zero+2:end], - pdf[:,1,end,ir], vpa, vpa_spectral) + pdf[:,1,end], vpa, vpa_spectral) reverse!(reversed_pdf) - pdf[1:first_point_near_zero-1,1,end,ir] .= reversed_pdf + pdf[1:first_point_near_zero-1,1,end] .= reversed_pdf # Per-grid-point contributions to moment integrals # Note that we need to include the normalisation factor of 1/sqrt(pi) that # would be factored in by integrate_over_vspace(). This will need to # change/adapt when we support 2V as well as 1V. - density_integral_pieces_upperz = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) - flow_integral_pieces_upperz = @. vpa.scratch4 = density_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] - energy_integral_pieces_upperz = @. 
vpa.scratch5 = flow_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] - cubic_integral_pieces_upperz = @. vpa.scratch6 = energy_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] - quartic_integral_pieces_upperz = @. vpa.scratch7 = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] + density_integral_pieces_upperz = @views @. vpa.scratch3 = pdf[:,1,end] * vpa.wgts / sqrt(pi) + flow_integral_pieces_upperz = @. vpa.scratch4 = density_integral_pieces_upperz * vpa_unnorm / vthe[end] + energy_integral_pieces_upperz = @. vpa.scratch5 = flow_integral_pieces_upperz * vpa_unnorm / vthe[end] + cubic_integral_pieces_upperz = @. vpa.scratch6 = energy_integral_pieces_upperz * vpa_unnorm / vthe[end] + quartic_integral_pieces_upperz = @. vpa.scratch7 = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end] function get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) # vcut_fraction is the fraction of the distance between plus_vcut_ind and @@ -2775,17 +2775,17 @@ end # respect to vcut delta_v = - epsilon / epsilonprime - if vcut > vthe[1,ir] && epsilonprime > 0.0 + if vcut > vthe[1] && epsilonprime > 0.0 # epsilon should be decreasing with vcut at epsilon=0, so if # epsilonprime is positive, the solution is actually at a lower vcut - # at larger vcut, epsilon will just tend to 0 but never reach it. - delta_v = -0.1 * vthe[1,ir] + delta_v = -0.1 * vthe[1] end # Prevent the step size from getting too big, to make Newton iteration # more robust. - delta_v = min(delta_v, 0.1 * vthe[end,ir]) - delta_v = max(delta_v, -0.1 * vthe[end,ir]) + delta_v = min(delta_v, 0.1 * vthe[end]) + delta_v = max(delta_v, -0.1 * vthe[end]) vcut = vcut + delta_v plus_vcut_ind = searchsortedlast(vpa_unnorm, vcut) @@ -2823,22 +2823,22 @@ end # Adjust pdf so that after reflecting and cutting off tail, it will obey the # constraints. - @. pdf[:,1,end,ir] *= A + C * vpa_unnorm^2 / vthe[end,ir]^2 + @. 
pdf[:,1,end] *= A + C * vpa_unnorm^2 / vthe[end]^2 minus_vcut_ind = searchsortedfirst(vpa_unnorm, -vcut) - pdf[1:minus_vcut_ind-2,1,end,ir] .= 0.0 + pdf[1:minus_vcut_ind-2,1,end] .= 0.0 # vcut_fraction is the fraction of the distance between minus_vcut_ind-1 and # minus_vcut_ind where -vcut is. vcut_fraction = get_minus_vcut_fraction(vcut, minus_vcut_ind, vpa_unnorm) if vcut_fraction < 0.5 - pdf[minus_vcut_ind-1,1,end,ir] *= 0.5 - vcut_fraction + pdf[minus_vcut_ind-1,1,end] *= 0.5 - vcut_fraction else - pdf[minus_vcut_ind-1,1,end,ir] = 0.0 - pdf[minus_vcut_ind,1,end,ir] *= 1.5 - vcut_fraction + pdf[minus_vcut_ind-1,1,end] = 0.0 + pdf[minus_vcut_ind,1,end] *= 1.5 - vcut_fraction end # update the electrostatic potential at the boundary to be the value corresponding to the updated cutoff velocity - phi[end,ir] = me_over_mi * vcut^2 + phi[end] = me_over_mi * vcut^2 moments.electron.constraints_A_coefficient[end,ir] = A moments.electron.constraints_B_coefficient[end,ir] = 0.0 @@ -2851,11 +2851,11 @@ end # interpolation. # Need to recalculate these with the updated distribution function - @views @. density_integral_pieces_upperz = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) - @. flow_integral_pieces_upperz = density_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] - @. energy_integral_pieces_upperz = flow_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] - @. cubic_integral_pieces_upperz = energy_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] - @. quartic_integral_pieces_upperz = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end,ir] + @views @. density_integral_pieces_upperz = pdf[:,1,end] * vpa.wgts / sqrt(pi) + @. flow_integral_pieces_upperz = density_integral_pieces_upperz * vpa_unnorm / vthe[end] + @. energy_integral_pieces_upperz = flow_integral_pieces_upperz * vpa_unnorm / vthe[end] + @. cubic_integral_pieces_upperz = energy_integral_pieces_upperz * vpa_unnorm / vthe[end] + @. 
quartic_integral_pieces_upperz = cubic_integral_pieces_upperz * vpa_unnorm / vthe[end] # Update the part2 integrals since we've applied the A and C factors _, _, _, _, a2, b2, c2, d2 = get_integrals_and_derivatives_upperz(vcut, plus_vcut_ind) @@ -2876,7 +2876,7 @@ end # Use scale factor to adjust how sharp the cutoff near vpa_unnorm=0 is. sharpness = 4.0 - correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end,ir] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[end,ir]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[end,ir]^2) + correction0_integral_pieces = @views @. vpa.scratch3 = pdf[:,1,end] * vpa.wgts / sqrt(pi) * sharpness * vpa_unnorm^2 / vthe[end]^2 / (1.0 + sharpness * vpa_unnorm^2 / vthe[end]^2) for ivpa ∈ sigma_ind:vpa.n # We only add the corrections to 'part3', so zero them out for positive v_∥. # I think this is only actually significant for `sigma_ind` and @@ -2890,12 +2890,12 @@ end # v_∥^2/vth^2/(1+v_∥^2/vth^2)≈v_∥^2/vth^2≈0. correction0_integral_pieces[ivpa] = 0.0 end - correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end,ir] - correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end,ir] - correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end,ir] - correction4_integral_pieces = @. vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end,ir] - correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end,ir] - correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end,ir] + correction1_integral_pieces = @. vpa.scratch4 = correction0_integral_pieces * vpa_unnorm / vthe[end] + correction2_integral_pieces = @. vpa.scratch5 = correction1_integral_pieces * vpa_unnorm / vthe[end] + correction3_integral_pieces = @. vpa.scratch6 = correction2_integral_pieces * vpa_unnorm / vthe[end] + correction4_integral_pieces = @. 
vpa.scratch7 = correction3_integral_pieces * vpa_unnorm / vthe[end] + correction5_integral_pieces = @. vpa.scratch8 = correction4_integral_pieces * vpa_unnorm / vthe[end] + correction6_integral_pieces = @. vpa.scratch9 = correction5_integral_pieces * vpa_unnorm / vthe[end] alpha = get_part3_for_one_moment_upper(correction0_integral_pieces) beta = get_part3_for_one_moment_upper(correction1_integral_pieces) @@ -2920,14 +2920,14 @@ end ] \ [a2-a3, -b2-b3, c2-c3, -d2-d3] A, B, C, D = solution for ivpa ∈ minus_vcut_ind-1:sigma_ind-1 - v_over_vth = vpa_unnorm[ivpa]/vthe[end,ir] - pdf[ivpa,1,end,ir] = pdf[ivpa,1,end,ir] + + v_over_vth = vpa_unnorm[ivpa]/vthe[end] + pdf[ivpa,1,end] = pdf[ivpa,1,end] + (A + B * v_over_vth + C * v_over_vth^2 + D * v_over_vth^3) * sharpness * v_over_vth^2 / (1.0 + sharpness * v_over_vth^2) * - pdf[ivpa,1,end,ir] + pdf[ivpa,1,end] end end end From 81418c45ced0a692e290ffcf050b141165a00292 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 23 Nov 2024 17:18:40 +0000 Subject: [PATCH 38/43] Fix adaptive-timestep IMEX for kinetic electrons Can (?) skip the problematic solve, that was run to update the electron shape function without updating the electron pressure, on the 'explicit first stage' of ESDIRK schemes. This might have the effect of reducing the order of accuracy of the scheme somehow, as the qpar_e used for the 'explicit' calculation of the time derivative of electron_ppar is taken from the most recent implicit solve rather than a solve updated with the new ion/electron profiles. However, the change is probably small (?) and at least the solver does run now - it is useful to have an adaptive ion timestep as it may let the code recover by reducing the ion timestep when an electron implicit solve fails to converge. 
--- moment_kinetics/src/time_advance.jl | 4 +- .../test/kinetic_electron_tests.jl | 392 +++++++++--------- 2 files changed, 202 insertions(+), 194 deletions(-) diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 0cd59185d..8f2d7095b 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -3116,9 +3116,7 @@ end || (istage == n_rk_stages && t_params.implicit_coefficient_is_zero[1]) || t_params.implicit_coefficient_is_zero[istage+1]) update_electrons = (t_params.rk_coefs_implicit === nothing - || !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar) - || (istage < n_rk_stages && t_params.implicit_coefficient_is_zero[istage+1]) - || (istage == n_rk_stages && t_params.implicit_coefficient_is_zero[1])) + || !(t_params.implicit_electron_advance || t_params.implicit_electron_ppar)) diagnostic_moments = diagnostic_checks && istage == n_rk_stages success = apply_all_bcs_constraints_update_moments!( scratch[istage+1], pdf, moments, fields, boundary_distributions, diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl index 33738748d..63c6b6ee2 100644 --- a/moment_kinetics/test/kinetic_electron_tests.jl +++ b/moment_kinetics/test/kinetic_electron_tests.jl @@ -123,9 +123,14 @@ kinetic_input["nonlinear_solver"] = OptionsDict("nonlinear_max_iterations" => 10 "rtol" => 1.0e-8, "atol" => 1.0e-14, "linear_restart" => 5, - "preconditioner_update_interval" => 1000, + "preconditioner_update_interval" => 100, ) +kinetic_input_adaptive_timestep = deepcopy(kinetic_input) +kinetic_input_adaptive_timestep["output"]["run_name"] = "kinetic_electron_adaptive_timestep_test" +kinetic_input_adaptive_timestep["timestepping"]["type"] = "KennedyCarpenterARK324" +kinetic_input_adaptive_timestep["timestepping"]["maximum_dt"] = 1.0e-5 + """ Run a test for a single set of parameters @@ -136,204 +141,209 @@ function run_test() this_boltzmann_input = 
deepcopy(boltzmann_input) this_boltzmann_input["output"]["base_directory"] = test_output_directory - this_kinetic_input = deepcopy(kinetic_input) - this_kinetic_input["output"]["base_directory"] = test_output_directory - - # Provide some progress info - println(" - testing kinetic electrons") - - # Suppress console output while running? Test is pretty long, so maybe better to leave - # intermediate output visible. Leaving `quietoutput()` commented out for now... + # Suppress console output while running. quietoutput() do run_moment_kinetics(this_boltzmann_input) + end - restart_from_directory = joinpath(this_boltzmann_input["output"]["base_directory"], this_boltzmann_input["output"]["run_name"]) - restart_from_file_pattern = this_boltzmann_input["output"]["run_name"] * ".dfns*.h5" - restart_from_file = glob(restart_from_file_pattern, restart_from_directory)[1] + for (this_kinetic_input, label, tol) ∈ ((deepcopy(kinetic_input), "", 1.0e-6), + (deepcopy(kinetic_input_adaptive_timestep), "adaptive timestep", 1.0e-4)) + # Provide some progress info + println(" - testing kinetic electrons $label") - # run kinetic electron simulation - run_moment_kinetics(this_kinetic_input; restart=restart_from_file) - end + this_kinetic_input["output"]["base_directory"] = test_output_directory - if global_rank[] == 0 - # Load and analyse output - ######################### - - path = joinpath(realpath(this_kinetic_input["output"]["base_directory"]), this_kinetic_input["output"]["run_name"]) - - # open the output file(s) - run_info = get_run_info_no_setup(path, dfns=true) - - # load fields data - Ez = postproc_load_variable(run_info, "Ez")[:,1,:] - vthe = postproc_load_variable(run_info, "electron_thermal_speed")[:,1,:] - electron_advance_linear_iterations = postproc_load_variable(run_info, "electron_advance_linear_iterations")[end] - - close_run_info(run_info) - - # Regression test - # Benchmark data generated in serial on Linux - if global_size[] == 1 - # Serial solves use LU 
preconditioner - expected_Ez = [-0.5990683230706185 -1.136483186157602; - -0.4944296396481284 -0.9873296990705788; - -0.30889032954504736 -0.6694380824928302; - -0.2064830747303776 -0.4471331690708596; - -0.21232457328748663 -0.423069171542538; - -0.18233875912042674 -0.3586467595624931; - -0.16711429522309232 -0.3018272987758344; - -0.16920776495088916 -0.27814384649305496; - -0.1629417555658927 -0.26124630661090814; - -0.16619150334079993 -0.2572789330163811; - -0.15918194883360942 -0.23720078037362732; - -0.14034706409006803 -0.20520396656341475; - -0.12602184032280567 -0.1827016549071128; - -0.10928716440800472 -0.15808919669899502; - -0.07053969674257217 -0.10137753767917096; - -0.0249577746169536 -0.0358411459260082; - -2.8327303308330514e-15 -2.0803303361189427e-5; - 0.024957774616960776 0.03584490974053962; - 0.07053969674257636 0.1013692898656727; - 0.10928716440799909 0.15807862358546687; - 0.1260218403227975 0.18263049748179466; - 0.1403470640900294 0.20516566362571026; - 0.1591819488336015 0.23711236692241613; - 0.16619150334082114 0.257126146434857; - 0.16294175556587748 0.2609881259705107; - 0.16920776495090983 0.2778978154805798; - 0.1671142952230893 0.3015349192528757; - 0.1823387591204167 0.3585291689672981; - 0.21232457328753865 0.4231179549656996; - 0.20648307473037922 0.44816400221269476; - 0.3088903295450278 0.6716787105435247; - 0.4944296396481271 0.9861165590258743; - 0.5990683230705801 1.1300034111861956] - expected_vthe = [22.64555285302391 22.485481713141688; - 23.763411647653097 23.63281883616836; - 25.26907160117684 25.181703459470448; - 26.17920352818247 26.12461016686916; - 26.514772631426933 26.476018852279974; - 26.798783188585713 26.774387562937218; - 27.202255545479264 27.203662204308202; - 27.50424749120107 27.527732850637264; - 27.630498656270504 27.6642323848215; - 27.748483758260697 27.79134809261204; - 27.933760382468346 27.990808336620802; - 28.08611508251559 28.153978618442775; - 28.14959662643782 28.221734439130564; - 
28.207730844115044 28.283677711828023; - 28.28567669896009 28.36634261525836; - 28.32728392065335 28.410489883644782; - 28.331064506972027 28.41437629072209; - 28.32729968986601 28.41050992096321; - 28.285678151542136 28.366352683865195; - 28.207765527709956 28.28373408727703; - 28.149604559462947 28.221771261090687; - 28.086248527111163 28.154158507899695; - 27.933979289064936 27.991103719847732; - 27.74906125092813 27.792046191405188; - 27.631210333523736 27.66508092926101; - 27.505479130159543 27.529115937508752; - 27.20422756527604 27.20578114592589; - 26.801712351383053 26.77740066591359; - 26.517644511297203 26.478915386575462; - 26.18176436913143 26.127099000267552; - 25.26635932097994 25.178676836919877; - 23.756593489029708 23.625697695979085; - 22.64390166090378 22.48400980852866] - else - # Parallel solves, which here use only shared-memory parallelism, use the ADI - # preconditioner, which should be as accurate, but may give different results - # within Newton-Krylov tolerances. 
- expected_Ez = [-0.5990683230706185 -1.136484793603861; - -0.4944296396481284 -0.9873300031440772; - -0.30889032954504736 -0.6694378168618197; - -0.2064830747303776 -0.447133132132065; - -0.21232457328748663 -0.42306913446372424; - -0.18233875912042674 -0.3586467771727455; - -0.16711429522309232 -0.30182728110160495; - -0.16920776495088916 -0.27814382747995164; - -0.1629417555658927 -0.2612463784138094; - -0.16619150334079993 -0.25727894258000966; - -0.15918194883360942 -0.23720078814350573; - -0.14034706409006803 -0.20520397188041256; - -0.12602184032280567 -0.18270162474892546; - -0.10928716440800472 -0.1580892035790512; - -0.07053969674257217 -0.10137753682381391; - -0.0249577746169536 -0.03584114725793184; - -2.8327303308330514e-15 -2.0802378395589373e-5; - 0.024957774616960776 0.0358449101669449; - 0.07053969674257636 0.10136928934666747; - 0.10928716440799909 0.15807862867071673; - 0.1260218403227975 0.18263047522175488; - 0.1403470640900294 0.20516566756031385; 0.1591819488336015 0.2371123741024713; - 0.16619150334082114 0.2571261543920033; - 0.16294175556587748 0.2609882062708652; - 0.16920776495090983 0.27789779494370415; - 0.1671142952230893 0.30153489797658445; - 0.1823387591204167 0.35852918516786003; - 0.21232457328753865 0.42311789840457864; - 0.20648307473037922 0.44816400062147066; - 0.3088903295450278 0.6716785459169026; - 0.4944296396481271 0.9861167610959626; - 0.5990683230705801 1.1300045383907789] - expected_vthe = [22.64555338227396 22.48548119549829; - 23.76341164436594 23.632819782771243; - 25.26907163394297 25.18170391887767; - 26.179203467285365 26.12461016927763; - 26.514772629327332 26.47601877788725; - 26.79878318858447 26.774387534342114; - 27.20225551034186 27.20366217166485; - 27.504247525601926 27.527732760234755; - 27.630498605068166 27.66423228184859; - 27.748483763235846 27.791348082529804; - 27.933760371994826 27.990808308571204; - 28.08611509938479 28.153978648601132; - 28.149596610550738 28.221734405417436; - 
28.207730848524463 28.28367771694209; - 28.28567670146647 28.366342613061416; - 28.32728392764203 28.410489892675102; - 28.331064498175866 28.414376282256146; - 28.327299695349158 28.41050992979778; - 28.285678155424083 28.366352683054103; - 28.207765532359442 28.28373409338897; - 28.149604554344048 28.22177123547944; - 28.086248537316628 28.154158532699547; - 27.933979285563435 27.991103698041254; - 27.749061255285646 27.79204618050744; - 27.63121031067771 27.665080846653012; - 27.505479148983177 27.529115838548574; - 27.204227550854288 27.205781129997607; - 26.801712356957204 26.777400644678224; - 26.517644516966772 26.478915353716097; - 26.181764354679014 26.12709901369174; - 25.266359355820907 25.178677080491074; - 23.756593465755735 23.625698257711747; - 22.64390180335094 22.48400934735562] - end + # Suppress console output while running. + quietoutput() do + restart_from_directory = joinpath(this_boltzmann_input["output"]["base_directory"], this_boltzmann_input["output"]["run_name"]) + restart_from_file_pattern = this_boltzmann_input["output"]["run_name"] * ".dfns*.h5" + restart_from_file = glob(restart_from_file_pattern, restart_from_directory)[1] - if expected_Ez == nothing - # Error: no expected input provided - println("data tested would be: Ez=", Ez) - @test false - else - @test elementwise_isapprox(Ez, expected_Ez, rtol=0.0, atol=2.0e-6) - end - if expected_vthe == nothing - # Error: no expected input provided - println("data tested would be: vthe=", vthe) - @test false - else - @test elementwise_isapprox(vthe, expected_vthe, rtol=1.0e-6, atol=0.0) + # run kinetic electron simulation + run_moment_kinetics(this_kinetic_input; restart=restart_from_file) end - # Iteration counts are fairly inconsistent, but it's good to check that they at - # least don't unexpectedly increase by an order of magnitude. - # Expected iteration count is from a serial run on Linux. 
- expected_electron_advance_linear_iterations = 49307 - @test electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations - if !(electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations) - println("electron_advance_linear_iterations=$electron_advance_linear_iterations was greater than twice the expected $expected_electron_advance_linear_iterations.") + if global_rank[] == 0 + # Load and analyse output + ######################### + + path = joinpath(realpath(this_kinetic_input["output"]["base_directory"]), this_kinetic_input["output"]["run_name"]) + + # open the output file(s) + run_info = get_run_info_no_setup(path, dfns=true) + + # load fields data + Ez = postproc_load_variable(run_info, "Ez")[:,1,:] + vthe = postproc_load_variable(run_info, "electron_thermal_speed")[:,1,:] + electron_advance_linear_iterations = postproc_load_variable(run_info, "electron_advance_linear_iterations")[end] + + close_run_info(run_info) + + # Regression test + # Benchmark data generated in serial on Linux + if global_size[] == 1 + # Serial solves use LU preconditioner + expected_Ez = [-0.5990683230706185 -1.1053138725180998; + -0.4944296396481284 -0.9819332128466166; + -0.30889032954504736 -0.6745656961983237; + -0.2064830747303776 -0.4459531272930669; + -0.21232457328748663 -0.4253218487528007; + -0.18233875912042674 -0.3596054334022437; + -0.16711429522309232 -0.3021381799340685; + -0.16920776495088916 -0.2784335484692499; + -0.1629417555658927 -0.2612551389558109; + -0.16619150334079993 -0.2574841927015592; + -0.15918194883360942 -0.23740132549636406; + -0.14034706409006803 -0.20534503972256973; + -0.12602184032280567 -0.1827098539044343; + -0.10928716440800472 -0.1582133200686042; + -0.07053969674257217 -0.10145491369831482; + -0.0249577746169536 -0.03585934915825971; + -2.8327303308330514e-15 3.742211718942586e-14; + 0.024957774616960776 0.03585934915827381; + 0.07053969674257636 0.10145491369829167; + 0.10928716440799909 
0.15821332006862954; + 0.1260218403227975 0.18270985390445083; + 0.1403470640900294 0.20534503972250218; + 0.1591819488336015 0.23740132549634094; + 0.16619150334082114 0.2574841927015898; + 0.16294175556587748 0.261255138955811; + 0.16920776495090983 0.2784335484692798; + 0.1671142952230893 0.3021381799340713; + 0.1823387591204167 0.3596054334022252; + 0.21232457328753865 0.4253218487528467; + 0.20648307473037922 0.44595312729305947; + 0.3088903295450278 0.6745656961983009; + 0.4944296396481271 0.9819332128466268; + 0.5990683230705801 1.1053138725180645] + expected_vthe = [22.654024448490784 22.494016350356883; + 23.744503682730446 23.61361063067715; + 25.26061134578617 25.173128418725682; + 26.177253875120066 26.122412383901523; + 26.510545637302872 26.47158368991228; + 26.798827552847246 26.77429043464489; + 27.202535498354287 27.2038739551587; + 27.506373594650846 27.529813468465488; + 27.631027625644876 27.664719606410365; + 27.750902611036295 27.793759280909274; + 27.935780521313532 27.992775960575692; + 28.089380398280714 28.157198480516957; + 28.15152314377127 28.223553488629253; + 28.211115085781678 28.2870195116558; + 28.28856778918977 28.369130039283018; + 28.330972960680672 28.41411592647979; + 28.33351348538364 28.416680586218863; + 28.330972960680675 28.41411592647976; + 28.288567789189763 28.369130039283064; + 28.211115085781678 28.287019511655785; + 28.15152314377127 28.223553488629236; + 28.089380398280724 28.157198480516957; + 27.93578052131354 27.992775960575713; + 27.750902611036295 27.79375928090935; + 27.63102762564488 27.664719606410383; + 27.506373594650853 27.529813468465495; + 27.202535498354287 27.2038739551587; + 26.79882755284725 26.774290434644872; + 26.510545637302886 26.471583689912283; + 26.177253875120083 26.122412383901523; + 25.26061134578619 25.173128418725696; + 23.744503682730446 23.613610630677236; + 22.65402444849082 22.494016350356937] + else + # Parallel solves, which here use only shared-memory parallelism, use the ADI + 
# preconditioner, which should be as accurate, but may give different results + # within Newton-Krylov tolerances. + expected_Ez = [-0.5990683230706185 -1.1053137071260657; + -0.4944296396481284 -0.9819330928307715; + -0.30889032954504736 -0.6745656725019216; + -0.2064830747303776 -0.44595313784207047; + -0.21232457328748663 -0.425321828548; + -0.18233875912042674 -0.3596054340570364; + -0.16711429522309232 -0.30213818089568956; + -0.16920776495088916 -0.27843354821637; + -0.1629417555658927 -0.2612551385019989; + -0.16619150334079993 -0.2574841930766524; + -0.15918194883360942 -0.23740132557788143; + -0.14034706409006803 -0.20534504018275174; + -0.12602184032280567 -0.18270985430997166; + -0.10928716440800472 -0.1582133189704785; + -0.07053969674257217 -0.101454914566153; + -0.0249577746169536 -0.035859347929368034; + -2.8327303308330514e-15 -4.536628997349189e-9; + 0.024957774616960776 0.035859348624052545; + 0.07053969674257636 0.10145491474282464; + 0.10928716440799909 0.15821331955573922; + 0.1260218403227975 0.18270985667178208; + 0.1403470640900294 0.2053450392202274; + 0.1591819488336015 0.23740132578753803; + 0.16619150334082114 0.25748419283426127; + 0.16294175556587748 0.2612551396310432; + 0.16920776495090983 0.2784335479625835; + 0.1671142952230893 0.3021381809909585; + 0.1823387591204167 0.35960543399747075; + 0.21232457328753865 0.4253218286915096; + 0.20648307473037922 0.44595313782295487; + 0.3088903295450278 0.6745656725300222; + 0.4944296396481271 0.9819330927685747; + 0.5990683230705801 1.1053137082172033] + expected_vthe = [22.654024454479018 22.494016869931663; + 23.74450367962989 23.61361086266046; + 25.260611341892094 25.173128419566062; + 26.17725387357487 26.122412390676395; + 26.510545632956767 26.47158369227529; + 26.7988275507785 26.774290427357606; + 27.20253549703805 27.20387395613098; + 27.506373594719115 27.529813465559865; + 27.63102762567087 27.6647196112545; + 27.75090260968854 27.79375927764987; + 27.935780521822277 
27.992775962652605; + 28.08938039775227 28.157198478502867; + 28.151523156278788 28.223553495610926; + 28.211115080270424 28.28701950947455; + 28.288567793141777 28.369130040934596; + 28.330972955353705 28.414115925374524; + 28.333513456094945 28.41668058720323; + 28.330972961606466 28.414115929999316; + 28.288567792143006 28.369130041232697; + 28.211115083430062 28.287019512466056; + 28.15152314952673 28.223553491119628; + 28.089380398299795 28.157198479157458; + 27.93578052229754 27.99277596224337; + 27.750902609816293 27.79375927871885; + 27.631027625671482 27.664719609967122; + 27.50637359506551 27.52981346582775; + 27.20253549697429 27.203873955958308; + 26.798827550864885 26.77429042759387; + 26.510545632587316 26.471583691722795; + 26.177253873758893 26.122412390844207; + 25.26061134158348 25.17312841929966; + 23.7445036798294 23.613610862832093; + 22.654024453873603 22.494016869407307] + end + + if expected_Ez == nothing + # Error: no expected input provided + println("data tested would be: Ez=", Ez) + @test false + else + @test elementwise_isapprox(Ez, expected_Ez, rtol=0.0, atol=2.0*tol) + end + if expected_vthe == nothing + # Error: no expected input provided + println("data tested would be: vthe=", vthe) + @test false + else + @test elementwise_isapprox(vthe, expected_vthe, rtol=tol, atol=0.0) + end + + # Iteration counts are fairly inconsistent, but it's good to check that they at + # least don't unexpectedly increase by an order of magnitude. + # Expected iteration count is from a serial run on Linux. 
+ expected_electron_advance_linear_iterations = 48716 + @test electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations + if !(electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations) + println("electron_advance_linear_iterations=$electron_advance_linear_iterations was greater than twice the expected $expected_electron_advance_linear_iterations.") + end end end From 70955ce89edc8dc930a70725d9b0e01e9c476945 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 27 Nov 2024 22:53:38 +0000 Subject: [PATCH 39/43] When using adaptive timestep, don't force fixed output times by default When running kinetic electron simulations, it can cause problems to take a very short ion timestep. When writing outputs at exactly fixed output times, this can happen if the previous timestep happened to end just before the output time. To avoid the very short step, the default is now to write output at the end of the first timestep that reaches or passes the set output time. The `exact_output_times` option restores the previous behaviour of decreasing the timestep so that output is written exactly at the nominal output time. 
--- moment_kinetics/src/input_structs.jl | 1 + moment_kinetics/src/moment_kinetics_input.jl | 1 + moment_kinetics/src/runge_kutta.jl | 65 +++++++++++++++---- moment_kinetics/src/time_advance.jl | 25 +++++-- .../test/braginskii_electrons_imex_tests.jl | 1 + .../test/recycling_fraction_tests.jl | 21 +++--- 6 files changed, 86 insertions(+), 28 deletions(-) diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index 682830365..dfc80b3f4 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -57,6 +57,7 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero limit_caused_by::Vector{mk_int} nwrite_moments::mk_int nwrite_dfns::mk_int + exact_output_times::Bool moments_output_times::Vector{mk_float} dfns_output_times::Vector{mk_float} type::String diff --git a/moment_kinetics/src/moment_kinetics_input.jl b/moment_kinetics/src/moment_kinetics_input.jl index e1008bf0e..c6ff98c85 100644 --- a/moment_kinetics/src/moment_kinetics_input.jl +++ b/moment_kinetics/src/moment_kinetics_input.jl @@ -146,6 +146,7 @@ function mk_input(input_dict=OptionsDict(); save_inputs_to_txt=false, ignore_MPI CFL_prefactor=-1.0, nwrite=1, nwrite_dfns=-1, + exact_output_times=false, type="SSPRK4", split_operators=false, steady_state_residual=false, diff --git a/moment_kinetics/src/runge_kutta.jl b/moment_kinetics/src/runge_kutta.jl index 0c87369b7..772cda8ba 100644 --- a/moment_kinetics/src/runge_kutta.jl +++ b/moment_kinetics/src/runge_kutta.jl @@ -1166,10 +1166,25 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, t_params.failure_caused_by[end] += 1 end - # If we were trying to take a step to the output timestep, dt will be smaller on - # the re-try, so will not reach the output time. 
- t_params.step_to_moments_output[] = false - t_params.step_to_dfns_output[] = false + if t_params.exact_output_times + # If we were trying to take a step to the output timestep, dt will be smaller on + # the re-try, so will not reach the output time. + t_params.step_to_moments_output[] = false + t_params.step_to_dfns_output[] = false + else + # If with the reduced dt the step will not pass the next output time, + # deactivate step_to_*_output[]. + if (t_params.step_to_moments_output[] + && t_params.t[] + t_params.previous_dt[] + t_params.dt[] < + t_params.moments_output_times[t_params.moments_output_counter[]]) + t_params.step_to_moments_output[] = false + end + if (t_params.step_to_dfns_output[] + && t_params.t[] + t_params.previous_dt[] + t_params.dt[] < + t_params.dfns_output_times[t_params.dfns_output_counter[]]) + t_params.step_to_dfns_output[] = false + end + end elseif (error_norm[] > 1.0 || isnan(error_norm[])) && t_params.dt[] > t_params.minimum_dt * (1.0 + 1.0e-13) # (1.0 + 1.0e-13) fudge factor accounts for possible rounding errors when # t+dt=next_output_time. @@ -1199,10 +1214,25 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, t_params.failure_caused_by[max_error_variable_index] += 1 end - # If we were trying to take a step to the output timestep, dt will be smaller on - # the re-try, so will not reach the output time. - t_params.step_to_moments_output[] = false - t_params.step_to_dfns_output[] = false + if t_params.exact_output_times + # If we were trying to take a step to the output timestep, dt will be smaller on + # the re-try, so will not reach the output time. + t_params.step_to_moments_output[] = false + t_params.step_to_dfns_output[] = false + else + # If with the reduced dt the step will not pass the next output time, + # deactivate step_to_*_output[]. 
+ if (t_params.step_to_moments_output[] + && t_params.t[] + t_params.previous_dt[] + t_params.dt[] < + t_params.moments_output_times[t_params.moments_output_counter[]]) + t_params.step_to_moments_output[] = false + end + if (t_params.step_to_dfns_output[] + && t_params.t[] + t_params.previous_dt[] + t_params.dt[] < + t_params.dfns_output_times[t_params.dfns_output_counter[]]) + t_params.step_to_dfns_output[] = false + end + end #println("t=$t, timestep failed, error_norm=$(error_norm[]), error_norms=$error_norms, decreasing timestep to ", t_params.dt[]) else @@ -1211,9 +1241,11 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, t_params.previous_dt[] = t_params.dt[] if t_params.step_to_moments_output[] || t_params.step_to_dfns_output[] - # Completed an output step, reset dt to what it was before it was reduced to reach - # the output time - t_params.dt[] = t_params.dt_before_output[] + if !t_params.exact_output_times + # Completed an output step, reset dt to what it was before it was reduced to reach + # the output time + t_params.dt[] = t_params.dt_before_output[] + end if t_params.step_to_moments_output[] t_params.step_to_moments_output[] = false @@ -1227,7 +1259,8 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, if t_params.dt[] > CFL_limit[] t_params.dt[] = CFL_limit[] end - else + end + if !t_params.exact_output_times || !(t_params.write_moments_output[] || t_params.write_dfns_output[]) # Adjust timestep according to Fehlberg's suggestion # (https://en.wikipedia.org/wiki/Runge%E2%80%93Kutta%E2%80%93Fehlberg_method). 
# `step_update_prefactor` is a constant numerical factor to make the estimate @@ -1337,7 +1370,9 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, && (current_time + t_params.dt[] >= t_params.moments_output_times[t_params.moments_output_counter[]])) t_params.dt_before_output[] = current_dt - t_params.dt[] = t_params.moments_output_times[t_params.moments_output_counter[]] - current_time + if t_params.exact_output_times + t_params.dt[] = t_params.moments_output_times[t_params.moments_output_counter[]] - current_time + end t_params.step_to_moments_output[] = true if t_params.dt[] < 0.0 @@ -1352,7 +1387,9 @@ function adaptive_timestep_update_t_params!(t_params, CFL_limits, error_norms, && (current_time + t_params.dt[] >= t_params.dfns_output_times[t_params.dfns_output_counter[]])) t_params.dt_before_output[] = current_dt - t_params.dt[] = t_params.dfns_output_times[t_params.dfns_output_counter[]] - current_time + if t_params.exact_output_times + t_params.dt[] = t_params.dfns_output_times[t_params.dfns_output_counter[]] - current_time + end t_params.step_to_dfns_output[] = true if t_params.dt[] < 0.0 diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index 8f2d7095b..d2c0e295a 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -355,7 +355,7 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, end_time = mk_float(code_time + t_input["dt"] * t_input["nstep"]) epsilon = 1.e-11 - if adaptive || t_input["write_after_fixed_step_count"] + if adaptive && !t_input["write_after_fixed_step_count"] if t_input["nwrite"] == 0 moments_output_times = [end_time] else @@ -483,11 +483,12 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, step_to_moments_output, step_to_dfns_output, write_moments_output, write_dfns_output, Ref(0), Ref(0), Ref{mk_float}(0.0), Ref(0), Ref(0), Ref(0), mk_int[], mk_int[], t_input["nwrite"], - 
t_input["nwrite_dfns"], moments_output_times, dfns_output_times, - t_input["type"], rk_coefs, rk_coefs_implicit, - implicit_coefficient_is_zero, n_rk_stages, rk_order, adaptive, - low_storage, mk_float(t_input["rtol"]), mk_float(t_input["atol"]), - mk_float(t_input["atol_upar"]), + t_input["nwrite_dfns"], + electron !== nothing && t_input["exact_output_times"], + moments_output_times, dfns_output_times, t_input["type"], rk_coefs, + rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, + rk_order, adaptive, low_storage, mk_float(t_input["rtol"]), + mk_float(t_input["atol"]), mk_float(t_input["atol_upar"]), mk_float(t_input["step_update_prefactor"]), mk_float(t_input["max_increase_factor"]), mk_float(t_input["max_increase_factor_near_last_fail"]), @@ -1878,9 +1879,21 @@ function time_advance!(pdf, scratch, scratch_implicit, scratch_electron, t_para end if write_moments t_params.moments_output_counter[] += 1 + if !t_params.exact_output_times + while (t_params.moments_output_counter[] ≤ length(t_params.moments_output_times) + && t_params.moments_output_times[t_params.moments_output_counter[]] ≤ t_params.t[]) + t_params.moments_output_counter[] += 1 + end + end end if write_dfns t_params.dfns_output_counter[] += 1 + if !t_params.exact_output_times + while (t_params.dfns_output_counter[] ≤ length(t_params.dfns_output_times) + && t_params.dfns_output_times[t_params.dfns_output_counter[]] ≤ t_params.t[]) + t_params.dfns_output_counter[] += 1 + end + end end if write_moments || write_dfns || finish_now diff --git a/moment_kinetics/test/braginskii_electrons_imex_tests.jl b/moment_kinetics/test/braginskii_electrons_imex_tests.jl index 1104271f3..fb91dfc3c 100644 --- a/moment_kinetics/test/braginskii_electrons_imex_tests.jl +++ b/moment_kinetics/test/braginskii_electrons_imex_tests.jl @@ -68,6 +68,7 @@ test_input = OptionsDict( "composition" => OptionsDict("n_ion_species" => 1, "minimum_dt" => 1.e-7, "rtol" => 1.0e-7, "nwrite" => 10000, + "exact_output_times" => true, 
"high_precision_error_sum" => true), "nonlinear_solver" => OptionsDict("nonlinear_max_iterations" => 100), "r" => OptionsDict("ngrid" => 1, diff --git a/moment_kinetics/test/recycling_fraction_tests.jl b/moment_kinetics/test/recycling_fraction_tests.jl index 04d289637..77b14fc2b 100644 --- a/moment_kinetics/test/recycling_fraction_tests.jl +++ b/moment_kinetics/test/recycling_fraction_tests.jl @@ -169,6 +169,11 @@ test_input_adaptive_split3["timestepping"] = recursive_merge(test_input_adaptive "minimum_dt" => 1.0e-7, "step_update_prefactor" => 0.064)) +# Test exact_output_times option in full-f/split1/split2 cases +test_input_adaptive["timestepping"]["exact_output_times"] = true +test_input_adaptive_split1["timestepping"]["exact_output_times"] = true +test_input_adaptive_split2["timestepping"]["exact_output_times"] = true + """ Run a test for a single set of parameters """ @@ -341,14 +346,14 @@ function runtests() @testset "Adaptive timestep - split 3" begin test_input_adaptive_split3["output"]["base_directory"] = test_output_directory run_test(test_input_adaptive_split3, - [-0.034623352735472034, -0.03200541773193755, -0.02714032291656093, - -0.020924986472905527, -0.01015057042512689, 0.0027893133203071574, - 0.012837899470698978, 0.022096372980618853, 0.0330348469665054, - 0.041531828755231016, 0.045382106043818246, 0.046246244563868354, - 0.042551970615727366, 0.034815169767529956, 0.027080688565416164, - 0.017886490800418996, 0.004784403555306537, -0.007762152788142663, - -0.01629330539573498, -0.02413421820486561, -0.0315621379076817, - -0.03416924694766477], rtol=6.0e-4, atol=2.0e-12) + [-0.0346196925024167, -0.03200201693849987, -0.02713764319615098, + -0.02092311349672712, -0.010150026206894121, 0.0027883420935253572, + 0.012835791449600767, 0.02209326318113659, 0.03303078703903627, + 0.04152829640863164, 0.04538051487359227, 0.04624543438581702, + 0.04254876799453081, 0.03481104153755928, 0.027077084096581314, + 0.01788382934269672, 0.00478320487966262, 
-0.0077618876322877485, + -0.016292009420807548, -0.024131976958124225, -0.031559093785483404, + -0.0341657304695615], rtol=6.0e-4, atol=2.0e-12) end @long @testset "Check other timestep - $type" for From ae1645870882cf1aa6e1b304f45ca222030db585 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Thu, 28 Nov 2024 13:55:35 +0000 Subject: [PATCH 40/43] Move exact_output_times to different position in time_info struct This seems to avoid compiler errors on macOS. No idea why this should help, or what the original problem was! --- moment_kinetics/src/input_structs.jl | 2 +- moment_kinetics/src/time_advance.jl | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/moment_kinetics/src/input_structs.jl b/moment_kinetics/src/input_structs.jl index dfc80b3f4..047167195 100644 --- a/moment_kinetics/src/input_structs.jl +++ b/moment_kinetics/src/input_structs.jl @@ -57,7 +57,6 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero limit_caused_by::Vector{mk_int} nwrite_moments::mk_int nwrite_dfns::mk_int - exact_output_times::Bool moments_output_times::Vector{mk_float} dfns_output_times::Vector{mk_float} type::String @@ -66,6 +65,7 @@ struct time_info{Terrorsum <: Real, T_debug_output, T_electron, Trkimp, Timpzero implicit_coefficient_is_zero::Timpzero n_rk_stages::mk_int rk_order::mk_int + exact_output_times::Bool adaptive::Bool low_storage::Bool rtol::mk_float diff --git a/moment_kinetics/src/time_advance.jl b/moment_kinetics/src/time_advance.jl index d2c0e295a..d579c7461 100644 --- a/moment_kinetics/src/time_advance.jl +++ b/moment_kinetics/src/time_advance.jl @@ -483,12 +483,12 @@ function setup_time_info(t_input, n_variables, code_time, dt_reload, step_to_moments_output, step_to_dfns_output, write_moments_output, write_dfns_output, Ref(0), Ref(0), Ref{mk_float}(0.0), Ref(0), Ref(0), Ref(0), mk_int[], mk_int[], t_input["nwrite"], - t_input["nwrite_dfns"], - electron !== nothing && t_input["exact_output_times"], - 
moments_output_times, dfns_output_times, t_input["type"], rk_coefs, - rk_coefs_implicit, implicit_coefficient_is_zero, n_rk_stages, - rk_order, adaptive, low_storage, mk_float(t_input["rtol"]), - mk_float(t_input["atol"]), mk_float(t_input["atol_upar"]), + t_input["nwrite_dfns"], moments_output_times, dfns_output_times, + t_input["type"], rk_coefs, rk_coefs_implicit, + implicit_coefficient_is_zero, n_rk_stages, rk_order, + electron !== nothing && t_input["exact_output_times"], adaptive, + low_storage, mk_float(t_input["rtol"]), mk_float(t_input["atol"]), + mk_float(t_input["atol_upar"]), mk_float(t_input["step_update_prefactor"]), mk_float(t_input["max_increase_factor"]), mk_float(t_input["max_increase_factor_near_last_fail"]), From 7ad038ef75f31626d2baedc21264e969275d1bf2 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Tue, 3 Dec 2024 12:18:33 +0000 Subject: [PATCH 41/43] Skip Jacobian matrix tests in macOS parallel tests CI job This test is extremely slow when run in parallel on macOS (maybe the macOS servers on Github Actions don't have enough memory?), so skip it in this case to avoid test failures. --- moment_kinetics/test/jacobian_matrix_tests.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/moment_kinetics/test/jacobian_matrix_tests.jl b/moment_kinetics/test/jacobian_matrix_tests.jl index 9bae8296c..b699aa563 100644 --- a/moment_kinetics/test/jacobian_matrix_tests.jl +++ b/moment_kinetics/test/jacobian_matrix_tests.jl @@ -10,6 +10,7 @@ using moment_kinetics.array_allocation: allocate_shared_float using moment_kinetics.boundary_conditions: enforce_v_boundary_condition_local!, enforce_vperp_boundary_condition! using moment_kinetics.calculus: derivative! +using moment_kinetics.communication using moment_kinetics.derivatives: derivative_z!, derivative_z_pdf_vpavperpz! 
using moment_kinetics.electron_fluid_equations: calculate_electron_qpar_from_pdf_no_r!, electron_energy_equation_no_r!, @@ -3534,6 +3535,11 @@ function test_electron_kinetic_equation(test_input; rtol=(5.0e2*epsilon)^2) end function runtests() + if Sys.isapple() && "CI" ∈ keys(ENV) && global_size[] > 1 + # These tests are too slow in the parallel tests job on macOS, so skip in that + # case. + return nothing + end # Create a temporary directory for test output test_output_directory = get_MPI_tempdir() test_input["output"]["base_directory"] = test_output_directory From 940ca4a8a89e99c6f4952e0125c8da24fa681fc5 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 4 Dec 2024 20:21:19 +0000 Subject: [PATCH 42/43] Make kinetic electron test use shared-memory, and ADI, when possible --- .../test/kinetic_electron_tests.jl | 216 ++++++------------ 1 file changed, 75 insertions(+), 141 deletions(-) diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl index 63c6b6ee2..0b0907c58 100644 --- a/moment_kinetics/test/kinetic_electron_tests.jl +++ b/moment_kinetics/test/kinetic_electron_tests.jl @@ -92,7 +92,14 @@ boltzmann_input = OptionsDict( ) # Test use distributed-memory when possible -boltzmann_input["z"]["nelement_local"] = boltzmann_input["z"]["nelement"] ÷ gcd(boltzmann_input["z"]["nelement"], global_size[]) +if global_size[] % 2 == 0 + # Divide by 2 so that we use shared memory when running in parallel, and so test the + # ADI preconditioner. 
+ procs_to_divide_by = global_size[] ÷ 2 +else + procs_to_divide_by = global_size[] +end +boltzmann_input["z"]["nelement_local"] = boltzmann_input["z"]["nelement"] ÷ gcd(boltzmann_input["z"]["nelement"], procs_to_divide_by) kinetic_input = deepcopy(boltzmann_input) kinetic_input["output"]["run_name"] = "kinetic_electron_test" @@ -180,146 +187,73 @@ function run_test() close_run_info(run_info) # Regression test - # Benchmark data generated in serial on Linux - if global_size[] == 1 - # Serial solves use LU preconditioner - expected_Ez = [-0.5990683230706185 -1.1053138725180998; - -0.4944296396481284 -0.9819332128466166; - -0.30889032954504736 -0.6745656961983237; - -0.2064830747303776 -0.4459531272930669; - -0.21232457328748663 -0.4253218487528007; - -0.18233875912042674 -0.3596054334022437; - -0.16711429522309232 -0.3021381799340685; - -0.16920776495088916 -0.2784335484692499; - -0.1629417555658927 -0.2612551389558109; - -0.16619150334079993 -0.2574841927015592; - -0.15918194883360942 -0.23740132549636406; - -0.14034706409006803 -0.20534503972256973; - -0.12602184032280567 -0.1827098539044343; - -0.10928716440800472 -0.1582133200686042; - -0.07053969674257217 -0.10145491369831482; - -0.0249577746169536 -0.03585934915825971; - -2.8327303308330514e-15 3.742211718942586e-14; - 0.024957774616960776 0.03585934915827381; - 0.07053969674257636 0.10145491369829167; - 0.10928716440799909 0.15821332006862954; - 0.1260218403227975 0.18270985390445083; - 0.1403470640900294 0.20534503972250218; - 0.1591819488336015 0.23740132549634094; - 0.16619150334082114 0.2574841927015898; - 0.16294175556587748 0.261255138955811; - 0.16920776495090983 0.2784335484692798; - 0.1671142952230893 0.3021381799340713; - 0.1823387591204167 0.3596054334022252; - 0.21232457328753865 0.4253218487528467; - 0.20648307473037922 0.44595312729305947; - 0.3088903295450278 0.6745656961983009; - 0.4944296396481271 0.9819332128466268; - 0.5990683230705801 1.1053138725180645] - expected_vthe = 
[22.654024448490784 22.494016350356883; - 23.744503682730446 23.61361063067715; - 25.26061134578617 25.173128418725682; - 26.177253875120066 26.122412383901523; - 26.510545637302872 26.47158368991228; - 26.798827552847246 26.77429043464489; - 27.202535498354287 27.2038739551587; - 27.506373594650846 27.529813468465488; - 27.631027625644876 27.664719606410365; - 27.750902611036295 27.793759280909274; - 27.935780521313532 27.992775960575692; - 28.089380398280714 28.157198480516957; - 28.15152314377127 28.223553488629253; - 28.211115085781678 28.2870195116558; - 28.28856778918977 28.369130039283018; - 28.330972960680672 28.41411592647979; - 28.33351348538364 28.416680586218863; - 28.330972960680675 28.41411592647976; - 28.288567789189763 28.369130039283064; - 28.211115085781678 28.287019511655785; - 28.15152314377127 28.223553488629236; - 28.089380398280724 28.157198480516957; - 27.93578052131354 27.992775960575713; - 27.750902611036295 27.79375928090935; - 27.63102762564488 27.664719606410383; - 27.506373594650853 27.529813468465495; - 27.202535498354287 27.2038739551587; - 26.79882755284725 26.774290434644872; - 26.510545637302886 26.471583689912283; - 26.177253875120083 26.122412383901523; - 25.26061134578619 25.173128418725696; - 23.744503682730446 23.613610630677236; - 22.65402444849082 22.494016350356937] - else - # Parallel solves, which here use only shared-memory parallelism, use the ADI - # preconditioner, which should be as accurate, but may give different results - # within Newton-Krylov tolerances. 
- expected_Ez = [-0.5990683230706185 -1.1053137071260657; - -0.4944296396481284 -0.9819330928307715; - -0.30889032954504736 -0.6745656725019216; - -0.2064830747303776 -0.44595313784207047; - -0.21232457328748663 -0.425321828548; - -0.18233875912042674 -0.3596054340570364; - -0.16711429522309232 -0.30213818089568956; - -0.16920776495088916 -0.27843354821637; - -0.1629417555658927 -0.2612551385019989; - -0.16619150334079993 -0.2574841930766524; - -0.15918194883360942 -0.23740132557788143; - -0.14034706409006803 -0.20534504018275174; - -0.12602184032280567 -0.18270985430997166; - -0.10928716440800472 -0.1582133189704785; - -0.07053969674257217 -0.101454914566153; - -0.0249577746169536 -0.035859347929368034; - -2.8327303308330514e-15 -4.536628997349189e-9; - 0.024957774616960776 0.035859348624052545; - 0.07053969674257636 0.10145491474282464; - 0.10928716440799909 0.15821331955573922; - 0.1260218403227975 0.18270985667178208; - 0.1403470640900294 0.2053450392202274; - 0.1591819488336015 0.23740132578753803; - 0.16619150334082114 0.25748419283426127; - 0.16294175556587748 0.2612551396310432; - 0.16920776495090983 0.2784335479625835; - 0.1671142952230893 0.3021381809909585; - 0.1823387591204167 0.35960543399747075; - 0.21232457328753865 0.4253218286915096; - 0.20648307473037922 0.44595313782295487; - 0.3088903295450278 0.6745656725300222; - 0.4944296396481271 0.9819330927685747; - 0.5990683230705801 1.1053137082172033] - expected_vthe = [22.654024454479018 22.494016869931663; - 23.74450367962989 23.61361086266046; - 25.260611341892094 25.173128419566062; - 26.17725387357487 26.122412390676395; - 26.510545632956767 26.47158369227529; - 26.7988275507785 26.774290427357606; - 27.20253549703805 27.20387395613098; - 27.506373594719115 27.529813465559865; - 27.63102762567087 27.6647196112545; - 27.75090260968854 27.79375927764987; - 27.935780521822277 27.992775962652605; - 28.08938039775227 28.157198478502867; - 28.151523156278788 28.223553495610926; - 28.211115080270424 
28.28701950947455; - 28.288567793141777 28.369130040934596; - 28.330972955353705 28.414115925374524; - 28.333513456094945 28.41668058720323; - 28.330972961606466 28.414115929999316; - 28.288567792143006 28.369130041232697; - 28.211115083430062 28.287019512466056; - 28.15152314952673 28.223553491119628; - 28.089380398299795 28.157198479157458; - 27.93578052229754 27.99277596224337; - 27.750902609816293 27.79375927871885; - 27.631027625671482 27.664719609967122; - 27.50637359506551 27.52981346582775; - 27.20253549697429 27.203873955958308; - 26.798827550864885 26.77429042759387; - 26.510545632587316 26.471583691722795; - 26.177253873758893 26.122412390844207; - 25.26061134158348 25.17312841929966; - 23.7445036798294 23.613610862832093; - 22.654024453873603 22.494016869407307] - end + # Benchmark data generated in serial on Linux with the LU preconditioner + expected_Ez = [-0.5990683230706185 -1.1053138725180998; + -0.4944296396481284 -0.9819332128466166; + -0.30889032954504736 -0.6745656961983237; + -0.2064830747303776 -0.4459531272930669; + -0.21232457328748663 -0.4253218487528007; + -0.18233875912042674 -0.3596054334022437; + -0.16711429522309232 -0.3021381799340685; + -0.16920776495088916 -0.2784335484692499; + -0.1629417555658927 -0.2612551389558109; + -0.16619150334079993 -0.2574841927015592; + -0.15918194883360942 -0.23740132549636406; + -0.14034706409006803 -0.20534503972256973; + -0.12602184032280567 -0.1827098539044343; + -0.10928716440800472 -0.1582133200686042; + -0.07053969674257217 -0.10145491369831482; + -0.0249577746169536 -0.03585934915825971; + -2.8327303308330514e-15 3.742211718942586e-14; + 0.024957774616960776 0.03585934915827381; + 0.07053969674257636 0.10145491369829167; + 0.10928716440799909 0.15821332006862954; + 0.1260218403227975 0.18270985390445083; + 0.1403470640900294 0.20534503972250218; + 0.1591819488336015 0.23740132549634094; + 0.16619150334082114 0.2574841927015898; + 0.16294175556587748 0.261255138955811; + 0.16920776495090983 
0.2784335484692798; + 0.1671142952230893 0.3021381799340713; + 0.1823387591204167 0.3596054334022252; + 0.21232457328753865 0.4253218487528467; + 0.20648307473037922 0.44595312729305947; + 0.3088903295450278 0.6745656961983009; + 0.4944296396481271 0.9819332128466268; + 0.5990683230705801 1.1053138725180645] + expected_vthe = [22.654024448490784 22.494016350356883; + 23.744503682730446 23.61361063067715; + 25.26061134578617 25.173128418725682; + 26.177253875120066 26.122412383901523; + 26.510545637302872 26.47158368991228; + 26.798827552847246 26.77429043464489; + 27.202535498354287 27.2038739551587; + 27.506373594650846 27.529813468465488; + 27.631027625644876 27.664719606410365; + 27.750902611036295 27.793759280909274; + 27.935780521313532 27.992775960575692; + 28.089380398280714 28.157198480516957; + 28.15152314377127 28.223553488629253; + 28.211115085781678 28.2870195116558; + 28.28856778918977 28.369130039283018; + 28.330972960680672 28.41411592647979; + 28.33351348538364 28.416680586218863; + 28.330972960680675 28.41411592647976; + 28.288567789189763 28.369130039283064; + 28.211115085781678 28.287019511655785; + 28.15152314377127 28.223553488629236; + 28.089380398280724 28.157198480516957; + 27.93578052131354 27.992775960575713; + 27.750902611036295 27.79375928090935; + 27.63102762564488 27.664719606410383; + 27.506373594650853 27.529813468465495; + 27.202535498354287 27.2038739551587; + 26.79882755284725 26.774290434644872; + 26.510545637302886 26.471583689912283; + 26.177253875120083 26.122412383901523; + 25.26061134578619 25.173128418725696; + 23.744503682730446 23.613610630677236; + 22.65402444849082 22.494016350356937] if expected_Ez == nothing # Error: no expected input provided From bda160b2dd7fcba9f29a42ff05136ba8411cb97a Mon Sep 17 00:00:00 2001 From: John Omotani Date: Wed, 4 Dec 2024 19:57:59 +0000 Subject: [PATCH 43/43] Communicate block boundary points after each ADI iteration This might help to remove some of the performance loss due to 
inverting the preconditioner separately in each distributed-MPI block. --- .../src/electron_kinetic_equation.jl | 83 +++--- .../test/kinetic_electron_tests.jl | 252 ++++++++++-------- 2 files changed, 182 insertions(+), 153 deletions(-) diff --git a/moment_kinetics/src/electron_kinetic_equation.jl b/moment_kinetics/src/electron_kinetic_equation.jl index fad344e69..49b5e81fb 100644 --- a/moment_kinetics/src/electron_kinetic_equation.jl +++ b/moment_kinetics/src/electron_kinetic_equation.jl @@ -1245,6 +1245,50 @@ global_rank[] == 0 && println("recalculating precon") v_size = vperp.n * vpa.n pdf_size = z.n * v_size + # Use these views to communicate block-boundary points + output_buffer_pdf_view = reshape(@view(this_output_buffer[1:pdf_size]), size(precon_f)) + output_buffer_ppar_view = @view(this_output_buffer[pdf_size+1:end]) + f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir] + f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir] + receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir] + receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir] + + function adi_communicate_boundary_points() + # Ensure values of precon_f and precon_ppar are consistent across + # distributed-MPI block boundaries. For both, take the average of the + # values from either side (precon_f is not upwinded, see below). + begin_vperp_vpa_region() + @loop_vperp_vpa ivperp ivpa begin + f_lower_endpoints[ivpa,ivperp] = output_buffer_pdf_view[ivpa,ivperp,1] + f_upper_endpoints[ivpa,ivperp] = output_buffer_pdf_view[ivpa,ivperp,end] + end + # We upwind the z-derivatives in `electron_z_advection!()`, so would + # expect that upwinding the results here in z would make sense. + # However, upwinding here makes convergence much slower (~10x), + # compared to picking the values from one side or other of the block + # boundary, or taking the average of the values on either side.
+ # Neither direction is special, so taking the average seems most + # sensible (although in an initial test it does not seem to converge + # faster than just picking one or the other). + # Maybe this could indicate that it is more important to have a fully + # self-consistent Jacobian inversion for the + # `electron_vpa_advection()` part rather than taking half(ish) of the + # values from one block and the other half(ish) from the other. + reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( + output_buffer_pdf_view, f_lower_endpoints, f_upper_endpoints, receive_buffer1, + receive_buffer2, z) + + begin_serial_region() + @serial_region begin + buffer_1[] = output_buffer_ppar_view[1] + buffer_2[] = output_buffer_ppar_view[end] + end + reconcile_element_boundaries_MPI!( + output_buffer_ppar_view, buffer_1, buffer_2, buffer_3, buffer_4, z) + + return nothing + end + begin_z_vperp_vpa_region() @loop_z_vperp_vpa iz ivperp ivpa begin row = (iz - 1)*v_size + (ivperp - 1)*vpa.n + ivpa @@ -1325,12 +1369,15 @@ global_rank[] == 0 && println("recalculating precon") first_adi_v_solve!() fill_intermediate_buffer!() adi_z_solve!() + adi_communicate_boundary_points() + for n ∈ 1:n_extra_iterations precon_iterations[] += 1 fill_intermediate_buffer!() adi_v_solve!() fill_intermediate_buffer!() adi_z_solve!() + adi_communicate_boundary_points() end # Unpack preconditioner solution @@ -1345,42 +1392,6 @@ global_rank[] == 0 && println("recalculating precon") precon_ppar[iz] = this_output_buffer[row] end - # Ensure values of precon_f and precon_ppar are consistent across - # distributed-MPI block boundaries. For precon_f take the upwind - # value, and for precon_ppar take the average.
- f_lower_endpoints = @view scratch_dummy.buffer_vpavperpr_1[:,:,ir] - f_upper_endpoints = @view scratch_dummy.buffer_vpavperpr_2[:,:,ir] - receive_buffer1 = @view scratch_dummy.buffer_vpavperpr_3[:,:,ir] - receive_buffer2 = @view scratch_dummy.buffer_vpavperpr_4[:,:,ir] - begin_vperp_vpa_region() - @loop_vperp_vpa ivperp ivpa begin - f_lower_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,1] - f_upper_endpoints[ivpa,ivperp] = precon_f[ivpa,ivperp,end] - end - # We upwind the z-derivatives in `electron_z_advection!()`, so would - # expect that upwinding the results here in z would make sense. - # However, upwinding here makes convergence much slower (~10x), - # compared to picking the values from one side or other of the block - # boundary, or taking the average of the values on either side. - # Neither direction is special, so taking the average seems most - # sensible (although in an intial test it does not seem to converge - # faster than just picking one or the other). - # Maybe this could indicate that it is more important to have a fully - # self-consistent Jacobian inversion for the - # `electron_vpa_advection()` part rather than taking half(ish) of the - # values from one block and the other half(ish) from the other. 
- reconcile_element_boundaries_MPI_z_pdf_vpavperpz!( - precon_f, f_lower_endpoints, f_upper_endpoints, receive_buffer1, - receive_buffer2, z) - - begin_serial_region() - @serial_region begin - buffer_1[] = precon_ppar[1] - buffer_2[] = precon_ppar[end] - end - reconcile_element_boundaries_MPI!( - precon_ppar, buffer_1, buffer_2, buffer_3, buffer_4, z) - return nothing end diff --git a/moment_kinetics/test/kinetic_electron_tests.jl b/moment_kinetics/test/kinetic_electron_tests.jl index 0b0907c58..60ef83ae6 100644 --- a/moment_kinetics/test/kinetic_electron_tests.jl +++ b/moment_kinetics/test/kinetic_electron_tests.jl @@ -153,130 +153,148 @@ function run_test() run_moment_kinetics(this_boltzmann_input) end - for (this_kinetic_input, label, tol) ∈ ((deepcopy(kinetic_input), "", 1.0e-6), - (deepcopy(kinetic_input_adaptive_timestep), "adaptive timestep", 1.0e-4)) - # Provide some progress info - println(" - testing kinetic electrons $label") + if ("nelement_local" ∈ keys(kinetic_input["z"]) + && kinetic_input["z"]["nelement"] ÷ kinetic_input["z"]["nelement_local"] < global_size[] + ) + # Using shared-memory parallelism, so should be using ADI preconditioner + adi_precon_iterations_values = (1,2) + else + adi_precon_iterations_values = -1 + end + for (this_kinetic_input, label, tol) ∈ ((deepcopy(kinetic_input), "", 1.0e-6), + (deepcopy(kinetic_input_adaptive_timestep), ", adaptive timestep", 1.0e-4)) this_kinetic_input["output"]["base_directory"] = test_output_directory - # Suppress console output while running. 
- quietoutput() do - restart_from_directory = joinpath(this_boltzmann_input["output"]["base_directory"], this_boltzmann_input["output"]["run_name"]) - restart_from_file_pattern = this_boltzmann_input["output"]["run_name"] * ".dfns*.h5" - restart_from_file = glob(restart_from_file_pattern, restart_from_directory)[1] - - # run kinetic electron simulation - run_moment_kinetics(this_kinetic_input; restart=restart_from_file) - end - - if global_rank[] == 0 - # Load and analyse output - ######################### - - path = joinpath(realpath(this_kinetic_input["output"]["base_directory"]), this_kinetic_input["output"]["run_name"]) - - # open the output file(s) - run_info = get_run_info_no_setup(path, dfns=true) - - # load fields data - Ez = postproc_load_variable(run_info, "Ez")[:,1,:] - vthe = postproc_load_variable(run_info, "electron_thermal_speed")[:,1,:] - electron_advance_linear_iterations = postproc_load_variable(run_info, "electron_advance_linear_iterations")[end] - - close_run_info(run_info) - - # Regression test - # Benchmark data generated in serial on Linux with the LU preconditioner - expected_Ez = [-0.5990683230706185 -1.1053138725180998; - -0.4944296396481284 -0.9819332128466166; - -0.30889032954504736 -0.6745656961983237; - -0.2064830747303776 -0.4459531272930669; - -0.21232457328748663 -0.4253218487528007; - -0.18233875912042674 -0.3596054334022437; - -0.16711429522309232 -0.3021381799340685; - -0.16920776495088916 -0.2784335484692499; - -0.1629417555658927 -0.2612551389558109; - -0.16619150334079993 -0.2574841927015592; - -0.15918194883360942 -0.23740132549636406; - -0.14034706409006803 -0.20534503972256973; - -0.12602184032280567 -0.1827098539044343; - -0.10928716440800472 -0.1582133200686042; - -0.07053969674257217 -0.10145491369831482; - -0.0249577746169536 -0.03585934915825971; - -2.8327303308330514e-15 3.742211718942586e-14; - 0.024957774616960776 0.03585934915827381; - 0.07053969674257636 0.10145491369829167; - 0.10928716440799909 
0.15821332006862954; - 0.1260218403227975 0.18270985390445083; - 0.1403470640900294 0.20534503972250218; - 0.1591819488336015 0.23740132549634094; - 0.16619150334082114 0.2574841927015898; - 0.16294175556587748 0.261255138955811; - 0.16920776495090983 0.2784335484692798; - 0.1671142952230893 0.3021381799340713; - 0.1823387591204167 0.3596054334022252; - 0.21232457328753865 0.4253218487528467; - 0.20648307473037922 0.44595312729305947; - 0.3088903295450278 0.6745656961983009; - 0.4944296396481271 0.9819332128466268; - 0.5990683230705801 1.1053138725180645] - expected_vthe = [22.654024448490784 22.494016350356883; - 23.744503682730446 23.61361063067715; - 25.26061134578617 25.173128418725682; - 26.177253875120066 26.122412383901523; - 26.510545637302872 26.47158368991228; - 26.798827552847246 26.77429043464489; - 27.202535498354287 27.2038739551587; - 27.506373594650846 27.529813468465488; - 27.631027625644876 27.664719606410365; - 27.750902611036295 27.793759280909274; - 27.935780521313532 27.992775960575692; - 28.089380398280714 28.157198480516957; - 28.15152314377127 28.223553488629253; - 28.211115085781678 28.2870195116558; - 28.28856778918977 28.369130039283018; - 28.330972960680672 28.41411592647979; - 28.33351348538364 28.416680586218863; - 28.330972960680675 28.41411592647976; - 28.288567789189763 28.369130039283064; - 28.211115085781678 28.287019511655785; - 28.15152314377127 28.223553488629236; - 28.089380398280724 28.157198480516957; - 27.93578052131354 27.992775960575713; - 27.750902611036295 27.79375928090935; - 27.63102762564488 27.664719606410383; - 27.506373594650853 27.529813468465495; - 27.202535498354287 27.2038739551587; - 26.79882755284725 26.774290434644872; - 26.510545637302886 26.471583689912283; - 26.177253875120083 26.122412383901523; - 25.26061134578619 25.173128418725696; - 23.744503682730446 23.613610630677236; - 22.65402444849082 22.494016350356937] - - if expected_Ez == nothing - # Error: no expected input provided - println("data 
tested would be: Ez=", Ez) - @test false + for adi_precon_iterations ∈ adi_precon_iterations_values + if adi_precon_iterations < 0 + # Provide some progress info + println(" - testing kinetic electrons $label") else - @test elementwise_isapprox(Ez, expected_Ez, rtol=0.0, atol=2.0*tol) + this_kinetic_input["nonlinear_solver"]["adi_precon_iterations"] = adi_precon_iterations + + # Provide some progress info + println(" - testing kinetic electrons $adi_precon_iterations ADI iterations$label") end - if expected_vthe == nothing - # Error: no expected input provided - println("data tested would be: vthe=", vthe) - @test false - else - @test elementwise_isapprox(vthe, expected_vthe, rtol=tol, atol=0.0) + + # Suppress console output while running. + quietoutput() do + restart_from_directory = joinpath(this_boltzmann_input["output"]["base_directory"], this_boltzmann_input["output"]["run_name"]) + restart_from_file_pattern = this_boltzmann_input["output"]["run_name"] * ".dfns*.h5" + restart_from_file = glob(restart_from_file_pattern, restart_from_directory)[1] + + # run kinetic electron simulation + run_moment_kinetics(this_kinetic_input; restart=restart_from_file) end - # Iteration counts are fairly inconsistent, but it's good to check that they at - # least don't unexpectedly increase by an order of magnitude. - # Expected iteration count is from a serial run on Linux. 
- expected_electron_advance_linear_iterations = 48716 - @test electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations - if !(electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations) - println("electron_advance_linear_iterations=$electron_advance_linear_iterations was greater than twice the expected $expected_electron_advance_linear_iterations.") + if global_rank[] == 0 + # Load and analyse output + ######################### + + path = joinpath(realpath(this_kinetic_input["output"]["base_directory"]), this_kinetic_input["output"]["run_name"]) + + # open the output file(s) + run_info = get_run_info_no_setup(path, dfns=true) + + # load fields data + Ez = postproc_load_variable(run_info, "Ez")[:,1,:] + vthe = postproc_load_variable(run_info, "electron_thermal_speed")[:,1,:] + electron_advance_linear_iterations = postproc_load_variable(run_info, "electron_advance_linear_iterations")[end] + + close_run_info(run_info) + + # Regression test + # Benchmark data generated in serial on Linux with the LU preconditioner + expected_Ez = [-0.5990683230706185 -1.1053138725180998; + -0.4944296396481284 -0.9819332128466166; + -0.30889032954504736 -0.6745656961983237; + -0.2064830747303776 -0.4459531272930669; + -0.21232457328748663 -0.4253218487528007; + -0.18233875912042674 -0.3596054334022437; + -0.16711429522309232 -0.3021381799340685; + -0.16920776495088916 -0.2784335484692499; + -0.1629417555658927 -0.2612551389558109; + -0.16619150334079993 -0.2574841927015592; + -0.15918194883360942 -0.23740132549636406; + -0.14034706409006803 -0.20534503972256973; + -0.12602184032280567 -0.1827098539044343; + -0.10928716440800472 -0.1582133200686042; + -0.07053969674257217 -0.10145491369831482; + -0.0249577746169536 -0.03585934915825971; + -2.8327303308330514e-15 3.742211718942586e-14; + 0.024957774616960776 0.03585934915827381; + 0.07053969674257636 0.10145491369829167; + 0.10928716440799909 0.15821332006862954; + 0.1260218403227975 
0.18270985390445083; + 0.1403470640900294 0.20534503972250218; + 0.1591819488336015 0.23740132549634094; + 0.16619150334082114 0.2574841927015898; + 0.16294175556587748 0.261255138955811; + 0.16920776495090983 0.2784335484692798; + 0.1671142952230893 0.3021381799340713; + 0.1823387591204167 0.3596054334022252; + 0.21232457328753865 0.4253218487528467; + 0.20648307473037922 0.44595312729305947; + 0.3088903295450278 0.6745656961983009; + 0.4944296396481271 0.9819332128466268; + 0.5990683230705801 1.1053138725180645] + expected_vthe = [22.654024448490784 22.494016350356883; + 23.744503682730446 23.61361063067715; + 25.26061134578617 25.173128418725682; + 26.177253875120066 26.122412383901523; + 26.510545637302872 26.47158368991228; + 26.798827552847246 26.77429043464489; + 27.202535498354287 27.2038739551587; + 27.506373594650846 27.529813468465488; + 27.631027625644876 27.664719606410365; + 27.750902611036295 27.793759280909274; + 27.935780521313532 27.992775960575692; + 28.089380398280714 28.157198480516957; + 28.15152314377127 28.223553488629253; + 28.211115085781678 28.2870195116558; + 28.28856778918977 28.369130039283018; + 28.330972960680672 28.41411592647979; + 28.33351348538364 28.416680586218863; + 28.330972960680675 28.41411592647976; + 28.288567789189763 28.369130039283064; + 28.211115085781678 28.287019511655785; + 28.15152314377127 28.223553488629236; + 28.089380398280724 28.157198480516957; + 27.93578052131354 27.992775960575713; + 27.750902611036295 27.79375928090935; + 27.63102762564488 27.664719606410383; + 27.506373594650853 27.529813468465495; + 27.202535498354287 27.2038739551587; + 26.79882755284725 26.774290434644872; + 26.510545637302886 26.471583689912283; + 26.177253875120083 26.122412383901523; + 25.26061134578619 25.173128418725696; + 23.744503682730446 23.613610630677236; + 22.65402444849082 22.494016350356937] + + if expected_Ez == nothing + # Error: no expected input provided + println("data tested would be: Ez=", Ez) + @test false + else 
+ @test elementwise_isapprox(Ez, expected_Ez, rtol=0.0, atol=2.0*tol) + end + if expected_vthe == nothing + # Error: no expected input provided + println("data tested would be: vthe=", vthe) + @test false + else + @test elementwise_isapprox(vthe, expected_vthe, rtol=tol, atol=0.0) + end + + # Iteration counts are fairly inconsistent, but it's good to check that they at + # least don't unexpectedly increase by an order of magnitude. + # Expected iteration count is from a serial run on Linux. + expected_electron_advance_linear_iterations = 48716 + @test electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations + if !(electron_advance_linear_iterations < 2 * expected_electron_advance_linear_iterations) + println("electron_advance_linear_iterations=$electron_advance_linear_iterations was greater than twice the expected $expected_electron_advance_linear_iterations.") + end end end end