From 6b0e67b4ab81898c43a5ee86ce0ca66e470a768a Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 9 Jun 2024 12:27:22 +0100 Subject: [PATCH 1/8] Fix handling of processes not included in any 'anyv' sub-block The algorithm for deciding the size of shared-memory ranges may sometimes exclude a few processes, if this allows the number of grid points to be factorised in a more favourable way. When this happens to an 'anyv' sub-block, some extra special handling (introduced in this commit) is required to make sure that the excluded processes actually don't do anything. --- moment_kinetics/src/array_allocation.jl | 36 ++++++++++++++++++++++--- moment_kinetics/src/communication.jl | 19 ++++++++++++- moment_kinetics/src/looping.jl | 9 +++++-- 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/moment_kinetics/src/array_allocation.jl b/moment_kinetics/src/array_allocation.jl index 7dc3bc5d3..b265b6879 100644 --- a/moment_kinetics/src/array_allocation.jl +++ b/moment_kinetics/src/array_allocation.jl @@ -9,6 +9,8 @@ using ..communication using ..debugging @debug_initialize_NaN using ..communication: block_rank, _block_synchronize +using MPI + """ allocate array with dimensions given by dims and entries of type Bool """ @@ -55,14 +57,27 @@ function allocate_shared_float(dims...; comm=nothing) array = allocate_shared(mk_float, dims; comm=comm) @debug_initialize_NaN begin # Initialize as NaN to try and catch use of uninitialized values - if block_rank[] == 0 + if comm === nothing + comm_rank = block_rank[] + this_comm = comm_block[] + elseif comm == MPI.COMM_NULL + comm_rank = -1 + this_comm = nothing + else + # Get MPI.Comm_rank when comm is not nothing + comm_rank = MPI.Comm_rank(comm) + this_comm = comm + end + if comm_rank == 0 array .= NaN @debug_track_initialized begin # Track initialization as if the array was not initialized to NaN array.is_initialized .= false end end - _block_synchronize() + if this_comm !== nothing + MPI.Barrier(this_comm) + end end return array end @@ -85,14 +100,27 @@ function allocate_shared_complex(dims...; comm=nothing) array = allocate_shared(Complex{mk_float}, dims; comm=comm) @debug_initialize_NaN begin # Initialize as NaN to try and catch use of uninitialized values - if block_rank[] == 0 + if comm === nothing + comm_rank = block_rank[] + this_comm = comm_block[] + elseif comm == MPI.COMM_NULL + comm_rank = -1 + this_comm = nothing + else + # Get MPI.Comm_rank when comm is not nothing + comm_rank = MPI.Comm_rank(comm) + this_comm = comm + end + if comm_rank == 0 array .= NaN @debug_track_initialized begin # Track initialization as if the array was not initialized to NaN array.is_initialized .= false end end - _block_synchronize() + if this_comm !== nothing + MPI.Barrier(this_comm) + end end return array end diff --git a/moment_kinetics/src/communication.jl b/moment_kinetics/src/communication.jl index ee1b84f3a..d958a5683 100644 --- a/moment_kinetics/src/communication.jl +++ b/moment_kinetics/src/communication.jl @@ -100,7 +100,7 @@ const anyv_subblock_size = Ref{mk_int}() """ """ -const anyv_isubblock_index = Ref{mk_int}() +const anyv_isubblock_index = Ref{Union{mk_int,Nothing}}() """ """ @@ -567,6 +567,19 @@ Array{mk_float} function allocate_shared(T, dims; comm=nothing, maybe_debug=true) if comm === nothing comm = comm_block[] + elseif comm == MPI.COMM_NULL + # If `comm` is a null communicator (on this process), then this array is just a + # dummy that will not be used. + array = Array{T}(undef, (0 for _ ∈ dims)...) 
+ + @debug_shared_array begin + # If @debug_shared_array is active, create DebugMPISharedArray instead of Array + if maybe_debug + array = DebugMPISharedArray(array, comm) + end + end + + return array end br = MPI.Comm_rank(comm) bs = MPI.Comm_size(comm) @@ -909,6 +922,10 @@ sub-blocks, depending on how the species and spatial dimensions are split up. `_anyv_subblock_synchronize()`. """ function _anyv_subblock_synchronize() + if comm_anyv_subblock[] == MPI.COMM_NULL + # No synchronization to do for a null communicator + return nothing + end MPI.Barrier(comm_anyv_subblock[]) diff --git a/moment_kinetics/src/looping.jl b/moment_kinetics/src/looping.jl index 8dd4b4c7a..a6c1ca880 100644 --- a/moment_kinetics/src/looping.jl +++ b/moment_kinetics/src/looping.jl @@ -471,14 +471,19 @@ eval(quote anyv_subblock_size[] = anyv_split[end] number_of_anyv_blocks = prod(anyv_split[1:end-1]) anyv_subblock_index = block_rank[] ÷ anyv_subblock_size[] - anyv_rank_within_subblock = block_rank[] % anyv_subblock_size[] + if anyv_subblock_index ≥ number_of_anyv_blocks + anyv_subblock_index = nothing + anyv_rank_within_subblock = -1 + else + anyv_rank_within_subblock = block_rank[] % anyv_subblock_size[] + end # Create communicator for the anyv subblock. OK to do this here as # communication.setup_distributed_memory_MPI() must have already been called # to set block_size[] and block_rank[] comm_anyv_subblock[] = MPI.Comm_split(comm_block[], anyv_subblock_index, anyv_rank_within_subblock) - anyv_subblock_rank[] = MPI.Comm_rank(comm_anyv_subblock[]) + anyv_subblock_rank[] = anyv_rank_within_subblock anyv_isubblock_index[] = anyv_subblock_index anyv_nsubblocks_per_block[] = number_of_anyv_blocks anyv_rank0 = (anyv_subblock_rank[] == 0) From 795adba8a9a53d36dd1d0d97aa8c3cc78062c265 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 9 Jun 2024 12:55:57 +0100 Subject: [PATCH 2/8] Fix debug features for anyv-subblock shared memory arrays --- moment_kinetics/src/communication.jl | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/moment_kinetics/src/communication.jl b/moment_kinetics/src/communication.jl index d958a5683..2f88c8cd9 100644 --- a/moment_kinetics/src/communication.jl +++ b/moment_kinetics/src/communication.jl @@ -531,6 +531,11 @@ end # instances, so their is_read and is_written members can be checked and # reset by _block_synchronize() const global_debugmpisharedarray_store = Vector{DebugMPISharedArray}(undef, 0) + # 'anyv' regions require a separate array store, because within an anyv region, + # processes in the same shared memory block may still not be synchronized if they are + # in different anyv sub-blocks, so debug checks within an anyv region should only + # consider the anyv-specific arrays. + const global_anyv_debugmpisharedarray_store = Vector{DebugMPISharedArray}(undef, 0) end """ @@ -646,7 +651,11 @@ function allocate_shared(T, dims; comm=nothing, maybe_debug=true) # If @debug_shared_array is active, create DebugMPISharedArray instead of Array if maybe_debug debug_array = DebugMPISharedArray(array, comm) - push!(global_debugmpisharedarray_store, debug_array) + if comm == comm_anyv_subblock[] + push!(global_anyv_debugmpisharedarray_store, debug_array) + else + push!(global_debugmpisharedarray_store, debug_array) + end return debug_array end end @@ -775,11 +784,17 @@ end Raises an error if any array has been accessed incorrectly since the previous call to _block_synchronize() - Can be added when debugging to help in down where an error occurs. 
+ Can be added when debugging to help pin down where an error occurs. """ - function debug_check_shared_memory(; kwargs...) - for (arraynum, array) ∈ enumerate(global_debugmpisharedarray_store) - debug_check_shared_array(array; kwargs...) + function debug_check_shared_memory(; comm=comm_block[], kwargs...) + if comm == comm_anyv_subblock[] + for (arraynum, array) ∈ enumerate(global_anyv_debugmpisharedarray_store) + debug_check_shared_array(array; comm=comm, kwargs...) + end + else + for (arraynum, array) ∈ enumerate(global_debugmpisharedarray_store) + debug_check_shared_array(array; comm=comm, kwargs...) + end end return nothing end @@ -960,7 +975,7 @@ function _anyv_subblock_synchronize() # * If an element is written to, only the rank that writes to it should read it. # @debug_detect_redundant_block_synchronize previous_was_unnecessary = true - for (arraynum, array) ∈ enumerate(global_debugmpisharedarray_store) + for (arraynum, array) ∈ enumerate(global_anyv_debugmpisharedarray_store) debug_check_shared_array(array; comm=comm_anyv_subblock[]) @@ -1074,6 +1089,7 @@ end """ function free_shared_arrays() @debug_shared_array resize!(global_debugmpisharedarray_store, 0) + @debug_shared_array resize!(global_anyv_debugmpisharedarray_store, 0) for w ∈ global_Win_store MPI.free(w) From 9c3c446b1269d5b13fa191a41556496c950eac51 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sat, 8 Jun 2024 18:03:31 +0100 Subject: [PATCH 3/8] Fix initialisation and parallelisation in 2D_FEM_assembly_test.jl --- test_scripts/2D_FEM_assembly_test.jl | 41 +++++++++++++++------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/test_scripts/2D_FEM_assembly_test.jl b/test_scripts/2D_FEM_assembly_test.jl index 12e280648..a983e1c13 100644 --- a/test_scripts/2D_FEM_assembly_test.jl +++ b/test_scripts/2D_FEM_assembly_test.jl @@ -112,14 +112,14 @@ end println("made inputs") println("vpa: ngrid: ",ngrid," nelement: ",nelement_local_vpa, " Lvpa: ",Lvpa) println("vperp: ngrid: ",ngrid," nelement: ",nelement_local_vperp, " Lvperp: ",Lvperp) - vpa, vpa_spectral = define_coordinate(vpa_input) - vperp, vperp_spectral = define_coordinate(vperp_input) # Set up MPI if standalone initialize_comms!() end setup_distributed_memory_MPI(1,1,1,1) + vpa, vpa_spectral = define_coordinate(vpa_input) + vperp, vperp_spectral = define_coordinate(vperp_input) looping.setup_loop_ranges!(block_rank[], block_size[]; s=1, sn=1, r=1, z=1, vperp=vperp.n, vpa=vpa.n, @@ -229,23 +229,25 @@ end msp = 1.0 nussp = 1.0 begin_serial_region() - for ivperp in 1:vperp.n - for ivpa in 1:vpa.n - Fs_M[ivpa,ivperp] = F_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp) - F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - H_M_exact[ivpa,ivperp] = H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - G_M_exact[ivpa,ivperp] = G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - d2Gdvpa2_M_exact[ivpa,ivperp] = d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - d2Gdvperp2_M_exact[ivpa,ivperp] = d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - dGdvperp_M_exact[ivpa,ivperp] = dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - d2Gdvperpdvpa_M_exact[ivpa,ivperp] = d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - dHdvpa_M_exact[ivpa,ivperp] = dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - dHdvperp_M_exact[ivpa,ivperp] = dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - C_M_exact[ivpa,ivperp] = Cssp_Maxwellian_inputs(denss,upars,vths,ms, - dens,upar,vth,msp, - 
nussp,vpa,vperp,ivpa,ivperp) + @serial_region begin + for ivperp in 1:vperp.n + for ivpa in 1:vpa.n + Fs_M[ivpa,ivperp] = F_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp) + F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + H_M_exact[ivpa,ivperp] = H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + G_M_exact[ivpa,ivperp] = G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + d2Gdvpa2_M_exact[ivpa,ivperp] = d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + d2Gdvperp2_M_exact[ivpa,ivperp] = d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + dGdvperp_M_exact[ivpa,ivperp] = dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + d2Gdvperpdvpa_M_exact[ivpa,ivperp] = d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + dHdvpa_M_exact[ivpa,ivperp] = dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + dHdvperp_M_exact[ivpa,ivperp] = dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + C_M_exact[ivpa,ivperp] = Cssp_Maxwellian_inputs(denss,upars,vths,ms, + dens,upar,vth,msp, + nussp,vpa,vperp,ivpa,ivperp) + end end - end + end rpbd_exact = allocate_rosenbluth_potential_boundary_data(vpa,vperp) begin_s_r_z_anyv_region() @@ -280,7 +282,8 @@ end algebraic_solve_for_d2Gdvperp2=false,calculate_GG=true,calculate_dGdvperp=true) # extract C[Fs,Fs'] result # and Rosenbluth potentials for testing - begin_vperp_vpa_region() + begin_s_r_z_anyv_region() + begin_anyv_vperp_vpa_region() @loop_vperp_vpa ivperp ivpa begin C_M_num[ivpa,ivperp] = fkpl_arrays.CC[ivpa,ivperp] G_M_num[ivpa,ivperp] = fkpl_arrays.GG[ivpa,ivperp] From 6b98483765b1e83254ff592dab35123e9f824ec2 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Sun, 9 Jun 2024 14:20:10 +0100 Subject: [PATCH 4/8] Fix MPI path in macOS CI --- .github/workflows/parallel_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/parallel_test.yml b/.github/workflows/parallel_test.yml index 88fd99e63..4118bc28f 100644 --- a/.github/workflows/parallel_test.yml +++ b/.github/workflows/parallel_test.yml @@ -54,9 +54,9 @@ jobs: version: '1.10' - uses: julia-actions/cache@v1 - run: | - MPILIBPATH=$(find /opt/homebrew/Cellar/open-mpi/ -name libmpi.dylib) + export MPILIBPATH=$(find /opt/homebrew/Cellar/open-mpi/ -name libmpi.dylib) touch Project.toml - julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["MPI", "MPIPreferences"]); using MPIPreferences; MPIPreferences.use_system_binary(library_names="/opt/homebrew/Cellar/open-mpi/5.0.3/lib/libmpi.dylib")' + julia --project -O3 --check-bounds=no -e "import Pkg; Pkg.add([\"MPI\", \"MPIPreferences\"]); using MPIPreferences; MPIPreferences.use_system_binary(library_names=\"$MPILIBPATH\")" julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.add(["NCDatasets", "Random", "SpecialFunctions", "Test"]); Pkg.develop(path="moment_kinetics/")' julia --project -O3 --check-bounds=no -e 'import Pkg; Pkg.precompile()' # Need to use openmpi so that the following arguments work: From 2f9f5a64375b0f302cd895e0fa746652a6beb33a Mon Sep 17 00:00:00 2001 From: Michael Hardman Date: Sun, 9 Jun 2024 22:53:00 +0100 Subject: [PATCH 5/8] Move the position of define_coordinate() to remove MPI initialisation error mentioned in #221. 
--- moment_kinetics/test/fokker_planck_tests.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/moment_kinetics/test/fokker_planck_tests.jl b/moment_kinetics/test/fokker_planck_tests.jl index edfd0d4d4..63fdd01e6 100644 --- a/moment_kinetics/test/fokker_planck_tests.jl +++ b/moment_kinetics/test/fokker_planck_tests.jl @@ -53,12 +53,12 @@ function create_grids(ngrid,nelement_vpa,nelement_vperp; #println("made inputs") #println("vpa: ngrid: ",ngrid," nelement: ",nelement_local_vpa, " Lvpa: ",Lvpa) #println("vperp: ngrid: ",ngrid," nelement: ",nelement_local_vperp, " Lvperp: ",Lvperp) - vpa, vpa_spectral = define_coordinate(vpa_input) - vperp, vperp_spectral = define_coordinate(vperp_input) # Set up MPI initialize_comms!() setup_distributed_memory_MPI(1,1,1,1) + vpa, vpa_spectral = define_coordinate(vpa_input) + vperp, vperp_spectral = define_coordinate(vperp_input) looping.setup_loop_ranges!(block_rank[], block_size[]; s=1, sn=1, r=1, z=1, vperp=vperp.n, vpa=vpa.n, From 62e5c92574e353eb242687fc8887f66c0ee6f0cc Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 10 Jun 2024 22:27:59 +0100 Subject: [PATCH 6/8] Revert incorrect part of 9c3c446b 9c3c446b was "Fix initialisation and parallelisation in 2D_FEM_assembly_test.jl" - should not have introduced a `@serial_region` where non-shared-memory arrays were being filled. --- test_scripts/2D_FEM_assembly_test.jl | 35 +++++++++++++--------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/test_scripts/2D_FEM_assembly_test.jl b/test_scripts/2D_FEM_assembly_test.jl index a983e1c13..9f443d493 100644 --- a/test_scripts/2D_FEM_assembly_test.jl +++ b/test_scripts/2D_FEM_assembly_test.jl @@ -228,26 +228,23 @@ end ms = 1.0 msp = 1.0 nussp = 1.0 - begin_serial_region() - @serial_region begin - for ivperp in 1:vperp.n - for ivpa in 1:vpa.n - Fs_M[ivpa,ivperp] = F_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp) - F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - H_M_exact[ivpa,ivperp] = H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - G_M_exact[ivpa,ivperp] = G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - d2Gdvpa2_M_exact[ivpa,ivperp] = d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - d2Gdvperp2_M_exact[ivpa,ivperp] = d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - dGdvperp_M_exact[ivpa,ivperp] = dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - d2Gdvperpdvpa_M_exact[ivpa,ivperp] = d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - dHdvpa_M_exact[ivpa,ivperp] = dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - dHdvperp_M_exact[ivpa,ivperp] = dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) - C_M_exact[ivpa,ivperp] = Cssp_Maxwellian_inputs(denss,upars,vths,ms, - dens,upar,vth,msp, - nussp,vpa,vperp,ivpa,ivperp) - end + for ivperp in 1:vperp.n + for ivpa in 1:vpa.n + Fs_M[ivpa,ivperp] = F_Maxwellian(denss,upars,vths,vpa,vperp,ivpa,ivperp) + F_M[ivpa,ivperp] = F_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + H_M_exact[ivpa,ivperp] = H_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + G_M_exact[ivpa,ivperp] = G_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + d2Gdvpa2_M_exact[ivpa,ivperp] = d2Gdvpa2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + d2Gdvperp2_M_exact[ivpa,ivperp] = d2Gdvperp2_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + dGdvperp_M_exact[ivpa,ivperp] = dGdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + d2Gdvperpdvpa_M_exact[ivpa,ivperp] = 
d2Gdvperpdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + dHdvpa_M_exact[ivpa,ivperp] = dHdvpa_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + dHdvperp_M_exact[ivpa,ivperp] = dHdvperp_Maxwellian(dens,upar,vth,vpa,vperp,ivpa,ivperp) + C_M_exact[ivpa,ivperp] = Cssp_Maxwellian_inputs(denss,upars,vths,ms, + dens,upar,vth,msp, + nussp,vpa,vperp,ivpa,ivperp) end - end + end rpbd_exact = allocate_rosenbluth_potential_boundary_data(vpa,vperp) begin_s_r_z_anyv_region() From 23372f52453bf50e4b5f163cf8edc7ba0db618e8 Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 10 Jun 2024 22:42:19 +0100 Subject: [PATCH 7/8] Fix anyv parallelisation in fokker_planck_tests.jl --- moment_kinetics/test/fokker_planck_tests.jl | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/moment_kinetics/test/fokker_planck_tests.jl b/moment_kinetics/test/fokker_planck_tests.jl index 63fdd01e6..c5b192bac 100644 --- a/moment_kinetics/test/fokker_planck_tests.jl +++ b/moment_kinetics/test/fokker_planck_tests.jl @@ -279,7 +279,8 @@ function runtests() calculate_GG=true, calculate_dGdvperp=true) # extract C[Fs,Fs'] result # and Rosenbluth potentials for testing - begin_vperp_vpa_region() + begin_s_r_z_anyv_region() + begin_anyv_vperp_vpa_region() @loop_vperp_vpa ivperp ivpa begin G_M_num[ivpa,ivperp] = fkpl_arrays.GG[ivpa,ivperp] H_M_num[ivpa,ivperp] = fkpl_arrays.HH[ivpa,ivperp] @@ -419,7 +420,8 @@ function runtests() conserving_corrections!(fkpl_arrays.CC,Fs_M,vpa,vperp,dummy_array) end # extract C[Fs,Fs'] result - begin_vperp_vpa_region() + begin_s_r_z_anyv_region() + begin_anyv_vperp_vpa_region() @loop_vperp_vpa ivperp ivpa begin C_M_num[ivpa,ivperp] = fkpl_arrays.CC[ivpa,ivperp] end @@ -579,7 +581,8 @@ function runtests() density_conserving_correction!(fkpl_arrays.CC,Fs_M,vpa,vperp,dummy_array) end # extract C[Fs,Fs'] result - begin_vperp_vpa_region() + begin_s_r_z_anyv_region() + begin_anyv_vperp_vpa_region() @loop_vperp_vpa ivperp ivpa begin C_M_num[ivpa,ivperp] = fkpl_arrays.CC[ivpa,ivperp] end From f57f3a7fdb14f86f019b9a308dcff6bd9bc2734d Mon Sep 17 00:00:00 2001 From: John Omotani Date: Mon, 10 Jun 2024 22:54:00 +0100 Subject: [PATCH 8/8] Fix anyv MPI.Bcast() calls in fokker_plack Need to skip the `Bcast()` if `comm_anyv_subblock[]` is a null communicator (indicating that the process is not part of an anyv subblock). --- moment_kinetics/src/fokker_planck.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/moment_kinetics/src/fokker_planck.jl b/moment_kinetics/src/fokker_planck.jl index 0352f9a1f..1f520e9f6 100644 --- a/moment_kinetics/src/fokker_planck.jl +++ b/moment_kinetics/src/fokker_planck.jl @@ -664,7 +664,9 @@ function conserving_corrections!(CC,pdf_in,vpa,vperp,dummy_vpavperp) # Broadcast x0, x1, x2 to all processes in the 'anyv' subblock param_vec = [x0, x1, x2, upar] - MPI.Bcast!(param_vec, 0, comm_anyv_subblock[]) + if comm_anyv_subblock[] != MPI.COMM_NULL + MPI.Bcast!(param_vec, 0, comm_anyv_subblock[]) + end (x0, x1, x2, upar) = param_vec # correct CC @@ -695,7 +697,9 @@ function density_conserving_correction!(CC,pdf_in,vpa,vperp,dummy_vpavperp) # Broadcast x0 to all processes in the 'anyv' subblock param_vec = [x0] - MPI.Bcast!(param_vec, 0, comm_anyv_subblock[]) + if comm_anyv_subblock[] != MPI.COMM_NULL + MPI.Bcast!(param_vec, 0, comm_anyv_subblock[]) + end x0 = param_vec[1] # correct CC
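
The common thread in patches 1 and 8 is a guard pattern: any process whose 'anyv' sub-block
communicator is `MPI.COMM_NULL` must skip collective calls on that communicator rather than
participate in them. A minimal standalone sketch of that pattern follows, assuming only MPI.jl;
the names `subblock_size`, `subblock_comm` and `param_vec` are illustrative and are not
moment_kinetics identifiers. Run it with something like `mpiexec -n 5 julia sketch.jl` so that
one rank is left over and excluded from every sub-block.

# Standalone sketch of the null-communicator guard pattern (not part of the patches above).
using MPI

MPI.Init()

comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
nproc = MPI.Comm_size(comm)

# Fixed sub-block size; any leftover ranks are excluded, analogous to processes that do not
# fit into an 'anyv' sub-block when the grid points are factorised.
subblock_size = 2
n_subblocks = nproc ÷ subblock_size
subblock_index = rank ÷ subblock_size

if subblock_index ≥ n_subblocks
    # Excluded rank: passing `nothing` as the colour makes Comm_split() return
    # MPI.COMM_NULL on this process.
    subblock_comm = MPI.Comm_split(comm, nothing, 0)
else
    subblock_comm = MPI.Comm_split(comm, subblock_index, rank % subblock_size)
end

# Guard every collective on the sub-block communicator, as the patches do for
# MPI.Barrier() and MPI.Bcast!(): excluded ranks skip the call entirely.
param_vec = [0.0]
if subblock_comm != MPI.COMM_NULL
    if MPI.Comm_rank(subblock_comm) == 0
        param_vec[1] = 1.0
    end
    MPI.Bcast!(param_vec, 0, subblock_comm)
    MPI.Barrier(subblock_comm)
end

println("rank $rank: in sub-block = $(subblock_comm != MPI.COMM_NULL), value = $(param_vec[1])")

MPI.Finalize()

The same guard is what `_anyv_subblock_synchronize()` and the `MPI.Bcast!()` calls in
fokker_planck.jl apply after this series.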