working on CuArray, but really need an optimised searchsortedlast; need a KernelAbstractions.jl sort to work on different accelerators
Author: Andrei Leonard Nicusan
Committed: Nov 9, 2023
Commit: 91e2db9 (1 parent: b88ea28)
Showing 6 changed files with 108 additions and 10 deletions.
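The commit message names the missing piece: an optimised, device-side searchsortedlast so the sort can run on different accelerators through KernelAbstractions.jl. The sketch below is not part of this commit; it is a minimal, hypothetical illustration (the kernel and wrapper names are invented here) of how such a lookup could be written once for any KernelAbstractions backend.

# Hypothetical sketch, not from this commit: a portable searchsortedlast
# over a sorted device vector, written with KernelAbstractions.jl.
using KernelAbstractions

# For every query x[i], store in ix[i] the index of the last element of the
# sorted vector v that is <= x[i]. Base.searchsortedlast is plain Julia
# (an ordinary binary search), so it also compiles for device arrays.
@kernel function searchsortedlast_kernel!(ix, @Const(v), @Const(x))
    i = @index(Global)
    @inbounds ix[i] = searchsortedlast(v, x[i])
end

# Run on whichever backend the input arrays live on (CPU, CUDA, ROCm, ...).
function device_searchsortedlast!(ix, v, x)
    backend = get_backend(x)
    kernel! = searchsortedlast_kernel!(backend)
    kernel!(ix, v, x; ndrange=length(x))
    KernelAbstractions.synchronize(backend)
    return ix
end

Inside SIHSort this would presumably replace the CPU-only searchsortedlast (for example, when binning elements against the chosen splitters), but that wiring is exactly what the commit message says is still to be done.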
basic.jl (new file)
@@ -0,0 +1,34 @@
# File : basic.jl
# License: MIT
# Author : Andrei Leonard Nicusan <[email protected]>
# Date : 13.10.2022


using MPI
using MPISort
using Random

using CUDA


# Initialise MPI, get communicator for all ranks, rank index, number of ranks
MPI.Init()

comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
nranks = MPI.Comm_size(comm)

# Generate local GPU array on each MPI rank - even with different number of elements
rng = Xoshiro(rank)
num_elements = 50 + rank * 2
local_array = CuArray(rand(rng, 1:500, num_elements))

# Sort arrays across all MPI ranks
alg = SIHSort(comm)
sorted_local_array = mpisort!(local_array; alg=alg)

# Print each local array sequentially
for i in 0:nranks - 1
    rank == i && @show rank sorted_local_array alg.stats
    MPI.Barrier(comm)
end
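To run the example above, the script must be launched with one Julia process per MPI rank. One common pattern (an assumption here, not shown in the commit) is to use MPI.jl's mpiexec wrapper, with the script saved as basic.jl:

using MPI

# Launch 4 MPI ranks of the example script with the launcher configured for
# MPI.jl; "basic.jl" refers to the file shown above.
MPI.mpiexec() do exe
    run(`$exe -n 4 $(Base.julia_cmd()) basic.jl`)
end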
largescale.jl (new file)
@@ -0,0 +1,46 @@
# File : largescale.jl
# License: MIT
# Author : Andrei Leonard Nicusan <[email protected]>
# Date : 13.10.2022


using MPI
using MPISort
using Random

using CUDA


# Initialise MPI, get communicator for all ranks, rank index, number of ranks
MPI.Init()

comm = MPI.COMM_WORLD
rank = MPI.Comm_rank(comm)
nranks = MPI.Comm_size(comm)


function largescale(num_elements=500_000)

    # Generate local array on each MPI rank - even with different number of elements
    rng = Xoshiro(rank)
    num_elements = num_elements + rank * (num_elements ÷ 20)
    local_array = CuArray(rand(rng, Int32(1):Int32(10 * num_elements), num_elements))

    # Sort arrays across all MPI ranks
    alg = SIHSort(comm)
    @time sorted_local_array = mpisort!(local_array; alg=alg)

    # Print each local array sequentially
    for i in 0:nranks - 1
        rank == i && @show rank alg.stats
        MPI.Barrier(comm)
    end
end


# Run once to compile everything, then again to benchmark
rank == 0 && println("First run, compiling...")
largescale()

rank == 0 && println("Single benchmark run")
largescale()
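One caveat on the `@time` call inside largescale: CUDA kernels launch asynchronously, so a plain `@time` can under-report device work unless something forces synchronisation (the MPI exchange inside mpisort! largely does). If the GPU portion should be captured explicitly, a hedged drop-in replacement for that line, assuming CUDA.jl, is:

# Synchronise the device inside the timed region so the measurement covers
# the GPU work as well as the MPI exchange.
@time CUDA.@sync sorted_local_array = mpisort!(local_array; alg=alg)

# Alternatively, CUDA.@time also reports GPU allocations:
# CUDA.@time mpisort!(local_array; alg=alg)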