-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Optimize run_spin_excitation! for GPU (#462)
- Loading branch information
Showing 3 changed files with 107 additions and 4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
61 changes: 61 additions & 0 deletions
61
KomaMRICore/src/simulation/SimMethods/Bloch/KernelFunctions.jl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
using KernelAbstractions: @kernel, @Const, @index, @uniform, @groupsize, @localmem | ||
|
||
## COV_EXCL_START | ||
|
||
# apply_excitation!(Mxy, Mz, φ, B1, Bz, B, ΔT1, ΔT2, ρ)
#
# KernelAbstractions kernel that updates `Mxy[i]` and `Mz[i]` in place, where `i` is the
# global index of the work-item.
#
# For every column `step` of `φ` (there are `size(φ, 2)` of them) the kernel:
#   1. builds four scalars (α real/imag, β real/imag) from `φ[i, step]`, `B1[step]`,
#      `Bz[i, step]` and `B[i, step]`;
#   2. applies the quadratic α/β update to the current (Mxy, Mz) state;
#   3. scales the transverse part by `ΔT2[i, step]` and blends the longitudinal part
#      towards `ρ[i]` with weight `ΔT1[i, step]`.
# NOTE(review): α/β look like Cayley-Klein (spinor) rotation parameters and ΔT1/ΔT2 like
# precomputed relaxation factors — confirm against the caller.
#
# All read-only inputs are wrapped in `@Const`. The working state lives in `@localmem`
# buffers sized by the group size; every access uses the work-item's own local index,
# so the work-items never read each other's slots.
@kernel function apply_excitation!(
    Mxy, Mz, @Const(φ), @Const(B1), @Const(Bz), @Const(B), @Const(ΔT1), @Const(ΔT2), @Const(ρ)
)
    gid = @index(Global)
    lid = @index(Local)

    @uniform FT = eltype(φ)
    @uniform group_len = @groupsize()[1]
    @uniform n_steps = size(φ, 2)

    # One slot per work-item of the group for each intermediate quantity.
    loc_α_re = @localmem FT (group_len,)
    loc_α_im = @localmem FT (group_len,)
    loc_β_im = @localmem FT (group_len,)
    loc_β_re = @localmem FT (group_len,)
    loc_mxy_re = @localmem FT (group_len,)
    loc_mxy_im = @localmem FT (group_len,)
    loc_mxy_re_next = @localmem FT (group_len,)
    loc_mxy_im_next = @localmem FT (group_len,)
    loc_mz = @localmem FT (group_len,)
    loc_mz_next = @localmem FT (group_len,)
    loc_ρ = @localmem FT (group_len,)

    # Stage the initial state of this work-item.
    @inbounds mxy_start = Mxy[gid]
    @inbounds loc_mxy_re[lid] = real(mxy_start)
    @inbounds loc_mxy_im[lid] = imag(mxy_start)
    @inbounds loc_mz[lid] = Mz[gid]
    @inbounds loc_ρ[lid] = ρ[gid]

    @inbounds for step in 1:n_steps
        phase = φ[gid, step]
        #TO-DO: use sincos once oneAPI releases version with https://github.com/JuliaGPU/oneAPI.jl/commit/260a4dda0ea223dbf0893de7b4a13d994ae27bd1
        sin_φ = sin(phase)
        cos_φ = cos(phase)

        b1_t = B1[step]
        bz_t = Bz[gid, step]
        b_t = B[gid, step]

        loc_α_re[lid] = cos_φ
        if !iszero(b_t)
            loc_α_im[lid] = -(bz_t / b_t) * sin_φ
            loc_β_re[lid] = (imag(b1_t) / b_t) * sin_φ
            loc_β_im[lid] = -(real(b1_t) / b_t) * sin_φ
        else
            # `B` is exactly zero here: nudge the denominator by eps to avoid dividing by 0.
            b_safe = b_t + eps(FT)
            loc_α_im[lid] = -(bz_t / b_safe) * sin_φ
            loc_β_re[lid] = (imag(b1_t) / b_safe) * sin_φ
            loc_β_im[lid] = -(real(b1_t) / b_safe) * sin_φ
        end

        # Read the staged values back so the update formulas stay legible.
        αr = loc_α_re[lid]
        αi = loc_α_im[lid]
        βr = loc_β_re[lid]
        βi = loc_β_im[lid]
        mx = loc_mxy_re[lid]
        my = loc_mxy_im[lid]
        mz = loc_mz[lid]

        # All three results depend on the *previous* (mx, my, mz), hence the `_next`
        # buffers: nothing is overwritten until every component has been computed.
        loc_mxy_re_next[lid] = 2 * (my * (αr * αi - βr * βi) + mz * (αi * βi + αr * βr)) +
            mx * (αr^2 - αi^2 - βr^2 + βi^2)
        loc_mxy_im_next[lid] = -2 * (mx * (αr * αi + βr * βi) - mz * (αr * βi - αi * βr)) +
            my * (αr^2 - αi^2 + βr^2 - βi^2)
        loc_mz_next[lid] = mz * (αr^2 + αi^2 - βr^2 - βi^2) -
            2 * (mx * (αr * βr - αi * βi) + my * (αr * βi + αi * βr))

        ΔT2_t = ΔT2[gid, step]
        ΔT1_t = ΔT1[gid, step]
        loc_mxy_re[lid] = loc_mxy_re_next[lid] * ΔT2_t
        loc_mxy_im[lid] = loc_mxy_im_next[lid] * ΔT2_t
        loc_mz[lid] = loc_mz_next[lid] * ΔT1_t + loc_ρ[lid] * (1 - ΔT1_t)
    end

    # Write the final state back to the global arrays.
    @inbounds Mxy[gid] = loc_mxy_re[lid] + 1im * loc_mxy_im[lid]
    @inbounds Mz[gid] = loc_mz[lid]
end
|
||
## COV_EXCL_STOP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1b6c5be
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
KomaMRI Benchmarks
| Benchmark suite | Current (ns) | Previous (ns) | Ratio (current / previous) |
|---|---|---|---|
| MRI Lab/Bloch/CPU/2 thread(s) | 226897725.5 | 227517325.5 | 1.00 |
| MRI Lab/Bloch/CPU/4 thread(s) | 134821691 | 135033124 | 1.00 |
| MRI Lab/Bloch/CPU/8 thread(s) | 143505624 | 171880824 | 0.83 |
| MRI Lab/Bloch/CPU/1 thread(s) | 343336904 | 396561930.5 | 0.87 |
| MRI Lab/Bloch/GPU/CUDA | 56897573.5 | 138134905 | 0.41 |
| MRI Lab/Bloch/GPU/oneAPI | 516675827 | 14155999496.5 | 0.03649871753158408 |
| MRI Lab/Bloch/GPU/Metal | 561507500 | 3171338479 | 0.18 |
| MRI Lab/Bloch/GPU/AMDGPU | 36448199 | 75482754 | 0.48 |
| Slice Selection 3D/Bloch/CPU/2 thread(s) | 1157773672 | 1168211452 | 0.99 |
| Slice Selection 3D/Bloch/CPU/4 thread(s) | 621102018 | 612565463 | 1.01 |
| Slice Selection 3D/Bloch/CPU/8 thread(s) | 383824312 | 495427593 | 0.77 |
| Slice Selection 3D/Bloch/CPU/1 thread(s) | 1947221684 | 2245843835 | 0.87 |
| Slice Selection 3D/Bloch/GPU/CUDA | 101669032 | 108701927 | 0.94 |
| Slice Selection 3D/Bloch/GPU/oneAPI | 649875888 | 776956866 | 0.84 |
| Slice Selection 3D/Bloch/GPU/Metal | 565000312.5 | 769082459 | 0.73 |
| Slice Selection 3D/Bloch/GPU/AMDGPU | 60318280 | 64232156 | 0.94 |
This comment was automatically generated by workflow using github-action-benchmark.