-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Optimize run_spin_precession! for GPU (#459)
Optimize run_spin_precession! for GPU
- Loading branch information
Showing
17 changed files
with
264 additions
and
93 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
153 changes: 153 additions & 0 deletions
153
KomaMRICore/src/simulation/SimMethods/Bloch/BlochGPU.jl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
"""These properties are redundant with seq.Δt and seq.ADC, but it is much faster | ||
to calculate them on the CPU before the simulation is run.""" | ||
struct SeqBlockProperties{T<:Real} | ||
length::Int64 | ||
nADC::Int64 | ||
first_ADC::Bool | ||
ADC_indices::AbstractVector{Int64} | ||
tp_ADC::AbstractVector{T} | ||
dur::T | ||
end | ||
|
||
@functor SeqBlockProperties | ||
|
||
"""Stores information added to the prealloc struct for use in run_spin_precession! | ||
and run_spin_excitation!. This information is calculated on the CPU before the | ||
simulation is run.""" | ||
struct BlochGPUPrecalc{T} <: PrecalcResult{T} | ||
seq_properties::AbstractVector{SeqBlockProperties{T}} | ||
end | ||
|
||
@functor BlochGPUPrecalc | ||
|
||
"""Precalculates sequence properties for use in run_spin_precession!""" | ||
function precalculate( | ||
sim_method::Bloch, | ||
backend::KA.GPU, | ||
seq::DiscreteSequence{T}, | ||
parts::Vector{UnitRange{S}}, | ||
excitation_bool::Vector{Bool} | ||
) where {T<:Real,S<:Integer} | ||
seq_properties = SeqBlockProperties{T}[] | ||
for (block, p) in enumerate(parts) | ||
seq_block = @view seq[p] | ||
if excitation_bool[block] | ||
push!(seq_properties, SeqBlockProperties( | ||
length(seq_block.t), | ||
count(seq_block.ADC), | ||
false, | ||
Int64[], | ||
T[], | ||
zero(T) | ||
)) | ||
else | ||
ADC_indices = findall(seq_block.ADC) .- 1 | ||
if seq_block.ADC[1] | ||
deleteat!(ADC_indices, 1) | ||
end | ||
tp = cumsum(seq_block.Δt) | ||
push!(seq_properties, SeqBlockProperties( | ||
length(seq_block.t), | ||
count(seq_block.ADC), | ||
seq_block.ADC[1], | ||
ADC_indices, | ||
tp[ADC_indices], | ||
last(tp) | ||
)) | ||
end | ||
end | ||
|
||
return BlochGPUPrecalc(seq_properties) | ||
end | ||
|
||
"""Stores preallocated structs for use in Bloch GPU run_spin_precession function.""" | ||
struct BlochGPUPrealloc{T} <: PreallocResult{T} | ||
seq_properties::AbstractVector{SeqBlockProperties{T}} | ||
Bz::AbstractMatrix{T} | ||
Δϕ::AbstractMatrix{T} | ||
ϕ::AbstractArray{T} | ||
Mxy::AbstractMatrix{Complex{T}} | ||
ΔBz::AbstractVector{T} | ||
end | ||
|
||
"""
    Base.view(p::BlochGPUPrealloc, i::UnitRange)

Return `p` unchanged; the index range `i` is ignored. Block-sized views of the
buffers are built by `prealloc_block`.
"""
Base.view(p::BlochGPUPrealloc{T}, i::UnitRange) where {T<:Real} = p
"""
    prealloc_block(p::BlochGPUPrealloc, i::Integer)

Return a `BlochGPUPrealloc` whose buffers are views into those of `p`, sized for
sequence block `i`.

The returned `seq_properties` is a one-element vector containing only the properties
of block `i`, which is what `run_spin_precession!` reads as `pre.seq_properties[1]`.
`ΔBz` is shared with `p` as is.
"""
function prealloc_block(p::BlochGPUPrealloc{T}, i::Integer) where {T<:Real}
    seq_block = p.seq_properties[i]

    return BlochGPUPrealloc(
        [seq_block],
        view(p.Bz,:,1:seq_block.length),
        view(p.Δϕ,:,1:seq_block.length-1),
        # With ADC samples every intermediate phase is needed (matrix view); without
        # them a single column (vector view) is enough. The dimensionality of this
        # view selects the `apply_precession!` method.
        seq_block.nADC > 0 ? view(p.ϕ,:,1:seq_block.length-1) : view(p.ϕ,:,1),
        view(p.Mxy,:,1:seq_block.nADC),
        p.ΔBz
    )
end

"""Preallocates arrays for use in run_spin_precession! and run_spin_excitation!.""" | ||
function prealloc(sim_method::Bloch, backend::KA.GPU, obj::Phantom{T}, M::Mag{T}, max_block_length::Integer, precalc) where {T<:Real} | ||
if !(precalc isa BlochGPUPrecalc) @error """Sim method Bloch() does not support calling run_sim_time_iter directly. Use method BlochSimple() instead.""" end | ||
|
||
return BlochGPUPrealloc( | ||
precalc.seq_properties, | ||
KA.zeros(backend, T, (size(obj.x, 1), max_block_length)), | ||
KA.zeros(backend, T, (size(obj.x, 1), max_block_length-1)), | ||
KA.zeros(backend, T, (size(obj.x, 1), max_block_length-1)), | ||
KA.zeros(backend, Complex{T}, (size(obj.x, 1), max_block_length)), | ||
obj.Δw ./ T(2π .* γ) | ||
) | ||
end | ||
|
||
"""
    calculate_precession!(Δϕ, Δt, Bz)

Fill `Δϕ` in place with the per-interval phase increments

    Δϕ[:, j] = (Bz[:, j+1] + Bz[:, j]) * Δt[j] * T(-π * γ)

and return `Δϕ`. `Bz` has one more column than `Δϕ`; `Δt` is broadcast against the
columns.
"""
@inline function calculate_precession!(Δϕ::AbstractArray{T}, Δt::AbstractArray{T}, Bz::AbstractArray{T}) where {T<:Real}
    # `@views` keeps the two shifted column windows of Bz as views, so the fused
    # broadcast does not first materialize two copies of Bz.
    @views Δϕ .= (Bz[:,2:end] .+ Bz[:,1:end-1]) .* Δt .* T(-π .* γ)
    return Δϕ
end
"""
    apply_precession!(ϕ, Δϕ)

Reduce the phase increments `Δϕ` along the second dimension into `ϕ`, in place, and
return `ϕ`.

- `ϕ::AbstractVector`: `ϕ` receives the row-wise total of `Δϕ`.
- any other `ϕ::AbstractArray`: `ϕ` receives the row-wise running sum of `Δϕ`
  (`cumsum!` along `dims=2`), so it must have the same size as `Δϕ`.
"""
@inline function apply_precession!(ϕ::AbstractVector{T}, Δϕ::AbstractArray{T}) where {T<:Real}
    # `sum(...; dims=2)` yields an N×1 matrix; broadcasting writes it into the vector.
    ϕ .= sum(Δϕ, dims=2)
    return ϕ
end
function apply_precession!(ϕ::AbstractArray{T}, Δϕ::AbstractArray{T}) where {T<:Real}
    return cumsum!(ϕ, Δϕ, dims=2)
end

"""
    run_spin_precession!(p, seq, sig, M, sim_method::Bloch, backend::KA.GPU, pre)

Simulate one precession block on a GPU backend using the preallocated buffers in
`pre`.

Steps, in order:
1. spin coordinates are obtained from `get_spin_coords` for the times `seq.t`;
2. the effective field is written to `pre.Bz`;
3. phase increments are written to `pre.Δϕ` and accumulated into `pre.ϕ`;
4. if the block has ADC samples, `pre.Mxy` is filled and its sum over spins is
   written to `sig`;
5. `M.z` and `M.xy` are updated to their values at the end of the block.

# Arguments
- `p`: phantom (positions, motion, `T1`, `T2`, `ρ`).
- `seq`: discretized sequence block (`t`, `Δt`, `Gx`, `Gy`, `Gz`).
- `sig`: output buffer for the acquired signal; only written when `nADC > 0`.
- `M`: magnetization, updated in place.
- `pre`: block-sized `BlochGPUPrealloc`; `pre.seq_properties[1]` must describe this
  block.

# Returns
`nothing`; `sig`, `M` and the buffers of `pre` are mutated.
"""
function run_spin_precession!(
    p::Phantom{T},
    seq::DiscreteSequence{T},
    sig::AbstractArray{Complex{T}},
    M::Mag{T},
    sim_method::Bloch,
    backend::KA.GPU,
    pre::BlochGPUPrealloc
) where {T<:Real}
    #Simulation
    #Motion
    x, y, z = get_spin_coords(p.motion, p.x, p.y, p.z, seq.t')

    #Sequence block info
    seq_block = pre.seq_properties[1]

    #Effective field
    pre.Bz .= x .* seq.Gx' .+ y .* seq.Gy' .+ z .* seq.Gz' .+ pre.ΔBz

    #Rotation
    calculate_precession!(pre.Δϕ, seq.Δt', pre.Bz)
    # Reduces Δϕ Nspins x Nt to ϕ Nspins x Nt, if Nadc = 0, to Nspins x 1
    apply_precession!(pre.ϕ, pre.Δϕ)

    #Acquired signal
    if seq_block.nADC > 0
        ϕ_ADC = @view pre.ϕ[:,seq_block.ADC_indices]
        if seq_block.first_ADC
            # An ADC on the first sample sees the magnetization before any
            # precession or decay; the remaining ADC samples fill columns 2:end.
            pre.Mxy[:,1] .= M.xy
            pre.Mxy[:,2:end] .= M.xy .* exp.(-seq_block.tp_ADC' ./ p.T2) .* _cis.(ϕ_ADC)
        else
            pre.Mxy .= M.xy .* exp.(-seq_block.tp_ADC' ./ p.T2) .* _cis.(ϕ_ADC)
        end

        # Sum over spins (rows): one complex value per ADC sample.
        sig .= transpose(sum(pre.Mxy; dims=1))
    end

    #Mxy precession and relaxation, and Mz relaxation
    M.z .= M.z .* exp.(-seq_block.dur ./ p.T1) .+ p.ρ .* (T(1) .- exp.(-seq_block.dur ./ p.T1))
    # The last column of ϕ holds the phase accumulated over the whole block.
    M.xy .= M.xy .* exp.(-seq_block.dur ./ p.T2) .* _cis.(pre.ϕ[:,end])

    return nothing
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
1457a4c
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
KomaMRI Benchmarks
| Benchmark suite | Current | Previous | Ratio |
|---|---|---|---|
| MRI Lab/Bloch/CPU/2 thread(s) | 227517325.5 ns | 235229966.5 ns | 0.97 |
| MRI Lab/Bloch/CPU/4 thread(s) | 135033124 ns | 140495905 ns | 0.96 |
| MRI Lab/Bloch/CPU/8 thread(s) | 171880824 ns | 169591756.5 ns | 1.01 |
| MRI Lab/Bloch/CPU/1 thread(s) | 396561930.5 ns | 419227547 ns | 0.95 |
| MRI Lab/Bloch/GPU/CUDA | 138134905 ns | 135837984 ns | 1.02 |
| MRI Lab/Bloch/GPU/oneAPI | 14155999496.5 ns | 18356788557 ns | 0.77 |
| MRI Lab/Bloch/GPU/Metal | 3171338479 ns | 2931106125 ns | 1.08 |
| MRI Lab/Bloch/GPU/AMDGPU | 75482754 ns | 1750964243 ns | 0.04310924926180803 |
| Slice Selection 3D/Bloch/CPU/2 thread(s) | 1168211452 ns | 1174040352 ns | 1.00 |
| Slice Selection 3D/Bloch/CPU/4 thread(s) | 612565463 ns | 622515059.5 ns | 0.98 |
| Slice Selection 3D/Bloch/CPU/8 thread(s) | 495427593 ns | 492840880 ns | 1.01 |
| Slice Selection 3D/Bloch/CPU/1 thread(s) | 2245843835 ns | 2264093136 ns | 0.99 |
| Slice Selection 3D/Bloch/GPU/CUDA | 108701927 ns | 257306603 ns | 0.42 |
| Slice Selection 3D/Bloch/GPU/oneAPI | 776956866 ns | 1678945735.5 ns | 0.46 |
| Slice Selection 3D/Bloch/GPU/Metal | 769082459 ns | 1129875875 ns | 0.68 |
| Slice Selection 3D/Bloch/GPU/AMDGPU | 64232156 ns | 679066674 ns | 0.0945888797953292 |
This comment was automatically generated by workflow using github-action-benchmark.