KomaMRI.jl
KomaMRI.jl copied to clipboard
BlochDict simulation method not working with multiple threads
When performing a simulation with the BlochDict
method and with multiple threads, the REPL throws an error. Here is a MWE
julia> using KomaMRI
julia> seq = read_seq(joinpath(dirname(dirname(pathof(KomaMRI))), "examples", "1.sequences", "epi_se.seq"))
julia> obj = brain_phantom2D()
julia> sys = Scanner()
julia> simParams = KomaMRICore.default_sim_params()
julia> simParams["Nthreads"] = 2
julia> simParams["sim_method"] = KomaMRICore.BlochDict()
julia> simulate(obj, seq, sys; simParams)
ERROR: DimensionMismatch: array could not be broadcast to match destination
Stacktrace:
...
Full Stack Trace
┌ Info: Running simulation in the GPU (NVIDIA GeForce GTX 1650)
│ koma_version = v"0.7.5"
│ sim_method = BlochDict(save_Mz=false)
│ spins = 6506
│ time_points = 11079
└ adc_points = 10201
ERROR: DimensionMismatch: array could not be broadcast to match destination
Stacktrace:
[1] check_broadcast_shape
@ .\broadcast.jl:553 [inlined]
[2] check_broadcast_shape
@ .\broadcast.jl:554 [inlined]
[3] check_broadcast_axes
@ .\broadcast.jl:556 [inlined]
[4] instantiate
@ .\broadcast.jl:297 [inlined]
[5] materialize!
@ C:\Users\beorostica\.julia\packages\GPUArrays\5XhED\src\host\broadcast.jl:41 [inlined]
[6] materialize!(dest::SubArray{ComplexF32, 2, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Int64, Int64}, false}, bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Nothing, typeof(identity), Tuple{LinearAlgebra.Transpose{ComplexF32, CUDA.CuArray{ComplexF32, 2, CUDA.Mem.DeviceBuffer}}}})
@ Base.Broadcast .\broadcast.jl:881
[7] run_spin_precession!(p::Phantom{Float32}, seq::KomaMRICore.DiscreteSequence{Float32}, sig::SubArray{ComplexF32, 3, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Int64}, false}, M::Mag{Float32}, sim_method::BlochDict)
@ KomaMRICore C:\Users\beorostica\.julia\packages\KomaMRICore\COLVZ\src\simulation\Bloch\BlochDictSimulationMethod.jl:60
[8] (::KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}})(::Tuple{Int64, UnitRange{Int64}})
@ KomaMRICore C:\Users\beorostica\.julia\packages\KomaMRICore\COLVZ\src\simulation\SimulatorCore.jl:49
[9] next
@ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\library.jl:54 [inlined]
[10] next
@ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\core.jl:787 [inlined]
[11] macro expansion
@ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\core.jl:181 [inlined]
[12] __foldl__(rf::Transducers.Reduction{Transducers.NoComplete, Transducers.Reduction{Transducers.Map{KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}}}, Transducers.BottomRF{Transducers.Completing{typeof(ThreadsX.Implementations.return_nothing)}}}}, init::Nothing, coll::Base.Iterators.Enumerate{SubArray{UnitRange{Int64}, 1, Vector{UnitRange{Int64}}, Tuple{UnitRange{Int64}}, true}})
@ Transducers C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\processes.jl:157
[13] foldl_basecase
@ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\processes.jl:365 [inlined]
[14] _reduce_basecase(rf::Transducers.Reduction{Transducers.Map{KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}}}, Transducers.BottomRF{Transducers.Completing{typeof(ThreadsX.Implementations.return_nothing)}}}, init::Nothing, reducible::Transducers.SizedReducible{Base.Iterators.Enumerate{SubArray{UnitRange{Int64}, 1, Vector{UnitRange{Int64}}, Tuple{UnitRange{Int64}}, true}}, Int64})
@ Transducers C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\threading_utils.jl:58
[15] _reduce(ctx::Transducers.CancellableDACContext, rf::Transducers.Reduction{Transducers.Map{KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}}}, Transducers.BottomRF{Transducers.Completing{typeof(ThreadsX.Implementations.return_nothing)}}}, init::Nothing, reducible::Transducers.SizedReducible{Base.Iterators.Enumerate{SubArray{UnitRange{Int64}, 1, Vector{UnitRange{Int64}}, Tuple{UnitRange{Int64}}, true}}, Int64})
@ Transducers C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:150
[16] _reduce(ctx::Transducers.CancellableDACContext, rf::Transducers.Reduction{Transducers.Map{KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}}}, Transducers.BottomRF{Transducers.Completing{typeof(ThreadsX.Implementations.return_nothing)}}}, init::Nothing, reducible::Transducers.SizedReducible{Base.Iterators.Enumerate{Vector{UnitRange{Int64}}}, Int64})
@ Transducers C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:159
[17] _transduce_assoc_nocomplete
@ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:139 [inlined]
[18] #transduce_assoc#177
@ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:108 [inlined]
[19] transduce_assoc
@ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:84 [inlined]
[20] #foldxt#185
@ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:246 [inlined]
[21] foldxt
@ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:246 [inlined]
[22] foreach(f::KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}}, itr::Base.Iterators.Enumerate{Vector{UnitRange{Int64}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ ThreadsX.Implementations C:\Users\beorostica\.julia\packages\ThreadsX\vXfYv\src\foreach.jl:118
[23] foreach(f::KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}}, itr::Base.Iterators.Enumerate{Vector{UnitRange{Int64}}})
@ ThreadsX.Implementations C:\Users\beorostica\.julia\packages\ThreadsX\vXfYv\src\foreach.jl:118
[24] run_spin_precession_parallel!(obj::Phantom{Float32}, seq::KomaMRICore.DiscreteSequence{Float32}, sig::SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Xt::Mag{Float32}, sim_method::BlochDict; Nthreads::Int64)
@ KomaMRICore C:\Users\beorostica\.julia\packages\KomaMRICore\COLVZ\src\simulation\SimulatorCore.jl:48
[25] run_sim_time_iter!(obj::Phantom{Float32}, seq::KomaMRICore.DiscreteSequence{Float32}, sig::CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Xt::Mag{Float32}, sim_method::BlochDict; Nblocks::Int64, Nthreads::Int64, parts::Vector{UnitRange{Int64}}, excitation_bool::Vector{Bool}, w::Nothing)
@ KomaMRICore C:\Users\beorostica\.julia\packages\KomaMRICore\COLVZ\src\simulation\SimulatorCore.jl:132
[26] macro expansion
@ .\timing.jl:501 [inlined]
[27] simulate(obj::Phantom{Float64}, seq::Sequence, sys::Scanner; simParams::Dict{String, Any}, w::Nothing)
@ KomaMRICore C:\Users\beorostica\.julia\packages\KomaMRICore\COLVZ\src\simulation\SimulatorCore.jl:213
[28] top-level scope
@ REPL[63]:1
[29] top-level scope
@ C:\Users\beorostica\.julia\packages\CUDA\BbliS\src\initialization.jl:52
The problem doesn't happen if we use just 1 thread.
It is a dimension mismatch error, so probably we need to pay attention to extreme points when performing the simulation with this method.