KomaMRI.jl icon indicating copy to clipboard operation
KomaMRI.jl copied to clipboard

BlochDict simulation method not working with multiple threads

Open beorostica opened this issue 8 months ago • 0 comments

When performing a simulation with the BlochDict method and with multiple threads, the REPL throws an error. Here is a MWE

julia> using KomaMRI
julia> seq = read_seq(joinpath(dirname(dirname(pathof(KomaMRI))), "examples", "1.sequences", "epi_se.seq"))
julia> obj = brain_phantom2D()
julia> sys = Scanner()
julia> simParams = KomaMRICore.default_sim_params()
julia> simParams["Nthreads"] = 2
julia> simParams["sim_method"] = KomaMRICore.BlochDict()
julia> simulate(obj, seq, sys; simParams)
ERROR: DimensionMismatch: array could not be broadcast to match destination
Stacktrace:
...
Full Stack Trace
┌ Info: Running simulation in the GPU (NVIDIA GeForce GTX 1650)
│   koma_version = v"0.7.5"
│   sim_method = BlochDict(save_Mz=false)
│   spins = 6506
│   time_points = 11079
└   adc_points = 10201
ERROR: DimensionMismatch: array could not be broadcast to match destination
Stacktrace:
  [1] check_broadcast_shape
    @ .\broadcast.jl:553 [inlined]
  [2] check_broadcast_shape
    @ .\broadcast.jl:554 [inlined]
  [3] check_broadcast_axes
    @ .\broadcast.jl:556 [inlined]
  [4] instantiate
    @ .\broadcast.jl:297 [inlined]
  [5] materialize!
    @ C:\Users\beorostica\.julia\packages\GPUArrays\5XhED\src\host\broadcast.jl:41 [inlined]
  [6] materialize!(dest::SubArray{ComplexF32, 2, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Int64, Int64}, false}, bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Nothing, typeof(identity), Tuple{LinearAlgebra.Transpose{ComplexF32, CUDA.CuArray{ComplexF32, 2, CUDA.Mem.DeviceBuffer}}}})
    @ Base.Broadcast .\broadcast.jl:881
  [7] run_spin_precession!(p::Phantom{Float32}, seq::KomaMRICore.DiscreteSequence{Float32}, sig::SubArray{ComplexF32, 3, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Int64}, false}, M::Mag{Float32}, sim_method::BlochDict)
    @ KomaMRICore C:\Users\beorostica\.julia\packages\KomaMRICore\COLVZ\src\simulation\Bloch\BlochDictSimulationMethod.jl:60
  [8] (::KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}})(::Tuple{Int64, UnitRange{Int64}})
    @ KomaMRICore C:\Users\beorostica\.julia\packages\KomaMRICore\COLVZ\src\simulation\SimulatorCore.jl:49
  [9] next
    @ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\library.jl:54 [inlined]
 [10] next
    @ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\core.jl:787 [inlined]
 [11] macro expansion
    @ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\core.jl:181 [inlined]
 [12] __foldl__(rf::Transducers.Reduction{Transducers.NoComplete, Transducers.Reduction{Transducers.Map{KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}}}, Transducers.BottomRF{Transducers.Completing{typeof(ThreadsX.Implementations.return_nothing)}}}}, init::Nothing, coll::Base.Iterators.Enumerate{SubArray{UnitRange{Int64}, 1, Vector{UnitRange{Int64}}, Tuple{UnitRange{Int64}}, true}})
    @ Transducers C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\processes.jl:157
 [13] foldl_basecase
    @ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\processes.jl:365 [inlined]
 [14] _reduce_basecase(rf::Transducers.Reduction{Transducers.Map{KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}}}, Transducers.BottomRF{Transducers.Completing{typeof(ThreadsX.Implementations.return_nothing)}}}, init::Nothing, reducible::Transducers.SizedReducible{Base.Iterators.Enumerate{SubArray{UnitRange{Int64}, 1, Vector{UnitRange{Int64}}, Tuple{UnitRange{Int64}}, true}}, Int64})
    @ Transducers C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\threading_utils.jl:58
 [15] _reduce(ctx::Transducers.CancellableDACContext, rf::Transducers.Reduction{Transducers.Map{KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}}}, Transducers.BottomRF{Transducers.Completing{typeof(ThreadsX.Implementations.return_nothing)}}}, init::Nothing, reducible::Transducers.SizedReducible{Base.Iterators.Enumerate{SubArray{UnitRange{Int64}, 1, Vector{UnitRange{Int64}}, Tuple{UnitRange{Int64}}, true}}, Int64})
    @ Transducers C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:150
 [16] _reduce(ctx::Transducers.CancellableDACContext, rf::Transducers.Reduction{Transducers.Map{KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}}}, Transducers.BottomRF{Transducers.Completing{typeof(ThreadsX.Implementations.return_nothing)}}}, init::Nothing, reducible::Transducers.SizedReducible{Base.Iterators.Enumerate{Vector{UnitRange{Int64}}}, Int64})
    @ Transducers C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:159
 [17] _transduce_assoc_nocomplete
    @ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:139 [inlined]
 [18] #transduce_assoc#177
    @ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:108 [inlined]
 [19] transduce_assoc
    @ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:84 [inlined]
 [20] #foldxt#185
    @ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:246 [inlined]
 [21] foldxt
    @ C:\Users\beorostica\.julia\packages\Transducers\yTXrD\src\reduce.jl:246 [inlined]
 [22] foreach(f::KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}}, itr::Base.Iterators.Enumerate{Vector{UnitRange{Int64}}}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ ThreadsX.Implementations C:\Users\beorostica\.julia\packages\ThreadsX\vXfYv\src\foreach.jl:118
 [23] foreach(f::KomaMRICore.var"#171#173"{Phantom{Float32}, KomaMRICore.DiscreteSequence{Float32}, SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Mag{Float32}, BlochDict, Vector{Colon}}, itr::Base.Iterators.Enumerate{Vector{UnitRange{Int64}}})
    @ ThreadsX.Implementations C:\Users\beorostica\.julia\packages\ThreadsX\vXfYv\src\foreach.jl:118
 [24] run_spin_precession_parallel!(obj::Phantom{Float32}, seq::KomaMRICore.DiscreteSequence{Float32}, sig::SubArray{ComplexF32, 4, CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}, Base.Slice{Base.OneTo{Int64}}}, false}, Xt::Mag{Float32}, sim_method::BlochDict; Nthreads::Int64)
    @ KomaMRICore C:\Users\beorostica\.julia\packages\KomaMRICore\COLVZ\src\simulation\SimulatorCore.jl:48
 [25] run_sim_time_iter!(obj::Phantom{Float32}, seq::KomaMRICore.DiscreteSequence{Float32}, sig::CUDA.CuArray{ComplexF32, 4, CUDA.Mem.DeviceBuffer}, Xt::Mag{Float32}, sim_method::BlochDict; Nblocks::Int64, Nthreads::Int64, parts::Vector{UnitRange{Int64}}, excitation_bool::Vector{Bool}, w::Nothing)
    @ KomaMRICore C:\Users\beorostica\.julia\packages\KomaMRICore\COLVZ\src\simulation\SimulatorCore.jl:132
 [26] macro expansion
    @ .\timing.jl:501 [inlined]
 [27] simulate(obj::Phantom{Float64}, seq::Sequence, sys::Scanner; simParams::Dict{String, Any}, w::Nothing)
    @ KomaMRICore C:\Users\beorostica\.julia\packages\KomaMRICore\COLVZ\src\simulation\SimulatorCore.jl:213
 [28] top-level scope
    @ REPL[63]:1
 [29] top-level scope
    @ C:\Users\beorostica\.julia\packages\CUDA\BbliS\src\initialization.jl:52

The problem doesn't happen if we use just 1 thread.

It is a dimension mismatch error, so probably we need to pay attention to extreme points when performing the simulation with this method.

beorostica avatar Oct 25 '23 17:10 beorostica