LoopVectorization.jl
LoopVectorization.jl copied to clipboard
CartesianIndices always run on only a single thread
With one thread, everything is fine:
julia> using LoopVectorization
julia> function foo!(dst, src)
@turbo thread=true for i in eachindex(dst, src)
dst[i] = src[i]
end
end
foo! (generic function with 1 method)
julia> src_big = rand(4, 10); dst_big = similar(src_big); src = view(src_big, 2:3, :); dst = view(dst_big, 2:3, :);
julia> foo!(dst, src); dst ≈ src
true
However, if I run this with four threads, I either get
ERROR: UndefRefError: access to undefined reference
Stacktrace:
[1] getindex
@ ./array.jl:861 [inlined]
[2] add_loops!(ls::LoopVectorization.LoopSet, LPSYM::Vector{Any}, LB::Core.SimpleVector)
@ LoopVectorization ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:83
[3] avx_loopset!(ls::LoopVectorization.LoopSet, instr::Vector{LoopVectorization.Instruction}, ops::Vector{LoopVectorization.OperationStruct}, arf::Vector{LoopVectorization.ArrayRefStruct}, AM::Vector{Any}, LPSYM::Vector{Any}, LB::Core.SimpleVector, vargs::Core.SimpleVector)
@ LoopVectorization ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:633
[4] _turbo_loopset(OPSsv::Any, ARFsv::Any, AMsv::Any, LPSYMsv::Any, LBsv::Core.SimpleVector, vargs::Core.SimpleVector, UNROLL::Tuple{Bool, Int8, Int8, Int8, Bool, Int64, Int64, Int64, Int64, Int64, Int64, Int64, UInt64})
@ LoopVectorization ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:691
[5] #s166#85
@ ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:717 [inlined]
[6] var"#s166#85"(::Any, ::Any, ::Any, ::Any, ::Any, ::Any, ::Any, vargs#::Any, ::Any, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Type, #unused#::Any, var#arguments#::Any)
@ LoopVectorization ./none:0
[7] (::Core.GeneratedFunctionStub)(::Any, ::Vararg{Any})
@ Core ./boot.jl:580
[8] macro expansion
@ ~/.julia/packages/LoopVectorization/3Zpn2/src/codegen/lower_threads.jl:652 [inlined]
[9] macro expansion
@ ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:713 [inlined]
[10] _turbo_!(::Val{(false, 0, 0, 0, false, 4, 32, 15, 64, 32768, 262144, 12582912, 0x0000000000000004)}, ::Val{(:LoopVectorization, :getindex, LoopVectorization.OperationStruct(0x00000000000000000000000000000001, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.memload, 0x0001, 0x01), :LoopVectorization, :identity, LoopVectorization.OperationStruct(0x00000000000000000000000000000001, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000001, 0x00000000000000000000000000000000, LoopVectorization.compute, 0x0002, 0x00), :LoopVectorization, :setindex!, LoopVectorization.OperationStruct(0x00000000000000000000000000000001, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000002, 0x00000000000000000000000000000000, LoopVectorization.memstore, 0x0003, 0x02))}, ::Val{(LoopVectorization.ArrayRefStruct{:src, Symbol("##vptr##_src")}(0x00000000000000000000000000000001, 0x00000000000000000000000000000001, 0x00000000000000000000000000000000, 0x00000000000000000000000000000001), LoopVectorization.ArrayRefStruct{:dst, Symbol("##vptr##_dst")}(0x00000000000000000000000000000001, 0x00000000000000000000000000000001, 0x00000000000000000000000000000000, 0x00000000000000000000000000000001))}, ::Val{(0, (), (), (), (), (), ())}, ::Val{(:i,)}, ::Val{Tuple{Tuple{CartesianIndices{2, Tuple{CloseOpenIntervals.CloseOpen{Static.StaticInt{0}, Int64}, CloseOpenIntervals.CloseOpen{Static.StaticInt{0}, Int64}}}}, Tuple{LayoutPointers.GroupedStridedPointers{Tuple{Ptr{Float64}, Ptr{Float64}}, (1, 1), (0, 0), ((1, 2), (1, 2)), ((1, 2), (3, 4)), Tuple{Static.StaticInt{8}, Int64, Static.StaticInt{8}, Int64}, NTuple{4, Static.StaticInt{0}}}}}}, ::Int64, ::Int64, ::Ptr{Float64}, ::Ptr{Float64}, ::Int64, ::Int64)
@ LoopVectorization ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:713
[11] foo!(dst::SubArray{Float64, 2, Matrix{Float64}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}, src::SubArray{Float64, 2, Matrix{Float64}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false})
@ Main ./REPL[3]:2
[12] top-level scope
@ REPL[5]:1
or a segfault like
signal (11): Segmentation fault
in expression starting at REPL[6]:1
typekeyvalue_hash at /buildworker/worker/package_linux64/build/src/jltypes.c:1152 [inlined]
lookup_typevalue at /buildworker/worker/package_linux64/build/src/jltypes.c:722
lookup_arg_type_tuple at /buildworker/worker/package_linux64/build/src/gf.c:1850 [inlined]
jl_lookup_generic_ at /buildworker/worker/package_linux64/build/src/gf.c:2363 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2425
add_loops! at ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:87
unknown function (ip: 0x7fea2d14a359)
avx_loopset! at ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:633
unknown function (ip: 0x7fea2d147e79)
_turbo_loopset at ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:691
unknown function (ip: 0x7fea2d147e51)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
#s166#85 at ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:717 [inlined]
#s166#85 at ./none:0
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
GeneratedFunctionStub at ./boot.jl:580
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1790 [inlined]
jl_call_staged at /buildworker/worker/package_linux64/build/src/method.c:431
jl_code_for_staged at /buildworker/worker/package_linux64/build/src/method.c:482
get_staged at ./compiler/utilities.jl:111
retrieve_code_info at ./compiler/utilities.jl:123 [inlined]
InferenceState at ./compiler/inferencestate.jl:234
typeinf_ext at ./compiler/typeinfer.jl:907
typeinf_ext_toplevel at ./compiler/typeinfer.jl:942
typeinf_ext_toplevel at ./compiler/typeinfer.jl:938
jfptr_typeinf_ext_toplevel_9542.clone_1 at ~/Software/julia-1.7.0-rc2/lib/julia/sys.so (unknown line)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1790 [inlined]
jl_type_infer at /buildworker/worker/package_linux64/build/src/gf.c:295
jl_generate_fptr at /buildworker/worker/package_linux64/build/src/jitlayers.cpp:338
jl_compile_method_internal at /buildworker/worker/package_linux64/build/src/gf.c:1980
jl_compile_method_internal at /buildworker/worker/package_linux64/build/src/gf.c:2246 [inlined]
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2239 [inlined]
jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2254
macro expansion at ~/.julia/packages/LoopVectorization/3Zpn2/src/codegen/lower_threads.jl:652 [inlined]
macro expansion at ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:713 [inlined]
_turbo_! at ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:713
foo! at ./REPL[3]:2
unknown function (ip: 0x7fea2d180128)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1790 [inlined]
do_call at /buildworker/worker/package_linux64/build/src/interpreter.c:126
eval_value at /buildworker/worker/package_linux64/build/src/interpreter.c:215
eval_stmt_value at /buildworker/worker/package_linux64/build/src/interpreter.c:166 [inlined]
eval_body at /buildworker/worker/package_linux64/build/src/interpreter.c:587
jl_interpret_toplevel_thunk at /buildworker/worker/package_linux64/build/src/interpreter.c:731
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:885
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:830
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:830
jl_toplevel_eval_in at /buildworker/worker/package_linux64/build/src/toplevel.c:944
eval at ./boot.jl:373 [inlined]
eval_user_input at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.7/REPL/src/REPL.jl:150
repl_backend_loop at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.7/REPL/src/REPL.jl:244
start_repl_backend at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.7/REPL/src/REPL.jl:229
#run_repl#47 at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.7/REPL/src/REPL.jl:362
run_repl at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.7/REPL/src/REPL.jl:349
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
#930 at ./client.jl:394
jfptr_YY.930_44567.clone_1 at ~/Software/julia-1.7.0-rc2/lib/julia/sys.so (unknown line)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1790 [inlined]
jl_f__call_latest at /buildworker/worker/package_linux64/build/src/builtins.c:757
#invokelatest#2 at ./essentials.jl:716 [inlined]
invokelatest at ./essentials.jl:714 [inlined]
run_main_repl at ./client.jl:379
exec_options at ./client.jl:309
_start at ./client.jl:495
jfptr__start_38325.clone_1 at ~/Software/julia-1.7.0-rc2/lib/julia/sys.so (unknown line)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1790 [inlined]
true_main at /buildworker/worker/package_linux64/build/src/jlapi.c:559
jl_repl_entrypoint at /buildworker/worker/package_linux64/build/src/jlapi.c:701
main at julia (unknown line)
__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x400808)
Allocations: 25541662 (Pool: 25531996; Big: 9666); GC: 29
Segmentation fault (core dumped)
```ization.OperationStruct(0x00000000000000000000000000000001, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.memload, 0x0001, 0x01), :LoopVectorization, :identity, LoopVectorization.OperationStruct(0x00000000000000000000000000000001, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000001, 0x00000000000000000000000000000000, LoopVectorization.compute, 0x0002, 0x00), :LoopVectorization, :setindex!, LoopVectorization.OperationStruct(0x00000000000000000000000000000001, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000002, 0x00000000000000000000000000000000, LoopVectorization.memstore, 0x0003, 0x02))}, ::Val{(LoopVectorization.ArrayRefStruct{:src, Symbol("##vptr##_src")}(0x00000000000000000000000000000001, 0x00000000000000000000000000000001, 0x00000000000000000000000000000000, 0x00000000000000000000000000000001), LoopVectorization.ArrayRefStruct{:dst, Symbol("##vptr##_dst")}(0x00000000000000000000000000000001, 0x00000000000000000000000000000001, 0x00000000000000000000000000000000, 0x00000000000000000000000000000001))}, ::Val{(0, (), (), (), (), (), ())}, ::Val{(:i,)}, ::Val{Tuple{Tuple{CartesianIndices{2, Tuple{CloseOpenIntervals.CloseOpen{Static.StaticInt{0}, Int64}, CloseOpenIntervals.CloseOpen{Static.StaticInt{0}, Int64}}}}, Tuple{LayoutPointers.GroupedStridedPointers{Tuple{Ptr{Float64}, Ptr{Float64}}, (1, 1), (0, 0), ((1, 2), (1, 2)), ((1, 2), (3, 4)), Tuple{Static.StaticInt{8}, Int64, Static.StaticInt{8}, Int64}, NTuple{4, Static.StaticInt{0}}}}}}, ::Int64, ::Int64, ::Ptr{Float64}, ::Ptr{Float64}, ::Int64, ::Int64)
@ LoopVectorization ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:713
[11] foo!(dst::SubArray{Float64, 2, Matrix{Float64}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false}, src::SubArray{Float64, 2, Matrix{Float64}, Tuple{UnitRange{Int64}, Base.Slice{Base.OneTo{Int64}}}, false})
@ Main ./REPL[3]:2
[12] top-level scope
@ REPL[5]:1
or a segfault like
signal (11): Segmentation fault
in expression starting at REPL[6]:1
typekeyvalue_hash at /buildworker/worker/package_linux64/build/src/jltypes.c:1152 [inlined]
lookup_typevalue at /buildworker/worker/package_linux64/build/src/jltypes.c:722
lookup_arg_type_tuple at /buildworker/worker/package_linux64/build/src/gf.c:1850 [inlined]
jl_lookup_generic_ at /buildworker/worker/package_linux64/build/src/gf.c:2363 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2425
add_loops! at ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:87
unknown function (ip: 0x7fea2d14a359)
avx_loopset! at ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:633
unknown function (ip: 0x7fea2d147e79)
_turbo_loopset at ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:691
unknown function (ip: 0x7fea2d147e51)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
#s166#85 at ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:717 [inlined]
#s166#85 at ./none:0
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
GeneratedFunctionStub at ./boot.jl:580
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1790 [inlined]
jl_call_staged at /buildworker/worker/package_linux64/build/src/method.c:431
jl_code_for_staged at /buildworker/worker/package_linux64/build/src/method.c:482
get_staged at ./compiler/utilities.jl:111
retrieve_code_info at ./compiler/utilities.jl:123 [inlined]
InferenceState at ./compiler/inferencestate.jl:234
typeinf_ext at ./compiler/typeinfer.jl:907
typeinf_ext_toplevel at ./compiler/typeinfer.jl:942
typeinf_ext_toplevel at ./compiler/typeinfer.jl:938
jfptr_typeinf_ext_toplevel_9542.clone_1 at ~/Software/julia-1.7.0-rc2/lib/julia/sys.so (unknown line)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1790 [inlined]
jl_type_infer at /buildworker/worker/package_linux64/build/src/gf.c:295
jl_generate_fptr at /buildworker/worker/package_linux64/build/src/jitlayers.cpp:338
jl_compile_method_internal at /buildworker/worker/package_linux64/build/src/gf.c:1980
jl_compile_method_internal at /buildworker/worker/package_linux64/build/src/gf.c:2246 [inlined]
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2239 [inlined]
jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2254
macro expansion at ~/.julia/packages/LoopVectorization/3Zpn2/src/codegen/lower_threads.jl:652 [inlined]
macro expansion at ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:713 [inlined]
_turbo_! at ~/.julia/packages/LoopVectorization/3Zpn2/src/reconstruct_loopset.jl:713
foo! at ./REPL[3]:2
unknown function (ip: 0x7fea2d180128)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1790 [inlined]
do_call at /buildworker/worker/package_linux64/build/src/interpreter.c:126
eval_value at /buildworker/worker/package_linux64/build/src/interpreter.c:215
eval_stmt_value at /buildworker/worker/package_linux64/build/src/interpreter.c:166 [inlined]
eval_body at /buildworker/worker/package_linux64/build/src/interpreter.c:587
jl_interpret_toplevel_thunk at /buildworker/worker/package_linux64/build/src/interpreter.c:731
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:885
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:830
jl_toplevel_eval_flex at /buildworker/worker/package_linux64/build/src/toplevel.c:830
jl_toplevel_eval_in at /buildworker/worker/package_linux64/build/src/toplevel.c:944
eval at ./boot.jl:373 [inlined]
eval_user_input at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.7/REPL/src/REPL.jl:150
repl_backend_loop at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.7/REPL/src/REPL.jl:244
start_repl_backend at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.7/REPL/src/REPL.jl:229
#run_repl#47 at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.7/REPL/src/REPL.jl:362
run_repl at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.7/REPL/src/REPL.jl:349
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
#930 at ./client.jl:394
jfptr_YY.930_44567.clone_1 at ~/Software/julia-1.7.0-rc2/lib/julia/sys.so (unknown line)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1790 [inlined]
jl_f__call_latest at /buildworker/worker/package_linux64/build/src/builtins.c:757
#invokelatest#2 at ./essentials.jl:716 [inlined]
invokelatest at ./essentials.jl:714 [inlined]
run_main_repl at ./client.jl:379
exec_options at ./client.jl:309
_start at ./client.jl:495
jfptr__start_38325.clone_1 at ~/Software/julia-1.7.0-rc2/lib/julia/sys.so (unknown line)
_jl_invoke at /buildworker/worker/package_linux64/build/src/gf.c:2247 [inlined]
jl_apply_generic at /buildworker/worker/package_linux64/build/src/gf.c:2429
jl_apply at /buildworker/worker/package_linux64/build/src/julia.h:1790 [inlined]
true_main at /buildworker/worker/package_linux64/build/src/jlapi.c:559
jl_repl_entrypoint at /buildworker/worker/package_linux64/build/src/jlapi.c:701
main at julia (unknown line)
__libc_start_main at /lib/x86_64-linux-gnu/libc.so.6 (unknown line)
unknown function (ip: 0x400808)
Allocations: 25541662 (Pool: 25531996; Big: 9666); GC: 29
Segmentation fault (core dumped)
The above commit disables threading when you have CartesianIndices.
We can leave this issue open until we get a proper fix.
The problem basically is that, given CartesianIndices, LoopVectorization (LV) updates a bunch of information it has about the loops (i.e., it splits the CartesianIndices into independent loops, also updating the associated operations, etc.).
When threading, it also currently forwards many -- but not all -- of the parameters describing the loop structure to another _turbo_! call.
Some of these descriptions are invalidated when performing the CartesianIndex update.
The easiest fix in the case of CartesianIndices might be to call condense_loopset again and take another trip through _turbo_! with the expanded loops.
Great, thanks!