LoopVectorization.jl
LoopVectorization.jl copied to clipboard
Odd `UndefVarError` caused by `LoopVectorization.jl`
The mul_trace function gives uninformative error when called with Float32 matrices.
mul_trace: Error During Test at /home/runner/work/ReactiveMP.jl/ReactiveMP.jl/test/algebra/test_helpers.jl:78
Test threw exception
Expression: ReactiveMP.mul_trace(A, B) ≈ tr(A * B)
UndefVarError: ####op#279__0 not defined
Stacktrace:
[1] macro expansion
@ ~/.julia/packages/LoopVectorization/ndGJi/src/reconstruct_loopset.jl:713 [inlined]
[2] _turbo_!(::Val{(false, 0, 0, 0, false, 16, 64, 32, 64, 32768, 1048576, 37486592, 0x0000000000000001)}, ::Val{(:LoopVectorization, :getindex, LoopVectorization.OperationStruct(0x00000000000000000000000000000012, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.memload, 0x0001, 0x01), :LoopVectorization, :getindex, LoopVectorization.OperationStruct(0x00000000000000000000000000000021, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.memload, 0x0002, 0x02), Symbol("##DROPPED#CONSTANT##"), Symbol("##DROPPED#CONSTANT##"), LoopVectorization.OperationStruct(0x00000000000000000000000000000000, 0x00000000000000000000000000000000, 0x00000000000000000000000000000012, 0x00000000000000000000000000000000, 0x00000000000000000000000000000000, LoopVectorization.constant, 0x0003, 0x00), :LoopVectorization, :vfmadd_fast, LoopVectorization.OperationStruct(0x00000000000000000000000000000012, 0x00000000000000000000000000000012, 0x00000000000000000000000000000000, 0x00000000000000000000000100020003, 0x00000000000000000000000000000000, LoopVectorization.compute, 0x0003, 0x00))}, ::Val{(LoopVectorization.ArrayRefStruct{:A, Symbol("##vptr##_A")}(0x00000000000000000000000000000101, 0x00000000000000000000000000000102, 0x00000000000000000000000000000000, 0x00000000000000000000000000000101), LoopVectorization.ArrayRefStruct{:B, Symbol("##vptr##_B")}(0x00000000000000000000000000000101, 0x00000000000000000000000000000201, 0x00000000000000000000000000000000, 0x00000000000000000000000000000101))}, ::Val{(0, (4,), (3,), (), (), (), ())}, ::Val{(:i, :j)}, ::Val{Tuple{Tuple{CloseOpenIntervals.CloseOpen{Static.StaticInt{0}, Int64}, CloseOpenIntervals.CloseOpen{Static.StaticInt{0}, Int64}}, Tuple{LayoutPointers.GroupedStridedPointers{Tuple{Ptr{Float32}, Ptr{Float32}}, (1, 1), (0, 0), ((1, 2), (1, 2)), ((1, 2), (3, 4)), Tuple{Static.StaticInt{4}, Int64, Static.StaticInt{4}, Int64}, NTuple{4, Static.StaticInt{0}}}, DataType}}}, ::Int64, ::Int64, ::Ptr{Float32}, ::Ptr{Float32}, ::Int64, ::Int64, ::LoopVectorization.StaticType{Float32})
@ LoopVectorization ~/.julia/packages/LoopVectorization/ndGJi/src/reconstruct_loopset.jl:713
[3] mul_trace(A::Matrix{Float32}, B::Matrix{Float32})
The function itself simply computes tr(A * B):
using LoopVectorization
function mul_trace(A::AbstractMatrix, B::AbstractMatrix)
sA, sB = size(A), size(B)
@assert (sA === sB) && (length(sA) === 2) && (first(sA) === last(sA))
result = zero(promote_type(eltype(A), eltype(B)))
n = first(sA)
@turbo for i in 1:n
for j in 1:n
result += A[i, j] * B[j, i]
end
end
return result
end
using Test
@testset "mul_trace" begin
rng = MersenneTwister(1234)
for size in 2:4, T1 in (Float32, Float64), T2 in (Float32, Float64)
A = rand(rng, T1, size, size)
B = rand(rng, T2, size, size)
@test mul_trace(A, B) ≈ tr(A * B)
end
end
Works perfectly fine locally on my machine (macos). I could not reproduce it locally, but sometimes it fails and sometimes it doesn't on our CI. Link. It makes me think that the issue is machine/OS related.
It makes me think that the issue is machine/OS related.
I'm assuming it only shows up on systems with AVX512.