LoopVectorization.jl
LoopVectorization.jl copied to clipboard
Support `getproperty`/`setproperty!` in reductions
trafficstars
julia> using LoopVectorization
julia> mutable struct foo
s
end
julia> f = foo(0)
foo(0)
julia> m = rand(Float32, 4,4)
4×4 Matrix{Float32}:
0.925989 0.8695 0.784866 0.0669142
0.258075 0.165664 0.787071 0.697996
0.219509 0.580573 0.298625 0.479805
0.000333428 0.200458 0.858297 0.571481
julia> s = 0
0
julia> @avxt for i in eachindex(m)
s += ifelse(m[i]>0.5, 1, 0)
end
julia> s
8
julia> @avxt for i in eachindex(m)
f.s += ifelse(m[i]>0.5, 1, 0)
end
ERROR: LoadError: LHS not understood; only `:ref`s and `:tuple`s are currently supported.
f.s
Stacktrace:
[1] add_assignment!(ls::LoopVectorization.LoopSet, LHS::Expr, RHS::Expr, elementbytes::Int64, position::Int64)
@ LoopVectorization C:\Users\89639\.julia\packages\LoopVectorization\wL8Qh\src\modeling\graphs.jl:1148
[2] push!(ls::LoopVectorization.LoopSet, ex::Expr, elementbytes::Int64, position::Int64)
@ LoopVectorization C:\Users\89639\.julia\packages\LoopVectorization\wL8Qh\src\modeling\graphs.jl:1165
in expression starting at REPL[8]:1
The problem is the `f.s +=`, not the `ifelse`.
julia> mutable struct foo
s
end
julia> f = foo(0f0);
julia> m = rand(Float32, 4,4);
julia> s = 0f0;
julia> @avxt for i in eachindex(m)
s += m[i]
end
julia> s, sum(m)
(8.579453f0, 8.579453f0)
julia> @avxt for i in eachindex(m)
f.s += m[i]
end
ERROR: LoadError: LHS not understood; only `:ref`s and `:tuple`s are currently supported.
f.s
Stacktrace:
[1] add_assignment!(ls::LoopVectorization.LoopSet, LHS::Expr, RHS::Expr, elementbytes::Int64, position::Int64)
@ LoopVectorization ~/.julia/dev/LoopVectorization/src/modeling/graphs.jl:1148
I'd accept a PR adding support for this, but otherwise it will have to wait for the LoopVectorization rewrite.
Basically, to support this, either add the transform of
for i in eachindex(m)
f.s += m[i]
end
into
s = 0.0
for i in eachindex(m)
s += m[i]
end
f.s += s
before LoopVectorization does anything else with the expression, or add special handling in the parsing to interpret it, and then again in the code that runs once `_turbo_!` returns, to do the store.
The former approach is easier, so I'd suggest that one.
Ideally, the PR would also handle something like `f.s[1] += ...`.