KernelAbstractions.jl
Compilation error with dynamic ndrange
using KernelAbstractions
using KernelAbstractions.Extras: @unroll
using CuArrays
# Minimal kernel that reproduces the compilation failure reported below.
# It performs a pairwise (halving) reduction over a scratch buffer of Nq * Nq Float64
# values. `nreduce` and `Nq` are passed as `Val`s, so both are type parameters.
# NOTE(review): s_MJQ is read but never written in this block — presumably acceptable
# for a compile-only reproducer, but the accumulated values are not meaningful; confirm.
@kernel function kernel!(::Val{nreduce}, ::Val{Nq}) where {nreduce, Nq}
# Scratch buffer with Nq * Nq Float64 entries, allocated via `@localmem`.
s_MJQ = @localmem Float64 (Nq * Nq)
# Two-component local index of this work-item, as returned by `@index(Local, NTuple)`.
i, j = @index(Local, NTuple)
@inbounds begin
# Walk the reduction steps from the widest (2^11) down to the narrowest (2^1);
# the loop is marked with `@unroll`.
@unroll for n = 11:-1:1
# Steps wider than the requested reduction width are skipped. The condition depends
# only on `nreduce` and `n`, not on the work-item index.
if nreduce ≥ 2^n
# Flatten (i, j) into a 1-based linear index with i varying fastest.
ij = i + Nq * (j-1)
# Partner element, half a step-width (2^(n-1)) further along.
ijshift = ij + 2^(n-1)
# Only the lower half of the current step accumulates, and only when the partner
# lies inside the Nq * Nq buffer (Nq * Nq need not be a power of two).
if ij ≤ 2^(n-1) && ijshift ≤ Nq * Nq
s_MJQ[ij] += s_MJQ[ijshift]
end
# Synchronization point after each executed step, before the next step reads
# the values accumulated here.
@synchronize
end
end
end
end
# Driver: launches `kernel!` twice with the same (Nq, Nq) sizes. The two launches differ
# only in where `ndrange` is supplied: at kernel construction or at call time.
let
Nq = 5
# Smallest power of two that is ≥ Nq*Nq (32 for Nq = 5).
nreduce = 2^ceil(Int, log2(Nq*Nq))
# this works: both (Nq, Nq) tuples are given when the kernel object is constructed
event = kernel!(CUDA(), (Nq, Nq), (Nq, Nq))(Val(nreduce), Val(Nq))
wait(event)
# this fails: only the first (Nq, Nq) tuple is given at construction and `ndrange` is
# passed as a keyword at launch (the error below shows a `DynamicSize` ndrange type)
event = kernel!(CUDA(), (Nq, Nq))(Val(nreduce), Val(Nq); ndrange=(Nq, Nq))
wait(event)
end
Error (raised when compiling the second launch, where `ndrange` is passed as a keyword):
ERROR: LoadError: InvalidIRError: compiling gpu_kernel!(Cassette.Context{nametype(CUDACtx),KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize,true,Nothing,CartesianIndices{2,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}}},KernelAbstractions.NDIteration.NDRange{2,KernelAbstractions.NDIteration.DynamicSize,KernelAbstractions.NDIteration.StaticSize{(5, 5)},CartesianIndices{2,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}}},Nothing}},Nothing,KernelAbstractions.var"##PassType#422",Nothing,Cassette.DisableHooks}, typeof(gpu_kernel!), Val{32}, Val{5}) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to overdub(overdub_context::Cassette.Context, overdub_arguments...) in Cassette at /home/mwarusz/.julia/packages/Cassette/7OymZ/src/overdub.jl:532)
Stacktrace:
[1] macro expansion at /home/mwarusz/repos/KernelAbstractions.jl/src/extras/loopinfo.jl:26
[2] macro expansion at /home/mwarusz/repos/KernelAbstractions.jl/test/compile_fail.jl:11
[3] gpu_kernel! at /home/mwarusz/repos/KernelAbstractions.jl/src/macros.jl:70
[4] overdub at /home/mwarusz/.julia/packages/Cassette/7OymZ/src/overdub.jl:0
Reason: unsupported dynamic function invocation (call to overdub(overdub_context::Cassette.Context, overdub_arguments...) in Cassette at /home/mwarusz/.julia/packages/Cassette/7OymZ/src/overdub.jl:532)
Stacktrace:
[1] macro expansion at /home/mwarusz/repos/KernelAbstractions.jl/test/compile_fail.jl:14
[2] macro expansion at /home/mwarusz/repos/KernelAbstractions.jl/src/extras/loopinfo.jl:26
[3] macro expansion at /home/mwarusz/repos/KernelAbstractions.jl/test/compile_fail.jl:11
[4] gpu_kernel! at /home/mwarusz/repos/KernelAbstractions.jl/src/macros.jl:70
[5] overdub at /home/mwarusz/.julia/packages/Cassette/7OymZ/src/overdub.jl:0
Reason: unsupported dynamic function invocation (call to overdub(overdub_context::Cassette.Context, overdub_arguments...) in Cassette at /home/mwarusz/.julia/packages/Cassette/7OymZ/src/overdub.jl:532)
Stacktrace:
[1] macro expansion at /home/mwarusz/repos/KernelAbstractions.jl/test/compile_fail.jl:15
[2] macro expansion at /home/mwarusz/repos/KernelAbstractions.jl/src/extras/loopinfo.jl:26
[3] macro expansion at /home/mwarusz/repos/KernelAbstractions.jl/test/compile_fail.jl:11
[4] gpu_kernel! at /home/mwarusz/repos/KernelAbstractions.jl/src/macros.jl:70
[5] overdub at /home/mwarusz/.julia/packages/Cassette/7OymZ/src/overdub.jl:0
Stacktrace:
[1] check_ir(::CUDAnative.CompilerJob, ::LLVM.Module) at /home/mwarusz/.julia/packages/CUDAnative/1UYFF/src/compiler/validation.jl:116
[2] macro expansion at /home/mwarusz/.julia/packages/CUDAnative/1UYFF/src/compiler/driver.jl:186 [inlined]
[3] macro expansion at /home/mwarusz/.julia/packages/TimerOutputs/7Id5J/src/TimerOutput.jl:228 [inlined]
[4] #codegen#172(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.codegen), ::Symbol, ::CUDAnative.CompilerJob) at /home/mwarusz/.julia/packages/CUDAnative/1UYFF/src/compiler/driver.jl:184
[5] #codegen at /home/mwarusz/.julia/packages/CUDAnative/1UYFF/src/compiler/driver.jl:0 [inlined]
[6] #compile#171(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.compile), ::Symbol, ::CUDAnative.CompilerJob) at /home/mwarusz/.julia/packages/CUDAnative/1UYFF/src/compiler/driver.jl:45
[7] #compile at /home/mwarusz/.julia/packages/CUDAnative/1UYFF/src/compiler/common.jl:0 [inlined]
[8] #compile#170 at /home/mwarusz/.julia/packages/CUDAnative/1UYFF/src/compiler/driver.jl:33 [inlined]
[9] #compile at ./none:0 [inlined] (repeats 2 times)
[10] macro expansion at /home/mwarusz/.julia/packages/CUDAnative/1UYFF/src/execution.jl:395 [inlined]
[11] #cufunction#216(::String, ::Base.Iterators.Pairs{Symbol,Int64,Tuple{Symbol},NamedTuple{(:maxthreads,),Tuple{Int64}}}, ::typeof(CUDAnative.cufunction), ::typeof(Cassette.overdub), ::Type{Tuple{Cassette.Context{nametype(CUDACtx),KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize,true,Nothing,CartesianIndices{2,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}}},KernelAbstractions.NDIteration.NDRange{2,KernelAbstractions.NDIteration.DynamicSize,KernelAbstractions.NDIteration.StaticSize{(5, 5)},CartesianIndices{2,Tuple{Base.OneTo{Int64},Base.OneTo{Int64}}},Nothing}},Nothing,KernelAbstractions.var"##PassType#422",Nothing,Cassette.DisableHooks},typeof(gpu_kernel!),Val{32},Val{5}}}) at /home/mwarusz/.julia/packages/CUDAnative/1UYFF/src/execution.jl:360
[12] (::CUDAnative.var"#kw##cufunction")(::NamedTuple{(:name, :maxthreads),Tuple{String,Int64}}, ::typeof(CUDAnative.cufunction), ::Function, ::Type) at ./none:0
[13] #_#40(::Tuple{Int64,Int64}, ::Nothing, ::Nothing, ::KernelAbstractions.Kernel{CUDA,KernelAbstractions.NDIteration.StaticSize{(5, 5)},KernelAbstractions.NDIteration.DynamicSize,typeof(gpu_kernel!)}, ::Val{32}, ::Vararg{Any,N} where N) at /home/mwarusz/.julia/packages/CUDAnative/1UYFF/src/execution.jl:179
[14] (::Core.var"#kw#Any")(::NamedTuple{(:ndrange,),Tuple{Tuple{Int64,Int64}}}, ::KernelAbstractions.Kernel{CUDA,KernelAbstractions.NDIteration.StaticSize{(5, 5)},KernelAbstractions.NDIteration.DynamicSize,typeof(gpu_kernel!)}, ::Val{32}, ::Vararg{Any,N} where N) at ./none:0
[15] top-level scope at /home/mwarusz/repos/KernelAbstractions.jl/test/compile_fail.jl:33
in expression starting at /home/mwarusz/repos/KernelAbstractions.jl/test/compile_fail.jl:24