KernelAbstractions.jl
KernelAbstractions.jl copied to clipboard
@print not functional on AMDGPU
Ok, maybe I'm just tired, but...
using AMDGPU
using ROCKernels
using KernelAbstractions
# Minimal reproducer kernel: takes no arguments, and its body is a single
# `@print` call that prints the integer 1 followed by a newline character.
@kernel function f_test_kernel!()
@print(1, '\n')
end
# Instantiate `f_test_kernel!` for the backend selected by `AT`, launch it with
# `ndrange=100`, and return whatever that launch call returns.
#
# - `AT == Array`:    the kernel is built with `CPU()` and `numcores`.
# - `AT == ROCArray`: the kernel is built with `ROCDevice()` and `numthreads`.
#
# The second constructor argument is presumably the workgroup size — TODO
# confirm against the KernelAbstractions API.
# NOTE(review): for any other `AT`, neither branch assigns `kernel!`, so the
# launch on the last line fails with an undefined-variable error.
function f_test!(AT; numcores = 4, numthreads = 256)
if AT == Array
kernel! = f_test_kernel!(CPU(), numcores)
elseif AT == ROCArray
kernel! = f_test_kernel!(ROCDevice(), numthreads)
end
kernel!(ndrange=100)
end
Gives me:
julia> f_test!(ROCArray)
ERROR: InvalidIRError: compiling kernel gpu_f_test_kernel!(KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256,)}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Nothing}}) resulted in invalid LLVM IR
Reason: unsupported call to an unknown function (call to jl_f_getfield)
Stacktrace:
[1] getindex
@ ./tuple.jl:29
[2] iterate
@ ./tuple.jl:69
[3] #__print
@ ~/.julia/packages/ROCKernels/7KbwQ/src/ROCKernels.jl:312
[4] macro expansion
@ ~/projects/KernelAbstractions.jl/src/KernelAbstractions.jl:274
[5] macro expansion
@ ~/projects/simuleios/GPU/test_print.jl:7
[6] gpu_f_test_kernel!
@ ~/projects/KernelAbstractions.jl/src/macros.jl:81
[7] gpu_f_test_kernel!
@ ./none:0
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
[1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.GCNCompilerTarget, AMDGPU.Compiler.ROCCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_f_test_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256,)}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Nothing}}}}}, args::LLVM.Module)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Fu1YT/src/validation.jl:141
[2] macro expansion
@ ~/.julia/packages/GPUCompiler/Fu1YT/src/driver.jl:418 [inlined]
[3] macro expansion
@ ~/.julia/packages/TimerOutputs/LHjFw/src/TimerOutput.jl:253 [inlined]
[4] macro expansion
@ ~/.julia/packages/GPUCompiler/Fu1YT/src/driver.jl:416 [inlined]
[5] emit_asm(job::GPUCompiler.CompilerJob, ir::LLVM.Module; strip::Bool, validate::Bool, format::LLVM.API.LLVMCodeGenFileType)
@ GPUCompiler ~/.julia/packages/GPUCompiler/Fu1YT/src/utils.jl:68
[6] (::AMDGPU.Compiler.var"#33#36"{GPUCompiler.CompilerJob{GPUCompiler.GCNCompilerTarget, AMDGPU.Compiler.ROCCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_f_test_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256,)}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Nothing}}}}}, Core.MethodInstance})(ctx::LLVM.Context)
@ AMDGPU.Compiler ~/.julia/packages/AMDGPU/dFwIZ/src/compiler.jl:139
[7] JuliaContext(f::AMDGPU.Compiler.var"#33#36"{GPUCompiler.CompilerJob{GPUCompiler.GCNCompilerTarget, AMDGPU.Compiler.ROCCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_f_test_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256,)}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Nothing}}}}}, Core.MethodInstance})
@ GPUCompiler ~/.julia/packages/GPUCompiler/Fu1YT/src/driver.jl:76
[8] rocfunction_compile(job::GPUCompiler.CompilerJob)
@ AMDGPU.Compiler ~/.julia/packages/AMDGPU/dFwIZ/src/compiler.jl:135
[9] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(AMDGPU.Compiler.rocfunction_compile), linker::typeof(AMDGPU.Compiler.rocfunction_link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/Fu1YT/src/cache.jl:90
[10] rocfunction(f::typeof(gpu_f_test_kernel!), tt::Type; name::String, device::ROCDevice, global_hooks::NamedTuple{(), Tuple{}})
@ AMDGPU.Compiler ~/.julia/packages/AMDGPU/dFwIZ/src/compiler.jl:121
[11] macro expansion
@ ~/.julia/packages/AMDGPU/dFwIZ/src/highlevel.jl:406 [inlined]
[12] (::KernelAbstractions.Kernel{ROCDevice, KernelAbstractions.NDIteration.StaticSize{(256,)}, KernelAbstractions.NDIteration.DynamicSize, typeof(gpu_f_test_kernel!)})(; ndrange::Int64, dependencies::Nothing, workgroupsize::Nothing, progress::Nothing)
@ ROCKernels ~/.julia/packages/ROCKernels/7KbwQ/src/ROCKernels.jl:227
[13] f_test!(AT::Type; numcores::Int64, numthreads::Int64)
@ Main ~/projects/simuleios/GPU/test_print.jl:18
[14] f_test!(AT::Type)
@ Main ~/projects/simuleios/GPU/test_print.jl:12
[15] top-level scope
@ REPL[3]:1
And it looks like the print testsuite is disabled (skipped by a conditional) for the ROCM backend:
# Run the "Printing" testset for every backend except the one whose
# `backend_str` is "ROCM"; for that backend the testsuite is skipped entirely.
if backend_str != "ROCM"
@testset "Printing" begin
printing_testsuite(backend)
end
end
Ok, I think I'm doing something screwy here. If you enable the tests, they seem to pass, but somehow I still cannot get this code (or even just `@print("yo\n")`) to work. That one just hangs forever.
Looking into it, I think we could probably use a better @print() interface. Namely, adding specific @printf() and println() calls.
For context, the macro was added in #61, and there are 3 other issues related to printing:
- #112, which can probably just be closed
- #17, which can probably also be closed
- #73, which specifically asks for `printf` support instead of `print`
We probably need to add an @generated __print implementation that generates a printf-compatible string for the arguments for @rocprintf, because we don't have an equivalent function to _cuprint right now.