KernelAbstractions.jl icon indicating copy to clipboard operation
KernelAbstractions.jl copied to clipboard

@print not functional on AMDGPU

Open leios opened this issue 3 years ago • 2 comments

Ok, maybe I'm just tired, but...

using AMDGPU
using ROCKernels
using KernelAbstractions

# Minimal reproducer kernel: takes no arguments and does nothing except call the
# `@print` macro with an integer literal and a newline character.
@kernel function f_test_kernel!()
    @print(1, '\n')
end

"""
    f_test!(AT; numcores = 4, numthreads = 256)

Instantiate `f_test_kernel!` for the backend selected by the array type `AT` and launch
it with `ndrange = 100`.

# Arguments
- `AT`: `Array` selects the `CPU()` device; `ROCArray` selects `ROCDevice()`.

# Keywords
- `numcores`: second positional argument given to the kernel constructor on the CPU path.
- `numthreads`: second positional argument given to the kernel constructor on the ROC
  path (it shows up as the static workgroup size in the compiled kernel type).

# Returns
Whatever the kernel launch `kernel!(ndrange=100)` returns.

# Throws
- `ArgumentError` if `AT` is neither `Array` nor `ROCArray`.
"""
function f_test!(AT; numcores = 4, numthreads = 256)
    if AT == Array
        kernel! = f_test_kernel!(CPU(), numcores)
    elseif AT == ROCArray
        kernel! = f_test_kernel!(ROCDevice(), numthreads)
    else
        # Without this branch `kernel!` would be left unassigned and the launch below
        # would fail with an unrelated-looking UndefVarError.
        throw(ArgumentError("unsupported array type $AT; expected Array or ROCArray"))
    end

    return kernel!(ndrange=100)
end

Gives me:

julia> f_test!(ROCArray)

ERROR: InvalidIRError: compiling kernel gpu_f_test_kernel!(KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256,)}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Nothing}}) resulted in invalid LLVM IR
Reason: unsupported call to an unknown function (call to jl_f_getfield)
Stacktrace:
 [1] getindex
   @ ./tuple.jl:29
 [2] iterate
   @ ./tuple.jl:69
 [3] #__print
   @ ~/.julia/packages/ROCKernels/7KbwQ/src/ROCKernels.jl:312
 [4] macro expansion
   @ ~/projects/KernelAbstractions.jl/src/KernelAbstractions.jl:274
 [5] macro expansion
   @ ~/projects/simuleios/GPU/test_print.jl:7
 [6] gpu_f_test_kernel!
   @ ~/projects/KernelAbstractions.jl/src/macros.jl:81
 [7] gpu_f_test_kernel!
   @ ./none:0
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
  [1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.GCNCompilerTarget, AMDGPU.Compiler.ROCCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_f_test_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256,)}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Nothing}}}}}, args::LLVM.Module)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Fu1YT/src/validation.jl:141
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/Fu1YT/src/driver.jl:418 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/LHjFw/src/TimerOutput.jl:253 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/GPUCompiler/Fu1YT/src/driver.jl:416 [inlined]
  [5] emit_asm(job::GPUCompiler.CompilerJob, ir::LLVM.Module; strip::Bool, validate::Bool, format::LLVM.API.LLVMCodeGenFileType)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Fu1YT/src/utils.jl:68
  [6] (::AMDGPU.Compiler.var"#33#36"{GPUCompiler.CompilerJob{GPUCompiler.GCNCompilerTarget, AMDGPU.Compiler.ROCCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_f_test_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256,)}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Nothing}}}}}, Core.MethodInstance})(ctx::LLVM.Context)
    @ AMDGPU.Compiler ~/.julia/packages/AMDGPU/dFwIZ/src/compiler.jl:139
  [7] JuliaContext(f::AMDGPU.Compiler.var"#33#36"{GPUCompiler.CompilerJob{GPUCompiler.GCNCompilerTarget, AMDGPU.Compiler.ROCCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_f_test_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{1, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256,)}, CartesianIndices{1, Tuple{Base.OneTo{Int64}}}, Nothing}}}}}, Core.MethodInstance})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Fu1YT/src/driver.jl:76
  [8] rocfunction_compile(job::GPUCompiler.CompilerJob)
    @ AMDGPU.Compiler ~/.julia/packages/AMDGPU/dFwIZ/src/compiler.jl:135
  [9] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(AMDGPU.Compiler.rocfunction_compile), linker::typeof(AMDGPU.Compiler.rocfunction_link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/Fu1YT/src/cache.jl:90
 [10] rocfunction(f::typeof(gpu_f_test_kernel!), tt::Type; name::String, device::ROCDevice, global_hooks::NamedTuple{(), Tuple{}})
    @ AMDGPU.Compiler ~/.julia/packages/AMDGPU/dFwIZ/src/compiler.jl:121
 [11] macro expansion
    @ ~/.julia/packages/AMDGPU/dFwIZ/src/highlevel.jl:406 [inlined]
 [12] (::KernelAbstractions.Kernel{ROCDevice, KernelAbstractions.NDIteration.StaticSize{(256,)}, KernelAbstractions.NDIteration.DynamicSize, typeof(gpu_f_test_kernel!)})(; ndrange::Int64, dependencies::Nothing, workgroupsize::Nothing, progress::Nothing)
    @ ROCKernels ~/.julia/packages/ROCKernels/7KbwQ/src/ROCKernels.jl:227
 [13] f_test!(AT::Type; numcores::Int64, numthreads::Int64)
    @ Main ~/projects/simuleios/GPU/test_print.jl:18
 [14] f_test!(AT::Type)
    @ Main ~/projects/simuleios/GPU/test_print.jl:12
 [15] top-level scope
    @ REPL[3]:1

And it looks like the print testsuite is skipped for the ROCM backend:

    # Run the "Printing" test set for every backend except the one labelled "ROCM".
    if backend_str != "ROCM"
        @testset "Printing" begin
            printing_testsuite(backend)
        end
    end

leios avatar Nov 22 '22 20:11 leios

Ok, I think I'm doing something screwy here. If you enable the tests, they seem to pass, but somehow I still cannot get this code (or even just @print("yo\n")) to work. That one just hangs forever.

Looking into it, I think we could probably use a better @print() interface. Namely, adding specific @printf() and println() calls.

For context, the macro was added in #61, and there are 3 other issues related to printing:

  1. #112, which can probably just be closed
  2. #17, which can probably also be closed
  3. #73, which specifically asks for printf support instead of print

leios avatar Nov 23 '22 08:11 leios

We probably need to add an @generated __print implementation that generates a printf-compatible string for the arguments for @rocprintf, because we don't have an equivalent function to _cuprint right now.

jpsamaroo avatar Nov 25 '22 16:11 jpsamaroo