JIT session error triggered by precompilation
Using PrecompileTools.jl to precompile GaussianSplatting.jl triggers the following error on Julia 1.10 when executing code after precompilation:
JIT session error: Symbols not found: [ julia.gpu.state_getter ]
JIT session error: Failed to materialize symbols: { (JuliaOJIT, { julia_#throw_inexacterror_2603 }) }
JIT session error: Failed to materialize symbols: { (JuliaOJIT, { julia_#GaussianRasterizer#16_2598 }) }
MWE:
The error seems to be triggered at the moment of calling GaussianRasterizer ctor.
using CUDA
using cuDNN
using GaussianSplatting
# Minimal reproducer: creates random Float32 inputs as CuArrays, builds a camera and a
# Gaussian model, then constructs the rasterizer. The function returns nothing.
function main()
# Backend object obtained from the package; passed to the rasterizer constructor below.
kab = GaussianSplatting.gpu_backend()
# Three 3×128 Float32 arrays of random values, each wrapped in a CuArray.
points = CuArray(rand(Float32, 3, 128))
colors = CuArray(rand(Float32, 3, 128))
scales = CuArray(rand(Float32, 3, 128))
camera = GaussianSplatting.Camera(; fx=100f0, fy=100f0, width=256, height=256)
# NOTE(review): `gaussians` is constructed but not used again in this function.
gaussians = GaussianSplatting.GaussianModel(points, colors, scales; max_sh_degree=0)
# Per the report, the JIT session error seems to be triggered by this constructor call.
rasterizer = GaussianSplatting.GaussianRasterizer(kab, camera; auxiliary=false)
return
end
# Run the reproducer.
main()
cc @vchuravy
With AMDGPU on 1.10 there is no such issue; however, 1.11 fails during precompilation. Without precompilation the package works fine.
julia> using AMDGPU, GaussianSplatting
Precompiling GaussianSplattingAMDGPUExt...
Info Given GaussianSplattingAMDGPUExt was explicitly requested, output will be shown live
[ Info: Precompiling for `AMDGPU.ROCKernels.ROCBackend()` GPU backend.
[ Info: Done precompiling!
LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.dispatch.ptr
[19065] signal 6 (-6): Aborted
in expression starting at none:0
Allocations: 128924616 (Pool: 128919573; Big: 5043); GC: 41
┌ Error: Error during loading of extension GaussianSplattingAMDGPUExt of GaussianSplatting, use `Base.retry_load_extensions()` to retry.
│ exception =
│ 1-element ExceptionStack:
│ Failed to precompile GaussianSplattingAMDGPUExt [ce7b01fd-24c6-5d8a-8407-52c975a293bc] to "/home/pxlth/.julia/compiled/v1.11/GaussianSplattingAMDGPUExt/jl_1oLeu5".
│ Stacktrace:
│ [1] error(s::String)
│ @ Base ./error.jl:35
│ [2] compilecache(pkg::Base.PkgId, path::String, internal_stderr::IO, internal_stdout::IO, keep_loaded_modules::Bool; flags::Cmd, cacheflags::Base.CacheFlags, reasons::Dict{String, Int64})
│ @ Base ./loading.jl:3032
│ [3] (::Base.var"#1080#1081"{Base.PkgId})()
│ @ Base ./loading.jl:2420
│ [4] mkpidlock(f::Base.var"#1080#1081"{Base.PkgId}, at::String, pid::Int32; kwopts::@Kwargs{stale_age::Int64, wait::Bool})
│ @ FileWatching.Pidfile ~/bin/julia-1.11.0-rc2/share/julia/stdlib/v1.11/FileWatching/src/pidfile.jl:95
│ [5] #mkpidlock#6
│ @ ~/bin/julia-1.11.0-rc2/share/julia/stdlib/v1.11/FileWatching/src/pidfile.jl:90 [inlined]
│ [6] trymkpidlock(::Function, ::Vararg{Any}; kwargs::@Kwargs{stale_age::Int64})
│ @ FileWatching.Pidfile ~/bin/julia-1.11.0-rc2/share/julia/stdlib/v1.11/FileWatching/src/pidfile.jl:116
│ [7] #invokelatest#2
│ @ ./essentials.jl:1045 [inlined]
│ [8] invokelatest
│ @ ./essentials.jl:1040 [inlined]
│ [9] maybe_cachefile_lock(f::Base.var"#1080#1081"{Base.PkgId}, pkg::Base.PkgId, srcpath::String; stale_age::Int64)
│ @ Base ./loading.jl:3550
│ [10] maybe_cachefile_lock
│ @ ./loading.jl:3547 [inlined]
│ [11] _require(pkg::Base.PkgId, env::Nothing)
│ @ Base ./loading.jl:2416
│ [12] __require_prelocked(uuidkey::Base.PkgId, env::Nothing)
│ @ Base ./loading.jl:2243
│ [13] #invoke_in_world#3
│ @ ./essentials.jl:1077 [inlined]
│ [14] invoke_in_world
│ @ ./essentials.jl:1074 [inlined]
│ [15] _require_prelocked
│ @ ./loading.jl:2230 [inlined]
│ [16] _require_prelocked
│ @ ./loading.jl:2229 [inlined]
│ [17] run_extension_callbacks(extid::Base.ExtensionId)
│ @ Base ./loading.jl:1430
│ [18] run_extension_callbacks(pkgid::Base.PkgId)
│ @ Base ./loading.jl:1465
│ [19] run_package_callbacks(modkey::Base.PkgId)
│ @ Base ./loading.jl:1293
│ [20] __require_prelocked(uuidkey::Base.PkgId, env::String)
│ @ Base ./loading.jl:2254
│ [21] #invoke_in_world#3
│ @ ./essentials.jl:1077 [inlined]
│ [22] invoke_in_world
│ @ ./essentials.jl:1074 [inlined]
│ [23] _require_prelocked(uuidkey::Base.PkgId, env::String)
│ @ Base ./loading.jl:2230
│ [24] macro expansion
│ @ ./loading.jl:2169 [inlined]
│ [25] macro expansion
│ @ ./lock.jl:273 [inlined]
│ [26] __require(into::Module, mod::Symbol)
│ @ Base ./loading.jl:2126
│ [27] #invoke_in_world#3
│ @ ./essentials.jl:1077 [inlined]
│ [28] invoke_in_world
│ @ ./essentials.jl:1074 [inlined]
│ [29] require(into::Module, mod::Symbol)
│ @ Base ./loading.jl:2119
│ [30] eval
│ @ ./boot.jl:429 [inlined]
│ [31] eval_user_input(ast::Any, backend::REPL.REPLBackend, mod::Module)
│ @ REPL ~/bin/julia-1.11.0-rc2/share/julia/stdlib/v1.11/REPL/src/REPL.jl:224
│ [32] repl_backend_loop(backend::REPL.REPLBackend, get_module::Function)
│ @ REPL ~/bin/julia-1.11.0-rc2/share/julia/stdlib/v1.11/REPL/src/REPL.jl:321
│ [33] start_repl_backend(backend::REPL.REPLBackend, consumer::Any; get_module::Function)
│ @ REPL ~/bin/julia-1.11.0-rc2/share/julia/stdlib/v1.11/REPL/src/REPL.jl:306
│ [34] run_repl(repl::REPL.AbstractREPL, consumer::Any; backend_on_current_task::Bool, backend::Any)
│ @ REPL ~/bin/julia-1.11.0-rc2/share/julia/stdlib/v1.11/REPL/src/REPL.jl:462
│ [35] run_repl(repl::REPL.AbstractREPL, consumer::Any)
│ @ REPL ~/bin/julia-1.11.0-rc2/share/julia/stdlib/v1.11/REPL/src/REPL.jl:448
│ [36] (::Base.var"#1137#1139"{Bool, Symbol, Bool})(REPL::Module)
│ @ Base ./client.jl:441
│ [37] #invokelatest#2
│ @ ./essentials.jl:1043 [inlined]
│ [38] invokelatest
│ @ ./essentials.jl:1040 [inlined]
│ [39] run_main_repl(interactive::Bool, quiet::Bool, banner::Symbol, history_file::Bool, color_set::Bool)
│ @ Base ./client.jl:425
│ [40] repl_main
│ @ ./client.jl:562 [inlined]
│ [41] _start()
│ @ Base ./client.jl:536
└ @ Base loading.jl:1436
PackageCompiler fails to compile CUDA with compile=all. As suggested in Discourse, this issue might be related or the root of it.
I wish to raise the importance of this issue as it blocks compilation of any CUDA.jl-related project (using both PrecompileTools and PackageCompiler).
MWE:
# Minimal reproducer: compiles a kernel that calls an NVVM intrinsic to LLVM IR from inside
# a PrecompileTools workload. The report shows precompiling this module aborting with
# "LLVM ERROR: Cannot select: intrinsic %llvm.nvvm.membar.cta".
module MWE
using GPUCompiler, PrecompileTools
# Empty parameter type; an instance is passed to CompilerConfig below.
struct CompilerParams <: AbstractCompilerParams end
# Invokes the `llvm.nvvm.membar.cta` intrinsic via `llvmcall`; no arguments, Cvoid return.
@inline threadfence_block() = ccall("llvm.nvvm.membar.cta", llvmcall, Cvoid, ())
# Kernel under test: its only action is calling the intrinsic wrapper.
kernel() = threadfence_block()
@setup_workload begin
# Build a compiler job for `kernel` with an empty argument tuple, targeting PTX with cap=v"5.0".
source = methodinstance(typeof(kernel), Tuple{})
target = PTXCompilerTarget(cap=v"5.0")
params = CompilerParams()
config = CompilerConfig(target, params)
job = CompilerJob(source, config)
@compile_workload begin
# Compile the job to the `:llvm` output stage inside a JuliaContext.
JuliaContext() do ctx
GPUCompiler.compile(:llvm, job)
end
end
end
end # module MWE
Precompiling MWE...
LLVM ERROR: Cannot select: intrinsic %llvm.nvvm.membar.cta
It is either random or it got worse with Julia 1.12. I am now getting this error with default settings when compiling with PackageCompiler. Previously I got the error only with --compile=all.
Should we open an issue on the Julia GitHub repository, since the solution should probably be sought there?
It would probably help to reduce this to an AbstractInterpreter example, i.e. not using GPUCompiler, before filing upstream.