GPUCompiler.jl icon indicating copy to clipboard operation
GPUCompiler.jl copied to clipboard

Compilation error with `always_inline=true` with conversions within broadcasts

Open Keluaa opened this issue 8 months ago • 2 comments

Here is a reproducer for this CUDA.jl issue, but with the native backend and Julia 1.11.4.

The setup code for native compilation (the same as in "test/helpers/native.jl"):

using GPUCompiler

# Declare `test_method_table`, the default `method_table` used by `CompilerParams` and
# `create_job` below.
Base.Experimental.@MethodTable(test_method_table)

"""
    CompilerParams(entry_safepoint::Bool=false, method_table=test_method_table)

Parameters attached to the compiler configuration of the test jobs.

# Fields
- `entry_safepoint::Bool`: value reported by the `GPUCompiler.can_safepoint` override.
- `method_table`: value reported by the `GPUCompiler.method_table` override; left untyped,
  exactly as in the original definition.
"""
struct CompilerParams <: GPUCompiler.AbstractCompilerParams
    entry_safepoint::Bool
    method_table

    # Single inner constructor; both arguments are optional.
    function CompilerParams(entry_safepoint::Bool=false, method_table=test_method_table)
        return new(entry_safepoint, method_table)
    end
end

# Job type parameterized on `NativeCompilerTarget` and `CompilerParams`; the GPUCompiler
# method overrides below dispatch on it.
NativeCompilerJob = GPUCompiler.CompilerJob{GPUCompiler.NativeCompilerTarget, CompilerParams}

module TestRuntime
    # Stand-in runtime handed to GPUCompiler through `runtime_module`. Every hook is a
    # no-op returning `nothing`, except `malloc`, which always yields a null pointer.
    signal_exception() = nothing
    malloc(sz) = C_NULL
    report_oom(sz) = nothing
    report_exception(ex) = nothing
    report_exception_name(ex) = nothing
    report_exception_frame(idx, func, file, line) = nothing
end

# GPUCompiler interface methods specialized for `NativeCompilerJob`.

# Use the dummy `TestRuntime` module as the runtime for these jobs.
function GPUCompiler.runtime_module(::NativeCompilerJob)
    return TestRuntime
end

# Report the method table stored in the job's `CompilerParams`.
function GPUCompiler.method_table(@nospecialize(job::NativeCompilerJob))
    params = job.config.params
    return params.method_table
end

# Report the `entry_safepoint` flag stored in the job's `CompilerParams`.
function GPUCompiler.can_safepoint(@nospecialize(job::NativeCompilerJob))
    params = job.config.params
    return params.entry_safepoint
end

"""
    create_job(func, types; entry_safepoint=false, method_table=test_method_table, kwargs...)

Build a `CompilerJob` for calling `func` with argument types `types`, using a
`NativeCompilerTarget` and a non-kernel (`kernel=false`) `CompilerConfig`.

# Keywords
- `entry_safepoint::Bool`, `method_table`: forwarded to `CompilerParams`.
- `kwargs...`: split against `GPUCompiler.CONFIG_KWARGS`; the first group is forwarded to
  `CompilerConfig`, the second value returned by `split_kwargs` is not used.
"""
function create_job(@nospecialize(func), @nospecialize(types);
                    entry_safepoint::Bool=false, method_table=test_method_table, kwargs...)
    config_kwargs, _ = split_kwargs(kwargs, GPUCompiler.CONFIG_KWARGS)

    # Look up the method instance in the current world.
    signature = Base.to_tuple_type(types)
    world = Base.get_world_counter()
    source = methodinstance(typeof(func), signature, world)

    config = CompilerConfig(NativeCompilerTarget(),
                            CompilerParams(entry_safepoint, method_table);
                            kernel=false, config_kwargs...)
    return CompilerJob(source, config)
end

"""
    compiler(job)

Run `GPUCompiler.compile(:asm, job)` inside a `JuliaContext` and return its result.
"""
function compiler(job)
    # The context argument supplied by `JuliaContext` is not needed by the callback.
    emit_asm = _ -> GPUCompiler.compile(:asm, job)
    return JuliaContext(emit_asm)
end

The actual reproducer:

using UnsafePointers

# Reproducer kernel: broadcast-multiplies `I` by `d` element-wise and destructures the
# two results into `x[i]` and `y[i]`. Always returns `nothing`.
# The body is kept exactly as reported; the failure in the stacktrace below originates
# from the broadcast on the first line of the body.
function mul_bcast(I, i, x, y, d)
    (x[i], y[i]) = I .* d
    return nothing
end

# Sample values: only their types are passed to `create_job` (via `Base.typesof`);
# `mul_bcast` itself is never called here.
x_cpu = zeros(Int64, 100)
y_cpu = zeros(Int64, 100)

# NOTE(review): presumably `UnsafePtr` exposes the arrays as raw, indexable pointers —
# confirm against UnsafePointers.jl.
x_cpu_ptr = UnsafePtr(x_cpu)
y_cpu_ptr = UnsafePtr(y_cpu)

# Float64 factors used as the `d` argument of `mul_bcast`.
d = (rand(Float64), rand(Float64))

# Two signatures that differ only in the first argument: a tuple of Float64 vs. a tuple of Int.
no_conv_args_types = Base.typesof((1.0, 1.0), 1, x_cpu_ptr, y_cpu_ptr, d)
conv_args_types    = Base.typesof((1, 1), 1, x_cpu_ptr, y_cpu_ptr, d)  # will convert `(1, 1)` within the broadcast to `NTuple{2, Float64}`

# This compiles fine
compiler(create_job(mul_bcast, no_conv_args_types; always_inline=false));
compiler(create_job(mul_bcast, no_conv_args_types; always_inline=true));
compiler(create_job(mul_bcast, conv_args_types; always_inline=false));

# This fails
# (only the combination of the converting signature and `always_inline=true`)
compiler(create_job(mul_bcast, conv_args_types; always_inline=true));
Error stacktrace

ERROR: InvalidIRError: compiling MethodInstance for mul_bcast(::Tuple{Int64, Int64}, ::Int64, ::UnsafePtr{Int64}, ::UnsafePtr{Int64}, ::Tuple{Float64, Float64}) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to getindex(t::Tuple, i::Int64) @ Base tuple.jl:31)
Stacktrace:
 [1] _getindex
   @ ./broadcast.jl:674
 [2] _broadcast_getindex
   @ ./broadcast.jl:650
 [3] #17
   @ ./broadcast.jl:1102
 [4] ntuple
   @ ./ntuple.jl:49
 [5] copy
   @ ./broadcast.jl:1102
 [6] materialize
   @ ./broadcast.jl:872
 [7] mul_bcast
   @ ./REPL[76]:2

Note that I also tried this with Julia 1.12.0-beta1, but there I hit the same issue as in #687, so I could not tell whether this bug is still present.

Keluaa avatar Apr 22 '25 09:04 Keluaa

@aviatesk Any idea why always_inline=true (which sets inline_cost_threshold to typemax(Int)) could result in additional dynamic invocations?

maleadt avatar Apr 22 '25 11:04 maleadt

If a method is declared as @noinline, then we skip the inlining cost model calculation for it and just mark it non-inlineable: https://github.com/JuliaLang/julia/blob/760b2e5b7396f9cc0da5efce0cadd5d1974c4069/base/compiler/optimize.jl#L491-L492

I guess this causes the issue?

aviatesk avatar Apr 23 '25 15:04 aviatesk