MadNLP.jl
MadNLP.jl copied to clipboard
LapackGPUSolver goes into feasibility restoration when LapackCPUSolver does not
Running the attached script raises an error because the solver tries to perform feasibility restoration. If I change the tolerance to 1e-6, there is no error. If I run the equivalent problem on the CPU, using `LapackCPUSolver` and `MadNLP.InteriorPointSolver` instead of `LapackGPUSolver` and `MadNLPGPU.CuInteriorPointSolver`, the problem solves fine and never attempts feasibility restoration.
using LinearAlgebra
using SparseArrays
using MatrixEquations
using MadNLPGPU
using NLPModels, QuadraticModels
"""
    MadNLP.jac_dense!(nlp::DenseLQDynamicModel, x, jac)

Overwrite `jac` with the matrix stored in `nlp.data.A` and return `jac`.

The evaluation point `x` is accepted for interface compatibility but is not read:
the same stored matrix is copied on every call. Each call increments the model's
`:neval_jac` counter.
"""
function MadNLP.jac_dense!(
    nlp::DenseLQDynamicModel{T, V, M1, M2, M3}, x, jac
) where {T, V, M1 <: AbstractMatrix, M2 <: AbstractMatrix, M3 <: AbstractMatrix}
    NLPModels.increment!(nlp, :neval_jac)
    # `copyto!` returns its destination, so the caller gets `jac` back.
    return copyto!(jac, nlp.data.A)
end
"""
    MadNLP.hess_dense!(nlp::DenseLQDynamicModel, x, w1l, hess; obj_weight = 1.0)

Overwrite `hess` with the matrix stored in `nlp.data.H` and return `hess`.

`x`, `w1l` and `obj_weight` are accepted for interface compatibility but are not
read: the stored matrix is copied unscaled on every call. Each call increments the
model's `:neval_hess` counter.
"""
function MadNLP.hess_dense!(
    nlp::DenseLQDynamicModel{T, V, M1, M2, M3}, x, w1l, hess; obj_weight = 1.0
) where {T, V, M1 <: AbstractMatrix, M2 <: AbstractMatrix, M3 <: AbstractMatrix}
    NLPModels.increment!(nlp, :neval_hess)
    # NOTE(review): `obj_weight` is not applied here — confirm callers only pass 1.0.
    return copyto!(hess, nlp.data.H)
end
"""
    build_3D_heating_AB(dx, nx, dt)

Build the dense matrices `A` (`nx^3 × nx^3`) and `B` (`nx^3 × 6`) of a discretized
heat-conduction model on an `nx × nx × nx` grid and return them as `(A, B)`.

Nodes are numbered `1:nx^3` with the x index varying fastest, then y, then z, so the
neighbours of node `i` are `i ± 1` (x), `i ± nx` (y) and `i ± nx^2` (z).

# Arguments
- `dx`: grid spacing; enters both constants as `1 / dx^2`.
- `nx`: number of grid nodes per spatial direction.
- `dt`: time step; scales both constants linearly.

# Returns
- `A`: diagonal entries are `1 - 6c` and each existing neighbour link is `c`, where
  `c = k * dt / (rho * specificHeat * dx^2)`.
- `B`: one column per face of the cube. Column 1/2 act on the first/last z-layer,
  column 3/4 on the first/last x-plane and column 5/6 on the first/last y-plane.
"""
function build_3D_heating_AB(dx, nx, dt)
    A = zeros(nx^3, nx^3)
    B = zeros(nx^3, 6)

    k = 400.             # thermal conductivity of copper, W/(m-K)
    k2 = 400             # coefficient used for the input (face) terms
    rho = 8960.          # density of copper, kg/m^3
    specificHeat = 386.  # specific heat of copper, J/(kg-K)

    conduction_constant = k * dt / rho / specificHeat / dx^2
    input_constant = k2 * dt / rho / specificHeat / dx^2

    # Set A matrix.
    # The diagonal always carries the full -6c term, including at boundary nodes
    # that have fewer than six neighbours.
    for i in 1:nx^3
        A[i, i] = 1 - 6 * conduction_constant

        # Links in the x direction
        if i % nx != 0 && i % nx != 1
            A[i, i - 1] = conduction_constant
            A[i, i + 1] = conduction_constant
        elseif i % nx == 0
            # last node of an x-row: only a left neighbour
            A[i, i - 1] = conduction_constant
        else
            # first node of an x-row: only a right neighbour
            A[i, i + 1] = conduction_constant
        end

        # Links in the y direction
        if i % (nx^2) in 1:nx
            # first y-row of the z-layer
            A[i, i + nx] = conduction_constant
        elseif i % (nx^2) == 0 || i % (nx^2) > nx^2 - nx
            # last y-row of the z-layer
            A[i, i - nx] = conduction_constant
        else
            A[i, i + nx] = conduction_constant
            A[i, i - nx] = conduction_constant
        end

        # Links in the z direction
        if i <= nx^2
            # first z-layer
            A[i, i + nx^2] = conduction_constant
        elseif i > nx^3 - nx^2
            # last z-layer
            A[i, i - nx^2] = conduction_constant
        else
            A[i, i + nx^2] = conduction_constant
            A[i, i - nx^2] = conduction_constant
        end
    end

    # Set B matrix.
    B[1:nx^2, 1] .= input_constant
    # The last z-layer is exactly the nodes with i > nx^3 - nx^2 (same test as in the
    # A loop), so the range must start at nx^3 - nx^2 + 1 to cover nx^2 nodes.
    B[(nx^3 - nx^2 + 1):(nx^3), 2] .= input_constant
    for i in 1:nx^3
        if i % nx == 1
            B[i, 3] += input_constant
        end
        if i % nx == 0
            B[i, 4] += input_constant
        end
        if i % nx^2 in 1:nx
            B[i, 5] += input_constant
        end
        if i % nx^2 == 0 || i % nx^2 > nx^2 - nx
            B[i, 6] += input_constant
        end
    end

    return A, B
end
"""
    set_d!(d, nx, N, Tmax, Tstart)

Fill `d` in place with a set-point profile and return `nothing`.

`d` is first filled with `Tstart`; then rows `1:nx^3` of columns `1:(N + 1)` are
overwritten. Entry `(i, j)` is `Tstart + (Tmax - Tstart) / 10` plus a sinusoidal term
that depends on the grid coordinates decoded from `i` and that grows linearly with
the column index `j`.
"""
function set_d!(d, nx, N, Tmax, Tstart)
    fill!(d, Tstart)

    half_span = (Tmax - Tstart) / 2
    offset = (Tmax - Tstart) / 10

    for step in 1:(N + 1)
        # Linear-in-time factor; identical for every node of this column.
        ramp = step / N / 3
        for node in 1:nx^3
            # Decode grid coordinates from the linear node index.
            ix = node % nx
            iy = div(node % nx^2, nx)
            iz = div(node, nx^2)

            depth = 1 - iz / nx
            wave = 2 * sin(3.14159 * ix / nx) + 2 * sin(3.14159 * iy / nx)
            d[node, step] = Tstart + offset + depth * wave * half_span * ramp
        end
    end

    return nothing
end
"""
    build_3D_PDE(N, nx, dx, dt, Tmax, Tstart; dense::Bool = true, implicit = false)

Assemble a linear-quadratic dynamic model for the 3D heating problem and return it.

The system matrices come from `build_3D_heating_AB(dx, nx, dt)` and the set-point
profile `d` from `set_d!`. After the model is constructed, its linear term `c` and
constant term `c0` are shifted so that the objective accounts for the terms arising
from `(x - d)^T Q (x - d)`.

# Arguments
- `N`: horizon length passed to the model constructor; `d` has `N + 1` columns.
- `nx`: grid nodes per spatial direction; the state dimension is `nx^3`.
- `dx`, `dt`: grid spacing and time step forwarded to `build_3D_heating_AB`.
- `Tmax`, `Tstart`: forwarded to `set_d!`; `Tstart` is also the initial state value.

# Keywords
- `dense`: build a `DenseLQDynamicModel` when `true`, else a `SparseLQDynamicModel`.
- `implicit`: forwarded to `DenseLQDynamicModel` only when `dense` is `true` and
  `implicit` is truthy; ignored for the sparse model.

# Returns
The constructed model with `data.c` and `data.c0` updated in place.
"""
function build_3D_PDE(N, nx, dx, dt, Tmax, Tstart; dense::Bool = true, implicit = false)
    ns = nx^3
    nu = 6

    # Cost matrices: stage cost Q, terminal cost Qf, input cost R, cross term S.
    Q = 10. * Matrix(LinearAlgebra.I, ns, ns)
    Qf = 10. * Matrix(LinearAlgebra.I, ns, ns) ./ dt
    R = 1.0 * Matrix(LinearAlgebra.I, nu, nu) ./ 10
    S = -.001 * Matrix(LinearAlgebra.I, ns, nu)

    A, B = build_3D_heating_AB(dx, nx, dt)

    # Initial state and box bounds on states and inputs.
    s0 = fill(Tstart, ns)
    sl = fill(200., ns)
    su = fill(550., ns)
    ul = fill(300., nu)
    uu = fill(500., nu)

    if dense
        if implicit
            lqdm = DenseLQDynamicModel(
                s0, A, B, Q, R, N;
                Qf = Qf, sl = sl, su = su, ul = ul, uu = uu, S = S, implicit = implicit
            )
        else
            lqdm = DenseLQDynamicModel(
                s0, A, B, Q, R, N;
                Qf = Qf, sl = sl, su = su, ul = ul, uu = uu, S = S
            )
        end
    else
        lqdm = SparseLQDynamicModel(
            s0,
            SparseArrays.sparse(A),
            SparseArrays.sparse(B),
            SparseArrays.sparse(Q),
            SparseArrays.sparse(R),
            N;
            Qf = SparseArrays.sparse(Qf), sl = sl, su = su, ul = ul, uu = uu,
            S = SparseArrays.sparse(S)
        )
    end

    # Set-point profile: one column per time step.
    d = zeros(nx^3, N + 1)
    set_d!(d, nx, N, Tmax, Tstart)

    # Qdvec stacks Q*d_i for every stage (Qf*d_{N+1} for the last one);
    # dQd accumulates the matching d_i' * Q * d_i terms.
    Qd = zeros(size(d, 1))
    Qdvec = zeros(length(d))
    dQd = zero(eltype(d))  # float accumulator from the start (was an Int literal)
    for i in 1:N
        d_i = view(d, :, i)  # view avoids copying the column twice per iteration
        LinearAlgebra.mul!(Qd, Q, d_i)
        Qdvec[(1 + ns * (i - 1)):(ns * i)] = Qd
        dQd += LinearAlgebra.dot(Qd, d_i)
    end
    d_end = view(d, :, N + 1)
    LinearAlgebra.mul!(Qd, Qf, d_end)
    Qdvec[(1 + ns * N):end] = Qd
    dQd += LinearAlgebra.dot(Qd, d_end)

    # Add c and c0 that result from (x-d)^T Q (x-d) in the objective function
    if dense
        block_A = lqdm.blocks.A
        block_B = lqdm.blocks.B

        As0 = zeros(size(block_A, 1))
        LinearAlgebra.mul!(As0, block_A, s0)

        dQB = zeros(nu * N)
        dQB_sub_block = zeros(nu)
        for i in 1:N
            B_sub_block = block_B[(1 + ns * (i - 1)):(ns * i), :]
            for j in N:-1:i
                Qd_sub_block = view(Qdvec, (1 + ns * j):(ns * (j + 1)))
                LinearAlgebra.mul!(dQB_sub_block, B_sub_block', Qd_sub_block)
                dQB[(1 + nu * (j - i)):(nu * (j - i + 1))] .+= dQB_sub_block
            end
        end

        lqdm.data.c0 += dQd / 2
        lqdm.data.c0 += -LinearAlgebra.dot(Qdvec, As0)
        lqdm.data.c += -dQB
    else
        # The linear term is -Q*d on the state entries and zero on the input entries.
        uvec = zeros(nu * N)
        full_Qd = vcat(Qdvec, uvec)

        lqdm.data.c0 += dQd / 2
        lqdm.data.c += -full_Qd
    end

    return lqdm
end
# Problem size and physical parameters for the reproducer.
N = 250        # passed as `N` to build_3D_PDE (set-point profile gets N + 1 columns)
nx = 9         # grid nodes per spatial direction, i.e. nx^3 = 729 states
lenx = .02     # passed as the `dx` argument of build_3D_PDE
dt = .5        # time step
Tmax = 350.    # upper temperature used when building the set-point profile
Tstart = 300.  # initial state value, also the base of the set-point profile
# Dense, non-implicit model.
lqdm = build_3D_PDE(N, nx, lenx, dt, Tmax, Tstart; dense = true, implicit = false)
# Solver options: dense condensed KKT system with the GPU LAPACK solver (Cholesky),
# constant Jacobian/Hessian flags set, NLP scaling off, at most 100 iterations.
madnlp_options = Dict{Symbol, Any}(
:kkt_system=>MadNLP.DENSE_CONDENSED_KKT_SYSTEM,
:linear_solver=>LapackGPUSolver,
:jacobian_constant=>true,
:hessian_constant=>true,
:lapack_algorithm=>MadNLP.CHOLESKY,
:nlp_scaling=>false,
:max_iter=>100
)
# Build the GPU interior-point solver and run it; this is the call that fails.
ips1 = MadNLPGPU.CuInteriorPointSolver(lqdm, option_dict=madnlp_options)
sol_ref = MadNLP.optimize!(ips1)```
@dlcole3 could you also share the error message?
Error message:
ERROR: LoadError: GPU compilation of kernel #broadcast_kernel#17(CUDA.CuKernelContext, CUDA.CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}}}}, Int64) failed
KernelError: passing and using non-bitstype argument
Argument 4 to your kernel function is of type Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}}}}, which is not isbits:
.args is of type Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}}} which is not isbits.
.1 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}} which is not isbits.
.args is of type Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}} which is not isbits.
.1 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}} which is not isbits.
.args is of type Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}} which is not isbits.
.1 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
.x is of type Vector{Float64} which is not isbits.
.2 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}} which is not isbits.
.args is of type Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}} which is not isbits.
.1 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
.x is of type Vector{Float64} which is not isbits.
.2 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
.x is of type Vector{Float64} which is not isbits.
.2 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}} which is not isbits.
.args is of type Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}} which is not isbits.
.1 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
.x is of type Vector{Float64} which is not isbits.
.2 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}} which is not isbits.
.args is of type Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}} which is not isbits.
.1 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
.x is of type Vector{Float64} which is not isbits.
.2 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
.x is of type Vector{Float64} which is not isbits.
.2 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}} which is not isbits.
.args is of type Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}} which is not isbits.
.2 is of type Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}} which is not isbits.
.args is of type Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}} which is not isbits.
.2 is of type Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}} which is not isbits.
.x is of type Vector{Float64} which is not isbits.
Stacktrace:
[1] check_invocation(job::GPUCompiler.CompilerJob)
@ GPUCompiler ~/.julia/packages/GPUCompiler/iaKrd/src/validation.jl:86
[2] macro expansion
@ ~/.julia/packages/GPUCompiler/iaKrd/src/driver.jl:413 [inlined]
[3] macro expansion
@ ~/.julia/packages/TimerOutputs/jgSVI/src/TimerOutput.jl:252 [inlined]
[4] macro expansion
@ ~/.julia/packages/GPUCompiler/iaKrd/src/driver.jl:412 [inlined]
[5] emit_asm(job::GPUCompiler.CompilerJob, ir::LLVM.Module; strip::Bool, validate::Bool, format::LLVM.API.LLVMCodeGenFileType)
@ GPUCompiler ~/.julia/packages/GPUCompiler/iaKrd/src/utils.jl:64
[6] cufunction_compile(job::GPUCompiler.CompilerJob, ctx::LLVM.Context)
@ CUDA ~/.julia/packages/CUDA/tTK8Y/src/compiler/execution.jl:354
[7] #224
@ ~/.julia/packages/CUDA/tTK8Y/src/compiler/execution.jl:347 [inlined]
[8] JuliaContext(f::CUDA.var"#224#225"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{GPUArrays.var"#broadcast_kernel#17", Tuple{CUDA.CuKernelContext, CUDA.CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}}}}, Int64}}}})
@ GPUCompiler ~/.julia/packages/GPUCompiler/iaKrd/src/driver.jl:74
[9] cufunction_compile(job::GPUCompiler.CompilerJob)
@ CUDA ~/.julia/packages/CUDA/tTK8Y/src/compiler/execution.jl:346
[10] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/iaKrd/src/cache.jl:90
[11] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CUDA.CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}}}}, Int64}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ CUDA ~/.julia/packages/CUDA/tTK8Y/src/compiler/execution.jl:299
[12] cufunction(f::GPUArrays.var"#broadcast_kernel#17", tt::Type{Tuple{CUDA.CuKernelContext, CUDA.CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{CUDA.CuRefValue{typeof(^)}, Base.Broadcast.Extruded{Vector{Float64}, Tuple{Bool}, Tuple{Int64}}, CUDA.CuRefValue{Val{2}}}}}}}}, Int64}})
@ CUDA ~/.julia/packages/CUDA/tTK8Y/src/compiler/execution.jl:293
[13] macro expansion
@ ~/.julia/packages/CUDA/tTK8Y/src/compiler/execution.jl:102 [inlined]
[14] #launch_heuristic#248
@ ~/.julia/packages/CUDA/tTK8Y/src/gpuarrays.jl:17 [inlined]
[15] _copyto!(dest::CUDA.CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, bc::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(+), Tuple{Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Vector{Float64}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Vector{Float64}, Vector{Float64}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(/), Tuple{Vector{Float64}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(-), Tuple{Vector{Float64}, Vector{Float64}}}}}}}, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(*), Tuple{Float64, Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{1}, Nothing, typeof(Base.literal_pow), Tuple{Base.RefValue{typeof(^)}, Vector{Float64}, Base.RefValue{Val{2}}}}}}}})
@ GPUArrays ~/.julia/packages/GPUArrays/gok9K/src/host/broadcast.jl:73
[16] materialize!
@ ~/.julia/packages/GPUArrays/gok9K/src/host/broadcast.jl:51 [inlined]
[17] materialize!
@ ./broadcast.jl:868 [inlined]
[18] set_aug_RR!(kkt::MadNLP.DenseCondensedKKTSystem{Float64, CUDA.CuArray{Float64, 1}, CUDA.CuArray{Float64, 2}}, ips::MadNLP.InteriorPointSolver{Float64, MadNLP.DenseCondensedKKTSystem{Float64, CUDA.CuArray{Float64, 1}, CUDA.CuArray{Float64, 2}}}, RR::MadNLP.RobustRestorer{Float64})
@ MadNLP ~/git/MadNLP.jl/src/IPM/kernels.jl:19
[19] robust!(ips::MadNLP.InteriorPointSolver{Float64, MadNLP.DenseCondensedKKTSystem{Float64, CUDA.CuArray{Float64, 1}, CUDA.CuArray{Float64, 2}}})
@ MadNLP ~/git/MadNLP.jl/src/IPM/solver.jl:409
[20] optimize!(ips::MadNLP.InteriorPointSolver{Float64, MadNLP.DenseCondensedKKTSystem{Float64, CUDA.CuArray{Float64, 1}, CUDA.CuArray{Float64, 2}}})
@ MadNLP ~/git/MadNLP.jl/src/IPM/solver.jl:99
[21] top-level scope
@ ~/Moonshot/Moonshot_files/updated_timing_files/GPU_error_script.jl:245
[22] include(fname::String)
@ Base.MainInclude ./client.jl:451
[23] top-level scope
@ REPL[1]:1
[24] top-level scope
@ ~/.julia/packages/CUDA/tTK8Y/src/initialization.jl:52
in expression starting at
Thanks for reporting @dlcole3. We'll look into it