DiffEqGPU.jl

Constant static matrices cause compilation issues

Open nilsbecker opened this issue 5 years ago • 26 comments

this example should not allocate:

using LinearAlgebra, DifferentialEquations, DiffEqGPU
A  = Float32[1. 0  0 -5; 4 -2  4 -3; -4  0  0  1; 5 -2  2  3]
u0 = Array{Float32}(rand(4,2))
tspan = (0.0f0,1.0f0)
function fip(du,u,p,t) 
   mul!(du, A, u)
end
probip = ODEProblem(fip,u0,tspan)
function prob_func(prob, i, repeat)
    prob
end
ensprobip=EnsembleProblem(probip, prob_func=prob_func)
trajs = 123
simgpu = solve(ensprobip, Tsit5(), EnsembleGPUArray(), trajectories=trajs)

i ran it on a linux machine with CUDA 9.0 and several Tesla K80 GPUs with compute capability 3.7.

see also this thread: https://discourse.julialang.org/t/test-diffeqgpu-errors-with-unsupported-call-to-the-julia-runtime/28893/17

with EnsembleCPUArray it works fine, but it fails to compile on GPU with the stack trace:

InvalidIRError: compiling gpu_kernel(Cassette.Context{nametype(Ctx),Nothing,Nothing,getfield(GPUifyLoops, Symbol("##PassType#371")),Nothing,Cassette.DisableHooks}, typeof(DiffEqGPU.gpu_kernel), ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}, CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}, CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}, CUDAnative.CuDeviceArray{DiffEqBase.NullParameters,2,CUDAnative.AS.Global}, Float32) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to overdub)
Stacktrace:
 [1] fip at In[1]:10
 [2] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/8uyX3/src/diffeqfunction.jl:230
 [3] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [4] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0

Stacktrace:
 [1] check_ir(::CUDAnative.CompilerJob, ::LLVM.Module) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/validation.jl:114
 [2] macro expansion at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/driver.jl:188 [inlined]
 [3] macro expansion at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/TimerOutputs/7zSea/src/TimerOutput.jl:216 [inlined]
 [4] #codegen#130(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.codegen), ::Symbol, ::CUDAnative.CompilerJob) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/driver.jl:186
 [5] #codegen at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/driver.jl:0 [inlined]
 [6] #compile#129(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.compile), ::Symbol, ::CUDAnative.CompilerJob) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/driver.jl:47
 [7] #compile at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/common.jl:0 [inlined]
 [8] #compile#128 at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/driver.jl:28 [inlined]
 [9] #compile at ./none:0 [inlined] (repeats 2 times)
 [10] macro expansion at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/execution.jl:389 [inlined]
 [11] #cufunction#170(::String, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(CUDAnative.cufunction), ::typeof(Cassette.overdub), ::Type{Tuple{Cassette.Context{nametype(Ctx),Nothing,Nothing,getfield(GPUifyLoops, Symbol("##PassType#371")),Nothing,Cassette.DisableHooks},typeof(DiffEqGPU.gpu_kernel),ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global},CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global},CUDAnative.CuDeviceArray{DiffEqBase.NullParameters,2,CUDAnative.AS.Global},Float32}}) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/execution.jl:357
 [12] (::getfield(CUDAnative, Symbol("#kw##cufunction")))(::NamedTuple{(:name,),Tuple{String}}, ::typeof(CUDAnative.cufunction), ::Function, ::Type) at ./none:0
 [13] #launch#50(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(GPUifyLoops.launch), ::GPUifyLoops.CUDA, ::typeof(DiffEqGPU.gpu_kernel), ::Function, ::Vararg{Any,N} where N) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/GPUifyLoops/mjszO/src/GPUifyLoops.jl:125
 [14] launch at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/GPUifyLoops/mjszO/src/GPUifyLoops.jl:119 [inlined]
 [15] macro expansion at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/GPUifyLoops/mjszO/src/GPUifyLoops.jl:54 [inlined]
 [16] #12 at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:61 [inlined]
 [17] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/8uyX3/src/diffeqfunction.jl:230 [inlined]
 [18] initialize!(::OrdinaryDiffEq.ODEIntegrator{Tsit5,true,CuArrays.CuArray{Float32,2},Float32,CuArrays.CuArray{DiffEqBase.NullParameters,2},Float32,Float32,Float32,Array{CuArrays.CuArray{Float32,2},1},ODESolution{Float32,3,Array{CuArrays.CuArray{Float32,2},1},Nothing,Nothing,Array{Float32,1},Array{Array{CuArrays.CuArray{Float32,2},1},1},ODEProblem{CuArrays.CuArray{Float32,2},Tuple{Float32,Float32},true,CuArrays.CuArray{DiffEqBase.NullParameters,2},ODEFunction{true,getfield(DiffEqGPU, Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},Tsit5,OrdinaryDiffEq.InterpolationData{ODEFunction{true,getfield(DiffEqGPU, Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Array{CuArrays.CuArray{Float32,2},1},Array{Float32,1},Array{Array{CuArrays.CuArray{Float32,2},1},1},OrdinaryDiffEq.Tsit5Cache{CuArrays.CuArray{Float32,2},CuArrays.CuArray{Float32,2},CuArrays.CuArray{Float32,2},OrdinaryDiffEq.Tsit5ConstantCache{Float32,Float32}}},DiffEqBase.DEStats},ODEFunction{true,getfield(DiffEqGPU, 
Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},OrdinaryDiffEq.Tsit5Cache{CuArrays.CuArray{Float32,2},CuArrays.CuArray{Float32,2},CuArrays.CuArray{Float32,2},OrdinaryDiffEq.Tsit5ConstantCache{Float32,Float32}},OrdinaryDiffEq.DEOptions{Float32,Float32,Float32,Float32,typeof(DiffEqBase.ODE_DEFAULT_NORM),typeof(opnorm),CallbackSet{Tuple{},Tuple{}},typeof(DiffEqBase.ODE_DEFAULT_ISOUTOFDOMAIN),typeof(DiffEqBase.ODE_DEFAULT_PROG_MESSAGE),typeof(DiffEqBase.ODE_DEFAULT_UNSTABLE_CHECK),DataStructures.BinaryHeap{Float32,DataStructures.LessThan},DataStructures.BinaryHeap{Float32,DataStructures.LessThan},Nothing,Nothing,Int64,Array{Float32,1},Array{Float32,1},Array{Float32,1}},CuArrays.CuArray{Float32,2},Float32,Nothing}, ::OrdinaryDiffEq.Tsit5Cache{CuArrays.CuArray{Float32,2},CuArrays.CuArray{Float32,2},CuArrays.CuArray{Float32,2},OrdinaryDiffEq.Tsit5ConstantCache{Float32,Float32}}) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/OrdinaryDiffEq/tQd6p/src/perform_step/low_order_rk_perform_step.jl:623
 [19] #__init#335(::Array{Float32,1}, ::Array{Float32,1}, ::Array{Float32,1}, ::Nothing, ::Bool, ::Bool, ::Bool, ::Bool, ::Nothing, ::Bool, ::Bool, ::Float32, ::Float32, ::Float32, ::Bool, ::Bool, ::Rational{Int64}, ::Nothing, ::Nothing, ::Rational{Int64}, ::Int64, ::Int64, ::Int64, ::Rational{Int64}, ::Bool, ::Int64, ::Nothing, ::Nothing, ::Int64, ::typeof(DiffEqBase.ODE_DEFAULT_NORM), ::typeof(opnorm), ::typeof(DiffEqBase.ODE_DEFAULT_ISOUTOFDOMAIN), ::typeof(DiffEqBase.ODE_DEFAULT_UNSTABLE_CHECK), ::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::Int64, ::String, ::typeof(DiffEqBase.ODE_DEFAULT_PROG_MESSAGE), ::Nothing, ::Bool, ::Bool, ::Bool, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(DiffEqBase.__init), ::ODEProblem{CuArrays.CuArray{Float32,2},Tuple{Float32,Float32},true,CuArrays.CuArray{DiffEqBase.NullParameters,2},ODEFunction{true,getfield(DiffEqGPU, Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem}, ::Tsit5, ::Array{CuArrays.CuArray{Float32,2},1}, ::Array{Float32,1}, ::Array{Any,1}, ::Type{Val{true}}) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/OrdinaryDiffEq/tQd6p/src/solve.jl:352
 [20] __init(::ODEProblem{CuArrays.CuArray{Float32,2},Tuple{Float32,Float32},true,CuArrays.CuArray{DiffEqBase.NullParameters,2},ODEFunction{true,getfield(DiffEqGPU, Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem}, ::Tsit5, ::Array{CuArrays.CuArray{Float32,2},1}, ::Array{Float32,1}, ::Array{Any,1}, ::Type{Val{true}}) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/OrdinaryDiffEq/tQd6p/src/solve.jl:66 (repeats 4 times)
 [21] #__solve#334 at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/OrdinaryDiffEq/tQd6p/src/solve.jl:4 [inlined]
 [22] __solve at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/OrdinaryDiffEq/tQd6p/src/solve.jl:4 [inlined]
 [23] #solve_call#425(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(DiffEqBase.solve_call), ::ODEProblem{CuArrays.CuArray{Float32,2},Tuple{Float32,Float32},true,CuArrays.CuArray{DiffEqBase.NullParameters,2},ODEFunction{true,getfield(DiffEqGPU, Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem}, ::Tsit5) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/8uyX3/src/solve.jl:40
 [24] solve_call at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/8uyX3/src/solve.jl:37 [inlined]
 [25] #solve#426 at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/8uyX3/src/solve.jl:57 [inlined]
 [26] solve(::ODEProblem{CuArrays.CuArray{Float32,2},Tuple{Float32,Float32},true,CuArrays.CuArray{DiffEqBase.NullParameters,2},ODEFunction{true,getfield(DiffEqGPU, Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem}, ::Tsit5) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/8uyX3/src/solve.jl:45
 [27] #batch_solve#5(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(DiffEqGPU.batch_solve), ::EnsembleProblem{ODEProblem{Array{Float32,2},Tuple{Float32,Float32},true,DiffEqBase.NullParameters,ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},typeof(prob_func),getfield(DiffEqBase, Symbol("##332#338")),getfield(DiffEqBase, Symbol("##334#340")),Array{Any,1}}, ::Tsit5, ::EnsembleGPUArray, ::UnitRange{Int64}) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:66
 [28] batch_solve at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:45 [inlined]
 [29] (::getfield(DiffEqGPU, Symbol("##3#4")){Int64,Int64,Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},EnsembleProblem{ODEProblem{Array{Float32,2},Tuple{Float32,Float32},true,DiffEqBase.NullParameters,ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},typeof(prob_func),getfield(DiffEqBase, Symbol("##332#338")),getfield(DiffEqBase, Symbol("##334#340")),Array{Any,1}},Tsit5,EnsembleGPUArray})(::Int64) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:37
 [30] iterate at ./generator.jl:47 [inlined]
 [31] _collect(::UnitRange{Int64}, ::Base.Generator{UnitRange{Int64},getfield(DiffEqGPU, Symbol("##3#4")){Int64,Int64,Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},EnsembleProblem{ODEProblem{Array{Float32,2},Tuple{Float32,Float32},true,DiffEqBase.NullParameters,ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},typeof(prob_func),getfield(DiffEqBase, Symbol("##332#338")),getfield(DiffEqBase, Symbol("##334#340")),Array{Any,1}},Tsit5,EnsembleGPUArray}}, ::Base.EltypeUnknown, ::Base.HasShape{1}) at ./array.jl:619
 [32] collect_similar(::UnitRange{Int64}, ::Base.Generator{UnitRange{Int64},getfield(DiffEqGPU, Symbol("##3#4")){Int64,Int64,Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},EnsembleProblem{ODEProblem{Array{Float32,2},Tuple{Float32,Float32},true,DiffEqBase.NullParameters,ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},typeof(prob_func),getfield(DiffEqBase, Symbol("##332#338")),getfield(DiffEqBase, Symbol("##334#340")),Array{Any,1}},Tsit5,EnsembleGPUArray}}) at ./array.jl:548
 [33] map(::Function, ::UnitRange{Int64}) at ./abstractarray.jl:2073
 [34] macro expansion at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:31 [inlined]
 [35] macro expansion at ./util.jl:213 [inlined]
 [36] #__solve#2(::Int64, ::Int64, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(DiffEqBase.__solve), ::EnsembleProblem{ODEProblem{Array{Float32,2},Tuple{Float32,Float32},true,DiffEqBase.NullParameters,ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},typeof(prob_func),getfield(DiffEqBase, Symbol("##332#338")),getfield(DiffEqBase, Symbol("##334#340")),Array{Any,1}}, ::Tsit5, ::EnsembleGPUArray) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:30
 [37] #__solve at ./none:0 [inlined]
 [38] #solve#427 at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/8uyX3/src/solve.jl:64 [inlined]
 [39] (::getfield(DiffEqBase, Symbol("#kw##solve")))(::NamedTuple{(:trajectories,),Tuple{Int64}}, ::typeof(solve), ::EnsembleProblem{ODEProblem{Array{Float32,2},Tuple{Float32,Float32},true,DiffEqBase.NullParameters,ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},typeof(prob_func),getfield(DiffEqBase, Symbol("##332#338")),getfield(DiffEqBase, Symbol("##334#340")),Array{Any,1}}, ::Tsit5, ::EnsembleGPUArray) at ./none:0
 [40] top-level scope at In[3]:15

nilsbecker avatar Sep 20 '19 14:09 nilsbecker

@vchuravy this one seems to "follow all of the rules", but maybe it's an issue with using the generic matmul fallback in GPUifyLoops?

ChrisRackauckas avatar Sep 20 '19 14:09 ChrisRackauckas

this seems related: when using DiffEqGPU i sometimes get this warning:

WARNING: Method definition overdub(Cassette.Context{N, M, T, P, B, H} where H<:Union{Cassette.DisableHooks, Nothing} where B<:Union{Nothing, Base.IdDict{Module, Base.Dict{Symbol, Cassette.BindingMeta}}} where P<:Cassette.AbstractPass where T<:Union{Nothing, Cassette.Tag{N, X, E} where E where X where N<:Cassette.AbstractContextName} where M where N<:Cassette.AbstractContextName, Any...) in module Cassette at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:524 overwritten in module GPUifyLoops at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:524.
  ** incremental compilation may be fatally broken for this module **

WARNING: Method definition recurse(Cassette.Context{N, M, T, P, B, H} where H<:Union{Cassette.DisableHooks, Nothing} where B<:Union{Nothing, Base.IdDict{Module, Base.Dict{Symbol, Cassette.BindingMeta}}} where P<:Cassette.AbstractPass where T<:Union{Nothing, Cassette.Tag{N, X, E} where E where X where N<:Cassette.AbstractContextName} where M where N<:Cassette.AbstractContextName, Any...) in module Cassette at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:537 overwritten in module GPUifyLoops at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:537.
  ** incremental compilation may be fatally broken for this module **

nilsbecker avatar Sep 20 '19 15:09 nilsbecker

> this seems related: when using DiffEqGPU i sometimes get this warning:

This is a known (unrelated) issue. Annoying, but harmless.

A  = Float32[1. 0  0 -5; 4 -2  4 -3; -4  0  0  1; 5 -2  2  3]

function fip(du,u,p,t) 
  mul!(du, A, u)
end

fip captures A, which is a reference to a host (CPU) array.

vchuravy avatar Sep 20 '19 15:09 vchuravy

ah yes. what's a general way to avoid that? do i need to put A into the parameter argument p? if yes how? (does it have to be a vector or can i keep p a matrix?)

nilsbecker avatar Sep 20 '19 15:09 nilsbecker

PS. does this mean the 'rules' include: 'no references to the cpu's memory' ?

nilsbecker avatar Sep 20 '19 15:09 nilsbecker

> PS. does this mean the 'rules' include: 'no references to the cpu's memory' ?

Yes that should be the first rule. ;)

If your array is only ever this big and you don't need to change it you can make it a constant global StaticArray.

vchuravy avatar Sep 20 '19 15:09 vchuravy

Ahh, I should've caught that. Yes, for this setup the arrays shouldn't ever get big because otherwise this form of GPU parallelism would run out of memory before parallelizing to large numbers of trajectories (which would be really bad for performance). So we should say it just needs to be a static array and call it a day.

ChrisRackauckas avatar Sep 20 '19 16:09 ChrisRackauckas

ok, i am now trying it with StaticArrays:

B  = Float32[
    1. 0  0 -5; 4 -2  4 -3; -4  0  0  1; 5 -2  2  3]
const C = SMatrix{4,4}(B)
u0 = Array{Float32}(rand(4))
tspan = (0.0f0,1.0f0)
function fip(du,u,p,t) 
   mul!(du, C, u)
end

this fails again with a somewhat different error. am i doing it wrong?

there is no step which explicitly puts C onto the GPU. is this supposed to happen automatically? does u0 need to get the same treatment? (PS. afk until monday most probably)

nilsbecker avatar Sep 20 '19 16:09 nilsbecker

trace (long)

┌ Warning: Decoding arguments to jl_apply_generic failed, please file a bug with a reproducer.
│   inst =   %35 = call nonnull %jl_value_t addrspace(10)* @jl_apply_generic(%jl_value_t addrspace(10)** nonnull %.sub, i32 4), !dbg !173
│   bb = 
L48:                                              ; preds = %top
  %.sroa.2229.4..sroa_idx = getelementptr inbounds { [2 x i64], i64 }, { [2 x i64], i64 } addrspace(11)* %0, i64 0, i32 0, i64 0
  %.sroa.2229.4.copyload = load i64, i64 addrspace(11)* %.sroa.2229.4..sroa_idx, align 8
  %.sroa.5231.4..sroa_idx232 = getelementptr inbounds { [2 x i64], i64 }, { [2 x i64], i64 } addrspace(11)* %0, i64 0, i32 1
  %.sroa.5231.4.copyload = load i64, i64 addrspace(11)* %.sroa.5231.4..sroa_idx232, align 8
  %.sroa.6233.28..sroa_idx = getelementptr inbounds { [2 x i64], i64 }, { [2 x i64], i64 } addrspace(11)* %1, i64 0, i32 0, i64 0
  %.sroa.6233.28.copyload = load i64, i64 addrspace(11)* %.sroa.6233.28..sroa_idx, align 8
  %.sroa.9.28..sroa_idx235 = getelementptr inbounds { [2 x i64], i64 }, { [2 x i64], i64 } addrspace(11)* %1, i64 0, i32 1
  %.sroa.9.28.copyload = load i64, i64 addrspace(11)* %.sroa.9.28..sroa_idx235, align 8
  %.sroa.4.4..sroa_idx230 = getelementptr inbounds { [2 x i64], i64 }, { [2 x i64], i64 } addrspace(11)* %0, i64 0, i32 0, i64 1
  %.sroa.4.4.copyload = load i64, i64 addrspace(11)* %.sroa.4.4..sroa_idx230, align 8
  %16 = icmp sgt i64 %.sroa.2229.4.copyload, 0, !dbg !130
  %17 = select i1 %16, i64 %.sroa.2229.4.copyload, i64 0, !dbg !130
  %18 = add nsw i64 %12, -1, !dbg !153
  %19 = mul i64 %18, %17, !dbg !167
  %20 = icmp sgt i64 %.sroa.6233.28.copyload, 0, !dbg !130
  %21 = select i1 %20, i64 %.sroa.6233.28.copyload, i64 0, !dbg !130
  %22 = mul i64 %18, %21, !dbg !167
  %23 = call fastcc %jl_value_t addrspace(10)* @ptx_gc_pool_alloc(i64 56), !dbg !169
  %24 = bitcast %jl_value_t addrspace(10)* %23 to i8 addrspace(10)*, !dbg !169
  %.sroa.0213.0..sroa_cast = bitcast %jl_value_t addrspace(10)* %23 to i64 addrspace(10)*, !dbg !169
  store i64 %.sroa.2229.4.copyload, i64 addrspace(10)* %.sroa.0213.0..sroa_cast, align 8, !dbg !169
  %.sroa.2214.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %24, i64 8, !dbg !169
  %.sroa.2214.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.2214.0..sroa_idx to i64 addrspace(10)*, !dbg !169
  store i64 %.sroa.4.4.copyload, i64 addrspace(10)* %.sroa.2214.0..sroa_cast, align 8, !dbg !169
  %.sroa.3215.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %24, i64 16, !dbg !169
  %.sroa.3215.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.3215.0..sroa_idx to i64 addrspace(10)*, !dbg !169
  store i64 %.sroa.5231.4.copyload, i64 addrspace(10)* %.sroa.3215.0..sroa_cast, align 8, !dbg !169
  %.sroa.4.sroa.0223.0..sroa.4.0..sroa_raw_idx.sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %24, i64 24, !dbg !169
  %.sroa.4.sroa.0223.0..sroa.4.0..sroa_raw_idx.sroa_cast = bitcast i8 addrspace(10)* %.sroa.4.sroa.0223.0..sroa.4.0..sroa_raw_idx.sroa_idx to i64 addrspace(10)*, !dbg !169
  store i64 %17, i64 addrspace(10)* %.sroa.4.sroa.0223.0..sroa.4.0..sroa_raw_idx.sroa_cast, align 4, !dbg !169
  %.sroa.4.sroa.2224.0..sroa.4.0..sroa_raw_idx.sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %24, i64 32, !dbg !169
  %.sroa.4.sroa.2224.0..sroa.4.0..sroa_raw_idx.sroa_cast = bitcast i8 addrspace(10)* %.sroa.4.sroa.2224.0..sroa.4.0..sroa_raw_idx.sroa_idx to i64 addrspace(10)*, !dbg !169
  store i64 %12, i64 addrspace(10)* %.sroa.4.sroa.2224.0..sroa.4.0..sroa_raw_idx.sroa_cast, align 4, !dbg !169
  %.sroa.5217.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %24, i64 40, !dbg !169
  %.sroa.5217.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.5217.0..sroa_idx to i64 addrspace(10)*, !dbg !169
  store i64 %19, i64 addrspace(10)* %.sroa.5217.0..sroa_cast, align 8, !dbg !169
  %.sroa.6218.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %24, i64 48, !dbg !169
  %.sroa.6218.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.6218.0..sroa_idx to i64 addrspace(10)*, !dbg !169
  store i64 1, i64 addrspace(10)* %.sroa.6218.0..sroa_cast, align 8, !dbg !169
  %25 = call fastcc %jl_value_t addrspace(10)* @ptx_gc_pool_alloc(i64 56), !dbg !169
  %26 = bitcast %jl_value_t addrspace(10)* %25 to i8 addrspace(10)*, !dbg !169
  %.sroa.0.0..sroa_cast = bitcast %jl_value_t addrspace(10)* %25 to i64 addrspace(10)*, !dbg !169
  store i64 %.sroa.6233.28.copyload, i64 addrspace(10)* %.sroa.0.0..sroa_cast, align 8, !dbg !169
  %.sroa.2.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %26, i64 8, !dbg !169
  %.sroa.2.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.2.0..sroa_idx to i64 addrspace(10)*, !dbg !169
  store i64 %.sroa.8.28.copyload, i64 addrspace(10)* %.sroa.2.0..sroa_cast, align 8, !dbg !169
  %.sroa.3.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %26, i64 16, !dbg !169
  %.sroa.3.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.3.0..sroa_idx to i64 addrspace(10)*, !dbg !169
  store i64 %.sroa.9.28.copyload, i64 addrspace(10)* %.sroa.3.0..sroa_cast, align 8, !dbg !169
  %.sroa.4.sroa.0.0..sroa.4.0..sroa_raw_idx.sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %26, i64 24, !dbg !169
  %.sroa.4.sroa.0.0..sroa.4.0..sroa_raw_idx.sroa_cast = bitcast i8 addrspace(10)* %.sroa.4.sroa.0.0..sroa.4.0..sroa_raw_idx.sroa_idx to i64 addrspace(10)*, !dbg !169
  store i64 %21, i64 addrspace(10)* %.sroa.4.sroa.0.0..sroa.4.0..sroa_raw_idx.sroa_cast, align 4, !dbg !169
  %.sroa.4.sroa.2.0..sroa.4.0..sroa_raw_idx.sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %26, i64 32, !dbg !169
  %.sroa.4.sroa.2.0..sroa.4.0..sroa_raw_idx.sroa_cast = bitcast i8 addrspace(10)* %.sroa.4.sroa.2.0..sroa.4.0..sroa_raw_idx.sroa_idx to i64 addrspace(10)*, !dbg !169
  store i64 %12, i64 addrspace(10)* %.sroa.4.sroa.2.0..sroa.4.0..sroa_raw_idx.sroa_cast, align 4, !dbg !169
  %.sroa.5.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %26, i64 40, !dbg !169
  %.sroa.5.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.5.0..sroa_idx to i64 addrspace(10)*, !dbg !169
  store i64 %22, i64 addrspace(10)* %.sroa.5.0..sroa_cast, align 8, !dbg !169
  %.sroa.6.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %26, i64 48, !dbg !169
  %.sroa.6.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.6.0..sroa_idx to i64 addrspace(10)*, !dbg !169
  store i64 1, i64 addrspace(10)* %.sroa.6.0..sroa_cast, align 8, !dbg !169
  store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140239635568512 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %.sub, align 8, !dbg !169
  %27 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 1, !dbg !169
  store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140239635570032 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %27, align 8, !dbg !169
  %28 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 2, !dbg !169
  store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140241914908144 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %28, align 8, !dbg !169
  %29 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 3, !dbg !169
  store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140241914880352 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %29, align 8, !dbg !169
  %30 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 4, !dbg !169
  store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140241801391680 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %30, align 8, !dbg !169
  %31 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 5, !dbg !169
  store %jl_value_t addrspace(10)* %23, %jl_value_t addrspace(10)** %31, align 8, !dbg !169
  %32 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 6, !dbg !169
  store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140239663763376 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %32, align 8, !dbg !169
  %33 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 7, !dbg !169
  store %jl_value_t addrspace(10)* %25, %jl_value_t addrspace(10)** %33, align 8, !dbg !169
  %34 = call nonnull %jl_value_t addrspace(10)* @jl_invoke(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140239682109776 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** nonnull %.sub, i32 8), !dbg !169
  store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140239635568512 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %.sub, align 8, !dbg !173
  store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140239635570032 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %27, align 8, !dbg !173
  store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140241922398864 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %28, align 8, !dbg !173
  store %jl_value_t addrspace(10)* %34, %jl_value_t addrspace(10)** %29, align 8, !dbg !173
  %35 = call nonnull %jl_value_t addrspace(10)* @jl_apply_generic(%jl_value_t addrspace(10)** nonnull %.sub, i32 4), !dbg !173
  %36 = bitcast %jl_value_t addrspace(10)* %35 to i64 addrspace(10)*, !dbg !173
  %37 = getelementptr i64, i64 addrspace(10)* %36, i64 -1, !dbg !173
  %38 = load i64, i64 addrspace(10)* %37, align 4, !dbg !173, !tbaa !186, !range !190
  %39 = and i64 %38, -16, !dbg !173
  %40 = inttoptr i64 %39 to %jl_value_t*, !dbg !173
  %41 = addrspacecast %jl_value_t* %40 to %jl_value_t addrspace(10)*, !dbg !173
  %42 = icmp eq %jl_value_t addrspace(10)* %41, addrspacecast (%jl_value_t* inttoptr (i64 140241804675952 to %jl_value_t*) to %jl_value_t addrspace(10)*), !dbg !173
  br i1 %42, label %pass, label %fail, !dbg !173

└ @ CUDAnative /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/validation.jl:222

InvalidIRError: compiling gpu_kernel(Cassette.Context{nametype(Ctx),Nothing,Nothing,getfield(GPUifyLoops, Symbol("##PassType#371")),Nothing,Cassette.DisableHooks}, typeof(DiffEqGPU.gpu_kernel), ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}, CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}, CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}, CUDAnative.CuDeviceArray{DiffEqBase.NullParameters,2,CUDAnative.AS.Global}, Float32) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to overdub(overdub_context::Cassette.Context, overdub_arguments...) in GPUifyLoops at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:524)
Stacktrace:
 [1] _tuple_any at tuple.jl:399
 [2] has_offset_axes at abstractarray.jl:86
 [3] require_one_based_indexing at abstractarray.jl:89
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:501
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_apply_generic)
Stacktrace:
 [1] require_one_based_indexing at abstractarray.jl:89
 [2] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:501
 [3] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [4] fip at In[1]:8
 [5] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [6] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [7] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_f_getfield)
Stacktrace:
 [1] print_to_string at strings/io.jl:123
 [2] string at strings/io.jl:168
 [3] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [4] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [5] fip at In[1]:8
 [6] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [7] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [8] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_get_nth_field_checked)
Stacktrace:
 [1] getindex at tuple.jl:24
 [2] iterate at tuple.jl:43
 [3] print_to_string at strings/io.jl:124
 [4] string at strings/io.jl:168
 [5] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [6] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [7] fip at In[1]:8
 [8] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [9] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [10] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_f_tuple)
Stacktrace:
 [1] iterate at tuple.jl:43
 [2] print_to_string at strings/io.jl:124
 [3] string at strings/io.jl:168
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_alloc_string)
Stacktrace:
 [1] _string_n at strings/string.jl:60
 [2] StringVector at iobuffer.jl:31
 [3] #IOBuffer#318 at iobuffer.jl:114
 [4] Type at none:0
 [5] print_to_string at strings/io.jl:127
 [6] string at strings/io.jl:168
 [7] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [8] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [9] fip at In[1]:8
 [10] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [11] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [12] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_string_to_array)
Stacktrace:
 [1] unsafe_wrap at strings/string.jl:71
 [2] StringVector at iobuffer.jl:31
 [3] #IOBuffer#318 at iobuffer.jl:114
 [4] Type at none:0
 [5] print_to_string at strings/io.jl:127
 [6] string at strings/io.jl:168
 [7] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [8] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [9] fip at In[1]:8
 [10] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [11] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [12] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to __memset_sse2)
Stacktrace:
 [1] fill! at array.jl:366
 [2] #IOBuffer#318 at iobuffer.jl:121
 [3] Type at none:0
 [4] print_to_string at strings/io.jl:127
 [5] string at strings/io.jl:168
 [6] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [7] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [8] fip at In[1]:8
 [9] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [10] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [11] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_f_getfield)
Stacktrace:
 [1] print_to_string at strings/io.jl:128
 [2] string at strings/io.jl:168
 [3] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [4] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [5] fip at In[1]:8
 [6] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [7] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [8] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_excstack_state)
Stacktrace:
 [1] print at strings/io.jl:36
 [2] print_to_string at strings/io.jl:129
 [3] string at strings/io.jl:168
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to an unknown function (call to julia.except_enter)
Stacktrace:
 [1] print at strings/io.jl:36
 [2] print_to_string at strings/io.jl:129
 [3] string at strings/io.jl:168
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_alloc_string)
Stacktrace:
 [1] _string_n at strings/string.jl:60
 [2] StringVector at iobuffer.jl:31
 [3] dec at intfuncs.jl:574
 [4] #string#323 at intfuncs.jl:646
 [5] string at intfuncs.jl:638
 [6] show at show.jl:589
 [7] print at strings/io.jl:37
 [8] print_to_string at strings/io.jl:129
 [9] string at strings/io.jl:168
 [10] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [11] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [12] fip at In[1]:8
 [13] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [14] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [15] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_string_to_array)
Stacktrace:
 [1] unsafe_wrap at strings/string.jl:71
 [2] StringVector at iobuffer.jl:31
 [3] dec at intfuncs.jl:574
 [4] #string#323 at intfuncs.jl:646
 [5] string at intfuncs.jl:638
 [6] show at show.jl:589
 [7] print at strings/io.jl:37
 [8] print_to_string at strings/io.jl:129
 [9] string at strings/io.jl:168
 [10] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [11] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [12] fip at In[1]:8
 [13] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [14] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [15] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_array_to_string)
Stacktrace:
 [1] Type at strings/string.jl:39
 [2] dec at intfuncs.jl:581
 [3] #string#323 at intfuncs.jl:646
 [4] string at intfuncs.jl:638
 [5] show at show.jl:589
 [6] print at strings/io.jl:37
 [7] print_to_string at strings/io.jl:129
 [8] string at strings/io.jl:168
 [9] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [10] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [11] fip at In[1]:8
 [12] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [13] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [14] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_array_grow_end)
Stacktrace:
 [1] _growend! at array.jl:811
 [2] ensureroom at iobuffer.jl:325
 [3] unsafe_write at iobuffer.jl:414
 [4] macro expansion at gcutils.jl:87
 [5] write at strings/io.jl:177
 [6] show at show.jl:589
 [7] print at strings/io.jl:37
 [8] print_to_string at strings/io.jl:129
 [9] string at strings/io.jl:168
 [10] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [11] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [12] fip at In[1]:8
 [13] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [14] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [15] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_pop_handler)
Stacktrace:
 [1] print at strings/io.jl:37
 [2] print_to_string at strings/io.jl:129
 [3] string at strings/io.jl:168
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_rethrow)
Stacktrace:
 [1] rethrow at error.jl:51
 [2] print at strings/io.jl:39
 [3] print_to_string at strings/io.jl:129
 [4] string at strings/io.jl:168
 [5] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [6] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [7] fip at In[1]:8
 [8] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [9] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [10] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_array_grow_end)
Stacktrace:
 [1] _growend! at array.jl:811
 [2] ensureroom at iobuffer.jl:325
 [3] unsafe_write at iobuffer.jl:414
 [4] macro expansion at gcutils.jl:87
 [5] write at strings/io.jl:177
 [6] print at strings/io.jl:179
 [7] print_to_string at strings/io.jl:129
 [8] string at strings/io.jl:168
 [9] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [10] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [11] fip at In[1]:8
 [12] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [13] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [14] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_get_nth_field_checked)
Stacktrace:
 [1] getindex at tuple.jl:24
 [2] iterate at tuple.jl:43
 [3] print_to_string at strings/io.jl:129
 [4] string at strings/io.jl:168
 [5] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [6] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [7] fip at In[1]:8
 [8] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [9] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [10] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_f_tuple)
Stacktrace:
 [1] iterate at tuple.jl:43
 [2] print_to_string at strings/io.jl:129
 [3] string at strings/io.jl:168
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_array_grow_end)
Stacktrace:
 [1] _growend! at array.jl:811
 [2] resize! at array.jl:1003
 [3] print_to_string at strings/io.jl:131
 [4] string at strings/io.jl:168
 [5] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [6] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [7] fip at In[1]:8
 [8] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [9] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [10] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_array_del_end)
Stacktrace:
 [1] _deleteend! at array.jl:820
 [2] resize! at array.jl:1008
 [3] print_to_string at strings/io.jl:131
 [4] string at strings/io.jl:168
 [5] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [6] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [7] fip at In[1]:8
 [8] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [9] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [10] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_array_to_string)
Stacktrace:
 [1] Type at strings/string.jl:39
 [2] print_to_string at strings/io.jl:131
 [3] string at strings/io.jl:168
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:505
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_f_getfield)
Stacktrace:
 [1] print_to_string at strings/io.jl:123
 [2] string at strings/io.jl:168
 [3] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [4] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [5] fip at In[1]:8
 [6] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [7] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [8] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_get_nth_field_checked)
Stacktrace:
 [1] getindex at tuple.jl:24
 [2] iterate at tuple.jl:43
 [3] print_to_string at strings/io.jl:124
 [4] string at strings/io.jl:168
 [5] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [6] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [7] fip at In[1]:8
 [8] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [9] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [10] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_f_tuple)
Stacktrace:
 [1] iterate at tuple.jl:43
 [2] print_to_string at strings/io.jl:124
 [3] string at strings/io.jl:168
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_alloc_string)
Stacktrace:
 [1] _string_n at strings/string.jl:60
 [2] StringVector at iobuffer.jl:31
 [3] #IOBuffer#318 at iobuffer.jl:114
 [4] Type at none:0
 [5] print_to_string at strings/io.jl:127
 [6] string at strings/io.jl:168
 [7] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [8] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [9] fip at In[1]:8
 [10] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [11] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [12] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_string_to_array)
Stacktrace:
 [1] unsafe_wrap at strings/string.jl:71
 [2] StringVector at iobuffer.jl:31
 [3] #IOBuffer#318 at iobuffer.jl:114
 [4] Type at none:0
 [5] print_to_string at strings/io.jl:127
 [6] string at strings/io.jl:168
 [7] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [8] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [9] fip at In[1]:8
 [10] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [11] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [12] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to __memset_sse2)
Stacktrace:
 [1] fill! at array.jl:366
 [2] #IOBuffer#318 at iobuffer.jl:121
 [3] Type at none:0
 [4] print_to_string at strings/io.jl:127
 [5] string at strings/io.jl:168
 [6] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [7] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [8] fip at In[1]:8
 [9] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [10] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [11] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_f_getfield)
Stacktrace:
 [1] print_to_string at strings/io.jl:128
 [2] string at strings/io.jl:168
 [3] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [4] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [5] fip at In[1]:8
 [6] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [7] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [8] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_excstack_state)
Stacktrace:
 [1] print at strings/io.jl:36
 [2] print_to_string at strings/io.jl:129
 [3] string at strings/io.jl:168
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to an unknown function (call to julia.except_enter)
Stacktrace:
 [1] print at strings/io.jl:36
 [2] print_to_string at strings/io.jl:129
 [3] string at strings/io.jl:168
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_alloc_string)
Stacktrace:
 [1] _string_n at strings/string.jl:60
 [2] StringVector at iobuffer.jl:31
 [3] dec at intfuncs.jl:574
 [4] #string#323 at intfuncs.jl:646
 [5] string at intfuncs.jl:638
 [6] show at show.jl:589
 [7] print at strings/io.jl:37
 [8] print_to_string at strings/io.jl:129
 [9] string at strings/io.jl:168
 [10] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [11] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [12] fip at In[1]:8
 [13] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [14] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [15] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_string_to_array)
Stacktrace:
 [1] unsafe_wrap at strings/string.jl:71
 [2] StringVector at iobuffer.jl:31
 [3] dec at intfuncs.jl:574
 [4] #string#323 at intfuncs.jl:646
 [5] string at intfuncs.jl:638
 [6] show at show.jl:589
 [7] print at strings/io.jl:37
 [8] print_to_string at strings/io.jl:129
 [9] string at strings/io.jl:168
 [10] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [11] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [12] fip at In[1]:8
 [13] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [14] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [15] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_array_to_string)
Stacktrace:
 [1] Type at strings/string.jl:39
 [2] dec at intfuncs.jl:581
 [3] #string#323 at intfuncs.jl:646
 [4] string at intfuncs.jl:638
 [5] show at show.jl:589
 [6] print at strings/io.jl:37
 [7] print_to_string at strings/io.jl:129
 [8] string at strings/io.jl:168
 [9] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [10] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [11] fip at In[1]:8
 [12] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [13] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [14] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_array_grow_end)
Stacktrace:
 [1] _growend! at array.jl:811
 [2] ensureroom at iobuffer.jl:325
 [3] unsafe_write at iobuffer.jl:414
 [4] macro expansion at gcutils.jl:87
 [5] write at strings/io.jl:177
 [6] show at show.jl:589
 [7] print at strings/io.jl:37
 [8] print_to_string at strings/io.jl:129
 [9] string at strings/io.jl:168
 [10] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [11] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [12] fip at In[1]:8
 [13] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [14] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [15] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_pop_handler)
Stacktrace:
 [1] print at strings/io.jl:37
 [2] print_to_string at strings/io.jl:129
 [3] string at strings/io.jl:168
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_rethrow)
Stacktrace:
 [1] rethrow at error.jl:51
 [2] print at strings/io.jl:39
 [3] print_to_string at strings/io.jl:129
 [4] string at strings/io.jl:168
 [5] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [6] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [7] fip at In[1]:8
 [8] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [9] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [10] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_array_grow_end)
Stacktrace:
 [1] _growend! at array.jl:811
 [2] ensureroom at iobuffer.jl:325
 [3] unsafe_write at iobuffer.jl:414
 [4] macro expansion at gcutils.jl:87
 [5] write at strings/io.jl:177
 [6] print at strings/io.jl:179
 [7] print_to_string at strings/io.jl:129
 [8] string at strings/io.jl:168
 [9] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [10] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [11] fip at In[1]:8
 [12] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [13] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [14] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_get_nth_field_checked)
Stacktrace:
 [1] getindex at tuple.jl:24
 [2] iterate at tuple.jl:43
 [3] print_to_string at strings/io.jl:129
 [4] string at strings/io.jl:168
 [5] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [6] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [7] fip at In[1]:8
 [8] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [9] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [10] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_f_tuple)
Stacktrace:
 [1] iterate at tuple.jl:43
 [2] print_to_string at strings/io.jl:129
 [3] string at strings/io.jl:168
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_array_grow_end)
Stacktrace:
 [1] _growend! at array.jl:811
 [2] resize! at array.jl:1003
 [3] print_to_string at strings/io.jl:131
 [4] string at strings/io.jl:168
 [5] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [6] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [7] fip at In[1]:8
 [8] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [9] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [10] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_array_del_end)
Stacktrace:
 [1] _deleteend! at array.jl:820
 [2] resize! at array.jl:1008
 [3] print_to_string at strings/io.jl:131
 [4] string at strings/io.jl:168
 [5] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [6] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [7] fip at In[1]:8
 [8] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [9] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [10] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_array_to_string)
Stacktrace:
 [1] Type at strings/string.jl:39
 [2] print_to_string at strings/io.jl:131
 [3] string at strings/io.jl:168
 [4] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:508
 [5] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [6] fip at In[1]:8
 [7] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [8] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [9] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_type_error)
Stacktrace:
 [1] require_one_based_indexing at abstractarray.jl:89
 [2] generic_matvecmul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:501
 [3] mul! at /buildworker/worker/package_linux64/build/usr/share/julia/stdlib/v1.2/LinearAlgebra/src/matmul.jl:77
 [4] fip at In[1]:8
 [5] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230
 [6] gpu_kernel at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:6
 [7] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
Reason: unsupported call through a literal pointer (call to memmove)
Stacktrace:
 [1] unsafe_copyto! at array.jl:226
 [2] unsafe_copyto! at array.jl:245
 [3] copyto! at array.jl:275
 [4] compact at iobuffer.jl:290
 [5] ensureroom_slowpath at iobuffer.jl:310
 [6] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
 [7] multiple call sites at unknown:0
Reason: unsupported call to the Julia runtime (call to jl_f_tuple)
Stacktrace:
 [1] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:524
 [2] multiple call sites at unknown:0
Reason: unsupported call to the Julia runtime (call to jl_f_getfield)
Stacktrace:
 [1] throw_inexacterror at boot.jl:560
 [2] overdub at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/Cassette/YCOeN/src/overdub.jl:0
 [3] multiple call sites at unknown:0

Stacktrace:
 [1] check_ir(::CUDAnative.CompilerJob, ::LLVM.Module) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/validation.jl:114
 [2] macro expansion at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/driver.jl:188 [inlined]
 [3] macro expansion at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/TimerOutputs/7zSea/src/TimerOutput.jl:216 [inlined]
 [4] #codegen#130(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.codegen), ::Symbol, ::CUDAnative.CompilerJob) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/driver.jl:186
 [5] #codegen at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/driver.jl:0 [inlined]
 [6] #compile#129(::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::typeof(CUDAnative.compile), ::Symbol, ::CUDAnative.CompilerJob) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/driver.jl:47
 [7] #compile at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/common.jl:0 [inlined]
 [8] #compile#128 at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/compiler/driver.jl:28 [inlined]
 [9] #compile at ./none:0 [inlined] (repeats 2 times)
 [10] macro expansion at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/execution.jl:389 [inlined]
 [11] #cufunction#170(::String, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(CUDAnative.cufunction), ::typeof(Cassette.overdub), ::Type{Tuple{Cassette.Context{nametype(Ctx),Nothing,Nothing,getfield(GPUifyLoops, Symbol("##PassType#371")),Nothing,Cassette.DisableHooks},typeof(DiffEqGPU.gpu_kernel),ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global},CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global},CUDAnative.CuDeviceArray{DiffEqBase.NullParameters,2,CUDAnative.AS.Global},Float32}}) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/CUDAnative/UWBIY/src/execution.jl:357
 [12] (::getfield(CUDAnative, Symbol("#kw##cufunction")))(::NamedTuple{(:name,),Tuple{String}}, ::typeof(CUDAnative.cufunction), ::Function, ::Type) at ./none:0
 [13] #launch#50(::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(GPUifyLoops.launch), ::GPUifyLoops.CUDA, ::typeof(DiffEqGPU.gpu_kernel), ::Function, ::Vararg{Any,N} where N) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/GPUifyLoops/mjszO/src/GPUifyLoops.jl:125
 [14] launch at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/GPUifyLoops/mjszO/src/GPUifyLoops.jl:119 [inlined]
 [15] macro expansion at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/GPUifyLoops/mjszO/src/GPUifyLoops.jl:54 [inlined]
 [16] #12 at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:61 [inlined]
 [17] ODEFunction at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/diffeqfunction.jl:230 [inlined]
 [18] initialize!(::OrdinaryDiffEq.ODEIntegrator{Tsit5,true,CuArrays.CuArray{Float32,2},Float32,CuArrays.CuArray{DiffEqBase.NullParameters,2},Float32,Float32,Float32,Array{CuArrays.CuArray{Float32,2},1},ODESolution{Float32,3,Array{CuArrays.CuArray{Float32,2},1},Nothing,Nothing,Array{Float32,1},Array{Array{CuArrays.CuArray{Float32,2},1},1},ODEProblem{CuArrays.CuArray{Float32,2},Tuple{Float32,Float32},true,CuArrays.CuArray{DiffEqBase.NullParameters,2},ODEFunction{true,getfield(DiffEqGPU, Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},Tsit5,OrdinaryDiffEq.InterpolationData{ODEFunction{true,getfield(DiffEqGPU, Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Array{CuArrays.CuArray{Float32,2},1},Array{Float32,1},Array{Array{CuArrays.CuArray{Float32,2},1},1},OrdinaryDiffEq.Tsit5Cache{CuArrays.CuArray{Float32,2},CuArrays.CuArray{Float32,2},CuArrays.CuArray{Float32,2},OrdinaryDiffEq.Tsit5ConstantCache{Float32,Float32}}},DiffEqBase.DEStats},ODEFunction{true,getfield(DiffEqGPU, 
Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},OrdinaryDiffEq.Tsit5Cache{CuArrays.CuArray{Float32,2},CuArrays.CuArray{Float32,2},CuArrays.CuArray{Float32,2},OrdinaryDiffEq.Tsit5ConstantCache{Float32,Float32}},OrdinaryDiffEq.DEOptions{Float32,Float32,Float32,Float32,typeof(DiffEqBase.ODE_DEFAULT_NORM),typeof(opnorm),CallbackSet{Tuple{},Tuple{}},typeof(DiffEqBase.ODE_DEFAULT_ISOUTOFDOMAIN),typeof(DiffEqBase.ODE_DEFAULT_PROG_MESSAGE),typeof(DiffEqBase.ODE_DEFAULT_UNSTABLE_CHECK),DataStructures.BinaryHeap{Float32,DataStructures.LessThan},DataStructures.BinaryHeap{Float32,DataStructures.LessThan},Nothing,Nothing,Int64,Array{Float32,1},Float64,Array{Float32,1}},CuArrays.CuArray{Float32,2},Float32,Nothing}, ::OrdinaryDiffEq.Tsit5Cache{CuArrays.CuArray{Float32,2},CuArrays.CuArray{Float32,2},CuArrays.CuArray{Float32,2},OrdinaryDiffEq.Tsit5ConstantCache{Float32,Float32}}) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/OrdinaryDiffEq/tQd6p/src/perform_step/low_order_rk_perform_step.jl:623
 [19] #__init#335(::Float64, ::Array{Float32,1}, ::Array{Float32,1}, ::Nothing, ::Bool, ::Bool, ::Bool, ::Bool, ::Nothing, ::Bool, ::Bool, ::Float32, ::Float32, ::Float32, ::Bool, ::Bool, ::Rational{Int64}, ::Nothing, ::Nothing, ::Rational{Int64}, ::Int64, ::Int64, ::Int64, ::Rational{Int64}, ::Bool, ::Int64, ::Nothing, ::Nothing, ::Int64, ::typeof(DiffEqBase.ODE_DEFAULT_NORM), ::typeof(opnorm), ::typeof(DiffEqBase.ODE_DEFAULT_ISOUTOFDOMAIN), ::typeof(DiffEqBase.ODE_DEFAULT_UNSTABLE_CHECK), ::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::Bool, ::Int64, ::String, ::typeof(DiffEqBase.ODE_DEFAULT_PROG_MESSAGE), ::Nothing, ::Bool, ::Bool, ::Bool, ::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, ::typeof(DiffEqBase.__init), ::ODEProblem{CuArrays.CuArray{Float32,2},Tuple{Float32,Float32},true,CuArrays.CuArray{DiffEqBase.NullParameters,2},ODEFunction{true,getfield(DiffEqGPU, Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem}, ::Tsit5, ::Array{CuArrays.CuArray{Float32,2},1}, ::Array{Float32,1}, ::Array{Any,1}, ::Type{Val{true}}) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/OrdinaryDiffEq/tQd6p/src/solve.jl:352
 [20] (::getfield(DiffEqBase, Symbol("#kw##__init")))(::NamedTuple{(:saveat,),Tuple{Float64}}, ::typeof(DiffEqBase.__init), ::ODEProblem{CuArrays.CuArray{Float32,2},Tuple{Float32,Float32},true,CuArrays.CuArray{DiffEqBase.NullParameters,2},ODEFunction{true,getfield(DiffEqGPU, Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem}, ::Tsit5, ::Array{CuArrays.CuArray{Float32,2},1}, ::Array{Float32,1}, ::Array{Any,1}, ::Type{Val{true}}) at ./none:0 (repeats 4 times)
 [21] #__solve#334 at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/OrdinaryDiffEq/tQd6p/src/solve.jl:4 [inlined]
 [22] #__solve at ./none:0 [inlined]
 [23] #solve_call#425(::Base.Iterators.Pairs{Symbol,Float64,Tuple{Symbol},NamedTuple{(:saveat,),Tuple{Float64}}}, ::typeof(DiffEqBase.solve_call), ::ODEProblem{CuArrays.CuArray{Float32,2},Tuple{Float32,Float32},true,CuArrays.CuArray{DiffEqBase.NullParameters,2},ODEFunction{true,getfield(DiffEqGPU, Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem}, ::Tsit5) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/solve.jl:40
 [24] #solve_call at ./none:0 [inlined]
 [25] #solve#426 at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/solve.jl:57 [inlined]
 [26] (::getfield(DiffEqBase, Symbol("#kw##solve")))(::NamedTuple{(:saveat,),Tuple{Float64}}, ::typeof(solve), ::ODEProblem{CuArrays.CuArray{Float32,2},Tuple{Float32,Float32},true,CuArrays.CuArray{DiffEqBase.NullParameters,2},ODEFunction{true,getfield(DiffEqGPU, Symbol("##12#22")){ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing}},UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem}, ::Tsit5) at ./none:0
 [27] #batch_solve#5(::Base.Iterators.Pairs{Symbol,Float64,Tuple{Symbol},NamedTuple{(:saveat,),Tuple{Float64}}}, ::typeof(DiffEqGPU.batch_solve), ::EnsembleProblem{ODEProblem{Array{Float32,1},Tuple{Float32,Float32},true,DiffEqBase.NullParameters,ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},typeof(prob_func),getfield(DiffEqBase, Symbol("##332#338")),getfield(DiffEqBase, Symbol("##334#340")),Array{Any,1}}, ::Tsit5, ::EnsembleGPUArray, ::UnitRange{Int64}) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:66
 [28] #batch_solve at ./none:0 [inlined]
 [29] (::getfield(DiffEqGPU, Symbol("##3#4")){Int64,Int64,Base.Iterators.Pairs{Symbol,Float64,Tuple{Symbol},NamedTuple{(:saveat,),Tuple{Float64}}},EnsembleProblem{ODEProblem{Array{Float32,1},Tuple{Float32,Float32},true,DiffEqBase.NullParameters,ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},typeof(prob_func),getfield(DiffEqBase, Symbol("##332#338")),getfield(DiffEqBase, Symbol("##334#340")),Array{Any,1}},Tsit5,EnsembleGPUArray})(::Int64) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:37
 [30] iterate at ./generator.jl:47 [inlined]
 [31] _collect(::UnitRange{Int64}, ::Base.Generator{UnitRange{Int64},getfield(DiffEqGPU, Symbol("##3#4")){Int64,Int64,Base.Iterators.Pairs{Symbol,Float64,Tuple{Symbol},NamedTuple{(:saveat,),Tuple{Float64}}},EnsembleProblem{ODEProblem{Array{Float32,1},Tuple{Float32,Float32},true,DiffEqBase.NullParameters,ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},typeof(prob_func),getfield(DiffEqBase, Symbol("##332#338")),getfield(DiffEqBase, Symbol("##334#340")),Array{Any,1}},Tsit5,EnsembleGPUArray}}, ::Base.EltypeUnknown, ::Base.HasShape{1}) at ./array.jl:619
 [32] collect_similar(::UnitRange{Int64}, ::Base.Generator{UnitRange{Int64},getfield(DiffEqGPU, Symbol("##3#4")){Int64,Int64,Base.Iterators.Pairs{Symbol,Float64,Tuple{Symbol},NamedTuple{(:saveat,),Tuple{Float64}}},EnsembleProblem{ODEProblem{Array{Float32,1},Tuple{Float32,Float32},true,DiffEqBase.NullParameters,ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},typeof(prob_func),getfield(DiffEqBase, Symbol("##332#338")),getfield(DiffEqBase, Symbol("##334#340")),Array{Any,1}},Tsit5,EnsembleGPUArray}}) at ./array.jl:548
 [33] map(::Function, ::UnitRange{Int64}) at ./abstractarray.jl:2073
 [34] macro expansion at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:31 [inlined]
 [35] macro expansion at ./util.jl:213 [inlined]
 [36] #__solve#2(::Int64, ::Int64, ::Base.Iterators.Pairs{Symbol,Float64,Tuple{Symbol},NamedTuple{(:saveat,),Tuple{Float64}}}, ::typeof(DiffEqBase.__solve), ::EnsembleProblem{ODEProblem{Array{Float32,1},Tuple{Float32,Float32},true,DiffEqBase.NullParameters,ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},typeof(prob_func),getfield(DiffEqBase, Symbol("##332#338")),getfield(DiffEqBase, Symbol("##334#340")),Array{Any,1}}, ::Tsit5, ::EnsembleGPUArray) at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqGPU/QB1WC/src/DiffEqGPU.jl:30
 [37] #__solve at ./none:0 [inlined]
 [38] #solve#427 at /home/bq_nbecker/isi/julia_pkg/GPUTest/packages/DiffEqBase/f3eXU/src/solve.jl:64 [inlined]
 [39] (::getfield(DiffEqBase, Symbol("#kw##solve")))(::NamedTuple{(:trajectories, :saveat),Tuple{Int64,Float64}}, ::typeof(solve), ::EnsembleProblem{ODEProblem{Array{Float32,1},Tuple{Float32,Float32},true,DiffEqBase.NullParameters,ODEFunction{true,typeof(fip),UniformScaling{Bool},Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing,Nothing},Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}},DiffEqBase.StandardODEProblem},typeof(prob_func),getfield(DiffEqBase, Symbol("##332#338")),getfield(DiffEqBase, Symbol("##334#340")),Array{Any,1}}, ::Tsit5, ::EnsembleGPUArray) at ./none:0
 [40] top-level scope at In[2]:1

nilsbecker avatar Sep 20 '19 16:09 nilsbecker

I think your initial condition has to be a vector (for now). I don't know why that would give you an issue like this though.

ChrisRackauckas avatar Sep 21 '19 11:09 ChrisRackauckas

This looks like we are trying to decode a generic call (to give you a better error) and failed in the process.

vchuravy avatar Sep 21 '19 20:09 vchuravy

i tried with

const uu0 = SVector{4}(u0)
probip = ODEProblem(fip, uu0, tspan)

but this results in a similar error. if i additionally set CuArrays.allowscalar(false) i get a different error: scalar setindex! is disallowed

let me know if i can do anything else to test. can you reproduce this behavior at all?

nilsbecker avatar Sep 23 '19 09:09 nilsbecker

I'll give this a try later today.

ChrisRackauckas avatar Sep 23 '19 10:09 ChrisRackauckas

You might want to try moving uu0 into fip :/ accessing global data like that is finicky.

vchuravy avatar Sep 23 '19 14:09 vchuravy

You might want to try moving uu0 into fip :/ accessing global data like that is finicky.

hmm, i don't think i get it. fip is the rhs of the ODE, uu0 is supposed to be the initial condition. i could build uu0 within fip but how would i pass it to ODEProblem then?

nilsbecker avatar Sep 23 '19 14:09 nilsbecker

Oh sorry I misunderstood the code snippet. uu0 = CuArray(u0) might do the trick.

vchuravy avatar Sep 23 '19 15:09 vchuravy

i tried uu0 = CuArray(u0) which resulted in an error involving jl_apply_generic. i then also tried replacing C with CuArray(B) which resulted in InvalidIRError. my latest full code is:

using LinearAlgebra, DifferentialEquations, DiffEqGPU, Plots, StaticArrays, CuArrays
CuArrays.allowscalar(false)

# 4x4 Float32 coefficient matrix for the linear right-hand side du = C*u.
B  = Float32[1. 0  0 -5; 4 -2  4 -3; -4  0  0  1; 5 -2  2  3]
# const C = SMatrix{4,4}(B)

# B wrapped in a CuArray. C is a non-const global that `fip` reads.
C = CuArray(B)

# Initial state: 4 random values converted to Float32.
u0 = Array{Float32}(rand(4))

#const uu0 = SVector{4}(u0)

# Initial state wrapped in a CuArray before being handed to ODEProblem.
uu0 = CuArray(u0)

# Float32 integration interval.
tspan = (0.0f0,1.0f0)

# In-place right-hand side: stores the product C*u in du; p and t are unused.
function fip(du,u,p,t) 
   mul!(du, C, u)
end

probip = ODEProblem(fip, uu0, tspan)

# Returns the base problem unchanged, so every trajectory solves the same problem.
function prob_func(prob, i, repeat)
    prob
end
ensprobip = EnsembleProblem(probip, prob_func=prob_func)

# Solve 123 trajectories with Tsit5 using the EnsembleGPUArray ensemble algorithm.
simgpu = solve(ensprobip, Tsit5(), EnsembleGPUArray(), trajectories=123)

this latest version does not work with EnsembleCPUArray which might be expected for arrays on the gpu.

nilsbecker avatar Sep 23 '19 15:09 nilsbecker

https://github.com/JuliaDiffEq/DiffEqGPU.jl/issues/10#issuecomment-533617690 is the one that should be correct. u0 should be a vector and the C should be a static matrix. I am not sure why that combination isn't working though.

ChrisRackauckas avatar Sep 23 '19 15:09 ChrisRackauckas

Here's one that's minimized to an MWE and it told me I should report it :)

using DiffEqGPU, OrdinaryDiffEq, StaticArrays, LinearAlgebra
# In-place right-hand side: builds the 4x4 Float32 static matrix C locally (no global
# state is read) and stores the product C*u in du; p and t are unused.
function fip2(du,u,p,t)
   C = @SMatrix Float32[1. 0  0 -5; 4 -2  4 -3; -4  0  0  1; 5 -2  2  3]
   mul!(du, C, u)
end
# 4-element random Float32 initial state with the Float32 time span (0, 1).
prob = ODEProblem(fip2,rand(Float32,4),(0.0f0,1.0f0))
# No prob_func is passed here, so EnsembleProblem falls back to its default.
monteprob = EnsembleProblem(prob)
# Solve 100 trajectories with Tsit5 via EnsembleGPUArray, passing saveat=1.0f0.
sol = solve(monteprob,Tsit5(),EnsembleGPUArray(),trajectories=100,saveat=1.0f0)
┌ Warning: Decoding arguments to jl_apply_generic failed, please file a bug with a reproducer.
│   inst =   %34 = call nonnull %jl_value_t addrspace(10)* @jl_apply_generic(%jl_value_t addrspace(10)** nonnull %.sub, i32 4), !dbg !177
│   bb =
│ 
│    L49:                                              ; preds = %top
│      %.sroa.0207.0..sroa_idx = getelementptr inbounds { [2 x i64], i64 }, { [2 x i64], i64 } addrspace(11)* %0, i64 0, i32 0, i64 0    
│      %.sroa.0207.0.copyload = load i64, i64 addrspace(11)* %.sroa.0207.0..sroa_idx, align 8
│      %.sroa.3210.0..sroa_idx211 = getelementptr inbounds { [2 x i64], i64 }, { [2 x i64], i64 } addrspace(11)* %0, i64 0, i32 1
│      %.sroa.3210.0.copyload = load i64, i64 addrspace(11)* %.sroa.3210.0..sroa_idx211, align 8
│      %.sroa.4.24..sroa_idx = getelementptr inbounds { [2 x i64], i64 }, { [2 x i64], i64 } addrspace(11)* %1, i64 0, i32 0, i64 0
│      %.sroa.4.24.copyload = load i64, i64 addrspace(11)* %.sroa.4.24..sroa_idx, align 8
│      %.sroa.7.24..sroa_idx214 = getelementptr inbounds { [2 x i64], i64 }, { [2 x i64], i64 } addrspace(11)* %1, i64 0, i32 1
│      %.sroa.7.24.copyload = load i64, i64 addrspace(11)* %.sroa.7.24..sroa_idx214, align 8
│      %.sroa.2208.0..sroa_idx209 = getelementptr inbounds { [2 x i64], i64 }, { [2 x i64], i64 } addrspace(11)* %0, i64 0, i32 0, i64 1
│      %.sroa.2208.0.copyload = load i64, i64 addrspace(11)* %.sroa.2208.0..sroa_idx209, align 8
│      %15 = icmp sgt i64 %.sroa.0207.0.copyload, 0, !dbg !134
│      %16 = select i1 %15, i64 %.sroa.0207.0.copyload, i64 0, !dbg !134
│      %17 = add nsw i64 %11, -1, !dbg !157
│      %18 = mul i64 %17, %16, !dbg !171
│      %19 = icmp sgt i64 %.sroa.4.24.copyload, 0, !dbg !134
│      %20 = select i1 %19, i64 %.sroa.4.24.copyload, i64 0, !dbg !134
│      %21 = mul i64 %17, %20, !dbg !171
│      %22 = call fastcc %jl_value_t addrspace(10)* @ptx_gc_pool_alloc(i64 56), !dbg !173
│      %23 = bitcast %jl_value_t addrspace(10)* %22 to i8 addrspace(10)*, !dbg !173
│      %.sroa.0193.0..sroa_cast = bitcast %jl_value_t addrspace(10)* %22 to i64 addrspace(10)*, !dbg !173
│      store i64 %.sroa.0207.0.copyload, i64 addrspace(10)* %.sroa.0193.0..sroa_cast, align 8, !dbg !173
│      %.sroa.2194.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %23, i64 8, !dbg !173
│      %.sroa.2194.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.2194.0..sroa_idx to i64 addrspace(10)*, !dbg !173
│      store i64 %.sroa.2208.0.copyload, i64 addrspace(10)* %.sroa.2194.0..sroa_cast, align 8, !dbg !173
│      %.sroa.3195.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %23, i64 16, !dbg !173
│      %.sroa.3195.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.3195.0..sroa_idx to i64 addrspace(10)*, !dbg !173
│      store i64 %.sroa.3210.0.copyload, i64 addrspace(10)* %.sroa.3195.0..sroa_cast, align 8, !dbg !173
│      %.sroa.4.sroa.0203.0..sroa.4.0..sroa_raw_idx.sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %23, i64 24, !dbg !173
│      %.sroa.4.sroa.0203.0..sroa.4.0..sroa_raw_idx.sroa_cast = bitcast i8 addrspace(10)* %.sroa.4.sroa.0203.0..sroa.4.0..sroa_raw_idx.sroa_idx to i64 addrspace(10)*, !dbg !173
│      store i64 %16, i64 addrspace(10)* %.sroa.4.sroa.0203.0..sroa.4.0..sroa_raw_idx.sroa_cast, align 4, !dbg !173
│      %.sroa.4.sroa.2204.0..sroa.4.0..sroa_raw_idx.sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %23, i64 32, !dbg !173
│      %.sroa.4.sroa.2204.0..sroa.4.0..sroa_raw_idx.sroa_cast = bitcast i8 addrspace(10)* %.sroa.4.sroa.2204.0..sroa.4.0..sroa_raw_idx.sroa_idx to i64 addrspace(10)*, !dbg !173
│      store i64 %11, i64 addrspace(10)* %.sroa.4.sroa.2204.0..sroa.4.0..sroa_raw_idx.sroa_cast, align 4, !dbg !173
│      %.sroa.5197.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %23, i64 40, !dbg !173
│      %.sroa.5197.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.5197.0..sroa_idx to i64 addrspace(10)*, !dbg !173
│      store i64 %18, i64 addrspace(10)* %.sroa.5197.0..sroa_cast, align 8, !dbg !173
│      %.sroa.6198.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %23, i64 48, !dbg !173
│      %.sroa.6198.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.6198.0..sroa_idx to i64 addrspace(10)*, !dbg !173
│      store i64 1, i64 addrspace(10)* %.sroa.6198.0..sroa_cast, align 8, !dbg !173
│      %24 = call fastcc %jl_value_t addrspace(10)* @ptx_gc_pool_alloc(i64 56), !dbg !173
│      %25 = bitcast %jl_value_t addrspace(10)* %24 to i8 addrspace(10)*, !dbg !173
│      %.sroa.0186.0..sroa_cast = bitcast %jl_value_t addrspace(10)* %24 to i64 addrspace(10)*, !dbg !173
│      store i64 %.sroa.4.24.copyload, i64 addrspace(10)* %.sroa.0186.0..sroa_cast, align 8, !dbg !173
│      %.sroa.2.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %25, i64 8, !dbg !173
│      %.sroa.2.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.2.0..sroa_idx to i64 addrspace(10)*, !dbg !173
│      store i64 %.sroa.6212.24.copyload, i64 addrspace(10)* %.sroa.2.0..sroa_cast, align 8, !dbg !173
│      %.sroa.3.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %25, i64 16, !dbg !173
│      %.sroa.3.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.3.0..sroa_idx to i64 addrspace(10)*, !dbg !173
│      store i64 %.sroa.7.24.copyload, i64 addrspace(10)* %.sroa.3.0..sroa_cast, align 8, !dbg !173
│      %.sroa.4.sroa.0.0..sroa.4.0..sroa_raw_idx.sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %25, i64 24, !dbg !173
│      %.sroa.4.sroa.0.0..sroa.4.0..sroa_raw_idx.sroa_cast = bitcast i8 addrspace(10)* %.sroa.4.sroa.0.0..sroa.4.0..sroa_raw_idx.sroa_idx to i64 addrspace(10)*, !dbg !173
│      store i64 %20, i64 addrspace(10)* %.sroa.4.sroa.0.0..sroa.4.0..sroa_raw_idx.sroa_cast, align 4, !dbg !173
│      %.sroa.4.sroa.2.0..sroa.4.0..sroa_raw_idx.sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %25, i64 32, !dbg !173
│      %.sroa.4.sroa.2.0..sroa.4.0..sroa_raw_idx.sroa_cast = bitcast i8 addrspace(10)* %.sroa.4.sroa.2.0..sroa.4.0..sroa_raw_idx.sroa_idx to i64 addrspace(10)*, !dbg !173
│      store i64 %11, i64 addrspace(10)* %.sroa.4.sroa.2.0..sroa.4.0..sroa_raw_idx.sroa_cast, align 4, !dbg !173
│      %.sroa.5.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %25, i64 40, !dbg !173
│      %.sroa.5.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.5.0..sroa_idx to i64 addrspace(10)*, !dbg !173
│      store i64 %21, i64 addrspace(10)* %.sroa.5.0..sroa_cast, align 8, !dbg !173
│      %.sroa.6.0..sroa_idx = getelementptr inbounds i8, i8 addrspace(10)* %25, i64 48, !dbg !173
│      %.sroa.6.0..sroa_cast = bitcast i8 addrspace(10)* %.sroa.6.0..sroa_idx to i64 addrspace(10)*, !dbg !173
│      store i64 1, i64 addrspace(10)* %.sroa.6.0..sroa_cast, align 8, !dbg !173
│      store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 260309968 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %.sub, align 8, !dbg !173
│      %26 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 1, !dbg !173
│      store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 284413088 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %26, align 8, !dbg !173
│      %27 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 2, !dbg !173
│      store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 124647104 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %27, align 8, !dbg !173
│      %28 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 3, !dbg !173
│      store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 124608160 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %28, align 8, !dbg !173
│      %29 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 4, !dbg !173
│      store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 252976656 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %29, align 8, !dbg !173
│      %30 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 5, !dbg !173
│      store %jl_value_t addrspace(10)* %22, %jl_value_t addrspace(10)** %30, align 8, !dbg !173
│      %31 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 6, !dbg !173
│      store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 781758112 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %31, align 8, !dbg !173
│      %32 = getelementptr inbounds [8 x %jl_value_t addrspace(10)*], [8 x %jl_value_t addrspace(10)*]* %2, i64 0, i64 7, !dbg !173
│      store %jl_value_t addrspace(10)* %24, %jl_value_t addrspace(10)** %32, align 8, !dbg !173
│      %33 = call nonnull %jl_value_t addrspace(10)* @jl_invoke(%jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 954205328 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** n│      store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 260309968 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %.sub, align 8, !dbg !177
│      store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 284413088 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %26, align 8, !dbg !177
│      store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 128580352 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)** %27, align 8, !dbg !177
│      store %jl_value_t addrspace(10)* %33, %jl_value_t addrspace(10)** %28, align 8, !dbg !177
│      %34 = call nonnull %jl_value_t addrspace(10)* @jl_apply_generic(%jl_value_t addrspace(10)** nonnull %.sub, i32 4), !dbg !177
│      %35 = bitcast %jl_value_t addrspace(10)* %34 to i64 addrspace(10)*, !dbg !177
│      %36 = getelementptr i64, i64 addrspace(10)* %35, i64 -1, !dbg !177
│      %37 = load i64, i64 addrspace(10)* %36, align 4, !dbg !177, !tbaa !185, !range !189
│      %38 = and i64 %37, -16, !dbg !177
│      %39 = inttoptr i64 %38 to %jl_value_t*, !dbg !177
│      %40 = addrspacecast %jl_value_t* %39 to %jl_value_t addrspace(10)*, !dbg !177
│      %41 = icmp eq %jl_value_t addrspace(10)* %40, addrspacecast (%jl_value_t* inttoptr (i64 260246384 to %jl_value_t*) to %jl_value_t addrspace(10)*), !dbg !177
│      br i1 %41, label %pass, label %fail, !dbg !177
│
└ @ CUDAnative C:\Users\accou\.julia\packages\CUDAnative\UWBIY\src\compiler\validation.jl:222
InvalidIRError: compiling gpu_kernel(Cassette.Context{nametype(Ctx),Nothing,Nothing,getfield(GPUifyLoops, Symbol("##PassType#363")),Nothing,Cassette.DisableHooks}, typeof(DiffEqGPU.gpu_kernel), typeof(fip2), CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}, CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}, CUDAnative.CuDeviceArray{DiffEqBase.NullParameters,2,CUDAnative.AS.Global}, Float32) resulted in invalid LLVM IR
Reason: unsupported dynamic function invocation (call to overdub(::Cassette.Context, ...) in GPUifyLoops at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:524)
Stacktrace:
 [1] _tuple_any at tuple.jl:415
 [2] has_offset_axes at abstractarray.jl:86
 [3] generic_matvecmul! at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.1\LinearAlgebra\src\matmul.jl:498
 [4] mul! at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.1\LinearAlgebra\src\matmul.jl:76
 [5] fip2 at C:\Users\accou\.julia\dev\DiffEqGPU\test\runtests.jl:93
 [6] gpu_kernel at C:\Users\accou\.julia\dev\DiffEqGPU\src\DiffEqGPU.jl:7
 [7] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_apply_generic)
Stacktrace:
 [1] generic_matvecmul! at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.1\LinearAlgebra\src\matmul.jl:498
 [2] mul! at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.1\LinearAlgebra\src\matmul.jl:76
 [3] fip2 at C:\Users\accou\.julia\dev\DiffEqGPU\test\runtests.jl:93
 [4] gpu_kernel at C:\Users\accou\.julia\dev\DiffEqGPU\src\DiffEqGPU.jl:7
 [5] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_f_getfield)
Stacktrace:
 [1] print_to_string at strings/io.jl:117
 [2] string at strings/io.jl:156
 [3] generic_matvecmul! at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.1\LinearAlgebra\src\matmul.jl:502
 [4] mul! at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.1\LinearAlgebra\src\matmul.jl:76
 [5] fip2 at C:\Users\accou\.julia\dev\DiffEqGPU\test\runtests.jl:93
 [6] gpu_kernel at C:\Users\accou\.julia\dev\DiffEqGPU\src\DiffEqGPU.jl:7
 [7] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_get_nth_field_checked)
Stacktrace:
 [1] getindex at tuple.jl:24
 [2] iterate at tuple.jl:43
 [3] print_to_string at strings/io.jl:118
 [4] string at strings/io.jl:156
 [5] generic_matvecmul! at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.1\LinearAlgebra\src\matmul.jl:502
 [6] mul! at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.1\LinearAlgebra\src\matmul.jl:76
 [7] fip2 at C:\Users\accou\.julia\dev\DiffEqGPU\test\runtests.jl:93
 [8] gpu_kernel at C:\Users\accou\.julia\dev\DiffEqGPU\src\DiffEqGPU.jl:7
 [9] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_f_tuple)
Stacktrace:
 [1] iterate at tuple.jl:43
 [2] print_to_string at strings/io.jl:118
 [3] string at strings/io.jl:156
 [4] generic_matvecmul! at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.1\LinearAlgebra\src\matmul.jl:502
 [5] mul! at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.1\LinearAlgebra\src\matmul.jl:76
 [6] fip2 at C:\Users\accou\.julia\dev\DiffEqGPU\test\runtests.jl:93
 [7] gpu_kernel at C:\Users\accou\.julia\dev\DiffEqGPU\src\DiffEqGPU.jl:7
 [8] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_alloc_string)
Stacktrace:
 [1] _string_n at strings/string.jl:60
 [2] StringVector at iobuffer.jl:31
 [3] #IOBuffer#316 at iobuffer.jl:114
 [4] Type at none:0
 [5] print_to_string at strings/io.jl:121
 [6] string at strings/io.jl:156
 [7] generic_matvecmul! at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.1\LinearAlgebra\src\matmul.jl:502
 [8] mul! at C:\cygwin\home\Administrator\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.1\LinearAlgebra\src\matmul.jl:76
 [9] fip2 at C:\Users\accou\.julia\dev\DiffEqGPU\test\runtests.jl:93
 [10] gpu_kernel at C:\Users\accou\.julia\dev\DiffEqGPU\src\DiffEqGPU.jl:7
 [11] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:0
Reason: unsupported call through a literal pointer (call to jl_string_to_array)
Stacktrace:
 [1] unsafe_wrap at strings/string.jl:71
 [2] StringVector at iobuffer.jl:31
 [3] #IOBuffer#316 at iobuffer.jl:114
 [4] Type at none:0
 [5] print_to_string at strings/io.jl:121
 [6] string at st...

ChrisRackauckas avatar Sep 24 '19 13:09 ChrisRackauckas

FWIW, this also errors, but differently:

using DiffEqGPU, OrdinaryDiffEq, StaticArrays, LinearAlgebra
# In-place ODE right-hand side: overwrites `du` with the product `C * u`.
# `C` is a 4×4 Float32 static matrix written as a literal inside the function body
# (via StaticArrays' `@SMatrix`); `p` and `t` are accepted but not used.
function fip3(du,u,p,t)
   C = @SMatrix Float32[1. 0  0 -5; 4 -2  4 -3; -4  0  0  1; 5 -2  2  3]
   du .= C * u
end
# ODE problem for `fip3` with a random Float32 initial state of length 4 on (0, 1).
prob = ODEProblem(fip3,rand(Float32,4),(0.0f0,1.0f0))
# Wrap the problem in an ensemble without a custom `prob_func`.
monteprob = EnsembleProblem(prob)
# Solve 100 trajectories with Tsit5 through EnsembleGPUArray, passing saveat = 1.0f0.
# Per the report, this is the call that raises the InvalidIRError.
sol = solve(monteprob,Tsit5(),EnsembleGPUArray(),trajectories=100,saveat=1.0f0)
InvalidIRError: compiling gpu_kernel(Cassette.Context{nametype(Ctx),Nothing,Nothing,getfield(GPUifyLoops, Symbol("##PassType#363")),Nothing,Cassette.DisableHooks}, typeof(DiffEqGPU.gpu_kernel), typeof(fip3), CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}, CUDAnative.CuDeviceArray{Float32,2,CUDAnative.AS.Global}, CUDAnative.CuDeviceArray{DiffEqBase.NullParameters,2,CUDAnative.AS.Global}, Float32) resulted in invalid LLVM IR
Reason: unsupported call through a literal pointer (call to jl_get_keyword_sorter)
Stacktrace:
 [1] kwfunc at boot.jl:330
 [2] call at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\context.jl:447
 [3] fallback at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\context.jl:445
 [4] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\context.jl:270
 [5] print_to_string at strings/io.jl:121
 [6] string at strings/io.jl:156
 [7] macro expansion at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:37
 [8] _mul at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:28
 [9] * at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:7
 [10] fip3 at C:\Users\accou\.julia\dev\DiffEqGPU\test\runtests.jl:93
 [11] gpu_kernel at C:\Users\accou\.julia\dev\DiffEqGPU\src\DiffEqGPU.jl:7
 [12] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:0
Reason: unsupported dynamic function invocation (call to Cassette.overdub)
Stacktrace:
 [1] print_to_string at strings/io.jl:118
 [2] string at strings/io.jl:156
 [3] macro expansion at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:37
 [4] _mul at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:28
 [5] * at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:7
 [6] fip3 at C:\Users\accou\.julia\dev\DiffEqGPU\test\runtests.jl:93
 [7] gpu_kernel at C:\Users\accou\.julia\dev\DiffEqGPU\src\DiffEqGPU.jl:7
 [8] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_f_tuple)
Stacktrace:
 [1] print_to_string at strings/io.jl:121
 [2] string at strings/io.jl:156
 [3] macro expansion at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:37
 [4] _mul at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:28
 [5] * at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:7
 [6] fip3 at C:\Users\accou\.julia\dev\DiffEqGPU\test\runtests.jl:93
 [7] gpu_kernel at C:\Users\accou\.julia\dev\DiffEqGPU\src\DiffEqGPU.jl:7
 [8] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:0
Reason: unsupported dynamic function invocation (call to Cassette.overdub)
Stacktrace:
 [1] print_to_string at strings/io.jl:121
 [2] string at strings/io.jl:156
 [3] macro expansion at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:37
 [4] _mul at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:28
 [5] * at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:7
 [6] fip3 at C:\Users\accou\.julia\dev\DiffEqGPU\test\runtests.jl:93
 [7] gpu_kernel at C:\Users\accou\.julia\dev\DiffEqGPU\src\DiffEqGPU.jl:7
 [8] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:0
Reason: unsupported dynamic function invocation (call to Cassette.overdub)
Stacktrace:
 [1] print_to_string at strings/io.jl:123
 [2] string at strings/io.jl:156
 [3] macro expansion at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:37
 [4] _mul at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:28
 [5] * at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:7
 [6] fip3 at C:\Users\accou\.julia\dev\DiffEqGPU\test\runtests.jl:93
 [7] gpu_kernel at C:\Users\accou\.julia\dev\DiffEqGPU\src\DiffEqGPU.jl:7
 [8] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:0
Reason: unsupported call to the Julia runtime (call to jl_get_nth_field_checked)
Stacktrace:
 [1] getindex at tuple.jl:24
 [2] iterate at tuple.jl:43
 [3] print_to_string at strings/io.jl:123
 [4] string at strings/io.jl:156
 [5] macro expansion at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:37
 [6] _mul at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:28
 [7] * at C:\Users\accou\.julia\packages\StaticArrays\3KEjZ\src\matrix_multiply.jl:7
 [8] fip3 at C:\Users\accou\.julia\dev\DiffEqGPU\test\runtests.jl:93
 [9] gpu_kernel at C:\Users\accou\.julia\dev\DiffEqGPU\src\DiffEqGPU.jl:7
 [10] overdub at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\overdub.jl:0
Reason: unsupported dynamic function invocation (call to Cassette.overdub)
Stacktrace:
 [1] call at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\context.jl:447
 [2] fallback at C:\Users\accou\.julia\packages\Cassette\YCOeN\src\context.jl:445
 [3] overdub at C:\Users\accou\....

ChrisRackauckas avatar Sep 24 '19 13:09 ChrisRackauckas

Curiously, mul! uses the generic matrix fallback, while * is using the one from StaticArrays. Both fail, but for different reasons.

ChrisRackauckas avatar Sep 24 '19 13:09 ChrisRackauckas

You're now building `C` within `fip{2,3}`. Does this mean that a static matrix will be generated afresh on every call (on the GPU)?

nilsbecker avatar Sep 24 '19 13:09 nilsbecker

Static matrices aren't really "generated". What should happen here is it should essentially just become a constant in the compiled code.

ChrisRackauckas avatar Sep 24 '19 13:09 ChrisRackauckas

Ah, OK. I'm still getting to grips with a mental model for JIT-compiled code evaluation. I would have thought that what you described happens only if `C` is statically defined outside and then captured by the closure of `fip`.

nilsbecker avatar Sep 24 '19 14:09 nilsbecker

Hello, do you have a solution to this? I also need to use matrix-vector multiplication. The simplest example should be

using QuPhys
using LinearAlgebra
using SparseArrays
using DifferentialEquations
using DiffEqGPU

# Random sparse complex matrix (density 0.1); a dense one would also be fine, but sparse
# is preferred. It is symmetrized by adding its adjoint and then multiplied by -im.
A = sprand(ComplexF32, 100, 100, 0.1)
A = -1im * (A + A')

# 100 evenly spaced Float32 time points on [0, 10]; the time span runs from the first
# to the last of them.
t_l = collect(LinRange{Float32}(0, 10, 100))
tspan = (first(t_l), last(t_l))

# In-place ODE right-hand side: stores the product of the matrix held in `p[1]` with
# `u` into `du` using `mul!`. `t` is accepted but unused. Returns `nothing`.
function dudt!(du, u, p, t)
    mul!(du, p[1], u)
    return nothing
end

# Problem size. The original snippet used `N` without defining it; take it from `A` so
# the initial states always match the operator's dimension.
N = size(A, 1)

# Random normalized complex initial state of length N.
u0 = normalize(rand(ComplexF32, N))
# Parameters are passed as a one-element vector; `dudt!` reads the matrix from `p[1]`.
p = [A]
prob = ODEProblem(dudt!, u0, tspan, p)
# Every trajectory is remade with a new random normalized initial state.
prob_func = (prob, i, repeat) -> remake(prob, u0=normalize(rand(ComplexF32, N)))
monteprob = EnsembleProblem(prob, prob_func = prob_func, safetycopy=false)
# Solve 10 trajectories through EnsembleGPUArray, saving at the time points in `t_l`.
@time sol = solve(monteprob, Tsit5(), EnsembleGPUArray(), trajectories=10, saveat=t_l)

But it doesn't work. The only way to make it work is by defining

# Hand-written in-place matrix-vector product: overwrites `du` with `A * u`, where the
# matrix `A` is the parameter object `p` itself. `t` is accepted but unused.
# Returns `nothing`.
function dudt!(du, u, p, t)
    A = p
    # Accumulate in the promoted element type of `A` and `u`, so the accumulator does
    # not start as an `Int` and get promoted on the first addition. The local name also
    # no longer shadows `Base.sum`.
    T = promote_type(eltype(A), eltype(u))
    # Bounds checks are skipped, as in the original; `u` must cover the columns of `A`
    # and `du` must cover its rows.
    @inbounds for i in axes(A, 1)
        acc = zero(T)
        for j in axes(A, 2)
            acc += A[i, j] * u[j]
        end
        du[i] = acc
    end
    return nothing
end

# Pass the matrix itself as the parameter object (not wrapped in a vector).
p = A

But it is very slow, and I need to introduce other parameters in the future.

albertomercurio avatar Oct 05 '22 23:10 albertomercurio

That's completely unrelated to this thread. You cannot put a CPU-based array into a GPU kernel: the computation needs to take place on the GPU. For your case, is there a reason you wouldn't just modelingtoolkitize the f to let that build a better kernel? But this is a completely different subject so please take it to a different issue.

ChrisRackauckas avatar Oct 06 '22 03:10 ChrisRackauckas