NormalizingFlows.jl
CUDA array support

The example below builds a small planar flow, moves it to the GPU with Flux's `gpu`, and checks which operations work on `CuArray`s:
```julia
using CUDA
using LinearAlgebra
using Distributions
using FunctionChains
using Bijectors
using Flux

T = Float32
q0 = MvNormal(ones(T, 2))  # zero-mean Gaussian base distribution with unit variances
ts = reduce(∘, [f32(Bijectors.PlanarLayer(2)) for _ in 1:2])  # compose two planar layers
flow = transformed(q0, ts)

# move the flow to the GPU
CUDA.functional()  # check that a CUDA device is available
flow_g = gpu(flow)
ts_g = gpu(ts)

xs = rand(flow_g.dist, 10)      # sampling from the base distribution happens on the CPU
ys_g = transform(ts_g, cu(xs))  # good: GPU layers applied to a CuArray
logpdf(flow_g, ys_g[:, 1])      # good
rand(flow_g)                    # bug
```
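Calling `rand(flow_g)` fails: `gpu(flow)` moves the planar-layer parameters to the device, but sampling from the base distribution still produces a plain `Vector{Float32}` on the CPU. The planar layers then call `dot` on a `CuArray` and a `Vector`, for which no method exists, as the stack trace below shows.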
Output:

```
julia> rand(flow_g) # bug
ERROR: MethodError: no method matching dot(::Int64, ::CuPtr{Float32}, ::Int64, ::Ptr{Float32}, ::Int64)

Closest candidates are:
  dot(::Integer, ::Union{Ptr{Float32}, AbstractArray{Float32}}, ::Integer, ::Union{Ptr{Float32}, AbstractArray{Float32}}, ::Integer)
   @ LinearAlgebra ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/LinearAlgebra/src/blas.jl:344
  dot(::Integer, ::Union{Ptr{Float64}, AbstractArray{Float64}}, ::Integer, ::Union{Ptr{Float64}, AbstractArray{Float64}}, ::Integer)
   @ LinearAlgebra ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/LinearAlgebra/src/blas.jl:344

Stacktrace:
  [1] dot(x::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, y::Vector{Float32})
    @ LinearAlgebra.BLAS ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/LinearAlgebra/src/blas.jl:395
  [2] dot(x::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, y::Vector{Float32})
    @ LinearAlgebra ~/.julia/juliaup/julia-1.9.2+0.x64.linux.gnu/share/julia/stdlib/v1.9/LinearAlgebra/src/matmul.jl:14
  [3] aT_b(a::CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, b::Vector{Float32})
    @ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/utils.jl:4
  [4] _transform(flow::PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, z::Vector{Float32})
    @ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/bijectors/planar_layer.jl:77
  [5] transform(b::PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, z::Vector{Float32})
    @ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/bijectors/planar_layer.jl:82
  [6] (::PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}})(x::Vector{Float32})
    @ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/interface.jl:80
  [7] call_composed(fs::Tuple{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, x::Tuple{Vector{Float32}}, kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Base ./operators.jl:1035
  [8] call_composed
    @ ./operators.jl:1034 [inlined]
  [9] (::ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}})(x::Vector{Float32}; kw::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
    @ Base ./operators.jl:1031
 [10] (::ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}})(x::Vector{Float32})
    @ Base ./operators.jl:1031
 [11] rand(td::MultivariateTransformed{MvNormal{Float32, PDMats.PDiagMat{Float32, Vector{Float32}}, FillArrays.Zeros{Float32, 1, Tuple{Base.OneTo{Int64}}}}, ComposedFunction{PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, PlanarLayer{CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}, CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}})
    @ Bijectors ~/.julia/packages/Bijectors/SxXKg/src/transformed_distribution.jl:159
 [12] top-level scope
    @ REPL[67]:1
 [13] top-level scope
    @ ~/.julia/packages/CUDA/p5OVK/src/initialization.jl:171
```
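A minimal workaround, until sampling is GPU-aware end to end, is to draw base samples on the CPU, copy them to the device, and apply the GPU transform explicitly, exactly as in the `ys_g` line above. A sketch, assuming the hypothetical helper name `rand_gpu` (not part of the package API):

```julia
# Hypothetical helper (the name is ours, not a package API): sample from the
# flow on the GPU by drawing from the CPU base distribution and pushing the
# draws through the GPU-resident bijector.
function rand_gpu(flow_cpu, ts_g, n::Int)
    xs = rand(flow_cpu.dist, n)     # CPU samples from the base distribution
    return transform(ts_g, cu(xs))  # copy to the GPU, then apply the layers
end

ys_g = rand_gpu(flow, ts_g, 10)  # returns a CuArray of flow samples
```

A more complete fix would make the base distribution sample directly into a `CuArray`, e.g. by providing a `Distributions._rand!(rng, q0_g, xx)` method for a GPU copy `q0_g` of the base distribution that fills a `CuArray` `xx`. Note that `_rand!` is internal Distributions.jl API, so treat such an overload as a stopgap.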