Turing.jl
Failure when dimension changes
From Slack, this causes an error in Turing 8.3:
# Reproduction script from the issue report: a mixture model in which the
# number of components `k` is itself sampled, so the dimensions of `π`, `u`,
# and `v` can change between MCMC iterations — this is what triggers the
# DimensionMismatch under MH (see the stacktrace below).
using Turing
# Observation matrix: 3 features (rows) × 4 data points (columns).
data = [
0.388235 0.388235 0.388235 0.388235;
0.388235 0.388235 0.388235 0.388235;
0.388235 0.388235 0.388235 0.388235;
]
@model mixture(x) = begin
# Number of data points (columns of x).
n = size(x, 2)
# Dirichlet concentration parameter for the component weights.
γ = 1
# Number of components and component attributions:
# NOTE(review): resampling k changes the length of π below, so the support
# of the model is trans-dimensional — plain MH cannot handle this.
k ~ Geometric(.1)
π ~ Dirichlet(k+1, γ)
# Component assignment for each data point (tzeros gives a traced array).
y = tzeros(Int, n)
for i in 1:n
y[i] ~ Categorical(π)
end
# Component parameters:
# u[i,j], v[i,j] parameterize the Beta distribution of feature i under
# component j via α = u*v and β = (1-u)*v (mean u, "concentration" v).
u = tzeros(Float64, (3,k+1))
v = tzeros(Float64, (3,k+1))
for i in 1:3
for j in 1:k+1
u[i,j] ~ Uniform()
v[i,j] ~ Gamma()
end
end
# Draw observation:
for i in 1:3
for j in 1:n
α = u[i,y[j]] * v[i,y[j]]
β = (1 - u[i,y[j]]) * v[i,y[j]]
x[i,j] ~ Beta(α, β)
end
end
end
model = mixture(data)
# Sampling with MH fails on the second iteration when logpdf is evaluated
# against a π of a different dimension (the reported error).
chain = sample(model, MH(), 2)
Error:
ERROR: LoadError: DimensionMismatch("Inconsistent array dimensions.")
Stacktrace: [1] logpdf at /home/cameron/.julia/packages/Distributions/kPXE9/src/multivariates.jl:193 [inlined] [2] assume(::DynamicPPL.Sampler{MH{()},Turing.Inference.MHState{DynamicPPL.VarInfo{NamedTuple{(:k, :π, :y, :u, :v),Tuple{DynamicPPL.Metadata{Dict{DynamicPPL.VarName{:k},Int64},Array{Geometric{Float64},1},Array{DynamicPPL.VarName{:k},1},Array{Int64,1},Ar$
ay{Set{DynamicPPL.Selector},1}},DynamicPPL.Metadata{Dict{DynamicPPL.VarName{:π},Int64},Array{Dirichlet{Float64},1},Array{DynamicPPL.VarName{:π},1},Array{Float64,1},Array{Set{DynamicPPL.Selector},1}},DynamicPPL.Metadata{Dict{DynamicPPL.VarName{:y},Int64},$
rray{DiscreteNonParametric{Int64,Float64,Base.OneTo{Int64},Array{Float64,1}},1},Array{DynamicPPL.VarName{:y},1},Array{Int64,1},Array{Set{DynamicPPL.Selector},1}},DynamicPPL.Metadata{Dict{DynamicPPL.VarName{:u},Int64},Array{Uniform{Float64},1},Array{Dynam$
cPPL.VarName{:u},1},Array{Float64,1},Array{Set{DynamicPPL.Selector},1}},DynamicPPL.Metadata{Dict{DynamicPPL.VarName{:v},Int64},Array{Gamma{Float64},1},Array{DynamicPPL.VarName{:v},1},Array{Float64,1},Array{Set{DynamicPPL.Selector},1}}}},Float64}}}, ::Dir$
chlet{Float64}, ::DynamicPPL.VarName{:π}, ::DynamicPPL.VarInfo{NamedTuple{(:k, :π, :y, :u, :v),Tuple{DynamicPPL.Metadata{Dict{DynamicPPL.VarName{:k},Int64},Array{Geometric{Float64},1},Array{DynamicPPL.VarName{:k},1},Array{Int64,1},Array{Set{DynamicPPL.Se$ector},1}},DynamicPPL.Metadata{Dict{DynamicPPL.VarName{:π},Int64},Array{Dirichlet{Float64},1},Array{DynamicPPL.VarName{:π},1},Array{Float64,1},Array{Set{DynamicPPL.Selector},1}},DynamicPPL.Metadata{Dict{DynamicPPL.VarName{:y},Int64},Array{DiscreteNonPara$etric{Int64,Float64,Base.OneTo{Int64},Array{Float64,1}},1},Array{DynamicPPL.VarName{:y},1},Array{Int64,1},Array{Set{DynamicPPL.Selector},1}},DynamicPPL.Metadata{Dict{DynamicPPL.VarName{:u},Int64},Array{Uniform{Float64},1},Array{DynamicPPL.VarName{:u},1},$
rray{Float64,1},Array{Set{DynamicPPL.Selector},1}},DynamicPPL.Metadata{Dict{DynamicPPL.VarName{:v},Int64},Array{Gamma{Float64},1},Array{DynamicPPL.VarName{:v},1},Array{Float64,1},Array{Set{DynamicPPL.Selector},1}}}},Float64}) at /home/cameron/.julia/pack$ges/Turing/vg86q/src/inference/mh.jl:148
Looks like the error comes about when the Dirichlet
is drawn in the second iteration, π ~ Dirichlet(k+1, γ)
. Not sure why this is happening, since I thought Turing was fairly resilient to this kind of thing.
The error seems to be caused by the evaluation of logpdf(dist, old_val)
in https://github.com/TuringLang/Turing.jl/blob/0d4c8fe74f7abf943f7fe33e85cd51163839683c/src/inference/mh.jl#L148. I don't see how this could possibly work if the dimension of dist
changes in between subsequent steps of the MH algorithm - is it actually possible to run this example with any previous version of Turing?
That's right, one would need a reversible jump MCMC. But how about SMC? In the tutorial on infinite mixture models a stochastic dimension model is run with SMC. However, for the model above, chain = sample(model, SMC(), 2)
causes a MethodError
regarding Chains
. What's the fundamental difference between this model and the one in the tutorial?
However, for the model above, chain = sample(model, SMC(), 2) causes a MethodError regarding Chains.
I'm currently updating Turing to AbstractMCMC 0.4 and MCMCChains 2.0 - maybe that will fix bugs (or make it easier to fix them) if there are any issues with the current setup of Chains in Turing.
I quickly checked your example with https://github.com/TuringLang/Turing.jl/pull/1116 and discovered a small bug unrelated to the Chains
issue. Unfortunately, the construction of a chain still fails since parray
in https://github.com/TuringLang/Turing.jl/blob/9dc8a297aaed4e898beba698491116c28e40c533/src/inference/Inference.jl#L346 can't be inferred correctly — it's of type Matrix{Any}
for which no constructor of Chains
is defined, hence the call in https://github.com/TuringLang/Turing.jl/blob/9dc8a297aaed4e898beba698491116c28e40c533/src/inference/Inference.jl#L369-L376 fails. However, if I run sample(model, SMC(), 2; chain_type = Any)
Turing successfully returns a vector of the raw transitions. Hence SMC
does work in principle, we just have to fix the type inference problem :slightly_smiling_face:
Another quick debugging yields that the problem is that eltype(vals) = Any
in
https://github.com/TuringLang/Turing.jl/blob/9dc8a297aaed4e898beba698491116c28e40c533/src/inference/Inference.jl#L339.
An ugly fix is to replace https://github.com/TuringLang/Turing.jl/blob/9dc8a297aaed4e898beba698491116c28e40c533/src/inference/Inference.jl#L263 with
return names, map(identity, vals)
Then eltype(vals) = Real
at least, which is sufficient for Chains
. However, this is not a proper fix, instead we should just promote the types of the different parameters correctly.
Hmm maybe we don't actually want to convert the integers that we obtain for, e.g., k
to floating point numbers I guess? Then eltype(vals) = Real
would be fine.
The following seems to solve the issue but promotes everything to a common type:
"""
    _params_to_array(ts::Vector)

Collect the sampled parameter names and values from a vector of transitions
`ts`, where each transition is a `NamedTuple` mapping parameter names to
values.

Return a tuple `(names, values)`: `names::Vector{Symbol}` lists every
parameter that occurs in any transition, in order of first appearance, and
`values` is a matrix with one row per transition and one column per name.
Entries for parameters absent from a transition are `missing`; element types
are promoted to a common type by `vcat`/`hcat` (so mixed `Int`/`Float64`
columns become `Float64`, and partially-missing columns become
`Union{Missing,T}`).
"""
function _params_to_array(ts::Vector)
    # Gather names in order of first appearance so the column order is
    # deterministic (a `Set` would make it depend on hashing order).
    names = Symbol[]
    for t in ts, name in keys(t)
        name in names || push!(names, name)
    end

    # Guard the empty case: `mapreduce` over an empty collection with no
    # `init` would throw.
    isempty(names) && return names, Matrix{Missing}(undef, length(ts), 0)

    # One column per parameter, one row per transition; `missing` marks a
    # parameter that a transition did not sample.
    values = mapreduce(hcat, names) do name
        mapreduce(vcat, ts) do t
            get(t, name, missing)
        end
    end

    return names, values
end
# Return the value stored under `name` in the transition `t`, or `missing`
# when the transition did not sample that parameter.
function extract_value(t::NamedTuple, name::Symbol)
    return haskey(t, name) ? getfield(t, name) : missing
end
It yields
julia> _params_to_array([(a = 3,), (a = 4,)])
(Symbol[:a], [3, 4])
julia> _params_to_array([(a = 3.0,), (a = 4,)])
(Symbol[:a], [3.0, 4.0])
julia> _params_to_array([(a = 3, b = 2), (a = 4, b = 5)])
(Symbol[:a, :b], [3 2; 4 5])
julia> _params_to_array([(a = 3, b = 1.0), (a = 4, b = 5.0)])
(Symbol[:a, :b], [3.0 1.0; 4.0 5.0])
julia> _params_to_array([(a = 3,), (a = 4, b = 5.0)])
(Symbol[:a, :b], Union{Missing, Float64}[3.0 missing; 4.0 5.0])
I think that's probably fine. It's pretty hard to avoid the promote-to-float stuff when we stick everything in an array.
I'll put together a PR.
Maybe related: #1095
I completely missed that one has to consider nested non-scalar parameters as well. This seems to make it a bit more difficult, so I have to see if a similar approach is still possible.
Thanks!
Duplicate of https://github.com/TuringLang/DynamicPPL.jl/issues/434