
Getting the preconditioner/checking that it is functional

jlperla opened this issue 3 years ago · 2 comments

@torfjelde

using Turing, DynamicPPL, AbstractMCMC, Random
@model function gdemo(xs)
    # Assumptions
    σ² ~ InverseGamma(2, 3)
    μ ~ Normal(0, √σ²)
    # Observations
    for i = 1:length(xs)
        xs[i] ~ Normal(μ, √σ²)
    end
end


function sample_model(it, sample::T, state, max_samples) where {T}
    samples = T[]
    metrics = Vector{Vector{Float64}}(undef, 0)
    while length(samples) < max_samples
        # For an iterator we pass in the previous `state` as the second argument
        sample, state = iterate(it, state)

        # record the current inverse mass matrix, which includes the adaptation
        # process for the metric
        push!(metrics, copy(state.hamiltonian.metric.M⁻¹))

        # Save `sample` once we're past the adaptation phase
        # (`nadapts` is the global defined in the setup below)
        if state.i > nadapts
            push!(samples, sample)
        end
    end
    return samples, metrics
end

# setup model and sampling algorithm
xs = randn(100);
m = gdemo(xs);
alg = NUTS(0.65);
spl = DynamicPPL.Sampler(alg);
rng = MersenneTwister(42);
nadapts = 50;                                                # number of adaptation steps to use for NUTS
it = AbstractMCMC.Stepper(rng, m, spl, (nadapts=nadapts,)); # create iterator (using first 50 to adapt)


# Get initial sample, then iterate
sample, state = iterate(it)
samples, metrics = sample_model(it, sample, state, 50)
@show metrics

This is an attempt to adapt the iterator code so I can see what the preconditioner looks like during sampling. I get:

┌ Info: Found initial step size
└   ϵ = 0.2
metrics = [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0], …]   (every entry is [1.0, 1.0])

Is there a bug in the code here, is this the wrong way to access the diagonal preconditioner, or is this actually ending up with no preconditioning?
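(For what it's worth, one way to check whether metric adaptation is configured at all is to look at the adaptor carried in the sampler state; a minimal sketch, assuming Turing's internal `HMCState` exposes an `adaptor` field — an undocumented internal, so this field name is an assumption:)

```julia
# Sketch: inspect the adaptor attached to the iterator state.
# `state.adaptor` is an assumed internal field of Turing's HMCState,
# not documented API.
sample, state = iterate(it)
@show typeof(state.adaptor)  # a windowed (Stan-style) adaptor would adapt M⁻¹
@show state.adaptor
```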

jlperla avatar Jul 15 '22 18:07 jlperla

So it's definitely working on my end with [email protected] in a clean project:

julia> @model function gdemo(xs)
           # Assumptions
           σ² ~ InverseGamma(2, 3)
           μ ~ Normal(0, √σ²)
           # Observations
           for i = 1:length(xs)
               xs[i] ~ Normal(μ, √σ²)
           end
       end
gdemo (generic function with 2 methods)

julia> # setup model and sampling algorithm
       xs = randn(100);

julia> m = gdemo(xs);

julia> chain = sample(m, NUTS(), 1000, save_state=true);

julia> chain.info.samplerstate.hamiltonian.metric
DiagEuclideanMetric([0.017822320394359714, 0.01 ...])
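The diagonal of the preconditioner itself should be reachable one field deeper; a sketch, assuming the same `M⁻¹` field layout used in the iterator code above:

```julia
# Sketch: pull the diagonal of the inverse mass matrix out of the saved state
# (one entry per parameter; field layout assumed from the code above).
metric = chain.info.samplerstate.hamiltonian.metric
metric.M⁻¹
```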

julia> versioninfo()
Julia Version 1.7.0
Commit 3bf9d17731 (2021-11-30 12:12 UTC)
Platform Info:
  OS: Linux (x86_64-pc-linux-gnu)
  CPU: Intel(R) Core(TM) i7-10710U CPU @ 1.10GHz
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-12.0.1 (ORCJIT, skylake)

What version are you on?

EDIT: Can you maybe post the output of ]status --manifest?
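(The REPL-mode command has a functional equivalent if that's easier to paste from a script; a sketch using Pkg's documented API:)

```julia
# Equivalent of the `]status --manifest` REPL command
using Pkg
Pkg.status(mode = Pkg.PKGMODE_MANIFEST)
```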

torfjelde avatar Jul 26 '22 14:07 torfjelde

Thanks @torfjelde. I tried a new project file and made sure it had the latest Turing; I still get the same results with the code I posted, but I replicated the correct calculation of the preconditioner with yours.

But playing with it, I think this might actually have something to do with heuristics on the number of samples/adaptations/dimensions? Maybe it doesn't precondition at all in certain circumstances?

Try this tiny update to your code. All it does is set the number of adaptations relative to the number of samples.

xs = randn(100)
m = gdemo(xs)
samples = 1000
num_adapts_prop = 0.1
num_adapts = Int(floor(samples * num_adapts_prop))
δ = 0.65
alg = NUTS(num_adapts, δ)
chain = sample(m, alg, samples, save_state=true);
chain.info.samplerstate.hamiltonian.metric

This doesn't calculate the preconditioner. If you increase num_adapts_prop to 0.2, though, it will. Similarly, with samples = 500 and num_adapts_prop = 0.3 it will precondition.

Digging into this, it seems there is a threshold, with this model at least. With samples = 1000, num_adapts_prop = 0.14 doesn't calculate the preconditioner but num_adapts_prop = 0.15 does.
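One possible explanation for an absolute threshold like this, assuming AdvancedHMC follows Stan's windowed adaptation defaults (an initial buffer of 75 steps, a terminal buffer of 50, and a minimum window of 25 — these sizes are an assumption, not confirmed from the source): the mass matrix is only estimated inside an adaptation window, and no window fits unless the total number of adaptation steps is at least 75 + 25 + 50 = 150, which would match the jump between 0.14 × 1000 = 140 and 0.15 × 1000 = 150.

```julia
# Sketch of the hypothesized threshold, assuming Stan-style defaults
# (these buffer sizes are an assumption about AdvancedHMC's adaptor):
init_buffer, min_window, term_buffer = 75, 25, 50
min_adapts = init_buffer + min_window + term_buffer  # 150

for n_adapts in (100, 140, 150, 500)
    println("n_adapts = $n_adapts: window fits? ", n_adapts >= min_adapts)
end
```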

We don't necessarily need to do super-short runs, though for some robustness checks that would be nice. The main concern is that the heuristic seems to be expressed in relative rather than absolute terms, since I think we saw this lack of preconditioning in runs with 5000 samples and roughly 500 adaptations for certain model sizes. Any ideas?

jlperla avatar Jul 26 '22 16:07 jlperla

I will close this since it seems like there is some number of adaptation steps above which it starts preconditioning.

jlperla avatar Aug 16 '22 21:08 jlperla