KernelDensity.jl icon indicating copy to clipboard operation
KernelDensity.jl copied to clipboard

Borrow some ideas from GetDist Python package

Open marius311 opened this issue 3 years ago • 0 comments

Generally I find the GetDist Python package based on this paper does a much better job producing accurate contours than this one, in particular for close-to-Gaussian distributions and especialy with boundaries. E.g. here's a comparison for 100 samples from a unit normal distribution (top row) or a unit normal truncated below 0 (bottom row; code at the end of this post):

image

image

I don't know if there's any motivation here from anyone to borrow / implement some ideas from that paper, but I figured I'd put it out there, it would be very nice to have a pure-Julia version.

using PyCall, PyPlot, KernelDensityEstimate, KernelDensity, Distributions, Random
getdist = pyimport("getdist")
getdist.chains.print_load_details = false;

## no boundary

Random.seed!(0)
samples = randn(100);

xs = range(-4,4,length=100);
figure(figsize=(10,4))

ax = nothing

for (i,(label, Pxs)) in enumerate([
    ("KernelDensity.jl",         KernelDensity.pdf(KernelDensity.kde(samples), xs)),
    ("KernelDensityEstimate.jl", KernelDensityEstimate.evaluateDualTree(KernelDensityEstimate.kde!(samples), xs)),
    ("GetDist.py",               getdist.MCSamples(;samples, weights=nothing, names=["x"]).get1DDensity(0).normalize("integral")(xs))
])

    ax = subplot(1,3,i, sharex=ax, sharey=ax)
    plot(xs, pdf.(Normal(), xs),c="k", ls="--", alpha=0.6, label="truth")
    hist(samples, color="k", alpha=0.2, density=true)
    plot(xs, Pxs)
    title(label)

end

## with boundary

Random.seed!(0)
samples = filter(>(0), randn(200))

xs = range(-4,4,length=100)
figure(figsize=(10,4))

ax = nothing

for (i,(label, Pxs)) in enumerate([
    ("KernelDensity.jl",         KernelDensity.pdf(KernelDensity.kde(samples, boundary=(0,10)), xs)),
    ("KernelDensityEstimate.jl", KernelDensityEstimate.evaluateDualTree(KernelDensityEstimate.kde!(samples), xs)),
    ("GetDist.py",               getdist.MCSamples(;samples, weights=nothing, names=["x"], ranges=Dict("x"=>(0,nothing))).get1DDensity(0).normalize("integral")(xs))
])

    ax = subplot(1,3,i, sharex=ax, sharey=ax)
    plot(xs, pdf.(TruncatedNormal(0,1,0,Inf), xs),c="k", ls="--", alpha=0.4, label="truth")
    hist(samples, color="k", alpha=0.2, density=true)
    plot(xs, Pxs)
    title(label)

end

marius311 avatar Jan 29 '21 10:01 marius311