KernelDensity.jl
KernelDensity.jl copied to clipboard
Borrow some ideas from GetDist Python package
Generally I find the GetDist Python package based on this paper does a much better job producing accurate contours than this one, in particular for close-to-Gaussian distributions and especialy with boundaries. E.g. here's a comparison for 100 samples from a unit normal distribution (top row) or a unit normal truncated below 0 (bottom row; code at the end of this post):
I don't know if there's any motivation here from anyone to borrow / implement some ideas from that paper, but I figured I'd put it out there, it would be very nice to have a pure-Julia version.
using PyCall, PyPlot, KernelDensityEstimate, KernelDensity, Distributions, Random
getdist = pyimport("getdist")
getdist.chains.print_load_details = false;
## no boundary
Random.seed!(0)
samples = randn(100);
xs = range(-4,4,length=100);
figure(figsize=(10,4))
ax = nothing
for (i,(label, Pxs)) in enumerate([
("KernelDensity.jl", KernelDensity.pdf(KernelDensity.kde(samples), xs)),
("KernelDensityEstimate.jl", KernelDensityEstimate.evaluateDualTree(KernelDensityEstimate.kde!(samples), xs)),
("GetDist.py", getdist.MCSamples(;samples, weights=nothing, names=["x"]).get1DDensity(0).normalize("integral")(xs))
])
ax = subplot(1,3,i, sharex=ax, sharey=ax)
plot(xs, pdf.(Normal(), xs),c="k", ls="--", alpha=0.6, label="truth")
hist(samples, color="k", alpha=0.2, density=true)
plot(xs, Pxs)
title(label)
end
## with boundary
Random.seed!(0)
samples = filter(>(0), randn(200))
xs = range(-4,4,length=100)
figure(figsize=(10,4))
ax = nothing
for (i,(label, Pxs)) in enumerate([
("KernelDensity.jl", KernelDensity.pdf(KernelDensity.kde(samples, boundary=(0,10)), xs)),
("KernelDensityEstimate.jl", KernelDensityEstimate.evaluateDualTree(KernelDensityEstimate.kde!(samples), xs)),
("GetDist.py", getdist.MCSamples(;samples, weights=nothing, names=["x"], ranges=Dict("x"=>(0,nothing))).get1DDensity(0).normalize("integral")(xs))
])
ax = subplot(1,3,i, sharex=ax, sharey=ax)
plot(xs, pdf.(TruncatedNormal(0,1,0,Inf), xs),c="k", ls="--", alpha=0.4, label="truth")
hist(samples, color="k", alpha=0.2, density=true)
plot(xs, Pxs)
title(label)
end