Weird sampling behaviour when sampling a parameter in log space
Hello MCMC hammerers,
I have either a theoretical question, a possible bug, or some very stupid thing I'm missing.
I was investigating what happens when you sample a parameter in log space, because I was seeing some weird behaviour in my walkers when using the default StretchMove. I've boiled it down to a very simple problem that I hope you can help me with.
Here's a basic setup to estimate the mean and standard deviation of Gaussian data. In linear space, everything works as expected:
import numpy as np
import emcee
import corner
from multiprocessing import Pool
import matplotlib.pyplot as plt


def loglikehood(params, data):
    # Gaussian log-likelihood (up to an additive constant)
    return -0.5 * np.sum(((data - params[0]) / params[1])**2) - len(data) * np.log(params[1])


def logposterior(params, data, parambounds):
    # flat prior inside the box, -inf outside
    if not np.all(np.logical_and(parambounds[:, 0] <= params, params <= parambounds[:, 1])):
        return -np.inf
    lgprior = 0
    return loglikehood(params, data) + lgprior


np.random.seed(25)
ndata = 250
sigma = 1
truth = [0, sigma]
parnames = [r"$\mu$", r"$\sigma$"]
data = np.random.normal(0, sigma, size=ndata)
parambounds = np.array([(-100, 100), (0, 1e3)])
ndim = len(parnames)
cores = 14
nwalkers = 2 * cores
initial_samples = np.array([np.random.uniform(low, high, nwalkers) for low, high in parambounds]).T
MAX_N = 500

with Pool(cores) as pool:
    sampler = emcee.EnsembleSampler(nwalkers, ndim, logposterior,
                                    pool=pool, args=(data, parambounds))
    sampler.run_mcmc(initial_samples, MAX_N, progress=True)

discard = MAX_N // 2
final_samples = sampler.get_chain(discard=discard, flat=True)
loglikes = sampler.get_log_prob(discard=discard, flat=True)

corner_fig = corner.corner(final_samples, labels=parnames, title_fmt='.2f',
                           quantiles=[0.16, 0.5, 0.84], show_titles=True, truths=truth,
                           title_kwargs={"fontsize": 20}, max_n_ticks=3, labelpad=0.08,
                           levels=(1 - np.exp(-0.5), 1 - np.exp(-0.5 * 2**2)))  # 1 and 2 sigma levels
corner_fig.savefig("linear_corner.png")

chain = sampler.get_chain(flat=True)
median_values = np.median(chain, axis=0)
chain_fig, axes = plt.subplots(ndim, sharex=True, gridspec_kw={'hspace': 0.05, 'wspace': 0})
for param, parname, ax, median in zip(chain.T, parnames, axes, median_values):
    ax.plot(param, linestyle="None", marker="+", color="black")
    ax.set_ylabel(parname.replace("kernel:", "").replace("log_", ""))
    ax.axhline(y=median)
    ax.axvline(discard * nwalkers, ls="--", color="red")  # end of the discarded burn-in
chain_fig.savefig("chain_linear.png")

plt.figure()
plt.hist(data)
plt.axvline(np.median(final_samples, axis=0)[0], color="black")  # fitted mean over the data
plt.savefig("data_model_linear.png")
Now, because the standard deviation is a positive scale parameter, it makes sense to sample it in log space. Let's try that:
import numpy as np
import emcee
import corner
from multiprocessing import Pool
import matplotlib.pyplot as plt


def loglikehood(params, data):
    # params[1] is now log10(sigma); convert back to linear space for the likelihood
    linsigma = 10**params[1]
    return -0.5 * np.sum(((data - params[0]) / linsigma)**2) - len(data) * np.log(linsigma)


def logposterior(params, data, parambounds):
    # flat prior on (mu, log10(sigma)) inside the box, -inf outside
    if not np.all(np.logical_and(parambounds[:, 0] <= params, params <= parambounds[:, 1])):
        return -np.inf
    lgprior = 0
    return loglikehood(params, data) + lgprior


np.random.seed(25)
ndata = 250
sigma = 1
truth = [0, sigma]
parnames = [r"$\mu$", r"$\sigma$"]
data = np.random.normal(0, sigma, size=ndata)
parambounds = np.array([(-100, 100), (-2, 2)])  # sigma from 0.01 to 100
ndim = len(parnames)
cores = 14
nwalkers = 2 * cores
initial_samples = np.array([np.random.uniform(low, high, nwalkers) for low, high in parambounds]).T
MAX_N = 500

with Pool(cores) as pool:
    sampler = emcee.EnsembleSampler(nwalkers, ndim, logposterior,
                                    pool=pool, args=(data, parambounds))
    sampler.run_mcmc(initial_samples, MAX_N, progress=True)

discard = MAX_N // 2
final_samples = sampler.get_chain(discard=discard, flat=True).copy()
loglikes = sampler.get_log_prob(discard=discard, flat=True)
# back to linear space
final_samples.T[1] = 10**final_samples.T[1]

corner_fig = corner.corner(final_samples, labels=parnames, title_fmt='.2f',
                           quantiles=[0.16, 0.5, 0.84], show_titles=True, truths=truth,
                           title_kwargs={"fontsize": 20}, max_n_ticks=3, labelpad=0.08,
                           levels=(1 - np.exp(-0.5), 1 - np.exp(-0.5 * 2**2)))  # 1 and 2 sigma levels
corner_fig.savefig("log_corner.png")

chain = sampler.get_chain(flat=True).copy()
# back to linear space
chain.T[1] = 10**chain.T[1]
median_values = np.median(chain, axis=0)
chain_fig, axes = plt.subplots(ndim, sharex=True, gridspec_kw={'hspace': 0.05, 'wspace': 0})
for param, parname, ax, median in zip(chain.T, parnames, axes, median_values):
    ax.plot(param, linestyle="None", marker="+", color="black")
    ax.set_ylabel(parname.replace("kernel:", "").replace("log_", ""))
    ax.axhline(y=median)
    ax.axvline(discard * nwalkers, ls="--", color="red")  # end of the discarded burn-in
chain_fig.savefig("chain_log.png")

plt.figure()
plt.hist(data)
plt.axvline(np.median(final_samples, axis=0)[0], color="black")  # fitted mean over the data
plt.savefig("data_model_log.png")
As you can see, one (or several) walkers have jumped to weird places in the parameter space.
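In case it helps to see what I mean, this is roughly how I look at the individual walkers (a quick diagnostic sketch, not part of the scripts above; it assumes the sampler and nwalkers objects from the log-space run, and relies on get_chain() without flat=True keeping the shape (nsteps, nwalkers, ndim), and on acceptance_fraction giving one value per walker):

import numpy as np
import matplotlib.pyplot as plt

# Per-walker traces of log10(sigma); get_chain() has shape (nsteps, nwalkers, ndim)
walker_chain = sampler.get_chain()
print("acceptance fractions per walker:", np.round(sampler.acceptance_fraction, 2))

trace_fig, trace_ax = plt.subplots()
for w in range(nwalkers):
    trace_ax.plot(walker_chain[:, w, 1], alpha=0.5)  # one line per walker
trace_ax.set_xlabel("step")
trace_ax.set_ylabel(r"$\log_{10}\sigma$")
trace_fig.savefig("walker_traces_log.png")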
The weird thing is that if you set the upper bound on log10(sigma) to 4 (i.e. allow sigma up to 10**4), then you get the expected behaviour.
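Concretely, the only change relative to the second script is this one line (the lower bound stays as it was):

import numpy as np

# Widen only the prior box on log10(sigma): sigma is now allowed from 0.01 up to 1e4
parambounds = np.array([(-100, 100), (-2, 4)])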
This makes me wonder whether I'm missing something like a Jacobian adjustment. My understanding of that is limited, and the few things I have tried have all failed. I was also considering whether the reparametrisation silently turns the uniform prior on sigma into a log-uniform one, but even if that's the case, it's still not clear to me how that explains what I'm seeing.
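To be concrete, this is roughly the kind of adjustment I have in mind (just a sketch of my understanding, not something that's in the scripts above; logposterior_jacobian is only a name I made up here, and it would drop into the second script in place of logposterior, reusing the log-space loglikehood):

import numpy as np

def logposterior_jacobian(params, data, parambounds):
    # Sketch: params[1] is s = log10(sigma). A flat prior on s corresponds to a
    # log-uniform prior on sigma; to keep a prior that is uniform in sigma itself,
    # the log-prior picks up the Jacobian log|d sigma / d s| = s * ln(10)
    # (up to an additive constant).
    if not np.all(np.logical_and(parambounds[:, 0] <= params, params <= parambounds[:, 1])):
        return -np.inf
    lgprior = params[1] * np.log(10)
    return loglikehood(params, data) + lgprior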
Please educate me, I'm sure I'm missing something obvious here...