Weird sampling behaviour when sampling a parameter in log space
Hello MCMC hammerers,
I have either a theoretical question, a possible bug, or some very stupid thing I'm missing.
I was investigating what happens when you sample a parameter in log space, because I was seeing some weird behaviour in my walkers when using the default StretchMove. I've boiled it down to a very simple problem that I hope you can help me with.
Here's a basic setup to estimate the mean and standard deviation of Gaussian data. In linear space, everything works as expected:
import numpy as np
import emcee
import corner
from multiprocessing import Pool
import matplotlib.pyplot as plt


def loglikehood(params, data):
    # Gaussian log-likelihood (up to an additive constant)
    return -0.5 * np.sum(((data - params[0]) / params[1])**2) - len(data) * np.log(params[1])


def logposterior(params, data, parambounds):
    # flat prior inside the box, -inf outside
    if not np.all(np.logical_and(parambounds[:, 0] <= params, params <= parambounds[:, 1])):
        return -np.inf
    lgprior = 0
    return loglikehood(params, data) + lgprior


np.random.seed(25)
ndata = 250
sigma = 1
truth = [0, sigma]
parnames = [r"$\mu$", r"$\sigma$"]
data = np.random.normal(0, sigma, size=ndata)
parambounds = np.array([(-100, 100), (0, 1e3)])
ndim = len(parnames)
cores = 14
nwalkers = 2 * cores
initial_samples = np.array([np.random.uniform(low, high, nwalkers) for low, high in parambounds]).T
MAX_N = 500

with Pool(cores) as pool:
    sampler = emcee.EnsembleSampler(nwalkers, ndim, logposterior,
                                    pool=pool, args=(data, parambounds))
    sampler.run_mcmc(initial_samples, MAX_N, progress=True)

discard = MAX_N // 2
final_samples = sampler.get_chain(discard=discard, flat=True)
loglikes = sampler.get_log_prob(discard=discard, flat=True)

corner_fig = corner.corner(final_samples, labels=parnames, title_fmt='.2f',
                           quantiles=[0.16, 0.5, 0.84], show_titles=True, truths=truth,
                           title_kwargs={"fontsize": 20}, max_n_ticks=3, labelpad=0.08,
                           levels=(1 - np.exp(-0.5), 1 - np.exp(-0.5 * 2**2)))  # 1 and 2 sigma levels
corner_fig.savefig("linear_corner.png")

chain = sampler.get_chain(flat=True)
median_values = np.median(chain, axis=0)
chain_fig, axes = plt.subplots(ndim, sharex=True, gridspec_kw={'hspace': 0.05, 'wspace': 0})
for param, parname, ax, median in zip(chain.T, parnames, axes, median_values):
    ax.plot(param, linestyle="None", marker="+", color="black")
    ax.set_ylabel(parname.replace("kernel:", "").replace("log_", ""))
    ax.axhline(y=median)
    ax.axvline(discard * nwalkers, ls="--", color="red")  # end of the discarded burn-in
chain_fig.savefig("chain_linear.png")

plt.figure()
plt.hist(data)
plt.axvline(np.median(final_samples, axis=0)[0], color="black")  # fitted mean over the data
plt.savefig("data_model_linear.png")
Now, because the standard deviation is a positive scale parameter, it makes sense to sample it in log space. Let's try that:
import numpy as np
import emcee
import corner
from multiprocessing import Pool
import matplotlib.pyplot as plt


def loglikehood(params, data):
    # params[1] is now log10(sigma); convert back to linear space for the likelihood
    linsigma = 10**params[1]
    return -0.5 * np.sum(((data - params[0]) / linsigma)**2) - len(data) * np.log(linsigma)


def logposterior(params, data, parambounds):
    # flat prior on (mu, log10(sigma)) inside the box, -inf outside
    if not np.all(np.logical_and(parambounds[:, 0] <= params, params <= parambounds[:, 1])):
        return -np.inf
    lgprior = 0
    return loglikehood(params, data) + lgprior


np.random.seed(25)
ndata = 250
sigma = 1
truth = [0, sigma]
parnames = [r"$\mu$", r"$\sigma$"]
data = np.random.normal(0, sigma, size=ndata)
parambounds = np.array([(-100, 100), (-2, 2)])  # sigma from 0.01 to 100
ndim = len(parnames)
cores = 14
nwalkers = 2 * cores
initial_samples = np.array([np.random.uniform(low, high, nwalkers) for low, high in parambounds]).T
MAX_N = 500

with Pool(cores) as pool:
    sampler = emcee.EnsembleSampler(nwalkers, ndim, logposterior,
                                    pool=pool, args=(data, parambounds))
    sampler.run_mcmc(initial_samples, MAX_N, progress=True)

discard = MAX_N // 2
final_samples = sampler.get_chain(discard=discard, flat=True).copy()
loglikes = sampler.get_log_prob(discard=discard, flat=True)
# back to linear space
final_samples.T[1] = 10**final_samples.T[1]

corner_fig = corner.corner(final_samples, labels=parnames, title_fmt='.2f',
                           quantiles=[0.16, 0.5, 0.84], show_titles=True, truths=truth,
                           title_kwargs={"fontsize": 20}, max_n_ticks=3, labelpad=0.08,
                           levels=(1 - np.exp(-0.5), 1 - np.exp(-0.5 * 2**2)))  # 1 and 2 sigma levels
corner_fig.savefig("log_corner.png")

chain = sampler.get_chain(flat=True).copy()
# back to linear space
chain.T[1] = 10**chain.T[1]
median_values = np.median(chain, axis=0)
chain_fig, axes = plt.subplots(ndim, sharex=True, gridspec_kw={'hspace': 0.05, 'wspace': 0})
for param, parname, ax, median in zip(chain.T, parnames, axes, median_values):
    ax.plot(param, linestyle="None", marker="+", color="black")
    ax.set_ylabel(parname.replace("kernel:", "").replace("log_", ""))
    ax.axhline(y=median)
    ax.axvline(discard * nwalkers, ls="--", color="red")  # end of the discarded burn-in
chain_fig.savefig("chain_log.png")

plt.figure()
plt.hist(data)
plt.axvline(np.median(final_samples, axis=0)[0], color="black")  # fitted mean over the data
plt.savefig("data_model_log.png")
As you can see, one (or several) walkers have jumped to weird places in the parameter space.
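In case it helps to see what I mean, this is roughly how I look at the individual walkers (a quick diagnostic sketch, not part of the scripts above; it assumes the sampler and nwalkers objects from the log-space run, and relies on get_chain() without flat=True keeping the shape (nsteps, nwalkers, ndim), and on acceptance_fraction giving one value per walker):

import numpy as np
import matplotlib.pyplot as plt

# Per-walker traces of log10(sigma); get_chain() has shape (nsteps, nwalkers, ndim)
walker_chain = sampler.get_chain()
print("acceptance fractions per walker:", np.round(sampler.acceptance_fraction, 2))

trace_fig, trace_ax = plt.subplots()
for w in range(nwalkers):
    trace_ax.plot(walker_chain[:, w, 1], alpha=0.5)  # one line per walker
trace_ax.set_xlabel("step")
trace_ax.set_ylabel(r"$\log_{10}\sigma$")
trace_fig.savefig("walker_traces_log.png")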
The weird thing is that if you set the upper bound on log10(sigma) to 4 (i.e. allow sigma up to 10**4), then you get the expected behaviour.
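Concretely, the only change relative to the second script is this one line (the lower bound stays as it was):

import numpy as np

# Widen only the prior box on log10(sigma): sigma is now allowed from 0.01 up to 1e4
parambounds = np.array([(-100, 100), (-2, 4)])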
This makes me wonder whether I'm missing something like a Jacobian adjustment. My understanding of that is limited, and the few things I have tried have all failed. I was also considering whether the reparametrisation silently turns the uniform prior on sigma into a log-uniform one, but even if that's the case, it's still not clear to me how that explains what I'm seeing.
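To be concrete, this is roughly the kind of adjustment I have in mind (just a sketch of my understanding, not something that's in the scripts above; logposterior_jacobian is only a name I made up here, and it would drop into the second script in place of logposterior, reusing the log-space loglikehood):

import numpy as np

def logposterior_jacobian(params, data, parambounds):
    # Sketch: params[1] is s = log10(sigma). A flat prior on s corresponds to a
    # log-uniform prior on sigma; to keep a prior that is uniform in sigma itself,
    # the log-prior picks up the Jacobian log|d sigma / d s| = s * ln(10)
    # (up to an additive constant).
    if not np.all(np.logical_and(parambounds[:, 0] <= params, params <= parambounds[:, 1])):
        return -np.inf
    lgprior = params[1] * np.log(10)
    return loglikehood(params, data) + lgprior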
Please educate me, I'm sure I'm missing something obvious here...