Restricting sampling of parameters
Hello again,
I am still trying to implement particle filters for the following SDE model:
$$ \begin{cases} dS(t) = -\alpha S(t)I(t)\,dt + \frac{1}{\sqrt{100}}\sqrt{\alpha S(t)I(t)}\,dB_1(t) \\ dI(t) = \left(\alpha S(t)I(t) - \gamma I(t)\right)dt - \frac{1}{\sqrt{100}}\sqrt{\alpha S(t)I(t)}\,dB_1(t) + \frac{1}{\sqrt{100}}\sqrt{\gamma I(t)}\,dB_2(t) \end{cases} $$
For $X=(S,I)^T$ this can be written as
$$ dX(t) = \mu(X(t))dt+\sigma(X(t))dB(t) $$
with
$$ \mu(X)=\left(\begin{array}{c} -\alpha X[1]X[2] \\ \alpha X[1]X[2]-\gamma X[2] \end{array}\right) \text{ and } \sigma(X)=\frac{1}{\sqrt{100}}\left(\begin{array}{cc} \sqrt{\alpha X[1]X[2]} & 0 \\ -\sqrt{\alpha X[1]X[2]} & \sqrt{\gamma X[2]} \end{array}\right) $$
The product $\sigma(X)\sigma(X)^T$ of the diffusion coefficient would then be the positive semi-definite matrix
$$ \Sigma(X)=\frac{1}{100}\left(\begin{array}{cc} \alpha X[1]X[2] & -\alpha X[1]X[2] \\ -\alpha X[1]X[2] & \alpha X[1]X[2]+\gamma X[2] \end{array}\right) $$
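For the particle filter I discretise this SDE with an Euler–Maruyama step of size $dt$, so the transition implemented in multi_dist below is the Gaussian approximation
$$ X_{t+dt} \mid X_t = x \;\sim\; \mathcal{N}\!\left(x + \mu(x)\,dt,\; \Sigma(x)\,dt\right). $$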
I already saw the new multivariate distribution for dealing with covariance matrices that vary across particles, thanks for that; I use a similar, self-implemented one (MvNormal_new below). My problem now is that when I run the following code for the guided particle filter:
import warnings; warnings.simplefilter('ignore')  # hide warnings
# standard libraries
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sb
import pandas as pd
import scipy
import scipy.stats as stats
from scipy import linalg
import code
from collections import OrderedDict
# modules from particles
import particles  # core module
from particles import distributions as dists  # where probability distributions are defined
from particles import state_space_models as ssm  # where state-space models are defined
from particles.collectors import Moments
from particles import mcmc
from particles import kalman
from visual_checks import sir_trajectory_check

# New try without increasing dimensions
def multi_dist(xp, beta, gamma, dt):  # xp means X_{t-1}; defines the BM parts
    # Euler mean: X_{t-1} + mu(X_{t-1})*dt (beta and gamma are on the log scale)
    loc = np.empty_like(xp)
    loc[:, 0] = xp[:, 0] - np.exp(beta)*xp[:, 0]*xp[:, 1]*dt
    loc[:, 1] = xp[:, 1] + (np.exp(beta)*xp[:, 0]*xp[:, 1] - np.exp(gamma)*xp[:, 1])*dt
    # Euler covariance: Sigma(X_{t-1})*dt, one 2x2 matrix per particle
    cov = np.zeros((xp.shape[0], xp.shape[1], xp.shape[1]))
    cov[:, 0, 0] = np.exp(beta)*xp[:, 0]*xp[:, 1]
    cov[:, 0, 1] = -np.exp(beta)*xp[:, 0]*xp[:, 1]
    cov[:, 1, 0] = -np.exp(beta)*xp[:, 0]*xp[:, 1]
    cov[:, 1, 1] = np.exp(beta)*xp[:, 0]*xp[:, 1] + np.exp(gamma)*xp[:, 1]
    cov = (dt/100)*cov
    return dists.MvNormal_new(loc=loc, cov=cov, trunc_rv=True)

class SIRModel(ssm.StateSpaceModel):
    default_params = {'beta': -1.60944,   # log(0.2)
                      'gamma': -2.9957,   # log(0.05)
                      'dt': 0.1,
                      'sigma': 0.01}
    dx = 2    # dimension of x
    N = 100   # number of particles to use

    def f_next(self, xp):
        # drift step: E[X_t | X_{t-1} = xp]
        loc = np.empty_like(xp)
        loc[:, 0] = xp[:, 0] - np.exp(self.beta)*xp[:, 0]*xp[:, 1]*self.dt
        loc[:, 1] = xp[:, 1] + (np.exp(self.beta)*xp[:, 0]*xp[:, 1] - np.exp(self.gamma)*xp[:, 1])*self.dt
        return loc

    def SigX(self, xp):
        out = np.zeros((self.N, self.dx, self.dx))
        out[:, 0, 0] = np.exp(self.beta)*xp[:, 0]*xp[:, 1]
        out[:, 0, 1] = -np.exp(self.beta)*xp[:, 0]*xp[:, 1]
        out[:, 1, 0] = -np.exp(self.beta)*xp[:, 0]*xp[:, 1]
        out[:, 1, 1] = np.exp(self.beta)*xp[:, 0]*xp[:, 1] + np.exp(self.gamma)*xp[:, 1]
        out = (self.dt/100)*out
        return out

    def SigY(self):
        out = np.zeros((self.N, self.dx, self.dx))
        out[:, 0, 0] = self.sigma
        out[:, 1, 1] = self.sigma
        return out

    def SigOpt(self, Sx, Sy):
        K = np.empty_like(Sx)
        for i in range(K.shape[0]):
            if np.all(Sx[i] == 0):    # I=0 makes SigX = 0
                K[i] = Sy[i]
            elif np.any(Sx[i] == 0):  # S=0, so only part of Sx is zero
                SxInv = 1/Sx[i]       # elementwise inverse
                SxInv[SxInv == np.inf] = 0  # entries that were 0 before stay 0
                K[i] = np.linalg.inv(SxInv + np.linalg.inv(Sy[i]))
            else:
                K[i] = np.linalg.inv(np.linalg.inv(Sx[i]) + np.linalg.inv(Sy[i]))
        return K

    def predmean(self, t, xp, data):
        SigmaX = self.SigX(xp)
        SigmaY = self.SigY()
        SigmaOpt = self.SigOpt(SigmaX, SigmaY)
        f = self.f_next(xp)
        out = np.empty_like(xp)
        for i in range(out.shape[0]):
            if np.all(SigmaX[i] == 0):    # I=0: SigX = 0 and no dynamics in the process, so reflect the data
                out[i] = data[t]
            elif np.any(SigmaX[i] == 0):  # S=0, so only part of Sx is zero
                SxInv = 1/SigmaX[i]       # elementwise inverse
                SxInv[SxInv == np.inf] = 0
                out[i] = np.dot(SigmaOpt[i], np.dot(SxInv, f[i]) + np.dot(np.linalg.inv(SigmaY[i]), data[t]))
            else:
                out[i] = np.dot(SigmaOpt[i], np.dot(np.linalg.inv(SigmaX[i]), f[i]) + np.dot(np.linalg.inv(SigmaY[i]), data[t]))
        return out

    def PX0(self):
        return multi_dist(np.array([[0.96, 0.04]]), self.beta, self.gamma, self.dt)

    def PX(self, t, xp):
        return multi_dist(xp, self.beta, self.gamma, self.dt)

    # earlier attempts for the observation model, kept for reference:
    # def PY(t, xp, x, sigma):
    #     d = {'Y1': dists.Normal(loc=x[:, 0], scale=sigma),
    #          'Y2': dists.Normal(loc=x[:, 1], scale=sigma)}
    #     return dists.StructDist(d)
    # def PY(self, t, xp, x):
    #     d = {'Y': dists.Normal(loc=x[:, 0], scale=self.sigma)}
    #     return dists.StructDist(d)

    def PY(self, t, xp, x):
        return dists.MvNormal_new(loc=x,
                                  cov=np.array([(self.sigma**2)*np.eye(2)]*x.shape[0]),
                                  trunc_rv=True)

    def proposal0(self, data):
        return multi_dist(np.array([[0.96, 0.04]]), self.beta, self.gamma, self.dt)

    def proposal(self, t, xp, data):
        SigmaX = self.SigX(xp)
        SigmaY = self.SigY()
        mean = self.predmean(t, xp, data)
        cov = self.SigOpt(SigmaX, SigmaY)
        return dists.MvNormal_new(loc=mean, cov=cov, trunc_rv=True)

# priors; true values are 0.2 and 0.05
prior_dict = {'beta': dists.LogD(dists.TruncNormal(mu=0.2, sigma=0.1, a=1e-5, b=1.0)),
              'gamma': dists.LogD(dists.TruncNormal(mu=0.05, sigma=0.03, a=1e-5, b=1.0)),
              'sigma': dists.TruncNormal(mu=0.01, sigma=0.005, a=1e-5, b=1.0)}
my_prior = dists.StructDist(prior_dict)

data = pd.read_csv('/home/vinc777/masterthesis/two_variant_model/own_simulation/results/SIR_1000_simulation_noisy.csv')
data_array = np.array(data)

pmmh = mcmc.PMMH(ssm_cls=SIRModel, prior=my_prior, data=data_array, Nx=100,
                 niter=10000, fk_cls=ssm.GuidedPF)
# pmmh.run()
pmmh.step0()
for i in range(100):
    try:
        pmmh.step(i)
    except Exception:
        code.interact(banner=str(i), local=locals())

# save results
results = pmmh.chain.theta
results_df = pd.DataFrame(results, columns=['beta', 'gamma', 'sigma'])
results_df.to_csv('output/SIR_guided' + str(10000) + 'iterations.csv')
I eventually get the error message

    return self.ssm.proposal(t, xp, self.data).rvs(size=xp.shape[0])
  File "/home/vinc777/masterthesis/two_variant_model/thesis_venv/lib/python3.8/site-packages/particles/distributions.py", line 1019, in rvs
    x[n, :] = np.maximum(0, random.multivariate_normal(self.loc[n, :], self.cov[n, :, :]))
  File "mtrand.pyx", line 4132, in numpy.random.mtrand.RandomState.multivariate_normal
  File "<__array_function__ internals>", line 180, in svd
  File "/home/vinc777/masterthesis/two_variant_model/thesis_venv/lib/python3.8/site-packages/numpy/linalg/linalg.py", line 1648, in svd
    u, s, vh = gufunc(a, signature=signature, extobj=extobj)
  File "/home/vinc777/masterthesis/two_variant_model/thesis_venv/lib/python3.8/site-packages/numpy/linalg/linalg.py", line 97, in _raise_linalgerror_svd_nonconvergence
    raise LinAlgError("SVD did not converge")
numpy.linalg.LinAlgError: SVD did not converge
So I still encounter problems with the covariance matrix. In this case I guess it is because the parameter values get very close to zero or even drop below it. That is why I already put truncated and log-transformed priors on them, to ensure that they don't "destroy" my covariance matrix.
But looking at pmmh.chain.theta as it is built up until the error occurs, I see that the parameters are at first always zero and then take values on the order of $10^{-60}$ or even far smaller. This then makes my covariance matrix either singular or unsolvable for the decomposition, since its entries are numerically too close together.
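To see which particles are responsible, I can inspect the per-particle covariances right before sampling; a throwaway helper along these lines (diagnose_cov is my own name, not part of particles) flags the numerically singular ones:

```python
import numpy as np

def diagnose_cov(cov, rtol=1e-12):
    """Throwaway diagnostic: flag (near-)singular per-particle covariances.

    cov has shape (N, dx, dx), like the arrays built by SigOpt/multi_dist above.
    """
    for n in range(cov.shape[0]):
        w = np.linalg.eigvalsh(cov[n])  # covariances are symmetric
        if w.min() <= rtol * max(w.max(), 1.0):
            print(f"particle {n}: eigenvalues {w} -> numerically singular")
```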
So my question is: how does the chain sample the parameter vector $\theta=(\beta, \gamma, \sigma)^T$, and is there a way to restrict it to some particular space, so that I can avoid too-small values, for example?
Thanks for your ideas and help again.
Hi,
The proposal distribution in mcmc.PMMH is a Gaussian random walk; i.e., the proposed parameter value is simulated from $\theta^p \mid \theta \sim N(\theta, \Sigma)$. The proposal covariance $\Sigma$ is either given by the user or, if not, adapted as in adaptive MCMC (i.e. estimated sequentially from past MCMC states).
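Schematically, one proposal step does the following (a sketch of the idea, not the actual particles source):

```python
import numpy as np

def rw_proposal(theta, Sigma, rng=None):
    """Sketch of a PMMH random-walk proposal: theta_prop ~ N(theta, Sigma)."""
    rng = np.random.default_rng() if rng is None else rng
    return rng.multivariate_normal(theta, Sigma)
```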
First, can I ask whether this point was clear to you? If not, I should review the documentation. (It is explained at least in the corresponding notebook tutorial, but maybe not in the docstrings.)
Anyway, I guess what happens here is that eventually the algorithm proposes a value of $\theta$ that gives you singular covariance matrices (I am talking this time about the covariance matrix of the state proposal inside each PF). Could you adapt the prior so that the prior probability of this happening is exactly zero?
In that case, pmmh will not even run the PF, and the problem should not arise.
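In pseudo-code, the point is this (again only a sketch; prior_logpdf and run_pf are hypothetical helpers standing in for the actual machinery):

```python
import numpy as np

def pmmh_step(theta, Sigma, prior_logpdf, run_pf, rng=None):
    """Sketch of one PMMH iteration with hypothetical helpers.

    prior_logpdf(theta) -> log prior density (-inf outside the support)
    run_pf(theta)       -> particle-filter estimate of the log-likelihood
    """
    rng = np.random.default_rng() if rng is None else rng
    theta_prop = rng.multivariate_normal(theta, Sigma)
    if prior_logpdf(theta_prop) == -np.inf:
        return theta  # rejected outright: the particle filter is never run
    loglik = run_pf(theta_prop)
    # ... then the usual Metropolis-Hastings accept/reject using prior + loglik
```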
Thanks for your response. It isn't clear to me how I could supply the proposal covariance for the simulation of the new $\theta^p$, unless it is explained somewhere in the notebook tutorials and I missed it.
My covariance matrix becomes singular whenever $\gamma=0$ is chosen. But since I gave a truncated normal prior, truncated at $10^{-5}$, this should not be possible, because 0 is out of range. Still, it simulates $\gamma=0$, or at least values so small that the entries of the covariance matrix become numerically indistinguishable, I guess.
First, to set the covariance matrix of the proposal, use the argument rw_cov; see here:
https://particles-sequential-monte-carlo-in-python.readthedocs.io/en/latest/_autosummary/particles.mcmc.PMMH.html#particles.mcmc.PMMH
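For instance, reusing the objects from your script (the scale 0.1 is only a placeholder you would have to tune, and I am assuming the parameters are ordered as in your prior):

```python
import numpy as np

# fix the random-walk covariance instead of letting PMMH adapt it;
# one row/column per parameter: beta, gamma, sigma
my_rw_cov = (0.1 ** 2) * np.eye(3)
pmmh = mcmc.PMMH(ssm_cls=SIRModel, prior=my_prior, data=data_array,
                 Nx=100, niter=10000, fk_cls=ssm.GuidedPF, rw_cov=my_rw_cov)
```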
But now that you mention it, I can see that the docstring is not super-clear... I'll try to fix it shortly.
Second, yes, you're right: a value below $10^{-5}$ should not be proposed, since it is outside the prior range. I'll look into it, but in the meantime you could adapt your model to forbid values below $10^{-5}$ (e.g. by forcing $\gamma$ to be $10^{-5}$ whenever the supplied value is smaller).
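Something along these lines (a sketch only; clip_log_params is a hypothetical helper, and remember that your $\beta$ and $\gamma$ are stored on the log scale because of the LogD priors, so the floor is $\log 10^{-5}$):

```python
import numpy as np

LOG_FLOOR = np.log(1e-5)  # prior support starts at 1e-5; parameters live on the log scale

def clip_log_params(beta, gamma):
    """Hypothetical helper: map proposed values back onto the prior support."""
    return max(beta, LOG_FLOOR), max(gamma, LOG_FLOOR)
```

You would then call it at the top of multi_dist (and SigX), before exponentiating.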
More details are now given in the docstrings of PMMH and its parent classes regarding the parameter rw_cov, and more generally on how to calibrate random-walk proposals.