Observed values input for JointDistributionNamed give Value Error
Hello, I am replicating the pymc3 example Bayesian Estimation Supersedes the T-Test. I am using tensorflow==2.10.1 and tensorflow-probability==0.17.0. As a TFP example, I am following the tutorial Gaussian Process Regression. The observed variables are input in the target_log_prob_fn.
def target_log_prob(amplitude, length_scale, observation_noise_variance):
return gp_joint_model.log_prob({
'amplitude': amplitude,
'length_scale': length_scale,
'observation_noise_variance': observation_noise_variance,
'observations': observations_
})
My model is defined as:
drug_observations_ = tf.cast(drug_observations.reshape(-1,1), dtype)
placebo_observations_ = tf.cast(placebo_observations.reshape(-1,1), dtype)
mu_m = tf.cast(df.value.mean(), dtype)
mu_s = tf.cast(df.value.std() * 2, dtype)
nu_rate = tf.cast(1/29.0, dtype)
sigma_init = tf.cast(1/10., dtype)
def get_model():
model = tfd.JointDistributionNamed(dict(
nu = tfd.Exponential(name="nu", rate=nu_rate),
drug_sigma = tfd.Exponential(name="drug_sigma", rate=sigma_init),
drug_mean = tfd.Normal(name='drug_mean', loc=mu_m, scale=mu_s),
placebo_sigma = tfd.Exponential(name="placebo_sigma", rate=sigma_init),
placebo_mean = tfd.Normal(name='placebo_mean', loc=mu_m, scale=mu_s),
drug = lambda nu, drug_mean, drug_sigma: tfd.StudentT(name='drug',
df=nu,
loc=drug_mean,
scale=drug_sigma),
placebo = lambda nu, placebo_mean, placebo_sigma: tfd.StudentT(name="placebo",
df=nu,
loc=placebo_mean,
scale=placebo_sigma)
), validate_args=True)
return model
def target_log_prob_fn(nu, drug_mean, drug_sigma, placebo_mean, placebo_sigma):
return -model.log_prob({
'nu': nu,
'drug_mean': drug_mean,
'drug_sigma': drug_sigma,
'placebo_mean': placebo_mean,
'placebo_sigma': placebo_sigma,
'drug': drug_observations_,
'placebo': placebo_observations_
})
constrain_positive = tfb.Shift(np.finfo(dtype).tiny)(tfb.Exp())
nu_var = tfp.util.TransformedVariable(
initial_value=1.,
bijector=constrain_positive,
name='nu',
dtype=dtype)
drug_sigma_var = tfp.util.TransformedVariable(
initial_value=1.,
bijector=constrain_positive,
name='drug_sigma',
dtype=dtype)
placebo_sigma_var = tfp.util.TransformedVariable(
initial_value=1.,
bijector=constrain_positive,
name='placebo_sigma',
dtype=dtype)
drug_mean_var = tfp.util.TransformedVariable(
initial_value=1.,
bijector=tfb.Identity(),
name='drug_mean',
dtype=dtype)
placebo_mean_var = tfp.util.TransformedVariable(
initial_value=1.,
bijector=tfb.Identity(),
name='placebo_mean',
dtype=dtype)
variables = [nu_var, drug_mean_var, drug_sigma_var, placebo_mean_var, placebo_sigma_var]
trainable_variables = [v.trainable_variables[0] for v in variables]
num_iters = 1000
optimizer = tf.optimizers.Adam(learning_rate=.01)
# Use `tf.function` to trace the loss for more efficient evaluation.
@tf.function(autograph=False, jit_compile=False)
def train_model():
with tf.GradientTape() as tape:
loss = target_log_prob_fn(*variables)
grads = tape.gradient(loss, trainable_variables)
optimizer.apply_gradients(zip(grads, trainable_variables))
return loss
The code below works for my specified model. I get a good dict for the sample and a scaler for the log_prob().
sample_dict = model.sample()
model.log_prob(sample_dict)
I input my observed variables in the target_log_prob_fn, but I get an error executing that function : ValueError: Broadcasting probably indicates an error in model. I get the same when running HMC.
I think I need to input my observations within the model function, but I do not see how to do it without releasing the dependency on the drug_mean and placebo_mean variables. In the pymc3 model, these are what are being learned and compared.