botorch
botorch copied to clipboard
[Bug] Multi-fidelity KG acquisition function
Hello, I am applying the tutorial "Multi-Fidelity BO with Discrete Fidelities using KG" to a normalized Currin function with an extra fidelity. I thought it was working so far until I increased the number of initial points to 40 low-fidelity and 20 high-fidelity as part of the study. Now I get the error:
raise NotPSDError(
gpytorch.utils.errors.NotPSDError: Matrix not positive definite after repeatedly adding jitter up to 1.0e-06. Original error on first attempt: cholesky_cpu: For batch 199: U(2,2) is zero, singular U.
This happens when I call the function :
To reproduce
X_init = gen_one_shot_kg_initial_conditions(
acq_function = mfkg_acqf,
bounds=bounds,
q=4,
num_restarts=10,
raw_samples=512,
)
The Currin function is defined here:
To reproduce
from __future__ import annotations
import math
import torch
from botorch.test_functions.synthetic import SyntheticTestFunction
from torch import Tensor
from typing import Optional
import pandas as pd
import numpy as np
class MultifidelityCurrin(SyntheticTestFunction):
def __init__(self, noise_std: Optional[float] = None, negate: bool = False, normalize=True, normalize_coef=14.0) -> None:
self.dim = 3
self._bounds = [(0.0, 1.0), (0.0, 1.0), (0.0, 1.0)]
# self._bounds = [(0.0, 1.0) for _ in range(3)]
self._optimizers = None
super(MultifidelityCurrin, self).__init__(noise_std=noise_std, negate=negate)
self.normalize = normalize
self.normalize_coef = normalize_coef
self._logger = Logger()
# self._logger.write("Succesfully initialized TwoDimensionalAlpacaCrystalGrowth test function")
def target_fidelities(self):
return {self.dim - 1: 1.0}
def get_low_fidelity(self, x1, x2):
term1 = self.get_high_fidelity(x1 + 0.05, x2 + 0.05) + \
self.get_high_fidelity(x1 + 0.05, np.maximum(0, x2 - 0.05))
term2 = self.get_high_fidelity(x1 - 0.05, x2 + 0.05) + \
self.get_high_fidelity(x1 - 0.05, np.maximum(0, x2 - 0.05))
y = 0.25 * (term1 + term2)
if self.normalize:
y = y / self.normalize_coef
return y
def get_high_fidelity(self, x1, x2):
term1 = 1 - np.exp(-(1. / (2 * x2 + 1e-6)))
term2 = 2300 * x1 ** 3 + 1900 * x1 ** 2 + 2092 * x1 + 60
term3 = 100 * x1 ** 3 + 500 * x1 ** 2 + 4 * x1 + 20
y = term1 * term2 / term3
if self.normalize:
y = y / self.normalize_coef
return y
def evaluate_single_batch(self, X_single: Tensor) -> Tensor:
outputs = []
for i in range(X_single.shape[0]):
fidelity = X_single[i][2]
x1 = X_single[i][0]
x2 = X_single[i][1]
if fidelity == 0.0:
outputs.append(self.get_low_fidelity(x1, x2))
elif fidelity == 1.0:
outputs.append(self.get_high_fidelity(x1, x2))
else:
print("Fidelity level not implemented !")
result = torch.stack(outputs, 0)
return result
def evaluate_true(self, X: Tensor) -> Tensor:
r"""Evaluate the function (w/o observation noise) on a set of points."""
# return np.absolute(0.4 * X[0]**2 - 0.2)
return self.evaluate_single_batch(X_single=X)
def get_bounds(self, **tkwargs) -> Tensor:
return torch.tensor([[0.0, 0.0, 0.], [0.1, 1., 1.]])
def get_target_fidelities(self):
return {self.dim: 1.0}
Is it a normal behavior or is the function somehow ill defined?
Is it a normal behavior or is the function somehow ill defined?
It seems like you're running into some numerical issues b/c of very ill-conditioned kernel matrices. This isn't too surprising b/c gen_one_shot_kg_initial_conditions under the hood will evaluate 512 of q=4 batches, and in 3 dimensions there is quite a high likelihood that some of the randomly generated points in that heuristic will end up very close to each other.
You could try to reduce the raw_samples to make this less likely. Or you could instead use sequential greedy generation of the q-batch (in which case q=1 in gen_one_shot_kg_initial_conditions), which should get rid of the problem of evaluating the joint posterior of two very similar points, which according to my guess is what is causing this here. To be sure, do you have a fully reproable example (incl. the optimization loop and the full stack trace)? Thanks!
Thank you, I wasnt aware of this problem. But I actually noticed that this happens only when I standardize the Y-training data, which I need to do on my real case. Here is the code: ##To reproduce:
import os
import torch
from botorch.models.transforms import Standardize
from botorch.utils import standardize
from gpinformationfusion.test_functions.currin_augmented import MultifidelityCurrin #Replace this with the function above
from botorch import fit_gpytorch_model
from botorch.models.cost import AffineFidelityCostModel
from botorch.acquisition.cost_aware import InverseCostWeightedUtility
from botorch.acquisition import PosteriorMean
from botorch.acquisition.knowledge_gradient import qMultiFidelityKnowledgeGradient
from botorch.acquisition.fixed_feature import FixedFeatureAcquisitionFunction
from botorch.optim.optimize import optimize_acqf
from botorch.acquisition.utils import project_to_target_fidelity
from botorch.optim.initializers import gen_one_shot_kg_initial_conditions
from botorch.optim.optimize import optimize_acqf_mixed
from botorch.models.gp_regression_fidelity import SingleTaskMultiFidelityGP
from gpytorch.mlls.exact_marginal_log_likelihood import ExactMarginalLogLikelihood
tkwargs = {
"dtype": torch.double,
"device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
}
SMOKE_TEST = os.environ.get("SMOKE_TEST")
problem = MultifidelityCurrin(negate=False).to(**tkwargs)
fidelities = torch.tensor([0.0, 1.0], **tkwargs)
def generate_data(objective, n_high, n_low):
x_high = torch.rand(n_high, 2, **tkwargs)
x_low = torch.rand(n_low, 2, **tkwargs)
train_x = torch.cat((x_high, x_low), dim=0)
train_f_high = torch.ones(n_high, 1)
train_f_low = torch.zeros(n_low, 1)
train_x_full = torch.cat((train_x, torch.cat((train_f_high, train_f_low), dim=0)), dim=1)
train_obj = objective(train_x_full).unsqueeze(-1)
return train_x_full, train_obj
def initialize_model(train_x, train_obj):
# define a surrogate model suited for a "training data"-like fidelity parameter
# in dimension 6, as in [2]
model = SingleTaskMultiFidelityGP(
train_x,
#train_obj,
standardize(train_obj),
#outcome_transform=Standardize(m=1),
data_fidelity=2
)
mll = ExactMarginalLogLikelihood(model.likelihood, model)
return mll, model
bounds = torch.tensor([[0.0] * problem.dim, [1.0] * problem.dim], **tkwargs)
target_fidelities = {3: 1.0}
cost_model = AffineFidelityCostModel(fidelity_weights={2: 1.0}, fixed_cost=5.0)
cost_aware_utility = InverseCostWeightedUtility(cost_model=cost_model)
def project(X):
return project_to_target_fidelity(X=X, target_fidelities=target_fidelities)
def get_mfkg(model):
curr_val_acqf = FixedFeatureAcquisitionFunction(
acq_function=PosteriorMean(model),
d=3,
columns=[2],
values=[1],
)
_, current_value = optimize_acqf(
acq_function=curr_val_acqf,
bounds=bounds[:, :-1],
q=1,
num_restarts=10 ,
raw_samples=1024,
options={"batch_limit": 10, "maxiter": 200},
)
return qMultiFidelityKnowledgeGradient(
model=model,
num_fantasies=128 if not SMOKE_TEST else 2,
current_value=current_value,
cost_aware_utility=cost_aware_utility,
project=project,
)
torch.set_printoptions(precision=5, sci_mode=False)
NUM_RESTARTS = 10
RAW_SAMPLES = 512
def optimize_mfkg_and_get_observation(mfkg_acqf):
"""Optimizes MFKG and returns a new candidate, observation, and cost."""
X_init = gen_one_shot_kg_initial_conditions(
acq_function=mfkg_acqf,
bounds=bounds,
q=2,
num_restarts=10,
raw_samples=512,
)
candidates, _ = optimize_acqf_mixed(
acq_function=mfkg_acqf,
bounds=bounds,
fixed_features_list=[{2: 0.0}, {2: 1.0}],
q=2,
num_restarts=NUM_RESTARTS,
raw_samples=RAW_SAMPLES,
batch_initial_conditions=X_init,
options={"batch_limit": 5, "maxiter": 200},
)
# observe new values
cost = cost_model(candidates).sum()
new_x = candidates.detach()
new_obj = problem(new_x).unsqueeze(-1)
return new_x, new_obj, cost
def get_recommendation(model):
rec_acqf = FixedFeatureAcquisitionFunction(
acq_function=PosteriorMean(model),
d=3,
columns=[2],
values=[1],
)
final_rec, _ = optimize_acqf(
acq_function=rec_acqf,
bounds=bounds[:, :-1],
q=1,
num_restarts=10,
raw_samples=512,
options={"batch_limit": 5, "maxiter": 200},
)
final_rec = rec_acqf._construct_X_full(final_rec)
objective_value = problem(final_rec)
print(f"recommended point:\n{final_rec}\n\nobjective value:\n{objective_value}")
return final_rec
train_x, train_obj = generate_data(problem, n_high=10, n_low=30)
cumulative_cost = 0.0
N_ITER = 20
for _ in range(N_ITER):
mll, model = initialize_model(train_x, train_obj)
mfkg_acqf = get_mfkg(model)
fit_gpytorch_model(mll)
#mfkg_acqf = get_mfkg(model)
new_x, new_obj, cost = optimize_mfkg_and_get_observation(mfkg_acqf)
print("new observed points are: ", new_x)
train_x = torch.cat([train_x, new_x])
train_obj = torch.cat([train_obj, new_obj])
cumulative_cost += cost
final_rec = get_recommendation(model)
print(f"\ntotal cost: {cumulative_cost}\n")
You can try with both. For me, this works only when I don't standardize the data (in the model definition). But even when it works, it doesn't improve above 0.9856..
Closing as inactive.