botorch
Batch queries not respecting constraints in multi-objective setting
Issue description
I'm trying to implement multi-objective parallel Bayesian optimization with constraints according to the tutorial. However, the generated queries are not respecting the constraint.
So I would mainly need some input on whether or not my implementation of this constraint is OK.
Thanks!
Code example
#for git
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import gaussian_process
from sklearn.gaussian_process.kernels import Matern, WhiteKernel, ConstantKernel, DotProduct, RBF
from botorch.utils.multi_objective.hypervolume import Hypervolume
from botorch.utils.multi_objective.pareto import is_non_dominated
from botorch import fit_gpytorch_model
# imports for the models, sampling utilities, and acquisition function used below
from botorch.models import SingleTaskGP, ModelListGP
from botorch.models.transforms.outcome import Standardize
from botorch.utils.sampling import draw_sobol_samples
from botorch.utils.transforms import normalize, unnormalize
from botorch.sampling import SobolQMCNormalSampler
from botorch.acquisition.multi_objective.monte_carlo import qNoisyExpectedHypervolumeImprovement
from botorch.acquisition.multi_objective.objective import IdentityMCMultiOutputObjective
from botorch.optim import optimize_acqf
from gpytorch.mlls import SumMarginalLogLikelihood
# some script parameters
BATCH_SIZE = 2
NUM_RESTARTS = 2
RAW_SAMPLES = 4
dim = 2
N_BATCH = 5
MC_SAMPLES = 16
N_SOBOL = 6
# here I construct my 'true' objective functions from data
masterDataGS = pd.read_csv("parameterStudyResultsGS.txt", encoding='unicode_escape')
masterDataGS.rename(columns={masterDataGS.columns[1]: 'Unit cell width (µm)', masterDataGS.columns[2]: 'Cylinder diameter (µm)'}, inplace=True)
parameters = masterDataGS.columns[1:3]
responses = masterDataGS.columns[3:]
Perm_s = masterDataGS[responses[2]] * 100000000000
masterDataGS['Perm_s'] = Perm_s
responses = masterDataGS.columns[3:]
X = masterDataGS[[parameters[0], parameters[1]]].values
Y = masterDataGS[responses[1]].values
print(responses[1])
kernel = RBF(length_scale=np.ones(X.shape[1]))
gpSVtrue = gaussian_process.GaussianProcessRegressor(kernel, normalize_y=True, n_restarts_optimizer=1000)
gpSVtrue = gpSVtrue.fit(X, Y)
print(gpSVtrue.kernel_)
print('MAPE: ' + str(np.mean(np.divide((np.abs(gpSVtrue.predict(X) - Y)), Y))))
X = masterDataGS[[parameters[0], parameters[1]]].values
Y = masterDataGS[responses[-1]].values
print(responses[-1])
kernel = RBF(length_scale=np.ones(X.shape[1]))
gpPermtrue = gaussian_process.GaussianProcessRegressor(kernel, normalize_y=False, n_restarts_optimizer=1000)
gpPermtrue = gpPermtrue.fit(X, Y)
print(gpPermtrue.kernel_)
print('MAPE: ' + str(np.mean(np.divide((np.abs(gpPermtrue.predict(X) - Y)), Y))))
def evaluateGSGP(x):
    objpred = np.zeros(x.shape)
    objpred[:, 0] = gpPermtrue.predict(x)
    objpred[:, 1] = gpSVtrue.predict(x)
    objpred = torch.from_numpy(objpred)
    return objpred
# define a constraint, and some bounds
def evaluateCons1(x):
    conspred = x[:, 0] * 0.25 + 50 - x[:, 1]
    conspred = conspred.unsqueeze(1)
    return conspred

bounds = np.array([[400, 150], [1000, 700]])
# define a reference point
refp = torch.from_numpy(np.array([0, 0])).float()
# function that only returns feasible sobol samples
def generateSobolSamples(N):
    # generate feasible sobol samples
    found = 0
    while found < N:
        train_x_temp = draw_sobol_samples(torch.from_numpy(bounds).type(torch.FloatTensor), n=128, q=1).squeeze(1)
        train_x_temp_check = evaluateCons1(train_x_temp)
        is_feas_sobol = (train_x_temp_check <= 0).all(dim=-1)
        train_x_temp = train_x_temp[is_feas_sobol]
        found = train_x_temp.shape[0]
    train_x = train_x_temp[:N, :]
    return train_x
standard_bounds = torch.zeros(2, dim)
standard_bounds[1] = 1
hv = Hypervolume(ref_point=refp)
hvs = []
# generate initial data and fit model
train_x = generateSobolSamples(N_SOBOL)
train_obj = evaluateGSGP(train_x)
train_con = evaluateCons1(train_x)
train_x_norm = normalize(train_x, bounds)
train_y = torch.cat([train_obj, train_con], dim=-1)
models = []
for i in range(train_y.shape[-1]):
    models.append(
        SingleTaskGP(train_x_norm, train_y[..., i : i + 1], outcome_transform=Standardize(m=1))
    )
model = ModelListGP(*models)
mll = SumMarginalLogLikelihood(model.likelihood, model)
# compute pareto front
is_feas = (train_con <= 0).all(dim=-1)
feas_train_obj = train_obj[is_feas]
if feas_train_obj.shape[0] > 0:
    pareto_mask = is_non_dominated(feas_train_obj)
    pareto_y = feas_train_obj[pareto_mask]
    # compute hypervolume
    volume = hv.compute(pareto_y)
else:
    volume = 0.0
hvs.append(volume)
for i in range(10):  # BO loop
    print(i)
    fit_gpytorch_model(mll)
    sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)
    acq_func = qNoisyExpectedHypervolumeImprovement(
        model=model,
        ref_point=refp,  # use known reference point
        X_baseline=train_x_norm,
        sampler=sampler,
        prune_baseline=True,
        # define an objective that specifies which outcomes are the objectives
        objective=IdentityMCMultiOutputObjective(outcomes=[0, 1]),
        # specify that the constraint is on the last outcome
        constraints=[lambda Z: Z[..., -1]],
    )
    # optimize
    candidates, _ = optimize_acqf(
        acq_function=acq_func,
        bounds=standard_bounds,
        q=BATCH_SIZE,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options={"batch_limit": 5, "maxiter": 200},
        sequential=True,
    )
    # observe new values
    new_x = unnormalize(candidates.detach(), bounds=bounds)
    new_obj = evaluateGSGP(new_x)
    new_con = evaluateCons1(new_x)
    # update dataset
    train_x = torch.cat([train_x, new_x])
    train_obj = torch.cat([train_obj, new_obj])
    train_con = torch.cat([train_con, new_con])
    # compute pareto front
    is_feas = (train_con <= 0).all(dim=-1)
    feas_train_obj = train_obj[is_feas]
    if feas_train_obj.shape[0] > 0:
        pareto_mask = is_non_dominated(feas_train_obj)
        pareto_y = feas_train_obj[pareto_mask]
        # compute hypervolume
        volume = hv.compute(pareto_y)
    else:
        volume = 0.0
    hvs.append(volume)
    # fit models
    train_x_norm = normalize(train_x, bounds)
    train_y = torch.cat([train_obj, train_con], dim=-1)
    models = []
    for i in range(train_y.shape[-1]):
        models.append(
            SingleTaskGP(train_x_norm, train_y[..., i : i + 1], outcome_transform=Standardize(m=1))
        )
    model = ModelListGP(*models)
    mll = SumMarginalLogLikelihood(model.likelihood, model)
# make a scatter plot with the constraint in red, sobol points in blue, and qnehvi queries in orange
fig, ax = plt.subplots(1, 1, figsize=[7.5, 4.8])
ax.set_xlim([400, 1000])
ax.set_ylim([150, 700])
plt.plot([w1, w2], [d1b, d2b], c='r', label='Print constraint')
plt.scatter(train_x[:6, 0], train_x[:6, 1])
plt.scatter(train_x[6:, 0], train_x[6:, 1])
Hi @EvanClaes. I cannot run your code since it requires a file read in pd.read_csv("parameterStudyResultsGS.txt", encoding='unicode_escape'). So, here are some comments based on the code only:
- constraints=[lambda Z: Z[..., -1]] specifies that a design is feasible if the last observation is negative. So, it's the correct way of using the constraints, assuming that negative values imply feasibility.
- qNEHVI can produce infeasible candidates. It does not know what the true feasibility is like, it just goes by the model predictions. To see if it expected the candidate to be feasible (which it would, otherwise the expected HVI would be 0) you can check the model predictions at new_x (of the model that was used in the acquisition function).
- In constrained optimization, the best solutions are often found at the boundary of the feasible space, so the acquisition functions may often recommend infeasible candidates until they have a reliable model of the constraints.
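For example, a quick check of whether the model expected the new candidates to be feasible could look like this (a sketch reusing the variable names from the code above; the last model output is the constraint):

with torch.no_grad():
    # the GPs were fit on normalized inputs, so normalize new_x before querying them
    post_mean = model.posterior(normalize(new_x, bounds)).mean
print(post_mean[..., -1])  # a negative predicted constraint mean means the model expects feasibility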
Hello @saitcakmak,
Thank you so much for your reply.
If qNEHVI (and then I assume EHVI, qEHVI, NEHVI as well) can produce infeasible candidates, how is this typically dealt with in practice, assuming that it is actually impossible to evaluate the objectives for these candidates (e.g. in the case of physical experiments)? Is it best to drop them from the candidate set, and evaluate only the feasible ones?
On the other hand, the constraint value can be evaluated, even for infeasible candidates. Would it maybe be possible to, before starting the BO procedure, train a model separately just for the constraint on an extensive 'grid dataset' of candidates so that this constraint model is already quite accurate?
Have a good day!
In the settings we've considered, the infeasible candidates are generally possible to evaluate (and we do not know that they are infeasible without evaluating them) but they are infeasible due to violating some hard limitation on the constraint metric. The example in the notebook mimics such a scenario.
In your case, it sounds like you know whether a design is feasible or not before evaluating it. So, the best approach would be to share this information with the acquisition function, so that it would only produce feasible candidates (feasibility is approximated via a sigmoid for differentiability, so infeasible candidates may still get produced, though this would be unlikely).
If the feasibility is a known cheap-to-evaluate function, you can wrap it in a DeterministicModel and combine it with the objectives in a ModelList. This way, whenever the acquisition function tries to evaluate the model predictions, it would directly evaluate the known feasibility function.
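For concreteness, a minimal sketch of what this could look like with the code above (my own sketch, assuming GenericDeterministicModel and ModelList from botorch.models, and the convention that negative constraint values mean feasible):

from botorch.models.deterministic import GenericDeterministicModel
from botorch.models.model import ModelList
from gpytorch.mlls import ExactMarginalLogLikelihood

# GPs for the two objectives only, fit individually on the normalized inputs
obj_models = []
for i in range(train_obj.shape[-1]):
    m = SingleTaskGP(train_x_norm, train_obj.to(train_x_norm)[..., i : i + 1], outcome_transform=Standardize(m=1))
    fit_gpytorch_model(ExactMarginalLogLikelihood(m.likelihood, m))
    obj_models.append(m)

def cons_on_normalized(X):
    # re-implementation of evaluateCons1 that accepts batched, normalized inputs,
    # since the acquisition function evaluates the model in the normalized space
    X_orig = unnormalize(X, bounds=torch.from_numpy(bounds).float())
    return (X_orig[..., 0] * 0.25 + 50 - X_orig[..., 1]).unsqueeze(-1)

con_model = GenericDeterministicModel(cons_on_normalized)
model = ModelList(*obj_models, con_model)
# qNEHVI can then be constructed on `model` exactly as above; constraints=[lambda Z: Z[..., -1]]
# now evaluates the known constraint directly instead of a GP approximation of it.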
Would it maybe be possible to, before starting the BO procedure, train a model separately just for the constraint on an extensive 'grid dataset' of candidates so that this constraint model is already quite accurate?
This would be the next option if the constraint is not cheap enough to be repeatedly evaluated but is cheap enough to evaluate on a grid etc. You can fit a GP on the constraint and combine it with the models of the objectives in the ModelListGP. Having a more accurate model of the constraint would help reduce the number of infeasible candidates that get generated.
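Roughly, that option could look like the sketch below (my names; obj_gp_models stands for the two objective SingleTaskGPs fit as in the loop above, and the grid size of 256 is arbitrary):

from gpytorch.mlls import ExactMarginalLogLikelihood

# evaluate the constraint once on a dense Sobol grid
grid_x = draw_sobol_samples(torch.from_numpy(bounds).float(), n=256, q=1).squeeze(1)
grid_con = evaluateCons1(grid_x)

# fit a dedicated GP to the constraint on the normalized inputs
con_gp = SingleTaskGP(normalize(grid_x, bounds), grid_con, outcome_transform=Standardize(m=1))
fit_gpytorch_model(ExactMarginalLogLikelihood(con_gp.likelihood, con_gp))

# combine with the objective GPs; the constraint GP would not be refit inside the BO loop
model = ModelListGP(*obj_gp_models, con_gp)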
To add to this, if you have a (cheap to evaluate) function that can tell you upfront whether a design is feasible to evaluate or not, then adding that as a constraint to the acquisition function optimization problem (via these args) would be the most direct solution.
@saitcakmak @Balandat, thanks. Really appreciate your input. Will try the deterministic approach first.
Have a good day!
So I tried Max's proposal. The code is complaining that I didn't provide 'batch_initial_conditions', although I did. Maybe in the wrong format?
# optimize
candidates, _ = optimize_acqf(
    acq_function=acq_func,
    bounds=standard_bounds,
    q=BATCH_SIZE,
    num_restarts=NUM_RESTARTS,
    raw_samples=RAW_SAMPLES,  # used for initialization heuristic
    options={"batch_limit": 5, "maxiter": 200},
    sequential=True,
    batch_initial_conditions=torch.from_numpy(np.random.rand(4, 2)),
    nonlinear_inequality_constraints=evaluateCons1,
)
Error message
NotImplementedError                       Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_7900/3509009059.py in <module>

~\anaconda3\lib\site-packages\botorch\optim\optimize.py in optimize_acqf(acq_function, bounds, q, num_restarts, raw_samples, options, inequality_constraints, equality_constraints, nonlinear_inequality_constraints, fixed_features, post_processing_func, batch_initial_conditions, return_best_only, sequential, **kwargs)
    138             base_X_pending = acq_function.X_pending
    139             for i in range(q):
--> 140                 candidate, acq_value = optimize_acqf(
    141                     acq_function=acq_function,
    142                     bounds=bounds,

~\anaconda3\lib\site-packages\botorch\optim\optimize.py in optimize_acqf(acq_function, bounds, q, num_restarts, raw_samples, options, inequality_constraints, equality_constraints, nonlinear_inequality_constraints, fixed_features, post_processing_func, batch_initial_conditions, return_best_only, sequential, **kwargs)
    183     if batch_initial_conditions is None:
    184         if nonlinear_inequality_constraints:
--> 185             raise NotImplementedError(
    186                 "`batch_initial_conditions` must be given if there are non-linear "
    187                 "inequality constraints."

NotImplementedError: `batch_initial_conditions` must be given if there are non-linear inequality constraints.
Also tried Sait's suggestion, in which case I get the following:
models = []
for i in range(train_y.shape[-1]):
    models.append(
        SingleTaskGP(train_x, train_y[..., i : i + 1], outcome_transform=Standardize(m=1))
    )
models.append(GenericDeterministicModel(evaluateCons1))
model = ModelListGP(*models)
Error message
ValueError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_7900/2820781124.py in <module>

~\anaconda3\lib\site-packages\botorch\models\model_list_gp_regression.py in __init__(self, *gp_models)
     47             >>> model = ModelListGP(model1, model2)
     48         """
---> 49         super().__init__(*gp_models)
     50
     51     def condition_on_observations(

~\anaconda3\lib\site-packages\gpytorch\models\model_list.py in __init__(self, *models)
     31         for m in models:
     32             if not hasattr(m, "likelihood"):
---> 33                 raise ValueError(
     34                     "IndependentModelList currently only supports models that have a likelihood (e.g. ExactGPs)"
     35                 )

ValueError: IndependentModelList currently only supports models that have a likelihood (e.g. ExactGPs)
The code is complaining that I didn't provide 'batch_initial_conditions', although I did. Maybe in the wrong format?
Hmm, here BATCH_SIZE is > 1, right? Looking at the optimize_acqf code, I think there may be a bug with that: it's using sequential greedy optimization by default but is not passing in batch_initial_conditions to the successive internal calls: https://github.com/pytorch/botorch/blob/main/botorch/optim/optimize.py#L164

This is a hard problem; essentially we'll need some way of automatically generating feasible initial conditions, which is hard to do for general nonlinear constraints (rejection sampling is one approach, but that won't scale if the size of the feasible set is small relative to the bounding box). In your case, would you be able to provide such a function that could generate initial conditions for optimization that we could pass into and call inside optimize_acqf?
Hello @Balandat, again thanks so much for your reply. Really love how much you guys are contributing to (leading?) the open source community around BO.
For the problems I'm faced with so far, the feasible set is relatively large compared to the bounding box, so rejection sampling is a viable option, I think. At the moment I'm doing the same for generating the Sobol initial samples, dropping infeasible ones. Definitely won't be the bottleneck time-wise, I suspect?
Is there an example in the documentation (or even better, in a tutorial?) of how I would need to call this function in optimize_acqf?
Is there an example in the documentation (or even better, in a tutorial?) of how I would need to call this function in optimize_acqf?
So there is no canned code that will do that for you out of the box at this point, and it would require changing optimize_acqf, but I can put together an example PR for how to do that.
I just pushed https://github.com/Balandat/botorch/commit/fedd2e7c2ca4af6d16f0dafab2983835a62225c3 which adds an ic_gen kwarg to optimize_acqf that lets you implement your own custom initial condition generator. The API of that generator will need to comply with that of gen_batch_initial_conditions.
If you can implement that using rejection sampling (or any other approach really) then this will hopefully unblock you.
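For reference, a rejection-sampling generator could look roughly like this (my own sketch, reusing evaluateCons1, bounds, and standard_bounds from the code above; the names gen_feasible_initial_conditions and bounds_t are mine, and a reasonably large feasible set is assumed so the loop terminates quickly):

bounds_t = torch.from_numpy(bounds).float()  # original-scale bounds as a tensor

def gen_feasible_initial_conditions(acq_function, bounds, q, num_restarts, raw_samples, **kwargs):
    # `bounds` here is the normalized standard_bounds passed in by optimize_acqf,
    # while bounds_t above holds the original-scale bounds for the constraint check
    needed = num_restarts * q
    accepted = torch.empty(0, bounds.shape[-1])
    while accepted.shape[0] < needed:
        cand = draw_sobol_samples(bounds=bounds, n=raw_samples, q=1).squeeze(1)
        con = evaluateCons1(unnormalize(cand, bounds=bounds_t))
        accepted = torch.cat([accepted, cand[(con <= 0).all(dim=-1)]])
    return accepted[:needed].reshape(num_restarts, q, -1)

# Hypothetical usage with the commit above; the exact sign/shape conventions expected by
# nonlinear_inequality_constraints should be checked against the optimize_acqf docstring of
# the installed version (it expects callables that are >= 0 on feasible points):
# candidates, _ = optimize_acqf(
#     acq_function=acq_func,
#     bounds=standard_bounds,
#     q=BATCH_SIZE,
#     num_restarts=NUM_RESTARTS,
#     raw_samples=RAW_SAMPLES,
#     nonlinear_inequality_constraints=[lambda x: -evaluateCons1(unnormalize(x.unsqueeze(0), bounds=bounds_t)).squeeze()],
#     ic_gen=gen_feasible_initial_conditions,
# )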
@Balandat thanks for this. Will try it out.
I'm curious if it would also be possible to do the sequential greedy optimization outside of optimize_acqf? If I execute it for q = 1 and then sample the GP for this candidate, add it to the data, until I reach the batch size that I want?
Yeah this would be an option as well. But you'll probably want to sample more than just once and then integrate the model predictions over those samples.
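One way to sketch this without manually adding fantasy data is to rely on qNEHVI's pending-point handling, which integrates over the model's predictions at already-selected points (a sketch reusing the names from the code above):

batch = []
for _ in range(BATCH_SIZE):
    cand, _ = optimize_acqf(
        acq_function=acq_func,
        bounds=standard_bounds,
        q=1,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,
    )
    batch.append(cand)
    # mark the selected point as pending; qNEHVI then integrates over its unknown outcome
    acq_func.set_X_pending(torch.cat(batch, dim=0))
new_x = unnormalize(torch.cat(batch, dim=0).detach(), bounds=bounds)

This is essentially what sequential=True does inside optimize_acqf, and it integrates over posterior samples at the pending points rather than conditioning on a single fantasy observation.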
@EvanClaes Hi, have you handled it? I recently ran into the same problem as you: the generated queries are not respecting the constraint. How did you solve this problem? Thank you, have a nice day!
constraints=[lambda Z: Z[..., -1]],
Hi, I just started using these acquisition functions. I am wondering, if my problem has 5 objectives and 2 constraints, how do I specify the constraints here? Should it be written as constraints=[lambda Z: Z[..., 5:]]? It seems this doesn't work. Could you please help me with this? Thanks a lot.
Constraints are scalar-valued. You'll want to specify multiple scalar constraints rather than a single vector-valued constraint. Something like
constraints=[
lambda Z: Z[..., 5],
lambda Z: Z[..., 6],
]
assuming indices 0-4 are your 5 objectives.
Note also that 5 objectives is pushing things to the limit in terms of scalability, at least when you're using a hypervolume-based acquisition function.
Closing as answered / duplicate of #1358