Tuner.on_epoch_end fails to save the model.
`Tuner.on_epoch_end` fails to save the model with the following error:

```
Traceback (most recent call last):
  File "./test.py", line 355, in <module>
    main()
  File "./test.py", line 352, in main
    tune(options)
  File "./test.py", line 333, in tune
    tuner.search()
  File "/home/max/python-quant/lib/python3.6/site-packages/kerastuner/engine/base_tuner.py", line 131, in search
    self.run_trial(trial, *fit_args, **fit_kwargs)
  File "./test.py", line 296, in run_trial
    self.on_epoch_end(trial, model, epoch, logs={'Alpha': alpha, 'Beta': beta})
  File "/home/max/python/lib/python3.6/site-packages/kerastuner/engine/tuner.py", line 247, in on_epoch_end
    self.save_model(trial.trial_id, model, step=epoch)
  File "/home/max/python/lib/python3.6/site-packages/kerastuner/engine/tuner.py", line 186, in save_model
    trial_id).metrics.get_best_step(self.oracle.objective.name)
  File "/home/max/python/lib/python3.6/site-packages/kerastuner/engine/metrics_tracking.py", line 214, in get_best_step
    self._assert_exists(name)
  File "/home/max/python/lib/python3.6/site-packages/kerastuner/engine/metrics_tracking.py", line 258, in _assert_exists
    raise ValueError('Unknown metric: %s' % (name,))
ValueError: Unknown metric: Alpha
```
The cause of the error is that `Tuner.on_epoch_end` does this:

```python
self.save_model(trial.trial_id, model, step=epoch)
# Report intermediate metrics to the `Oracle`.
status = self.oracle.update_trial(trial.trial_id, metrics=logs, step=epoch)
```

`save_model` looks up the best step of the objective metric (the `get_best_step` call in the traceback above), but that metric is only registered with the trial once `update_trial` has reported the logs. Instead, it should invoke `update_trial` before `save_model`:

```python
# Report intermediate metrics to the `Oracle`.
status = self.oracle.update_trial(trial.trial_id, metrics=logs, step=epoch)
self.save_model(trial.trial_id, model, step=epoch)
```
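Until the library is patched, one workaround is to override `on_epoch_end` in your own `Tuner` subclass with the two calls reordered. A minimal sketch (the `PatchedTuner` name is hypothetical, and it mirrors only the two calls shown above, omitting any other bookkeeping the base method may do):

```python
import kerastuner as kt


class PatchedTuner(kt.Tuner):
    """Hypothetical workaround: reorder the calls until the library is fixed."""

    def on_epoch_end(self, trial, model, epoch, logs=None):
        # Report intermediate metrics to the `Oracle` first, so the
        # objective metric ("Alpha" here) is registered with the trial...
        self.oracle.update_trial(trial.trial_id, metrics=logs, step=epoch)
        # ...before save_model asks for the best step of that metric.
        self.save_model(trial.trial_id, model, step=epoch)
```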
Would you paste your entire script? I think it is related to the metrics you use when you call `compile`, or the `objective` you use when you call `search`.
I can post redacted code, which should give you an idea. The model implements custom train and test step functions which calculate multiple metrics. Because of that, I do not specify any metrics to `model.compile` or `tuner.search`. Should I?
```python
from datetime import datetime

import tensorflow as tf
from tensorflow import keras
import kerastuner as kt


class Model(keras.Model):
    def _step(self, data, training):
        # ...
        # Update metrics.
        self.beta_tracker.update_state(beta)
        self.alpha_tracker.update_state(alpha)
        return {"Alpha": self.alpha_tracker.result(),
                "Beta": self.beta_tracker.result()}

    def train_step(self, data):
        return self._step(data, training=True)

    def test_step(self, data):
        return self._step(data, training=False)

    @property
    def metrics(self):
        return [self.alpha_tracker, self.beta_tracker]


def build_model(hp):
    keras.backend.clear_session()
    model = Model(...)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp['learning_rate']))
    return model


class Tuner(kt.Tuner):
    def run_trial(self, trial, log_dir):
        run_dir = f"{log_dir}/{trial.trial_id}"
        hp = trial.hyperparameters
        if "tuner/trial_id" in hp:
            # Resume a model saved by an earlier Hyperband bracket.
            past_trial = self.oracle.get_trial(hp['tuner/trial_id'])
            model = self.load_model(past_trial)
        else:
            model = self.hypermodel.build(hp)
        learning_rate = hp['learning_rate']
        batch_size = hp['batch_size']
        Ip = hp['Ip']
        train_ds = tf.data.Dataset.from_generator(...)
        val_ds = tf.data.Dataset.from_generator(...)
        initial_epoch = hp['tuner/initial_epoch']
        epochs = hp['tuner/epochs']
        for epoch in range(initial_epoch, epochs):
            self.on_epoch_begin(trial, model, epoch)
            r = model.fit(x=train_ds,
                          validation_data=val_ds,
                          validation_steps=None,
                          shuffle=False,
                          initial_epoch=epoch,
                          epochs=epochs,
                          verbose=1,
                          callbacks=[
                              keras.callbacks.EarlyStopping(monitor='val_Alpha', mode="max",
                                                            patience=1, min_delta=0.0001)
                          ])
            alpha = r.history["val_Alpha"][-1]
            beta = r.history["val_Beta"][-1]
            logs = {'Alpha': alpha, 'Beta': beta}
            self.oracle.update_trial(trial.trial_id, metrics=logs, step=epoch)  # TODO: self.on_epoch_end should do that first.
            self.on_epoch_end(trial, model, epoch, logs=logs)


def tune():
    log_dir = "logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    hp = kt.HyperParameters()
    learning_rate = hp.Float('learning_rate', 0.01, 0.95, default=0.9)
    batch_size = hp.Choice('batch_size', (32, 64, 128, 256, 512, 1024, 2048))
    Ip = hp.Int('Ip', 8, 4096, default=1024)
    tuner = Tuner(
        oracle=kt.oracles.Hyperband(
            objective=kt.Objective('Alpha', 'max'),
            hyperband_iterations=10,
            factor=2,
            max_epochs=5,
            hyperparameters=hp),
        hypermodel=build_model,
        directory=log_dir,
        project_name="tuning")
    tuner.search(log_dir)
    best_hps = tuner.get_best_hyperparameters()[0]
    print(best_hps.values)
```
Adding a call to `self.oracle.update_trial` before `self.on_epoch_end` fixed the `ValueError` exception from the original report.
Great! Do you have a new error after that?

The idea is that you should compile the model with `metrics=[Alpha]`, so that when `self.oracle.update_trial(trial.trial_id, metrics=logs, step=epoch)` runs, `logs` is a dictionary with the key `"Alpha"`.
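If `Alpha` here is meant to be a custom metric class, a minimal sketch of that idea might look like the following (treating `Alpha` as a `keras.metrics.Metric` subclass is an assumption on my part; the real alpha computation is the reporter's own):

```python
import tensorflow as tf
from tensorflow import keras


class Alpha(keras.metrics.Metric):
    """Assumed placeholder: a stateful metric reported under the name 'Alpha'."""

    def __init__(self, name="Alpha", **kwargs):
        super().__init__(name=name, **kwargs)
        self.total = self.add_weight(name="total", initializer="zeros")
        self.count = self.add_weight(name="count", initializer="zeros")

    def update_state(self, y_true, y_pred, sample_weight=None):
        # Stand-in computation; the real alpha logic would go here.
        self.total.assign_add(tf.reduce_mean(tf.cast(y_pred, tf.float32)))
        self.count.assign_add(1.0)

    def result(self):
        return self.total / self.count


model.compile(optimizer=keras.optimizers.Adam(), metrics=[Alpha()])
```

Compiling this way makes Keras report the metric under the name `"Alpha"` in the training logs, matching the objective name passed to the oracle.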
> The idea is that you should compile the model with `metrics=[Alpha]`.

An `Alpha` object doesn't exist, so the `metrics=[Alpha]` expression isn't well formed.
As I mentioned, the fix is for `Tuner.on_epoch_end` to follow the end-to-end example: call `update_trial` first, and `save_model` only after that.