ray_lightning
ray_lightning copied to clipboard
"Trials did not complete" error
I wanted to do distributed hyperparameter tuning, but I am getting a "trials did not complete" error:
import lightning as pl
from ray import air, tune
from ray_lightning import RayStrategy
from ray_lightning.tune import TuneReportCallback, get_tune_resources
def main(config):
    """Train ``LighteningModel`` for a single Ray Tune trial.

    Builds the model from ``config``, attaches a ``TuneReportCallback`` and
    fits it with a ``RayStrategy`` trainer (1 CPU worker, 4 epochs).

    Args:
        config: Hyperparameter dict supplied by Ray Tune for this trial;
            passed unchanged to ``LighteningModel``.
    """
    model = LighteningModel(config)

    # Maps the Tune metric name ("val_loss") to the metric name logged by the
    # LightningModule ("val_total_loss").
    # NOTE(review): confirm the model actually logs "val_total_loss".
    callback = TuneReportCallback(
        {
            "val_loss": "val_total_loss",
        },
        on="validation_end",
    )

    trainer = pl.Trainer(
        max_epochs=4,
        # Fixed: the original passed the undefined name `callbacks`, raising
        # NameError inside every trial so that no trial could complete.
        callbacks=[callback],
        strategy=RayStrategy(num_workers=1, use_gpu=False),
    )

    # NOTE(review): `train_dataloader` and `val_dataloader` are not defined in
    # the visible snippet; they must exist at module scope (and be picklable
    # for Ray), otherwise this line raises NameError as well.
    trainer.fit(model, train_dataloader, val_dataloader)
def train():
    """Run the Ray Tune hyperparameter search over the learning rate.

    Wraps ``main`` with the resources requested by ``get_tune_resources``
    (1 CPU worker) and launches the tuner.

    Returns:
        The object returned by ``tuner.fit()``, so callers can inspect trial
        results and errors.
    """
    search_space = {
        'lr': tune.choice([1e-2, 1e-3, 1e-4]),
    }
    num_samples = 1

    tuner = tune.Tuner(
        tune.with_resources(
            main,
            get_tune_resources(num_workers=1, use_gpu=False),
        ),
        # Previously `num_samples` was assigned but never handed to the tuner.
        tune_config=tune.TuneConfig(num_samples=num_samples),
        param_space=search_space,
    )
    # Return the results instead of dropping them in an unused local.
    return tuner.fit()
I am following these two links.
- https://docs.ray.io/en/latest/ray-more-libs/using-ray-with-pytorch-lightning.html#distributed-hyperparameter-optimization-with-ray-tune
- https://github.com/ray-project/ray_lightning