darts
[BUG] TFM using optuna multiple jobs error AttributeError: _model_call
Hi
I'm trying to run deep learning optimisation using Optuna. It works fine with n_trials=1, but if I increase that number to, say, 2, I get the error AttributeError: _model_call. I have enough CPUs. The full error is shown below. I can share the code if needed, but it will take me a bit to separate it all out, so I'm hoping for an easy fix.
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/concurrent/futures/process.py", line 246, in _process_worker
    r = call_item.fn(*call_item.args, **call_item.kwargs)
  File "/opt/conda/lib/python3.10/concurrent/futures/process.py", line 205, in _process_chunk
    return [fn(*args) for args in chunk]
  File "/opt/conda/lib/python3.10/concurrent/futures/process.py", line 205, in <listcomp>
    return [fn(*args) for args in chunk]
  File "/opt/conda/lib/python3.10/site-packages/VeryForecast/VeryForcasting.py", line 572, in OptimiseDataframeParallel_wrapper
    best_model_df, final, df_models = self.OptimiseSeries(
  File "/opt/conda/lib/python3.10/site-packages/VeryForecast/VeryForcasting.py", line 348, in OptimiseSeries
    DeepLearningModels(
  File "/opt/conda/lib/python3.10/site-packages/VeryForecast/ForcastingModels.py", line 1468, in DeepLearningModels
    brnn_params = optimize_model(BlockRNNModelOptimise, "block_rnn", df, future_rts_df, past_rts_df, all_past_rts, forecast_horizon, backtest, stride[1], metric_list, metric, cpus, brrn_hparams)
  File "/opt/conda/lib/python3.10/site-packages/VeryForecast/ForcastingHelper.py", line 520, in optimize_model
    study.optimize(objective, n_trials=n_trials, n_jobs=2)
  File "/opt/conda/lib/python3.10/site-packages/optuna/study/study.py", line 451, in optimize
    _optimize(
  File "/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py", line 103, in _optimize
    f.result()
  File "/opt/conda/lib/python3.10/concurrent/futures/_base.py", line 451, in result
    return self.__get_result()
  File "/opt/conda/lib/python3.10/concurrent/futures/_base.py", line 403, in __get_result
    raise self._exception
  File "/opt/conda/lib/python3.10/concurrent/futures/thread.py", line 58, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py", line 163, in _optimize_sequential
    frozen_trial = _run_trial(study, func, catch)
  File "/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py", line 251, in _run_trial
    raise func_err
  File "/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "/opt/conda/lib/python3.10/site-packages/VeryForecast/DeepLearningOptuna.py", line 276, in __call__
    model = BlockRNNModel(
  File "/opt/conda/lib/python3.10/site-packages/darts/models/forecasting/forecasting_model.py", line 107, in __call__
    return super().__call__(**all_params)
  File "/opt/conda/lib/python3.10/site-packages/darts/models/forecasting/block_rnn_model.py", line 409, in __init__
    super().__init__(**self._extract_torch_model_params(**self.model_params))
  File "/opt/conda/lib/python3.10/site-packages/darts/utils/torch.py", line 112, in decorator
    return decorated(self, *args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/darts/models/forecasting/torch_forecasting_model.py", line 293, in __init__
    super().__init__(add_encoders=add_encoders)
  File "/opt/conda/lib/python3.10/site-packages/darts/models/forecasting/forecasting_model.py", line 2160, in __init__
    super().__init__(add_encoders=add_encoders)
  File "/opt/conda/lib/python3.10/site-packages/darts/models/forecasting/forecasting_model.py", line 135, in __init__
    self._model_params = self._extract_model_creation_params()
  File "/opt/conda/lib/python3.10/site-packages/darts/models/forecasting/forecasting_model.py", line 1873, in _extract_model_creation_params
    del self.__class__._model_call
AttributeError: _model_call
Hi @AndrewJGroves, a minimal reproducible example would indeed be nice to debug. Could you try to provide one?
Of course, here is an example. As soon as I change n_jobs to more than 1, I get the error. I'm not using a GPU system. Interestingly, I get the same error if I run Prophet this way, but I don't seem to see it with regression models; all the classes are based on the same principle with very few changes. My pytorch-lightning version is 2.1.2 and torch is 2.2.0.
#!pip install darts "optuna<=3.4.0"
import os

import optuna
import torch
from optuna.integration import PyTorchLightningPruningCallback

from darts import TimeSeries
from darts.datasets import WeatherDataset
from darts.metrics import rmse
from darts.models import BlockRNNModel
from darts.utils.model_selection import train_test_split
class BlockRNNModelOptimise(object):
    def __init__(
        self,
        df,
        all_past_rts,
        forecast_horizon,
    ):
        self.df = df
        self.all_past_rts = all_past_rts
        self.forecast_horizon = forecast_horizon

    def __call__(self, trial):
        if torch.cuda.is_available():
            pl_trainer_kwargs = {
                "accelerator": "gpu",
                "devices": "auto",
                "callbacks": [PyTorchLightningPruningCallback(trial, monitor="train_loss")],
                "enable_progress_bar": True,
            }
            num_workers = 4
        else:
            pl_trainer_kwargs = {
                "accelerator": "cpu",
                "devices": 1,
                "callbacks": [PyTorchLightningPruningCallback(trial, monitor="train_loss")],
                "enable_progress_bar": True,
            }
            num_workers = 0

        input_chunk_length = trial.suggest_int("input_chunk_length", 1, 5)
        output_chunk_length = trial.suggest_int("output_chunk_length", 1, 1)
        model = trial.suggest_categorical("model", ["RNN", "LSTM", "GRU"])
        hidden_dim = trial.suggest_int("hidden_dim", 25, 30)
        n_rnn_layers = trial.suggest_int("n_rnn_layers", 1, 3)
        lr = trial.suggest_float("lr", 0.005, 0.01)

        train, test = train_test_split(
            self.df,
            test_size=0.20,
            input_size=input_chunk_length,
            horizon=self.forecast_horizon,
            vertical_split_type="model-aware",
        )
        model = BlockRNNModel(
            n_epochs=10,
            random_state=1,
            input_chunk_length=input_chunk_length,
            output_chunk_length=output_chunk_length,
            model=model,
            hidden_dim=hidden_dim,
            n_rnn_layers=n_rnn_layers,
            optimizer_kwargs={"lr": lr},
            pl_trainer_kwargs=pl_trainer_kwargs,
        )
        model.fit(
            train,
            val_series=test,
            past_covariates=self.all_past_rts,
            val_past_covariates=self.all_past_rts,
            num_loader_workers=num_workers,
            verbose=False,
        )
        backtesting = model.backtest(
            self.df,
            past_covariates=self.all_past_rts,
            start=0.8,
            forecast_horizon=self.forecast_horizon,
            stride=4,
            metric=rmse,
        )
        return backtesting
series = WeatherDataset().load()
# predicting atmospheric pressure
df = series['p (mbar)'][:100]
# optionally, use past observed rainfall (pretending to be unknown beyond index 100)
all_past_rts = series['rain (mm)'][:100]
forecast_horizon = 6

storage = optuna.storages.RDBStorage(
    url="sqlite:///example.db",
    engine_kwargs={"connect_args": {"check_same_thread": False}},
)
objective = BlockRNNModelOptimise(df, all_past_rts, forecast_horizon)
study = optuna.create_study(
    direction="minimize",
    storage=storage,
    sampler=optuna.samplers.TPESampler(),
    pruner=optuna.pruners.MedianPruner(),
)
study.optimize(objective, n_trials=5, n_jobs=2)  # when n_jobs = 1 it works
brnn_params = study.best_params

if os.path.exists('example.db'):
    os.remove('example.db')
storage.remove_session()

pl_trainer_kwargs = {
    "accelerator": "cpu",
    "devices": 1,
    "enable_progress_bar": True,
}
num_workers = 0
model = BlockRNNModel(
    optimizer_kwargs={"lr": brnn_params.pop("lr", None)},
    **brnn_params,
    n_epochs=100,
    random_state=1,
    pl_trainer_kwargs=pl_trainer_kwargs,
)
model.fit(
    df,
    past_covariates=all_past_rts,
    num_loader_workers=0,
    verbose=False,
)
backtesting = model.backtest(
    df,
    past_covariates=all_past_rts,
    start=0.8,
    forecast_horizon=forecast_horizon,
    stride=4,
    metric=rmse,
)
print(backtesting)
Hi,
I investigated a bit, and it appears that the line responsible for the bug when n_jobs > 1 is:
https://github.com/unit8co/darts/blob/46004539b929176396310a9550457b6a14571c74/darts/models/forecasting/forecasting_model.py#L1873C1-L1873C39
However, when that line is deleted, the models in some trials do not correspond to the desired/provided parameters (reading the class attribute from another trial?!). Fixing this would probably require some refactoring of the ModelMeta class.
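To make the failure mode concrete, here is a simplified, self-contained sketch (a toy metaclass, not the actual darts code) of how remembering the constructor call in a class attribute and deleting it afterwards races once Optuna runs trials in several threads with n_jobs > 1:

import threading
import time

class ToyMeta(type):
    def __call__(cls, **kwargs):
        # analogous to ModelMeta: remember the constructor call on the class itself
        cls._model_call = kwargs
        time.sleep(0.01)  # widen the race window for the demo
        return super().__call__(**kwargs)

class ToyModel(metaclass=ToyMeta):
    def __init__(self, **kwargs):
        # analogous to _extract_model_creation_params(): read the call, then delete it
        self.params = dict(type(self)._model_call)  # may already hold another thread's kwargs
        del type(self)._model_call  # another thread may have deleted it already

def build(i):
    try:
        ToyModel(hidden_dim=i)
    except AttributeError as exc:
        print(f"thread {i}: AttributeError: {exc}")

threads = [threading.Thread(target=build, args=(i,)) for i in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()

With several threads, all of them write _model_call before the first one removes it, so every thread but one fails with an AttributeError on _model_call, and the surviving one may read parameters that belong to a different trial.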
Thanks for looking, it sounds like a hard fix. So you're aware, I don't get this error when running FourTheta, linear regression, or LightGBM, but I do get the same error with Prophet as well as RNNModel, TransformerModel, NBEATSModel and NHiTSModel (these are the only models I have tried Optuna with).
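In case it helps while this is open, one possible workaround (just a sketch, not verified against darts internals): since Optuna's n_jobs > 1 runs trials in threads of the same process (visible in the traceback above via concurrent/futures/thread.py), serialising the model construction with a lock should keep two trials from touching the shared _model_call class attribute at the same time. The build_block_rnn helper and the lock name below are made up for illustration; only the constructor needs the lock, fit/backtest can keep running concurrently.

import threading

from darts.models import BlockRNNModel

# One lock per process; every darts model construction in the objective goes through it.
_MODEL_CREATION_LOCK = threading.Lock()

def build_block_rnn(**kwargs):
    # Serialise construction so concurrent trials cannot interleave the
    # set/read/delete of the class-level _model_call attribute.
    with _MODEL_CREATION_LOCK:
        return BlockRNNModel(**kwargs)

Inside BlockRNNModelOptimise.__call__, model = BlockRNNModel(...) would then become model = build_block_rnn(...) with the same keyword arguments. The same wrapper idea should apply to the other affected models (RNNModel, TransformerModel, NBEATSModel, NHiTSModel, Prophet), since they appear to hit the same metaclass path judging by the identical error.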