neuralforecast
neuralforecast copied to clipboard
All Multivariate Models (Except SOFTS) cannot be trained on multiple GPUs
What happened + What you expected to happen
I'm seeing multiple issues (all apparently related to matrix dimensions) for all multivariate models — except HINT, which I could not test because I could not determine the S parameter from the documentation, and SOFTS, which seems to work. This is reproducible with both the standard models and the Auto models.
The errors shown below are not the full stack traces; they have been trimmed for readability:
TSMixer
File "/burg/pmg/users/aec2244/mambaforge/torch/lib/python3.10/site-packages/neuralforecast/models/tsmixer.py", line 118, in forward
x = x * self.weight
RuntimeError: The size of tensor a (4) must match the size of tensor b (7) at non-singleton dimension 2
TSMixerx
File "/burg/pmg/users/aec2244/mambaforge/torch/lib/python3.10/site-packages/neuralforecast/models/tsmixerx.py", line 146, in forward
x = x * self.weight
RuntimeError: The size of tensor a (4) must match the size of tensor b (7) at non-singleton dimension 3
TimeMixer
File "/burg/pmg/users/aec2244/mambaforge/torch/lib/python3.10/site-packages/neuralforecast/models/timemixer.py", line 85, in _normalize
x = x * self.affine_weight
RuntimeError: The size of tensor a (4) must match the size of tensor b (7) at non-singleton dimension 2
StemGNN
File "/burg/pmg/users/aec2244/mambaforge/torch/lib/python3.10/site-packages/neuralforecast/models/stemgnn.py", line 339, in self_graph_attention
key = torch.matmul(input, self.weight_key)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (175x4 and 7x1)
MLPMultivariate
File "/burg/pmg/users/aec2244/mambaforge/torch/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 117, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (25x384 and 672x1024)
Versions / Dependencies
neuralforecast is 1.7.4, datasetsforecast is 0.0.8, pytorch_lightning is 2.3.0, torch is 2.4.0+cu121
Reproduction script
I reproduced the error using only Nixtla-related packages, and included NHITS as a working (univariate) example:
import optuna
import pandas as pd
from neuralforecast import NeuralForecast
from neuralforecast.losses.pytorch import MAE
from neuralforecast.auto import AutoStemGNN, AutoTSMixer, AutoTSMixerx, AutoNHITS,AutoSOFTS, AutoHint, AutoTimeMixer, AutoMLPMultivariate
from neuralforecast.models import NHITS
from datasetsforecast.long_horizon import LongHorizon

# Change this to your own data to try the model
Y_df, _, _ = LongHorizon.load(directory='./', group='ETTm2')
Y_df['ds'] = pd.to_datetime(Y_df['ds'])
Y_df = Y_df[["ds", "unique_id", "y"]]

# Halve the covered date range twice to keep memory usage down on this system
# (should not affect reproduction of the issue).
for _ in range(2):
    Y_df = Y_df[Y_df.ds <= Y_df.ds.median()]

H = 96
num_samples = 10
num_gpus = 1

n_series = Y_df["unique_id"].nunique()

# Default optuna search spaces. AutoNHITS is univariate and takes no
# n_series; every multivariate Auto model does.
nhits_default_config = AutoNHITS.get_default_config(h=H, backend="optuna")
multivariate_classes = [
    AutoTimeMixer,
    AutoSOFTS,        # this one works
    AutoTSMixer,
    AutoTSMixerx,
    AutoStemGNN,
    AutoMLPMultivariate,
]
default_configs = {
    cls: cls.get_default_config(h=H, backend="optuna", n_series=n_series)
    for cls in multivariate_classes
}

# Build one model per class; each gets its own loss and sampler instance,
# exactly as in the original hand-written list.
models = [
    AutoNHITS(
        h=H,
        config=nhits_default_config,
        gpus=num_gpus,
        valid_loss=MAE(),
        search_alg=optuna.samplers.TPESampler(),
        backend="optuna",
        num_samples=num_samples,
    )
]
for cls in multivariate_classes:
    models.append(
        cls(
            h=H,
            n_series=n_series,
            config=default_configs[cls],
            gpus=num_gpus,
            valid_loss=MAE(),
            search_alg=optuna.samplers.TPESampler(),
            backend='optuna',
            num_samples=num_samples,
        )
    )

nf = NeuralForecast(models=[models[1]], freq="15min")
Y_df.sort_values("ds", inplace=True)
nf.fit(df=Y_df)
I also reproduced these errors with the base (non-Auto) models:
from neuralforecast.models import StemGNN, TSMixer, HINT, TSMixerx, NHITS, MLPMultivariate, SOFTS, TimeMixer

# Same reproduction with the base (non-Auto) models; NHITS is the
# univariate control and takes windows_batch_size instead of batch_size.
n_series = Y_df["unique_id"].nunique()
models = [
    NHITS(h=H, input_size=H, windows_batch_size=25, max_steps=100, loss=MAE()),
]
for cls in (TimeMixer, SOFTS, TSMixer, TSMixerx, StemGNN, MLPMultivariate):
    models.append(
        cls(
            h=H,
            input_size=H,
            batch_size=25,
            max_steps=100,
            n_series=n_series,
            loss=MAE(),
        )
    )

nf = NeuralForecast(models=[models[1]], freq="15min")
Y_df.sort_values("ds", inplace=True)
nf.fit(df=Y_df)
Issue Severity
High: It blocks me from completing my task.