Help Reproducing the TiDE Paper
Hi, I am trying to reproduce the TiDE results as described in this issue (https://github.com/unit8co/darts/issues/2343). Despite following everything mentioned in the issue step by step, I am unable to achieve the original results.
The paper reports MSE: 0.454 and MAE: 0.465 for the ETTh1 (720) dataset; however, I am getting MAE: 0.85356873 and MSE: 1.1463453. Any pointers on this would be greatly appreciated. Thank you.
```python
import torch
from darts.models import TiDEModel
from darts.datasets import ETTh1Dataset
from darts.dataprocessing.transformers.scaler import Scaler
from darts.metrics import mae, mse
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Hyperparameters (ETTh1, horizon 720)
epochs = 100
lookback = 720
horizon = 720
series = ETTh1Dataset().load().astype("float32")
hidden_size = 256
num_encoder_layers = 2
num_decoder_layers = 2
decoder_output_dim = 8
temporal_decoder_hidden = 128
dropout = 0.3
use_layer_norm = True
lr = 3.82e-5
rev_in = True
batch_size = 512

# Train / validation / test split and scaling
train, temp = series.split_after(0.7)
val, test = temp.split_after(0.33)
standard_scaler = StandardScaler()
scaler = Scaler(standard_scaler)
train = scaler.fit_transform(train)
val = scaler.transform(val)
test = scaler.transform(test)

pl_trainer_kwargs = {
    "max_epochs": epochs,
    "gradient_clip_val": 0.5,
}
optimizer_kwargs = {
    "lr": lr,
}
lr_scheduler_cls = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts
lr_scheduler_kwargs = {
    "T_0": 10,
    "eta_min": 0,
}

model_tide = TiDEModel(
    input_chunk_length=lookback,
    output_chunk_length=horizon,
    hidden_size=hidden_size,
    num_encoder_layers=num_encoder_layers,
    num_decoder_layers=num_decoder_layers,
    decoder_output_dim=decoder_output_dim,
    temporal_decoder_hidden=temporal_decoder_hidden,
    dropout=dropout,
    use_layer_norm=use_layer_norm,
    use_reversible_instance_norm=rev_in,
    batch_size=batch_size,
    pl_trainer_kwargs=pl_trainer_kwargs,
    optimizer_kwargs=optimizer_kwargs,
    lr_scheduler_cls=lr_scheduler_cls,
    lr_scheduler_kwargs=lr_scheduler_kwargs,
    save_checkpoints=True,
    force_reset=True,
    model_name="tide",
)

model_tide.fit(
    series=train,
    val_series=val,
)

best_model = model_tide.load_from_checkpoint(model_name="tide", best=True)
best_model.save("tide_best_model")

untransformed_test = test
test = scaler.transform(test)

forecasts = best_model.historical_forecasts(
    series=test,
    start=0,
    forecast_horizon=horizon,
    retrain=False,
)

forecasts_mae = mae(untransformed_test, scaler.inverse_transform(forecasts))
forecasts_mse = mse(untransformed_test, scaler.inverse_transform(forecasts))
print("forecast_mae:", forecasts_mae)
print("forecast_mse:", forecasts_mse)
```
Just from a first glance, I see that you transform `test` twice. Also, your `untransformed_test` is already transformed.
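For example, a minimal sketch of that fix (it reuses `series`, `horizon`, and the fitted `best_model` from the script above; the name `test_raw` is just illustrative): keep an unscaled copy of the test split, apply the scaler exactly once, and inverse-transform only the forecasts before computing the metrics.

```python
# Sketch only: avoid transforming the test split twice.
from sklearn.preprocessing import StandardScaler
from darts.dataprocessing.transformers.scaler import Scaler
from darts.metrics import mae, mse

train, temp = series.split_after(0.7)
val, test_raw = temp.split_after(0.33)   # test_raw stays in the original scale

scaler = Scaler(StandardScaler())
train = scaler.fit_transform(train)
val = scaler.transform(val)
test = scaler.transform(test_raw)        # scaled copy, used only for inference

forecasts = best_model.historical_forecasts(
    series=test,
    start=0,
    forecast_horizon=horizon,
    retrain=False,
)

# Evaluate in the original scale: raw ground truth vs. inverse-transformed forecasts
print("mae:", mae(test_raw, scaler.inverse_transform(forecasts)))
print("mse:", mse(test_raw, scaler.inverse_transform(forecasts)))
```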
Thank you for taking the time to reply! I made the corrections you mentioned, and the results are forecasts_mae(inverse): 3.0143383, forecasts_mse(inverse): 20.395443, forecasts_mae: 0.8434442, forecasts_mse: 1.195973. Any other suggestions on what I might be doing wrong?
```python
import torch
from darts.models import TiDEModel
from darts.datasets import ETTh1Dataset
from darts.dataprocessing.transformers.scaler import Scaler
from darts.metrics import mae, mse
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

epochs = 100
lookback = 720
horizon = 720
series = ETTh1Dataset().load().astype("float32")
hidden_size = 256
num_encoder_layers = 2
num_decoder_layers = 2
decoder_output_dim = 8
temporal_decoder_hidden = 128
dropout = 0.3
use_layer_norm = True
lr = 3.82e-5
rev_in = True
batch_size = 512

train, temp = series.split_after(0.7)
val, test = temp.split_after(0.33)
standard_scaler = StandardScaler()
scaler = Scaler(standard_scaler)
train = scaler.fit_transform(train)
val = scaler.transform(val)
test = scaler.transform(test)

pl_trainer_kwargs = {
    "max_epochs": epochs,
    "gradient_clip_val": 0.5,
}
optimizer_kwargs = {
    "lr": lr,
}
lr_scheduler_cls = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts
lr_scheduler_kwargs = {
    "T_0": 10,
    "eta_min": 0,
}

model_tide = TiDEModel(
    input_chunk_length=lookback,
    output_chunk_length=horizon,
    hidden_size=hidden_size,
    num_encoder_layers=num_encoder_layers,
    num_decoder_layers=num_decoder_layers,
    decoder_output_dim=decoder_output_dim,
    temporal_decoder_hidden=temporal_decoder_hidden,
    dropout=dropout,
    use_layer_norm=use_layer_norm,
    use_reversible_instance_norm=rev_in,
    batch_size=batch_size,
    pl_trainer_kwargs=pl_trainer_kwargs,
    optimizer_kwargs=optimizer_kwargs,
    lr_scheduler_cls=lr_scheduler_cls,
    lr_scheduler_kwargs=lr_scheduler_kwargs,
    save_checkpoints=True,
    force_reset=True,
    model_name="tide",
)

model_tide.fit(
    series=train,
    val_series=val,
)

best_model = model_tide.load_from_checkpoint(model_name="tide", best=True)
best_model.save("tide_best_model")

forecasts = best_model.historical_forecasts(
    series=test,
    start=0,
    forecast_horizon=horizon,
    retrain=False,
)

print("forecasts_mae(inverse):", mae(scaler.inverse_transform(test), scaler.inverse_transform(forecasts)))
print("forecasts_mse(inverse):", mse(scaler.inverse_transform(test), scaler.inverse_transform(forecasts)))
print("forecasts_mae:", mae(test, forecasts))
print("forecasts_mse:", mse(test, forecasts))
```
I'd be surprised if this explained such a large discrepancy, but, although it wasn't made clear in their paper, the TiDE authors used `temporal_hidden_size_past=64`.
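For reference, a minimal sketch of how that argument could be added to the configuration above (assuming a Darts version that exposes `temporal_hidden_size_past`; all other keyword arguments stay as in the posted script):

```python
# Sketch only: same configuration as above, plus the past-covariate hidden size.
model_tide = TiDEModel(
    input_chunk_length=lookback,
    output_chunk_length=horizon,
    hidden_size=hidden_size,
    num_encoder_layers=num_encoder_layers,
    num_decoder_layers=num_decoder_layers,
    decoder_output_dim=decoder_output_dim,
    temporal_decoder_hidden=temporal_decoder_hidden,
    temporal_hidden_size_past=64,  # value reportedly used by the TiDE authors
    dropout=dropout,
    use_layer_norm=use_layer_norm,
    use_reversible_instance_norm=rev_in,
    batch_size=batch_size,
    model_name="tide",
)
```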
Thank you @eschibli for taking the time to help on this thread.
@msasen Any news on the reproduction of the results?
I was unable to reproduce the results using Darts, which is why I opted for Time-Series-Library.
Being a big fan of Darts, I’d be more than happy to contribute to its documentation if I manage to reproduce the results with it.
Did you manage to reproduce the results using this other library?
It would be really cool for Darts to include a section in this model's example notebook where the results of the original paper are at least partially reproduced. If you are willing to contribute, I can assign this issue to you (no pressure, of course).