
[Question] How to increase TFT's accuracy?

gsamaras opened this issue 3 years ago · 1 comment

Any tips on increasing TFT's accuracy? I got a MAPE of 1.45 with N-BEATS but 2.36 with TFT (2.05 with a much larger network). If needed, I can provide an online notebook to experiment with.

Here is my code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from darts import TimeSeries
from darts import concatenate
from sklearn.preprocessing import MinMaxScaler
from darts.dataprocessing.transformers import Scaler, MissingValuesFiller
from darts.models import TFTModel
from darts.utils.likelihood_models import (
    QuantileRegression,
    BernoulliLikelihood,
    BetaLikelihood,
    GaussianLikelihood,
)
from darts.metrics import mae, mape, mse, rmse, r2_score
from darts.datasets import EnergyDataset
from pickle import dump, load
from torch import optim
from typing import Optional, Union
import torch

def define_TFT_model(
    train_set: Optional[TimeSeries] = None,
    val_set: Optional[TimeSeries] = None,
    gridsearch: bool = False,
) -> TFTModel:
    """
    Setup TFT model's architecture.

    :param train_set (Optional[TimeSeries]): Train set (used in grid search)
    :param val_set (Optional[TimeSeries]): Validation set (used in grid search)
    :param gridsearch (bool): Whether to perform a grid search
    :return: A TFT model
    """
    quantiles = [
        0.01,
        0.05,
        0.1,
        0.15,
        0.2,
        0.25,
        0.3,
        0.4,
        0.5,
        0.6,
        0.7,
        0.75,
        0.8,
        0.85,
        0.9,
        0.95,
        0.99,
    ]
    if gridsearch:
        parameters = {
            "input_chunk_length": [3],
            "output_chunk_length": [1],
            "hidden_size": [32],
            "lstm_layers": [4],
            "num_attention_heads": [4],
            "dropout": [0.1],
            "n_epochs": [20],
            "add_relative_index": [True],
            "batch_size": [16],
            "add_encoders": [None],
            "random_state": [0],
            "likelihood": [QuantileRegression(quantiles=quantiles), BernoulliLikelihood, BetaLikelihood, GaussianLikelihood],
            "optimizer_cls": [
                optim.Adam
            ],  # [optim.Adadelta, optim.Adagrad, optim.Adam, optim.AdamW, optim.Adamax, optim.ASGD, optim.NAdam,
            # optim.RAdam, optim.RMSprop, optim.Rprop, optim.SGD],
            "optimizer_kwargs": [{"lr": 1e-3}],
            "lr_scheduler_cls": [optim.lr_scheduler.ReduceLROnPlateau],
            # "torch_device_str": ["cuda:0"],
            "model_name": ["tft_run"],
        }
      
        # Randomized gridsearch
        res = TFTModel.gridsearch(
            parameters=parameters,
            series=train_set,
            val_series=val_set,
            start=0.1,  # starting point in training set
            last_points_only=False,
            metric=mape,
            reduction=np.mean,
            n_jobs=-1,
            # n_random_samples=0.99,  # % of full search space to evaluate
            verbose=True,
        )
        best_model, dict_bestparams = res
        print(f"dict_bestparams: {dict_bestparams}")
        model_tft = best_model
    else:
        # default quantiles for QuantileRegression
        input_chunk_length = 3
        forecast_horizon = 1
        model_tft = TFTModel(
            input_chunk_length=input_chunk_length,
            output_chunk_length=forecast_horizon,
            hidden_size=32,
            lstm_layers=4,
            num_attention_heads=4,
            dropout=0.1,
            batch_size=16,
            n_epochs=200,
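            # add_relative_index appends a relative position index as a future
            # covariate, so TFT can train without explicit future covariates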
            add_relative_index=True,
            add_encoders=None,
            likelihood=QuantileRegression(
                quantiles=quantiles
            ),  # QuantileRegression is set per default
            # loss_fn=MSELoss(),
            random_state=0,
            optimizer_cls=optim.Adam,
            optimizer_kwargs={"lr": 1e-3},
            lr_scheduler_cls=optim.lr_scheduler.ReduceLROnPlateau,
            lr_scheduler_kwargs={
                # darts passes the optimizer instance to the scheduler itself,
                # so it must not appear in these kwargs
                "threshold": 0.0001,
                "verbose": True,
            },
            #torch_device_str="cuda:0",
        )
    return model_tft
    
def eval_backtest(backtest_series, actual_series, horizon, start, transformer):
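    # figsize and the quantile bounds/labels below are module-level globals
    # defined in __main__ before this function is called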
    plt.figure(figsize=figsize)
    actual_series.plot(label="actual")
    backtest_series.plot(
        low_quantile=lowest_q, high_quantile=highest_q, label=label_q_outer
    )
    backtest_series.plot(low_quantile=low_q, high_quantile=high_q, label=label_q_inner)
    plt.legend()
    plt.title(f"Backtest, starting {start}, {horizon}-months horizon")
    print(
        "MAPE: {:.2f}%".format(
            mape(
                transformer.inverse_transform(actual_series),
                transformer.inverse_transform(backtest_series),
            )
        )
    )

if __name__ == "__main__":
    data = pd.read_csv("data.csv")
    # The data also has requests and hits on the cache. Hits almost overlap
    # with 'y'; requests have much the same curvature, just a greater magnitude.
    series = TimeSeries.from_series(data["y"].reset_index(drop=True))
    trainset_size = 0.60
    train, val = series.split_after(trainset_size)

    # Normalize the time series (note: we avoid fitting the transformer on the validation set)
    scaler = MinMaxScaler(feature_range=(0.01, 1.01))
    transformer = Scaler(scaler)
    train_transformed = transformer.fit_transform(train)
    val_transformed = transformer.transform(val)
    series_transformed = transformer.transform(series)

    model_tft = define_TFT_model(
        train_set=train_transformed,
        val_set=val_transformed,
        gridsearch=False,
    )
    model_tft.fit(
        series=train_transformed, val_series=val_transformed, verbose=True
    )
   
    pred_series = model_tft.historical_forecasts(
        series_transformed,
        start=trainset_size,
        forecast_horizon=1,
        stride=1,
        retrain=False,
        last_points_only=False,  # return the list of forecasts that concatenate() expects
        verbose=True,
    )
    
    figsize = (9, 6)
    lowest_q, low_q, high_q, highest_q = 0.01, 0.1, 0.9, 0.99
    label_q_outer = f"{int(lowest_q * 100)}-{int(highest_q * 100)}th percentiles"
    label_q_inner = f"{int(low_q * 100)}-{int(high_q * 100)}th percentiles"

    eval_backtest(
        backtest_series=concatenate(pred_series),
        actual_series=series_transformed,
        horizon="1 horizon",
        start=trainset_size,
        transformer=transformer,
    )

gsamaras · Aug 09 '22 21:08

By using requests and hits as future covariates (as explained in the Quickstart guide), I was able to reduce the MAPE to 1.78! Roughly, the change looks like the sketch below. Unless you have something else to propose, please feel free to close this.
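A minimal sketch of the change, reusing the variables from the script above; the "requests" and "hits" column names come from my dataset:

# Build a multivariate covariates series from the extra columns and scale it,
# fitting the scaler on the training part only
covariates = TimeSeries.from_dataframe(
    data[["requests", "hits"]].reset_index(drop=True)
)
cov_train, _ = covariates.split_after(trainset_size)
cov_transformer = Scaler(MinMaxScaler(feature_range=(0.01, 1.01)))
cov_transformer.fit(cov_train)
covariates_transformed = cov_transformer.transform(covariates)

# Pass the covariates both at fit time and when backtesting
model_tft.fit(
    series=train_transformed,
    val_series=val_transformed,
    future_covariates=covariates_transformed,
    val_future_covariates=covariates_transformed,
    verbose=True,
)
pred_series = model_tft.historical_forecasts(
    series_transformed,
    future_covariates=covariates_transformed,
    start=trainset_size,
    forecast_horizon=1,
    stride=1,
    retrain=False,
    last_points_only=False,
    verbose=True,
)

Since requests and hits are known ahead of time on our side, they qualify as future covariates; if they were only available up to the present, past_covariates would be the safer choice.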

gsamaras · Aug 11 '22 09:08