
Sklearn wrapping for pytorch forecasting model example?

Open AugustComte opened this issue 6 months ago • 1 comment

Hello there,

Is your documentation request related to a problem? Please describe. I've been having difficulty wrapping my PyTorch forecasting models for use with your library. I have read that you can "Easily wrap any model (scikit-learn, tensorflow, pytorch, …) with, if needed, a scikit-learn-compatible wrapper for the purposes just mentioned.", but I find this non-trivial in practice, partly because I have never done it, and mostly because the documentation focuses on regressors and classifiers, with little on time-series forecasting models, especially PyTorch ones. The example in the time-series section of the MAPIE docs uses a random forest, I believe, which is fine for models that sit naturally in scikit-learn, but PyTorch models seem harder to set up. I have tried using skorch to help with this, but its documentation also lacks examples where time-series models are wrapped. Secondly, most examples use univariate data with no exogenous variables, whereas univariate data with exogenous variables is arguably more common in business settings outside of finance; even within finance, many models use exogenous variables to inform the forecast.
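For context, this is the kind of minimal skorch wrapping I have been attempting (the module name, layer sizes, and hyperparameters below are illustrative placeholders, not my real pipeline), which I believe exposes the fit/predict interface an sklearn-style library expects:

import numpy as np
import torch.nn as nn
from skorch import NeuralNetRegressor

class TinyForecastNet(nn.Module):  # illustrative module, not my actual network
    def __init__(self, n_features=6):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_features, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        return self.net(x)

# skorch wraps the torch module in an sklearn-style fit/predict estimator
wrapped_model = NeuralNetRegressor(
    TinyForecastNet,
    module__n_features=6,
    max_epochs=50,
    lr=1e-3,
    batch_size=32,
    train_split=None,   # keep all rows for the outer library's own resampling
    verbose=0,
)
# skorch regressors expect float32 inputs and a 2D float32 target, e.g.:
# wrapped_model.fit(X.astype(np.float32), y.astype(np.float32).reshape(-1, 1))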

Describe the solution you'd like I would like to see a clear example of applying MAPIE's conformal intervals for time series (EnbPI and the improved/adaptive version) to a PyTorch model: wrapping the model and fitting it to time-series data both with and without exogenous variables. If the exogenous data has already been created as tensors, how should we reshape it? Or should we not do that at all and let something like skorch handle it, just setting a batching parameter? The example should also show how to validate the performance of these approaches on time series, similar to this notebook: https://github.com/mtorabirad/MLBoost/blob/main/Episode15/Episode15Main.ipynb.
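To make the request concrete, this is roughly the end-to-end usage I would hope such an example documents, based on my reading of the MAPIE time-series docs (so the exact imports and arguments may well need correcting). It assumes a scikit-learn-compatible estimator such as the skorch sketch above, and placeholder X_train / y_train / X_test arrays that already contain the lagged and exogenous features as tabular columns:

from mapie.regression import MapieTimeSeriesRegressor
from mapie.subsample import BlockBootstrap

# Block bootstrap preserves temporal structure within each resample
cv = BlockBootstrap(n_resamplings=30, length=48, overlapping=True, random_state=42)

mapie_ts = MapieTimeSeriesRegressor(
    wrapped_model,        # any estimator exposing sklearn fit/predict
    method="enbpi",
    cv=cv,
    agg_function="mean",
    n_jobs=-1,
)
mapie_ts.fit(X_train, y_train)
y_pred, y_pis = mapie_ts.predict(X_test, alpha=0.05, ensemble=True)

# the "improved" variant: update residuals online as new observations arrive
# mapie_ts.partial_fit(X_new, y_new)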

Here's my rather patchy code (minus my attempts to turn it into an sklearn-wrapped model); if it helps, you are welcome to use it. pseudo_sales.csv


import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import lightning as L
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from torch.utils.data import DataLoader,Dataset

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error

import matplotlib.pyplot as plt


from lightning import Trainer
plt.style.use('fivethirtyeight')

gpu_available = torch.cuda.is_available()
device_name = torch.cuda.get_device_name(0) if gpu_available else "CPU"


data = pd.read_csv('data/pseudo_sales.csv')  # forward slash avoids backslash-escape issues in the path
data['date'] = pd.to_datetime(data['date'], format='%d/%m/%Y')
data = data.sort_values('date')

def add_datetime_features(df, datetime_column):
    df[datetime_column] = pd.to_datetime(df[datetime_column])
    df['year'] = df[datetime_column].dt.year
    df['month'] = df[datetime_column].dt.month
    df['week'] = df[datetime_column].dt.isocalendar().week
    df['day'] = df[datetime_column].dt.day
    
    return df.copy()

df = add_datetime_features(data, "date")

date_col = df['date'].copy()
features = df.drop(columns=['sales','date']).copy()
target = df['sales'].copy()

# scaling 
feature_scaler = MinMaxScaler()
target_scaler = MinMaxScaler()
features_scaled = feature_scaler.fit_transform(features)
target_scaled = target_scaler.fit_transform(target.values.reshape(-1, 1))

def create_time_windows(features, target, window_size):
    inputs = []
    targets = []
    for i in range(len(features) - window_size):
        inputs.append(features[i:i+window_size])
        targets.append(target[i+window_size])
    return np.array(inputs), np.array(targets)

window_size = 32
inputs, targets = create_time_windows(features_scaled, target_scaled, window_size)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# apply device to tensors and create datasets
class TimeSeriesDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = torch.tensor(inputs, dtype=torch.float32).to(device)
        self.targets = torch.tensor(targets, dtype=torch.float32).to(device)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]
        
train_size = int(0.6 * len(inputs))
val_size = int(0.2 * len(inputs))
test_size = len(inputs) - train_size - val_size

train_dataset = TimeSeriesDataset(inputs[:train_size], targets[:train_size])
val_dataset = TimeSeriesDataset(inputs[train_size:train_size + val_size], targets[train_size:train_size + val_size])
test_dataset = TimeSeriesDataset(inputs[train_size + val_size:], targets[train_size + val_size:])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)  # No shuffle for time-series
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

class ffnetwork(L.LightningModule):
    def __init__(self, input_dim, sequence_length, hidden_dim, num_layers=1, output_dim=1, learning_rate=0.0001, dropout_rate=0.5, activation_func=nn.ReLU()):
        super(ffnetwork, self).__init__()
        
        # Use sequence_length passed to the model to define the first layer
        self.layers = nn.ModuleList([nn.Linear(input_dim * sequence_length, hidden_dim)])
        
        # Add additional hidden layers
        for _ in range(num_layers - 1):
            self.layers.append(nn.Linear(hidden_dim, hidden_dim))
        
        # Define the output layer
        self.layers.append(nn.Linear(hidden_dim, output_dim))
        
        # Additional properties
        self.dropout = nn.Dropout(dropout_rate)
        self.activation = activation_func
        self.learning_rate = learning_rate

    def forward(self, x):
        # Flatten the input while preserving the batch size
        batch_size, sequence_length, input_dim = x.size()
        x = x.view(batch_size, sequence_length * input_dim)
        
        # Pass through each layer in the network
        for i in range(len(self.layers) - 1):
            x = self.activation(self.layers[i](x))
            x = self.dropout(x)  # Apply dropout
        
        x = self.layers[-1](x)  # Output layer
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = nn.functional.mse_loss(y_hat, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = nn.functional.mse_loss(y_hat, y)
        self.log('val_loss', loss)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer
        
input_dim = inputs.shape[2]  
sequence_length = inputs.shape[1]  
hidden_dim = 63  
num_layers = 4   
output_dim = 1   
learning_rate = 0.000689
dropout_rate = 0.170971


model = ffnetwork(input_dim=input_dim, sequence_length=sequence_length, hidden_dim=hidden_dim,
                  num_layers=num_layers, output_dim=output_dim, learning_rate=learning_rate, dropout_rate=dropout_rate).to(device)
                  
early_stopping = EarlyStopping(
    monitor='val_loss',  # Metric to monitor
    patience=50,         # Number of epochs to wait without improvement
    verbose=True,
    mode='min'           # 'min' because we want to minimize the validation loss
)

# Initialise and train
trainer = Trainer(
    accelerator="gpu" if gpu_available else "cpu", devices=1,
    max_epochs=300,
    callbacks=[early_stopping],
    logger=True)

trainer.fit(model, train_loader, val_loader)

# List to store predictions and actual values
predictions = []
actuals = []

# Set the model to evaluation mode
model.eval()

# run inference on the same device as the trained model, then move results to CPU for numpy
device = next(model.parameters()).device

with torch.no_grad():
    for batch in test_loader:
        x_batch, y_batch = batch
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        outputs = model(x_batch)
        predictions.extend(outputs.squeeze().cpu().numpy())
        actuals.extend(y_batch.squeeze().cpu().numpy())

# back-transform predictions and actuals to the original sales scale
predictions = target_scaler.inverse_transform(np.array(predictions).reshape(-1, 1))
actuals = target_scaler.inverse_transform(np.array(actuals).reshape(-1, 1))
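
# (small addition for completeness, using the metrics imported above as a point-forecast sanity check)
print("Test MSE:", mean_squared_error(actuals, predictions))
print("Test MAE:", mean_absolute_error(actuals, predictions))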

Thank you for reading.

AugustComte · Jul 31 '24 15:07