MAPIE
Sklearn wrapping for pytorch forecasting model example?
Hello there,
Is your documentation request related to a problem? Please describe.
I've been having difficulty wrapping my PyTorch forecasting models for use with your library. I have read that you can "Easily wrap any model (scikit-learn, tensorflow, pytorch, …) with, if needed, a scikit-learn-compatible wrapper for the purposes just mentioned.", but I personally find this non-trivial, partly because I have never done it, and more importantly because most of the documentation covers regressors and classifiers, with very little on time-series forecasting models, especially PyTorch ones. The example in the time series section of the MAPIE docs uses a random forest, I believe, which is fine for models that fit naturally into scikit-learn, but PyTorch models seem harder to set up. I have attempted to use skorch to help with this, but its documentation also lacks examples where time series models are wrapped. Secondly, most examples use univariate data with no exogenous variables, while univariate data with exogenous variables is perhaps more common in a business setting outside of finance; even then, a lot of financial models use exogenous variables to inform the model.
Describe the solution you'd like
I would like to see a clear example of applying MAPIE's conformal intervals (EnbPI and the improved, adaptive version) to a PyTorch model: wrapping it, and then applying it to time series data with and without exogenous variables. If the exogenous data has already been created as tensors, how do we reshape it? Or should we not do that at all, let something like skorch handle it, and just set a batching parameter? The example should also show how to validate the performance of the approaches for time series, similar to this notebook: https://github.com/mtorabirad/MLBoost/blob/main/Episode15/Episode15Main.ipynb
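To make the reshaping question concrete, this is the kind of thing I mean (the shapes and arrays here are purely hypothetical, just to illustrate the question):

import numpy as np

# hypothetical: 500 windows of 32 timesteps x 6 exogenous features
X3d = np.random.rand(500, 32, 6).astype(np.float32)

# scikit-learn-style estimators (and, as far as I can tell, MAPIE) expect a 2D X,
# so do we flatten each window like this and let the wrapped network un-flatten it
# again inside forward()...
X2d = X3d.reshape(len(X3d), -1)   # shape (500, 32 * 6) = (500, 192)

# ...or should the 3D windows be handed to something like skorch directly,
# only setting a batching parameter?
print(X2d.shape)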
Here's my rather patchy code, minus my attempts to turn it into an sklearn-wrapped model (a rough sketch of the skorch/MAPIE wrapping I've been picturing is at the very end); if it helps, you are welcome to use it. The data is attached as pseudo_sales.csv.
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import lightning as L
from lightning import Trainer
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
gpu_available = torch.cuda.is_available()
device_name = torch.cuda.get_device_name(0) if gpu_available else "CPU"
data = pd.read_csv('data/pseudo_sales.csv')  # forward slashes avoid backslash-escape issues in the path
data['date'] = pd.to_datetime(data['date'], format='%d/%m/%Y')
data = data.sort_values('date')
def add_datetime_features(df, datetime_column):
    df[datetime_column] = pd.to_datetime(df[datetime_column])
    df['year'] = df[datetime_column].dt.year
    df['month'] = df[datetime_column].dt.month
    df['week'] = df[datetime_column].dt.isocalendar().week
    df['day'] = df[datetime_column].dt.day
    return df.copy()
df = add_datetime_features(data, "date")
date_col = df['date'].copy()
features = df.drop(columns=['sales','date']).copy()
target = df['sales'].copy()
# scaling
feature_scaler = MinMaxScaler()
target_scaler = MinMaxScaler()
features_scaled = feature_scaler.fit_transform(features)
target_scaled = target_scaler.fit_transform(target.values.reshape(-1, 1))
def create_time_windows(features, target, window_size):
    inputs = []
    targets = []
    for i in range(len(features) - window_size):
        inputs.append(features[i:i+window_size])
        targets.append(target[i+window_size])
    return np.array(inputs), np.array(targets)
window_size = 32
inputs, targets = create_time_windows(features_scaled, target_scaled, window_size)
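# (For the sklearn/MAPIE attempt sketched at the very end: scikit-learn-style
#  estimators seem to want a 2D X, so I also keep a flattened float32 copy of the
#  windows here -- I'm not sure whether this is the intended approach or whether
#  skorch can be handed the 3D windows directly.)
X_flat = inputs.reshape(len(inputs), -1).astype(np.float32)   # (n_windows, window_size * n_features)
y_flat = targets.astype(np.float32)                           # (n_windows, 1); skorch wants 2D float32 targets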
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# apply device to tensors and create datasets
class TimeSeriesDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = torch.tensor(inputs, dtype=torch.float32).to(device)
        self.targets = torch.tensor(targets, dtype=torch.float32).to(device)

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]
train_size = int(0.6 * len(inputs))
val_size = int(0.2 * len(inputs))
test_size = len(inputs) - train_size - val_size
train_dataset = TimeSeriesDataset(inputs[:train_size], targets[:train_size])
val_dataset = TimeSeriesDataset(inputs[train_size:train_size + val_size], targets[train_size:train_size + val_size])
test_dataset = TimeSeriesDataset(inputs[train_size + val_size:], targets[train_size + val_size:])
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False) # No shuffle for time-series
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
class ffnetwork(L.LightningModule):
    def __init__(self, input_dim, sequence_length, hidden_dim, num_layers=1, output_dim=1,
                 learning_rate=0.0001, dropout_rate=0.5, activation_func=nn.ReLU()):
        super().__init__()
        # Use sequence_length passed to the model to define the first layer
        self.layers = nn.ModuleList([nn.Linear(input_dim * sequence_length, hidden_dim)])
        # Add additional hidden layers
        for _ in range(num_layers - 1):
            self.layers.append(nn.Linear(hidden_dim, hidden_dim))
        # Define the output layer
        self.layers.append(nn.Linear(hidden_dim, output_dim))
        # Additional properties
        self.dropout = nn.Dropout(dropout_rate)
        self.activation = activation_func
        self.learning_rate = learning_rate

    def forward(self, x):
        # Flatten the input while preserving the batch size
        batch_size, sequence_length, input_dim = x.size()
        x = x.view(batch_size, sequence_length * input_dim)
        # Pass through each hidden layer with activation and dropout
        for i in range(len(self.layers) - 1):
            x = self.activation(self.layers[i](x))
            x = self.dropout(x)  # Apply dropout
        x = self.layers[-1](x)  # Output layer
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = nn.functional.mse_loss(y_hat, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = nn.functional.mse_loss(y_hat, y)
        self.log('val_loss', loss)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer
input_dim = inputs.shape[2]
sequence_length = inputs.shape[1]
hidden_dim = 63
num_layers = 4
output_dim = 1
learning_rate = 0.000689
dropout_rate = 0.170971
model = ffnetwork(input_dim=input_dim, sequence_length=sequence_length, hidden_dim=hidden_dim,
                  num_layers=num_layers, output_dim=output_dim, learning_rate=learning_rate,
                  dropout_rate=dropout_rate).to(device)
early_stopping = EarlyStopping(
    monitor='val_loss',  # Metric to monitor
    patience=50,         # Number of epochs to wait without improvement
    verbose=True,
    mode='min'           # 'min' because we want to minimize the validation loss
)
# Initialise and train
trainer = Trainer(
    accelerator="gpu", devices=1,
    max_epochs=300,
    callbacks=[early_stopping],
    logger=True)
trainer.fit(model, train_loader, val_loader)
# List to store predictions and actual values
predictions = []
actuals = []
# Set the model to evaluation mode and check which device it ended up on
model.eval()
device = next(model.parameters()).device
with torch.no_grad():
    for x_batch, y_batch in test_loader:
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)
        outputs = model(x_batch)
        # move back to the CPU before converting to numpy
        predictions.extend(outputs.squeeze().cpu().numpy())
        actuals.extend(y_batch.squeeze().cpu().numpy())
# backtransform
predictions = target_scaler.inverse_transform(np.array(predictions).reshape(-1, 1))
actuals = target_scaler.inverse_transform(np.array(actuals).reshape(-1, 1))
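Finally, here is a rough sketch of the skorch/MAPIE wrapping I have been picturing, loosely based on the EnbPI example in the MAPIE docs. It continues from the code above (it reuses X_flat, y_flat, train_size, val_size and window_size), and the FlatFFNetwork module, the SkorchRegressorAdapter glue class and every hyperparameter are my own placeholders/guesses, so it is very possibly not how you intend the library to be used; it is only meant to show the kind of end-to-end example I am asking for:

from skorch import NeuralNetRegressor
from mapie.regression import MapieTimeSeriesRegressor
from mapie.subsample import BlockBootstrap
from mapie.metrics import regression_coverage_score


class FlatFFNetwork(nn.Module):
    # Same idea as ffnetwork above, but it takes the already-flattened
    # (batch, window_size * n_features) rows that a scikit-learn-style X implies.
    def __init__(self, n_inputs, hidden_dim=64, dropout_rate=0.17):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(n_inputs, hidden_dim), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, hidden_dim), nn.ReLU(), nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, x):
        return self.net(x)


class SkorchRegressorAdapter(NeuralNetRegressor):
    # Glue I guessed at: MAPIE/scikit-learn tend to pass float64 X and a 1D y,
    # while skorch wants float32 and a 2D y, and MAPIE expects 1D predictions back.
    def fit(self, X, y, **fit_params):
        return super().fit(np.asarray(X, dtype=np.float32),
                           np.asarray(y, dtype=np.float32).reshape(-1, 1), **fit_params)

    def predict(self, X):
        return super().predict(np.asarray(X, dtype=np.float32)).ravel()


# same chronological split as above, on the flattened copies
X_train_flat, y_train_flat = X_flat[:train_size], y_flat[:train_size]
X_test_flat, y_test_flat = X_flat[train_size + val_size:], y_flat[train_size + val_size:]

base_estimator = SkorchRegressorAdapter(
    FlatFFNetwork,
    module__n_inputs=X_flat.shape[1],
    max_epochs=100,
    lr=1e-3,
    batch_size=32,
    optimizer=torch.optim.Adam,
    train_split=None,               # no inner validation split; MAPIE drives the resampling
    iterator_train__shuffle=False,  # keep chronological order within each bootstrap block
    device="cuda" if torch.cuda.is_available() else "cpu",
    verbose=0,
)

# EnbPI: block-bootstrap the training series, fit an ensemble, aggregate residuals
cv_blocks = BlockBootstrap(n_resamplings=20, length=window_size, overlapping=True, random_state=42)
mapie_enbpi = MapieTimeSeriesRegressor(base_estimator, method="enbpi", cv=cv_blocks, agg_function="mean")
mapie_enbpi.fit(X_train_flat, y_train_flat)

# 95% prediction intervals on the held-out window, plus an empirical coverage check
y_pred_mapie, y_pis = mapie_enbpi.predict(X_test_flat, alpha=0.05, ensemble=True, optimize_beta=True)
coverage = regression_coverage_score(y_test_flat.ravel(), y_pis[:, 0, 0], y_pis[:, 1, 0])
print(f"Empirical coverage of the 95% intervals on the test window: {coverage:.3f}")

# For the improved/adaptive behaviour, I believe partial_fit is meant to update the
# residuals as new observations arrive, e.g. one step at a time:
# mapie_enbpi.partial_fit(X_test_flat[:1], y_test_flat[:1])

Is this roughly the intended pattern, or is there a cleaner way? A documented example along these lines, with and without exogenous variables, is what I am hoping for.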
Thank you for reading.