quantile_ensembles icon indicating copy to clipboard operation
quantile_ensembles copied to clipboard

Book chapter on computing quantile forecasts for ensembles

Quantile forecasting with ensembles and combinations

Book chapter by Rob J Hyndman.

library(dplyr)
library(tidyr)
library(ggplot2)
library(tsibble)
library(fable)
library(lubridate)
library(distributional)
set.seed(2020-08-08)

First we download the Australian cafe data from the Australian Bureau of Statistics, and keep 2006-2019.

cafe <- readabs::read_abs(series_id = "A3349870V") %>%
  select(date, value) %>%
  mutate(date = yearmonth(date)) %>%
  as_tsibble(index = date) %>%
  filter(
    date >= yearmonth("2006 Jan"),
    date <= yearmonth("2019 Dec")
  )

We will fit ETS and ARIMA models, and a combination of the two.

train <- cafe %>%
  filter(year(date) <= 2018)
fit <- train %>%
  model(
    ETS = ETS(value),
    ARIMA = ARIMA(value ~ pdq(d = 1) + PDQ(D = 1)),
    SNAIVE = SNAIVE(value)
  ) %>%
  mutate(COMBINATION = (ETS + ARIMA) / 2)

Figure 1: future sample paths

future <- fit %>%
  select(ETS, ARIMA) %>%
  generate(times = 5, h = "1 year") %>%
  mutate(modrep = paste0(.model, .rep))
train %>%
  filter(year(date) >= 2015) %>%
  autoplot(value) +
  geom_line(data = future,  aes(y = .sim, col = .model, group = modrep)) +
  labs(x = "Month", y = "Turnover (A$million)") +
  guides(colour = guide_legend("Model"))

Figure 2: deciles from ETS model

fit %>%
  select(ETS) %>%
  generate(times = 1000, h = "1 year") %>%
  as_tibble() %>%
  group_by(date) %>%
  summarise(
    qs = quantile(.sim, seq(from = 0.1, to = 0.9, by = 0.1)), prob = seq(from = 0.1, to = 0.9, by = 0.1)
  ) %>%
  ggplot(aes(x = date)) +
  geom_line(aes(y = qs, group = prob), col = "blue", alpha = 0.5) +
  geom_line(aes(y = value), data = cafe %>% filter(year(date) == 2019)) +
  geom_line(aes(y = value), data = train %>% filter(year(date) >= 2015)) +
  labs(x = "Month", y = "Turnover (A$million)")

Ensemble combining ETS and ARIMA sample paths

ensemble <- fit %>%
  select(-SNAIVE) %>%
  generate(times = 10000, h = "1 year") %>%
  summarise(
    value = dist_sample(list(.sim)),
    .mean = mean(value)
  ) %>%
  mutate(
    .model = "ENSEMBLE"
  ) %>% 
  as_fable(distribution = value, response = "value")
#> Warning: The dimnames of the fable's distribution are missing and have been set
#> to match the response variables.

CRPS calculations

fcasts <- fit %>%
  forecast(h = "1 year") %>%
  bind_rows(ensemble)
crps <- fcasts %>%
  accuracy(cafe, measures = list(CRPS = CRPS))
snaive_crps <- crps %>%
  filter(.model == "SNAIVE") %>%
  pull(CRPS)
crps %>%
  mutate(skillscore = 100 * (1 - CRPS / snaive_crps)) %>%
  arrange(skillscore)
#> # A tibble: 5 x 4
#>   .model      .type  CRPS skillscore
#>   <chr>       <chr> <dbl>      <dbl>
#> 1 SNAIVE      Test   68.6        0  
#> 2 ARIMA       Test   32.9       52.0
#> 3 ETS         Test   31.5       54.0
#> 4 COMBINATION Test   30.9       54.9
#> 5 ENSEMBLE    Test   29.7       56.7