forecastHybrid
forecastHybrid copied to clipboard
Matrix of weights
My suggestion is to create an matrix with all possible combinations of like
Then a function can create an matrix with all possible combinations e.g:
aut<- auto.arima() = 0.1
ets <- ets() = 0,4
aut||---||--- ---||ets||--- ---||---||the aut||ets||--- aut||---||aut ---||ets||aut aut||ets||the Then divide each line by sum of members
Maybe this demo demonstrates it better example from https://robjhyndman.com/hyndsight/benchmark-combination/
h<- 12
y<- USAccDeaths
fcasts <- rbind(
N = snaive(y, h)$mean,
E = forecast(ets(USAccDeaths), h)$mean,
A = forecast(auto.arima(y), h)$mean,
T = thetaf(y, h)$mean)
colnames(fcasts) <- seq(h)
method_names <- rownames(fcasts)
# Compute all possible combinations
method_choice <- rep(list(0:1), length(method_names))
names(method_choice) <- method_names
combinations <- expand.grid(method_choice) %>% tail(-1) %>% as.matrix() #Here
# Construct names for all combinations
for (i in seq(NROW(combinations))) {
rownames(combinations)[i] <- paste0(method_names[which(combinations[i, ] > 0)],
collapse = "")
}
# Compute combination weights
combinations <- sweep(combinations, 1, rowSums(combinations), FUN = "/")
The only thing holding me back is my inexperience with programming, but I would replace the 1s the #Here,we also we have an unique opportunity to leverage our knowledge of the weights generated in the cross validation process and semi-automate a good choice for all series for example if weight x < y , 1 in combinations = 0.
This matrix could be used to create accuracy() function to see which is the overall better combination, an extension would be to add the matrix for weights = "equal" and weights = "insample.errors"
Maybe add a similar matrix for all Horizons so we could implement #17.
Sorry if it was confusing, and for the bad english.
Ok I have sucesfully implemented a way to check out of bounds performance of all methods
library(forecastHybrid)
library(tictoc)
library(tidyverse)
library(furrr)
benchmarks <- function(y, h) {
require(forecast)
hm1<- hybridModel(
y,
models = "aefnstz",
weights = "cv.errors",
cvHorizon = frequency(y),
windowSize = length(y) - frequency(y) * 2,
horizonAverage = TRUE
)
aC = forecast(hm1$auto.arima,h)
eC = forecast(hm1$ets,h)
fC = forecast(hm1$thetam,h)
nC = forecast(hm1$nnetar,h)
sC = forecast(hm1$stlm,h)
tC = forecast(hm1$tbats,h)
zC = forecast(hm1$snaive,h)
fcastsC <- rbind(
aC = aC$mean,
eC = eC$mean,
fC = fC$mean,
nC = nC$mean,
sC = sC$mean,
tC = tC$mean,
zC = zC$mean[1:h]
)
colnames(fcastsC) <- seq(h)
method_names <- rownames(fcastsC)
# Compute all possible combinations
Weights <- rbind(hm1$weights)
method_choice <-list()
Lenght <-rep(1, length(Weights))
for (i in seq_along(Lenght)) {
method_choice[[i]] <- c(0, Weights[i])
}
names(method_choice) <- method_names
combinations <- expand.grid(method_choice) %>% tail(-1) %>% as.matrix()
# Construct names for all combinations
for (i in seq(NROW(combinations))) {
rownames(combinations)[i] <- paste0(method_names[which(combinations[i, ] > 0)],
collapse = "")
}
combinationsC <- sweep(combinations, 1, rowSums(combinations), FUN = "/")
#Combinations Averages
aA = forecast(hm1$auto.arima,h)
eA = forecast(hm1$ets,h)
fA = forecast(hm1$thetam,h)
nA = forecast(hm1$nnetar,h)
sA = forecast(hm1$stlm,h)
tA = forecast(hm1$tbats,h)
zA = forecast(hm1$snaive,h)
fcastsA <- rbind(
aA = aA$mean,
eA = eA$mean,
fA = fA$mean,
nA = nA$mean,
sA = sA$mean,
tA = tA$mean,
zA = zA$mean[1:h]
)
colnames(fcastsA) <- seq(h)
method_names <- rownames(fcastsA)
# Compute all possible combinations
method_choice <- rep(list(0:1), length(method_names))
names(method_choice) <- method_names
combinations <- expand.grid(method_choice) %>% tail(-1) %>% as.matrix()
# Construct names for all combinations
for (i in seq(NROW(combinations))) {
rownames(combinations)[i] <- paste0(method_names[which(combinations[i, ] > 0)],
collapse = "")
}
# Compute combination weights
combinationsA <- sweep(combinations, 1, rowSums(combinations), FUN = "/")
CombinationsFC <- combinationsC %*% fcastsC
combinationsFA <- combinationsA %*% fcastsA
combinationsFAC <- rbind(CombinationsFC,combinationsFA)
return(combinationsFAC)
}
Then we can apply this function to any dataset we want a good summary on how well Model Combinations fare
Test Data Since I arbitarily choose a value for cv.horrizon I need to crop the dataset for 2 too short series
library(Mcomp)
library(rlist)
SubsetM3 <- list.filter(M3, "MONTHLY" %in% period & n > 48)
Create Function to test errors
ErrorFunction <- function(u) {
train <- u$x
test <- u$xx
print(u$sn)
tic(paste("Hybrid Model Call",u$sn,sep = ""))
f <- benchmarks(train, u$h)
toc()
error <- -sweep(f, 2, test)
pcerror <- (200 * abs(error) / sweep(abs(f), 2, abs(test), FUN = "+")) %>%
as_tibble() %>%
mutate(Method = rownames(f)) %>%
gather(key = h, value = sAPE, -Method)
scalederror <- (abs(error) / mean(abs(diff(train, lag = frequency(train))))) %>%
as_tibble() %>%
mutate(Method = rownames(f)) %>%
gather(key = h, value = ASE, -Method)
return(list(pcerror = pcerror, scalederror = scalederror))
}
Compute "symmetric" percentage errors and scaled errors
plan(multiprocess)
tic("Whole Process")
errors <- future_map(SubsetM3,ErrorFunction)
toc()
Construct a tibble with all results
M3errors <- tibble(
Series = names(SubsetM3),
Period = map_chr(SubsetM3, "period"),
se = map(errors, "scalederror"),
pce = map(errors, "pcerror")) %>%
unnest() %>%
select(-h1, -Method1) %>%
mutate(h = as.integer(h),
Period = factor(str_to_title(Period),
levels = c("Monthly","Quarterly","Yearly","Other")))
accuracy <- M3errors %>%
group_by(Period, Method, h) %>%
summarize(MASE=mean(ASE), sMAPE=mean(sAPE)) %>%
ungroup()
Find names of original methods
original_methods <- unique(accuracy[["Method"]])
original_methods <- original_methods[str_length(original_methods)==1L]
Compute summary table of accuracy measures
accuracy_table <- accuracy %>%
group_by(Method,Period) %>%
summarise(
sMAPE = mean(sMAPE, na.rm = TRUE),
MASE = mean(MASE, na.rm = TRUE) ) %>%
arrange(sMAPE,MASE) %>% ungroup()
best <- accuracy_table %>% filter(MASE==min(MASE))
accuracy_table <- accuracy_table %>%
filter(Method %in% original_methods | Method %in% best$Method) %>%
arrange(Period, MASE) %>%
select(Period, Method, sMAPE, MASE)
knitr::kable(accuracy_table)
Can you show me how to solve all possible combinations of models with the cv.horrizon up to 18 #17? I think you may need to drop the stlm model, but that should be fine.