modeltime icon indicating copy to clipboard operation
modeltime copied to clipboard

How to obtain the Confidence Intervals

Open mbanco opened this issue 2 years ago • 0 comments

Hello Matt,

I can't work out how to get the confidence intervals in the forecast (projection) produced by the code below:

library(modeltime); library(tidymodels); library(workflowsets); library(tidyverse); library(timetk);

# Forecast settings: confidence level for the intervals and the number of
# future periods to project for each series.
nivel_confianza <- 0.75
cant_proyec <- 60

# Panel data: keep the series id, date and value columns and give them
# short names.
dataset_tbl <- walmart_sales_weekly %>%
  select(id, Date, Weekly_Sales) %>%
  set_names(c("id", "fecha", "datos"))

# Number of observations per series. `count(id)` already groups by `id`, so a
# preceding `group_by()` is redundant and would leave the result grouped.
cant <- dataset_tbl %>%
  count(id)

# Size the assessment window as 30% of the shortest series' row count.
splits <- time_series_split(
  dataset_tbl,
  date_var   = fecha,
  assess     = ceiling(min(cant$n) * 0.3),
  cumulative = TRUE
)

# Plot the train/test split as a static (non-interactive) chart.
# `FALSE` is spelled out because `F` is an ordinary binding that can be
# reassigned, whereas `FALSE` is a reserved word.
splits %>%
  tk_time_series_cv_plan() %>%
  plot_time_series_cv_plan(fecha, datos, .interactive = FALSE)

# Preprocessing recipe: derive time-series signature features from `fecha`,
# drop the raw date, normalise the numeric index, remove zero-variance
# predictors and one-hot encode the nominal predictors.
recipe_spec <- recipe(datos ~ ., data = training(splits)) %>%
  step_timeseries_signature(fecha) %>%
  step_rm(fecha) %>%
  step_normalize(fecha_index.num) %>%
  step_zv(all_predictors()) %>%
  step_dummy(all_nominal_predictors(), one_hot = TRUE)

# One xgboost regression spec per candidate learning rate.
model_tbl <- create_model_grid(
  tibble(learn_rate = c(0.001, 0.010, 0.100, 0.350, 0.500, 0.650)),
  f_model_spec = boost_tree,
  engine_name  = "xgboost",
  mode         = "regression"
)

model_list <- model_tbl$.models

# Cross the single recipe with every model spec.
model_wfset <- workflow_set(
  preproc = list(recipe_spec),
  models  = model_list,
  cross   = TRUE
)

# Fit every workflow on the training split.
model_parallel_tbl <- modeltime_fit_workflowset(
  model_wfset,
  data    = training(splits),
  control = control_fit_workflowset(verbose = TRUE, allow_par = TRUE)
)

# Calibrate on the hold-out set and KEEP the calibrated table.
# modeltime_forecast() builds its confidence intervals from the calibration
# residuals stored by modeltime_calibrate(); previously the calibrated table
# was piped straight into modeltime_accuracy() and thrown away, so the
# refitted models carried no calibration data and no intervals were produced.
calibration_tbl <- model_parallel_tbl %>%
  modeltime_calibrate(
    testing(splits),
    id            = "id",
    conf_interval = nivel_confianza,
    conf_by_id    = TRUE
  )

# Out-of-sample accuracy per model (pooled across ids); used later to pick
# the model with the lowest RMSE.
test <- calibration_tbl %>%
  modeltime_accuracy(acc_by_id = FALSE)

# Refit the *calibrated* table on the full dataset so the calibration data
# travels with the refitted models. `control` must be passed by name: it
# comes after `...` in modeltime_refit(), so a positional control_refit()
# would be absorbed by `...` and `allow_par` would be ignored.
refit_tbl <- calibration_tbl %>%
  modeltime_refit(
    data    = dataset_tbl,
    control = control_refit(allow_par = TRUE)
  )

# Future dates for every series: `cant_proyec` new periods per `id`,
# returned without the historical rows.
future_tbl <- dataset_tbl %>%
  group_by(id) %>%
  future_frame(
    .date_var   = fecha,
    .length_out = cant_proyec,
    .bind_data  = FALSE
  )

# Row of the model with the lowest RMSE in the accuracy table.
best_row <- which.min(test$rmse)

# Forecast with that model only, requesting intervals per `id`.
projection <- refit_tbl[best_row, ] %>%
  modeltime_forecast(
    new_data      = future_tbl,
    actual_data   = dataset_tbl,
    keep_data     = FALSE,
    conf_interval = nivel_confianza,
    conf_by_id    = TRUE
  ) %>%
  group_by(id)

parallel_stop()

Thanks.

Best Regards

Mauricio

mbanco avatar Feb 09 '23 17:02 mbanco