caretEnsemble
caretEnsemble copied to clipboard
c operator to combine train objects into a list should check for identical indexes (and fail if they're not)
I ran into this issue while working on a different data set/project, but the minimal dataset I detailed below seems to reproduce the issue. However, I do get several warnings running the below code that I don't get when using my other data. If it's necessary, I'll try to construct a more representative facsimile of my data.
When I attempt to use the 'caretStack' function, I'm getting a strange error message:
"Error { .... is not TRUE"
I'm including a screenshot for verification.
https://imgur.com/JbDFqlR
I have no idea how to go about fixing the problem, as I have no idea what is actually broken. I was getting this error in both Windows 10 and Ubuntu 14.04.
Minimal dataset:
# Build a tiny 10-row regression data set: a numeric response plus one
# integer predictor (X) and one two-level categorical predictor (Y).
# No seed is set, so the generated values differ on every run.
library(caret)
col <- c(rnorm(10)*2000)
Data <- data.frame(
X = sample(1:10),
Y = sample(c("yes", "no"), 10, replace = TRUE)
)
# Attach the response as the third column and name it "loss".
Data <- cbind(Data,col)
colnames(Data)[3] <- "loss"
# Build a dummy-variable encoder for the predictors of loss and apply it
# to the same data, then convert the result to a data frame.
dmy <- dummyVars(loss~ ., data = Data)
Data.1 <- predict(dmy, newdata=Data)
Data.1.df <- as.data.frame(Data.1)
Data <- Data.1.df
# Re-attach the response to the encoded predictors. The rename below
# assumes the encoded frame has exactly three columns, so the response
# lands in column 4 -- TODO confirm against the dummyVars output.
Data <- cbind(Data,col)
colnames(Data)[4] <- "loss"
Minimal, runnable code:
# Reproduction of the reported caretStack error.
# Attach the packages used by this script (presumably the back ends for
# the train() methods below), then caretEnsemble itself.
library(elasticnet)
library(pls)
library(nnet)
library(e1071)
library(randomForest)
library(gbm)
library(plyr)
library(MASS)
library(caretEnsemble)
# 10-fold cross-validation repeated 3 times. No `index` is supplied here;
# per the reply in this thread, that means each train() call below ends up
# with different resampling folds, which is the cause of the error.
control <- trainControl(method="repeatedcv", number=10, repeats=3, verboseIter=TRUE)
# Fit seven models for loss on the same data, one train() call per method.
model1 <- train(loss~., data = as.data.frame(Data), method='glm', trControl=control)
model2 <- train(loss~., data = as.data.frame(Data), method='svmRadial', trControl=control)
model3 <- train(loss~., data = as.data.frame(Data), method='svmPoly', trControl=control)
model4 <- train(loss~., data = as.data.frame(Data), method='elm', trControl=control)
model5 <- train(loss~., data = as.data.frame(Data), method='nnet', trControl=control)
model6 <- train(loss~., data = as.data.frame(Data), method='rf', trControl=control)
model7 <- train(loss~., data = as.data.frame(Data), method='lasso', trControl=control)
# Combine all seven fits with c() and try to stack them with a random
# forest and a gbm meta-model; caretStack is where the reported
# "... is not TRUE" error appears.
models <- c(model1, model2, model3, model4, model5, model6, model7)
caretStack(models, method = "rf")
caretStack(models, method = "gbm")
# Same attempt with only two of the base models.
models <- c(model1,model3)
caretStack(models,method = "rf")
caretStack(models,method = "gbm")
Session Info:
>sessionInfo()
R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 10586)
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252 LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C LC_TIME=English_United States.1252
attached base packages:
[1] parallel splines stats graphics grDevices utils datasets methods base
other attached packages:
[1] elmNN_1.0 kernlab_0.9-24 caretEnsemble_2.0.0 MASS_7.3-45 plyr_1.8.4 gbm_2.1.1 survival_2.39-4
[8] randomForest_4.6-12 e1071_1.6-7 caret_6.0-71 ggplot2_2.1.0 lattice_0.20-33 nnet_7.3-12 pls_2.5-0
[15] elasticnet_1.1 lars_1.2
loaded via a namespace (and not attached):
[1] Rcpp_0.12.5 compiler_3.3.1 nloptr_1.0.4 iterators_1.0.8 class_7.3-14 tools_3.3.1 lme4_1.1-12
[8] digest_0.6.10 nlme_3.1-128 gtable_0.2.0 mgcv_1.8-12 Matrix_1.2-6 foreach_1.4.3 SparseM_1.7
[15] gridExtra_2.2.1 stringr_1.1.0 MatrixModels_0.4-1 stats4_3.3.1 grid_3.3.1 data.table_1.9.6 pbapply_1.3-0
[22] minqa_1.2.4 reshape2_1.4.1 car_2.1-2 magrittr_1.5 scales_0.4.0 codetools_0.2-14 pbkrtest_0.4-6
[29] colorspace_1.2-6 quantreg_5.26 stringi_1.1.1 munsell_0.4.3 chron_2.3-47
If there's anything else I need to provide, let me know.
The problem is that your models all use different re-sampling folds, because you do not explicitly define them in your trainControl.
Please use the caretList helper function:
# Suggested fix 1: let caretList train all base models in one call instead
# of calling train() once per method.
library(caret)
# Toy 10-row regression data (unseeded, so values change between runs):
# numeric response plus an integer and a two-level categorical predictor.
col <- c(rnorm(10)*2000)
Data <- data.frame(
X = sample(1:10),
Y = sample(c("yes", "no"), 10, replace = TRUE)
)
# Attach the response as the third column and name it "loss".
Data <- cbind(Data,col)
colnames(Data)[3] <- "loss"
# Dummy-encode the predictors, then re-attach the response. The rename to
# column 4 assumes the encoded frame has three columns -- TODO confirm.
dmy <- dummyVars(loss~ ., data = Data)
Data.1 <- predict(dmy, newdata=Data)
Data.1.df <- as.data.frame(Data.1)
Data <- Data.1.df
Data <- cbind(Data,col)
colnames(Data)[4] <- "loss"
# Attach the packages used by this script (presumably the model back ends
# for methodList), then caretEnsemble.
library(elasticnet)
library(pls)
library(nnet)
library(e1071)
library(randomForest)
library(gbm)
library(plyr)
library(MASS)
library(caretEnsemble)
# 10-fold cross-validation repeated 3 times, shared by every method.
control <- trainControl(method="repeatedcv", number=10, repeats=3, verboseIter=TRUE)
# One caretList call fits every method in methodList with the same
# trainControl. 'lasso' from the original report is not in the list.
models <- caretList(loss~., data = as.data.frame(Data), methodList = c('glm', 'svmRadial', 'svmPoly', 'elm', 'nnet', 'rf'), trControl=control)
# Stack the base models with a random forest meta-model.
caretStack(models, method = "rf")
# Stack with gbm using a fixed single-row tuning grid.
# NOTE(review): n.minobsinnode=1 is presumably needed because the data has
# only 10 rows -- confirm.
caretStack(models, method = "gbm", tuneGrid=expand.grid(n.minobsinnode=1, n.trees=10, interaction.depth=1, shrinkage=0.1))
If you must create the models one at a time, you MUST pass an explicit index to trainControl:
# Suggested fix 2: train the base models one at a time and stack them.
# The resampling folds are created once and handed to trainControl via
# `index`, so every train() call below resamples on identical folds.
library(caret)
# Toy 10-row regression data (unseeded, so values change between runs):
# numeric response plus an integer and a two-level categorical predictor.
col <- c(rnorm(10)*2000)
Data <- data.frame(
  X = sample(1:10),
  Y = sample(c("yes", "no"), 10, replace = TRUE)
)
# Attach the response as the third column and name it "loss".
Data <- cbind(Data,col)
colnames(Data)[3] <- "loss"
# Dummy-encode the predictors, then re-attach the response. The rename to
# column 4 assumes the encoded frame has three columns -- TODO confirm.
dmy <- dummyVars(loss~ ., data = Data)
Data.1 <- predict(dmy, newdata=Data)
Data.1.df <- as.data.frame(Data.1)
Data <- Data.1.df
Data <- cbind(Data,col)
colnames(Data)[4] <- "loss"
# Attach the packages used by this script (presumably the model back ends
# for the train() methods below), then caretEnsemble.
library(elasticnet)
library(pls)
library(nnet)
library(e1071)
library(randomForest)
library(gbm)
library(plyr)
library(MASS)
library(caretEnsemble)
# Shared folds: 10-fold cross-validation repeated 3 times on the response.
index <- createMultiFolds(Data[['loss']], 10, 3)
# savePredictions="final" keeps the held-out predictions for the selected
# tuning parameters so the models can be stacked. classProbs is
# deliberately not set: loss is numeric, so this is a regression problem
# and class probabilities do not apply.
control <- trainControl(method="repeatedcv", number=10, repeats=3, verboseIter=TRUE, index=index, savePredictions="final")
# Fit the base models one at a time, all with the same control object.
model1 <- train(loss~., data = as.data.frame(Data), method='glm', trControl=control)
model2 <- train(loss~., data = as.data.frame(Data), method='svmRadial', trControl=control)
model3 <- train(loss~., data = as.data.frame(Data), method='svmPoly', trControl=control)
model4 <- train(loss~., data = as.data.frame(Data), method='elm', trControl=control)
model5 <- train(loss~., data = as.data.frame(Data), method='nnet', trControl=control)
model6 <- train(loss~., data = as.data.frame(Data), method='rf', trControl=control)
#model7 <- train(loss~., data = as.data.frame(Data), method='lasso', trControl=control) #Always fails
# Combine the six fits with c() and stack them with a random forest, then
# with gbm using a fixed single-row tuning grid.
models <- c(model1, model2, model3, model4, model5, model6)
caretStack(models, method = "rf")
caretStack(models, method = "gbm", tuneGrid=expand.grid(n.minobsinnode=1, n.trees=10, interaction.depth=1, shrinkage=0.1))
# Same stacking with only two of the base models.
models <- c(model1,model3)
caretStack(models,method = "rf")
caretStack(models,method = "gbm", tuneGrid=expand.grid(n.minobsinnode=1, n.trees=10, interaction.depth=1, shrinkage=0.1))