caret-machine-learning
caret-machine-learning copied to clipboard
Rborist hard crashes R under caret
Hard crash with "R for Windows has stopped working"
# Use of all 160 caret models for binary classification and diabetes set
# The output from fast (working) binary classification models is
# exported to a sortable table in a web browser using the DT library
# https://github.com/tobigithub/caret-machine-learning
# Tobias Kind (2015)
# use mlbench, caret and DT library
require(mlbench)
require(caret)
require(DT)
# Load the Pima Indians diabetes data (768 rows x 9 columns) and show its size.
data(PimaIndiansDiabetes)
dim(PimaIndiansDiabetes)
# Collect the unique names of all caret models flagged for classification.
# The lookup table is fetched once and used for both the row filter and the
# name column (column 1).
m <- local({
  lookup <- modelLookup()
  unique(lookup[lookup$forClass, 1])
})
# Show how many models were found, then list them.
length(m)
m
# Models excluded from the run because they are slow or misbehave
# ("rbf" crashes; "dwdLinear", "ownn", "snn" have issues).
# Other models may simply have failed and are not listed here.
removeModels <- c(
  "AdaBag", "AdaBoost.M1", "FH.GBML", "pda2", "PenalizedLDA", "GFS.GCCL",
  "rbf", "RFlda", "nodeHarvest", "ORFsvm", "dwdLinear", "dwdPoly", "gam",
  "gaussprLinear", "ownn", "sddaLDA", "sddaQDA", "SLAVE", "smda", "snn",
  "rmda", "rFerns", "wsrf", "ordinalNet", "awnb", "awtan", "manb",
  "nbDiscrete", "nbSearch", "tan", "tanSearch", "bartMachine", "randomGLM",
  "Rborist"
)
# Keep only the models that do not appear in the exclusion list.
m <- m[is.na(match(m, removeModels))]
# Override the filtered list: this run is restricted to just these two models.
m <- c("rf", "Rborist")
# Attempt to attach a package for every entry of m up front, silencing the
# package startup messages. As the original note says, this does not really
# work because of other dependencies; the per-entry results are kept in ll.
ll <- suppressPackageStartupMessages(
  lapply(m, require, character.only = TRUE)
)
# Print the session details, which include the attached libraries.
sessionInfo()
# load X and Y (these are passed on to the train function)
# Predictors: first 40 rows, columns 1 to 8. Outcome: the diabetes column
# for the same 40 rows.
X <- PimaIndiansDiabetes[seq_len(40), 1:8]
Y <- PimaIndiansDiabetes$diabetes[seq_len(40)]
# Start a cluster with one worker per detected core and register it as the
# parallel back-end.
library(doParallel)
cl <- makeCluster(detectCores())
registerDoParallel(cl)
# Warm-up fit: without it the first benchmarked method is timed wrongly.
warmup <- train(
  x = X,
  y = Y,
  method = "rf",
  trControl = trainControl(method = "boot632")
)
# Fit one caret model on the global X / Y data with minimal error handling.
#
# i: name of the caret model; it is passed to caret::train() as the method.
#
# Returns the object produced by train(), or NULL when train() signalled an
# error. The seed is reset to 123 before every fit and a short banner is
# printed so the console shows which model is being processed.
#
# NOTE(review): tryCatch() only handles R-level errors; a crash of the R
# process itself (as described in the issue title) presumably cannot be
# intercepted here.
trainCall <- function(i)
{
  cat("----------------------------------------------------","\n");
  set.seed(123); cat(i," <- loaded\n");
  tryCatch(
    train(y = Y, x = X, method = i,
          trControl = trainControl(method = "boot632")),
    error = function(e) {
      # Report why the model failed instead of dropping the error silently;
      # NULL is still returned so the caller can filter out failed models.
      message("Model '", i, "' failed: ", conditionMessage(e))
      NULL
    }
  )
}
# Run trainCall once per model name; going through lapply keeps each fit
# inside trainCall's tryCatch.
t2 <- lapply(m, trainCall)
# Keep only the successful fits (failed models came back as NULL). After this
# step the positions in t2 no longer correspond to the positions in m.
t2 <- Filter(Negate(is.null), t2)
# Print one result row for the i-th fitted model in the global list t2:
# model name, training accuracy, training kappa and elapsed time.
#
# i: index into t2.
#
# Returns NULL; the row is written to the console with cat().
#
# The model name is taken from the fitted object itself (t2[[i]]$method)
# rather than from m[i]: failed models have been removed from t2, so m and t2
# are not guaranteed to line up and m[i] could label a row with the wrong
# model.
# TrainKappa can sometimes be zero while the accuracy is still available;
# for the kappa statistic see
# http://epiville.ccnmtl.columbia.edu/popup/how_to_calculate_kappa.html
printCall <- function(i)
{
  tryCatch(
    {
      fit <- t2[[i]]
      # Compute the performance summary once, before anything is printed, so
      # a failure does not leave a half-written row on the console.
      perf <- getTrainPerf(fit)
      cat(sprintf("%-22s", fit$method))
      cat(round(perf$TrainAccuracy, 4), "\t")
      cat(round(perf$TrainKappa, 4), "\t")
      cat(fit$times$everything[3], "\n")
    },
    error = function(e) {
      # Say which entry could not be summarised instead of hiding the error.
      message("Could not print results for entry ", i, ": ",
              conditionMessage(e))
      NULL
    }
  )
}
# Print one summary row per fitted model. seq_along() yields an empty index
# when no model succeeded, whereas 1:length(t2) would yield c(1, 0) and call
# printCall on entries that do not exist.
r2 <- lapply(seq_along(t2), printCall)
# stop cluster and register sequential front end
stopCluster(cl)
registerDoSEQ()
# Collect the per-model results into parallel vectors, one entry per fitted
# model in t2. vapply() checks the type and length of every entry and returns
# correctly typed zero-length vectors when t2 is empty; a 1:length(t2) loop
# would instead index t2[[1]] on an empty list and fail.
MAX <- length(t2) # number of fitted models; used as the table page length
# Compute each performance summary once and reuse it for accuracy and kappa.
perf <- lapply(t2, getTrainPerf)
x1 <- vapply(t2, function(fit) fit$method, character(1)) # Name
x2 <- vapply(
  perf,
  function(p) as.numeric(round(p$TrainAccuracy, 4)),
  numeric(1)
) # Accuracy
x3 <- vapply(
  perf,
  function(p) as.numeric(round(p$TrainKappa, 4)),
  numeric(1)
) # Kappa
x4 <- vapply(
  t2,
  function(fit) as.numeric(fit$times$everything[3]),
  numeric(1)
) # time [s]
x5 <- vapply(t2, function(fit) fit$modelInfo$label, character(1)) # long model name
# Combine the result vectors into one data frame. The column names x1..x5 are
# spelled out because the table formatting further down refers to them.
df1 <- data.frame(
  x1 = x1,
  x2 = x2,
  x3 = x3,
  x4 = x4,
  x5 = x5,
  stringsAsFactors = FALSE
)
# Show the full result table in the R console.
df1
# plot models, just as example
# ggplot(t2[[1]])
# ggplot(t2[[1]])
# Build the web table step by step instead of as one pipe chain.
# Table options: left-align all columns, show every model on one page and
# sort descending on column index 2.
dt_options <- list(
  columnDefs = list(list(className = 'dt-left', targets = c(0,1,2,3,4,5))),
  pageLength = MAX,
  order = list(list(2, 'desc'))
)
# Create the widget with readable column headers and a time-stamped caption.
results_table <- datatable(
  df1,
  options = dt_options,
  colnames = c('Num', 'Name', 'Accuracy', 'Kappa', 'time [s]', 'Model name'),
  caption = paste('Classification results from caret models',Sys.time()),
  class = 'cell-border stripe'
)
# Round the three numeric columns to 3 digits for display.
results_table <- formatRound(results_table, 'x2', 3)
results_table <- formatRound(results_table, 'x3', 3)
results_table <- formatRound(results_table, 'x4', 3)
# Draw a bar scaled by the x2 values behind column 2, then display the table.
formatStyle(
  results_table,
  2,
  background = styleColorBar(x2, 'steelblue'),
  backgroundSize = '100% 90%',
  backgroundRepeat = 'no-repeat',
  backgroundPosition = 'center'
)
### END
R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] parallel splines grid stats graphics grDevices utils
[8] datasets methods base
other attached packages:
[1] plyr_1.8.4 wsrf_1.5.47 spls_2.2-1
[4] sparseLDA_0.1-7 sdwd_1.0.2 sda_1.3.7
[7] fdrtool_1.2.15 corpcor_1.6.8 rrlda_1.1
[10] matrixcalc_1.0-3 glasso_1.8 mvoutlier_2.0.6
[13] sgeostat_1.0-27 pcaPP_1.9-60 RRF_1.6
[16] rpartScore_1.0-1 rotationForest_0.1 rocc_1.2
[19] ROCR_1.0-7 gplots_3.0.1 Rborist_0.1-1
[22] Rcpp_0.12.5 randomGLM_1.02-1 doParallel_1.0.10
[25] MASS_7.3-45 protoclass_1.0 pls_2.5-0
[28] partDSA_0.9.10 ordinalNet_1.4 oblique.tree_1.1.1
[31] tree_1.0-37 nodeHarvest_0.7-3 nnet_7.3-12
[34] mda_0.4-8 class_7.3-14 kknn_1.3.1
[37] hda_0.2-14 gpls_1.44.0 glmnet_2.0-5
[40] gbm_2.1.1 survival_2.39-4 gam_1.12
[43] fda_2.4.4 Matrix_1.2-6 extraTrees_1.0.5
[46] evtree_1.0-0 partykit_1.0-5 earth_4.4.4
[49] plotmo_3.1.4 TeachingDemos_2.10 plotrix_3.6-2
[52] deepboost_0.1.4 Boruta_5.0.0 ranger_0.5.0
[55] binda_1.0.3 entropy_1.2.1 bartMachine_1.2.3
[58] missForest_1.4 itertools_0.1-3 iterators_1.0.8
[61] foreach_1.4.3 randomForest_4.6-12 car_2.1-2
[64] bartMachineJARs_1.0 rJava_0.9-8 ada_2.0-5
[67] rpart_4.1-10 DT_0.1 caret_6.0-70
[70] ggplot2_2.1.0 lattice_0.20-33 mlbench_2.1-1
loaded via a namespace (and not attached):
[1] minqa_1.2.4 colorspace_1.2-6 MatrixModels_0.4-1
[4] cvTools_0.3.2 mvtnorm_1.0-5 codetools_0.2-14
[7] sROC_0.1-2 robustbase_0.92-6 nloptr_1.0.4
[10] robCompositions_2.0.0 pbkrtest_0.4-6 cluster_2.0.4
[13] compiler_3.3.1 rrcov_1.3-11 lars_1.2
[16] htmltools_0.3.5 quantreg_5.26 tools_3.3.1
[19] igraph_1.0.1 gtable_0.2.0 reshape2_1.4.1
[22] gdata_2.17.0 nlme_3.1-128 lmtest_0.9-34
[25] laeken_0.4.6 stringr_1.0.0 lme4_1.1-12
[28] gtools_3.5.0 DEoptimR_1.0-6 zoo_1.7-13
[31] scales_0.4.0 VIM_4.5.0 SparseM_1.7
[34] elasticnet_1.1 reshape_0.8.5 stringi_1.1.1
[37] e1071_1.6-7 caTools_1.17.1 boot_1.3-18
[40] chron_2.3-47 bitops_1.0-6 htmlwidgets_0.6
[43] GGally_1.2.0 magrittr_1.5 mgcv_1.8-12
[46] sp_1.2-3 KernSmooth_2.23-15 data.table_1.9.6
[49] vcd_1.4-1 digest_0.6.9 stats4_3.3.1
[52] munsell_0.4.3 quadprog_1.5-5