caret-machine-learning icon indicating copy to clipboard operation
caret-machine-learning copied to clipboard

Rborist hard crashes R under caret

Open tobigithub opened this issue 9 years ago • 0 comments

Hard crash with "R for Windows has stopped working"

# Use of all 160 caret models for binary classification on the Pima Indians
# diabetes set. The output from the fast (working) binary classification models
# is exported to a sortable table in a web browser using the DT library
# https://github.com/tobigithub/caret-machine-learning
# Tobias Kind (2015)

# use mlbench, caret and DT library
require(mlbench)
require(caret)
require(DT)

# load diabetes set 768 x 9
data(PimaIndiansDiabetes) 
dim(PimaIndiansDiabetes) 

# get all model names for classification
# (query caret's model table once and select the name column by name
# rather than by position)
model_table <- modelLookup()
m <- unique(model_table$model[model_table$forClass])
length(m)
m

# Models excluded from the benchmark: they are slow, crash ("rbf"), or have
# known issues ("dwdLinear", "ownn", "snn"). Other models may simply have
# failed and are not listed here.
removeModels <- c(
  "AdaBag", "AdaBoost.M1", "FH.GBML", "pda2", "PenalizedLDA", "GFS.GCCL",
  "rbf", "RFlda", "nodeHarvest", "ORFsvm", "dwdLinear", "dwdPoly", "gam",
  "gaussprLinear", "ownn", "sddaLDA", "sddaQDA", "SLAVE", "smda", "snn",
  "rmda", "rFerns", "wsrf", "ordinalNet", "awnb", "awtan", "manb",
  "nbDiscrete", "nbSearch", "tan", "tanSearch", "bartMachine", "randomGLM",
  "Rborist"
)

# keep only the models that are not on the exclusion list
m <- m[is.na(match(m, removeModels))]

# override the filtered list: only these two models are run below
m <- c("rf", "Rborist")

# pre-load the packages behind the selected models. The entries of `m` are
# caret method codes (e.g. "rf"), not package names, so look up the libraries
# each model declares in caret's model info before attaching them.
# require() is kept (instead of library()) so a missing package yields FALSE
# rather than aborting the script; `ll` holds one TRUE/FALSE per package.
model_libs <- unique(unlist(
  lapply(getModelInfo()[m], function(info) info$library)
))
ll <- suppressPackageStartupMessages(
  lapply(model_libs, require, character.only = TRUE)
)

# show which libraries were loaded
sessionInfo()

# predictors (X) and outcome (Y) that are handed to train():
# first 40 rows, columns 1-8 as predictors, `diabetes` as the outcome
X <- PimaIndiansDiabetes[1:40, 1:8]
Y <- PimaIndiansDiabetes$diabetes[1:40]

# register parallel front-end using one worker per detected core
library(doParallel)
cl <- makeCluster(detectCores())
registerDoParallel(cl)

# warm-up fit: required, otherwise the first method is benchmarked wrong
warmup <- train(
  x = X,
  y = Y,
  method = "rf",
  trControl = trainControl(method = "boot632")
)

# this setup actually calls the caret::train function, in order to provide
# minimal error handling this type of construct is needed.
# Fit one caret model on the global X / Y with minimal error handling.
#
# i: caret method code (character), passed to train() as `method`.
#
# Returns the fitted train object, or NULL when train() signals an R error.
# The error message is reported via message() instead of being discarded, so
# failed models can be diagnosed. Note that tryCatch() only intercepts R-level
# conditions; it cannot recover from a crash of the R process itself.
trainCall <- function(i) {
  cat("----------------------------------------------------", "\n")
  # same seed for every model so the resamples are comparable
  set.seed(123)
  cat(i, " <- loaded\n")
  tryCatch(
    train(
      y = Y, x = X, method = i,
      trControl = trainControl(method = "boot632")
    ),
    error = function(e) {
      message("train() failed for '", i, "': ", conditionMessage(e))
      NULL
    }
  )
}

# run every model through trainCall(); each call has its own tryCatch, so one
# failing model yields a NULL entry instead of stopping the whole run
t2 <- lapply(m, trainCall)

# remove NULL values: only successful fits are kept. After this step the
# positions in t2 no longer correspond to the positions in m.
# vapply() always returns a logical vector, also for an empty list.
t2 <- t2[!vapply(t2, is.null, logical(1))]

# this setup extracts the results with minimal error handling 
# TrainKappa can be sometimes zero, but Accuracy SD can be still available
# see Kappa value http://epiville.ccnmtl.columbia.edu/popup/how_to_calculate_kappa.html
# Print one result row (method, accuracy, kappa, elapsed time) for the i-th
# successfully fitted model in the global list t2.
#
# i: integer index into t2.
#
# The method name is taken from the fitted object itself (t2[[i]]$method),
# not from m[i]: failed fits have been removed from t2, so position i in t2
# need not match position i in m. All values are extracted before anything is
# printed, so an error cannot leave a half-written row. Returns NULL.
printCall <- function(i) {
  tryCatch(
    {
      fit <- t2[[i]]
      perf <- getTrainPerf(fit)
      accuracy <- round(perf$TrainAccuracy, 4)
      kappa <- round(perf$TrainKappa, 4)
      elapsed <- fit$times$everything[3]

      cat(sprintf("%-22s", fit$method))
      cat(accuracy, "\t")
      cat(kappa, "\t")
      cat(elapsed, "\n")
    },
    error = function(e) NULL
  )
}

# print one row per fitted model; seq_along() yields an empty sequence when no
# model succeeded, whereas 1:length(t2) would iterate over c(1, 0)
r2 <- lapply(seq_along(t2), printCall)

# stop cluster and register sequential front end
stopCluster(cl)
registerDoSEQ()

# number of successful fits; used further down as the table page length
MAX <- length(t2)

# performance summary per fitted model, computed once and reused below
perf <- lapply(t2, getTrainPerf)

# one result vector per table column; vapply() fixes the element type and
# returns a zero-length vector when t2 is empty (no out-of-bounds access)
x1 <- vapply(t2, function(fit) fit$method, character(1))           # Name
x2 <- vapply(perf, function(p) round(p$TrainAccuracy, 4),
             numeric(1))                                           # Accuracy
x3 <- vapply(perf, function(p) round(p$TrainKappa, 4),
             numeric(1))                                           # Kappa
x4 <- vapply(t2, function(fit) as.numeric(fit$times$everything[3]),
             numeric(1))                                           # time [s]
x5 <- vapply(t2, function(fit) fit$modelInfo$label, character(1))  # long name

# coerce to data frame
df1 <- data.frame(x1, x2, x3, x4, x5, stringsAsFactors = FALSE)

# print all results to R-GUI
df1

# plot models, just as example
# ggplot(t2[[1]])
# ggplot(t2[[1]])

# render the results as an interactive table with readable column names.
# Six column names are supplied for the five data columns of df1; 'Num' labels
# the row-name column. The table shows all rows on one page (pageLength = MAX)
# and is initially ordered by column index 2, descending
# (presumably 'Accuracy', given DT's zero-based column indexing - confirm).
datatable(
  df1,
  options = list(
    columnDefs = list(list(className = "dt-left", targets = 0:5)),
    pageLength = MAX,
    order = list(list(2, "desc"))
  ),
  colnames = c("Num", "Name", "Accuracy", "Kappa", "time [s]", "Model name"),
  caption = paste("Classification results from caret models", Sys.time()),
  class = "cell-border stripe"
) %>%
  # show the three numeric columns with 3 decimals
  formatRound(c("x2", "x3", "x4"), 3) %>%
  # draw a bar in column 2 scaled by the x2 values
  formatStyle(
    2,
    background = styleColorBar(df1$x2, "steelblue"),
    backgroundSize = "100% 90%",
    backgroundRepeat = "no-repeat",
    backgroundPosition = "center"
  )


### END
R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
 [1] parallel  splines   grid      stats     graphics  grDevices utils    
 [8] datasets  methods   base     

other attached packages:
 [1] plyr_1.8.4          wsrf_1.5.47         spls_2.2-1         
 [4] sparseLDA_0.1-7     sdwd_1.0.2          sda_1.3.7          
 [7] fdrtool_1.2.15      corpcor_1.6.8       rrlda_1.1          
[10] matrixcalc_1.0-3    glasso_1.8          mvoutlier_2.0.6    
[13] sgeostat_1.0-27     pcaPP_1.9-60        RRF_1.6            
[16] rpartScore_1.0-1    rotationForest_0.1  rocc_1.2           
[19] ROCR_1.0-7          gplots_3.0.1        Rborist_0.1-1      
[22] Rcpp_0.12.5         randomGLM_1.02-1    doParallel_1.0.10  
[25] MASS_7.3-45         protoclass_1.0      pls_2.5-0          
[28] partDSA_0.9.10      ordinalNet_1.4      oblique.tree_1.1.1 
[31] tree_1.0-37         nodeHarvest_0.7-3   nnet_7.3-12        
[34] mda_0.4-8           class_7.3-14        kknn_1.3.1         
[37] hda_0.2-14          gpls_1.44.0         glmnet_2.0-5       
[40] gbm_2.1.1           survival_2.39-4     gam_1.12           
[43] fda_2.4.4           Matrix_1.2-6        extraTrees_1.0.5   
[46] evtree_1.0-0        partykit_1.0-5      earth_4.4.4        
[49] plotmo_3.1.4        TeachingDemos_2.10  plotrix_3.6-2      
[52] deepboost_0.1.4     Boruta_5.0.0        ranger_0.5.0       
[55] binda_1.0.3         entropy_1.2.1       bartMachine_1.2.3  
[58] missForest_1.4      itertools_0.1-3     iterators_1.0.8    
[61] foreach_1.4.3       randomForest_4.6-12 car_2.1-2          
[64] bartMachineJARs_1.0 rJava_0.9-8         ada_2.0-5          
[67] rpart_4.1-10        DT_0.1              caret_6.0-70       
[70] ggplot2_2.1.0       lattice_0.20-33     mlbench_2.1-1      

loaded via a namespace (and not attached):
 [1] minqa_1.2.4           colorspace_1.2-6      MatrixModels_0.4-1   
 [4] cvTools_0.3.2         mvtnorm_1.0-5         codetools_0.2-14     
 [7] sROC_0.1-2            robustbase_0.92-6     nloptr_1.0.4         
[10] robCompositions_2.0.0 pbkrtest_0.4-6        cluster_2.0.4        
[13] compiler_3.3.1        rrcov_1.3-11          lars_1.2             
[16] htmltools_0.3.5       quantreg_5.26         tools_3.3.1          
[19] igraph_1.0.1          gtable_0.2.0          reshape2_1.4.1       
[22] gdata_2.17.0          nlme_3.1-128          lmtest_0.9-34        
[25] laeken_0.4.6          stringr_1.0.0         lme4_1.1-12          
[28] gtools_3.5.0          DEoptimR_1.0-6        zoo_1.7-13           
[31] scales_0.4.0          VIM_4.5.0             SparseM_1.7          
[34] elasticnet_1.1        reshape_0.8.5         stringi_1.1.1        
[37] e1071_1.6-7           caTools_1.17.1        boot_1.3-18          
[40] chron_2.3-47          bitops_1.0-6          htmlwidgets_0.6      
[43] GGally_1.2.0          magrittr_1.5          mgcv_1.8-12          
[46] sp_1.2-3              KernSmooth_2.23-15    data.table_1.9.6     
[49] vcd_1.4-1             digest_0.6.9          stats4_3.3.1         
[52] munsell_0.4.3         quadprog_1.5-5       

tobigithub avatar Jul 21 '16 07:07 tobigithub