caret-machine-learning
caret-machine-learning copied to clipboard
blackboost kills rscript clients with memory bug
The example code below results in a hard crash only when "blackboost" is included in the list of methods. The crash occurs with both the parallel and the sequential back-end. The error dialog reads: "memory could not be written. Click on OK to terminate".
# Load every package up front. library() stops immediately when a package is
# missing, whereas require() only returns FALSE and lets the script fail later.
library(caret)
library(gbm)
library(randomForest)
library(doParallel)
# load iris set
data(iris)
dim(iris)
# caret method names to train, one model per entry
m <- c("rf", "gbm", "blackboost")
# load X and Y (these are passed on to the train function)
X <- iris[, 1:3]
Y <- iris$Species
# register parallel front-end, one worker per core reported by detectCores()
cl <- makeCluster(detectCores())
registerDoParallel(cl)
# This wrapper is what actually calls caret::train. Wrapping the call in
# tryCatch provides minimal error handling, so one failing method does not stop the run.
# Train one caret model on the global predictors `X` and outcome `Y`.
#
# Args:
#   i: name of a caret method (e.g. "rf"); forwarded to train() as `method`.
#
# Returns:
#   Whatever train() returns, or NULL when train() signals an R error. The
#   error message is re-emitted as a warning instead of being discarded, so a
#   failing method stays visible without aborting the remaining methods.
#
# Note: tryCatch() only intercepts R-level conditions; a crash of the R
# process itself cannot be handled here.
trainCall <- function(i) {
  cat("----------------------------------------------------", "\n")
  # reseed before every model so each method starts from the same RNG state
  set.seed(123)
  cat(i, " <- loaded\n")
  tryCatch(
    # name every argument: the method must not depend on positional matching
    train(
      x = X, y = Y, method = i,
      trControl = trainControl(method = "cv")
    ),
    error = function(e) {
      warning(
        "train() failed for method '", i, "': ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}
# use lapply/loop to run everything, required for try/catch error function to work
t2 <- lapply(m, trainCall)
# keep the method name attached to each result so provenance survives filtering
names(t2) <- m
# remove NULL values; only successfully trained models are kept
# (vapply always yields a logical mask, even when t2 is empty)
t2 <- t2[!vapply(t2, is.null, logical(1))]
# this setup extracts the results with minimal error handling
# TrainKappa can sometimes be zero, but the Accuracy SD can still be available
# see Kappa value http://epiville.ccnmtl.columbia.edu/popup/how_to_calculate_kappa.html
# Print one result row for the i-th entry of the global list `t2`.
#
# The row holds the method name, TrainAccuracy and TrainKappa as reported by
# getTrainPerf() (rounded to 4 digits), and the third element of
# `times$everything` (presumably elapsed seconds -- confirm against caret).
#
# Args:
#   i: index into `t2`.
#
# Returns:
#   NULL; the row is written to stdout with cat(). If anything fails, a
#   warning describing the failure is raised and nothing is printed.
printCall <- function(i) {
  tryCatch(
    {
      fit <- t2[[i]]
      # compute everything before printing so a failure leaves no partial row
      perf <- getTrainPerf(fit)
      # Take the label from the fitted object rather than from m[i]: failed
      # methods were dropped from t2, so positions in m and t2 need not match.
      cat(sprintf("%-22s", fit[["method"]]))
      cat(round(perf$TrainAccuracy, 4), "\t")
      cat(round(perf$TrainKappa, 4), "\t")
      cat(fit$times$everything[3], "\n")
    },
    error = function(e) {
      warning(
        "could not print results for model ", i, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}
# seq_along() yields an empty sequence when no model trained successfully,
# whereas 1:length(t2) would iterate over c(1, 0) and print a bogus row.
r2 <- lapply(seq_along(t2), printCall)
# stop cluster and register sequential front end
stopCluster(cl)
registerDoSEQ()
> sessionInfo()
R version 3.3.1 (2016-06-21)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
locale:
[1] LC_COLLATE=English_United States.1252
[4] LC_NUMERIC=C
attached base packages:
[1] splines parallel stats4 grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] gbm_2.1.1 survival_2.39-4 ada_2.0-5 rpart_4.1-10 DT_0.1 randomForest_4.6-12
[7] doParallel_1.0.10 iterators_1.0.8 foreach_1.4.3 plyr_1.8.4 mboost_2.6-0 stabs_0.5-1
[13] party_1.0-25 strucchange_1.5-1 sandwich_2.3-4 zoo_1.7-13 modeltools_0.2-21 mvtnorm_1.0-5
[19] caret_6.0-70 ggplot2_2.1.0 lattice_0.20-33
loaded via a namespace (and not attached):
[1] coin_1.1-2 reshape2_1.4.1 colorspace_1.2-6 htmltools_0.3.5 mgcv_1.8-12 e1071_1.6-7
[7] nloptr_1.0.4 multcomp_1.4-5 stringr_1.0.0 MatrixModels_0.4-1 munsell_0.4.3 gtable_0.2.0
[13] htmlwidgets_0.6 codetools_0.2-14 SparseM_1.7 quantreg_5.26 pbkrtest_0.4-6 class_7.3-14
[19] TH.data_1.0-7 Rcpp_0.12.5 scales_0.4.0 lme4_1.1-12 digest_0.6.9 stringi_1.1.1
[25] quadprog_1.5-5 tools_3.3.1 magrittr_1.5 car_2.1-2 MASS_7.3-45 Matrix_1.2-6
[31] pROC_1.8 nnls_1.4 minqa_1.2.4 nnet_7.3-12 nlme_3.1-128 compiler_3.3.1