Cubist icon indicating copy to clipboard operation
Cubist copied to clipboard

Models trained in v0.0.19 do not predict in v0.0.21

Open boudrejp opened this issue 7 years ago • 0 comments

I have models trained in Cubist version 0.0.19 that do not produce the same predictions in Cubist version 0.0.20. In the sample code below, a model is trained on my machine (User1) using v0.0.19 installed from CRAN, and prediction is then attempted on a colleague's machine (User2) using a CRAN installation of v0.0.21. We looked at the differences in the code and suspected it had something to do with cubist.model$caseWeights, which appears to be new in v0.0.20, but we were not able to find a quick fix.

SAMPLE CODE:

> ###using Cubist 0.0.19, caret 6.0-77
> set.seed(387)
> library(Cubist)
> library(caret)
> 
> 
> model.data <- datasets::Theoph
> 
> prob <- 0.6
> 
> train.rows <- createDataPartition(model.data$conc, p = prob)$Resample1
> predictors <- colnames(model.data)[-c(1,5)]
> 
> cubist.v.19.model <- cubist(y = model.data$conc[train.rows], x = model.data[train.rows, predictors], committees = 2)
> 
> predictions.v.19 <- predict(cubist.v.19.model, newdata = model.data[-train.rows, predictors], neighbors = 1)
> 
> save(cubist.v.19.model, predictions.v.19, model.data, train.rows, predictors, file = "C:/Users/User1/Desktop/CubistExample.Rdata")
> 
> sessionInfo()

R version 3.4.2 (2017-09-28)
Platform: i386-w64-mingw32/i386 (32-bit)
Running under: Windows >= 8 x64 (build 9200)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] caret_6.0-77    ggplot2_2.2.1   Cubist_0.0.19   lattice_0.20-35

loaded via a namespace (and not attached):
 [1] withr_2.0.0        class_7.3-14       pkgconfig_2.0.1    gower_0.1.2        compiler_3.4.2    
 [6] stringr_1.2.0      sfsmisc_1.1-1      dimRed_0.1.0       Rcpp_0.12.13       lava_1.5.1        
[11] plyr_1.8.4         DEoptimR_1.0-8     tools_3.4.2        MASS_7.3-47        nlme_3.1-131      
[16] R6_2.2.2           bindrcpp_0.2       purrr_0.2.3        bindr_0.1          kernlab_0.9-25    
[21] scales_0.5.0       nnet_7.3-12        assertthat_0.2.0   gtable_0.2.0       Matrix_1.2-11     
[26] foreach_1.4.3      stringi_1.1.5      ddalpha_1.3.1      reshape2_1.4.2     ModelMetrics_1.1.0
[31] rpart_4.1-11       munsell_0.4.3      grid_3.4.2         lubridate_1.6.0    colorspace_1.3-2  
[36] glue_1.1.1         timeDate_3012.100  rlang_0.1.2        magrittr_1.5       DRR_0.0.2         
[41] splines_3.4.2      stats4_3.4.2       survival_2.41-3    ipred_0.9-6        lazyeval_0.2.0    
[46] RcppRoll_0.2.2     prodlim_1.6.1      robustbase_0.92-7  dplyr_0.7.4        iterators_1.0.8   
[51] recipes_0.1.0      codetools_0.2-15   CVST_0.2-1         tibble_1.3.4      
> 
##################################################################
###########----     Sent to colleague, using Cubist v .20       ----###################
##################################################################
'
> load("C:/Users/User2/Desktop/CubistExample.Rdata")
> library(Cubist)
Loading required package: lattice
Warning message:
package ‘Cubist’ was built under R version 3.4.3 
> 
> set.seed(387)
> 
> # try predicting without setting model$caseWeights
> predictions.v.20_try1 <- predict(cubist.v.19.model, newdata = model.data[-train.rows, predictors], neighbors = 1)
Error in if (object$caseWeights) newdata$case_weight_pred <- NA : 
  argument is of length zero
> predictions.v.19 == predictions.v.20_try1
Error: object 'predictions.v.20_try1' not found
> 
> # try predicting with setting model$caseWeights = FALSE
> cubist.v.19.model$caseWeights <- FALSE
> predictions.v.20_try2 <- predict(cubist.v.19.model, newdata = model.data[-train.rows, predictors], neighbors = 1)
> predictions.v.19 == predictions.v.20_try2
 [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
[16]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
[31]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
[46] FALSE  TRUE  TRUE FALSE  TRUE  TRUE
> 
> # try predicting with setting model$caseWeights = TRUE (NOTE: the command below actually sets FALSE, so this run repeats try2)
> cubist.v.19.model$caseWeights <- FALSE
> predictions.v.20_try3 <- predict(cubist.v.19.model, newdata = model.data[-train.rows, predictors], neighbors = 1)
> predictions.v.19 == predictions.v.20_try3
 [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
[16]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
[31]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
[46] FALSE  TRUE  TRUE FALSE  TRUE  TRUE
> 
> 
> sessionInfo()
R version 3.4.2 (2017-09-28)
Platform: i386-w64-mingw32/i386 (32-bit)
Running under: Windows >= 8 x64 (build 9200)

Matrix products: default

locale:
[1] LC_COLLATE=English_South Africa.1252  LC_CTYPE=English_South Africa.1252   
[3] LC_MONETARY=English_South Africa.1252 LC_NUMERIC=C                         
[5] LC_TIME=English_South Africa.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] Cubist_0.2.1    lattice_0.20-35

loaded via a namespace (and not attached):
[1] compiler_3.4.2 magrittr_1.5   plyr_1.8.4     tools_3.4.2    reshape2_1.4.2 Rcpp_0.12.14  
[7] stringi_1.1.5  grid_3.4.2     stringr_1.2.0 

boudrejp avatar Jan 16 '18 14:01 boudrejp