glmnetUtils icon indicating copy to clipboard operation
glmnetUtils copied to clipboard

Inconsistent behavior with formulae with interactions depending on whether sparse=TRUE/FALSE

Open lrossouw opened this issue 1 year ago • 1 comments

library(glmnetUtils)

# Seed the RNG so the simulated data set below is reproducible.
set.seed(123456)

# Simulated data: 100 rows with a normal response `y`, a normal numeric
# predictor `x1`, and three factors built from Bernoulli(0.5) draws.
# The columns are generated in this order on purpose: each call consumes
# random numbers, so reordering them would change the data.
data <- data.frame(
  y = rnorm(n = 100),
  x1 = rnorm(n = 100),
  f1 = factor(rbinom(n = 100, size = 1, prob = 0.5)),
  f2 = factor(rbinom(n = 100, size = 1, prob = 0.5)),
  f3 = factor(rbinom(n = 100, size = 1, prob = 0.5))
)

# Case 1: crossed terms f1 * (f2 + f3), fitted without the `sparse` argument.
model_1 <- cv.glmnet(y ~ f1 * (f2 + f3), data = data)

# First six in-sample predictions at the cross-validated lambda.min.
head(predict(model_1, newdata = data, type = "response", s = "lambda.min"))
#>   lambda.min
#> 1 -0.1149001
#> 2  0.1461569
#> 3 -0.1149001
#> 4  0.1461569
#> 5 -0.2340375
#> 6  0.2652943

# Case 2: same crossed formula as model_1, but fitted with sparse = TRUE.
model_2 <- cv.glmnet(y ~ f1 * (f2 + f3), data = data, sparse = TRUE)

# First six in-sample predictions at the cross-validated lambda.min.
head(predict(model_2, newdata = data, type = "response", s = "lambda.min"))
#>   lambda.min
#> 1 -0.1151198
#> 2  0.1459328
#> 3 -0.1151198
#> 4  0.1459328
#> 5 -0.2564479
#> 6  0.2872609

# Case 3: `:`-only interaction terms f1:(f2 + f3), fitted without `sparse`.
model_3 <- cv.glmnet(y ~ f1:(f2 + f3), data = data)

# First six in-sample predictions at the cross-validated lambda.min.
head(predict(model_3, newdata = data, type = "response", s = "lambda.min"))
#>    lambda.min
#> 1 -0.19260584
#> 2 -0.02163255
#> 3 -0.14659154
#> 4  0.14518262
#> 5 -0.16255188
#> 6  0.41037310

# Case 4: same `:`-only formula as model_3, but fitted with sparse = TRUE.
model_4 <- cv.glmnet(y ~ f1:(f2 + f3), data = data, sparse = TRUE)

# Predicting on the training data fails here, as recorded below.
head(predict(model_4, newdata = data, type = "response", s = "lambda.min"))
#> Error in predict.glmnet(object$glmnet.fit, newx, s = lambda, ...): The number of variables in newx must be 8

# Case 5: numeric-by-factor interaction x1:(f2 + f3), fitted without `sparse`.
model_5 <- cv.glmnet(y ~ x1:(f2 + f3), data = data)

# Predict from model_5 itself. The original reprex called predict() on
# model_3 here (copy-paste slip), so the output it recorded was a verbatim
# repeat of the model_3 predictions and said nothing about this fit.
head(predict(
  model_5,
  newdata = data,
  type = "response",
  s = "lambda.min"
))
# NOTE: output not yet regenerated for model_5; re-run the reprex to record it.

# Case 6: same numeric-by-factor formula as model_5, fitted with sparse = TRUE.
model_6 <- cv.glmnet(y ~ x1:(f2 + f3), sparse = TRUE, data = data)

# Predict from model_6 itself. The original reprex called predict() on
# model_4 here (copy-paste slip), so the error it recorded,
#   "Error in predict.glmnet(object$glmnet.fit, newx, s = lambda, ...):
#    The number of variables in newx must be 8",
# came from model_4, not from this fit.
head(predict(
  model_6,
  newdata = data,
  type = "response",
  s = "lambda.min"
))
# NOTE: output not yet regenerated for model_6; re-run the reprex to confirm
# whether this fit fails in the same way as model_4.

Created on 2023-05-15 with reprex v2.0.2

Session info:

> sessionInfo()
R version 4.2.1 (2022-06-23)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.6 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8   
 [6] LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices datasets  utils     methods   base     

other attached packages:
[1] glmnetUtils_1.1.8

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.10      rstudioapi_0.14  knitr_1.42       splines_4.2.1    lattice_0.20-45  R6_2.5.1         rlang_1.1.1      fastmap_1.1.1   
 [9] foreach_1.5.2    tools_4.2.1      parallel_4.2.1   grid_4.2.1       glmnet_4.1-7     xfun_0.39        cli_3.6.1        withr_2.5.0     
[17] htmltools_0.5.5  iterators_1.0.14 yaml_2.3.7       digest_0.6.31    survival_3.3-1   lifecycle_1.0.3  processx_3.8.1   Matrix_1.4-1    
[25] callr_3.7.3      ps_1.7.5         fs_1.6.2         codetools_0.2-18 shape_1.4.6      glue_1.6.2       evaluate_0.21    rmarkdown_2.21  
[33] reprex_2.0.2     compiler_4.2.1   renv_0.17.3  

lrossouw avatar May 14 '23 18:05 lrossouw

Updated the example. The problem seems to occur specifically when the formula uses `:` interaction terms, but not when it uses `*` interaction terms. It also occurs when using brackets, as shown above.

lrossouw avatar May 14 '23 18:05 lrossouw