glmnetUtils
glmnetUtils copied to clipboard
Inconsistent behavior for formulae with interactions, depending on whether sparse = TRUE or sparse = FALSE
library(glmnetUtils)
# Simulated data for the reprex: a numeric response, one numeric predictor and
# three two-level factors, 100 rows each. The seed is fixed so the draws are
# reproducible; the columns are generated in the order y, x1, f1, f2, f3.
set.seed(123456)
data <- local({
  n_obs <- 100
  # One Bernoulli(0.5) draw per row, stored as a factor.
  draw_binary_factor <- function() factor(rbinom(n_obs, 1, 0.5))
  data.frame(
    y = rnorm(n_obs),
    x1 = rnorm(n_obs),
    f1 = draw_binary_factor(),
    f2 = draw_binary_factor(),
    f3 = draw_binary_factor()
  )
})
# Case 1: `*` interaction of f1 with (f2 + f3); `sparse` is not set.
model_1 <- cv.glmnet(y ~ f1 * (f2 + f3), data = data)
# First rows of the in-sample predictions at lambda.min.
predict(model_1, newdata = data, type = "response", s = "lambda.min") |>
  head()
#> lambda.min
#> 1 -0.1149001
#> 2 0.1461569
#> 3 -0.1149001
#> 4 0.1461569
#> 5 -0.2340375
#> 6 0.2652943
# Case 2: same `*` formula as case 1, but fitted with sparse = TRUE.
model_2 <- cv.glmnet(y ~ f1 * (f2 + f3), data = data, sparse = TRUE)
# First rows of the in-sample predictions at lambda.min.
predict(model_2, newdata = data, type = "response", s = "lambda.min") |>
  head()
#> lambda.min
#> 1 -0.1151198
#> 2 0.1459328
#> 3 -0.1151198
#> 4 0.1459328
#> 5 -0.2564479
#> 6 0.2872609
# Case 3: `:` interaction of f1 with (f2 + f3); `sparse` is not set.
model_3 <- cv.glmnet(y ~ f1:(f2 + f3), data = data)
# First rows of the in-sample predictions at lambda.min.
predict(model_3, newdata = data, type = "response", s = "lambda.min") |>
  head()
#> lambda.min
#> 1 -0.19260584
#> 2 -0.02163255
#> 3 -0.14659154
#> 4 0.14518262
#> 5 -0.16255188
#> 6 0.41037310
# Case 4: same `:` formula as case 3, but fitted with sparse = TRUE.
model_4 <- cv.glmnet(y ~ f1:(f2 + f3), data = data, sparse = TRUE)
# Predicting on the training data fails here; the captured error is below.
predict(model_4, newdata = data, type = "response", s = "lambda.min") |>
  head()
#> Error in predict.glmnet(object$glmnet.fit, newx, s = lambda, ...): The number of variables in newx must be 8
# Case 5: `:` interaction of numeric x1 with (f2 + f3); `sparse` is not set.
model_5 <- cv.glmnet(y ~ x1:(f2 + f3), data = data)
# Predict from model_5 itself. The original reprex passed model_3 to predict()
# here (copy-paste slip), so model_5 was fitted but never exercised.
head(predict(
  model_5,
  newdata = data,
  type = "response",
  s = "lambda.min"
))
# NOTE(review): the captured output below was produced by the old call on
# model_3 (it is identical to model_3's output) and is therefore stale.
# Re-run the reprex to obtain the actual model_5 predictions.
#> lambda.min
#> 1 -0.19260584
#> 2 -0.02163255
#> 3 -0.14659154
#> 4 0.14518262
#> 5 -0.16255188
#> 6 0.41037310
# Case 6: same numeric-by-factor `:` formula as case 5, with sparse = TRUE.
model_6 <- cv.glmnet(y ~ x1:(f2 + f3), sparse = TRUE, data = data)
# Predict from model_6 itself. The original reprex passed model_4 to predict()
# here (copy-paste slip), so model_6 was fitted but never exercised.
head(predict(
  model_6,
  newdata = data,
  type = "response",
  s = "lambda.min"
))
# NOTE(review): the captured error below was produced by the old call on
# model_4 and is therefore stale. Re-run the reprex to confirm whether
# model_6 fails in the same way.
#> Error in predict.glmnet(object$glmnet.fit, newx, s = lambda, ...): The number of variables in newx must be 8
Created on 2023-05-15 with reprex v2.0.2
Session info:
> sessionInfo()
R version 4.2.1 (2022-06-23)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.6 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8
[6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats graphics grDevices datasets utils methods base
other attached packages:
[1] glmnetUtils_1.1.8
loaded via a namespace (and not attached):
[1] Rcpp_1.0.10 rstudioapi_0.14 knitr_1.42 splines_4.2.1 lattice_0.20-45 R6_2.5.1 rlang_1.1.1 fastmap_1.1.1
[9] foreach_1.5.2 tools_4.2.1 parallel_4.2.1 grid_4.2.1 glmnet_4.1-7 xfun_0.39 cli_3.6.1 withr_2.5.0
[17] htmltools_0.5.5 iterators_1.0.14 yaml_2.3.7 digest_0.6.31 survival_3.3-1 lifecycle_1.0.3 processx_3.8.1 Matrix_1.4-1
[25] callr_3.7.3 ps_1.7.5 fs_1.6.2 codetools_0.2-18 shape_1.4.6 glue_1.6.2 evaluate_0.21 rmarkdown_2.21
[33] reprex_2.0.2 compiler_4.2.1 renv_0.17.3
Updated the example. The problem seems to occur specifically when the formula uses `:` interaction terms, but not `*` interaction terms, and when brackets are used as shown above.