tmle3 icon indicating copy to clipboard operation
tmle3 copied to clipboard

Problem with exercise 2 in the TMLE chapter (stratified TMLE)

Open jbriggs7 opened this issue 3 years ago • 1 comment

## ----tmle3-ex2----------------------------------------------------------------
# Reproduction script from the issue report, restored to one statement per
# line so that it parses. It intentionally does NOT contain the workaround:
# the final tmle3() call is the one that produces the errors listed below.
# NOTE(review): the report did not include library() calls; this assumes
# data.table (fread), dplyr (%>%, mutate), sl3 and tmle3 are already attached.
ist_data <- fread(
  paste0(
    "https://raw.githubusercontent.com/tlverse/deming2019-workshop/",
    "master/data/ist_sample.csv"
  )
)

# The report read from an undefined `ist` here; the data was loaded into
# `ist_data` above.
ist <- ist_data %>% mutate(REGION = as.factor(REGION))

## ----tmle3-node-list----------------------------------------------------------
# Column names for the covariates (W), treatment (A) and outcome (Y) nodes.
node_list <- list(
  W = c(
    "RDELAY", "RCONSC", "SEX", "AGE",
    "RSLEEP", "RATRIAL", "RCT", "RVISINF",
    "RHEP24", "RASP3", "RSBP", "RDEF1",
    "RDEF2", "RDEF3", "RDEF4", "RDEF5",
    "RDEF6", "RDEF7", "RDEF8", "STYPE",
    "RXHEP", "REGION", "MISSING_RATRIAL_RASP3", "MISSING_RHEP24"
  ),
  A = "RXASP",
  Y = "DRSISC"
)

## ----tmle3-ate-spec-----------------------------------------------------------
ate_spec <- tmle_ATE(
  treatment_level = 1,
  control_level = 0
)

## ----tmle3-learner-list-------------------------------------------------------
lrnr_mean <- make_learner(Lrnr_mean)
lrnr_glmfast <- make_learner(Lrnr_glm_fast)

# define metalearner appropriate to data types
metalearner <- make_learner(
  Lrnr_solnp,
  loss_function = loss_loglik_binomial,
  learner_function = metalearner_logistic_binomial
)

sl_Y <- Lrnr_sl$new(
  learners = list(lrnr_mean, lrnr_glmfast),
  metalearner = metalearner
)
sl_A <- Lrnr_sl$new(
  learners = list(lrnr_mean, lrnr_glmfast),
  metalearner = metalearner
)

sl_Delta <- Lrnr_sl$new(
  learners = list(lrnr_mean, lrnr_glmfast),
  metalearner = metalearner
)

learner_list <- list(A = sl_A, delta_Y = sl_Delta, Y = sl_Y)

## ----tmle3-spec-fit-----------------------------------------------------------
tmle_fit <- tmle3(ate_spec, ist, node_list, learner_list)
print(tmle_fit)

## ----tmle3-spec-summary-------------------------------------------------------
# Stratify by REGION: it becomes the strata node V and is removed from W.
node2 <- node_list
node2$V <- "REGION"
node2$W <- setdiff(node_list$W, node2$V)

ist2 <- ist

tmle_spec <- tmle_stratified(ate_spec)
stratified_fit <- tmle3(tmle_spec, ist2, node2, learner_list)

ERROR(S): stratified_fit <- tmle3(tmle_spec, ist2, node2, learner_list) Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 43 of j is 46 which is outside the column number range [1,ncol=45] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 44 of j is 47 which is outside the column number range [1,ncol=46] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 44 of j is 47 which is outside the column number range [1,ncol=46] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 44 of j is 47 which is outside the column number range [1,ncol=46] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 40 of j is 43 which is outside the column number range [1,ncol=42] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 41 of j is 44 which is outside the column number range [1,ncol=43] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 41 of j is 44 which is outside the column number range [1,ncol=43] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 41 of j is 44 which is outside the column number range [1,ncol=43] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 39 of j is 42 which is outside the column number range [1,ncol=40] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 40 of j is 43 which is outside the column number range [1,ncol=41] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 40 of j is 43 which is outside the column number range [1,ncol=41] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 40 of j is 43 which is outside the column number range [1,ncol=41] Error in ind_ref_mat[as.numeric(x), , drop = FALSE] : incorrect number of dimensions Error in ind_ref_mat[as.numeric(x), , drop 
= FALSE] : incorrect number of dimensions Error in ind_ref_mat[as.numeric(x), , drop = FALSE] : incorrect number of dimensions Error in ind_ref_mat[as.numeric(x), , drop = FALSE] : incorrect number of dimensions Error in ind_ref_mat[as.numeric(x), , drop = FALSE] : incorrect number of dimensions Error in ind_ref_mat[as.numeric(x), , drop = FALSE] : incorrect number of dimensions Error in ind_ref_mat[as.numeric(x), , drop = FALSE] : incorrect number of dimensions Error in ind_ref_mat[as.numeric(x), , drop = FALSE] : incorrect number of dimensions Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 36 of j is 37 which is outside the column number range [1,ncol=36] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 37 of j is 38 which is outside the column number range [1,ncol=37] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 37 of j is 38 which is outside the column number range [1,ncol=37] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 37 of j is 38 which is outside the column number range [1,ncol=37] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 43 of j is 46 which is outside the column number range [1,ncol=45] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 43 of j is 46 which is outside the column number range [1,ncol=45] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 43 of j is 46 which is outside the column number range [1,ncol=45] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 43 of j is 46 which is outside the column number range [1,ncol=45] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 43 of j is 46 which is outside the column number range [1,ncol=45] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 43 of j is 46 
which is outside the column number range [1,ncol=45] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 43 of j is 46 which is outside the column number range [1,ncol=45] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 43 of j is 46 which is outside the column number range [1,ncol=45] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 43 of j is 46 which is outside the column number range [1,ncol=45] Error in [.data.table(X, , which(!is.na(coef)), drop = FALSE, with = FALSE) : Item 43 of j is 46 which is outside the column number range [1,ncol=45] Error in self$subset_covariates(task) : Task missing the following covariates expected by Lrnr_solnp_TRUE_TRUE_FALSE_1e-05: Lrnr_glm_fast_TRUE_Cholesky Failed on predict Error in self$compute_step() : Error in self$subset_covariates(task) : Task missing the following covariates expected by Lrnr_solnp_TRUE_TRUE_FALSE_1e-05: Lrnr_glm_fast_TRUE_Cholesky

jbriggs7 avatar Mar 17 '21 20:03 jbriggs7

Cleaned up example with fix:

library(data.table)
library(sl3)
library(tmle3)
library(tidyverse)
# Download the IST sample data used by the workshop exercise.
ist_url <- paste0(
  "https://raw.githubusercontent.com/tlverse/deming2019-workshop/",
  "master/data/ist_sample.csv"
)
ist_data <- fread(ist_url)

# REGION is converted to a factor before fitting.
ist <- mutate(ist_data, REGION = as.factor(REGION))

# Column names for the covariates (W), treatment (A) and outcome (Y) nodes.
baseline_covariates <- c(
  "RDELAY", "RCONSC", "SEX", "AGE",
  "RSLEEP", "RATRIAL", "RCT", "RVISINF",
  "RHEP24", "RASP3", "RSBP", "RDEF1",
  "RDEF2", "RDEF3", "RDEF4", "RDEF5",
  "RDEF6", "RDEF7", "RDEF8", "STYPE",
  "RXHEP", "REGION", "MISSING_RATRIAL_RASP3", "MISSING_RHEP24"
)
node_list <- list(W = baseline_covariates, A = "RXASP", Y = "DRSISC")

# Average treatment effect spec comparing treatment level 1 with control level 0.
ate_spec <- tmle_ATE(treatment_level = 1, control_level = 0)

# Candidate learners shared by all three super learners.
lrnr_mean <- make_learner(Lrnr_mean)
lrnr_glmfast <- make_learner(Lrnr_glm_fast)
candidate_learners <- list(lrnr_mean, lrnr_glmfast)

# define metalearner appropriate to data types
metalearner <- make_learner(
  Lrnr_solnp,
  loss_function = loss_loglik_binomial,
  learner_function = metalearner_logistic_binomial
)

# One super learner per node; all use the same candidates and metalearner.
sl_Y <- Lrnr_sl$new(learners = candidate_learners, metalearner = metalearner)
sl_A <- Lrnr_sl$new(learners = candidate_learners, metalearner = metalearner)
sl_Delta <- Lrnr_sl$new(
  learners = candidate_learners,
  metalearner = metalearner
)

learner_list <- list(A = sl_A, delta_Y = sl_Delta, Y = sl_Y)

# Fit the unstratified ATE first.
tmle_fit <- tmle3(ate_spec, ist, node_list, learner_list)

# Stratify by REGION: it becomes the strata node V and is removed from W.
node2 <- node_list
node2$V <- "REGION"
node2$W <- setdiff(node_list$W, node2$V)

# Take a real copy: plain assignment of a data.table only aliases it, so the
# in-place set() below would otherwise modify `ist` as well.
ist2 <- copy(ist)

# Workaround for the reported failure: convert every character column to a
# factor before handing the data to the stratified fit.
char_cols <- names(ist2)[vapply(ist2, is.character, logical(1))]
for (char_col in char_cols) {
  set(ist2, j = char_col, value = as.factor(ist2[[char_col]]))
}

# The stratified spec wraps the ATE spec; it was missing from this example,
# which left `tmle_spec` undefined in the call below.
tmle_spec <- tmle_stratified(ate_spec)
stratified_fit <- tmle3(tmle_spec, ist2, node2, learner_list)

Adding a preprocessing step to tmle3_task, analogous to sl3::process_data, that performs the character-to-factor conversion shown above would resolve this.

jeremyrcoyle avatar Mar 17 '21 21:03 jeremyrcoyle