tmle3
tmle3 copied to clipboard
We should produce a warning when bounded continuous is used with density type outcomes
`Param_ATT` is used in the example below, but this seems to affect other parameters as well.
library(tmle3) ## tmle3 0.1.7
library(sl3) ## sl3 1.2.0
# Simulate 10 observations: a binary treatment A and a continuous outcome
# whose value is shifted by 30 for treated units.
set.seed(9519)
A <- rbinom(10, 1, 0.3)
Y_unscaled <- 30 * A + 20 + rnorm(10)

# Observed (min, max) of the outcome; used to rescale Y below and passed as
# explicit bounds to variable_type() in one of the NPSEM specifications.
bounds <- range(Y_unscaled)

# Min-max rescaling of the outcome using the observed bounds.
Y_scaled <- (Y_unscaled - bounds[1]) / diff(bounds)

df <- data.frame(A, Y_unscaled, Y_scaled)
# Fit a TMLE of the ATT on the global data frame `df` under the supplied
# NPSEM and return the `estimates` element of the resulting fit.
#
# npsem: node list built with define_node(); must define nodes "A" and "Y".
run_example <- function(npsem) {
  # One GLM learner instance is shared by both likelihood factors.
  glm_learner <- Lrnr_glm$new()
  lik_spec <- Likelihood$new(list(
    define_lf(LF_fit, "A", glm_learner),
    define_lf(LF_fit, "Y", glm_learner)
  ))

  # Static interventions setting A to 1 (treatment) and 0 (control).
  set_treated <- define_lf(LF_static, "A", value = 1)
  set_control <- define_lf(LF_static, "A", value = 0)

  task <- tmle3_Task$new(df, npsem = npsem)
  fitted_lik <- lik_spec$train(task)

  att_param <- define_param(Param_ATT, fitted_lik, set_treated, set_control)

  # The updater must know about the parameter before the targeted
  # likelihood is constructed from it.
  tmle_updater <- tmle3_Update$new()
  tmle_updater$tmle_params <- att_param
  targeted_lik <- Targeted_Likelihood$new(fitted_lik, tmle_updater)

  # Warnings from the fit are intentionally silenced in this reproduction.
  fit <- suppressWarnings(
    fit_tmle3(task, targeted_lik, att_param, tmle_updater)
  )
  fit$estimates
}
# Three NPSEM specifications for the same structure (A is the parent of Y);
# they differ only in how the outcome column and its scale are declared.

# npsem1: outcome column already rescaled (Y_scaled).
npsem1 <- c(
  define_node("A", "A"),
  define_node("Y", "Y_scaled", c("A"))
)

# npsem2: raw outcome column, declared continuous with explicit bounds.
npsem2 <- c(
  define_node("A", "A"),
  define_node(
    "Y", "Y_unscaled", c("A"),
    variable_type("continuous", bounds = bounds)
  )
)

# npsem3: raw outcome column, using the `scale` argument of define_node().
npsem3 <- c(
  define_node("A", "A"),
  define_node("Y", "Y_unscaled", c("A"), scale = TRUE)
)
# Run the same ATT fit under each NPSEM specification; the lines starting
# with `##` below each call are the pasted console output.
# NOTE: psi is identical (0.9450154) in all three runs, but the IC values for
# npsem2 and npsem3 are in the tens while those for npsem1 are in the
# hundredths -- this mismatch is the behaviour being reported.
run_example(npsem1) ## scaled
## [[1]]
## [[1]]$psi
## [1] 0.9450154
## [[1]]$IC
## [1] -0.019166418 -0.011467197 0.023985418 0.023000715 0.031983918
## [6] -0.034262603 -0.027609057 0.005902324 0.019357548 -0.011724648
run_example(npsem2) ## unscaled, specify variable_type
## [[1]]
## [[1]]$psi
## [1] 0.9450154
## [[1]]$IC
## [1] 48.63069 48.87336 -19.63742 49.95977 -19.38532 -21.47337 -21.26365
## [8] -20.20739 49.84494 48.86525
run_example(npsem3) ## unscaled, use 'scale' arg
## [[1]]
## [[1]]$psi
## [1] 0.9450154
## [[1]]$IC
## [1] 48.63069 48.87336 -19.63742 49.95977 -19.38532 -21.47337 -21.26365
## [8] -20.20739 49.84494 48.86525
Thanks for the detailed example. The issue can be solved by changing the NPSEM to specify that the Y likelihood factor is a mean, E(Y|A), not a density, p(Y|A), as follows:
# Suggested fix: pass type="mean" for the Y factor so that it is treated as a
# conditional mean E(Y|A) rather than a density p(Y|A). `lrnr_glm` is the
# learner created inside run_example().
factor_list <- list(
define_lf(LF_fit, "A", lrnr_glm),
define_lf(LF_fit, "Y", lrnr_glm, type="mean")
)
Bounded continuous only really makes sense for "mean-type" likelihood factors, not "density-type" likelihood factors (which are rarely used for outcome variables anyway). I'm going to leave the issue open but change the title to reflect the fact that we should produce a warning when bounded continuous is used with density type outcomes.
Thanks for the clarification!