causal-inference-in-R
causal-inference-in-R copied to clipboard
16.02: Calculating estimates with G-Computation
- [ ] https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5008911/
Here is an interesting post comparing IPCW and g-computation (standardisation) for a specific DAG with selection bias: https://stats.stackexchange.com/questions/628315/correcting-for-selection-bias-with-standardisation-g-computation
library(tidyverse)
#' Simulate a randomised study with censoring driven by a collider
#'
#' The RNG is seeded first, then the variables are drawn in the order
#' A, U, L, C, Y:
#' - `A`: binary treatment, assigned with probability 0.5.
#' - `U`: standard-normal variable that is NOT returned (unmeasured).
#' - `L`: binary covariate whose probability depends on both `A` and `U`.
#' - `C`: binary censoring indicator whose probability depends on `L` only.
#' - `Y`: outcome equal to `U` plus standard-normal noise; `A` does not
#'   enter, so the true effect of `A` on `Y` is zero.
#'
#' @param n Number of observations to simulate.
#' @param seed Value passed to `set.seed()` before any draw is made. Note
#'   that this resets the global RNG state.
#' @return A tibble with `n` rows and columns `A`, `L`, `C` and `Y`.
sim_data <- function(n, seed) {
  set.seed(seed)

  treatment <- rbinom(n, size = 1, prob = 0.5)
  unmeasured <- rnorm(n)
  covariate <- rbinom(
    n,
    size = 1,
    prob = plogis(-0.5 + treatment + unmeasured)
  )
  censored <- rbinom(n, size = 1, prob = plogis(-1 + 3 * covariate))
  outcome <- unmeasured + rnorm(n)

  # U is deliberately left out of the returned data.
  tibble(A = treatment, L = covariate, C = censored, Y = outcome)
}
# Simulate the study once; the estimators below all reuse this data set.
df <- sim_data(n = 100000, seed = 123)

# IPC weighting ----
# Logistic model for the probability of being uncensored (C == 0) given L.
C_model <- glm(C == 0 ~ L, family = binomial, data = df)

# Inverse-probability-of-censoring weight: 1 / Pr(C = 0 | L) for every row.
df$ipc_weights <- 1 / predict(C_model, newdata = df, type = "response")

# Weighted regression of Y on A among the uncensored rows only; the weights
# are looked up by name in the filtered data passed to lm().
Y_model <- df %>%
  filter(C == 0) %>%
  lm(Y ~ A, data = ., weights = ipc_weights)

coef(Y_model)["A"]
# 0.0001701217
# G-computation (standardisation) ----
#
# Target: E[Y | A = 1] - E[Y | A = 0] in the full (uncensored + censored)
# population, estimated from the uncensored rows only.
#
# In the simulated data L depends on A (it is a post-treatment variable) and
# censoring depends on L only. The g-formula therefore has to standardise the
# stratum-specific outcome means to the distribution of L *within each
# treatment arm*:
#
#   E[Y | A = a] = sum_l E[Y | A = a, L = l, C = 0] * Pr(L = l | A = a)
#
# Standardising the within-L contrasts to the marginal Pr(L = l) instead
# conditions on L, which is a collider between A and the unmeasured cause of
# Y, and yields a biased estimate. That marginal-L version is kept below as
# `out_marginal_L` for comparison only.

df_c <- filter(df, C == 0)

# Stratum-specific outcome means among the uncensored:
# E[Y | A = a, L = l, C = 0].
treated_L0 <- mean(df_c$Y[df_c$A == 1 & df_c$L == 0])
treated_L1 <- mean(df_c$Y[df_c$A == 1 & df_c$L == 1])
untreated_L0 <- mean(df_c$Y[df_c$A == 0 & df_c$L == 0])
untreated_L1 <- mean(df_c$Y[df_c$A == 0 & df_c$L == 1])

# Marginal distribution of L in the full data (used by the naive version).
Pr_L0 <- sum(df$L == 0) / nrow(df)
Pr_L1 <- 1 - Pr_L0

# Naive standardisation over the marginal distribution of L (biased here).
out_marginal_L <- (treated_L0 - untreated_L0) * Pr_L0 +
  (treated_L1 - untreated_L1) * Pr_L1
# -0.1629754

# Arm-specific distribution of L, Pr(L = 1 | A = a), from the full data:
# L is observed for censored and uncensored rows alike.
Pr_L1_treated <- mean(df$L[df$A == 1] == 1)
Pr_L1_untreated <- mean(df$L[df$A == 0] == 1)

# Standardised mean outcome in each arm, then their difference.
mean_Y_treated <- treated_L0 * (1 - Pr_L1_treated) +
  treated_L1 * Pr_L1_treated
mean_Y_untreated <- untreated_L0 * (1 - Pr_L1_untreated) +
  untreated_L1 * Pr_L1_untreated

out <- mean_Y_treated - mean_Y_untreated
out
# Should be close to 0 (the true effect of A on Y in this simulation),
# unlike the marginal-L estimate above.