causal-inference-in-R icon indicating copy to clipboard operation
causal-inference-in-R copied to clipboard

Think about collapsibility of the odds ratio

Open LucyMcGowan opened this issue 1 year ago • 1 comments

hmmm @malcolmbarrett this is reminding me of simulations I ran in my doctorate program, but looks like collapsibility is not fixed by IPW and in fact is worse?

library(tidyverse)
library(propensity)
library(survey)

set.seed(1)
n <- 10000

dat <- tibble(
  x_1 = rnorm(n),
  x_2 = rnorm(n),
  x_3 = rnorm(n),
  trt_lin = - 0.5 + 0.25 * x_1 + 0.75 * x_2,
  trt_p = exp(trt_lin) / (1 + exp(trt_lin)),
  treatment = rbinom(n, 1, trt_p),
  y_lin = - 3 + treatment + x_1 + x_2 + 2 * x_3,
  y_p = exp(y_lin) / (1 + exp(y_lin)),
  outcome = rbinom(n, 1, y_p)
)


dat %>%
  mutate(
    p = predict(glm(treatment ~ x_1 + x_2, data = dat, family = binomial()), type = "response"),
    wt = wt_atm(p, treatment)
  ) -> dat
#> ℹ Setting treatment to `1`

## collapsibility is bad (removing x_3 hurts even though not related to treatment)
glm(outcome ~ treatment + x_1 + x_2, data = dat, family = binomial)
#> 
#> Call:  glm(formula = outcome ~ treatment + x_1 + x_2, family = binomial, 
#>     data = dat)
#> 
#> Coefficients:
#> (Intercept)    treatment          x_1          x_2  
#>     -1.9825       0.7342       0.6274       0.6727  
#> 
#> Degrees of Freedom: 9999 Total (i.e. Null);  9996 Residual
#> Null Deviance:       9980 
#> Residual Deviance: 8465  AIC: 8473
## effect should be 1, but is 0.7342

## when we put x_3 in this is fixed
glm(outcome ~ treatment + x_1 + x_2 + x_3, data = dat, family = binomial)
#> 
#> Call:  glm(formula = outcome ~ treatment + x_1 + x_2 + x_3, family = binomial, 
#>     data = dat)
#> 
#> Coefficients:
#> (Intercept)    treatment          x_1          x_2          x_3  
#>     -3.0349       1.0268       0.9537       1.0201       1.9572  
#> 
#> Degrees of Freedom: 9999 Total (i.e. Null);  9995 Residual
#> Null Deviance:       9980 
#> Residual Deviance: 5710  AIC: 5720
## yay!

## let's try to fit the model with weighting
d <- svydesign(
  ~1,
  weights = ~wt,
  data = dat
)
svyglm(outcome ~ treatment, design = d, family = binomial)
#> Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#> Independent Sampling design (with replacement)
#> svydesign(~1, weights = ~wt, data = dat)
#> 
#> Call:  svyglm(formula = outcome ~ treatment, design = d, family = binomial)
#> 
#> Coefficients:
#> (Intercept)    treatment  
#>     -1.6251       0.6821  
#> 
#> Degrees of Freedom: 9999 Total (i.e. Null);  9998 Residual
#> Null Deviance:       10600 
#> Residual Deviance: 10400     AIC: 12060
## uh oh, it's even worse! (0.6821 should be 1)

LucyMcGowan avatar Oct 21 '22 17:10 LucyMcGowan