ggmice icon indicating copy to clipboard operation
ggmice copied to clipboard

Plot the regression line based on multiply imputed data

Open mikemattaUH opened this issue 2 years ago • 4 comments

mikemattaUH avatar May 05 '22 17:05 mikemattaUH

I would like to have a way to plot the fitted lines from models fit from multiply-imputed data using a mice-oriented workflow. It would be great to include the pooled conditional mean line as well as the pooled conditional confidence interval bands. This would be a (very much needed) generalization of the process described by Dr. Solomon Kurz on his blog here: https://solomonkurz.netlify.app/post/2021-10-21-if-you-fit-a-model-with-multiply-imputed-data-you-can-still-plot-the-line/

mikemattaUH avatar May 05 '22 17:05 mikemattaUH

Link update: https://solomonkurz.netlify.app/blog/2021-10-21-if-you-fit-a-model-with-multiply-imputed-data-you-can-still-plot-the-line/

hanneoberman avatar Feb 09 '23 13:02 hanneoberman

Work in progress:

# load packages
# devtools::load_all()
library(ggmice)
library(ggplot2)
library(mice)
library(dplyr)
# load some data
dat <- nhanes
# visualize the incomplete data
ggmice(dat, aes(age, bmi)) + geom_point()

# impute the incomplete data
imp <- mice(dat, m = 10, seed = 1)
#>  ...
# visualize the imputed data
ggmice(imp, aes(age, bmi)) + geom_point()


# fit a model
fit <- with(imp, lm(bmi~age))
# tidy
tidy_fit <- function(fit){
  purrr::map_dfr(1:fit$call1$m, ~{
  broom::augment(fit$analyses[[.x]], conf.int = TRUE) %>%
    cbind(.imp = .x)
  })}
td <- tidy_fit(fit)

ggplot(td, aes(age, bmi)) +
  geom_point() +
  geom_line(aes(age, .fitted, group = .imp))


# pool estimates
est <- pool(fit)

ggplot(td, aes(age, bmi)) +
  # geom_point(aes(age, bmi)) +
  geom_line(aes(age, .fitted, group = .imp), color = "grey") +
  geom_abline(intercept = est$pooled[1, "estimate"], slope = est$pooled[2, "estimate"], linewidth = 1)


fit2 <- with(imp, glm(hyp - 1 ~ age, family = "binomial"))
est2 <- pool(fit2)
td <- tidy_fit(fit2)

ggplot(td, aes(age, `hyp - 1`)) +
  geom_point() +
  geom_line(aes(age, .fitted, group = .imp))


# ggplot(td, aes(age, bmi)) +
#   # geom_point(aes(age, bmi)) +
#   geom_line(aes(age, .fitted, group = .imp), color = "grey") +
#   geom_smooth(method = "lm", se = FALSE) +
#   theme_mice()

## TODO: re-create leiden85 survival curves
## TODO: check if this works with glm estimates

Created on 2023-12-19 with reprex v2.0.2

hanneoberman avatar Dec 19 '23 13:12 hanneoberman

Just the code:

# load packages
# devtools::load_all()
library(ggmice)
library(ggplot2)
library(mice)
library(dplyr)
# load some data
dat <- nhanes
# visualize the incomplete data
ggmice(dat, aes(age, bmi)) + geom_point()
# impute the incomplete data
imp <- mice(dat, m = 10, seed = 1)
# visualize the imputed data
ggmice(imp, aes(age, bmi)) + geom_point()

# fit a model
fit <- with(imp, lm(bmi~age))
# tidy
tidy_fit <- function(fit){
  purrr::map_dfr(1:fit$call1$m, ~{
  broom::augment(fit$analyses[[.x]], conf.int = TRUE) %>%
    cbind(.imp = .x)
  })}
td <- tidy_fit(fit)

ggplot(td, aes(age, bmi)) +
  geom_point() +
  geom_line(aes(age, .fitted, group = .imp))

# pool estimates
est <- pool(fit)

ggplot(td, aes(age, bmi)) +
  # geom_point(aes(age, bmi)) +
  geom_line(aes(age, .fitted, group = .imp), color = "grey") +
  geom_abline(intercept = est$pooled[1, "estimate"], slope = est$pooled[2, "estimate"], linewidth = 1)

fit2 <- with(imp, glm(hyp - 1 ~ age, family = "binomial"))
est2 <- pool(fit2)
td <- tidy_fit(fit2)

ggplot(td, aes(age, `hyp - 1`)) +
  geom_point() +
  geom_line(aes(age, .fitted, group = .imp))

# ggplot(td, aes(age, bmi)) +
#   # geom_point(aes(age, bmi)) +
#   geom_line(aes(age, .fitted, group = .imp), color = "grey") +
#   geom_smooth(method = "lm", se = FALSE) +
#   theme_mice()

## TODO: re-create leiden85 survival curves
## TODO: check if this works with glm estimates

hanneoberman avatar Dec 19 '23 13:12 hanneoberman