sitrep
sitrep copied to clipboard
put geom_squares() function in {epikit}
Adapted from {incidence} but makes it possible to use directly with {ggplot2} maintaining the use of scale_x_date() functions. Seems to work dates or month (presumably works with whatever geom_histogram() is fed to it.... but need to add tests.
Also need to re-structure so can used it with the ggplot2 + rather than %>%
pacman::p_load("rio", "tidyverse", "lubridate")
## define a function for plotting squares
## plot: ggplot object (geom_histogram)
## color: colour for the outline of the squares
## fill: colour for filling in the squares (default is NA)
## position: where the squares go (should inherit from the ggplot obj)
add_squares <- function(plot,
color = "black",
fill = NA,
position = "stack"
) {
df <- ggplot_build(plot)$data[[1]]
# define squares for plotting over
squaredf <- df[rep(seq.int(nrow(df)), df[["count"]]), ]
squaredf[["count"]] <- 1
squaredf <- mutate(squaredf,
x = as.Date(x, origin = "1970-01-01"))
## add the squares to the basic plot
plot +
geom_histogram(data = squaredf,
mapping = aes(x = x, y = count),
stat = "identity",
color = "black",
fill = NA,
position = "stack",
width = squaredf$xmax - squaredf$xmin
)
}
# file import
linelist <- import("https://github.com/appliedepi/epirhandbook/raw/master/inst/extdata/case_linelists/linelist_cleaned.rds")
# fix factor levels
linelist <- linelist %>%
mutate(outcome = fct_explicit_na(outcome, na_level = "Missing"),
outcome = fct_rev(outcome)
)
# linelist for central hospital
central_linelist <- linelist %>%
filter(hospital == "Central Hospital") %>%
mutate(epiweek = floor_date(date_onset, "week", week_start = 1),
month = floor_date(date_onset, "month")) %>%
select(date_onset, epiweek, outcome) %>%
arrange(date_onset)
############################### weekly #########################################
# weekly histo breaks for central hospital
weekly_breaks_central <- seq.Date(
from = floor_date(min(central_linelist$date_onset, na.rm=T) - 1, "week", week_start = 1), # monday before first case
to = ceiling_date(max(central_linelist$date_onset, na.rm=T) + 1, "week", week_start = 1), # monday after last case
by = "week")
# define total number
numz <- paste0("N = ", nrow(central_linelist))
# define caption dates
capz <- paste0("*Monday weeks from ",
min(central_linelist$date_onset, na.rm = TRUE) %>%
format("%d %B %Y"), " to ",
max(central_linelist$date_onset, na.rm = TRUE) %>%
format("%d %B %Y"),
". \n",
sum(is.na(central_linelist$date_onset)),
" cases missing date of onset and not shown.")
## use the counts dataset (feed geom_col)
basic_plot <- central_linelist %>%
ggplot() +
# bar chart (if plotting from aggregated counts)
geom_histogram(
# define what to plot and colour
mapping = aes(x = date_onset, fill = outcome),
# define the breaks to use
breaks = weekly_breaks_central,
# start end closed
closed = "left"
) +
# y-axis scale as before
scale_y_continuous(expand = c(0,0)) +
# x-axis scale sets efficient date labels
scale_x_date(
expand = c(0,0), # remove excess x-axis space below and after case bars
date_breaks = "months",
labels = scales::label_date_short()) + # auto efficient date labels
scale_fill_brewer(type = "div",
palette = 7) +
# labels and theme
labs(
# Alex: would stay away from defining as "incidence":
# while not technically wrong because it can be defined as cases/time-period,
# traditionalists would say it should be used as cases/population/time-period
title = "Weekly cases of disease X, by outcome",
subtitle = numz,
x = "Week of symptom onset*",
# Alex: dont need to say "weekly" here because the axis is should counts (small n)
# time period denoted by the x-axis
y = "Cases (n)",
fill = "Outcome",
# No need to repeat N here (as in title)
caption = capz)+
theme_classic(16)+
theme(legend.position = "right",
plot.caption = element_text(hjust=0, face = "italic"))
## add the squares to the basic plot
basic_plot %>%
add_squares()
############################### monthly #########################################
# monthly histo breaks for central hospital
monthly_breaks_central <- seq.Date(
from = floor_date(min(central_linelist$date_onset, na.rm=T) - 1, "month"),
to = ceiling_date(max(central_linelist$date_onset, na.rm=T) + 1, "month"),
by = "month")
# define total number
numz <- paste0("N = ", nrow(central_linelist))
# define caption dates
capz <- paste0("*Monday weeks from ",
min(central_linelist$date_onset, na.rm = TRUE) %>%
format("%d %B %Y"), " to ",
max(central_linelist$date_onset, na.rm = TRUE) %>%
format("%d %B %Y"),
". \n",
sum(is.na(central_linelist$date_onset)),
" cases missing date of onset and not shown.")
## use the counts dataset (feed geom_col)
basic_plot <- central_linelist %>%
ggplot() +
# bar chart (if plotting from aggregated counts)
geom_histogram(
# define what to plot and colour
mapping = aes(x = date_onset, fill = outcome),
# define the breaks to use
breaks = monthly_breaks_central,
# start end closed
closed = "left"
) +
# y-axis scale as before
scale_y_continuous(expand = c(0,0)) +
# x-axis scale sets efficient date labels
scale_x_date(
limits = c(min(monthly_breaks_central), max(monthly_breaks_central)),
expand = c(0,0), # remove excess x-axis space below and after case bars
date_breaks = "months",
labels = scales::label_date_short()) + # auto efficient date labels
scale_fill_brewer(type = "div",
palette = 7) +
# labels and theme
labs(
# Alex: would stay away from defining as "incidence":
# while not technically wrong because it can be defined as cases/time-period,
# traditionalists would say it should be used as cases/population/time-period
title = "Weekly cases of disease X, by outcome",
subtitle = numz,
x = "Week of symptom onset*",
# Alex: dont need to say "weekly" here because the axis is should counts (small n)
# time period denoted by the x-axis
y = "Cases (n)",
fill = "Outcome",
# No need to repeat N here (as in title)
caption = capz)+
theme_classic(16)+
theme(legend.position = "right",
plot.caption = element_text(hjust=0, face = "italic"))
## add the squares to the basic plot
basic_plot %>%
add_squares()