ggmosaic icon indicating copy to clipboard operation
ggmosaic copied to clipboard

Cross-Tabulation feature

Open nickvence opened this issue 4 years ago • 2 comments
trafficstars

It would be nice to print a count in the middle of sufficiently-large boxes.

nickvence avatar Mar 05 '21 22:03 nickvence

We'd be happy to accept a pull request :)

On Fri, Mar 5, 2021 at 4:51 PM Nicholas Vence [email protected] wrote:

It would be nice to print a count in the middle of sufficiently-large boxes.

— You are receiving this because you are subscribed to this thread. Reply to this email directly, view it on GitHub https://github.com/haleyjeppson/ggmosaic/issues/54, or unsubscribe https://github.com/notifications/unsubscribe-auth/AAAWVUSVZMDKLLJSAA732J3TCFN7TANCNFSM4YWAP5PA .

heike avatar Mar 05 '21 23:03 heike

@heike , @nickvence Maybe you find useful this code. Sadly, this has to be repeated every time, so it's worthwhile to think about making a function...

ggplot(data = my_data) +
  geom_mosaic(aes(x = product(Age, Gender), fill=Age)) +
  facet_wrap(~Level) +
  ggplot2::theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  scale_fill_brewer(palette="Dark2") -> tmp_mosaic

mosaic_data <- ggplot_build(tmp_mosaic)$data[[1]]

mosaic_data %>% 
  rename_with(~ (gsub("x__", "", .x, fixed = TRUE))) %>% 
  rename_with(~ (gsub("fill__", "", .x, fixed = TRUE))) %>% 
  mutate(label = .wt,
         Level = factor(PANEL, labels=rev(unique(my_data$Level)))) -> mosaic_data


mosaic_data %>% group_by(Level, Gender) %>% 
  mutate(p = 100* .wt / sum(.wt),
         N = sum(.wt)) -> mosaic_data

tmp_mosaic +
  geom_text(data = mosaic_data,
            aes(x = (xmin+xmax)/2,
                y = (ymin+ymax)/2,
                label = sprintf("%d\n%.1f%%",.wt, p)))+
  geom_text(data = mosaic_data, aes(x=(xmin+xmax)/2, y=1.03, label=  N)) +
  theme(legend.position = "none")

The data:


my_data <- structure(list(Gender = c("Male", "Male", "Male", "Male", "Female", 
"Male", "Male", "Female", "Female", "Male", "Female", "Male", 
"Male", "Female", "Male", "Female", "Male", "Female", "Male", 
"Male", "Female", "Male", "Male", "Female", "Female", "Male", 
"Male", "Male", "Female", "Male", "Male", "Male", "Male", "Female", 
"Male", "Male", "Male", "Male", "Male", "Female", "Female", "Male", 
"Male", "Male", "Male", "Female", "Male", "Female", "Female", 
"Female", "Male", "Male", "Female", "Male", "Female", "Male", 
"Male", "Male", "Male", "Female", "Male", "Male", "Male", "Female", 
"Male", "Male", "Male", "Male", "Male", "Male", "Female", "Male", 
"Male", "Female", "Female", "Female", "Male", "Male", "Male", 
"Male", "Female", "Female", "Male", "Female", "Male", "Male", 
"Male", "Male", "Female", "Female", "Female", "Male", "Male", 
"Male", "Male", "Male", "Male", "Female", "Female", "Male"), 
    Age = c("<65", "<U+2265>65", "<U+2265>65", "<65", "<U+2265>65", 
    "<65", "<U+2265>65", "<U+2265>65", "<U+2265>65", "<65", "<65", 
    "<U+2265>65", "<U+2265>65", "<65", "<U+2265>65", "<U+2265>65", 
    "<65", "<U+2265>65", "<U+2265>65", "<U+2265>65", "<U+2265>65", 
    "<65", "<65", "<U+2265>65", "<65", "<U+2265>65", "<U+2265>65", 
    "<U+2265>65", "<U+2265>65", "<U+2265>65", "<U+2265>65", "<65", 
    "<65", "<U+2265>65", "<U+2265>65", "<65", "<U+2265>65", "<U+2265>65", 
    "<65", "<U+2265>65", "<U+2265>65", "<U+2265>65", "<U+2265>65", 
    "<U+2265>65", "<65", "<65", "<65", "<65", "<65", "<65", "<U+2265>65", 
    "<U+2265>65", "<65", "<65", "<U+2265>65", "<U+2265>65", "<U+2265>65", 
    "<65", "<65", "<65", "<65", "<U+2265>65", "<U+2265>65", "<U+2265>65", 
    "<65", "<65", "<U+2265>65", "<65", "<U+2265>65", "<65", "<U+2265>65", 
    "<U+2265>65", "<65", "<65", "<65", "<65", "<65", "<U+2265>65", 
    "<U+2265>65", "<U+2265>65", "<65", "<U+2265>65", "<65", "<U+2265>65", 
    "<U+2265>65", "<65", "<U+2265>65", "<65", "<65", "<65", "<U+2265>65", 
    "<U+2265>65", "<U+2265>65", "<U+2265>65", "<65", "<U+2265>65", 
    "<65", "<U+2265>65", "<U+2265>65", "<65"), Level = c("Level Y", 
    "Level Y", "Level X", "Level Y", "Level X", "Level X", "Level X", 
    "Level X", "Level Y", "Level Y", "Level X", "Level Y", "Level Y", 
    "Level Y", "Level X", "Level X", "Level Y", "Level X", "Level X", 
    "Level Y", "Level Y", "Level X", "Level X", "Level Y", "Level Y", 
    "Level Y", "Level Y", "Level Y", "Level X", "Level X", "Level X", 
    "Level X", "Level Y", "Level X", "Level X", "Level Y", "Level Y", 
    "Level X", "Level Y", "Level X", "Level X", "Level Y", "Level X", 
    "Level X", "Level X", "Level X", "Level X", "Level X", "Level X", 
    "Level X", "Level X", "Level Y", "Level Y", "Level X", "Level X", 
    "Level Y", "Level X", "Level X", "Level X", "Level X", "Level X", 
    "Level Y", "Level X", "Level Y", "Level Y", "Level Y", "Level X", 
    "Level Y", "Level Y", "Level X", "Level X", "Level Y", "Level X", 
    "Level X", "Level X", "Level X", "Level Y", "Level Y", "Level X", 
    "Level X", "Level X", "Level X", "Level X", "Level X", "Level Y", 
    "Level X", "Level Y", "Level X", "Level Y", "Level X", "Level X", 
    "Level X", "Level Y", "Level Y", "Level X", "Level Y", "Level X", 
    "Level X", "Level X", "Level Y")), row.names = c(NA, 100L
), class = "data.frame")

And the result: obraz


Version 2 - with % of row and column:

ggplot(data = my_data) +
  geom_mosaic(aes(x = product(Age, Gender), fill=Age)) +
  facet_wrap(~Level) +
  ggplot2::theme_bw() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  scale_fill_brewer(palette="Dark2") -> tmp_mosaic

mosaic_data <- ggplot_build(tmp_mosaic)$data[[1]]

mosaic_data %>% 
  rename_with(~ (gsub("x__", "", .x, fixed = TRUE))) %>% 
  rename_with(~ (gsub("fill__", "", .x, fixed = TRUE))) %>% 
  mutate(label = .wt,
         Level = factor(PANEL, labels=rev(unique(my_data$Level)))) -> mosaic_data


mosaic_data %>%
  group_by(Level, Gender) %>% 
  mutate(p_col = 100* .wt / sum(.wt),
         N = sum(.wt)) %>% 
  ungroup() %>% 
  cbind(
    mosaic_data %>% 
      group_by(Level, Age) %>% 
      mutate(p_row = 100* .wt / sum(.wt),
             N = sum(.wt)) %>% 
      ungroup() %>% 
      dplyr::select(p_row)) -> mosaic_data

tmp_mosaic +
  geom_text(data = mosaic_data,
            aes(x = (xmin+xmax)/2,
                y = (ymin+ymax)/2,
                label = sprintf("%d\nCol: %.1f%%\nRow: %.1f%%",.wt, p_col, p_row)))+
  geom_text(data = mosaic_data, aes(x=(xmin+xmax)/2, y=1.03, label=  N)) +
  theme(legend.position = "none")

obraz

Would be easy to add % with respect to N_panel (facet_wrap/grid) or the N_total.

Generalized avatar Jun 21 '21 17:06 Generalized