dtplyr
dtplyr copied to clipboard
Using `TRUE` in `case_when()` causes an error if the output vector length does not equal the group size `.N`
Originally posted by @KesterJ in https://github.com/tidyverse/dtplyr/issues/300#issuecomment-1036179822
I've encountered a version of this issue that doesn't involve `&&`, and where `group_by()` is called after `lazy_dt()`. Reprex below:
library(dplyr, warn.conflicts = FALSE)
library(dtplyr, warn.conflicts = FALSE)
options(dplyr.summarise.inform = FALSE)
loans <- tibble(
borrower_id = c(1,1,1,1,2,2),
loan_id = c("A", "A", "B", "B", "C", "C"),
year = c(2020, 2021, 2020, 2021, 2020, 2021),
repayments = c(0, 0, 0, 200, 150, 50)
)
#In dplyr (works)
loans %>%
group_by(borrower_id, year) %>%
summarise(
status = case_when(any(repayments > 0) ~ "Made repayments",
TRUE ~ "Did not make any repayments")
) %>%
ungroup()
#> # A tibble: 4 x 3
#> borrower_id year status
#> <dbl> <dbl> <chr>
#> 1 1 2020 Did not make any repayments
#> 2 1 2021 Made repayments
#> 3 2 2020 Made repayments
#> 4 2 2021 Made repayments
#In dtplyr (does not work)
loans %>%
lazy_dt() %>%
group_by(borrower_id, year) %>%
summarise(
status = case_when(any(repayments > 0) ~ "Made repayments",
TRUE ~ "Did not make any repayments")
) %>%
ungroup() %>%
as_tibble()
#> Error in fcase(any(repayments > 0), "Made repayments", rep(TRUE, .N), : Argument #3 has a different length than argument #1. Please make sure all logical conditions have the same length.
#In dtplyr with different grouping that includes only one row per group (works)
loans %>%
lazy_dt() %>%
group_by(loan_id, year) %>%
summarise(
status = case_when(any(repayments > 0) ~ "Made repayments",
TRUE ~ "Did not make any repayments")
) %>%
ungroup() %>%
as_tibble()
#> # A tibble: 6 x 3
#> loan_id year status
#> <chr> <dbl> <chr>
#> 1 A 2020 Did not make any repayments
#> 2 A 2021 Did not make any repayments
#> 3 B 2020 Did not make any repayments
#> 4 B 2021 Made repayments
#> 5 C 2020 Made repayments
#> 6 C 2021 Made repayments
Created on 2022-02-11 by the reprex package (v2.0.1)
I think the only way to address this would be to assign the first argument and its length to variables, then pass them to `fcase()` with the `TRUE`/`T` conditions replicated (via `rep()`) the right number of times. But that seems like a bad idea:
library(dplyr, warn.conflicts = FALSE)
library(dtplyr, warn.conflicts = FALSE)
options(dplyr.summarise.inform = FALSE)
loans <- tibble(
borrower_id = c(1,1,1,1,2,2),
loan_id = c("A", "A", "B", "B", "C", "C"),
year = c(2020, 2021, 2020, 2021, 2020, 2021),
repayments = c(0, 0, 0, 200, 150, 50)
)
dtp_out <-
loans %>%
lazy_dt() %>%
group_by(borrower_id, year) %>%
summarise(
status = case_when(any(repayments > 0) ~ "Made repayments",
TRUE ~ "Did not make any repayments")
)
dtp_out %>%
ungroup() %>%
as_tibble()
#> # A tibble: 4 × 3
#> borrower_id year status
#> <dbl> <dbl> <chr>
#> 1 1 2020 Did not make any repayments
#> 2 1 2021 Made repayments
#> 3 2 2020 Made repayments
#> 4 2 2021 Made repayments
dtp_out %>%
show_query()
#> `_DT1`[, .(status = local({
#> .dtp_case_arg1 <- any(repayments > 0)
#> .dtp_case_len <- length(.dtp_case_arg1)
#> fcase(.dtp_case_arg1, "Made repayments", rep(TRUE, .dtp_case_len),
#> "Did not make any repayments")
#> })), keyby = .(borrower_id, year)]
Created on 2022-02-12 by the reprex package (v2.0.1)