tidyr icon indicating copy to clipboard operation
tidyr copied to clipboard

unnest_wider() fails when list columns have additional classes and missing values

Open allenbaron opened this issue 3 years ago • 7 comments
trafficstars

I recently upgraded from 1.1.4 to 1.2.0 and ran into an error with unnest_wider(). It now fails with a type conversion error when both of the following conditions are true:

  1. the list column to be unnested is composed of list(s) with at least one empty object (NULL or list()).
  2. the list column to be unnested is composed of list(s) with an additional class.
library(tidyr)
#> Warning: package 'tidyr' was built under R version 4.1.2
library(tibble)

x <- list(a = 1, b = NULL)
df <- tibble::tibble(c = 1, d = list(x))
str(df)
#> tibble [1 × 2] (S3: tbl_df/tbl/data.frame)
#>  $ c: num 1
#>  $ d:List of 1
#>   ..$ :List of 2
#>   .. ..$ a: num 1
#>   .. ..$ b: NULL


# works
tidyr::unnest_wider(df, d)
#> # A tibble: 1 × 3
#>       c     a b    
#>   <dbl> <dbl> <lgl>
#> 1     1     1 NA


# does not work
class(x) <- c("fake", "list")
df <- tibble::tibble(c = 1, d = list(x))
tidyr::unnest_wider(df, d)
#> Error: Can't convert <list> to <fake>.


# works again
x$b <- 1
df <- tibble::tibble(c = 1, d = list(x))
tidyr::unnest_wider(df, d)
#> # A tibble: 1 × 3
#>       c     a     b
#>   <dbl> <dbl> <dbl>
#> 1     1     1     1

Created on 2022-02-25 by the reprex package (v2.0.1)

rlang::last_error()
rlang::last_error()
#> <error/vctrs_error_incompatible_type>
#> Can't convert <list> to <fake>.
#> Backtrace:
#>   1. tidyr::unnest_wider(df, d)
#>   2. tidyr:::df_simplify(...)
#>   3. tidyr:::col_simplify(...)
#>   4. tidyr:::list_init_empty(x, null = TRUE, typed = TRUE)
#>   5. vctrs::vec_assign(x, empty_null, replacement)
#>   7. vctrs::vec_default_cast(...)
#>   8. vctrs::stop_incompatible_cast(...)
#>   9. vctrs::stop_incompatible_type(...)
#>  10. vctrs:::stop_incompatible(...)
#>  11. vctrs:::stop_vctrs(...)
#> Run `rlang::last_trace()` to see the full context.
rlang::last_trace()
rlang::last_trace()
#> <error/vctrs_error_incompatible_type>
#> Can't convert <list> to <fake>.
#> Backtrace:
#>      █
#>   1. └─tidyr::unnest_wider(df, d)
#>   2.   └─tidyr:::df_simplify(...)
#>   3.     └─tidyr:::col_simplify(...)
#>   4.       └─tidyr:::list_init_empty(x, null = TRUE, typed = TRUE)
#>   5.         └─vctrs::vec_assign(x, empty_null, replacement)
#>   6.           └─(function () ...
#>   7.             └─vctrs::vec_default_cast(...)
#>   8.               └─vctrs::stop_incompatible_cast(...)
#>   9.                 └─vctrs::stop_incompatible_type(...)
#>  10.                   └─vctrs:::stop_incompatible(...)
#>  11.                     └─vctrs:::stop_vctrs(...)
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value                       
#>  version  R version 4.1.1 (2021-08-10)
#>  os       macOS Big Sur 10.16         
#>  system   x86_64, darwin17.0          
#>  ui       X11                         
#>  language (EN)                        
#>  collate  en_US.UTF-8                 
#>  ctype    en_US.UTF-8                 
#>  tz       America/New_York            
#>  date     2022-02-25                  
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  ! package     * version date       lib source        
#>    assertthat    0.2.1   2019-03-21 [1] CRAN (R 4.1.0)
#>    backports     1.2.1   2020-12-09 [1] CRAN (R 4.1.0)
#>  P cli           3.0.1   2021-07-17 [?] CRAN (R 4.1.0)
#>    crayon        1.4.1   2021-02-08 [1] CRAN (R 4.1.0)
#>    DBI           1.1.1   2021-01-15 [1] CRAN (R 4.1.0)
#>    digest        0.6.27  2020-10-24 [1] CRAN (R 4.1.0)
#>  P dplyr         1.0.7   2021-06-18 [?] CRAN (R 4.1.0)
#>    ellipsis      0.3.2   2021-04-29 [1] CRAN (R 4.1.0)
#>    evaluate      0.14    2019-05-28 [1] CRAN (R 4.1.0)
#>    fansi         0.5.0   2021-05-25 [1] CRAN (R 4.1.0)
#>    fs            1.5.0   2020-07-31 [1] CRAN (R 4.1.0)
#>    generics      0.1.0   2020-10-31 [1] CRAN (R 4.1.0)
#>    glue          1.4.2   2020-08-27 [1] CRAN (R 4.1.0)
#>    highr         0.9     2021-04-16 [1] CRAN (R 4.1.0)
#>    htmltools     0.5.1.1 2021-01-22 [1] CRAN (R 4.1.0)
#>    knitr         1.33    2021-04-24 [1] CRAN (R 4.1.0)
#>    lifecycle     1.0.0   2021-02-15 [1] CRAN (R 4.1.0)
#>    magrittr      2.0.1   2020-11-17 [1] CRAN (R 4.1.0)
#>  P pillar        1.6.2   2021-07-29 [?] CRAN (R 4.1.0)
#>    pkgconfig     2.0.3   2019-09-22 [1] CRAN (R 4.1.0)
#>    purrr         0.3.4   2020-04-17 [1] CRAN (R 4.1.0)
#>  P R6            2.5.1   2021-08-19 [?] CRAN (R 4.1.0)
#>    reprex        2.0.1   2021-08-05 [1] CRAN (R 4.1.0)
#>    rlang         0.4.11  2021-04-30 [1] CRAN (R 4.1.0)
#>    rmarkdown     2.10    2021-08-06 [1] CRAN (R 4.1.0)
#>    rstudioapi    0.13    2020-11-12 [1] CRAN (R 4.1.0)
#>    sessioninfo   1.1.1   2018-11-05 [1] CRAN (R 4.1.0)
#>    stringi       1.7.3   2021-07-16 [1] CRAN (R 4.1.0)
#>    stringr       1.4.0   2019-02-10 [1] CRAN (R 4.1.0)
#>    styler        1.5.1   2021-07-13 [1] CRAN (R 4.1.0)
#>    tibble      * 3.1.3   2021-07-23 [1] CRAN (R 4.1.0)
#>    tidyr       * 1.2.0   2022-02-01 [1] CRAN (R 4.1.2)
#>    tidyselect    1.1.1   2021-04-30 [1] CRAN (R 4.1.0)
#>  P utf8          1.2.2   2021-07-24 [?] CRAN (R 4.1.0)
#>    vctrs         0.3.8   2021-04-29 [1] CRAN (R 4.1.0)
#>    withr         2.4.2   2021-04-18 [1] CRAN (R 4.1.0)
#>    xfun          0.25    2021-08-06 [1] CRAN (R 4.1.0)
#>    yaml          2.2.1   2020-02-01 [1] CRAN (R 4.1.0)
#> 
#> [1] /Library/Frameworks/R.framework/Versions/4.1/Resources/library
#> 
#>  P ── Loaded and on-disk path mismatch.

allenbaron avatar Feb 25 '22 14:02 allenbaron

Were you using some package that created a "fake" object? Or is this something you created yourself?

DavisVaughan avatar Feb 25 '22 15:02 DavisVaughan

Developer note:

This is due to this section of list_init_empty(): https://github.com/tidyverse/tidyr/blob/5da45d05b86004ffd8b32f5c39a46e05c75ee3b0/R/utils.R#L163-L166

We try to construct a replacement element for all of the NULL elements in the list, replacing them with their size 1 equivalents, i.e. unspecified(1). To use vec_assign() we have to wrap this in a list(), so it is the same type as x. But x here is a classed list, not just a bare list, and you don't automatically get casting methods from a bare list up to a classed list, so this doesn't work out of the box.

Ideally we'd be able to construct a replacement using the classed list, approximately like fake(unspecified(1)), but I'm not sure if that is viable in all cases.

It might be that this is the "best" we can do, and that the developer who created "fake" should provide a list->fake cast method.

DavisVaughan avatar Feb 25 '22 15:02 DavisVaughan

I just created "fake" as a dummy class for the reprex (it shouldn't have any methods). The actual usage where I discovered this was in converting the esummary class of the rentrez package to a data frame. I've already created a work-around for my use case by resetting the esummary class to a list.

allenbaron avatar Feb 25 '22 15:02 allenbaron

Can you please also post a full reprex with rentrez to ensure that we fix that specific problem too? I appreciate the minimal reprex (that is definitely the right thing to do here), but it'll also be nice to know that we fixed this for rentrez too, which might be more complex.

DavisVaughan avatar Feb 25 '22 15:02 DavisVaughan

library(rentrez)
library(tidyr)
#> Warning: package 'tidyr' was built under R version 4.1.2
library(tibble)

pmid <- "30407550"

x <- rentrez::entrez_summary(
    db = "pubmed",
    pmid,
    always_return_list = TRUE
)

df <- tibble::tibble(id = pmid, record = x)

tidyr::unnest_wider(df, record)
#> Error: Can't convert <list> to <esummary>.

Created on 2022-02-25 by the reprex package (v2.0.1)

allenbaron avatar Feb 25 '22 15:02 allenbaron

This Stack Overflow post gives a more intuitive example of this problem: https://stackoverflow.com/questions/71938462/unnest-doesnt-work-with-list-columns-any-more

renzheyu avatar Jul 12 '22 20:07 renzheyu

df <- mtcars %>% 
  nest(data = -c(cyl)) %>% 
  mutate(aov  = map(data, ~ aov(mpg ~ hp, data = .x))) %>% 
  mutate(dunned = map(data, ~ rstatix::dunn_test(mpg ~ hp, data = .x)))


df
# A tibble: 3 × 4
    cyl data               aov    dunned                 
  <dbl> <list>             <list> <list>                 
1     6 <tibble [7 × 10]>  <aov>  <rstatix_test [6 × 9]> 
2     4 <tibble [11 × 10]> <aov>  <rstatix_test [45 × 9]>
3     8 <tibble [14 × 10]> <aov>  <rstatix_test [36 × 9]>

And

df %>% unnest(aov) # Error: Input must be a vector, not a <aov/lm> object.
df %>% unnest_wider(aov) # Error: Input must be list of vectors
df %>% unnest_legacy(aov) # Error: Each column must either be a list of vectors or a list of data frames [lm]

renzheyu avatar Jul 12 '22 20:07 renzheyu

Actual reprex:

library(dplyr, warn.conflicts = FALSE)
library(tidyr)

df <- mtcars %>% 
  nest(data = -c(cyl)) %>% 
  mutate(aov  = lapply(data, \(df) aov(mpg ~ hp, data = df)))

df %>% unnest(aov)
#> Error in `list_sizes()`:
#> ! `x[[1]]` must be a vector, not a <aov/lm> object.
#> Backtrace:
#>      ▆
#>   1. ├─df %>% unnest(aov)
#>   2. ├─tidyr::unnest(., aov)
#>   3. ├─tidyr:::unnest.data.frame(., aov)
#>   4. │ └─tidyr::unchop(data, any_of(cols), keep_empty = keep_empty, ptype = ptype)
#>   5. │   └─tidyr:::df_unchop(cols, ptype = ptype, keep_empty = keep_empty)
#>   6. │     └─tidyr:::list_init_empty(x = col, null = TRUE, typed = keep_empty)
#>   7. │       └─vctrs::list_sizes(x)
#>   8. └─vctrs:::stop_scalar_type(`<fn>`(`<aov>`), "x[[1]]", `<env>`)
#>   9.   └─vctrs:::stop_vctrs(...)
#>  10.     └─rlang::abort(message, class = c(class, "vctrs_error"), ..., call = vctrs_error_call(call))

df %>% unnest_wider(aov)
#> Error in `unnest_wider()`:
#> ! List-column `aov` must contain only vectors.
#> Backtrace:
#>     ▆
#>  1. ├─df %>% unnest_wider(aov)
#>  2. └─tidyr::unnest_wider(., aov)
#>  3.   └─tidyr:::col_to_wide(...)
#>  4.     └─purrr::map(...)
#>  5.       └─tidyr (local) .f(.x[[i]], ...)
#>  6.         └─cli::cli_abort(...)
#>  7.           └─rlang::abort(...)

Created on 2022-10-19 with reprex v2.0.2

@renzheyu but that appears to be unrelated to this issue, and I think the errors are reasonably clear about telling you what the problem is.

hadley avatar Oct 19 '22 21:10 hadley