readr icon indicating copy to clipboard operation
readr copied to clipboard

`readr::problems()` does not show results when `col_select` is provided

Open andreranza opened this issue 1 year ago • 2 comments

I have encountered this issue with readr::problems() lately.

I have used it in the past and it has been very helpful. However, it does not seem to work when the col_select argument is supplied.

Am I overlooking something here?

Thanks!

# Attach the tidyverse (readr and dplyr are listed among the attached packages
# in the session info below) and record the session details for this report.
library(tidyverse)
sessionInfo()
#> R version 4.1.3 (2022-03-10)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur/Monterey 10.16
#> 
#> Matrix products: default
#> BLAS:   /opt/R/4.1.3/Resources/lib/libRblas.0.dylib
#> LAPACK: /opt/R/4.1.3/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] forcats_0.5.2   stringr_1.4.1   dplyr_1.0.9     purrr_0.3.4    
#> [5] readr_2.1.2     tidyr_1.2.0     tibble_3.1.8    ggplot2_3.3.6  
#> [9] tidyverse_1.3.2
#> 
#> loaded via a namespace (and not attached):
#>  [1] lubridate_1.8.0     assertthat_0.2.1    digest_0.6.29      
#>  [4] utf8_1.2.2          R6_2.5.1            cellranger_1.1.0   
#>  [7] backports_1.4.1     reprex_2.0.2        evaluate_0.16      
#> [10] httr_1.4.4          highr_0.9           pillar_1.8.1       
#> [13] rlang_1.0.4         googlesheets4_1.0.1 readxl_1.4.1       
#> [16] rstudioapi_0.14     R.utils_2.12.0      R.oo_1.25.0        
#> [19] rmarkdown_2.16      styler_1.7.0        googledrive_2.0.0  
#> [22] munsell_0.5.0       broom_1.0.0         compiler_4.1.3     
#> [25] modelr_0.1.9        xfun_0.32           pkgconfig_2.0.3    
#> [28] htmltools_0.5.3     tidyselect_1.1.2    fansi_1.0.3        
#> [31] crayon_1.5.1        tzdb_0.3.0          dbplyr_2.2.1       
#> [34] withr_2.5.0         R.methodsS3_1.8.2   grid_4.1.3         
#> [37] jsonlite_1.8.0      gtable_0.3.0        lifecycle_1.0.1    
#> [40] DBI_1.1.3           magrittr_2.0.3      scales_1.2.1       
#> [43] cli_3.3.0           stringi_1.7.8       fs_1.5.2           
#> [46] xml2_1.3.3          ellipsis_0.3.2      generics_0.1.3     
#> [49] vctrs_0.4.1         tools_4.1.3         R.cache_0.16.0     
#> [52] glue_1.6.2          hms_1.1.2           fastmap_1.1.0      
#> [55] yaml_2.3.5          colorspace_2.0-3    gargle_1.2.0       
#> [58] rvest_1.0.3         knitr_1.40          haven_2.5.1

# Baseline: read the semicolon-delimited, latin1-encoded file WITHOUT
# `col_select`. Here `problems()` reports the parsing issues as expected.
df <- readr::read_delim(
  "~/some_data.csv",
  delim = ";",
  locale = readr::locale(encoding = "latin1")
)
#> New names:
#> • `` -> `...54`
#> Warning: One or more parsing issues, see `problems()` for details
#> Rows: 5519 Columns: 54
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ";"
#> chr  (48): Registrationsnummer, Projekt-Bezeichnung, Gebaeude-Bezeichnung, S...
#> dbl   (3): Projektnummer, BFS-Nummer, Anzahl Gebäude
#> lgl   (1): ...54
#> dttm  (2): Mutationsdatum, Definitives Zertifikat
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Check the shape of the baseline result (all 54 columns are kept).
dim(df)
#> [1] 5519   54

# Print the recorded parsing problems, dropping the `file` column.
print(select(readr::problems(df), -file))
#> # A tibble: 4 × 4
#>     row   col expected        actual
#>   <int> <int> <chr>           <chr> 
#> 1  3703    19 date in ISO8601 " "   
#> 2  5039    19 date in ISO8601 " "   
#> 3  5040    19 date in ISO8601 " "   
#> 4  5041    19 date in ISO8601 " "

# Same read, but with `col_select = -last_col()` to drop the last column.
# The parsing warning is still raised, yet `problems()` comes back empty.
df2 <- readr::read_delim(
  "~/some_data.csv",
  delim = ";",
  col_select = -last_col(),
  locale = readr::locale(encoding = "latin1")
)
#> New names:
#> • `` -> `...54`
#> Warning: One or more parsing issues, see `problems()` for details
#> Rows: 5519 Columns: 53
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ";"
#> chr  (48): Registrationsnummer, Projekt-Bezeichnung, Gebaeude-Bezeichnung, S...
#> dbl   (3): Projektnummer, BFS-Nummer, Anzahl Gebäude
#> dttm  (2): Mutationsdatum, Definitives Zertifikat
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# NOTE(review): unlike the baseline call, `select()` gets no column arguments
# here, so it presumably keeps no columns. The 0-row count in the output still
# suggests no problems were returned; confirm with a bare
# `readr::problems(df2)`.
print(select(readr::problems(df2)))
#> # A tibble: 0 × 0

# Same outcome when `col_select = everything()` keeps every column: the parsing
# warning is raised, but `problems()` comes back empty.
df3 <- readr::read_delim(
  "~/some_data.csv",
  delim = ";",
  col_select = everything(),
  locale = readr::locale(encoding = "latin1")
)
#> New names:
#> • `` -> `...54`
#> Warning: One or more parsing issues, see `problems()` for details
#> Rows: 5519 Columns: 54
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ";"
#> chr  (48): Registrationsnummer, Projekt-Bezeichnung, Gebaeude-Bezeichnung, S...
#> dbl   (3): Projektnummer, BFS-Nummer, Anzahl Gebäude
#> lgl   (1): ...54
#> dttm  (2): Mutationsdatum, Definitives Zertifikat
#> 
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# NOTE(review): `select()` gets no column arguments here either, so it
# presumably keeps no columns. The 0-row count in the output still suggests no
# problems were returned; confirm with a bare `readr::problems(df3)`.
print(select(readr::problems(df3)))
#> # A tibble: 0 × 0

Created on 2022-09-14 with reprex v2.0.2

andreranza avatar Sep 14 '22 07:09 andreranza

I'm able to reproduce this

# Draw 1001 distinct days between 2020-01-01 and 2022-12-31 and keep them as
# character strings.
# NOTE(review): no seed is set, so the sampled dates differ between runs; this
# should not affect the reproduction.
dates <- as.character(sample(
  seq(
    as.Date("2020/01/01"),
    as.Date("2022/12/31"),
    by = "day"
  ), 1001
))

values <- seq(1, 1001)

# Two-column tibble: the character dates plus a running integer value.
data <- tibble::tibble(date = dates, value = values)

# create a parsing error: replace one date with a string in a different format
# from the rest of the column
data$date[1000] <- "20220907"

# Write the data to a temporary CSV file.
# NOTE(review): `write_csv()` is called unqualified; presumably readr is
# already attached in this session.
tf <- withr::local_tempfile()
write_csv(data, tf)

# supply col_select: the parsing warning is raised, but `problems()` shows no
# output afterwards
my_data <- read_csv(tf, col_select = "date", show_col_types = FALSE)
#> Warning: One or more parsing issues, call `problems()` on your data frame
#> for details, e.g.:
#>   dat <- vroom(...)
#>   problems(dat)
problems(my_data)

# don't supply col_select: the same warning is raised, and `problems()` now
# reports the malformed date
my_data <- read_csv(tf, show_col_types = FALSE)
#> Warning: One or more parsing issues, call `problems()` on your data frame
#> for details, e.g.:
#>   dat <- vroom(...)
#>   problems(dat)
problems(my_data)
#> # A tibble: 1 × 5
#>     row   col expected        actual   file                         
#>   <int> <int> <chr>           <chr>    <chr>                        
#> 1  1001     1 date in ISO8601 20220907 /private/var/folders/4g/9jcx…

sbearrows avatar Sep 19 '22 21:09 sbearrows