readr
readr copied to clipboard
`readr::problems()` does not show results when `col_select` is provided
I have recently run into an issue with `readr::problems()`.
I have used it in the past and it was very helpful, but it does not seem to work when the `col_select`
parameter is supplied.
Am I overlooking something here?
Thanks!
# Attach the tidyverse (the output below lists readr 2.1.2 and dplyr 1.0.9 among
# the attached packages) and record the session details for the report.
library(tidyverse)
sessionInfo()
#> R version 4.1.3 (2022-03-10)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur/Monterey 10.16
#>
#> Matrix products: default
#> BLAS: /opt/R/4.1.3/Resources/lib/libRblas.0.dylib
#> LAPACK: /opt/R/4.1.3/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] forcats_0.5.2 stringr_1.4.1 dplyr_1.0.9 purrr_0.3.4
#> [5] readr_2.1.2 tidyr_1.2.0 tibble_3.1.8 ggplot2_3.3.6
#> [9] tidyverse_1.3.2
#>
#> loaded via a namespace (and not attached):
#> [1] lubridate_1.8.0 assertthat_0.2.1 digest_0.6.29
#> [4] utf8_1.2.2 R6_2.5.1 cellranger_1.1.0
#> [7] backports_1.4.1 reprex_2.0.2 evaluate_0.16
#> [10] httr_1.4.4 highr_0.9 pillar_1.8.1
#> [13] rlang_1.0.4 googlesheets4_1.0.1 readxl_1.4.1
#> [16] rstudioapi_0.14 R.utils_2.12.0 R.oo_1.25.0
#> [19] rmarkdown_2.16 styler_1.7.0 googledrive_2.0.0
#> [22] munsell_0.5.0 broom_1.0.0 compiler_4.1.3
#> [25] modelr_0.1.9 xfun_0.32 pkgconfig_2.0.3
#> [28] htmltools_0.5.3 tidyselect_1.1.2 fansi_1.0.3
#> [31] crayon_1.5.1 tzdb_0.3.0 dbplyr_2.2.1
#> [34] withr_2.5.0 R.methodsS3_1.8.2 grid_4.1.3
#> [37] jsonlite_1.8.0 gtable_0.3.0 lifecycle_1.0.1
#> [40] DBI_1.1.3 magrittr_2.0.3 scales_1.2.1
#> [43] cli_3.3.0 stringi_1.7.8 fs_1.5.2
#> [46] xml2_1.3.3 ellipsis_0.3.2 generics_0.1.3
#> [49] vctrs_0.4.1 tools_4.1.3 R.cache_0.16.0
#> [52] glue_1.6.2 hms_1.1.2 fastmap_1.1.0
#> [55] yaml_2.3.5 colorspace_2.0-3 gargle_1.2.0
#> [58] rvest_1.0.3 knitr_1.40 haven_2.5.1
# Case 1: `col_select` is not provided; `problems()` works as expected.
# Reads a semicolon-delimited file, decoding it as latin1.
df <- readr::read_delim(
"~/some_data.csv",
delim = ";",
locale = readr::locale(encoding = "latin1")
)
#> New names:
#> • `` -> `...54`
#> Warning: One or more parsing issues, see `problems()` for details
#> Rows: 5519 Columns: 54
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ";"
#> chr (48): Registrationsnummer, Projekt-Bezeichnung, Gebaeude-Bezeichnung, S...
#> dbl (3): Projektnummer, BFS-Nummer, Anzahl Gebäude
#> lgl (1): ...54
#> dttm (2): Mutationsdatum, Definitives Zertifikat
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Confirm the dimensions of the table that was read.
dim(df)
#> [1] 5519 54
# Print the recorded parsing problems, leaving the `file` column out of the
# printout.
print(select(readr::problems(df), -file))
#> # A tibble: 4 × 4
#> row col expected actual
#> <int> <int> <chr> <chr>
#> 1 3703 19 date in ISO8601 " "
#> 2 5039 19 date in ISO8601 " "
#> 3 5040 19 date in ISO8601 " "
#> 4 5041 19 date in ISO8601 " "
# Case 2: `col_select` is provided (here excluding the last column). The read
# still warns about parsing issues, but `problems()` does not report them.
df2 <- readr::read_delim(
"~/some_data.csv",
delim = ";",
col_select = -last_col(),
locale = readr::locale(encoding = "latin1")
)
#> New names:
#> • `` -> `...54`
#> Warning: One or more parsing issues, see `problems()` for details
#> Rows: 5519 Columns: 53
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ";"
#> chr (48): Registrationsnummer, Projekt-Bezeichnung, Gebaeude-Bezeichnung, S...
#> dbl (3): Projektnummer, BFS-Nummer, Anzahl Gebäude
#> dttm (2): Mutationsdatum, Definitives Zertifikat
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# NOTE(review): unlike the first case, `select()` is called here without
# `-file`, so it keeps no columns and the printed tibble has zero columns no
# matter what `problems()` returned. The zero *rows* in the output are what
# indicate that no problems were reported. TODO confirm by printing
# `readr::problems(df2)` directly.
print(select(readr::problems(df2)))
#> # A tibble: 0 × 0
# Case 3: same symptom as above, even though `col_select = everything()` keeps
# every column.
df3 <- readr::read_delim(
"~/some_data.csv",
delim = ";",
col_select = everything(),
locale = readr::locale(encoding = "latin1")
)
#> New names:
#> • `` -> `...54`
#> Warning: One or more parsing issues, see `problems()` for details
#> Rows: 5519 Columns: 54
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ";"
#> chr (48): Registrationsnummer, Projekt-Bezeichnung, Gebaeude-Bezeichnung, S...
#> dbl (3): Projektnummer, BFS-Nummer, Anzahl Gebäude
#> lgl (1): ...54
#> dttm (2): Mutationsdatum, Definitives Zertifikat
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# NOTE(review): `select()` is called here without any column arguments, so it
# keeps no columns and the printed tibble has zero columns no matter what
# `problems()` returned. The zero *rows* in the output are what indicate that
# no problems were reported. TODO confirm by printing `readr::problems(df3)`
# directly.
print(select(readr::problems(df3)))
#> # A tibble: 0 × 0
Created on 2022-09-14 with reprex v2.0.2
I'm able to reproduce this with a smaller, self-contained example:
# Build a small CSV whose `date` column contains exactly one unparseable value.
# NOTE(review): `write_csv()` (and `read_csv()` / `problems()` further down) are
# called unqualified; presumably readr or the tidyverse is already attached --
# the attaching call is not shown in this snippet.

# 1001 date strings sampled from the days 2020-01-01 through 2022-12-31.
# No seed is set here, so the sampled dates differ between runs.
dates <- as.character(sample(
seq(
as.Date("2020/01/01"),
as.Date("2022/12/31"),
by = "day"
), 1001
))
values <- seq(1, 1001)
data <- tibble::tibble(date = dates, value = values)
# create a parsing error: overwrite one entry with a date written without
# separators, which the output further down reports as not matching the
# expected "date in ISO8601" format
data$date[1000] <- "20220907"
# Write the data to a temporary file so it can be read back with `read_csv()`.
tf <- withr::local_tempfile()
write_csv(data, tf)
# supply col_select: the read warns about parsing issues ...
my_data <- read_csv(tf, col_select = "date", show_col_types = FALSE)
#> Warning: One or more parsing issues, call `problems()` on your data frame
#> for details, e.g.:
#> dat <- vroom(...)
#> problems(dat)
# ... but the report shows no output at all for the following call.
problems(my_data)
# don't supply col_select: reading the same file now lets `problems()` report
# the bad date (see the tibble printed below)
my_data <- read_csv(tf, show_col_types = FALSE)
#> Warning: One or more parsing issues, call `problems()` on your data frame
#> for details, e.g.:
#> dat <- vroom(...)
#> problems(dat)
problems(my_data)
#> # A tibble: 1 × 5
#> row col expected actual file
#> <int> <int> <chr> <chr> <chr>
#> 1 1001 1 date in ISO8601 20220907 /private/var/folders/4g/9jcx…