readr
readr copied to clipboard
readr::read_delim cannot detect a numeric column if the column contains a lot of NAs.
With readr::read_delim, there is a case where type guessing fails to detect a numeric column when that column contains many NAs.
- For https://www.dropbox.com/scl/fi/tnsj9vfcaryb7l9cq0eq1/can_load_numeric.csv, it is detected as numeric properly even if the column contains a lot of NAs.
- For https://www.dropbox.com/scl/fi/55ljrgu12poce7mzyjo6c/numeric_test_data.csv?rlkey=bhl27vm7353rrrcvmbr7creck&dl=1, it is NOT detected as a numeric column but as a logical column.
# Load a CSV file whose numeric column is detected properly
df0 <- readr::read_delim("https://www.dropbox.com/scl/fi/tnsj9vfcaryb7l9cq0eq1/can_load_numeric.csv?rlkey=xw7a84wzs2mnqfgpaxyqe2968&dl=1")
#> Rows: 51290 Columns: 3
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> dbl (1): 売上
#> lgl (2): 製品名, 数量
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
class(df0$売上)
#> [1] "numeric"
# Load a CSV file whose numeric column is NOT detected properly
df1 <- readr::read_delim("https://www.dropbox.com/scl/fi/55ljrgu12poce7mzyjo6c/numeric_test_data.csv?rlkey=bhl27vm7353rrrcvmbr7creck&dl=1")
#> Warning: One or more parsing issues, call `problems()` on your data frame for details,
#> e.g.:
#> dat <- vroom(...)
#> problems(dat)
#> Rows: 70966 Columns: 3
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> lgl (3): Data Source (File Name), メディアのエンゲージメント数(X), Cost per 1,000 people r...
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
class(df1$`メディアのエンゲージメント数(X)`)
#> [1] "logical"
<sup>Created on 2025-03-26 with [reprex v2.1.1](https://reprex.tidyverse.org)</sup>
"R version 4.4.0 (2024-04-24)",
"Platform: aarch64-apple-darwin20",
"Running under: macOS Sonoma 14.6.1",
"",
"Matrix products: default",
"BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib ",
"LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0",
"",
"locale:",
"[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US/en_US.UTF-8",
"",
"time zone: Asia/Kolkata",
"tzcode source: internal",
"",
"attached base packages:",
"[1] stats graphics grDevices utils datasets methods base ",
"",
"other attached packages:",
" [1] stringi_1.8.4 jsonlite_1.9.0 vroom_1.6.5 rmarkdown_2.29 ",
" [5] knitr_1.49 crayon_1.5.3 exploratory_12.0.4 zipangu_0.3.3 ",
" [9] bit64_4.6.0-1 bit_4.5.0.1 tibble_3.2.1 dplyr_1.1.4 ",
"[13] RcppRoll_0.3.1 forcats_1.0.0 cpp11_0.5.1 readr_2.1.5 ",
"[17] stringr_1.5.1 tidyr_1.3.1 hms_1.1.3 lubridate_1.9.4 ",
"[21] janitor_2.2.1 ",
"",
"loaded via a namespace (and not attached):",
" [1] gtable_0.3.6 prediction_0.3.14 xfun_0.51 ggplot2_3.5.1 ",
" [5] psych_2.4.12 lattice_0.22-6 tzdb_0.4.0 odbc_1.4.2.1 ",
" [9] vctrs_0.6.5 tools_4.4.0 generics_0.1.3 curl_6.2.1 ",
"[13] parallel_4.4.0 proxy_0.4-27 blob_1.2.4 pacman_0.5.1 ",
"[17] pkgconfig_2.0.3 data.table_1.17.0 dbplyr_2.5.0 assertthat_0.2.1 ",
"[21] lifecycle_1.0.4 compiler_4.4.0 progress_1.2.3 munsell_0.5.1 ",
"[25] mnormt_2.1.1 codetools_0.2-20 snakecase_0.11.1 sodium_1.4.0 ",
"[29] htmltools_0.5.8.1 pillar_1.10.1 RPostgres_1.4.6 iterators_1.0.14 ",
"[33] foreach_1.5.2 parallelly_1.42.0 nlme_3.1-164 tidyselect_1.2.1 ",
"[37] digest_0.6.37 future_1.34.0 listenv_0.9.1 reshape2_1.4.4 ",
"[41] purrr_1.0.4 arrow_18.1.0.1 RPresto_1.4.7 fastmap_1.2.0 ",
"[45] grid_4.4.0 colorspace_2.1-1 cli_3.6.4 magrittr_2.0.3 ",
"[49] triebeard_0.4.1 broom_1.0.7 withr_3.0.2 prettyunits_1.2.0",
"[53] scales_1.3.0 backports_1.5.0 timechange_0.3.0 httr_1.4.7 ",
"[57] globals_0.16.3 zoo_1.8-13 evaluate_1.0.3 dtw_1.23-1 ",
"[61] rlang_1.1.5 urltools_1.7.3 Rcpp_1.0.14 DBI_1.2.3 ",
"[65] glue_1.8.0 R6_2.6.1 anonymizer_0.2.2 plyr_1.8.9 "
],
Created on 2025-03-26 with reprex v2.1.1