r-polars
r-polars copied to clipboard
`pl$read_csv()` throws error for compressed files
When using `pl$read_csv()` to read a gzipped file, an error is thrown directing the user to "use `read_csv` for compressed data" (which is what is already being used). It looks like `read_csv` in R is just a wrapper around `scan_csv`. `scan_csv` can't be used for compressed data, so this error is thrown for `read_csv` as well.
I've added a reprex below. I think a previous version of r-polars let me use `read_csv` on compressed data, but I could be wrong; this could also be a breaking change on the Rust side.
library(polars)
if (!suppressMessages(require(R.utils))) stop("Need `R.utils` to gzip file")
# Version used
packageVersion("polars")
#> [1] '0.16.1'
# Make some data
pl_df <- pl$DataFrame(col1 = letters, col2 = 1:26)
# Save
path <- tempfile(fileext = "csv")
pl_df$write_csv(path)
# Compress
gz_path <- R.utils::gzip(path)
# Try to read back - throws error
pl$read_csv(gz_path)
#> Error: Execution halted with the following contexts
#> 0: In R: in pl$read_csv():
#> 0: During function call [base::tryCatch(base::withCallingHandlers({
#> NULL
#> base::saveRDS(base::do.call(base::do.call, base::c(base::readRDS("C:\\Users\\jsmith79\\AppData\\Local\\Temp\\RtmpMbY9aJ\\callr-fun-98e475e83a42"),
#> base::list(envir = .GlobalEnv, quote = TRUE)), envir = .GlobalEnv,
#> quote = TRUE), file = "C:\\Users\\jsmith79\\AppData\\Local\\Temp\\RtmpMbY9aJ\\callr-res-98e415445ef",
#> compress = FALSE)
#> base::flush(base::stdout())
#> base::flush(base::stderr())
#> NULL
#> base::invisible()
#> }, error = function(e) {
#> {
#> callr_data <- base::as.environment("tools:callr")$`__callr_data__`
#> err <- callr_data$err
#> if (FALSE) {
#> base::assign(".Traceback", base::.traceback(4), envir = callr_data)
#> utils::dump.frames("__callr_dump__")
#> base::assign(".Last.dump", .GlobalEnv$`__callr_dump__`,
#> envir = callr_data)
#> base::rm("__callr_dump__", envir = .GlobalEnv)
#> }
#> e <- err$process_call(e)
#> e2 <- err$new_error("error in callr subprocess")
#> class <- base::class
#> class(e2) <- base::c("callr_remote_error", class(e2))
#> e2 <- err$add_trace_back(e2)
#> cut <- base::which(e2$trace$scope == "global")[1]
#> if (!base::is.na(cut)) {
#> e2$trace <- e2$trace[-(1:cut), ]
#> }
#> base::saveRDS(base::list("error", e2, e), file = base::paste0("C:\\Users\\jsmith79\\AppData\\Local\\Temp\\RtmpMbY9aJ\\callr-res-98e415445ef",
#> ".error"))
#> }
#> }, interrupt = function(e) {
#> {
#> callr_data <- base::as.environment("tools:callr")$`__callr_data__`
#> err <- callr_data$err
#> if (FALSE) {
#> base::assign(".Traceback", base::.traceback(4), envir = callr_data)
#> utils::dump.frames("__callr_dump__")
#> base::assign(".Last.dump", .GlobalEnv$`__callr_dump__`,
#> envir = callr_data)
#> base::rm("__callr_dump__", envir = .GlobalEnv)
#> }
#> e <- err$process_call(e)
#> e2 <- err$new_error("error in callr subprocess")
#> class <- base::class
#> class(e2) <- base::c("callr_remote_error", class(e2))
#> e2 <- err$add_trace_back(e2)
#> cut <- base::which(e2$trace$scope == "global")[1]
#> if (!base::is.na(cut)) {
#> e2$trace <- e2$trace[-(1:cut), ]
#> }
#> base::saveRDS(base::list("error", e2, e), file = base::paste0("C:\\Users\\jsmith79\\AppData\\Local\\Temp\\RtmpMbY9aJ\\callr-res-98e415445ef",
#> ".error"))
#> }
#> }, callr_message = function(e) {
#> base::try(base::signalCondition(e))
#> }), error = function(e) {
#> NULL
#> if (FALSE) {
#> base::try(base::stop(e))
#> }
#> else {
#> base::invisible()
#> }
#> }, interrupt = function(e) {
#> NULL
#> if (FALSE) {
#> e
#> }
#> else {
#> base::invisible()
#> }
#> })]
#> 1: Encountered the following error in Rust-Polars:
#> cannot scan compressed csv; use `read_csv` for compressed data
Created on 2024-04-23 with reprex v2.1.0
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.3.3 (2024-02-29 ucrt)
#> os Windows 11 x64 (build 22631)
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_United States.utf8
#> ctype English_United States.utf8
#> tz America/Chicago
#> date 2024-04-23
#> pandoc 3.1.1 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> cli 3.6.2 2023-12-11 [1] CRAN (R 4.3.3)
#> digest 0.6.35 2024-03-11 [1] CRAN (R 4.3.3)
#> evaluate 0.23 2023-11-01 [1] CRAN (R 4.3.3)
#> fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.3)
#> fs 1.6.3 2023-07-20 [1] CRAN (R 4.3.3)
#> glue 1.7.0 2024-01-09 [1] CRAN (R 4.3.3)
#> htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.3.3)
#> knitr 1.46 2024-04-06 [1] CRAN (R 4.3.3)
#> lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.3.3)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.3)
#> polars * 0.16.1 2024-04-16 [1] https://r~
#> purrr 1.0.2 2023-08-10 [1] CRAN (R 4.3.3)
#> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.3.3)
#> R.methodsS3 * 1.8.2 2022-06-13 [1] CRAN (R 4.3.3)
#> R.oo * 1.26.0 2024-01-24 [1] CRAN (R 4.3.3)
#> R.utils * 2.12.3 2023-11-18 [1] CRAN (R 4.3.3)
#> reprex 2.1.0 2024-01-11 [1] CRAN (R 4.3.3)
#> rlang 1.1.3 2024-01-10 [1] CRAN (R 4.3.3)
#> rmarkdown 2.26 2024-03-05 [1] CRAN (R 4.3.3)
#> rstudioapi 0.16.0 2024-03-24 [1] CRAN (R 4.3.3)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.3.3)
#> styler 1.10.3 2024-04-07 [1] CRAN (R 4.3.3)
#> vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.3.3)
#> withr 3.0.0 2024-01-16 [1] CRAN (R 4.3.3)
#> xfun 0.43 2024-03-25 [1] CRAN (R 4.3.3)
#> yaml 2.3.8 2023-12-11 [1] CRAN (R 4.3.2)
#>
#> [1] D:/ProgramFiles/R/R-4.3.3/library
#>
#> ──────────────────────────────────────────────────────────────────────────────
Thanks for the report and the reprex. I confirm that `pl.read_csv()` works on this gzipped file. We need to check the Python code to see whether it has a special path for this kind of file.
Similar issue: pola-rs/polars-cli#60
Perhaps `polars::prelude::CsvReader::new()` should be used here, as in Python.
https://github.com/pola-rs/polars/blob/f1846a93f347b7967176d5f0276ad58584781bd6/py-polars/src/dataframe/io.rs#L89-L120
Contributions are welcome!
I'm happy to give it a go, actually! I've been dipping my toes into Rust for the past couple of months and looking for ways to put that to use.
Glad to hear that! Feel free to open WIP PRs.