httr2
httr2 copied to clipboard
Failure to download a 0-byte file in parallel: `if (!resp_has_body(x)) {`: missing value where TRUE/FALSE needed
I'm downloading a bunch of files in parallel, and some of them turn out to be 0 bytes.
Example of file: https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5
This works:
zero_byte_file_url <- "https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5"
# Perform a single request, passing a local `path` named after the last
# component of the URL
httr2::req_perform(
  httr2::request(zero_byte_file_url),
  path = basename(zero_byte_file_url)
)
# Confirm that a file now exists on disk
file.exists(basename(zero_byte_file_url))
# Remove the downloaded file again
file.remove(basename(zero_byte_file_url))
This works as well:
# Build a list with one httr2 request per URL, each set up to retry
# (max_tries = 10)
create_file_requests <- function(urls) {
  purrr::map(urls, \(url) {
    httr2::request(url) |>
      httr2::req_retry(max_tries = 10)
  })
}
# Fetch the file by performing the request list sequentially, passing a local
# path named after the last component of the URL.
# The native pipe `|>` is used here, like in the other snippets, because
# `%>%` is only available when magrittr (or a package re-exporting it) is
# attached, which this example never does.
create_file_requests(zero_byte_file_url) |>
  httr2::req_perform_sequential(paths = basename(zero_byte_file_url))
# Check if we downloaded the file
file.exists(basename(zero_byte_file_url))
# Clean up after ourselves
file.remove(basename(zero_byte_file_url))
However, this doesn't work in parallel. Initially I thought it might be because retries aren't allowed for parallel requests (although the documentation claims they would just get ignored), but it doesn't work with the retry omitted either:
# Build a list with one plain httr2 request per URL, no retries this time!
create_file_requests_no_retry <- function(urls) {
  purrr::map(urls, httr2::request)
}
# Perform the request list in parallel; this is the call that fails
httr2::req_perform_parallel(
  create_file_requests_no_retry(zero_byte_file_url),
  paths = basename(zero_byte_file_url)
)
# Check whether a file was written to disk
file.exists(basename(zero_byte_file_url))
I trigger a condition in `resp_has_body()` that doesn't have a clear message:
GET
https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5
Status: 200 OK
Content-Type: binary/octet-stream
Error in if (!resp_has_body(x)) { : missing value where TRUE/FALSE needed
reprex
# single request ----------------------------------------------------------
# Baseline case: one request performed with req_perform(), passing `path`.
zero_byte_file_url <- "https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5"
# Fetch the file
httr2::request(zero_byte_file_url) |>
httr2::req_perform(path = basename(zero_byte_file_url))
#> <httr2_response>
#> GET
#> https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5
#> Status: 200 OK
#> Content-Type: binary/octet-stream
#> Body: None
# Check if we downloaded the file
# (TRUE below: the file exists even though the response printed `Body: None`)
file.exists(basename(zero_byte_file_url))
#> [1] TRUE
# Clean up after ourselves
file.remove(basename(zero_byte_file_url))
#> [1] TRUE
# sequentially ------------------------------------------------------------
# Build a list with one httr2 request per URL, each set up to retry
# (max_tries = 10)
create_file_requests <- function(urls) {
  purrr::map(urls, \(url) {
    httr2::request(url) |>
      httr2::req_retry(max_tries = 10)
  })
}
# Fetch the file
# Same URL as the single-request case, now as a one-element request list
# performed with req_perform_sequential() and a matching `paths` value.
create_file_requests(zero_byte_file_url) |>
httr2::req_perform_sequential(paths = basename(zero_byte_file_url))
#> [[1]]
#> <httr2_response>
#> GET
#> https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5
#> Status: 200 OK
#> Content-Type: binary/octet-stream
#> Body: None
# Check if we downloaded the file
# (TRUE below: the sequential variant also leaves a file on disk)
file.exists(basename(zero_byte_file_url))
#> [1] TRUE
# Clean up after ourselves
file.remove(basename(zero_byte_file_url))
#> [1] TRUE
# in parallel, no retries this time ---------------------------------------
# Build a list with one plain httr2 request per URL (no retry configured)
create_file_requests_no_retry <- function(urls) {
  purrr::map(urls, httr2::request)
}
# Fetch the file, this fails
# Same one-element request list, now performed with req_perform_parallel().
# NOTE(review): in the output below the error appears after the Content-Type
# line, where the other cases print `Body: None` — it looks like it is raised
# while the returned response is being printed; confirm against httr2.
create_file_requests_no_retry(zero_byte_file_url) |>
httr2::req_perform_parallel(paths = basename(zero_byte_file_url))
#> [[1]]
#> <httr2_response>
#> GET
#> https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5
#> Status: 200 OK
#> Content-Type: binary/octet-stream
#> Error in if (!resp_has_body(x)) {: missing value where TRUE/FALSE needed
# Check if we downloaded the file
# (FALSE below: unlike the two cases above, no file exists afterwards)
file.exists(basename(zero_byte_file_url))
#> [1] FALSE
Created on 2024-06-07 with reprex v2.1.0