httr2 icon indicating copy to clipboard operation
httr2 copied to clipboard

Failure to download a 0-byte file in parallel: `if (!resp_has_body(x)) {`: missing value where TRUE/FALSE needed

Open PietrH opened this issue 8 months ago • 3 comments

I'm downloading a bunch of files in parallel, and some of them turn out to be 0 bytes.

Example of file: https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5

This works:

# URL of the example 0-byte file
zero_byte_file_url <- "https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5"

# Download it with a single request, saving it under the URL's base name
httr2::req_perform(
  httr2::request(zero_byte_file_url),
  path = basename(zero_byte_file_url)
)

# Confirm the file now exists on disk
file.exists(basename(zero_byte_file_url))
# Remove the downloaded file again
file.remove(basename(zero_byte_file_url))

This works as well:

# Build one httr2 request per URL, each set up to retry (max_tries = 10).
# Returns a list with one request object per element of `urls`.
create_file_requests <- function(urls) {
  purrr::map(
    urls,
    \(url) httr2::req_retry(httr2::request(url), max_tries = 10)
  )
}

# Fetch the file through the sequential multi-request API.
# The native pipe is used because nothing is attached in this snippet (every
# call is namespaced), so magrittr's `%>%` would not be in scope.
create_file_requests(zero_byte_file_url) |>
  httr2::req_perform_sequential(paths = basename(zero_byte_file_url))

# Check if we downloaded the file
file.exists(basename(zero_byte_file_url))
# Clean up after ourselves
file.remove(basename(zero_byte_file_url))

However, this doesn't work in parallel. Initially I thought it might be because retries are not supported in parallel (although the documentation says they would simply be ignored), but it also fails when the retry is omitted:

# Build one plain httr2 request per URL, with no retry policy attached.
# Returns a list with one request object per element of `urls`.
create_file_requests_no_retry <- function(urls) {
  purrr::map(urls, httr2::request)
}

# Run the same download through the parallel API; this is the call that fails
httr2::req_perform_parallel(
  create_file_requests_no_retry(zero_byte_file_url),
  paths = basename(zero_byte_file_url)
)
# Check whether the file was written to disk
file.exists(basename(zero_byte_file_url))

I trigger a condition in resp_has_body() that doesn't have a clear message:

GET
https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5
Status: 200 OK
Content-Type: binary/octet-stream
Error in if (!resp_has_body(x)) { : missing value where TRUE/FALSE needed

reprex

# single request ----------------------------------------------------------

# URL of the example 0-byte file
zero_byte_file_url <- "https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5"

# Fetch the file with a single request, saving it under the URL's base name
httr2::request(zero_byte_file_url) |>
  httr2::req_perform(path = basename(zero_byte_file_url))
#> <httr2_response>
#> GET
#> https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5
#> Status: 200 OK
#> Content-Type: binary/octet-stream
#> Body: None

# Check if we downloaded the file (expected: TRUE, the single request works)
file.exists(basename(zero_byte_file_url))
#> [1] TRUE
# Clean up after ourselves so the next section starts without the file
file.remove(basename(zero_byte_file_url))
#> [1] TRUE


# sequentially ------------------------------------------------------------

# Build one httr2 request per URL, each set up to retry (max_tries = 10).
# Returns a list with one request object per element of `urls`.
create_file_requests <- function(urls) {
  purrr::map(
    urls,
    \(url) httr2::req_retry(httr2::request(url), max_tries = 10)
  )
}

# Fetch the file through the sequential multi-request API, using the
# retry-enabled request list (a single request here)
create_file_requests(zero_byte_file_url) |>
  httr2::req_perform_sequential(paths = basename(zero_byte_file_url))
#> [[1]]
#> <httr2_response>
#> GET
#> https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5
#> Status: 200 OK
#> Content-Type: binary/octet-stream
#> Body: None

# Check if we downloaded the file (expected: TRUE, sequential works too)
file.exists(basename(zero_byte_file_url))
#> [1] TRUE
# Clean up after ourselves so the parallel section starts without the file
file.remove(basename(zero_byte_file_url))
#> [1] TRUE


# in parallel, no retries this time ---------------------------------------

# Build one plain httr2 request per URL, with no retry policy attached.
# Returns a list with one request object per element of `urls`.
create_file_requests_no_retry <- function(urls) {
  purrr::map(urls, httr2::request)
}

# Fetch the file through the parallel API; this is the failing case.
# In the captured output below, the response header lines are printed and then
# the error from `if (!resp_has_body(x))` appears in place of a "Body:" line.
create_file_requests_no_retry(zero_byte_file_url) |>
  httr2::req_perform_parallel(paths = basename(zero_byte_file_url))
#> [[1]]
#> <httr2_response>
#> GET
#> https://aloftdata.s3-eu-west-1.amazonaws.com/baltrad/hdf5/frbla/2021/02/28/frbla_vp_20210228T190000Z_0xb.h5
#> Status: 200 OK
#> Content-Type: binary/octet-stream
#> Error in if (!resp_has_body(x)) {: missing value where TRUE/FALSE needed
# Check if we downloaded the file (FALSE here: no file was written)
file.exists(basename(zero_byte_file_url))
#> [1] FALSE

Created on 2024-06-07 with reprex v2.1.0

PietrH avatar Jun 07 '24 14:06 PietrH