paws icon indicating copy to clipboard operation
paws copied to clipboard

Spaces in S3 prefix

Open juliasilge opened this issue 1 year ago • 1 comments

I notice that #243 added support for spaces in keys but I am having trouble getting the objects back out if they are saved with spaces in the key.

Here I have a bucket where I have written mtcars both as "my-mtcars" and "my mtcars" (via pins as outlined in rstudio/pins-r#630). You can see them both if I do list_objects_v2():

library(paws)
#> Warning: package 'paws' was built under R version 4.0.5
svc <- s3()
my_bucket <- "sagemaker-rusted-americanquarterhorse"

svc$list_objects_v2(Bucket = my_bucket)
#> $IsTruncated
#> [1] FALSE
#> 
#> $Contents
#> $Contents[[1]]
#> $Contents[[1]]$Key
#> [1] "My key has many spaces"
#> 
#> $Contents[[1]]$LastModified
#> [1] "2022-08-10 18:19:14 GMT"
#> 
#> $Contents[[1]]$ETag
#> [1] "\"cc78575667603286a62436137080e84d\""
#> 
#> $Contents[[1]]$Size
#> [1] 17
#> 
#> $Contents[[1]]$StorageClass
#> [1] "STANDARD"
#> 
#> $Contents[[1]]$Owner
#> $Contents[[1]]$Owner$DisplayName
#> character(0)
#> 
#> $Contents[[1]]$Owner$ID
#> character(0)
#> 
#> 
#> 
#> $Contents[[2]]
#> $Contents[[2]]$Key
#> [1] "my mtcars/20220810T182101Z-ce918/data.txt"
#> 
#> $Contents[[2]]$LastModified
#> [1] "2022-08-10 18:21:02 GMT"
#> 
#> $Contents[[2]]$ETag
#> [1] "\"344f300b029b31b9a9b49e3aa9199a8f\""
#> 
#> $Contents[[2]]$Size
#> [1] 179
#> 
#> $Contents[[2]]$StorageClass
#> [1] "STANDARD"
#> 
#> $Contents[[2]]$Owner
#> $Contents[[2]]$Owner$DisplayName
#> character(0)
#> 
#> $Contents[[2]]$Owner$ID
#> character(0)
#> 
#> 
#> 
#> $Contents[[3]]
#> $Contents[[3]]$Key
#> [1] "my mtcars/20220810T182101Z-ce918/my mtcars.rds"
#> 
#> $Contents[[3]]$LastModified
#> [1] "2022-08-10 18:21:02 GMT"
#> 
#> $Contents[[3]]$ETag
#> [1] "\"35e05a7c2dec7b2b7edf6f56a0dbc239\""
#> 
#> $Contents[[3]]$Size
#> [1] 1217
#> 
#> $Contents[[3]]$StorageClass
#> [1] "STANDARD"
#> 
#> $Contents[[3]]$Owner
#> $Contents[[3]]$Owner$DisplayName
#> character(0)
#> 
#> $Contents[[3]]$Owner$ID
#> character(0)
#> 
#> 
#> 
#> $Contents[[4]]
#> $Contents[[4]]$Key
#> [1] "my-mtcars/20220810T182106Z-ce918/data.txt"
#> 
#> $Contents[[4]]$LastModified
#> [1] "2022-08-10 18:21:07 GMT"
#> 
#> $Contents[[4]]$ETag
#> [1] "\"4b4d29db1c2c6c705e95c7004a550089\""
#> 
#> $Contents[[4]]$Size
#> [1] 179
#> 
#> $Contents[[4]]$StorageClass
#> [1] "STANDARD"
#> 
#> $Contents[[4]]$Owner
#> $Contents[[4]]$Owner$DisplayName
#> character(0)
#> 
#> $Contents[[4]]$Owner$ID
#> character(0)
#> 
#> 
#> 
#> $Contents[[5]]
#> $Contents[[5]]$Key
#> [1] "my-mtcars/20220810T182106Z-ce918/my-mtcars.rds"
#> 
#> $Contents[[5]]$LastModified
#> [1] "2022-08-10 18:21:07 GMT"
#> 
#> $Contents[[5]]$ETag
#> [1] "\"35e05a7c2dec7b2b7edf6f56a0dbc239\""
#> 
#> $Contents[[5]]$Size
#> [1] 1217
#> 
#> $Contents[[5]]$StorageClass
#> [1] "STANDARD"
#> 
#> $Contents[[5]]$Owner
#> $Contents[[5]]$Owner$DisplayName
#> character(0)
#> 
#> $Contents[[5]]$Owner$ID
#> character(0)
#> 
#> 
#> 
#> 
#> $Name
#> [1] "sagemaker-rusted-americanquarterhorse"
#> 
#> $Prefix
#> character(0)
#> 
#> $Delimiter
#> character(0)
#> 
#> $MaxKeys
#> [1] 1000
#> 
#> $CommonPrefixes
#> list()
#> 
#> $EncodingType
#> character(0)
#> 
#> $KeyCount
#> [1] 5
#> 
#> $ContinuationToken
#> character(0)
#> 
#> $NextContinuationToken
#> character(0)
#> 
#> $StartAfter
#> character(0)

## works fine for this:
svc$list_objects_v2(Bucket = my_bucket, Prefix = "my-mtcars/")
#> $IsTruncated
#> [1] FALSE
#> 
#> $Contents
#> $Contents[[1]]
#> $Contents[[1]]$Key
#> [1] "my-mtcars/20220810T182106Z-ce918/data.txt"
#> 
#> $Contents[[1]]$LastModified
#> [1] "2022-08-10 18:21:07 GMT"
#> 
#> $Contents[[1]]$ETag
#> [1] "\"4b4d29db1c2c6c705e95c7004a550089\""
#> 
#> $Contents[[1]]$Size
#> [1] 179
#> 
#> $Contents[[1]]$StorageClass
#> [1] "STANDARD"
#> 
#> $Contents[[1]]$Owner
#> $Contents[[1]]$Owner$DisplayName
#> character(0)
#> 
#> $Contents[[1]]$Owner$ID
#> character(0)
#> 
#> 
#> 
#> $Contents[[2]]
#> $Contents[[2]]$Key
#> [1] "my-mtcars/20220810T182106Z-ce918/my-mtcars.rds"
#> 
#> $Contents[[2]]$LastModified
#> [1] "2022-08-10 18:21:07 GMT"
#> 
#> $Contents[[2]]$ETag
#> [1] "\"35e05a7c2dec7b2b7edf6f56a0dbc239\""
#> 
#> $Contents[[2]]$Size
#> [1] 1217
#> 
#> $Contents[[2]]$StorageClass
#> [1] "STANDARD"
#> 
#> $Contents[[2]]$Owner
#> $Contents[[2]]$Owner$DisplayName
#> character(0)
#> 
#> $Contents[[2]]$Owner$ID
#> character(0)
#> 
#> 
#> 
#> 
#> $Name
#> [1] "sagemaker-rusted-americanquarterhorse"
#> 
#> $Prefix
#> [1] "my-mtcars/"
#> 
#> $Delimiter
#> character(0)
#> 
#> $MaxKeys
#> [1] 1000
#> 
#> $CommonPrefixes
#> list()
#> 
#> $EncodingType
#> character(0)
#> 
#> $KeyCount
#> [1] 2
#> 
#> $ContinuationToken
#> character(0)
#> 
#> $NextContinuationToken
#> character(0)
#> 
#> $StartAfter
#> character(0)

## notice the + added here:
svc$list_objects_v2(Bucket = my_bucket, Prefix = "my mtcars/")
#> $IsTruncated
#> [1] FALSE
#> 
#> $Contents
#> list()
#> 
#> $Name
#> [1] "sagemaker-rusted-americanquarterhorse"
#> 
#> $Prefix
#> [1] "my+mtcars/"
#> 
#> $Delimiter
#> character(0)
#> 
#> $MaxKeys
#> [1] 1000
#> 
#> $CommonPrefixes
#> list()
#> 
#> $EncodingType
#> character(0)
#> 
#> $KeyCount
#> [1] 0
#> 
#> $ContinuationToken
#> character(0)
#> 
#> $NextContinuationToken
#> character(0)
#> 
#> $StartAfter
#> character(0)

Created on 2022-08-10 by the reprex package (v2.0.1)

When I call list_objects_v2() with a prefix, it works fine if the prefix contains no spaces, but it does not work correctly if the prefix contains a space: the space is sent as "+" and no objects are returned.

juliasilge avatar Aug 10 '22 18:08 juliasilge

Ah, sorry about that. I will look into it over the next couple of days 😄

DyfanJones avatar Aug 10 '22 20:08 DyfanJones

Hi @juliasilge,

I have identified the issue and PR #522 should resolve it. In the meantime feel free to use the dev branch to ensure everything is playing nicely with pins :)

remotes::install_github(repo = "DyfanJones/paws/paws.common/", ref = "s3-prefix-space")

DyfanJones avatar Aug 15 '22 15:08 DyfanJones

This works great now for me after installing from your branch!

library(pins)
#> Warning: package 'pins' was built under R version 4.0.5
library(paws)
#> Warning: package 'paws' was built under R version 4.0.5
b <- pins::board_s3(
  bucket = "sagemaker-rusted-americanquarterhorse", 
  region = "us-east-2"
  )

b %>% pin_write(mtcars, "my mtcars")
#> Guessing `type = 'rds'`
#> Creating new version '20220815T155219Z-ce918'
#> Writing to pin 'my mtcars'
b %>% pin_write(mtcars, "my-mtcars")
#> Guessing `type = 'rds'`
#> Creating new version '20220815T155219Z-ce918'
#> Writing to pin 'my-mtcars'
b %>% pin_list()
#> [1] "my mtcars" "my-mtcars"

Created on 2022-08-15 by the reprex package (v2.0.1)

Thank you so much! 🙏

juliasilge avatar Aug 15 '22 15:08 juliasilge

Hi @juliasilge,

paws.common 0.5.0 has been released on CRAN and addresses this issue. I will close this for now, but please re-open it if the problem persists.

DyfanJones avatar Sep 03 '22 12:09 DyfanJones