gcsfs icon indicating copy to clipboard operation
gcsfs copied to clipboard

rm fails on path with leading slash

Open chiaral opened this issue 3 years ago • 1 comment

What happened:

I tried to remove an object from a Google Cloud Storage bucket. The operation works if the path has no leading slash, e.g.

pangeo-scratch/chiaral/test_path/foo

It fails when the path has a leading slash:

/pangeo-scratch/chiaral/test_path/foo

However, I can write / list / read the object with or without the leading slash.

What you expected to happen:

Consistent path-related errors independent of the operation (open, rm, ls, info).

Minimal Complete Verifiable Example:

import gcsfs
import os

# Filesystem object shared by the reproduction below, built with default arguments.
gcs = gcsfs.GCSFileSystem()

def write_then_rm(path):
    """Write a small object at ``path``, print its info, then remove it.

    Uses the module-level ``gcs`` filesystem. Exercising open, info and rm
    on the same path shows which of the operations accept that path form.
    """
    # Create the object by writing three bytes to it.
    with gcs.open(path, 'wb') as f:
        f.write(b'bar')
    # Look the object up again and show its metadata.
    info = gcs.info(path)
    print(info)
    # Remove the object; per the traceback in the output, this is the call
    # that raises OSError when ``path`` starts with a slash.
    gcs.rm(path)
    
# No leading slash: write, info and rm all complete (first dict in the output).
write_then_rm('pangeo-scratch/chiaral/test_path/foo')
# Leading slash: write and info succeed (second dict), then rm raises OSError: set().
write_then_rm('/pangeo-scratch/chiaral/test_path/foo')

Output:

{'kind': 'storage#object', 'id': 'pangeo-scratch/chiaral/test_path/foo/1604072153072543', 'selfLink': 'https://www.googleapis.com/storage/v1/b/pangeo-scratch/o/chiaral%2Ftest_path%2Ffoo', 'mediaLink': 'https://www.googleapis.com/download/storage/v1/b/pangeo-scratch/o/chiaral%2Ftest_path%2Ffoo?generation=1604072153072543&alt=media', 'name': 'pangeo-scratch/chiaral/test_path/foo', 'bucket': 'pangeo-scratch', 'generation': '1604072153072543', 'metageneration': '1', 'contentType': 'application/octet-stream', 'storageClass': 'STANDARD', 'size': 3, 'md5Hash': 'N7UdGUp1E+RbVvZSTy1R8g==', 'crc32c': 'CrcTMQ==', 'etag': 'CJ+Hg7fS3OwCEAE=', 'timeCreated': '2020-10-30T15:35:53.072Z', 'updated': '2020-10-30T15:35:53.072Z', 'timeStorageClassUpdated': '2020-10-30T15:35:53.072Z', 'type': 'file'}
{'kind': 'storage#object', 'id': 'pangeo-scratch/chiaral/test_path/foo/1604072153254208', 'selfLink': 'https://www.googleapis.com/storage/v1/b/pangeo-scratch/o/chiaral%2Ftest_path%2Ffoo', 'mediaLink': 'https://www.googleapis.com/download/storage/v1/b/pangeo-scratch/o/chiaral%2Ftest_path%2Ffoo?generation=1604072153254208&alt=media', 'name': 'pangeo-scratch/chiaral/test_path/foo', 'bucket': 'pangeo-scratch', 'generation': '1604072153254208', 'metageneration': '1', 'contentType': 'application/octet-stream', 'storageClass': 'STANDARD', 'size': 3, 'md5Hash': 'N7UdGUp1E+RbVvZSTy1R8g==', 'crc32c': 'CrcTMQ==', 'etag': 'CMCSjrfS3OwCEAE=', 'timeCreated': '2020-10-30T15:35:53.253Z', 'updated': '2020-10-30T15:35:53.253Z', 'timeStorageClassUpdated': '2020-10-30T15:35:53.253Z', 'type': 'file'}
---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
<ipython-input-6-41cc126f1383> in <module>
     13 write_then_rm('pangeo-scratch/chiaral/test_path/foo')
     14 
---> 15 write_then_rm('/pangeo-scratch/chiaral/test_path/foo')

<ipython-input-6-41cc126f1383> in write_then_rm(path)
      9     info = gcs.info(path)
     10     print(info)
---> 11     gcs.rm(path)
     12 
     13 write_then_rm('pangeo-scratch/chiaral/test_path/foo')

/srv/conda/envs/notebook/lib/python3.8/site-packages/gcsfs/core.py in rm(self, path, recursive, batchsize)
   1183     def rm(self, path, recursive=False, batchsize=20):
   1184         paths = self.expand_path(path, recursive=recursive)
-> 1185         sync(self.loop, self._rm, paths, batchsize=batchsize)
   1186 
   1187     def _open(

/srv/conda/envs/notebook/lib/python3.8/site-packages/fsspec/asyn.py in sync(loop, func, callback_timeout, *args, **kwargs)
     69     if error[0]:
     70         typ, exc, tb = error[0]
---> 71         raise exc.with_traceback(tb)
     72     else:
     73         return result[0]

/srv/conda/envs/notebook/lib/python3.8/site-packages/fsspec/asyn.py in f()
     53             if callback_timeout is not None:
     54                 future = asyncio.wait_for(future, callback_timeout)
---> 55             result[0] = await future
     56         except Exception:
     57             error[0] = sys.exc_info()

/srv/conda/envs/notebook/lib/python3.8/site-packages/gcsfs/core.py in _rm(self, paths, batchsize)
   1003         exs = [ex for ex in exs if ex is not None and "No such object" not in str(ex)]
   1004         if exs:
-> 1005             raise exs[0]
   1006         await asyncio.gather(*[self._rmdir(d) for d in dirs])
   1007 

/srv/conda/envs/notebook/lib/python3.8/site-packages/gcsfs/core.py in _rm_files(self, paths)
    987             pattern = '"message": "([^"]+)"'
    988             out = set(re.findall(pattern, txt))
--> 989             raise OSError(out)
    990 
    991     async def _rm(self, paths, batchsize):

OSError: set()

Environment:

  • gcsfs version: '0.7.1+4.g77b5993'
  • fsspec version: '0.8.4'

chiaral avatar Oct 30 '20 15:10 chiaral