gcsfs
gcsfs copied to clipboard
rm fails on path with leading slash
What happened:
I tried to remove an object from a Google Cloud Storage bucket. The operation works when the path has no leading slash, e.g.
pangeo-scratch/chiaral/test_path/foo
It fails when the same path has a leading slash:
/pangeo-scratch/chiaral/test_path/foo
However, I can write / list / read the object with or without the leading slash.
What you expected to happen:
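Consistent path handling, and consistent path-related errors, regardless of the operation (open, rm, ls, info): a leading slash should either be accepted by all of them or rejected by all of them.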
Minimal Complete Verifiable Example:
import gcsfs
import os
# Filesystem handle used by every call in this example.
gcs = gcsfs.GCSFileSystem()
def write_then_rm(path):
    """Write a small object to ``path``, print its metadata, then remove it.

    Runs ``open``, ``info`` and ``rm`` against the same path string so that
    any difference in how the operations treat that path shows up in a
    single call.

    Parameters
    ----------
    path : str
        Object path of the form ``bucket/key``, with or without a leading
        slash.
    """
    with gcs.open(path, 'wb') as f:
        f.write(b'bar')
    # Query the metadata only after the file has been closed; the reported
    # output shows ``size: 3``, i.e. the write had completed by this point.
    info = gcs.info(path)
    print(info)
    gcs.rm(path)
# Works: path without a leading slash.
write_then_rm('pangeo-scratch/chiaral/test_path/foo')
# Fails: same path with a leading slash; open/info succeed, rm raises "OSError: set()".
write_then_rm('/pangeo-scratch/chiaral/test_path/foo')
Output:
{'kind': 'storage#object', 'id': 'pangeo-scratch/chiaral/test_path/foo/1604072153072543', 'selfLink': 'https://www.googleapis.com/storage/v1/b/pangeo-scratch/o/chiaral%2Ftest_path%2Ffoo', 'mediaLink': 'https://www.googleapis.com/download/storage/v1/b/pangeo-scratch/o/chiaral%2Ftest_path%2Ffoo?generation=1604072153072543&alt=media', 'name': 'pangeo-scratch/chiaral/test_path/foo', 'bucket': 'pangeo-scratch', 'generation': '1604072153072543', 'metageneration': '1', 'contentType': 'application/octet-stream', 'storageClass': 'STANDARD', 'size': 3, 'md5Hash': 'N7UdGUp1E+RbVvZSTy1R8g==', 'crc32c': 'CrcTMQ==', 'etag': 'CJ+Hg7fS3OwCEAE=', 'timeCreated': '2020-10-30T15:35:53.072Z', 'updated': '2020-10-30T15:35:53.072Z', 'timeStorageClassUpdated': '2020-10-30T15:35:53.072Z', 'type': 'file'}
{'kind': 'storage#object', 'id': 'pangeo-scratch/chiaral/test_path/foo/1604072153254208', 'selfLink': 'https://www.googleapis.com/storage/v1/b/pangeo-scratch/o/chiaral%2Ftest_path%2Ffoo', 'mediaLink': 'https://www.googleapis.com/download/storage/v1/b/pangeo-scratch/o/chiaral%2Ftest_path%2Ffoo?generation=1604072153254208&alt=media', 'name': 'pangeo-scratch/chiaral/test_path/foo', 'bucket': 'pangeo-scratch', 'generation': '1604072153254208', 'metageneration': '1', 'contentType': 'application/octet-stream', 'storageClass': 'STANDARD', 'size': 3, 'md5Hash': 'N7UdGUp1E+RbVvZSTy1R8g==', 'crc32c': 'CrcTMQ==', 'etag': 'CMCSjrfS3OwCEAE=', 'timeCreated': '2020-10-30T15:35:53.253Z', 'updated': '2020-10-30T15:35:53.253Z', 'timeStorageClassUpdated': '2020-10-30T15:35:53.253Z', 'type': 'file'}
---------------------------------------------------------------------------
OSError Traceback (most recent call last)
<ipython-input-6-41cc126f1383> in <module>
13 write_then_rm('pangeo-scratch/chiaral/test_path/foo')
14
---> 15 write_then_rm('/pangeo-scratch/chiaral/test_path/foo')
<ipython-input-6-41cc126f1383> in write_then_rm(path)
9 info = gcs.info(path)
10 print(info)
---> 11 gcs.rm(path)
12
13 write_then_rm('pangeo-scratch/chiaral/test_path/foo')
/srv/conda/envs/notebook/lib/python3.8/site-packages/gcsfs/core.py in rm(self, path, recursive, batchsize)
1183 def rm(self, path, recursive=False, batchsize=20):
1184 paths = self.expand_path(path, recursive=recursive)
-> 1185 sync(self.loop, self._rm, paths, batchsize=batchsize)
1186
1187 def _open(
/srv/conda/envs/notebook/lib/python3.8/site-packages/fsspec/asyn.py in sync(loop, func, callback_timeout, *args, **kwargs)
69 if error[0]:
70 typ, exc, tb = error[0]
---> 71 raise exc.with_traceback(tb)
72 else:
73 return result[0]
/srv/conda/envs/notebook/lib/python3.8/site-packages/fsspec/asyn.py in f()
53 if callback_timeout is not None:
54 future = asyncio.wait_for(future, callback_timeout)
---> 55 result[0] = await future
56 except Exception:
57 error[0] = sys.exc_info()
/srv/conda/envs/notebook/lib/python3.8/site-packages/gcsfs/core.py in _rm(self, paths, batchsize)
1003 exs = [ex for ex in exs if ex is not None and "No such object" not in str(ex)]
1004 if exs:
-> 1005 raise exs[0]
1006 await asyncio.gather(*[self._rmdir(d) for d in dirs])
1007
/srv/conda/envs/notebook/lib/python3.8/site-packages/gcsfs/core.py in _rm_files(self, paths)
987 pattern = '"message": "([^"]+)"'
988 out = set(re.findall(pattern, txt))
--> 989 raise OSError(out)
990
991 async def _rm(self, paths, batchsize):
OSError: set()
Environment:
- gcsfs version: '0.7.1+4.g77b5993'
- fsspec version: '0.8.4'