adlfs
adlfs copied to clipboard
Files with a trailing slash can't be queried with info()
import secrets
from adlfs import AzureBlobFileSystem
from pprint import pprint
BUCKET = "/name/of/the/bucket"
BASE_PATH = BUCKET + "/" + secrets.token_hex(12)
EMPTY_DIR = BASE_PATH + "/empty_dir/"
fs = AzureBlobFileSystem(account_name=_NAME, account_key=_KEY)
fs.makedirs(BASE_PATH)
fs.touch(EMPTY_DIR)
assert fs.exists(EMPTY_DIR)
print("isdir()?", fs.isdir(EMPTY_DIR))
print("isfile()?", fs.isfile(EMPTY_DIR))
print(fs.info(EMPTY_DIR))
The code above sums it up. If we create a file whose name ends with a trailing slash and that has no contents, other prefix-based storage implementations (e.g. s3fs) consider it a file, whereas here it is neither a file nor a directory. In addition, .info() just claims that the file doesn't exist.
$ python t.py
isdir()? False
isfile()? False
Traceback (most recent call last):
File "t.py", line 16, in <module>
print(fs.info(EMPTY_DIR))
File "/home/isidentical/adlfs/adlfs/spec.py", line 534, in info
fetch_from_azure = (path and self._ls_from_cache(path) is None) or refresh
File "/home/isidentical/.venv38/lib/python3.8/site-packages/fsspec/spec.py", line 346, in _ls_from_cache
raise FileNotFoundError(path)
FileNotFoundError: /test7/foo/1e853243700bd75ca6d3d194/empty_dir/
Another inconsistency:
print(fs.ls(BASE_PATH, detail=True))
would yield
[{'name': 'test7/foo/d9f36db25c2af114901d3f55/empty_dir/', 'size': 0, 'type': 'directory'}]
though querying EMPTY_DIR directly (fs.ls(EMPTY_DIR, detail=True)) would result in a FileNotFoundError.
If we remove the trailing slash:
EMPTY_DIR = BASE_PATH + "/empty_dir"
assert fs.exists(EMPTY_DIR)
print("isdir()?", fs.isdir(EMPTY_DIR))
print("isfile()?", fs.isfile(EMPTY_DIR))
print(fs.info(BASE_PATH, detail=True))
print(fs.info(BASE_PATH, detail=True))
isdir()? False
isfile()? True
{'name': 'test7/foo/c0bb7461677c0d3e929d7301/', 'size': 0, 'type': 'directory'}
Traceback (most recent call last):
File "t.py", line 20, in <module>
print(fs.info(BASE_PATH, detail=True))
File "/home/isidentical/adlfs/adlfs/spec.py", line 534, in info
fetch_from_azure = (path and self._ls_from_cache(path) is None) or refresh
File "/home/isidentical/.venv38/lib/python3.8/site-packages/fsspec/spec.py", line 346, in _ls_from_cache
raise FileNotFoundError(path)
FileNotFoundError: /test7/foo/c0bb7461677c0d3e929d7301