filesystem_spec icon indicating copy to clipboard operation
filesystem_spec copied to clipboard

FileNotFoundError when using cache on memory store implementation

Open gmaze opened this issue 10 months ago • 0 comments

Dear all, I'm struggling to find out why this is not working. When I use caching on a MemoryFileSystem, fsspec raises a FileNotFoundError from the open method. Here is a reproducible example. I'm using version 2024.3.1, but this has been going on for a while.

Create a cached memory fs:

fs = fsspec.filesystem(
            "filecache",
            target_protocol='memory',
            # target_options={**{"skip_instance_cache": True}}, # True/False: No impact on issue
            expiry_time=86400,
            # cache_check=False,  # True/False: No impact on issue
        )

Then, trying to write to a file:

with fs.open('sample_file_in_mem.txt', 'w') as f:
    f.write('hello world from memory')

raises a FileNotFoundError:

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
Cell In[31], line 1
----> 1 with fs.open('sample_file_in_mem.txt', 'w') as f:
      2     f.write('hello world from memory')

File ~/miniconda3/envs/argopy-pull318/lib/python3.9/site-packages/fsspec/implementations/cached.py:449, in CachingFileSystem.__getattribute__.<locals>.<lambda>(*args, **kw)
    399 def __getattribute__(self, item):
    400     if item in {
    401         "load_cache",
    402         "_open",
   (...)
    447         # all the methods defined in this class. Note `open` here, since
    448         # it calls `_open`, but is actually in superclass
--> 449         return lambda *args, **kw: getattr(type(self), item).__get__(self)(
    450             *args, **kw
    451         )
    452     if item in ["__reduce_ex__"]:
    453         raise AttributeError

File ~/miniconda3/envs/argopy-pull318/lib/python3.9/site-packages/fsspec/spec.py:1281, in AbstractFileSystem.open(self, path, mode, block_size, cache_options, compression, **kwargs)
   1273     mode = mode.replace("t", "") + "b"
   1275     text_kwargs = {
   1276         k: kwargs.pop(k)
   1277         for k in ["encoding", "errors", "newline"]
   1278         if k in kwargs
   1279     }
   1280     return io.TextIOWrapper(
-> 1281         self.open(
   1282             path,
   1283             mode,
   1284             block_size=block_size,
   1285             cache_options=cache_options,
   1286             compression=compression,
   1287             **kwargs,
   1288         ),
   1289         **text_kwargs,
   1290     )
   1291 else:
   1292     ac = kwargs.pop("autocommit", not self._intrans)

File ~/miniconda3/envs/argopy-pull318/lib/python3.9/site-packages/fsspec/implementations/cached.py:449, in CachingFileSystem.__getattribute__.<locals>.<lambda>(*args, **kw)
    399 def __getattribute__(self, item):
    400     if item in {
    401         "load_cache",
    402         "_open",
   (...)
    447         # all the methods defined in this class. Note `open` here, since
    448         # it calls `_open`, but is actually in superclass
--> 449         return lambda *args, **kw: getattr(type(self), item).__get__(self)(
    450             *args, **kw
    451         )
    452     if item in ["__reduce_ex__"]:
    453         raise AttributeError

File ~/miniconda3/envs/argopy-pull318/lib/python3.9/site-packages/fsspec/spec.py:1293, in AbstractFileSystem.open(self, path, mode, block_size, cache_options, compression, **kwargs)
   1291 else:
   1292     ac = kwargs.pop("autocommit", not self._intrans)
-> 1293     f = self._open(
   1294         path,
   1295         mode=mode,
   1296         block_size=block_size,
   1297         autocommit=ac,
   1298         cache_options=cache_options,
   1299         **kwargs,
   1300     )
   1301     if compression is not None:
   1302         from fsspec.compression import compr

File ~/miniconda3/envs/argopy-pull318/lib/python3.9/site-packages/fsspec/implementations/cached.py:449, in CachingFileSystem.__getattribute__.<locals>.<lambda>(*args, **kw)
    399 def __getattribute__(self, item):
    400     if item in {
    401         "load_cache",
    402         "_open",
   (...)
    447         # all the methods defined in this class. Note `open` here, since
    448         # it calls `_open`, but is actually in superclass
--> 449         return lambda *args, **kw: getattr(type(self), item).__get__(self)(
    450             *args, **kw
    451         )
    452     if item in ["__reduce_ex__"]:
    453         raise AttributeError

File ~/miniconda3/envs/argopy-pull318/lib/python3.9/site-packages/fsspec/implementations/cached.py:666, in WholeFileCacheFileSystem._open(self, path, mode, **kwargs)
    664 path = self._strip_protocol(path)
    665 if "r" not in mode:
--> 666     fn = self._make_local_details(path)
    667     user_specified_kwargs = {
    668         k: v
    669         for k, v in kwargs.items()
    670         # those kwargs were added by open(), we don't want them
    671         if k not in ["autocommit", "block_size", "cache_options"]
    672     }
    673     return LocalTempFile(self, path, mode=mode, fn=fn, **user_specified_kwargs)

File ~/miniconda3/envs/argopy-pull318/lib/python3.9/site-packages/fsspec/implementations/cached.py:449, in CachingFileSystem.__getattribute__.<locals>.<lambda>(*args, **kw)
    399 def __getattribute__(self, item):
    400     if item in {
    401         "load_cache",
    402         "_open",
   (...)
    447         # all the methods defined in this class. Note `open` here, since
    448         # it calls `_open`, but is actually in superclass
--> 449         return lambda *args, **kw: getattr(type(self), item).__get__(self)(
    450             *args, **kw
    451         )
    452     if item in ["__reduce_ex__"]:
    453         raise AttributeError

File ~/miniconda3/envs/argopy-pull318/lib/python3.9/site-packages/fsspec/implementations/cached.py:612, in WholeFileCacheFileSystem._make_local_details(self, path)
    605 hash = self._mapper(path)
    606 fn = os.path.join(self.storage[-1], hash)
    607 detail = {
    608     "original": path,
    609     "fn": hash,
    610     "blocks": True,
    611     "time": time.time(),
--> 612     "uid": self.fs.ukey(path),
    613 }
    614 self._metadata.update_file(path, detail)
    615 logger.debug("Copying %s to local cache", path)

File ~/miniconda3/envs/argopy-pull318/lib/python3.9/site-packages/fsspec/spec.py:1332, in AbstractFileSystem.ukey(self, path)
   1330 def ukey(self, path):
   1331     """Hash of file properties, to tell if it has changed"""
-> 1332     return sha256(str(self.info(path)).encode()).hexdigest()

File ~/miniconda3/envs/argopy-pull318/lib/python3.9/site-packages/fsspec/implementations/memory.py:160, in MemoryFileSystem.info(self, path, **kwargs)
    153     return {
    154         "name": path,
    155         "size": filelike.size,
    156         "type": "file",
    157         "created": getattr(filelike, "created", None),
    158     }
    159 else:
--> 160     raise FileNotFoundError(path)

FileNotFoundError: /sample_file_in_mem.txt

As can be seen in the error trace, the cache system can't get down to the info method, on either the cache or the memory file system.

If I "touch" the file before trying to write to it, it works. But I don't see why the open fails.

I don't know if this is a bug or if I'm not using the memory file system as it should be used.

Thanks for your insight and help!

gmaze avatar Apr 16 '24 09:04 gmaze