python-diskcache
python-diskcache copied to clipboard
Is it possible to cache with a custom key?
I'm looking for a way to cache the output of processing a file, but I want to use the hash of the file's contents as the cache key, not its path. The code below doesn't work, but it is more or less what I need. Is it possible to do something similar with this library?
# More code before this...
# Cache object used by the memoize decorator below; cache_dir is presumably
# defined in the omitted code above.
cache = FanoutCache(cache_dir)
def compute_file_hash(file_path: Path, chunk_size: int = 8192) -> str:
    """Return the SHA-256 hex digest of the contents of *file_path*.

    The file is opened in binary mode and fed to the hash in pieces of
    *chunk_size* bytes, so the whole file is never held in memory at once.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration; must be positive.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is not positive.
        OSError: If the file cannot be opened or read.
    """
    # A size of 0 would make the first read return b"" and end the loop at
    # once, silently producing the digest of an empty file.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    hash_func = hashlib.sha256()
    with file_path.open("rb") as f:
        # read() returns b"" at end of file, which ends the loop.
        while chunk := f.read(chunk_size):
            hash_func.update(chunk)
    return hash_func.hexdigest()
def custom_key_builder(func, *args, **kwargs):
    """Build a cache key from the content hash of the file being processed.

    Args:
        func: The function being cached; not used when building the key.
        *args: Positional arguments of the cached call. The first one, when
            present, is taken as the file path.
        **kwargs: Keyword arguments of the cached call. ``file_path`` is used
            when the path was not passed positionally.

    Returns:
        The value returned by ``compute_file_hash`` for the file path.

    Raises:
        TypeError: If no file path was supplied either way.
    """
    if args:
        file_path = args[0]
    elif "file_path" in kwargs:
        # Support calls of the form process_file(file_path=...).
        file_path = kwargs["file_path"]
    else:
        raise TypeError(
            "custom_key_builder needs the file path as the first positional "
            "argument or as the 'file_path' keyword argument"
        )
    return compute_file_hash(file_path)
# NOTE(review): the author states this snippet does not work; it sketches a
# hoped-for ``key=`` hook on ``memoize`` -- confirm against the diskcache API.
@cache.memoize(key=custom_key_builder)
def process_file(file_path: Path):
    """Placeholder for the per-file processing whose result should be cached."""
    print("Processing!!")
I guess I could just do:
# Cache object read and written by the cache_mine decorator below; cache_dir
# is presumably defined elsewhere -- it is not shown in this snippet.
c = FanoutCache(cache_dir)
def compute_file_hash(file_path: Path, chunk_size: int = 8192) -> str:
    """Return the SHA-256 hex digest of the contents of *file_path*.

    The file is opened in binary mode and fed to the hash in pieces of
    *chunk_size* bytes, so the whole file is never held in memory at once.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration; must be positive.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is not positive.
        OSError: If the file cannot be opened or read.
    """
    # A size of 0 would make the first read return b"" and end the loop at
    # once, silently producing the digest of an empty file.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")

    hash_func = hashlib.sha256()
    with file_path.open("rb") as f:
        # read() returns b"" at end of file, which ends the loop.
        while chunk := f.read(chunk_size):
            hash_func.update(chunk)
    return hash_func.hexdigest()
def cache_mine(method):
    """Decorate *method* so its result is cached under the input file's hash.

    The wrapped callable must receive the file to process as its first
    positional argument, and that argument must be a ``Path``. The cache key
    is the value returned by ``compute_file_hash`` for that path, so moving
    or renaming a file does not invalidate its entry.

    NOTE(review): the key is the file hash alone. Two different functions
    decorated with ``cache_mine`` and called on the same file contents would
    share one entry, and any further arguments are ignored -- confirm this
    is intended.

    Args:
        method: The callable whose results should be cached.

    Returns:
        A wrapper with the same call signature as *method*.
    """
    # Imported here because the file's import block is not part of this
    # snippet.
    import functools

    # Unique marker so a stored falsy value can be told apart from a miss.
    missing = object()

    @functools.wraps(method)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        file_path = args[0]
        # Explicit raise rather than assert: asserts are stripped under -O.
        if not isinstance(file_path, Path):
            raise TypeError(f"Expected Path, got {type(file_path)}")
        file_hash = compute_file_hash(file_path)
        with c as reference:
            # One lookup instead of "in" followed by "[]": the entry could
            # disappear between the two steps and raise KeyError.
            cached = reference.get(file_hash, default=missing)
            if cached is not missing:
                return cached
            out = method(*args, **kwargs)
            # None results are deliberately not stored, so they are
            # recomputed on every call.
            if out is not None:
                reference.set(file_hash, out)
            return out

    return wrapper
@cache_mine
def process_file(file_path: Path):
    """Placeholder processing step: print a marker and return -1."""
    # cache_mine only calls this function on a cache miss, so the message
    # below is printed only when the result is actually computed.
    print("Processing!!")
    return -1
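Override the `__cache_key__` attribute of the memoized function. The wrapper returned by `memoize` calls `__cache_key__(*args, **kwargs)` on every invocation to build the key, so assigning your own callable to that attribute (for example, one that returns the file's hash) changes the key that is used.
See more: https://github.com/grantjenks/python-diskcache/blob/ebfa37cd99d7ef716ec452ad8af4b4276a8e2233/diskcache/core.py#L1864-L1888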