beaker
beaker copied to clipboard
File cache is extremely slow
Hi, I'm trying to use beaker as a base for internal caching, but I'm getting extremely slow performance with file caching.
Here is my basic implementation:
class Cache:
    """
    General pickle-backed cache.

    Values are produced on demand by ``_get_data`` (to be supplied by a
    subclass), kept in an in-memory dict, and written to a pickle file when
    ``clear`` is called or when the instance is finalized.

    super().__init__() must be called after all attributes are declared to work properly
    """

    def __init__(self, disable=False):
        """
        Set up the cache and load any previously saved content.

        disable: when true, ``__getitem__`` bypasses the cache entirely.
        """
        self._disable = disable
        # hash(self) is evaluated before _cache_path and _cache exist, so the
        # file name depends only on the attributes assigned up to this point.
        self._cache_path = CACHE_DIR / f"{self.__class__.__name__}:{hash(self)}.pkl"
        self._cache = self._load_cache()

    def __hash__(self):
        """
        Must return unique identifier of instance to be cached
        Identifier must be the same across runs
        """
        self_vars = dict(vars(self))
        # The cached content itself must not influence the identifier.
        self_vars.pop("_cache", None)
        return consistent_hash(self_vars)

    def _get_data(self, key: Any) -> Any:
        """
        Return data for key
        """
        raise NotImplementedError

    def __getitem__(self, key: Any) -> Any:
        """
        Return the data for key, computing and remembering it on a miss.
        """
        if self._disable:
            return self._get_data(key)
        try:
            value = self._cache[key]
        except KeyError:
            value = self._get_data(key)
            # Store the computed value (the original stored the key itself,
            # so every later hit returned the key instead of the data).
            self._cache[key] = value
        return value

    def clear(self):
        """
        Drop all cached entries and persist the empty cache.
        """
        self._cache = {}
        self._save_cache()

    def _save_cache(self):
        """
        Write the in-memory cache to the pickle file.
        """
        with self._cache_path.open("wb") as f:
            pickle.dump(self._cache, f)

    def _load_cache(self):
        """
        Read the cache from the pickle file; a missing or empty file gives {}.
        """
        # NOTE: unpickling can execute code embedded in the file, so the
        # cache directory must only contain files written by this class.
        try:
            with self._cache_path.open("rb") as f:
                return pickle.load(f)
        except (FileNotFoundError, EOFError):
            return {}

    def __del__(self):
        """
        Persist the cache when the instance is finalized.
        """
        # __init__ may have raised before these attributes were assigned;
        # in that case there is nothing to save.
        if hasattr(self, "_cache") and hasattr(self, "_cache_path"):
            self._save_cache()
and my implementation using beaker:
class Cache:
    """
    General cache backed by a beaker cache namespace.

    Values are produced on demand by ``_get_data`` (to be supplied by a
    subclass) and stored in the object returned by ``cache.get_cache``.

    super().__init__() must be called after all attributes are declared to work properly
    """

    def __init__(self, expire_time=60 * 60 * 24 * 7, disable=False, cache_type="file"):
        """
        Set up the backing cache.

        expire_time: passed to ``get_cache`` as ``expire``; the default
            expression evaluates to 604800.
        disable: when true, ``__getitem__`` bypasses the cache entirely.
        cache_type: passed to ``get_cache`` as ``type``.
        """
        self._disable = disable
        # hash(self) is evaluated before _cache exists, so the namespace name
        # depends only on the attributes assigned up to this point.
        self._cache = cache.get_cache(
            f"{self.__class__.__name__}:{hash(self)}",
            expire=expire_time,
            type=cache_type,
        )

    def __hash__(self):
        """
        Must return unique identifier of instance to be cached
        Identifier must be the same across runs
        """
        self_vars = dict(vars(self))
        # The backing cache object must not influence the identifier.
        self_vars.pop("_cache", None)
        return consistent_hash(self_vars)

    def _get_data(self, key: Any) -> Any:
        """
        Return data for key
        """
        raise NotImplementedError

    def __getitem__(self, key: Any) -> Any:
        """
        Return the data for key, computing and storing it on a miss.
        """
        if self._disable:
            return self._get_data(key)
        try:
            value = self._cache[key]
        except KeyError:
            value = self._get_data(key)
            # Store the computed value (the original stored the key itself,
            # so every later hit returned the key instead of the data).
            self._cache[key] = value
        return value

    def clear(self):
        """
        Remove every entry from the backing cache.
        """
        self._cache.clear()
Beaker caching appears to work, but I'm getting very bad performance: my own implementation handles 50k+ requests/s versus beaker's 100/s, a difference of nearly 3 orders of magnitude.
I understand that there is some overhead on checking time and some splitting between different files, but this seems too much.
What am I doing wrong?
The problem appears to be that beaker loads the pickle file on every request, which is not required in the vast majority of cases.