dateparser
dateparser copied to clipboard
`RELATIVE_BASE` memory leak
Minimal code:
from datetime import datetime

from dateparser import parse

# Minimal reproducer: every iteration passes a fresh RELATIVE_BASE value in
# the settings dict. The loop is intentionally endless so that the growth in
# memory use can be observed from outside the process.
while True:
    ret = parse('24 august', settings={'RELATIVE_BASE': datetime.now()})
Each distinct `RELATIVE_BASE` value causes a new settings object to be created and cached, so memory grows VERY quickly.
My workaround is the following monkeypatch:
def fix_dateparser_cache(settings_cls=None):
    """Patch a dateparser ``Settings`` class so ``RELATIVE_BASE`` is left out of its cache key.

    Replaces ``get_key`` with a version that ignores the ``RELATIVE_BASE`` and
    ``_mod_settings`` entries, and wraps ``__new__`` so every returned instance
    carries the resulting key in its ``registry_key`` attribute.

    Calling this function more than once on the same class is a no-op after
    the first call.

    Args:
        settings_cls: Class to patch. Defaults to ``dateparser.conf.Settings``,
            which is imported lazily so the third-party package is only
            required when the default is used.

    Returns:
        None. The class is modified in place.
    """
    import hashlib

    if settings_cls is None:
        # ``from dateparser import parse`` does not bind the name
        # ``dateparser``, so import the submodule explicitly here.
        import dateparser.conf

        settings_cls = dateparser.conf.Settings

    # Looking up ``__new__`` on the class yields the underlying function, so
    # the marker set in ``wrap_new`` is visible here on a repeated call.
    if getattr(settings_cls.__new__, '_relative_base_patch', False):
        return

    # Entries excluded from the cache key.
    volatile_keys = ('RELATIVE_BASE', '_mod_settings')

    def _get_key(cls, settings=None):
        """Return ``'default'`` for empty settings, else an MD5 hex digest of the stable entries."""
        if not settings:
            return 'default'
        # Remove frequently changing values from cache key
        keys = sorted(
            '%s-%s' % (key, str(settings[key]))
            for key in settings
            if key not in volatile_keys
        )
        return hashlib.md5(''.join(keys).encode('utf-8')).hexdigest()

    def wrap_new(func):
        """Wrap constructor ``func`` so each instance gets a ``registry_key`` attribute."""
        def constructor(cls, *args, **kwargs):
            key = cls.get_key(*args, **kwargs)
            # NOTE(review): keyword arguments take part in the key but are not
            # forwarded to the wrapped constructor, exactly as in the original
            # workaround -- confirm the wrapped ``__new__`` does not need them.
            instance = func(cls, *args)
            # Registry key needed for regex cache
            # https://github.com/scrapinghub/dateparser/blob/141199b477/dateparser/languages/dictionary.py#L43
            setattr(instance, 'registry_key', key)
            return instance

        # Marker checked above to avoid stacking wrappers on repeated calls.
        constructor._relative_base_patch = True
        return staticmethod(constructor)

    settings_cls.get_key = classmethod(_get_key)
    settings_cls.__new__ = wrap_new(settings_cls.__new__)
This is a fairly critical bug for my use case too. Any idea when this will be fixed?