dateparser
dateparser copied to clipboard
dateparser-1.1.1 is incompatible with regex-2022.3.15
I upgraded my pip packages:
(env) blurry:>$ pip freeze | grep dateparser
dateparser==1.1.1
(env) blurry:>$ pip freeze | grep regex
regex==2022.3.15
dateparser.parse raises the following exception:
Python 3.8.10 (default, Nov 26 2021, 20:14:08)
Type 'copyright', 'credits' or 'license' for more information
IPython 8.1.1 -- An enhanced Interactive Python. Type '?' for help.
In [1]: import dateparser
In [2]: dateparser.parse('01.JUN.1968')
---------------------------------------------------------------------------
error Traceback (most recent call last)
Input In [2], in <cell line: 1>()
----> 1 dateparser.parse('01.JUN.1968')
File ~/lino/env/lib/python3.8/site-packages/dateparser/conf.py:92, in apply_settings.<locals>.wrapper(*args, **kwargs)
89 if not isinstance(kwargs['settings'], Settings):
90 raise TypeError("settings can only be either dict or instance of Settings class")
---> 92 return f(*args, **kwargs)
File ~/lino/env/lib/python3.8/site-packages/dateparser/__init__.py:61, in parse(date_string, date_formats, languages, locales, region, settings, detect_languages_function)
57 if languages or locales or region or detect_languages_function or not settings._default:
58 parser = DateDataParser(languages=languages, locales=locales,
59 region=region, settings=settings, detect_languages_function=detect_languages_function)
---> 61 data = parser.get_date_data(date_string, date_formats)
63 if data:
64 return data['date_obj']
File ~/lino/env/lib/python3.8/site-packages/dateparser/date.py:428, in DateDataParser.get_date_data(self, date_string, date_formats)
425 date_string = sanitize_date(date_string)
427 for locale in self._get_applicable_locales(date_string):
--> 428 parsed_date = _DateLocaleParser.parse(
429 locale, date_string, date_formats, settings=self._settings)
430 if parsed_date:
431 parsed_date['locale'] = locale.shortname
File ~/lino/env/lib/python3.8/site-packages/dateparser/date.py:178, in _DateLocaleParser.parse(cls, locale, date_string, date_formats, settings)
175 @classmethod
176 def parse(cls, locale, date_string, date_formats=None, settings=None):
177 instance = cls(locale, date_string, date_formats, settings)
--> 178 return instance._parse()
File ~/lino/env/lib/python3.8/site-packages/dateparser/date.py:182, in _DateLocaleParser._parse(self)
180 def _parse(self):
181 for parser_name in self._settings.PARSERS:
--> 182 date_data = self._parsers[parser_name]()
183 if self._is_valid_date_data(date_data):
184 return date_data
File ~/lino/env/lib/python3.8/site-packages/dateparser/date.py:196, in _DateLocaleParser._try_freshness_parser(self)
194 def _try_freshness_parser(self):
195 try:
--> 196 return freshness_date_parser.get_date_data(self._get_translated_date(), self._settings)
197 except (OverflowError, ValueError):
198 return None
File ~/lino/env/lib/python3.8/site-packages/dateparser/date.py:234, in _DateLocaleParser._get_translated_date(self)
232 def _get_translated_date(self):
233 if self._translated_date is None:
--> 234 self._translated_date = self.locale.translate(
235 self.date_string, keep_formatting=False, settings=self._settings)
236 return self._translated_date
File ~/lino/env/lib/python3.8/site-packages/dateparser/languages/locale.py:131, in Locale.translate(self, date_string, keep_formatting, settings)
128 dictionary = self._get_dictionary(settings)
129 date_string_tokens = dictionary.split(date_string, keep_formatting)
--> 131 relative_translations = self._get_relative_translations(settings=settings)
133 for i, word in enumerate(date_string_tokens):
134 word = word.lower()
File ~/lino/env/lib/python3.8/site-packages/dateparser/languages/locale.py:158, in Locale._get_relative_translations(self, settings)
155 if settings.NORMALIZE:
156 if self._normalized_relative_translations is None:
157 self._normalized_relative_translations = (
--> 158 self._generate_relative_translations(normalize=True))
159 return self._normalized_relative_translations
160 else:
File ~/lino/env/lib/python3.8/site-packages/dateparser/languages/locale.py:172, in Locale._generate_relative_translations(self, normalize)
170 value = list(map(normalize_unicode, value))
171 pattern = '|'.join(sorted(value, key=len, reverse=True))
--> 172 pattern = DIGIT_GROUP_PATTERN.sub(r'?P<n>\d+', pattern)
173 pattern = re.compile(r'^(?:{})$'.format(pattern), re.UNICODE | re.IGNORECASE)
174 relative_dictionary[pattern] = key
File ~/lino/env/lib/python3.8/site-packages/regex/regex.py:700, in _compile_replacement_helper(pattern, template)
695 break
696 if ch == "\\":
697 # '_compile_replacement' will return either an int group reference
698 # or a string literal. It returns items (plural) in order to handle
699 # a 2-character literal (an invalid escape sequence).
--> 700 is_group, items = _compile_replacement(source, pattern, is_unicode)
701 if is_group:
702 # It's a group, so first flush the literal.
703 if literal:
File ~/lino/env/lib/python3.8/site-packages/regex/_regex_core.py:1736, in _compile_replacement(source, pattern, is_unicode)
1733 if value is not None:
1734 return False, [value]
-> 1736 raise error("bad escape \\%s" % ch, source.string, source.pos)
1738 if isinstance(source.sep, bytes):
1739 octal_mask = 0xFF
error: bad escape \d at position 7
In [3]:
This bug report mentions 1.1.1, but the problem is actually worse for 1.1.0 (which I had pinned). 1.1.1 has regex >2022.3.15 marked as incompatible, so pip can (in many scenarios at least) install an earlier version and run fine. In 1.1.0, however, there is no such incompatibility marker, so pip will happily install 2022.3.15.
TL;DR: updating from 1.1.0 to 1.1.1 can work around this issue in many cases.
Pinning is no solution for distributions such as Fedora. See https://bugzilla.redhat.com/show_bug.cgi?id=2080221
See also https://github.com/scrapinghub/dateparser/issues/1045#issuecomment-1129846022
Thank you @thmo! I would encourage you to create a PR with this change (also see https://github.com/scrapinghub/dateparser/issues/1045#issuecomment-1069484011 as a confirmation), which would release the regex library version restriction.
Pinning is no solution for distributions such as Fedora. See https://bugzilla.redhat.com/show_bug.cgi?id=2080221
The same goes for openSUSE (and I guess every other Linux or non-Linux distro).
See also #1045 (comment)
Just to confirm: this patch made the test suite pass. Thank you!
regex-2022.7.9 seems to be compatible with dateparser in the sense that the tests in https://github.com/scrapinghub/dateparser/issues/1045#issue-1170455588 work.
Closing in favor of https://github.com/scrapinghub/dateparser/issues/1045