python-bibtexparser
python-bibtexparser copied to clipboard
Parse error for empty @string definitions
If test.bib contains:
@string{test = ""}
and we call
import bibtexparser
bibtexparser.load(open("test.bib"))
we get an exception (see below). The expected behavior would be to just define `test` as an empty string. (BibTeX accepts such entries.)
Affected version: PyPI version from May 26, 2020 (1.2.0)
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-78-f9b4a9ef7457> in <module>
1 import bibtexparser
----> 2 bibtexparser.load(open("test.bib"))
/usr/local/lib/python3.8/dist-packages/bibtexparser/__init__.py in load(bibtex_file, parser)
67 if parser is None:
68 parser = bparser.BibTexParser()
---> 69 return parser.parse_file(bibtex_file)
70
71
/usr/local/lib/python3.8/dist-packages/bibtexparser/bparser.py in parse_file(self, file, partial)
167 :rtype: BibDatabase
168 """
--> 169 return self.parse(file.read(), partial=partial)
170
171 def _init_expressions(self):
/usr/local/lib/python3.8/dist-packages/bibtexparser/bparser.py in parse(self, bibtex_str, partial)
145 bibtex_file_obj = self._bibtex_file_obj(bibtex_str)
146 try:
--> 147 self._expr.parseFile(bibtex_file_obj)
148 except self._expr.ParseException as exc:
149 logger.error("Could not parse properly, starting at %s", exc.line)
/usr/local/lib/python3.8/dist-packages/bibtexparser/bibtexexpression.py in parseFile(self, file_obj)
276
277 def parseFile(self, file_obj):
--> 278 return self.main_expression.parseFile(file_obj, parseAll=True)
/usr/local/lib/python3.8/dist-packages/pyparsing.py in parseFile(self, file_or_filename, parseAll)
2575 file_contents = f.read()
2576 try:
-> 2577 return self.parseString(file_contents, parseAll)
2578 except ParseBaseException as exc:
2579 if ParserElement.verbose_stacktrace:
/usr/local/lib/python3.8/dist-packages/pyparsing.py in parseString(self, instring, parseAll)
1941 instring = instring.expandtabs()
1942 try:
-> 1943 loc, tokens = self._parse(instring, 0)
1944 if parseAll:
1945 loc = self.preParse(instring, loc)
/usr/local/lib/python3.8/dist-packages/pyparsing.py in _parseNoCache(self, instring, loc, doActions, callPreParse)
1681 if self.mayIndexError or preloc >= len(instring):
1682 try:
-> 1683 loc, tokens = self.parseImpl(instring, preloc, doActions)
1684 except IndexError:
1685 raise ParseException(instring, len(instring), self.errmsg, self)
/usr/local/lib/python3.8/dist-packages/pyparsing.py in parseImpl(self, instring, loc, doActions)
4779 def parseImpl(self, instring, loc, doActions=True):
4780 try:
-> 4781 return super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
4782 except (ParseException, IndexError):
4783 return loc, []
/usr/local/lib/python3.8/dist-packages/pyparsing.py in parseImpl(self, instring, loc, doActions)
4695 if check_ender:
4696 try_not_ender(instring, loc)
-> 4697 loc, tokens = self_expr_parse(instring, loc, doActions, callPreParse=False)
4698 try:
4699 hasIgnoreExprs = (not not self.ignoreExprs)
/usr/local/lib/python3.8/dist-packages/pyparsing.py in _parseNoCache(self, instring, loc, doActions, callPreParse)
1681 if self.mayIndexError or preloc >= len(instring):
1682 try:
-> 1683 loc, tokens = self.parseImpl(instring, preloc, doActions)
1684 except IndexError:
1685 raise ParseException(instring, len(instring), self.errmsg, self)
/usr/local/lib/python3.8/dist-packages/pyparsing.py in parseImpl(self, instring, loc, doActions)
4252 for e in self.exprs:
4253 try:
-> 4254 ret = e._parse(instring, loc, doActions)
4255 return ret
4256 except ParseException as err:
/usr/local/lib/python3.8/dist-packages/pyparsing.py in _parseNoCache(self, instring, loc, doActions, callPreParse)
1714 for fn in self.parseAction:
1715 try:
-> 1716 tokens = fn(instring, tokensStart, retTokens)
1717 except IndexError as parse_action_exc:
1718 exc = ParseException("exception raised in parse action")
/usr/local/lib/python3.8/dist-packages/pyparsing.py in wrapper(*args)
1314 while 1:
1315 try:
-> 1316 ret = func(*args[limit[0]:])
1317 foundArity[0] = True
1318 return ret
/usr/local/lib/python3.8/dist-packages/bibtexparser/bparser.py in <lambda>(s, l, t)
199 self._expr.string_def.addParseAction(
200 lambda s, l, t: self._add_string(t['StringName'].name,
--> 201 t['StringValue'])
202 )
203
/usr/local/lib/python3.8/dist-packages/pyparsing.py in __getitem__(self, i)
596 else:
597 if i not in self.__accumNames:
--> 598 return self.__tokdict[i][-1][0]
599 else:
600 return ParseResults([v[0] for v in self.__tokdict[i]])
KeyError: 'StringValue'
Thanks @dominique-unruh for reporting this. I was able to reproduce the issue and opened #276 with a candidate fix. Let us know if it solves the issue on your side.
Thanks for the quick reaction. Since you were able to reproduce the error, I assume your fix will fix it on my side, too. (I cannot easily test it at this point in the original scenario due to a workaround that I already put in place, but I am looking forward to removing it again when the next version is published on PyPI.)
Just happened to stumble across the same bug when trying to parse cryptobib, since they have some empty strings. After first patching it myself locally, and later finding this issue, I can confirm that #276 does indeed (as expected) fix the issue.
This needs to be taken over by a new person. #276 is stale and broken.
Fixed in v2.
PR for v1 is stale and thus closed. But if anyone wants to take this PR up again and get it to run, I am happy to re-open it.