BinPackage
KeyError: 'PRússlandskonung'
I have been using Greynir and BÍN a lot recently, and this evening I started getting a mysterious error from Greynir when trying to parse sentences. Example:
```python
from reynir import Greynir

g = Greynir()
g.parse_single('ég fór í bæinn og keypti ís')
```
For various inputs the KeyError came from islenska/basics.py, and at the root there was always the same `KeyError: 'PRússlandskonung'` error.
It cleared up when I restarted the Jupyter session, but I thought I should report it anyway.
```
KeyError Traceback (most recent call last)
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\islenska\basics.py in lookup(self, key, func)
353 try:
--> 354 result = self.cache[key]
355 self.hits += 1
KeyError: 'keypti'
During handling of the above exception, another exception occurred:
KeyError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_1092/3409655977.py in <module>
----> 1 g.parse_single('ég fór í bæinn og keypti ís')
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\reynir.py in parse_single(self, sentence, max_sent_tokens)
873 # Returns None if no sentence could be extracted from the text
874 try:
--> 875 return next(iter(job))
876 except StopIteration:
877 return None
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\reynir.py in sentences(self)
559 def sentences(self) -> Iterable[_Sentence]:
560 """ Yield the sentences from the token stream """
--> 561 for p in self.paragraphs():
562 yield from p.sentences()
563
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\reynir.py in paragraphs(self)
554 # No progress function: use generators throughout
555 plist = paragraphs(self._tokens)
--> 556 for p in plist:
557 yield _Paragraph(self, p)
558
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\tokenizer\tokenizer.py in paragraphs(tokens)
3014 current_p: List[Tuple[int, List[Tok]]] = [] # Current paragraph
3015
-> 3016 for ix, t in enumerate(tokens):
3017 t0 = t[0]
3018 if t0 == TOK.S_BEGIN:
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in disambiguate_phrases(token_stream, token_ctor)
2018
2019 ds = DisambiguationStream(token_ctor)
-> 2020 yield from ds.process(token_stream)
2021
2022
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in process(self, token_stream)
1773 while True:
1774
-> 1775 token = next(token_stream)
1776
1777 if token.txt is None:
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in fix_abbreviations(token_stream)
1707 token = next(token_stream)
1708 while True:
-> 1709 next_token = next(token_stream)
1710 # If we have a 'name finisher abbreviation'
1711 # (such as 'próf.' for 'prófessor') and the next token
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in parse_phrases_3(db, token_stream, token_ctor)
1649 # Make sure that token is None if next() raises StopIteration
1650 token = cast(Tok, None)
-> 1651 token = next(token_stream)
1652 continue
1653 next_token = next(token_stream)
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in parse_phrases_2(token_stream, token_ctor, auto_uppercase)
1086
1087 while True:
-> 1088 next_token = next(token_stream)
1089
1090 # Make the lookahead checks we're interested in
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\tokenizer\tokenizer.py in __next__(self)
859 if self.__lookahead:
860 return self.__lookahead.popleft()
--> 861 return next(self.__it)
862
863 def __iter__(self):
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in parse_phrases_1(db, token_ctor, token_stream)
845 token = next(token_stream)
846 while True:
--> 847 next_token: Tok = next(token_stream)
848
849 # Logic for numbers that are partially or entirely
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in annotate(db, token_ctor, token_stream, auto_uppercase, no_sentence_start)
637 # If word is found in PREFER_LOWERCASE we skip searching uppercase meanings
638 # (if auto_uppercase is True)
--> 639 w, m = db.lookup_g(
640 w, at_sentence_start, auto_uppercase and w not in PREFER_LOWERCASE
641 )
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bindb.py in lookup_g(self, w, at_sentence_start, auto_uppercase)
76 """ Returns BIN_Tuple instances, which are the Greynir version
77 of islenska.BinEntry """
---> 78 w, m = self._lookup(
79 w,
80 at_sentence_start,
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\islenska\bindb.py in _lookup(self, w, at_sentence_start, auto_uppercase, lookup_func, ctor)
386 # Start with a straightforward, cached lookup of the word as-is
387 lower_w = w
--> 388 m: List[_T] = lookup_func(w)
389
390 if auto_uppercase and w.islower():
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\islenska\bindb.py in _meanings_cache_lookup(self, key, compound)
290 """ Attempt to lookup a word in the cache,
291 returning a list of BinEntry instances """
--> 292 klist = self._ksnid_cache_lookup(key, compound=compound)
293 # Convert the cached ksnid list to a list of BinEntry (SHsnid) tuples
294 return [k.to_bin_entry() for k in klist]
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\islenska\bindb.py in _ksnid_cache_lookup(self, key, compound)
273 """ Attempt to lookup a word in the cache, calling
274 self.ksnid_lookup() on a cache miss """
--> 275 klist = self._ksnid_cache.lookup(key, self._ksnid_lookup)
276 # If we're looking for compound suffixes (compound=True), we
277 # allow items where birting == 'S' (coming from ord.suffix.csv)
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\islenska\basics.py in lookup(self, key, func)
363 self.maxsize // 10, self.use_count.items(), key=itemgetter(1)
364 ):
--> 365 del self.cache[key], self.use_count[key]
366 return result
367
KeyError: 'PRússlandskonung'
```
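Judging from the innermost frame, the failure happens during eviction in an LFU-style cache in islenska/basics.py: a key that is still present in the use-count dict but no longer in the cache dict makes `del self.cache[key]` raise. The sketch below is a minimal, hypothetical reconstruction of that pattern (the class and method names are illustrative, not the library's actual API), showing how the two dicts drifting out of sync produces exactly this secondary KeyError, and how `dict.pop(key, None)` would make eviction tolerant of it:

```python
from operator import itemgetter
from typing import Any, Callable, Dict

class LFUCache:
    """Illustrative LFU-style cache; not the actual islenska implementation."""

    def __init__(self, maxsize: int = 1000) -> None:
        self.maxsize = maxsize
        self.cache: Dict[str, Any] = {}
        self.use_count: Dict[str, int] = {}
        self.hits = 0

    def lookup(self, key: str, func: Callable[[str], Any]) -> Any:
        try:
            # Fast path: the key is already cached
            result = self.cache[key]
            self.hits += 1
            self.use_count[key] += 1
            return result
        except KeyError:
            pass
        # Cache miss: compute and store the value
        result = self.cache[key] = func(key)
        self.use_count[key] = self.use_count.get(key, 0) + 1
        if len(self.cache) > self.maxsize:
            # Evict the least-used tenth of the entries
            least_used = sorted(self.use_count.items(), key=itemgetter(1))
            for k, _ in least_used[: self.maxsize // 10]:
                # Buggy pattern (mirrors the traceback):
                #     del self.cache[k], self.use_count[k]
                # raises KeyError if k exists in use_count but has already
                # disappeared from cache. pop(..., None) tolerates the two
                # dicts being out of sync:
                self.cache.pop(k, None)
                self.use_count.pop(k, None)
        return result
```

A kernel restart recreates both dicts in a consistent empty state, which would match the observation that the error went away after restarting the Jupyter session.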
Can you still reproduce this?
Nope.
I have a new machine with a new OS and a newer Python.