BinPackage icon indicating copy to clipboard operation
BinPackage copied to clipboard

KeyError: 'PRússlandskonung'

Open Valdegg opened this issue 2 years ago • 2 comments

Ég hef verið að nota Greyni og BÍN mikið nýverið og í kvöld byrjaði ég að fá dularfulla villu frá Greyni, þegar ég reyni að þátta setningar. Dæmi:

g = Greynir()
g.parse_single('ég fór í bæinn og keypti ís')

Fyrir mismunandi input þá kom þessi KeyError úr islenska/basics.py og í rótinni kom alltaf sama `KeyError: 'PRússlandskonung'` villan.

Þetta lagaðist þegar ég restartaði sessioninu í jupyter, en mér datt í hug að láta vita af þessu.

KeyError                                  Traceback (most recent call last)
c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\islenska\basics.py in lookup(self, key, func)
    353             try:
--> 354                 result = self.cache[key]
    355                 self.hits += 1

KeyError: 'keypti'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_1092/3409655977.py in <module>
----> 1 g.parse_single('ég fór í bæinn og keypti ís')

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\reynir.py in parse_single(self, sentence, max_sent_tokens)
    873         # Returns None if no sentence could be extracted from the text
    874         try:
--> 875             return next(iter(job))
    876         except StopIteration:
    877             return None

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\reynir.py in sentences(self)
    559     def sentences(self) -> Iterable[_Sentence]:
    560         """ Yield the sentences from the token stream """
--> 561         for p in self.paragraphs():
    562             yield from p.sentences()
    563 

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\reynir.py in paragraphs(self)
    554             # No progress function: use generators throughout
    555             plist = paragraphs(self._tokens)
--> 556         for p in plist:
    557             yield _Paragraph(self, p)
    558 

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\tokenizer\tokenizer.py in paragraphs(tokens)
   3014     current_p: List[Tuple[int, List[Tok]]] = []  # Current paragraph
   3015 
-> 3016     for ix, t in enumerate(tokens):
   3017         t0 = t[0]
   3018         if t0 == TOK.S_BEGIN:

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in disambiguate_phrases(token_stream, token_ctor)
   2018 
   2019     ds = DisambiguationStream(token_ctor)
-> 2020     yield from ds.process(token_stream)
   2021 
   2022 

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in process(self, token_stream)
   1773             while True:
   1774 
-> 1775                 token = next(token_stream)
   1776 
   1777                 if token.txt is None:

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in fix_abbreviations(token_stream)
   1707         token = next(token_stream)
   1708         while True:
-> 1709             next_token = next(token_stream)
   1710             # If we have a 'name finisher abbreviation'
   1711             # (such as 'próf.' for 'prófessor') and the next token

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in parse_phrases_3(db, token_stream, token_ctor)
   1649                     # Make sure that token is None if next() raises StopIteration
   1650                     token = cast(Tok, None)
-> 1651                     token = next(token_stream)
   1652                     continue
   1653             next_token = next(token_stream)

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in parse_phrases_2(token_stream, token_ctor, auto_uppercase)
   1086 
   1087         while True:
-> 1088             next_token = next(token_stream)
   1089 
   1090             # Make the lookahead checks we're interested in

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\tokenizer\tokenizer.py in __next__(self)
    859         if self.__lookahead:
    860             return self.__lookahead.popleft()
--> 861         return next(self.__it)
    862 
    863     def __iter__(self):

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in parse_phrases_1(db, token_ctor, token_stream)
    845         token = next(token_stream)
    846         while True:
--> 847             next_token: Tok = next(token_stream)
    848 
    849             # Logic for numbers that are partially or entirely

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bintokenizer.py in annotate(db, token_ctor, token_stream, auto_uppercase, no_sentence_start)
    637             # If word is found in PREFER_LOWERCASE we skip searching uppercase meanings
    638             # (if auto_uppercase is True)
--> 639             w, m = db.lookup_g(
    640                 w, at_sentence_start, auto_uppercase and w not in PREFER_LOWERCASE
    641             )

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\reynir\bindb.py in lookup_g(self, w, at_sentence_start, auto_uppercase)
     76         """ Returns BIN_Tuple instances, which are the Greynir version
     77             of islenska.BinEntry """
---> 78         w, m = self._lookup(
     79             w,
     80             at_sentence_start,

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\islenska\bindb.py in _lookup(self, w, at_sentence_start, auto_uppercase, lookup_func, ctor)
    386         # Start with a straightforward, cached lookup of the word as-is
    387         lower_w = w
--> 388         m: List[_T] = lookup_func(w)
    389 
    390         if auto_uppercase and w.islower():

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\islenska\bindb.py in _meanings_cache_lookup(self, key, compound)
    290         """ Attempt to lookup a word in the cache,
    291             returning a list of BinEntry instances """
--> 292         klist = self._ksnid_cache_lookup(key, compound=compound)
    293         # Convert the cached ksnid list to a list of BinEntry (SHsnid) tuples
    294         return [k.to_bin_entry() for k in klist]

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\islenska\bindb.py in _ksnid_cache_lookup(self, key, compound)
    273         """ Attempt to lookup a word in the cache, calling
    274             self.ksnid_lookup() on a cache miss """
--> 275         klist = self._ksnid_cache.lookup(key, self._ksnid_lookup)
    276         # If we're looking for compound suffixes (compound=True), we
    277         # allow items where birting == 'S' (coming from ord.suffix.csv)

c:\users\hp\appdata\local\programs\python\python39\lib\site-packages\islenska\basics.py in lookup(self, key, func)
    363                         self.maxsize // 10, self.use_count.items(), key=itemgetter(1)
    364                     ):
--> 365                         del self.cache[key], self.use_count[key]
    366             return result
    367 

KeyError: 'PRússlandskonung'

Valdegg avatar Oct 20 '22 19:10 Valdegg

Can you still reproduce this?

sveinbjornt avatar Nov 20 '23 20:11 sveinbjornt

Nope.

I have a new machine with a new OS and newer Python

Valdegg avatar Nov 21 '23 10:11 Valdegg