papermage icon indicating copy to clipboard operation
papermage copied to clipboard

Unable to run quick_start_demo

Open nikogamulin opened this issue 6 months ago • 0 comments

Hi,

When I run the second cell of the quick_start_demo notebook, I get the following error:

```
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[2], line 5
      2 from papermage.recipes import CoreRecipe
      3 fixture_path = pathlib.Path(pwd).parent / "tests/fixtures"
----> 5 recipe = CoreRecipe()
      6 doc = recipe.run(fixture_path / "papermage.pdf")

File ~/workspace/papermage/papermage/recipes/core_recipe.py:94, in CoreRecipe.__init__(self, ivila_predictor_path, bio_roberta_predictor_path, svm_word_predictor_path, dpi)
     92 with warnings.catch_warnings():
     93 warnings.simplefilter("ignore")
---> 94 self.word_predictor = SVMWordPredictor.from_path(svm_word_predictor_path)
     96 self.publaynet_block_predictor = LPEffDetPubLayNetBlockPredictor.from_pretrained()
     97 self.ivila_predictor = IVILATokenClassificationPredictor.from_pretrained(ivila_predictor_path)

File ~/workspace/papermage/papermage/predictors/word_predictors.py:227, in SVMWordPredictor.from_path(cls, tar_path)
    225 @classmethod
    226 def from_path(cls, tar_path: str):
--> 227 classifier = SVMClassifier.from_path(tar_path=tar_path)
    228 predictor = SVMWordPredictor(classifier=classifier)
    229 return predictor

File ~/workspace/papermage/papermage/predictors/word_predictors.py:107, in SVMClassifier.from_path(cls, tar_path)
    105 with tarfile.open(tar_path, "r:gz") as tar:
    106 tar.extractall(path=tmp_dir)
--> 107 return cls.from_directory(tmp_dir)

File ~/workspace/papermage/papermage/predictors/word_predictors.py:111, in SVMClassifier.from_directory(cls, dir)
    109 @classmethod
    110 def from_directory(cls, dir: str):
--> 111 classifier = SVMClassifier.from_paths(
    112 ohe_encoder_path=os.path.join(dir, "svm_word_predictor/ohencoder.joblib"),
    113 scaler_path=os.path.join(dir, "svm_word_predictor/scaler.joblib"),
    114 estimator_path=os.path.join(dir, "svm_word_predictor/hyphen_clf.joblib"),
    115 unigram_probs_path=os.path.join(dir, "svm_word_predictor/unigram_probs.pkl"),
    116 )
    117 return classifier

File ~/workspace/papermage/papermage/predictors/word_predictors.py:128, in SVMClassifier.from_paths(cls, ohe_encoder_path, scaler_path, estimator_path, unigram_probs_path)
    119 @classmethod
    120 def from_paths(
    121 cls,
    (...)
    125 unigram_probs_path: str,
    126 ):
    127 ohe_encoder = load(ohe_encoder_path)
--> 128 scaler = load(scaler_path)
    129 estimator = load(estimator_path)
    130 unigram_probs = load(unigram_probs_path)

File ~/anaconda3/envs/papermage/lib/python3.11/site-packages/joblib/numpy_pickle.py:587, in load(filename, mmap_mode)
    581 if isinstance(fobj, str):
    582 # if the returned file object is a string, this means we
    583 # try to load a pickle file generated with an version of
    584 # Joblib so we load it with joblib compatibility function.
    585 return load_compatibility(fobj)
--> 587 obj = _unpickle(fobj, filename, mmap_mode)
    588 return obj

File ~/anaconda3/envs/papermage/lib/python3.11/site-packages/joblib/numpy_pickle.py:506, in _unpickle(fobj, filename, mmap_mode)
    504 obj = None
    505 try:
--> 506 obj = unpickler.load()
    507 if unpickler.compat_mode:
    508 warnings.warn("The file '%s' has been generated with a "
    509 "joblib version less than 0.10. "
    510 "Please regenerate this pickle file."
    511 % filename,
    512 DeprecationWarning, stacklevel=3)

File ~/anaconda3/envs/papermage/lib/python3.11/pickle.py:1213, in _Unpickler.load(self)
   1211 raise EOFError
   1212 assert isinstance(key, bytes_types)
-> 1213 dispatch[key[0]](self)
   1214 except _Stop as stopinst:
   1215 return stopinst.value

KeyError: 173
```

nikogamulin avatar Dec 24 '23 16:12 nikogamulin