docformer
docformer copied to clipboard
Error in Example: Please provide the bounding box and words or pass the argument "use_ocr" = True
Ran into this error while running the example notebook.
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
/tmp/ipykernel_33283/863471981.py in <cell line: 2>()
1 ## Using a single batch for the forward propagation
----> 2 features = next(iter(train_data_loader))
3 img,token,x_feat,y_feat, labels = features
~/anaconda3/envs/python3/lib/python3.8/site-packages/torch/utils/data/dataloader.py in __next__(self)
679 # TODO(https://github.com/pytorch/pytorch/issues/76750)
680 self._reset() # type: ignore[call-arg]
--> 681 data = self._next_data()
682 self._num_yielded += 1
683 if self._dataset_kind == _DatasetKind.Iterable and \
~/anaconda3/envs/python3/lib/python3.8/site-packages/torch/utils/data/dataloader.py in _next_data(self)
719 def _next_data(self):
720 index = self._next_index() # may raise StopIteration
--> 721 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
722 if self._pin_memory:
723 data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)
~/anaconda3/envs/python3/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py in fetch(self, possibly_batched_index)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
~/anaconda3/envs/python3/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py in <listcomp>(.0)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
/tmp/ipykernel_33283/2543102337.py in __getitem__(self, index)
22 If labels are not None, then labels also
23 '''
---> 24 encoding = create_features(self.entries[index],self.tokenizer, apply_mask_for_mlm=self.use_mlm)
25
26 if self.labels==None:
~/docformer/examples/../src/docformer/dataset.py in create_features(image, tokenizer, add_batch_dim, target_size, max_seq_length, path_to_save, save_to_disk, apply_mask_for_mlm, extras_for_debugging, use_ocr, bounding_box, words)
190
191 if (use_ocr == False) and (bounding_box == None or words == None):
--> 192 raise Exception('Please provide the bounding box and words or pass the argument "use_ocr" = True')
193
194 if use_ocr == True:
Exception: Please provide the bounding box and words or pass the argument "use_ocr" = True
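Hi @bbalaji-ucsd, when you call the `create_features` function without explicitly providing both the bounding boxes and the words, you have to pass the argument `use_ocr=True`.
In your call `use_ocr` is currently `False` and neither `bounding_box` nor `words` is supplied, which is why the error is raised.
For example, in the dataset's `__getitem__`: `create_features(self.entries[index], self.tokenizer, apply_mask_for_mlm=self.use_mlm, use_ocr=True)`.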
Hope this helps
Regards, Akarsh