OmniParser
ValueError: Attention mask should be of size (30, 1, 10, 10), but is torch.Size([30, 1, 5, 5])
I got the following error while running the gradio demo script:
Traceback (most recent call last):
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\gradio\queueing.py", line 625, in process_events
response = await route_utils.call_process_api(
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\gradio\route_utils.py", line 322, in call_process_api
output = await app.get_blocks().process_api(
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\gradio\blocks.py", line 2044, in process_api
result = await self.call_function(
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\gradio\blocks.py", line 1591, in call_function
prediction = await anyio.to_thread.run_sync( # type: ignore
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\anyio\to_thread.py", line 33, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\anyio\_backends\_asyncio.py", line 877, in run_sync_in_worker_thread
return await future
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\anyio\_backends\_asyncio.py", line 807, in run
result = context.run(func, *args)
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\gradio\utils.py", line 883, in wrapper
response = f(*args, **kwargs)
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\OmniParser\gradio_demo.py", line 58, in process
dino_labled_img, label_coordinates, parsed_content_list = get_som_labeled_img(image_save_path, yolo_model, BOX_TRESHOLD = box_threshold, output_coord_in_ratio=True, ocr_bbox=ocr_bbox,draw_bbox_config=draw_bbox_config, caption_model_processor=caption_model_processor, ocr_text=text,iou_threshold=iou_threshold, imgsz=imgsz,)
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\OmniParser\util\utils.py", line 452, in get_som_labeled_img
parsed_content_icon = get_parsed_content_icon(filtered_boxes, starting_idx, image_source, caption_model_processor, prompt=prompt,batch_size=batch_size)
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\OmniParser\util\utils.py", line 117, in get_parsed_content_icon
generated_ids = model.generate(**inputs, max_length=100, num_beams=5, no_repeat_ngram_size=2, early_stopping=True, num_return_sequences=1) # temperature=0.01, do_sample=True,
File "C:\Users\faree\.cache\huggingface\modules\transformers_modules\microsoft\Florence-2-base-ft\9803f52844ec1ae5df004e6089262e9a23e527fd\modeling_florence2.py", line 2796, in generate
return self.language_model.generate(
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\utils\_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\transformers\generation\utils.py", line 2067, in generate
model_kwargs = self._prepare_encoder_decoder_kwargs_for_generation(
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\transformers\generation\utils.py", line 652, in _prepare_encoder_decoder_kwargs_for_generation
model_kwargs["encoder_outputs"]: ModelOutput = encoder(**encoder_kwargs) # type: ignore
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\faree\.cache\huggingface\modules\transformers_modules\microsoft\Florence-2-base-ft\9803f52844ec1ae5df004e6089262e9a23e527fd\modeling_florence2.py", line 1631, in forward
layer_outputs = encoder_layer(
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1750, in _call_impl
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\faree\.cache\huggingface\modules\transformers_modules\microsoft\Florence-2-base-ft\9803f52844ec1ae5df004e6089262e9a23e527fd\modeling_florence2.py", line 1631, in forward
layer_outputs = encoder_layer(
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1750, in _call_impl
layer_outputs = encoder_layer(
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1750, in _call_impl
call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\faree\.cache\huggingface\modules\transformers_modules\microsoft\Florence-2-base-ft\9803f52844ec1ae5df004e6089262e9a23e527fd\modeling_florence2.py", line 1280, in forward
hidden_states, attn_weights, _ = self.self_attn(
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1739, in _wrapped_call_impl
return forward_call(*args, **kwargs)
File "C:\Users\faree\.cache\huggingface\modules\transformers_modules\microsoft\Florence-2-base-ft\9803f52844ec1ae5df004e6089262e9a23e527fd\modeling_florence2.py", line 1280, in forward
hidden_states, attn_weights, _ = self.self_attn(
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
hidden_states, attn_weights, _ = self.self_attn(
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "C:\Users\faree\Desktop\deepseek_R1_from_scratch\.venv-r10\lib\site-packages\torch\nn\modules\module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
File "C:\Users\faree\.cache\huggingface\modules\transformers_modules\microsoft\Florence-2-base-ft\9803f52844ec1ae5df004e6089262e9a23e527fd\modeling_florence2.py", line 861, in forward
raise ValueError(
ValueError: Attention mask should be of size (30, 1, 10, 10), but is torch.Size([30, 1, 5, 5])
Make sure the folder holding your icon_caption weights is named icon_caption_florence, as shown in the omnitool readme; see the loading sketch below.
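For reference, here is a minimal sketch of loading the caption model from the correctly named folder with plain transformers calls. The weights/icon_caption_florence path is an assumption based on the readme layout, not copied from the OmniParser code, and the comment about the mismatch mechanism is a likely explanation rather than a confirmed one:

import torch
from transformers import AutoModelForCausalLM, AutoProcessor

# Assumed layout from the omnitool readme: the Florence-2 caption weights
# must live in a folder named exactly "icon_caption_florence".
model_path = "weights/icon_caption_florence"

# trust_remote_code=True is needed because Florence-2 ships custom modeling code.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32
model = AutoModelForCausalLM.from_pretrained(
    model_path, torch_dtype=dtype, trust_remote_code=True
).to(device)

# The processor comes from the Florence-2 checkpoint seen in the traceback.
# Pairing the Florence-2 weights with a processor from a different caption
# model (e.g. BLIP-2) likely produces a different number of image tokens,
# which is the kind of mismatch behind the (30, 1, 10, 10) vs (30, 1, 5, 5)
# attention mask error above.
processor = AutoProcessor.from_pretrained(
    "microsoft/Florence-2-base-ft", trust_remote_code=True
)

If the folder names don't line up, the demo can end up pairing the Florence-2 weights with the wrong processor, and generate() then fails deep inside the encoder exactly as in the traceback above.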
Oops, thank you!
How's it going, bro?