[Bug] ValueError: wrong pixel_values size: torch.Size([1, 1]) when running DataPipelineLLM.py
### Checklist
- [x] 1. I have searched related issues but cannot get the expected help.
- [x] 2. The bug has not been fixed in the latest version.
- [x] 3. Please note that if the bug-related issue you submitted lacks corresponding environment info and a minimal reproducible demo, it will be challenging for us to reproduce and resolve the issue, reducing the likelihood of receiving feedback.
### Describe the bug
When processing a PDF file, the script extracts images from the PDF and passes them to the model to generate captions and descriptions. However, the pixel_values tensor ends up malformed, with shape torch.Size([1, 1]). This causes a ValueError in the vision model's forward pass, which rejects a pixel_values tensor of that size.
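For reference, the script's own load_image() (shown below) stacks one 3x448x448 tensor per tile, so a well-formed batch should be 4-D. A minimal sketch of the shape I would expect the vision model to accept (the tile count below is a made-up example):

```python
import torch

# Shape produced by load_image(): one 3x448x448 tensor per tile, stacked.
num_tiles = 7  # hypothetical count returned by dynamic_preprocess()
pixel_values = torch.randn(num_tiles, 3, 448, 448, dtype=torch.bfloat16)
print(pixel_values.shape)  # torch.Size([7, 3, 448, 448]) -- not [1, 1]
```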
### Reproduction
- Set up and activate the Python environment:

```bash
conda create -n env python=3.11
conda activate env
pip install transformers==4.37.2 flask psutil pillow pytesseract torch accelerate torchvision unstructured pdfminer.six pillow_heif unstructured-inference layoutparser opencv-python-headless sentencepiece einops flash_attn unstructured_pytesseract
sudo apt-get update
sudo apt-get install poppler-utils
sudo apt-get install tesseract-ocr
mkdir -p offload
```
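Optionally, a quick sanity check (a convenience snippet of mine, not part of the repro) to confirm the environment matches the versions reported below before running the pipeline:

```python
# Verify the environment matches the versions used in this report.
import torch
import transformers

print("torch:", torch.__version__)                 # expect 2.3.1
print("transformers:", transformers.__version__)   # expect 4.37.2
print("CUDA available:", torch.cuda.is_available())
```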
- Prepare DataPipelineLLM.py with the following script:
```python
import os
import logging
import json
import torch
import psutil
from flask import Flask, request, jsonify
from PIL import Image
from io import BytesIO
import pytesseract
from transformers import AutoTokenizer, AutoModel
import torchvision.transforms as T
from torchvision.transforms.functional import InterpolationMode
from unstructured.partition.pdf import partition_pdf

# Configure logging
logging.basicConfig(level=logging.INFO)

# Initialize Flask app
app = Flask(__name__)

# Function to log memory usage
def log_memory_usage():
    process = psutil.Process()
    mem_info = process.memory_info()
    logging.info(f"RSS: {mem_info.rss / (1024 ** 2):.2f} MB")

log_memory_usage()

# Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load tokenizer and model configuration
tokenizer = AutoTokenizer.from_pretrained("OpenGVLab/InternVL-Chat-V1-5", trust_remote_code=True)
model = AutoModel.from_pretrained(
    "OpenGVLab/InternVL-Chat-V1-5",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True
).eval().to(device)

def build_transform(input_size):
    IMAGENET_MEAN = (0.485, 0.456, 0.406)
    IMAGENET_STD = (0.229, 0.224, 0.225)
    transform = T.Compose([
        T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
    ])
    return transform

def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False):
    orig_width, orig_height = image.size
    aspect_ratio = orig_width / orig_height
    target_ratios = sorted(
        ((i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1)
         if i * j <= max_num and i * j >= min_num), key=lambda x: x[0] * x[1]
    )
    target_aspect_ratio = min(target_ratios, key=lambda x: abs((x[0] / x[1]) - aspect_ratio))
    target_width = image_size * target_aspect_ratio[0]
    target_height = image_size * target_aspect_ratio[1]
    blocks = target_aspect_ratio[0] * target_aspect_ratio[1]
    resized_img = image.resize((target_width, target_height))
    processed_images = []
    for i in range(blocks):
        box = (
            (i % (target_width // image_size)) * image_size,
            (i // (target_width // image_size)) * image_size,
            ((i % (target_width // image_size)) + 1) * image_size,
            ((i // (target_width // image_size)) + 1) * image_size
        )
        split_img = resized_img.crop(box)
        processed_images.append(split_img)
    if use_thumbnail and len(processed_images) != 1:
        thumbnail_img = image.resize((image_size, image_size))
        processed_images.append(thumbnail_img)
    return processed_images

def load_image(image_file, input_size=448, max_num=12):
    image = Image.open(image_file).convert('RGB')
    transform = build_transform(input_size=input_size)
    images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)
    logging.info(f"Number of processed images: {len(images)}")  # Log the number of processed images
    pixel_values = [transform(image) for image in images]
    pixel_values = torch.stack(pixel_values)
    logging.info(f"Loaded image tensor shape: {pixel_values.shape}")  # Log the shape of pixel_values
    return pixel_values

def extract_content_from_pdf(file_path):
    elements = partition_pdf(file_path)
    text = "\n".join([str(element) for element in elements if hasattr(element, 'type') and element.type == 'Text'])
    images = [element for element in elements if hasattr(element, 'type') and element.type == 'Image']
    return text, images

def ocr_image(image_bytes):
    image = Image.open(BytesIO(image_bytes))
    text = pytesseract.image_to_string(image)
    return text

def generate_text(prompt, max_length=150):
    if model:
        inputs = tokenizer(prompt, return_tensors="pt").to(device)
        logging.info(f"Input IDs shape: {inputs.input_ids.shape}")  # Log the shape of input_ids
        if not hasattr(model, 'img_context_token_id') or model.img_context_token_id is None:
            model.img_context_token_id = tokenizer.convert_tokens_to_ids("<image>")
        outputs = model.generate(inputs.input_ids, max_length=max_length, num_return_sequences=1)
        text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return text
    else:
        return "Model loading failed. Unable to generate text."

def caption_image(image_bytes):
    image = Image.open(BytesIO(image_bytes))
    pixel_values = load_image(image_bytes)
    image_description = "A description of the image for captioning purposes."
    prompt = f"<image>\nGenerate a caption for the following image description: {image_description}"
    caption = generate_text(prompt)
    return caption

def explain_image(caption):
    prompt = f"Explain this image based on the caption: {caption}"
    explanation = generate_text(prompt)
    return explanation

def process_pdf(file_path):
    text, images = extract_content_from_pdf(file_path)
    logging.info(f"Extracted text: {text[:100]}")  # Log the beginning of the extracted text
    processed_text = generate_text(text)
    ocr_results = [ocr_image(img) for img in images]
    captions = [caption_image(img) for img in images]
    explanations = [explain_image(caption) for caption in captions]
    data = {
        "text": processed_text,
        "images": [{"image": img, "ocr_text": ocr, "caption": caption, "explanation": explanation}
                   for img, ocr, caption, explanation in zip(images, ocr_results, captions, explanations)]
    }
    return data

def store_data_locally(data, output_folder, file_name):
    output_path = os.path.join(output_folder, f"{file_name}.json")
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
    logging.info(f"Saved data to {output_path}")

@app.route('/process', methods=['POST'])
def process_file():
    if 'file' not in request.files:
        return jsonify({'error': 'No file part in the request'}), 400
    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': 'No file selected for uploading'}), 400
    if file and file.filename.endswith('.pdf'):
        input_folder = "./input_files"
        output_folder = "./output_files"
        if not os.path.exists(input_folder):
            os.makedirs(input_folder)
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
        file_path = os.path.join(input_folder, file.filename)
        file.save(file_path)
        data = process_pdf(file_path)
        file_name = os.path.splitext(file.filename)[0]
        store_data_locally(data, output_folder, file_name)
        return jsonify({'message': f'File processed successfully and saved as {file_name}.json'}), 200
    else:
        return jsonify({'error': 'Allowed file types are pdf'}), 400

if __name__ == "__main__":
    app.run(host='0.0.0.0', port=5000)
```
- After running the script, send a request using curl or Postman:

```bash
curl -X POST -F 'file=@./datatext.pdf' http://127.0.0.1:5000/process
```
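For comparison, the OpenGVLab/InternVL-Chat-V1-5 model card drives multimodal generation through the model's chat() helper with an explicitly built pixel_values tensor, rather than calling generate() directly. A minimal sketch of that path using this script's load_image() (the image path and prompt are placeholders of mine):

```python
# Sketch following the model-card usage: build pixel_values explicitly and
# let chat() handle the <image> token expansion.
pixel_values = load_image("example.jpg").to(torch.bfloat16).to(device)  # [N, 3, 448, 448]
generation_config = dict(max_new_tokens=150, do_sample=False)
question = "<image>\nDescribe this image in detail."
response = model.chat(tokenizer, pixel_values, question, generation_config)
print(response)
```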
### Environment
```
(env) user@5cb2b72b-4b1d-4df3-8361-1cd4aff1550f:~$ env
SHELL=/bin/bash
CONDA_MKL_INTERFACE_LAYER_BACKUP=
CONDA_EXE=/home/user/mambaforge/bin/conda
_CE_M=
XML_CATALOG_FILES=file:///home/user/mambaforge/envs/env/etc/xml/catalog file:///etc/xml/catalog
PWD=/home/user
GSETTINGS_SCHEMA_DIR=/home/user/mambaforge/envs/env/share/glib-2.0/schemas
LOGNAME=user
XDG_SESSION_TYPE=tty
CONDA_PREFIX=/home/user/mambaforge/envs/env
JUPYTER_SERVER_URL=http://5cb2b72b-4b1d-4df3-8361-1cd4aff1550f:8888/
GSETTINGS_SCHEMA_DIR_CONDA_BACKUP=
MOTD_SHOWN=pam
LINES=48
HOME=/home/user
LANG=C.UTF-8
COLUMNS=147
CONDA_PROMPT_MODIFIER=(env)
LC_TERMINAL=iTerm2
SSH_CONNECTION=154.192.139.31 34046 192.168.122.135 22
LESSCLOSE=/usr/bin/lesspipe %s %s
XDG_SESSION_CLASS=user
JUPYTER_SERVER_ROOT=/home/user
TERM=xterm-256color
_CE_CONDA=
LESSOPEN=| /usr/bin/lesspipe %s
USER=user
CONDA_SHLVL=1
LC_TERMINAL_VERSION=3.5.3
SHLVL=2
PYXTERM_DIMENSIONS=80x25
XDG_SESSION_ID=8
CONDA_PYTHON_EXE=/home/user/mambaforge/bin/python
LD_LIBRARY_PATH=/usr/local/cuda-12.5/lib64:/usr/local/cuda-12.5/lib64:
XDG_RUNTIME_DIR=/run/user/1000
SSH_CLIENT=xxx.xxx.xxx.xx 34046 22
CONDA_DEFAULT_ENV=env
XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop
PATH=/usr/local/cuda-12.5/bin:/home/user/mambaforge/envs/env/bin:/usr/local/cuda-12.5/bin:/home/user/mambaforge/condabin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin
DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus
SSH_TTY=/dev/pts/0
MKL_INTERFACE_LAYER=LP64,GNU
_=/usr/bin/env
(env) user@5cb2b72b-4b1d-4df3-8361-1cd4aff1550f:~$
(env) user@5cb2b72b-4b1d-4df3-8361-1cd4aff1550f:~$ pip list
Package Version
------------------------- --------------
absl-py 2.1.0
accelerate 0.33.0
aiohttp 3.9.5
aiosignal 1.3.1
annotated-types 0.7.0
anyio 4.4.0
argon2-cffi 23.1.0
argon2-cffi-bindings 21.2.0
arrow 1.3.0
asttokens 2.4.1
astunparse 1.6.3
async-lru 2.0.4
attrs 23.2.0
Babel 2.14.0
backoff 2.2.1
beautifulsoup4 4.12.3
bleach 6.1.0
blinker 1.8.2
blis 0.7.10
Bottleneck 1.3.5
Brotli 1.1.0
cached-property 1.5.2
cachetools 5.3.3
catalogue 2.0.10
certifi 2024.6.2
cffi 1.16.0
chardet 5.2.0
charset-normalizer 3.3.2
click 8.1.7
cloudpathlib 0.18.1
colorama 0.4.6
coloredlogs 15.0.1
comm 0.2.2
confection 0.1.4
contourpy 1.2.1
cryptography 42.0.8
cycler 0.12.1
cymem 2.0.8
dataclasses-json 0.6.7
debugpy 1.8.2
decorator 5.1.1
deepdiff 7.0.1
defusedxml 0.7.1
einops 0.8.0
emoji 2.12.1
entrypoints 0.4
exceptiongroup 1.2.0
executing 2.0.1
fastai 2.7.15
fastcore 1.5.48
fastdownload 0.0.7
fastjsonschema 2.20.0
fastprogress 1.0.3
filelock 3.15.4
filetype 1.2.0
flash-attn 2.6.3
Flask 3.0.3
flatbuffers 24.3.25
fonttools 4.53.1
fqdn 1.5.1
frozenlist 1.4.1
fsspec 2024.6.1
gast 0.5.5
gmpy2 2.1.5
google-auth 2.31.0
google-auth-oauthlib 1.0.0
google-pasta 0.2.0
grpcio 1.54.3
h11 0.14.0
h2 4.1.0
h5py 3.11.0
hpack 4.0.0
httpcore 1.0.5
httpx 0.27.0
huggingface-hub 0.24.3
humanfriendly 10.0
hyperframe 6.0.1
idna 3.7
importlib_metadata 8.0.0
importlib_resources 6.4.0
iopath 0.1.10
ipykernel 6.29.5
ipython 8.26.0
ipywidgets 8.1.3
isoduration 20.11.0
itsdangerous 2.2.0
jedi 0.19.1
Jinja2 3.1.4
joblib 1.4.2
json5 0.9.25
jsonpath-python 1.0.6
jsonpointer 3.0.0
jsonschema 4.22.0
jsonschema-specifications 2023.12.1
jupyter 1.0.0
jupyter_client 8.6.2
jupyter-console 6.6.3
jupyter_core 5.7.2
jupyter-events 0.10.0
jupyter-lsp 2.2.5
jupyter_server 2.14.1
jupyter_server_terminals 0.5.3
jupyterlab 4.2.3
jupyterlab_pygments 0.3.0
jupyterlab_server 2.27.2
jupyterlab_widgets 3.0.11
keras 2.14.0
kiwisolver 1.4.5
langcodes 3.4.0
langdetect 1.0.9
language_data 1.2.0
layoutparser 0.3.4
lxml 5.2.2
marisa-trie 1.1.0
Markdown 3.6
markdown-it-py 3.0.0
MarkupSafe 2.1.5
marshmallow 3.21.3
matplotlib 3.8.4
matplotlib-inline 0.1.7
mdurl 0.1.2
mistune 3.0.2
ml-dtypes 0.2.0
mpmath 1.3.0
multidict 6.0.5
munkres 1.1.4
murmurhash 1.0.10
mypy-extensions 1.0.0
nbclient 0.10.0
nbconvert 7.16.4
nbformat 5.10.4
nest_asyncio 1.6.0
networkx 3.3
nltk 3.8.1
notebook 7.2.1
notebook_shim 0.2.4
numexpr 2.10.0
numpy 1.26.4
oauthlib 3.2.2
onnx 1.16.1
onnxruntime 1.18.1
opencv-python 4.10.0.84
opt-einsum 3.3.0
ordered-set 4.1.0
overrides 7.7.0
packaging 24.1
pandas 2.1.1
pandocfilters 1.5.0
parso 0.8.4
pdf2image 1.17.0
pdfminer.six 20231228
pdfplumber 0.11.2
pexpect 4.9.0
pickleshare 0.7.5
pillow 10.3.0
pillow_heif 0.18.0
pip 24.0
pkgutil_resolve_name 1.3.10
platformdirs 4.2.2
ply 3.11
portalocker 2.10.1
preshed 3.0.9
prometheus_client 0.20.0
prompt_toolkit 3.0.47
protobuf 4.21.12
psutil 6.0.0
ptyprocess 0.7.0
pure-eval 0.2.2
pyasn1 0.6.0
pyasn1_modules 0.4.0
pycparser 2.22
pydantic 2.8.2
pydantic_core 2.20.1
Pygments 2.18.0
PyJWT 2.8.0
pyOpenSSL 24.0.0
pyparsing 3.1.2
pypdf 4.3.1
pypdfium2 4.30.0
PyQt5 5.15.9
PyQt5-sip 12.12.2
PySocks 1.7.1
pytesseract 0.3.10
python-dateutil 2.9.0
python-iso639 2024.4.27
python-json-logger 2.0.7
python-magic 0.4.27
python-multipart 0.0.9
pytz 2024.1
pyu2f 0.1.5
PyYAML 6.0.1
pyzmq 26.0.3
qtconsole 5.5.2
QtPy 2.4.1
rapidfuzz 3.9.5
referencing 0.35.1
regex 2024.7.24
requests 2.32.3
requests-oauthlib 2.0.0
requests-toolbelt 1.0.0
rfc3339-validator 0.1.4
rfc3986-validator 0.1.1
rich 13.7.1
rpds-py 0.18.1
rsa 4.9
safetensors 0.4.3
scikit-learn 1.5.1
scipy 1.14.0
Send2Trash 1.8.3
sentencepiece 0.2.0
setuptools 70.1.1
shellingham 1.5.4
sip 6.7.12
six 1.16.0
smart_open 7.0.4
sniffio 1.3.1
soupsieve 2.5
spacy 3.7.5
spacy-legacy 3.0.12
spacy-loggers 1.0.5
srsly 2.4.8
stack-data 0.6.2
sympy 1.12.1
tabulate 0.9.0
tensorboard 2.14.1
tensorboard-data-server 0.7.0
tensorflow 2.14.0
tensorflow-estimator 2.14.0
termcolor 2.4.0
terminado 0.18.1
thinc 8.2.3
threadpoolctl 3.5.0
timm 1.0.8
tinycss2 1.3.0
tokenizers 0.15.2
toml 0.10.2
tomli 2.0.1
torch 2.3.1
torchaudio 2.3.1
torchvision 0.18.1
tornado 6.4.1
tqdm 4.66.4
traitlets 5.14.3
transformers 4.37.2
triton 2.3.1
typer 0.12.3
typer-slim 0.12.3
types-python-dateutil 2.9.0.20240316
typing_extensions 4.12.2
typing-inspect 0.9.0
typing-utils 0.1.0
tzdata 2023.3
unstructured 0.15.0
unstructured-client 0.25.0
unstructured-inference 0.7.36
unstructured.pytesseract 0.3.12
uri-template 1.3.0
urllib3 2.2.2
wasabi 1.1.2
wcwidth 0.2.13
weasel 0.4.1
webcolors 24.6.0
webencodings 0.5.1
websocket-client 1.8.0
Werkzeug 3.0.3
wheel 0.43.0
widgetsnbextension 4.0.11
wrapt 1.14.1
yarl 1.9.4
zipp 3.19.2
zstandard 0.22.0
(env) user@5cb2b72b-4b1d-4df3-8361-1cd4aff1550f:~$
```
### Error traceback
```
(env) user@5cb2b72b-4b1d-4df3-8361-1cd4aff1550f:~$ python DataPipelineLLM.py
INFO:root:RSS: 693.44 MB
/home/user/mambaforge/envs/env/lib/python3.11/site-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
warnings.warn(
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
INFO:transformers_modules.OpenGVLab.InternVL-Chat-V1-5.7e2100537fee41c08e1d0c1436179328dfc4396d.configuration_internvl_chat:vision_select_layer: -1
INFO:transformers_modules.OpenGVLab.InternVL-Chat-V1-5.7e2100537fee41c08e1d0c1436179328dfc4396d.configuration_internvl_chat:ps_version: v2
INFO:transformers_modules.OpenGVLab.InternVL-Chat-V1-5.7e2100537fee41c08e1d0c1436179328dfc4396d.configuration_internvl_chat:min_dynamic_patch: 1
INFO:transformers_modules.OpenGVLab.InternVL-Chat-V1-5.7e2100537fee41c08e1d0c1436179328dfc4396d.configuration_internvl_chat:max_dynamic_patch: 12
INFO:transformers_modules.OpenGVLab.InternVL-Chat-V1-5.7e2100537fee41c08e1d0c1436179328dfc4396d.modeling_internvl_chat:num_image_token: 256
INFO:transformers_modules.OpenGVLab.InternVL-Chat-V1-5.7e2100537fee41c08e1d0c1436179328dfc4396d.modeling_internvl_chat:ps_version: v2
Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████| 11/11 [00:02<00:00, 5.49it/s]
* Serving Flask app 'DataPipelineLLM'
* Debug mode: off
INFO:werkzeug:WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on all addresses (0.0.0.0)
* Running on http://127.0.0.1:5000
* Running on http://192.168.122.135:5000
INFO:werkzeug:Press CTRL+C to quit
INFO:unstructured:PDF text extraction failed, skip text extraction...
INFO:root:Extracted text:
INFO:root:Input IDs shape: torch.Size([1, 1])
ERROR:DataPipelineLLM:Exception on /process [POST]
Traceback (most recent call last):
File "/home/user/mambaforge/envs/env/lib/python3.11/site-packages/flask/app.py", line 1473, in wsgi_app
response = self.full_dispatch_request()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/mambaforge/envs/env/lib/python3.11/site-packages/flask/app.py", line 882, in full_dispatch_request
rv = self.handle_user_exception(e)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/mambaforge/envs/env/lib/python3.11/site-packages/flask/app.py", line 880, in full_dispatch_request
rv = self.dispatch_request()
^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/mambaforge/envs/env/lib/python3.11/site-packages/flask/app.py", line 865, in dispatch_request
return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args) # type: ignore[no-any-return]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/DataPipelineLLM.py", line 171, in process_file
data = process_pdf(file_path)
^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/DataPipelineLLM.py", line 133, in process_pdf
processed_text = generate_text(text)
^^^^^^^^^^^^^^^^^^^
File "/home/user/DataPipelineLLM.py", line 111, in generate_text
outputs = model.generate(inputs.input_ids, max_length=max_length, num_return_sequences=1)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/mambaforge/envs/env/lib/python3.11/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/home/user/.cache/huggingface/modules/transformers_modules/OpenGVLab/InternVL-Chat-V1-5/7e2100537fee41c08e1d0c1436179328dfc4396d/modeling_internvl_chat.py", line 321, in generate
vit_embeds = self.extract_feature(pixel_values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/.cache/huggingface/modules/transformers_modules/OpenGVLab/InternVL-Chat-V1-5/7e2100537fee41c08e1d0c1436179328dfc4396d/modeling_internvl_chat.py", line 181, in extract_feature
vit_embeds = self.vision_model(
^^^^^^^^^^^^^^^^^^
File "/home/user/mambaforge/envs/env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/mambaforge/envs/env/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/.cache/huggingface/modules/transformers_modules/OpenGVLab/InternVL-Chat-V1-5/7e2100537fee41c08e1d0c1436179328dfc4396d/modeling_intern_vit.py", line 418, in forward
raise ValueError(f'wrong pixel_values size: {pixel_values.shape}')
ValueError: wrong pixel_values size: torch.Size([1, 1])
INFO:werkzeug:127.0.0.1 - - [31/Jul/2024 09:16:02] "POST /process HTTP/1.1" 500 -
```
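For what it's worth: the rejected pixel_values shape, torch.Size([1, 1]), is exactly the input IDs shape logged just before the crash, and the traceback shows the custom generate() in modeling_internvl_chat.py passing its pixel_values argument straight into extract_feature(). My guess is that the positional inputs.input_ids in generate_text() is being bound to the pixel_values parameter. A minimal sketch of the keyword-based call I would try instead (treat it as a hypothesis; I have not confirmed that a text-only pixel_values=None call is supported):

```python
# Hypothetical rewrite of the call inside generate_text(): keyword arguments
# prevent input_ids from being silently bound to the pixel_values parameter
# of InternVL's custom generate(). pixel_values=None for a text-only prompt
# is my assumption, not confirmed.
outputs = model.generate(
    pixel_values=None,
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_length=max_length,
    num_return_sequences=1,
)
```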
@czczup @opengvlab-admin Can you please take a look at the error above?
Hello, it looks like the image tensor you passed in has the wrong shape; please check it.
@czczup What shape does the input image tensor need to be? Could you please elaborate on your previous comment? I would appreciate it!