LLaVA
LLaVA copied to clipboard
[Error] following `model_kwargs` are not used by the model: ['image_sizes'
Describe the issue
Issue: i am using the bellow code, a modified version of cli.py code, to query a local image.
import sys
sys.path.append(os.getcwd()+ "/LLaVA/")
import argparse
import torch
from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from llava.conversation import conv_templates, SeparatorStyle
from llava.model.builder import load_pretrained_model
from llava.utils import disable_torch_init
from llava.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
from PIL import Image
import requests
from PIL import Image
from io import BytesIO
from transformers import TextStreamer
def load_image(image_file):
if image_file.startswith('http://') or image_file.startswith('https://'):
response = requests.get(image_file)
image = Image.open(BytesIO(response.content)).convert('RGB')
else:
image = Image.open(image_file).convert('RGB')
return image
model_path="liuhaotian/LLaVA-Lightning-MPT-7B-preview"
model_base=None
load_8bit=False
load_4bit=False
temperature = 0.2
max_new_tokens = 512
image_file = "/localhome/local-vishkumar/gen-ai-app/streams/llava_logo.png"
disable_torch_init()
model_name = get_model_name_from_path(model_path)
tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base, model_name, load_8bit, load_4bit)
if "llama-2" in model_name.lower():
conv_mode = "llava_llama_2"
elif "mistral" in model_name.lower():
conv_mode = "mistral_instruct"
elif "v1.6-34b" in model_name.lower():
conv_mode = "chatml_direct"
elif "v1" in model_name.lower():
conv_mode = "llava_v1"
elif "mpt" in model_name.lower():
conv_mode = "mpt"
else:
conv_mode = "llava_v0"
if conv_mode is not None and conv_mode != conv_mode:
print('[WARNING] the auto inferred conversation mode is {}, while `--conv-mode` is {}, using {}'.format(conv_mode, conv_mode, conv_mode))
else:
conv_mode = conv_mode
conv = conv_templates[conv_mode].copy()
if "mpt" in model_name.lower():
roles = ('user', 'assistant')
else:
roles = conv.roles
image = load_image(image_file)
image_size = image.size
image_tensor = process_images([image], image_processor, model.config)
if type(image_tensor) is list:
image_tensor = [image.to(model.device, dtype=torch.float16) for image in image_tensor]
else:
image_tensor = image_tensor.to(model.device, dtype=torch.float16)
inp = "Describe the image"
print(f"{roles[0]}: {inp}")
print(f"{roles[1]}: ", end="")
if image is not None:
# first message
if model.config.mm_use_im_start_end:
inp = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + inp
else:
inp = DEFAULT_IMAGE_TOKEN + '\n' + inp
conv.append_message(conv.roles[0], inp)
image = None
else:
conv.append_message(conv.roles[0], inp)
conv.append_message(conv.roles[1], None)
prompt = conv.get_prompt()
input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).to(model.device)
stop_str = conv.sep if conv.sep_style != SeparatorStyle.TWO else conv.sep2
keywords = [stop_str]
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
with torch.inference_mode():
output_ids = model.generate(
input_ids,
images=image_tensor,
image_sizes=[image_size],
do_sample=True if temperature > 0 else False,
temperature=temperature,
max_new_tokens=max_new_tokens,
streamer=streamer,
use_cache=True)
outputs = tokenizer.decode(output_ids[0]).strip()
conv.messages[-1][-1] = outputs
print("\n", {"prompt": prompt, "outputs": outputs}, "\n")
Error Message:
user: Describe the image
assistant: Traceback (most recent call last):
File "/localhome/local-vishkumar/gen-ai-app/src/test.py", line 100, in <module>
output_ids = model.generate(
File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/localhome/local-vishkumar/.local/lib/python3.10/site-packages/transformers/generation/utils.py", line 1307, in generate
self._validate_model_kwargs(model_kwargs.copy())
File "/localhome/local-vishkumar/.local/lib/python3.10/site-packages/transformers/generation/utils.py", line 1122, in _validate_model_kwargs
raise ValueError(
ValueError: The following `model_kwargs` are not used by the model: ['image_sizes'] (note: typos in the generate arguments will also show up in this list)
Can anyone please tell me why i am getting this issue??
changing the model to model_path="liuhaotian/llava-v1.5-13b" resolved the issue and I'm getting the image description now. Seems like there is bug in LLaVA-Lightning-MPT-7B-preview model path.
getting this also to llava-hf/llava-v1.6-mistral-7b-hf
getting this error in MPT-7b model too