Describe the issue
Issue:
Command:
PASTE THE COMMANDS HERE.
Log:
PASTE THE LOGS HERE.
from llava.model.builder import load_pretrained_model
from llava.mm_utils import get_model_name_from_path
from llava.eval.run_llava import eval_model
# Run a single image+text query against LLaVA v1.6 34B via eval_model().
#
# eval_model() loads the tokenizer, model and image processor itself from
# args.model_path / args.model_base / args.model_name (see
# llava/eval/run_llava.py). The checkpoint must therefore NOT also be loaded
# here with load_pretrained_model(): that keeps a second, unused copy of the
# 34B weights resident on the GPUs, leaving too little memory for the load
# eval_model() performs. accelerate then places the layers that do not fit on
# the "meta" device, which is a likely cause of
# "NotImplementedError: Cannot copy out of meta tensor; no data!".
model_path = "liuhaotian/llava-v1.6-34b"
prompt = "What are the things I should be cautious about when I visit here?"
image_file = "https://llava-vl.github.io/static/images/view.jpg"

# eval_model() reads its settings as attributes, so build a throwaway
# namespace-like object that mirrors the CLI arguments of run_llava.py.
args = type('Args', (), {
    "model_path": model_path,
    "model_base": None,
    "model_name": get_model_name_from_path(model_path),
    "query": prompt,
    "conv_mode": None,  # None lets eval_model() pick the template from the model name
    "image_file": image_file,
    "sep": ",",  # separator used when several image files are passed
    "temperature": 0,  # 0 disables sampling (greedy decoding)
    "top_p": None,
    "num_beams": 1,
    "max_new_tokens": 512,
})()

eval_model(args)
Got this error: Traceback (most recent call last):
File "/home/ubuntu/Llava-34B-1.6.py", line 34, in <module>
eval_model(args)
File "/home/ubuntu/LLaVA/llava/eval/run_llava.py", line 115, in eval_model
output_ids = model.generate(
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/ubuntu/LLaVA/llava/model/language_model/llava_llama.py", line 125, in generate
) = self.prepare_inputs_labels_for_multimodal(
File "/home/ubuntu/LLaVA/llava/model/llava_arch.py", line 157, in prepare_inputs_labels_for_multimodal
image_features = self.encode_images(concat_images)
File "/home/ubuntu/LLaVA/llava/model/llava_arch.py", line 141, in encode_images
image_features = self.get_model().get_vision_tower()(images)
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/accelerate/hooks.py", line 165, in new_forward
output = old_forward(*args, **kwargs)
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/ubuntu/LLaVA/llava/model/multimodal_encoder/clip_encoder.py", line 54, in forward
image_forward_outs = self.vision_tower(images.to(device=self.device, dtype=self.dtype), output_hidden_states=True)
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/accelerate/hooks.py", line 160, in new_forward
args, kwargs = module._hf_hook.pre_forward(module, *args, **kwargs)
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/accelerate/hooks.py", line 290, in pre_forward
return send_to_device(args, self.execution_device), send_to_device(
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/accelerate/utils/operations.py", line 151, in send_to_device
return honor_type(
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/accelerate/utils/operations.py", line 83, in honor_type
return type(obj)(generator)
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/accelerate/utils/operations.py", line 152, in <genexpr>
tensor, (send_to_device(t, device, non_blocking=non_blocking, skip_keys=skip_keys) for t in tensor)
File "/home/ubuntu/anaconda3/envs/llava/lib/python3.10/site-packages/accelerate/utils/operations.py", line 167, in send_to_device
return tensor.to(device, non_blocking=non_blocking)
NotImplementedError: Cannot copy out of meta tensor; no data!
Hardware: 4× V100 GPUs, 32 GB each.
### How can I run this model for inference?
Another way of running this model also failed, with an out-of-memory (OOM) error on this hardware configuration: CUDA_VISIBLE_DEVICES=0,1,2,3 python3 -m sglang.launch_server --model-path liuhaotian/llava-v1.6-34b --tokenizer-path liuhaotian/llava-v1.6-34b-tokenizer --port 30000 --tp 2
I am struggling with a similar issue on my device.
While running llava-v1.5 works fine, running llava-v1.6 in any configuration (7B, 13B and 34B) yields the same errors.
This problem may occur when the GPUs run out of memory. Watch how the memory usage evolves with "watch nvidia-smi".