I have the following two questions and would very much appreciate it if anyone could help:
In the paper, you stated that you fine-tuned Llama 2 with LoRA. From what I understand, the base model and the LoRA adapter need to be loaded separately and combined using merge_and_unload, as described in this link. However, I cannot seem to find that part of the code in your codebase?
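For reference, the pattern I had in mind is something like the following (a minimal sketch using peft; the checkpoint name and adapter path are placeholders, not the ones your repo uses):

```python
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Load the frozen Llama 2 base model (placeholder checkpoint name).
base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

# Wrap it with the trained LoRA adapter (placeholder adapter path).
model = PeftModel.from_pretrained(base_model, "path/to/lora-adapter")

# Fold the LoRA weights into the base weights and drop the PEFT wrappers,
# leaving a plain Llama model for inference.
model = model.merge_and_unload()
```

I could not find an equivalent of this merge step anywhere in the repo.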
Along with that, I am having a problem running the demo with 4-bit quantization after changing load_in_8bit to load_in_4bit. Here is the error:
```
Traceback (most recent call last):
  File "/content/private_scripts/MiniGPT4-video/minigpt4_video_inference.py", line 179, in <module>
    pred=run(video_path,instruction,model,vis_processor,gen_subtitles=add_subtitles)
  File "/content/private_scripts/MiniGPT4-video/minigpt4_video_inference.py", line 128, in run
    answers = model.generate(prepared_images, prompt, max_new_tokens=args.max_new_tokens, do_sample=True, lengths=[length],num_beams=1)
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/content/private_scripts/MiniGPT4-video/minigpt4/models/mini_gpt4_llama_v2.py", line 553, in generate
    outputs = self.llama_model.generate(
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/peft/peft_model.py", line 580, in generate
    return self.base_model.generate(**kwargs)
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/transformers/generation/utils.py", line 1525, in generate
    return self.sample(
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/transformers/generation/utils.py", line 2622, in sample
    outputs = self(
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = module._old_forward(*args, **kwargs)
  File "/content/private_scripts/MiniGPT4-video/minigpt4/models/modeling_llama_v2.py", line 66, in forward
    outputs = self.model(
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = module._old_forward(*args, **kwargs)
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 1070, in forward
    layer_outputs = decoder_layer(
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = module._old_forward(*args, **kwargs)
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 798, in forward
    hidden_states, self_attn_weights, present_key_value = self.self_attn(
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/accelerate/hooks.py", line 165, in new_forward
    output = module._old_forward(*args, **kwargs)
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/transformers/models/llama/modeling_llama.py", line 386, in forward
    query_states = self.q_proj(hidden_states)
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/envs/minigpt4_video/lib/python3.9/site-packages/peft/tuners/lora.py", line 348, in forward
    result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
RuntimeError: mat1 and mat2 shapes cannot be multiplied (3030x4096 and 1x8388608)
```
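For reference, this is roughly the change I made (a minimal sketch; in the actual repo the flag is set during model loading, and `llama_path` here is a placeholder):

```python
from transformers import AutoModelForCausalLM

# Before: 8-bit loading, which runs fine.
# model = AutoModelForCausalLM.from_pretrained(llama_path, load_in_8bit=True)

# After: switching the same flag to 4-bit triggers the shape error above.
model = AutoModelForCausalLM.from_pretrained(llama_path, load_in_4bit=True)
```

My guess (I may be wrong) is that the peft version pinned by the repo calls F.linear directly in lora.py and does not handle bitsandbytes 4-bit layers: 1x8388608 looks like a 4096x4096 weight packed two 4-bit values per byte (4096 * 4096 / 2 = 8388608), rather than an unpacked 4096x4096 matrix.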