MiniCPM-O cannot use TTS
Describe the bug
After enabling the TTS parameter, the program crashes with the error below.
How to reproduce — steps to reproduce the error:
- intelanalytics/ipex-llm-inference-cpp-xpu:latest
- Follow setup guidance on IPEX-LLM
- The input voice is arbitrary input audio.
- There is a script try.py:
import torch
import librosa
from transformers import AutoModel
from ipex_llm.transformers import AutoModel
model = AutoModel.from_pretrained('./minicpm-o', trust_remote_code=True,
attn_implementation='sdpa', torch_dtype=torch.bfloat16) # sdpa or flash_attention_2, no eager
model = model.half().to('xpu')
tokenizer = AutoTokenizer.from_pretrained('./minicpm-o', trust_remote_code=True)
model.init_tts()
model.tts.float()
mimick_prompt = "Please repeat each user's speech, including voice style and speech content."
audio_input, _ = librosa.load('./question.wav', sr=16000, mono=True)
msgs = [{'role': 'user', 'content': [mimick_prompt,audio_input]}]
res = model.chat(
msgs=msgs,
tokenizer=tokenizer,
sampling=True,
max_new_tokens=128,
use_tts_template=True,
temperature=0.3,
generate_audio=True,
output_audio_path='./output.wav', # save the tts result to output_audio_path
)
"""
> ERROR Message:
"""model = AutoModel.from_pretrained(
model_path,
trust_remote_code=True,
attn_implementation='sdpa',
load_in_low_bit="sym_int4",
optimize_model=True,
init_vision=True,
init_audio=True,
init_tts=True
) # sdpa or flash_attention_2, no eager
model = model.half().to('xpu')
Traceback (most recent call last):
File "/data/jimin/dev/ipex-llm/python/llm/example/GPU/HuggingFace/Multimodal/MiniCPM-o-2_6/tts.py", line 15, in <module>
model = AutoModel.from_pretrained(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniforge3/envs/llm/lib/python3.11/unittest/mock.py", line 1378, in patched
return func(*newargs, **newkeywargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniforge3/envs/llm/lib/python3.11/site-packages/ipex_llm/transformers/model.py", line 349, in from_pretrained
model = cls.load_convert(q_k, optimize_model, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniforge3/envs/llm/lib/python3.11/site-packages/ipex_llm/transformers/model.py", line 502, in load_convert
model = ggml_convert_low_bit(model, qtype, optimize_model,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniforge3/envs/llm/lib/python3.11/site-packages/ipex_llm/transformers/convert.py", line 1123, in ggml_convert_low_bit
model, has_been_replaced = _replace_with_low_bit_linear(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniforge3/envs/llm/lib/python3.11/site-packages/ipex_llm/transformers/convert.py", line 732, in _replace_with_low_bit_linear
_, _flag = _replace_with_low_bit_linear(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniforge3/envs/llm/lib/python3.11/site-packages/ipex_llm/transformers/convert.py", line 732, in _replace_with_low_bit_linear
_, _flag = _replace_with_low_bit_linear(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniforge3/envs/llm/lib/python3.11/site-packages/ipex_llm/transformers/convert.py", line 722, in _replace_with_low_bit_linear
module.weight = None
^^^^^^^^^^^^^
File "/root/miniforge3/envs/llm/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1768, in __setattr__
super().__setattr__(name, value)
File "/root/miniforge3/envs/llm/lib/python3.11/site-packages/torch/nn/utils/parametrize.py", line 373, in set_original
self.parametrizations[tensor_name].right_inverse(value)
File "/root/miniforge3/envs/llm/lib/python3.11/site-packages/torch/nn/utils/parametrize.py", line 217, in right_inverse
value = module.right_inverse(value)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniforge3/envs/llm/lib/python3.11/site-packages/torch/nn/utils/parametrizations.py", line 303, in right_inverse
weight_g = torch.norm_except_dim(weight, 2, self.dim)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: norm_except_dim(): argument 'v' (position 1) must be Tensor, not NoneType
You can try setting modules_to_not_convert=["apm", "vpm", "resampler", "tts"] in AutoModel.from_pretrained.
modules_to_not_convert is a parameter of ipex_llm's AutoModel.from_pretrained.
Try:
from ipex_llm.transformers import AutoModel
model = AutoModel.from_pretrained(
model_path,
trust_remote_code=True,
attn_implementation='sdpa',
torch_dtype=torch.half,
load_in_low_bit='sym_int4',
modules_to_not_convert=["apm", "vpm", "resampler", "tts"],
)
model = model.eval().to('xpu')
model.init_tts()
model.tts.float()
Hi, you need to update the downloaded modeling file.
The old modeling file moves inputs to the CUDA device; you need to update to the latest modeling file, or update your downloaded modeling file according to this PR.