实时语音识别示例代码报错
Before asking:
- search the issues.
- search the docs.
Code
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess

import soundfile
import os

# Streaming chunk configuration:
#   [0, 10, 5] -> 600 ms chunks, [0, 8, 4] -> 480 ms chunks (at 16 kHz input).
chunk_size = [0, 10, 5]
encoder_chunk_look_back = 4  # number of chunks to look back for encoder self-attention
decoder_chunk_look_back = 1  # number of encoder chunks to look back for decoder cross-attention

# NOTE: streaming recognition requires a streaming model. "iic/SenseVoiceSmall"
# (assigned to an unused model_dir in the original script) is an offline model
# and cannot be fed chunk-by-chunk; paraformer-zh-streaming is the correct choice.
model = AutoModel(model="paraformer-zh-streaming", model_revision="v2.0.4", disable_update=True)

# wav_file = os.path.join(model.model_path, "example/asr_example.wav")
wav_file = "./test_vad.wav"
speech, sample_rate = soundfile.read(wav_file)

# The streaming Paraformer expects 16 kHz mono PCM. A stereo file comes back
# from soundfile as a 2-D (frames, channels) array, which breaks the model's
# 3-D (batch, time, dim) unpacking — keep only the first channel in that case.
# NOTE(review): if sample_rate != 16000 the audio must also be resampled first.
if getattr(speech, "ndim", 1) > 1:
    speech = speech[:, 0]

# chunk_size[1] * 960 samples = chunk_size[1] * 60 ms at 16 kHz (600 ms here).
chunk_stride = chunk_size[1] * 960

cache = {}  # streaming state carried across generate() calls

# BUGFIX: the original `int(len((speech)-1)/chunk_stride+1)` subtracted 1 from
# every SAMPLE (elementwise on the numpy array), not from the length, yielding
# int(len(speech)/chunk_stride + 1). When len(speech) is an exact multiple of
# chunk_stride that produced one extra, EMPTY final chunk — and an empty chunk
# raises "not enough values to unpack" inside model.inference. Use true
# ceiling division instead.
total_chunk_num = (len(speech) + chunk_stride - 1) // chunk_stride
print(f"total_chunk_num={total_chunk_num},chunk_stride={chunk_stride}")

for i in range(total_chunk_num):
    speech_chunk = speech[i * chunk_stride:(i + 1) * chunk_stride]
    if len(speech_chunk) == 0:
        # Defensive guard: never feed an empty chunk to the model.
        continue
    is_final = i == total_chunk_num - 1
    print(f"i={i},is_final={is_final},speech_chunk={len(speech_chunk)}")
    res = model.generate(input=speech_chunk,
                         cache=cache,
                         is_final=is_final,
                         chunk_size=chunk_size,
                         encoder_chunk_look_back=encoder_chunk_look_back,
                         decoder_chunk_look_back=decoder_chunk_look_back)
    print(res)
What's your environment?
- OS (e.g., Linux): Windows11
- FunASR Version (e.g., 1.0.0): 1.2.4
- ModelScope Version (e.g., 1.11.0): 1.23.1
- PyTorch Version (e.g., 2.0.0): 2.6.0
- How you installed funasr (pip, source): pip
- Python version: 3.10.16
- GPU (e.g., V100M32)
- CUDA/cuDNN version (e.g., cuda11.7):
- Docker version (e.g., funasr-runtime-sdk-cpu-0.4.1)
- Any other relevant information:
报错信息
ValueError Traceback (most recent call last)
Cell In[38], line 5
3 is_final = i == total_chunk_num - 1
4 print(f"i={i},is_final={is_final},speech_chunk={len(speech_chunk)}")
----> 5 res = model.generate(input=speech_chunk,
6 cache=cache,
7 is_final=is_final,
8 chunk_size=chunk_size,
9 encoder_chunk_look_back=encoder_chunk_look_back,
10 decoder_chunk_look_back=decoder_chunk_look_back)
11 print(res)
File c:\xxx\lib\site-packages\funasr\auto\auto_model.py:303, in AutoModel.generate(self, input, input_len, **cfg)
301 def generate(self, input, input_len=None, **cfg):
302 if self.vad_model is None:
--> 303 return self.inference(input, input_len=input_len, **cfg)
305 else:
306 return self.inference_with_vad(input, input_len=input_len, **cfg)
File c:\xxx\lib\site-packages\funasr\auto\auto_model.py:345, in AutoModel.inference(self, input, input_len, model, kwargs, key, **cfg)
343 time1 = time.perf_counter()
344 with torch.no_grad():
--> 345 res = model.inference(**batch, **kwargs)
346 if isinstance(res, (list, tuple)):
...
--> 437 batch_size, timesteps, input_dim = x.size()
438 start_idx = 0
439 if cache is not None: