为什么使用paraformer-zh + fsmn-vad + cam++等运行第二次使用是CPU?
What is your question?
为什么第一次运行是GPU,然后最后一个进度条特别慢,结束后,再次运行后就是CPU运行了
下面是完整的代码
Code
import os
import sys
import torch
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
from app.config import CONFIG
from app.config.logger import get_logger
from app.config.json_queue import JsonQueue
from funasr import AutoModel
import torchaudio
import tempfile
logger = get_logger("funasr_worker")
ASR_TASKS_QUEUE = JsonQueue("asr_tasks_queue.json")
# Enable TF32 math for CUDA matmuls and cuDNN kernels (faster on Ampere+ GPUs,
# slightly reduced float32 precision).
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
class FunAsrModel:
    """Wrapper around a FunASR AutoModel pipeline (ASR + VAD + punctuation + speaker).

    Holds a single loaded model and exposes :meth:`run_all` for full-file
    recognition with timestamps and speaker diarization.
    """

    def __init__(self, device="cuda"):
        """
        Args:
            device: torch device string inference should run on
                (e.g. ``"cuda"``, ``"cpu"``).
        """
        self.device = device
        self.model = None
        self.load_model()

    def load_model(self):
        """Load the FunASR model and auxiliary modules onto ``self.device``."""
        logger.info("Loading FunASR model...")
        self.model = AutoModel(
            model="paraformer-zh",
            vad_model="fsmn-vad",
            vad_kwargs={"max_single_segment_time": 30000},
            punc_model="ct-punc",
            spk_model="cam++",
            # Bug fix: was hard-coded to "cuda", silently ignoring the device
            # passed to __init__ (so device="cpu" callers still got CUDA).
            device=self.device,
            disable_update=True,
            compute_type="float32",
        )
        logger.info("FunASR model loaded successfully.")

    def run_all(self, audio_path, hotword=None):
        """Run full recognition on one audio file.

        Args:
            audio_path: path to the input audio file (wav/mp3 the model accepts).
            hotword: optional hotword string forwarded to the model.

        Returns:
            The result list produced by ``AutoModel.generate``.
        """
        logger.info("正在运行funasr模型...")
        # ``temp_wav_path``/``input_path`` support an optional mp3->wav
        # pre-conversion step; when no conversion happens they alias the input.
        temp_wav_path = None
        input_path = audio_path
        try:
            # Workaround for the observed GPU->CPU fallback: after a long
            # (>1h) audio file, the second run would silently execute on CPU.
            # Pinning the intra-op thread count before and after generate()
            # was confirmed to keep inference on the GPU.
            torch.set_num_threads(4)
            res = self.model.generate(
                # Bug fix: was ``audio_path``, bypassing ``input_path`` — a
                # re-enabled mp3->wav conversion would have been ignored.
                input=input_path,
                hotword=hotword,
                use_itn=True,
                batch_size_s=60,
                merge_vad=True,
                merge_length_s=30,
                output_timestamp=True,
                # NOTE(review): 3000 ms here vs 30000 ms at load time — confirm
                # which max segment length is actually intended.
                vad_kwargs={"max_single_segment_time": 3000},
            )
            torch.set_num_threads(4)
        finally:
            # Remove the temporary wav if a conversion step created one.
            if temp_wav_path and os.path.exists(temp_wav_path):
                os.remove(temp_wav_path)
                logger.info(f"已删除临时文件: {temp_wav_path}")
        return res
_funasr_model_instance = None


def get_funasr_model():
    """Return the process-wide ``FunAsrModel`` singleton, creating it lazily."""
    global _funasr_model_instance
    if _funasr_model_instance is not None:
        return _funasr_model_instance
    _funasr_model_instance = FunAsrModel()
    return _funasr_model_instance
def process_audio_batch(audio_list, device="cuda"):
    """Recognize every file in *audio_list* using one fresh model instance.

    Args:
        audio_list: iterable of audio file paths.
        device: torch device string for the model (default ``"cuda"``).

    Returns:
        A list with one recognition result per input file, in order.
    """
    worker = FunAsrModel(device=device)
    return [worker.run_all(audio_file, hotword="魔搭") for audio_file in audio_list]
if __name__ == "__main__":
    # Smoke test: run the same clip twice through the shared singleton to
    # reproduce the "second run falls back to CPU" scenario.
    asr = get_funasr_model()
    for clip in ("310772.mp3", "310772.mp3"):
        print(f"===: {torch.cuda.is_available()}")
        result = asr.run_all(clip, hotword="魔搭")
        print("完成")
# if __name__ == "__main__":
# audio_files = ["310772.mp3", "310772.mp3"]
# results = process_audio_batch(audio_files)
# for r in results:
# print("完成")
# print(r)
What have you tried?
- 我尝试使用torch.cuda.empty_cache()但是没有效果
- 尝试AI的方法,但是都没有效果。
What's your environment?
- OS (e.g., Linux):
- FunASR Version (e.g., 1.2.6):
- ModelScope Version (e.g., 1.29.0):
- PyTorch Version (e.g., 2.7.1):
- How you installed funasr (pip, source): pip install funasr
- Python version: 3.10
- GPU (e.g., Persistence-M)
- CUDA/cuDNN version (e.g., cuda12.4):
- Any other relevant information:
我查看了类似的问题和答案,然后根据其他人的答案尝试了 在model.generate 前后加了 torch.set_num_threads(4)
torch.set_num_threads(4)
res = self.model.generate()
torch.set_num_threads(4)
目前这样临时解决了这个问题,不过能否在源码里修复?
What is your question?
为什么第一次运行是GPU,然后最后一个进度条特别慢,结束后,再次运行后就是CPU运行了
![]()
下面是完整的代码
Code
import os import sys import torch sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) from app.config import CONFIG from app.config.logger import get_logger from app.config.json_queue import JsonQueue from funasr import AutoModel import torchaudio import tempfile logger = get_logger("funasr_worker") ASR_TASKS_QUEUE = JsonQueue("asr_tasks_queue.json") # 启用 TF32 加速 torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True class FunAsrModel: def __init__(self, device="cuda"): self.device = device self.model = None self.load_model() def load_model(self): """ 加载 FunASR 模型及相关模块。 """ logger.info("Loading FunASR model...") asr_model_zh = CONFIG.get("models_config", "paraformer-zh") vad_model = CONFIG.get("models_config", "fsmn-vad") punc_model = CONFIG.get("models_config", "ct-punc") spk_model = CONFIG.get("models_config", "spk-model") # self.model = AutoModel( # model=asr_model_zh, # vad_model=vad_model, # vad_kwargs={"max_single_segment_time": 30000}, # punc_model=punc_model, # spk_model=spk_model, # device=self.device, # disable_update=True # ) self.model = AutoModel( model="paraformer-zh", vad_model="fsmn-vad", vad_kwargs={"max_single_segment_time": 30000}, punc_model="ct-punc", spk_model="cam++", device="cuda", disable_update=True, compute_type="float32" ) logger.info("FunASR model loaded successfully.") def run_all(self, audio_path, hotword=None): """ 对音频进行完整识别。 如果输入是 mp3,则先转换为 wav,识别完成后删除临时 wav 文件。 """ logger.info("正在运行funasr模型...") temp_wav_path = None input_path = audio_path # # 如果是 mp3,则转换为 wav # if audio_path.lower().endswith(".mp3"): # waveform, sr = torchaudio.load(audio_path) # if sr != 16000: # waveform = torchaudio.transforms.Resample(sr, 16000)(waveform) # temp_wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False) # temp_wav_path = temp_wav_file.name # torchaudio.save(temp_wav_path, waveform, 16000) # input_path = temp_wav_path # temp_wav_file.close() try: res = self.model.generate( 
input=audio_path, hotword=hotword, use_itn=True, batch_size_s=60, merge_vad=True, merge_length_s=30, output_timestamp=True, vad_kwargs={"max_single_segment_time": 3000} ) finally: # 识别完成后删除临时 wav if temp_wav_path and os.path.exists(temp_wav_path): os.remove(temp_wav_path) logger.info(f"已删除临时文件: {temp_wav_path}") return res _funasr_model_instance = None def get_funasr_model(): """ 获取全局 FunAsrModel 单例实例。 """ global _funasr_model_instance if _funasr_model_instance is None: _funasr_model_instance = FunAsrModel() return _funasr_model_instance def process_audio_batch(audio_list, device="cuda"): model = FunAsrModel(device=device) results = [] for audio_file in audio_list: res = model.run_all(audio_file, hotword="魔搭") results.append(res) return results if __name__ == "__main__": model = get_funasr_model() for audio_file in [r"310772.mp3", "310772.mp3"]: print(f"===: {torch.cuda.is_available()}") result = model.run_all(audio_file, hotword="魔搭") print("完成") # if __name__ == "__main__": # audio_files = ["310772.mp3", "310772.mp3"] # results = process_audio_batch(audio_files) # for r in results: # print("完成") # print(r)What have you tried?
- 我尝试使用torch.cuda.empty_cache()但是没有效果
- 尝试AI的方法,但是都没有效果。
What's your environment?
- OS (e.g., Linux):
- FunASR Version (e.g., 1.2.6):
- ModelScope Version (e.g., 1.29.0):
- PyTorch Version (e.g., 2.7.1):
- How you installed funasr (pip, source): pip install funasr
- Python version: 3.10
- GPU (e.g., Persistence-M)
- CUDA/cuDNN version (e.g., cuda12.4):
- Any other relevant information:
方便用稍微长一些的音频测一下吗?
@slin000111 您好,我的音频都是一个小时以上的。 目前我是增加了torch.set_num_threads(4) 临时解决了此问题。
torch.set_num_threads(4)
res = self.model.generate()
torch.set_num_threads(4)
目前已经运行两天,都是使用的GPU。
我把paraformer-zh换成sensevoice-small,报错
ERROR:root:Only 'iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch' and 'iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch' can predict timestamp, and speaker diarization relies on timestamps.
Traceback (most recent call last):
File "/media/DataWork/code/sensevoice/test_funasr.py", line 50, in
你好我这边在npu上运行也不在npu上一直在cpu上不知道是什么原因
@slin000111 您好,我的音频都是一个小时以上的。 目前我是增加了torch.set_num_threads(4) 临时解决了此问题。
torch.set_num_threads(4) res = self.model.generate() torch.set_num_threads(4)目前已经运行两天,都是使用的GPU。
我也遇到这个bug,只有在长音频上会出现,我这边有两种音频,一种1小时以上的,只要不完全重新加载模型,第二次开始必CPU。一种60秒内的,转了几万个都没问题一直GPU。
目前使用这个方法解决了,感谢!
下面是完整的代码