FunASR
FunASR copied to clipboard
No module named 'funasr.datasets.ms_dataset'
What is your question?
进行快速训练的时候出现了以下报错:

ModuleNotFoundError                       Traceback (most recent call last)
Cell In[21], line 10
      7 from modelscope.trainers import build_trainer
      8 from modelscope.utils.constant import Tasks
---> 10 from funasr.datasets.ms_dataset import MsDataset
     11 from funasr.utils.compute_wer import compute_wer
     13 # 定义函数
ModuleNotFoundError: No module named 'funasr.datasets.ms_dataset'
Code
import os import json import shutil
from modelscope.pipelines import pipeline from modelscope.metainfo import Trainers from modelscope.trainers import build_trainer from modelscope.utils.constant import Tasks
from funasr.datasets.ms_dataset import MsDataset from funasr.utils.compute_wer import compute_wer
# 定义函数
def modelscope_finetune(params):
    """Fine-tune a ModelScope ASR model for one epoch and stage decoding files.

    After training, the auxiliary files needed for decoding are copied from
    the locally cached pretrained model into the work directory.

    Args:
        params: Mapping with the keys
            ``"model_dir"`` - work/output directory (created if missing),
            ``"dataset_name"`` - dataset name handed to ``MsDataset.load``
            under the ``speech_asr`` namespace,
            ``"modelscope_model_name"`` - model id handed to the trainer and
            used to locate the cached pretrained model.

    Raises:
        KeyError: If a required key is missing from ``params``.
        OSError: If one of the required files cannot be copied from the
            cached pretrained model directory.
    """
    # Read every required key up front so a malformed ``params`` fails
    # before the (potentially slow) dataset download and training start.
    model_dir = params["model_dir"]
    dataset_name = params["dataset_name"]
    model_name = params["modelscope_model_name"]

    # exist_ok=True already tolerates an existing directory, so no separate
    # os.path.exists() check is needed.
    os.makedirs(model_dir, exist_ok=True)

    # Per the original note, the loaded dataset is expected to carry the
    # splits ["train", "validation"].
    ds_dict = MsDataset.load(dataset_name, namespace='speech_asr')
    kwargs = dict(
        model=model_name,
        data_dir=ds_dict,
        work_dir=model_dir,
        max_epoch=1,
    )
    trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
    trainer.train()

    # expanduser("~") resolves to $HOME when it is set, but also works when
    # HOME is absent from the environment (where os.environ["HOME"] raises
    # KeyError).
    # NOTE(review): assumes the default ModelScope cache location - confirm
    # if the cache directory has been relocated.
    pretrained_model_path = os.path.join(
        os.path.expanduser("~"), ".cache/modelscope/hub", model_name)

    # Files copied next to the fine-tuned checkpoint; modelscope_infer reads
    # configuration.json from this directory.
    required_files = ("am.mvn", "decoding.yaml", "configuration.json")
    for file_name in required_files:
        shutil.copy(
            os.path.join(pretrained_model_path, file_name),
            os.path.join(model_dir, file_name),
        )
def modelscope_infer(params):
    """Decode a test set with a fine-tuned checkpoint and optionally report CER.

    The function rewrites ``configuration.json`` in the model directory so it
    points at the requested checkpoint, recreates an empty ``decode_results``
    directory, runs the ASR pipeline over ``wav.scp`` and, when a reference
    ``text`` file exists, scores the result and prints the last three lines
    of the score file.

    Args:
        params: Mapping with the keys
            ``"model_dir"`` - directory holding ``configuration.json`` and
            the fine-tuned checkpoint,
            ``"decoding_model_name"`` - checkpoint file name written into the
            configuration as ``model.am_model_name``,
            ``"test_data_dir"`` - directory containing ``wav.scp`` and,
            optionally, the reference ``text`` file.

    Raises:
        KeyError: If a required key is missing from ``params`` or the
            configuration has no ``"model"`` section.
        OSError: If ``configuration.json`` cannot be read or written.
    """
    model_dir = params["model_dir"]
    config_path = os.path.join(model_dir, "configuration.json")

    # prepare for decoding: select the checkpoint to decode with
    with open(config_path) as f:
        config_dict = json.load(f)
    config_dict["model"]["am_model_name"] = params["decoding_model_name"]
    with open(config_path, "w") as f:
        json.dump(config_dict, f, indent=4, separators=(',', ': '))

    # Start from an empty output directory so stale results are never scored.
    decoding_path = os.path.join(model_dir, "decode_results")
    if os.path.exists(decoding_path):
        shutil.rmtree(decoding_path)
    os.mkdir(decoding_path)

    # decoding
    inference_pipeline = pipeline(
        task=Tasks.auto_speech_recognition,
        model=model_dir,
        output_dir=decoding_path,
        batch_size=64
    )
    audio_in = os.path.join(params["test_data_dir"], "wav.scp")
    inference_pipeline(audio_in=audio_in)

    # compute CER if the ground-truth text is available
    text_in = os.path.join(params["test_data_dir"], "text")
    if os.path.exists(text_in):
        # NOTE(review): presumably the pipeline writes its hypotheses to
        # 1best_recog/token under output_dir - confirm against the pipeline.
        text_proc_file = os.path.join(decoding_path, "1best_recog/token")
        cer_file = os.path.join(decoding_path, "text.cer")
        compute_wer(text_in, text_proc_file, cer_file)

        # Print the last three lines from Python rather than shelling out to
        # `tail`: no shell quoting issues with the path, no dependency on an
        # external binary, and the text goes through sys.stdout, so it is
        # still captured when stdout is redirected. Kept best-effort like the
        # original: a missing file is reported rather than raised.
        try:
            with open(cer_file, encoding="utf-8", errors="replace") as f:
                summary_lines = f.readlines()[-3:]
        except OSError as err:
            print("could not read {}: {}".format(cer_file, err))
        else:
            print("".join(summary_lines), end="")
# 设置参数并运行微调和推理
# Fine-tuning settings: pretrained model id, training dataset name and the
# directory that receives the fine-tuned checkpoint.
finetune_params = dict(
    modelscope_model_name="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online",
    dataset_name="speech_asr_aishell1_subset",
    model_dir="./checkpoint",
)
modelscope_finetune(finetune_params)

# Inference settings: decode the validation data under the checkpoint
# directory using the checkpoint file named below.
infer_params = dict(
    model_dir="./checkpoint",
    decoding_model_name="1epoch.pb",
    test_data_dir="./checkpoint/data/validation",
)
modelscope_infer(infer_params)
What's your environment?
OS Version: Linux autodl-container-34fb1182ae-60c31bed 5.4.0-90-generic #101-Ubuntu SMP Fri Oct 15 20:00:55 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux /bin/bash: lsb_release: command not found
FunASR Version: Name: funasr Version: 1.1.2 Summary: FunASR: A Fundamental End-to-End Speech Recognition Toolkit Home-page: https://github.com/alibaba-damo-academy/FunASR.git Author: Speech Lab of Alibaba Group Author-email: [email protected] License: The MIT License Location: /root/FunASR Requires: scipy, librosa, jamo, PyYAML, soundfile, kaldiio, torch-complex, sentencepiece, jieba, rotary-embedding-torch, pytorch-wpe, editdistance, oss2, tqdm, umap-learn, jaconv, hydra-core, tensorboardX, requests Required-by:
ModelScope Version: Name: modelscope Version: 1.16.1 Summary: ModelScope: bring the notion of Model-as-a-Service to life. Home-page: https://github.com/modelscope/modelscope Author: ModelScope team Author-email: [email protected] License: Apache License 2.0 Location: /root/miniconda3/lib/python3.8/site-packages Requires: requests, tqdm, urllib3 Required-by:
PyTorch Version: 2.3.1+cu121
Python Version: 3.8.10 (default, Jun 4 2021, 15:09:15) [GCC 7.5.0]
GPU Info: Available GPUs: NVIDIA A100-SXM4-80GB
CUDA Version: 12.1
cuDNN Version: 8902