Traceback (most recent call last):
File "/Users/guowenchao/Job/AI/FireRedTeadASR/FireRedASR/examples/fireredasr/speech2text.py", line 105, in <module>
main(args)
File "/Users/guowenchao/Job/AI/FireRedTeadASR/FireRedASR/examples/fireredasr/speech2text.py", line 54, in main
results = model.transcribe(
File "/opt/homebrew/Caskroom/miniconda/base/envs/fireredasr/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/Users/guowenchao/Job/AI/FireRedTeadASR/FireRedASR/examples/fireredasr/models/fireredasr.py", line 87, in transcribe
generated_ids = self.model.transcribe(
File "/Users/guowenchao/Job/AI/FireRedTeadASR/FireRedASR/examples/fireredasr/models/fireredasr_llm.py", line 122, in transcribe
encoder_outs, enc_lengths, enc_mask = self.encoder(padded_feat, feat_lengths)
File "/opt/homebrew/Caskroom/miniconda/base/envs/fireredasr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/homebrew/Caskroom/miniconda/base/envs/fireredasr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
File "/Users/guowenchao/Job/AI/FireRedTeadASR/FireRedASR/examples/fireredasr/models/module/conformer_encoder.py", line 37, in forward
enc_output = enc_layer(enc_output, pos_emb, slf_attn_mask=src_mask,
File "/opt/homebrew/Caskroom/miniconda/base/envs/fireredasr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/homebrew/Caskroom/miniconda/base/envs/fireredasr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
File "/Users/guowenchao/Job/AI/FireRedTeadASR/FireRedASR/examples/fireredasr/models/module/conformer_encoder.py", line 67, in forward
out = self.mhsa(out, out, out, pos_emb, mask=slf_attn_mask)[0]
File "/opt/homebrew/Caskroom/miniconda/base/envs/fireredasr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/opt/homebrew/Caskroom/miniconda/base/envs/fireredasr/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
File "/Users/guowenchao/Job/AI/FireRedTeadASR/FireRedASR/examples/fireredasr/models/module/conformer_encoder.py", line 316, in forward
attn_scores = matrix_ac + matrix_bd
RuntimeError: The size of tensor a (16263) must match the size of tensor b (5000) at non-singleton dimension 3
- ref=/Users/guowenchao/Job/AI/FireRedTeadASR/test_DATA/out/text
- wer.py --print_sentence_wer 1 --do_tn 0 --rm_special 0 --ref /Users/guowenchao/Job/AI/FireRedTeadASR/test_DATA/out/text --hyp /Users/guowenchao/Job/AI/FireRedTeadASR/test_DATA/out/out.txt
- tail -n8 /Users/guowenchao/Job/AI/FireRedTeadASR/test_DATA/out/out.txt.wer
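Cause: the input audio is too long. The RuntimeError above shows a sequence length of 16263 being added to a tensor capped at 5000 in the encoder's self-attention, which suggests the input exceeds the maximum length the model supports. See the input length limitations: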
https://github.com/FireRedTeam/FireRedASR?tab=readme-ov-file#input-length-limitations