simpleT5
simpleT5 copied to clipboard
ValueError: text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).
import io

import soundfile as sf
import speech_recognition as sr
import torch
from IPython.display import Audio
from pydub import AudioSegment
from scipy.io import wavfile
from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2ForCTC, Wav2Vec2Processor
# Live microphone transcription with a pretrained Wav2Vec2 CTC model.
#
# Raw audio must be prepared by Wav2Vec2Processor, which routes waveforms to
# the feature extractor. Calling Wav2Vec2CTCTokenizer directly on audio samples
# fails with "ValueError: text input must of type `str` ..." because that
# class only tokenizes text (it is still the right tool for decoding ids).
SAMPLE_RATE = 16000  # used for both microphone capture and feature extraction
MODEL_NAME = "facebook/wav2vec2-base-960h"

processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
# Keep the original module-level name bound; it is the processor's CTC tokenizer.
tokenizer = processor.tokenizer
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME)

r = sr.Recognizer()
with sr.Microphone(sample_rate=SAMPLE_RATE) as source:
    print("speak")
    while True:
        # Capture one utterance and re-read it as WAV to get the raw samples.
        audio = r.listen(source)
        data = io.BytesIO(audio.get_wav_data())
        clip = AudioSegment.from_file(data)
        x = torch.FloatTensor(clip.get_array_of_samples())
        print(x)

        # Feature extraction: hand the waveform to the processor as a NumPy
        # array so it is treated as audio, not as text.
        inputs = processor(
            x.numpy(),
            sampling_rate=SAMPLE_RATE,
            return_tensors='pt',
            padding='longest',
        ).input_values

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(inputs).logits

        # Greedy CTC decoding: most likely token per frame, then collapse.
        tokens = torch.argmax(logits, dim=-1)
        text = processor.batch_decode(tokens)
        print('you said: ', str(text).lower())
Take a look at #10.