ttsmms
ttsmms copied to clipboard
Allow 16-bit PCM output, which is the more common format, and return the audio as a bytes array.
- This makes it all the more useful, since you can then manipulate the audio far more easily in, say, PyAudio or another library.
I'm hoping this shouldn't break anything.
I thought I'd share this somewhere. This probably isn't the best place for it, but since it uses my pcm16 code, here it is.
The problem: it's really slow for repeated synthesis calls. The answer: wrap it in a worker thread.
import pyaudio
from ttsmms import TTS
import threading
from queue import Queue, Empty
def play_audio(audio_bytes, sample_rate=16000):
    """Play raw 16-bit mono PCM audio through a PyAudio output stream.

    Args:
        audio_bytes: Raw sample data, written to the stream unchanged. The
            stream is opened as ``paInt16`` with a single channel, so the
            data is expected to be in that layout.
        sample_rate: Playback rate passed to the stream as ``rate``.

    Returns:
        None. Playback is best-effort: any ``Exception`` raised while
        opening, writing to, or closing the stream is caught and reported
        with ``print`` instead of propagating.
    """
    try:
        p = pyaudio.PyAudio()
        try:
            stream = p.open(
                format=pyaudio.paInt16,  # Ensure the format matches 16-bit PCM
                channels=1,
                rate=sample_rate,
                output=True,
            )
            try:
                stream.write(audio_bytes)
                stream.stop_stream()
            finally:
                # Close the stream even when write/stop fails, so the
                # handle is not leaked.
                stream.close()
        finally:
            # Always release the PyAudio instance, including when the stream
            # could not be opened at all.
            p.terminate()
    except Exception as e:
        print(f"Error playing audio: {e}")
class TTSWorker(threading.Thread):
    """Background thread that serves text-to-speech requests from a queue.

    A single ``TTS`` instance is constructed up front and reused for every
    request. Each text taken from ``queue`` is synthesized and the outcome is
    put on ``response_queue``, one response per request, in request order.

    A response is either the value returned by
    ``TTS.synthesis(text, convert_to_pcm16=True)`` or, when synthesis raised,
    the exception instance itself. Forwarding the exception keeps a consumer
    blocked on ``response_queue`` from waiting forever and keeps the worker
    alive for later requests.

    The thread is a daemon thread, so a worker that was never stopped does
    not keep the interpreter from exiting.
    """

    def __init__(self, model_path, queue, response_queue):
        """Create the worker.

        Args:
            model_path: Passed to ``TTS(...)`` to construct the synthesizer.
            queue: Queue of texts to synthesize; ``None`` is the shutdown
                sentinel.
            response_queue: Queue that receives one response per request.
        """
        super().__init__(daemon=True)
        self.tts = TTS(model_path)
        self.queue = queue
        self.response_queue = response_queue
        self.running = True

    def run(self):
        """Serve requests until ``running`` is cleared or ``None`` arrives."""
        while self.running:
            try:
                # Wake up once a second so a cleared ``running`` flag is
                # noticed even when no request arrives.
                text = self.queue.get(timeout=1)
            except Empty:
                continue
            if text is None:
                self.running = False
                break
            try:
                result = self.tts.synthesis(text, convert_to_pcm16=True)
            except Exception as exc:
                # Hand the failure to the consumer instead of dying silently
                # and leaving it blocked on the response queue.
                result = exc
            self.response_queue.put(result)

    def stop(self):
        """Ask the worker to exit and wait for it to finish.

        Safe to call on a worker that was never started or has already
        exited; in that case nothing is joined.
        """
        self.running = False
        # The sentinel wakes the worker immediately if it is blocked in get().
        self.queue.put(None)
        if self.is_alive():
            self.join()
# Request and response channels shared between callers and the worker thread
tts_queue, response_queue = Queue(), Queue()
# Build the background TTS worker on those channels and set it running
tts_worker = TTSWorker(
    model_path='/Users/willwade/mms_models/eng',
    queue=tts_queue,
    response_queue=response_queue,
)
tts_worker.start()
def synthesize_speech(text, timeout=None):
    """Send ``text`` to the TTS worker and wait for its response.

    Args:
        text: Text put on ``tts_queue`` for synthesis.
        timeout: Maximum number of seconds to wait for a response. ``None``
            (the default) waits indefinitely.

    Returns:
        The next item taken from ``response_queue``.

    Raises:
        queue.Empty: If ``timeout`` is given and no response arrives in time.
            The request stays queued, so a late response would be picked up
            by the next call; treat a timeout as fatal for this worker.
        Exception: If the item taken from ``response_queue`` is itself an
            exception instance, it is raised instead of being returned.
    """
    tts_queue.put(text)
    result = response_queue.get(timeout=timeout)
    if isinstance(result, Exception):
        raise result
    return result
# Example usage: synthesize and play each phrase in turn. Only the first
# request pays the slow start-up cost; later ones reuse the same worker.
try:
    for text in ("Hello world", "And all my friends"):
        result = synthesize_speech(text)
        audio_bytes = result["audio_bytes"]
        sample_rate = result["sampling_rate"]
        # Play the audio bytes
        play_audio(audio_bytes, sample_rate)
finally:
    # Stop the TTS worker when done, even if synthesis or playback failed,
    # so the worker thread is always shut down and joined.
    tts_worker.stop()
It's really quick the second, third, etc. time around.