
Bot Audio Out Cracking and Missing Frames

Open skelleex opened this issue 6 months ago • 3 comments

When using ElevenLabs, the bot's audio seems to be cracking and dropping frames. Is there any way to stop this from happening? The code I used is below; the only difference is that I have the FrameLogger enabled for logging. I'm running on Replit, but not as a deployment.

```python
import asyncio
import aiohttp
import os
import sys

from pipecat.frames.frames import LLMMessagesFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_response import (
    LLMAssistantResponseAggregator, LLMUserResponseAggregator)
from pipecat.services.elevenlabs import ElevenLabsTTSService
from pipecat.services.openai import OpenAILLMService
from pipecat.transports.services.daily import DailyParams, DailyTransport
from pipecat.vad.silero import SileroVADAnalyzer

from runner import configure

from loguru import logger

from dotenv import load_dotenv

load_dotenv(override=True)

logger.remove(0)
logger.add(sys.stderr, level="DEBUG")


async def main(room_url: str, token):
    async with aiohttp.ClientSession() as session:
        transport = DailyTransport(
            room_url,
            token,
            "Respond bot",
            DailyParams(
                audio_out_enabled=True,
                transcription_enabled=True,
                vad_enabled=True,
                vad_analyzer=SileroVADAnalyzer()
            )
        )

        tts = ElevenLabsTTSService(
            aiohttp_session=session,
            api_key=os.getenv("ELEVENLABS_API_KEY"),
            voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
        )

        llm = OpenAILLMService(
            api_key=os.getenv("OPENAI_API_KEY"),
            model="gpt-4o")

        messages = [
            {
                "role": "system",
                "content": "You are a helpful LLM in a WebRTC call. Your goal is to demonstrate your capabilities in a succinct way. Your output will be converted to audio so don't include special characters in your answers. Respond to what the user said in a creative and helpful way.",
            },
        ]

        tma_in = LLMUserResponseAggregator(messages)
        tma_out = LLMAssistantResponseAggregator(messages)

        pipeline = Pipeline([
            transport.input(),   # Transport user input
            tma_in,              # User responses
            llm,                 # LLM
            tts,                 # TTS
            transport.output(),  # Transport bot output
            tma_out              # Assistant spoken responses
        ])

        task = PipelineTask(pipeline, PipelineParams(
            allow_interruptions=True,
            enable_metrics=True,
            report_only_initial_ttfb=True,
        ))

        @transport.event_handler("on_first_participant_joined")
        async def on_first_participant_joined(transport, participant):
            transport.capture_participant_transcription(participant["id"])
            # Kick off the conversation.
            messages.append(
                {"role": "system", "content": "Please introduce yourself to the user."})
            await task.queue_frames([LLMMessagesFrame(messages)])

        runner = PipelineRunner()

        await runner.run(task)


if __name__ == "__main__":
    (url, token) = configure()
    asyncio.run(main(url, token))
```
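
For reference, here is a minimal sketch of how the FrameLogger is wired in (the exact placement in my script may differ; the prefix string "After TTS" is just an example label, and `pipecat.processors.logger.FrameLogger` is the processor being referred to):

```python
from pipecat.processors.logger import FrameLogger

# Log frames as they flow through this point in the pipeline, so the audio
# frames coming out of the TTS service can be inspected.
frame_logger = FrameLogger("After TTS")

pipeline = Pipeline([
    transport.input(),   # Transport user input
    tma_in,              # User responses
    llm,                 # LLM
    tts,                 # TTS
    frame_logger,        # Log frames between TTS and transport output
    transport.output(),  # Transport bot output
    tma_out              # Assistant spoken responses
])
```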

skelleex avatar Jul 30 '24 19:07 skelleex