agents icon indicating copy to clipboard operation
agents copied to clipboard

Listening for keywords with before_llm_callback

Open enting8696 opened this issue 3 months ago • 0 comments

This is what I'm currently trying to achieve:

when I say "stop talking" to the agent, the agent does not generate a reply, but if I say "start chatting" next time, the agent resumes generating and talking.

I tried using the before_llm_callback function to monitor the content of each exchange between me and the agent. However, because "stop talking" is only spoken once, the chat history still contains that "stop talking" message when I later say "start chatting", so the agent never resumes generating replies. Can you provide assistance? I would be grateful!

This is my code:

import asyncio
import logging
from dotenv import load_dotenv
from livekit import rtc
from livekit.agents import (
    AutoSubscribe,
    JobContext,
    JobProcess,
    WorkerOptions,
    cli,
    llm,
    metrics,
)
from livekit.agents.pipeline import VoicePipelineAgent
from livekit.plugins import deepgram, openai, silero

load_dotenv()  # load API keys and other settings from a local .env file
logger = logging.getLogger("voice-assistant")


def prewarm(proc: JobProcess):
    """Load the Silero VAD model once per worker process and cache it in userdata."""
    vad_model = silero.VAD.load()
    proc.userdata["vad"] = vad_model


async def entrypoint(ctx: JobContext):
    """Connect to a LiveKit room and run a voice assistant that can be muted.

    Saying "stop talking" suppresses LLM replies; saying "start chatting"
    resumes them. The suspension state is kept as a flag on the agent
    instance, so it persists across turns even though the old "stop talking"
    message remains in the chat history.
    """
    initial_ctx = llm.ChatContext().append(
        role="system",
        text=(
            "Your name is JR, powerful assistant "
        ),
    )

    logger.info(f"connecting to room {ctx.room.name}")
    await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)

    # Wait for the first participant to connect
    participant = await ctx.wait_for_participant()
    logger.info(f"starting voice assistant for participant {participant.identity}")

    def before_llm_callback(agent, text_context):
        """Gate LLM generation on a sticky suspend flag.

        Returning False cancels generation for this turn; returning None
        lets the pipeline proceed with default behavior. Only the *latest*
        message is inspected, so an old "stop talking" in the history
        cannot keep the agent muted after "start chatting" is heard.
        """
        last_message = ""
        if text_context.messages:
            content = text_context.messages[-1].content
            # ChatMessage.content may be a list of parts rather than a plain
            # string — normalize so the substring checks below cannot raise.
            # (assumes str(content) is a reasonable fallback — TODO confirm)
            last_message = content if isinstance(content, str) else str(content)

        if "stop talking" in last_message:
            # Fixed: the original line had mismatched quotes (SyntaxError).
            print("stop talking")
            agent.suspend_llm = True
            return False

        if "start chatting" in last_message:
            print("start chatting")
            agent.suspend_llm = False
            return None

        # No keyword this turn: honor the sticky flag (default: not suspended).
        return None if not getattr(agent, 'suspend_llm', False) else False

    agent = VoicePipelineAgent(
        vad=ctx.proc.userdata["vad"],
        stt=deepgram.STT(),
        llm=openai.LLM(model="gpt-4o"),
        tts=openai.TTS(),
        chat_ctx=initial_ctx,
        before_llm_cb=before_llm_callback,
    )

    # Start the agent
    agent.start(ctx.room, participant)

    usage_collector = metrics.UsageCollector()

    # Collect and log metrics
    @agent.on("metrics_collected")
    def _on_metrics_collected(mtrcs: metrics.AgentMetrics):
        metrics.log_metrics(mtrcs)
        usage_collector.collect(mtrcs)

    async def log_usage():
        summary = usage_collector.get_summary()
        # Lazy %-formatting per logging best practice; output is unchanged
        # (the literal "$" from the original message is preserved).
        logger.info("Usage: $%s", summary)

    ctx.add_shutdown_callback(log_usage)

    # Log user speech (STT) and assistant speech (TTS). These handlers
    # receive ChatMessage objects, not plain strings — hence `.content`
    # (the original `transcript: str` annotations were misleading).
    @agent.on("user_speech_committed")
    def on_user_speech_committed(msg: llm.ChatMessage):
        print(f"User speech (STT): {msg.content}")

    @agent.on("agent_speech_committed")
    def on_agent_speech_committed(msg: llm.ChatMessage):
        print(f"Agent speech (TTS): {msg.content}")

    # Initial greeting
    await agent.say("Hi", allow_interruptions=True)

if __name__ == "__main__":
    # Run the worker; `prewarm` loads the VAD model before jobs are dispatched.
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint, prewarm_fnc=prewarm))

enting8696 avatar Nov 03 '24 05:11 enting8696