
Bot message before tool call is not added to context, leading to bot repeating itself.

Open gitraffe opened this issue 2 months ago • 6 comments

pipecat version

0.0.94

Python version

3.13.6

Operating System

macOS 26.0.1 (also reproduced in python:3.12-slim Docker container)

Issue description

When the model receives an instruction and, in a single turn, first produces a text response and then calls a tool, that text response is spoken but never added to the context aggregator. Because the text is missing from the context, the language model complies with the original instruction again on the follow-up request, so the bot repeats itself.

I've mostly been able to work around this by aggressively prompting the language model to always make tool calls before speaking any text, but it's not 100% reliable.

Hypothesis: This might be a race condition where the relevant frame doesn't reach the aggregator (e.g., because TTS takes a bit of time) before the harness feeds the tool call result back to the language model.
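
To make the hypothesis concrete, here is a sketch of the follow-up request's context in the failing case (copied from the "Generating chat from universal context" log line further down), next to the shape I would expect it to take. The expected version is my assumption, mirroring the Gemini message format from the logs; the only difference is the model's spoken text turn.

# Context actually sent on the follow-up request after the tool call
# (taken from the failing-case logs below; function call ids omitted):
actual_context = [
    {"role": "user", "parts": [{"text": "Say banana then call get current time."}]},
    {"role": "model", "parts": [{"function_call": {"name": "get_current_time", "args": {}}}]},
    {"role": "user", "parts": [{"function_response": {"name": "get_current_time",
                                                      "response": {"value": "The current time is 09:02 PM"}}}]},
]

# Context I would expect (assumed shape): the spoken text is part of the model
# turn, so the model knows it already said "banana" and doesn't repeat it.
expected_context = [
    {"role": "user", "parts": [{"text": "Say banana then call get current time."}]},
    {"role": "model", "parts": [{"text": "banana"},
                                {"function_call": {"name": "get_current_time", "args": {}}}]},
    {"role": "user", "parts": [{"function_response": {"name": "get_current_time",
                                                      "response": {"value": "The current time is 09:02 PM"}}}]},
]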

Reproduction steps

  1. Run the snippet below using SERVICE_ACCOUNT_PATH=... GEMINI_API_KEY=... python simple_chat_bot.py --transport webrtc and connect at http://localhost:7860/client/. SERVICE_ACCOUNT_PATH is the path to a Google service account JSON file and GEMINI_API_KEY is the standard Gemini API key.
  2. Instruct the bot with "Say the word banana. Then call get current time." (get_current_time is a tool available to the agent). Edit: Saying "First, say the word banana. Second, call get current time." is probably a slightly more reliable prompt.
  3. The bot will respond "Banana. Banana. The current time is ..."

This doesn't happen the other way round.

  1. Instruct the bot with "First, call get current time. Second, say the word banana. Third, tell me the time."
  2. The bot will respond "Banana. The current time is ...".

#
# Simple chat bot based on Pipecat quickstart example.
# Uses Google services (STT, LLM, TTS) and includes get_current_time() function.
#

"""Simple chat bot for testing function calling behavior.

Run the bot using::

    python playground/simple_chat_bot.py --transport webrtc

Then open http://localhost:7860 in your browser to connect.
"""

import os
from datetime import datetime
from dotenv import load_dotenv
from loguru import logger
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.adapters.schemas.tools_schema import ToolsSchema
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.google.llm import GoogleLLMService
from pipecat.services.google.stt import GoogleSTTService
from pipecat.services.google.tts import GoogleTTSService
from pipecat.services.llm_service import FunctionCallParams
from pipecat.transports.base_transport import BaseTransport, TransportParams

logger.info("✅ All components loaded successfully!")

load_dotenv(override=True)


async def get_current_time(params: FunctionCallParams, **kwargs):
    """Get the current time."""
    logger.info("⏰ Function get_current_time() called")

    current_time = datetime.now().strftime("%I:%M %p")
    result = f"The current time is {current_time}"

    logger.info(f"⏰ Function returning: {result}")
    await params.result_callback(result)


async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info("Starting bot with Google services")

    # Use service account for Google STT/TTS
    service_account_path = os.environ["SERVICE_ACCOUNT_PATH"]

    stt = GoogleSTTService(
        credentials_path=service_account_path,
        language="en-US",
    )

    tts = GoogleTTSService(
        credentials_path=service_account_path,
        voice_id="en-US-Journey-D",
    )

    llm = GoogleLLMService(
        api_key=os.environ["GEMINI_API_KEY"],
        model="gemini-2.0-flash-exp",
    )

    # Register the function
    llm.register_direct_function(get_current_time)
    tools = ToolsSchema(standard_tools=[get_current_time])  # type: ignore

    context = LLMContext([], tools)
    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
            transport.input(),  # Transport user input
            stt,                # Speech to text
            context_aggregator.user(),  # User responses
            llm,                # LLM
            tts,                # TTS
            transport.output(), # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
    )

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    logger.info("💬 Bot ready! Open http://localhost:7860 to connect")

    await runner.run(task)


async def bot(runner_args: RunnerArguments):
    """Main bot entry point."""

    transport_params = {
        "webrtc": lambda: TransportParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
            turn_analyzer=LocalSmartTurnAnalyzerV3(),
        ),
    }

    transport = await create_transport(runner_args, transport_params)

    await run_bot(transport, runner_args)


if __name__ == "__main__":
    from pipecat.runner.run import main

    main()

Expected behavior

The bot only speaks the same text once.

Actual behavior

The bot repeats itself if it makes the tool call second.

Logs

When the tool call happens second and the bot repeats itself:

2025-11-10 21:02:26.710 | INFO     | pipecat:<module>:14 - ᓚᘏᗢ Pipecat 0.0.94 (Python 3.13.6 (main, Aug 14 2025, 16:07:26) [Clang 20.1.4 ]) ᓚᘏᗢ
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.
2025-11-10 21:02:33.863 | INFO     | __main__:<module>:36 - ✅ All components loaded successfully!

🚀 Bot ready!
   → Open http://localhost:7860/client in your browser

Looking for dist directory at: /Users/till/gitraffe/verity-interview/backend/.venv/lib/python3.13/site-packages/pipecat_ai_small_webrtc_prebuilt/client/dist
INFO:     Started server process [57388]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://localhost:7860 (Press CTRL+C to quit)
INFO:     ::1:53456 - "GET /client/ HTTP/1.1" 304 Not Modified
INFO:     ::1:53458 - "GET /client/assets/index-CJ8kfZwS.js HTTP/1.1" 304 Not Modified
INFO:     ::1:53457 - "GET /client/assets/index-CdJgXBqL.css HTTP/1.1" 304 Not Modified
2025-11-10 21:02:45.615 | DEBUG    | pipecat.transports.smallwebrtc.connection:_initialize:276 - Initializing new peer connection
2025-11-10 21:02:45.654 | DEBUG    | pipecat.transports.smallwebrtc.connection:_create_answer:359 - Creating answer
2025-11-10 21:02:45.666 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_track:341 - Track audio received
2025-11-10 21:02:45.666 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_track:341 - Track video received
2025-11-10 21:02:45.677 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_icegatheringstatechange:337 - ICE gathering state is gathering
2025-11-10 21:02:45.696 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_icegatheringstatechange:337 - ICE gathering state is complete
2025-11-10 21:02:45.697 | DEBUG    | pipecat.transports.smallwebrtc.connection:_create_answer:362 - Setting the answer after the local description is created
2025-11-10 21:02:45.699 | DEBUG    | pipecat.transports.smallwebrtc.request_handler:handle_web_request:213 - webrtc_connection_callback executed successfully for peer: SmallWebRTCConnection#0
INFO:     ::1:53462 - "POST /api/offer HTTP/1.1" 200 OK
2025-11-10 21:02:45.700 | DEBUG    | pipecat.audio.vad.silero:__init__:147 - Loading Silero VAD model...
2025-11-10 21:02:45.880 | DEBUG    | pipecat.audio.vad.silero:__init__:169 - Loaded Silero VAD
2025-11-10 21:02:45.880 | DEBUG    | pipecat.audio.turn.smart_turn.local_smart_turn_v3:__init__:51 - Loading Local Smart Turn v3 model...
2025-11-10 21:02:46.013 | DEBUG    | pipecat.audio.turn.smart_turn.local_smart_turn_v3:__init__:82 - Loaded Local Smart Turn v3
2025-11-10 21:02:46.014 | DEBUG    | pipecat.runner.utils:_get_transport_params:383 - Using transport params for webrtc
2025-11-10 21:02:46.385 | INFO     | __main__:run_bot:53 - Starting bot with Google services
2025-11-10 21:02:47.147 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking Pipeline#0::Source -> SmallWebRTCInputTransport#0
2025-11-10 21:02:47.147 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking SmallWebRTCInputTransport#0 -> GoogleSTTService#0
2025-11-10 21:02:47.147 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking GoogleSTTService#0 -> LLMUserAggregator#0
2025-11-10 21:02:47.147 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking LLMUserAggregator#0 -> GoogleLLMService#0
2025-11-10 21:02:47.147 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking GoogleLLMService#0 -> GoogleTTSService#0
2025-11-10 21:02:47.148 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking GoogleTTSService#0 -> SmallWebRTCOutputTransport#0
2025-11-10 21:02:47.152 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking SmallWebRTCOutputTransport#0 -> LLMAssistantAggregator#0
2025-11-10 21:02:47.153 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking LLMAssistantAggregator#0 -> Pipeline#0::Sink
2025-11-10 21:02:47.153 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking PipelineTask#0::Source -> Pipeline#0
2025-11-10 21:02:47.153 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking Pipeline#0 -> PipelineTask#0::Sink
2025-11-10 21:02:47.153 | INFO     | __main__:run_bot:102 - 💬 Bot ready! Open http://localhost:7860 to connect
2025-11-10 21:02:47.153 | DEBUG    | pipecat.pipeline.runner:run:71 - Runner PipelineRunner#0 started running PipelineTask#0
2025-11-10 21:02:47.155 | DEBUG    | pipecat.pipeline.task:_wait_for_pipeline_start:602 - PipelineTask#0: Starting. Waiting for StartFrame#0 to reach the end of the pipeline...
2025-11-10 21:02:47.157 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_iceconnectionstatechange:331 - ICE connection state is checking, connection is connecting
2025-11-10 21:02:47.159 | DEBUG    | pipecat.transports.smallwebrtc.connection:_monitoring_connecting_state:529 - Monitoring connecting state
2025-11-10 21:02:47.159 | DEBUG    | pipecat.transports.smallwebrtc.connection:_handle_new_connection_state:564 - Connection state changed to: connecting
2025-11-10 21:02:47.159 | DEBUG    | pipecat.audio.vad.vad_analyzer:set_params:156 - Setting VAD params to: confidence=0.7 start_secs=0.2 stop_secs=0.2 min_volume=0.6
2025-11-10 21:02:47.159 | INFO     | pipecat.transports.smallwebrtc.transport:connect:449 - Connecting to Small WebRTC
2025-11-10 21:02:47.166 | DEBUG    | pipecat.services.google.stt:_connect:701 - Connecting to Google Speech-to-Text
2025-11-10 21:02:47.181 | INFO     | pipecat.transports.smallwebrtc.transport:connect:449 - Connecting to Small WebRTC
2025-11-10 21:02:47.182 | DEBUG    | pipecat.pipeline.task:_wait_for_pipeline_start:605 - PipelineTask#0: StartFrame#0 reached the end of the pipeline, pipeline is now ready.
2025-11-10 21:02:47.245 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_iceconnectionstatechange:331 - ICE connection state is completed, connection is connecting
2025-11-10 21:02:47.274 | DEBUG    | pipecat.transports.smallwebrtc.connection:_cancel_monitoring_connecting_state:549 - Cancelling the connecting timeout task
2025-11-10 21:02:47.277 | DEBUG    | pipecat.transports.smallwebrtc.connection:_handle_new_connection_state:564 - Connection state changed to: connected
2025-11-10 21:02:47.280 | DEBUG    | pipecat.transports.smallwebrtc.transport:on_connected:242 - Peer connection established.
2025-11-10 21:02:47.280 | WARNING  | pipecat.transports.smallwebrtc.connection:screen_video_input_track:649 - No screen video transceiver is available
2025-11-10 21:02:47.280 | DEBUG    | pipecat.transports.smallwebrtc.connection:replace_audio_track:447 - Replacing audio track audio
2025-11-10 21:02:47.290 | DEBUG    | pipecat.transports.smallwebrtc.connection:_handle_signalling_message:682 - Signalling message received: {'type': 'trackStatus', 'receiver_index': 0, 'enabled': True}
2025-11-10 21:02:47.293 | DEBUG    | pipecat.transports.smallwebrtc.connection:_handle_signalling_message:682 - Signalling message received: {'type': 'trackStatus', 'receiver_index': 1, 'enabled': True}
2025-11-10 21:02:47.294 | DEBUG    | pipecat.transports.smallwebrtc.transport:push_app_message:693 - Received app message inside SmallWebRTCInputTransport  {'label': 'rtvi-ai', 'type': 'client-ready', 'data': {'version': '1.0.0', 'about': {'library': '@pipecat-ai/client-react', 'library_version': '1.0.1', 'platform_details': {'browser': 'Safari', 'browser_version': '26.0.1', 'platform_type': 'desktop', 'engine': 'WebKit'}, 'platform': 'macOS', 'platform_version': '10.15.7'}}, 'id': 'f94fa4f1'}
2025-11-10 21:02:55.012 | DEBUG    | pipecat.transports.base_input:_handle_user_interruption:335 - User started speaking
2025-11-10 21:02:55.012 | DEBUG    | pipecat.pipeline.task:_source_push_frame:747 - PipelineTask#0: received interruption task frame InterruptionTaskFrame#0
2025-11-10 21:02:55.014 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.024669885635375977
2025-11-10 21:02:55.014 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_processing_metrics:152 - GoogleSTTService#0 processing time: 0.024826765060424805
2025-11-10 21:02:55.526 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.0010318756103515625
2025-11-10 21:02:55.688 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.014400959014892578
2025-11-10 21:02:55.820 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.008084774017333984
2025-11-10 21:02:56.055 | DEBUG    | pipecat.audio.turn.smart_turn.base_smart_turn:analyze_end_of_turn:165 - End of Turn result: EndOfTurnState.INCOMPLETE
2025-11-10 21:02:56.187 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.012149810791015625
2025-11-10 21:02:56.236 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.016325950622558594
2025-11-10 21:02:56.394 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.0040972232818603516
2025-11-10 21:02:56.813 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.018887996673583984
2025-11-10 21:02:57.020 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.004717111587524414
2025-11-10 21:02:57.424 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.011954069137573242
2025-11-10 21:02:57.623 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.010428905487060547
2025-11-10 21:02:57.913 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.0026891231536865234
2025-11-10 21:02:58.009 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.06113100051879883
2025-11-10 21:02:58.214 | DEBUG    | pipecat.audio.turn.smart_turn.base_smart_turn:analyze_end_of_turn:165 - End of Turn result: EndOfTurnState.COMPLETE
2025-11-10 21:02:58.214 | DEBUG    | pipecat.transports.base_input:_handle_user_interruption:356 - User stopped speaking
2025-11-10 21:02:58.608 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_processing_metrics:152 - GoogleSTTService#0 processing time: 0.017518043518066406
2025-11-10 21:02:59.115 | DEBUG    | pipecat.adapters.base_llm_adapter:from_standard_tools:127 - Retrieving the tools using the adapter: <class 'pipecat.adapters.services.gemini_adapter.GeminiLLMAdapter'>
2025-11-10 21:02:59.115 | DEBUG    | pipecat.services.google.llm:_stream_content_universal_context:871 - GoogleLLMService#0: Generating chat from universal context [None] | [{'parts': [{'text': 'Say banana then call get current time.'}], 'role': 'user'}]
2025-11-10 21:02:59.662 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleLLMService#0 TTFB: 0.5354058742523193
2025-11-10 21:02:59.729 | DEBUG    | pipecat.services.google.llm:_process_context:927 - Function call: get_current_time:668214ef-0f88-42da-abe8-8d0810b941b9
2025-11-10 21:02:59.732 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:start_llm_usage_metrics:173 - GoogleLLMService#0 prompt tokens: 21, completion tokens: 7
2025-11-10 21:02:59.739 | DEBUG    | pipecat.services.llm_service:_run_function_call:551 - GoogleLLMService#0 Calling function [get_current_time:668214ef-0f88-42da-abe8-8d0810b941b9] with arguments {}
2025-11-10 21:02:59.742 | INFO     | __main__:get_current_time:43 - ⏰ Function get_current_time() called
2025-11-10 21:02:59.742 | INFO     | __main__:get_current_time:48 - ⏰ Function returning: The current time is 09:02 PM
2025-11-10 21:02:59.744 | DEBUG    | pipecat.services.google.tts:run_tts:646 - GoogleTTSService#0: Generating TTS [banana
]
2025-11-10 21:02:59.746 | DEBUG    | pipecat.processors.aggregators.llm_response_universal:_handle_function_calls_started:689 - LLMAssistantAggregator#0 FunctionCallsStartedFrame: ['get_current_time:668214ef-0f88-42da-abe8-8d0810b941b9']
2025-11-10 21:02:59.748 | DEBUG    | pipecat.processors.aggregators.llm_response_universal:_handle_function_call_in_progress:694 - LLMAssistantAggregator#0 FunctionCallInProgressFrame: [get_current_time:668214ef-0f88-42da-abe8-8d0810b941b9]
2025-11-10 21:02:59.748 | DEBUG    | pipecat.processors.aggregators.llm_response_universal:_handle_function_call_result:725 - LLMAssistantAggregator#0 FunctionCallResultFrame: [get_current_time:668214ef-0f88-42da-abe8-8d0810b941b9]
2025-11-10 21:02:59.920 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:start_tts_usage_metrics:191 - GoogleTTSService#0 usage characters: 7
2025-11-10 21:03:00.238 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleTTSService#0 TTFB: 0.49398183822631836
2025-11-10 21:03:00.271 | DEBUG    | pipecat.transports.base_output:_bot_started_speaking:599 - Bot started speaking
2025-11-10 21:03:00.454 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_processing_metrics:152 - GoogleTTSService#0 processing time: 0.7099640369415283
2025-11-10 21:03:00.460 | DEBUG    | pipecat.adapters.base_llm_adapter:from_standard_tools:127 - Retrieving the tools using the adapter: <class 'pipecat.adapters.services.gemini_adapter.GeminiLLMAdapter'>
2025-11-10 21:03:00.460 | DEBUG    | pipecat.services.google.llm:_stream_content_universal_context:871 - GoogleLLMService#0: Generating chat from universal context [None] | [{'parts': [{'text': 'Say banana then call get current time.'}], 'role': 'user'}, {'parts': [{'function_call': {'id': '668214ef-0f88-42da-abe8-8d0810b941b9', 'args': {}, 'name': 'get_current_time'}}], 'role': 'model'}, {'parts': [{'function_response': {'id': '668214ef-0f88-42da-abe8-8d0810b941b9', 'name': 'get_current_time', 'response': {'value': 'The current time is 09:02 PM'}}}], 'role': 'user'}]
2025-11-10 21:03:00.893 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleLLMService#0 TTFB: 0.4326040744781494
2025-11-10 21:03:00.975 | DEBUG    | pipecat.services.google.tts:run_tts:646 - GoogleTTSService#0: Generating TTS [banana
The current time is 09:02 PM.]
2025-11-10 21:03:00.988 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:start_llm_usage_metrics:173 - GoogleLLMService#0 prompt tokens: 43, completion tokens: 15
2025-11-10 21:03:00.993 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:start_tts_usage_metrics:191 - GoogleTTSService#0 usage characters: 36
2025-11-10 21:03:01.270 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleTTSService#0 TTFB: 0.2948417663574219
2025-11-10 21:03:01.977 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_processing_metrics:152 - GoogleTTSService#0 processing time: 1.0015168190002441
2025-11-10 21:03:04.865 | DEBUG    | pipecat.transports.base_output:_bot_stopped_speaking:617 - Bot stopped speaking
2025-11-10 21:03:07.166 | WARNING  | pipecat.transports.smallwebrtc.transport:read_audio_frame:361 - Received an unexpected media stream error while reading the audio.
2025-11-10 21:03:07.168 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_ended:346 - Track audio ended
2025-11-10 21:03:07.170 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_iceconnectionstatechange:331 - ICE connection state is closed, connection is closed
2025-11-10 21:03:07.170 | DEBUG    | pipecat.transports.smallwebrtc.connection:_handle_new_connection_state:564 - Connection state changed to: closed
2025-11-10 21:03:07.172 | INFO     | pipecat.transports.smallwebrtc.request_handler:handle_disconnected:207 - Discarding peer connection for pc_id: SmallWebRTCConnection#0
2025-11-10 21:03:07.172 | DEBUG    | pipecat.transports.smallwebrtc.transport:on_closed:252 - Client connection closed.
2025-11-10 21:03:09.317 | DEBUG    | pipecat.transports.smallwebrtc.connection:_idle_watcher:169 - Disabling receiver for audio track after 2.17s idle
2025-11-10 21:03:09.318 | DEBUG    | pipecat.transports.smallwebrtc.connection:discard_old_frames:138 - Discarding old frames

When the tool call happens first and the bot does not repeat itself:

2025-11-10 21:07:12.360 | INFO     | pipecat:<module>:14 - ᓚᘏᗢ Pipecat 0.0.94 (Python 3.13.6 (main, Aug 14 2025, 16:07:26) [Clang 20.1.4 ]) ᓚᘏᗢ
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.
2025-11-10 21:07:17.954 | INFO     | __main__:<module>:36 - ✅ All components loaded successfully!

🚀 Bot ready!
   → Open http://localhost:7860/client in your browser

Looking for dist directory at: /Users/till/gitraffe/verity-interview/backend/.venv/lib/python3.13/site-packages/pipecat_ai_small_webrtc_prebuilt/client/dist
INFO:     Started server process [58381]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://localhost:7860 (Press CTRL+C to quit)
2025-11-10 21:07:27.732 | DEBUG    | pipecat.transports.smallwebrtc.connection:_initialize:276 - Initializing new peer connection
2025-11-10 21:07:27.759 | DEBUG    | pipecat.transports.smallwebrtc.connection:_create_answer:359 - Creating answer
2025-11-10 21:07:27.763 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_track:341 - Track audio received
2025-11-10 21:07:27.763 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_track:341 - Track video received
2025-11-10 21:07:27.773 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_icegatheringstatechange:337 - ICE gathering state is gathering
2025-11-10 21:07:27.778 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_icegatheringstatechange:337 - ICE gathering state is complete
2025-11-10 21:07:27.779 | DEBUG    | pipecat.transports.smallwebrtc.connection:_create_answer:362 - Setting the answer after the local description is created
2025-11-10 21:07:27.780 | DEBUG    | pipecat.transports.smallwebrtc.request_handler:handle_web_request:213 - webrtc_connection_callback executed successfully for peer: SmallWebRTCConnection#0
INFO:     ::1:53521 - "POST /api/offer HTTP/1.1" 200 OK
2025-11-10 21:07:27.781 | DEBUG    | pipecat.audio.vad.silero:__init__:147 - Loading Silero VAD model...
2025-11-10 21:07:27.956 | DEBUG    | pipecat.audio.vad.silero:__init__:169 - Loaded Silero VAD
2025-11-10 21:07:27.956 | DEBUG    | pipecat.audio.turn.smart_turn.local_smart_turn_v3:__init__:51 - Loading Local Smart Turn v3 model...
2025-11-10 21:07:28.057 | DEBUG    | pipecat.audio.turn.smart_turn.local_smart_turn_v3:__init__:82 - Loaded Local Smart Turn v3
2025-11-10 21:07:28.058 | DEBUG    | pipecat.runner.utils:_get_transport_params:383 - Using transport params for webrtc
2025-11-10 21:07:28.304 | INFO     | __main__:run_bot:53 - Starting bot with Google services
2025-11-10 21:07:28.907 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking Pipeline#0::Source -> SmallWebRTCInputTransport#0
2025-11-10 21:07:28.907 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking SmallWebRTCInputTransport#0 -> GoogleSTTService#0
2025-11-10 21:07:28.907 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking GoogleSTTService#0 -> LLMUserAggregator#0
2025-11-10 21:07:28.907 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking LLMUserAggregator#0 -> GoogleLLMService#0
2025-11-10 21:07:28.907 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking GoogleLLMService#0 -> GoogleTTSService#0
2025-11-10 21:07:28.907 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking GoogleTTSService#0 -> SmallWebRTCOutputTransport#0
2025-11-10 21:07:28.908 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking SmallWebRTCOutputTransport#0 -> LLMAssistantAggregator#0
2025-11-10 21:07:28.908 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking LLMAssistantAggregator#0 -> Pipeline#0::Sink
2025-11-10 21:07:28.908 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking PipelineTask#0::Source -> Pipeline#0
2025-11-10 21:07:28.908 | DEBUG    | pipecat.processors.frame_processor:link:525 - Linking Pipeline#0 -> PipelineTask#0::Sink
2025-11-10 21:07:28.908 | INFO     | __main__:run_bot:102 - 💬 Bot ready! Open http://localhost:7860 to connect
2025-11-10 21:07:28.908 | DEBUG    | pipecat.pipeline.runner:run:71 - Runner PipelineRunner#0 started running PipelineTask#0
2025-11-10 21:07:28.917 | DEBUG    | pipecat.pipeline.task:_wait_for_pipeline_start:602 - PipelineTask#0: Starting. Waiting for StartFrame#0 to reach the end of the pipeline...
2025-11-10 21:07:28.921 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_iceconnectionstatechange:331 - ICE connection state is checking, connection is connecting
2025-11-10 21:07:28.921 | DEBUG    | pipecat.transports.smallwebrtc.connection:_monitoring_connecting_state:529 - Monitoring connecting state
2025-11-10 21:07:28.922 | DEBUG    | pipecat.transports.smallwebrtc.connection:_handle_new_connection_state:564 - Connection state changed to: connecting
2025-11-10 21:07:28.922 | DEBUG    | pipecat.audio.vad.vad_analyzer:set_params:156 - Setting VAD params to: confidence=0.7 start_secs=0.2 stop_secs=0.2 min_volume=0.6
2025-11-10 21:07:28.922 | INFO     | pipecat.transports.smallwebrtc.transport:connect:449 - Connecting to Small WebRTC
2025-11-10 21:07:28.923 | DEBUG    | pipecat.services.google.stt:_connect:701 - Connecting to Google Speech-to-Text
2025-11-10 21:07:28.947 | INFO     | pipecat.transports.smallwebrtc.transport:connect:449 - Connecting to Small WebRTC
2025-11-10 21:07:28.948 | DEBUG    | pipecat.pipeline.task:_wait_for_pipeline_start:605 - PipelineTask#0: StartFrame#0 reached the end of the pipeline, pipeline is now ready.
2025-11-10 21:07:28.998 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_iceconnectionstatechange:331 - ICE connection state is completed, connection is connecting
2025-11-10 21:07:29.006 | DEBUG    | pipecat.transports.smallwebrtc.connection:_cancel_monitoring_connecting_state:549 - Cancelling the connecting timeout task
2025-11-10 21:07:29.007 | DEBUG    | pipecat.transports.smallwebrtc.connection:_handle_new_connection_state:564 - Connection state changed to: connected
2025-11-10 21:07:29.007 | DEBUG    | pipecat.transports.smallwebrtc.transport:on_connected:242 - Peer connection established.
2025-11-10 21:07:29.007 | WARNING  | pipecat.transports.smallwebrtc.connection:screen_video_input_track:649 - No screen video transceiver is available
2025-11-10 21:07:29.007 | DEBUG    | pipecat.transports.smallwebrtc.connection:replace_audio_track:447 - Replacing audio track audio
2025-11-10 21:07:29.022 | DEBUG    | pipecat.transports.smallwebrtc.connection:_handle_signalling_message:682 - Signalling message received: {'type': 'trackStatus', 'receiver_index': 0, 'enabled': True}
2025-11-10 21:07:29.022 | DEBUG    | pipecat.transports.smallwebrtc.connection:_handle_signalling_message:682 - Signalling message received: {'type': 'trackStatus', 'receiver_index': 1, 'enabled': True}
2025-11-10 21:07:29.033 | DEBUG    | pipecat.transports.smallwebrtc.transport:push_app_message:693 - Received app message inside SmallWebRTCInputTransport  {'label': 'rtvi-ai', 'type': 'client-ready', 'data': {'version': '1.0.0', 'about': {'library': '@pipecat-ai/client-react', 'library_version': '1.0.1', 'platform_details': {'browser': 'Safari', 'browser_version': '26.0.1', 'platform_type': 'desktop', 'engine': 'WebKit'}, 'platform': 'macOS', 'platform_version': '10.15.7'}}, 'id': 'cccc4a5d'}
2025-11-10 21:07:31.956 | DEBUG    | pipecat.transports.base_input:_handle_user_interruption:335 - User started speaking
2025-11-10 21:07:31.957 | DEBUG    | pipecat.pipeline.task:_source_push_frame:747 - PipelineTask#0: received interruption task frame InterruptionTaskFrame#0
2025-11-10 21:07:31.959 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.021538972854614258
2025-11-10 21:07:31.959 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_processing_metrics:152 - GoogleSTTService#0 processing time: 0.02174997329711914
2025-11-10 21:07:32.204 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.01911783218383789
2025-11-10 21:07:32.589 | DEBUG    | pipecat.audio.turn.smart_turn.base_smart_turn:analyze_end_of_turn:165 - End of Turn result: EndOfTurnState.COMPLETE
2025-11-10 21:07:32.589 | DEBUG    | pipecat.transports.base_input:_handle_user_interruption:356 - User stopped speaking
2025-11-10 21:07:32.662 | DEBUG    | pipecat.transports.base_input:_handle_user_interruption:335 - User started speaking
2025-11-10 21:07:32.662 | DEBUG    | pipecat.pipeline.task:_source_push_frame:747 - PipelineTask#0: received interruption task frame InterruptionTaskFrame#1
2025-11-10 21:07:32.663 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.013664007186889648
2025-11-10 21:07:32.664 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_processing_metrics:152 - GoogleSTTService#0 processing time: 0.013887166976928711
2025-11-10 21:07:32.801 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.003283262252807617
2025-11-10 21:07:33.306 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.05026888847351074
2025-11-10 21:07:33.446 | DEBUG    | pipecat.audio.turn.smart_turn.base_smart_turn:analyze_end_of_turn:165 - End of Turn result: EndOfTurnState.INCOMPLETE
2025-11-10 21:07:33.706 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.012730121612548828
2025-11-10 21:07:33.897 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.0014982223510742188
2025-11-10 21:07:34.135 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.004403114318847656
2025-11-10 21:07:34.208 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.011646270751953125
2025-11-10 21:07:34.311 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.02025914192199707
2025-11-10 21:07:34.404 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.07127499580383301
2025-11-10 21:07:34.513 | DEBUG    | pipecat.audio.turn.smart_turn.base_smart_turn:analyze_end_of_turn:165 - End of Turn result: EndOfTurnState.COMPLETE
2025-11-10 21:07:34.513 | DEBUG    | pipecat.transports.base_input:_handle_user_interruption:356 - User stopped speaking
2025-11-10 21:07:34.877 | DEBUG    | pipecat.transports.base_input:_handle_user_interruption:335 - User started speaking
2025-11-10 21:07:34.877 | DEBUG    | pipecat.pipeline.task:_source_push_frame:747 - PipelineTask#0: received interruption task frame InterruptionTaskFrame#2
2025-11-10 21:07:34.878 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.0212860107421875
2025-11-10 21:07:34.878 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_processing_metrics:152 - GoogleSTTService#0 processing time: 0.021546125411987305
2025-11-10 21:07:34.890 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.011660099029541016
2025-11-10 21:07:34.994 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.02122807502746582
2025-11-10 21:07:35.311 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.01550912857055664
2025-11-10 21:07:35.523 | DEBUG    | pipecat.audio.turn.smart_turn.base_smart_turn:analyze_end_of_turn:165 - End of Turn result: EndOfTurnState.COMPLETE
2025-11-10 21:07:35.523 | DEBUG    | pipecat.transports.base_input:_handle_user_interruption:356 - User stopped speaking
2025-11-10 21:07:35.636 | DEBUG    | pipecat.transports.base_input:_handle_user_interruption:335 - User started speaking
2025-11-10 21:07:35.636 | DEBUG    | pipecat.pipeline.task:_source_push_frame:747 - PipelineTask#0: received interruption task frame InterruptionTaskFrame#3
2025-11-10 21:07:35.637 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.02606511116027832
2025-11-10 21:07:35.637 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_processing_metrics:152 - GoogleSTTService#0 processing time: 0.026403188705444336
2025-11-10 21:07:35.886 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.0065190792083740234
2025-11-10 21:07:35.995 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.020377159118652344
2025-11-10 21:07:36.137 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.021478891372680664
2025-11-10 21:07:36.204 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.0002529621124267578
2025-11-10 21:07:36.593 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.020628929138183594
2025-11-10 21:07:36.720 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.0018219947814941406
2025-11-10 21:07:36.815 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.015879154205322266
2025-11-10 21:07:36.831 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.007734060287475586
2025-11-10 21:07:37.004 | DEBUG    | pipecat.audio.turn.smart_turn.base_smart_turn:analyze_end_of_turn:165 - End of Turn result: EndOfTurnState.COMPLETE
2025-11-10 21:07:37.004 | DEBUG    | pipecat.transports.base_input:_handle_user_interruption:356 - User stopped speaking
2025-11-10 21:07:37.278 | DEBUG    | pipecat.transports.base_input:_handle_user_interruption:335 - User started speaking
2025-11-10 21:07:37.279 | DEBUG    | pipecat.pipeline.task:_source_push_frame:747 - PipelineTask#0: received interruption task frame InterruptionTaskFrame#4
2025-11-10 21:07:37.280 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.02354884147644043
2025-11-10 21:07:37.280 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_processing_metrics:152 - GoogleSTTService#0 processing time: 0.023756027221679688
2025-11-10 21:07:37.397 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.0010411739349365234
2025-11-10 21:07:37.723 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.02864694595336914
2025-11-10 21:07:37.918 | DEBUG    | pipecat.audio.turn.smart_turn.base_smart_turn:analyze_end_of_turn:165 - End of Turn result: EndOfTurnState.INCOMPLETE
2025-11-10 21:07:38.296 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.004790067672729492
2025-11-10 21:07:38.325 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.00463104248046875
2025-11-10 21:07:38.621 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.011852025985717773
2025-11-10 21:07:38.944 | DEBUG    | pipecat.audio.turn.smart_turn.base_smart_turn:analyze_end_of_turn:165 - End of Turn result: EndOfTurnState.COMPLETE
2025-11-10 21:07:38.945 | DEBUG    | pipecat.transports.base_input:_handle_user_interruption:356 - User stopped speaking
2025-11-10 21:07:39.039 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.02314925193786621
2025-11-10 21:07:39.044 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.003103971481323242
2025-11-10 21:07:39.190 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleSTTService#0 TTFB: 0.014261960983276367
2025-11-10 21:07:39.415 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_processing_metrics:152 - GoogleSTTService#0 processing time: 0.002638101577758789
2025-11-10 21:07:39.922 | DEBUG    | pipecat.adapters.base_llm_adapter:from_standard_tools:127 - Retrieving the tools using the adapter: <class 'pipecat.adapters.services.gemini_adapter.GeminiLLMAdapter'>
2025-11-10 21:07:39.922 | DEBUG    | pipecat.services.google.llm:_stream_content_universal_context:871 - GoogleLLMService#0: Generating chat from universal context [None] | [{'parts': [{'text': 'First call get current time. Second, say the word banana. Third tell me the time.'}], 'role': 'user'}]
2025-11-10 21:07:40.583 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleLLMService#0 TTFB: 0.6510701179504395
2025-11-10 21:07:40.583 | DEBUG    | pipecat.services.google.llm:_process_context:927 - Function call: get_current_time:3cfe18fe-f85d-4b37-bc3c-bf95d91eb5e5
2025-11-10 21:07:40.587 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:start_llm_usage_metrics:173 - GoogleLLMService#0 prompt tokens: 32, completion tokens: 5
2025-11-10 21:07:40.588 | DEBUG    | pipecat.services.llm_service:_run_function_call:551 - GoogleLLMService#0 Calling function [get_current_time:3cfe18fe-f85d-4b37-bc3c-bf95d91eb5e5] with arguments {}
2025-11-10 21:07:40.589 | INFO     | __main__:get_current_time:43 - ⏰ Function get_current_time() called
2025-11-10 21:07:40.589 | INFO     | __main__:get_current_time:48 - ⏰ Function returning: The current time is 09:07 PM
2025-11-10 21:07:40.591 | DEBUG    | pipecat.processors.aggregators.llm_response_universal:_handle_function_calls_started:689 - LLMAssistantAggregator#0 FunctionCallsStartedFrame: ['get_current_time:3cfe18fe-f85d-4b37-bc3c-bf95d91eb5e5']
2025-11-10 21:07:40.592 | DEBUG    | pipecat.processors.aggregators.llm_response_universal:_handle_function_call_in_progress:694 - LLMAssistantAggregator#0 FunctionCallInProgressFrame: [get_current_time:3cfe18fe-f85d-4b37-bc3c-bf95d91eb5e5]
2025-11-10 21:07:40.592 | DEBUG    | pipecat.processors.aggregators.llm_response_universal:_handle_function_call_result:725 - LLMAssistantAggregator#0 FunctionCallResultFrame: [get_current_time:3cfe18fe-f85d-4b37-bc3c-bf95d91eb5e5]
2025-11-10 21:07:40.594 | DEBUG    | pipecat.adapters.base_llm_adapter:from_standard_tools:127 - Retrieving the tools using the adapter: <class 'pipecat.adapters.services.gemini_adapter.GeminiLLMAdapter'>
2025-11-10 21:07:40.595 | DEBUG    | pipecat.services.google.llm:_stream_content_universal_context:871 - GoogleLLMService#0: Generating chat from universal context [None] | [{'parts': [{'text': 'First call get current time. Second, say the word banana. Third tell me the time.'}], 'role': 'user'}, {'parts': [{'function_call': {'id': '3cfe18fe-f85d-4b37-bc3c-bf95d91eb5e5', 'args': {}, 'name': 'get_current_time'}}], 'role': 'model'}, {'parts': [{'function_response': {'id': '3cfe18fe-f85d-4b37-bc3c-bf95d91eb5e5', 'name': 'get_current_time', 'response': {'value': 'The current time is 09:07 PM'}}}], 'role': 'user'}]
2025-11-10 21:07:40.980 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleLLMService#0 TTFB: 0.38505077362060547
2025-11-10 21:07:41.014 | DEBUG    | pipecat.services.google.tts:run_tts:646 - GoogleTTSService#0: Generating TTS [Banana.]
2025-11-10 21:07:41.018 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:start_llm_usage_metrics:173 - GoogleLLMService#0 prompt tokens: 54, completion tokens: 15
2025-11-10 21:07:41.182 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:start_tts_usage_metrics:191 - GoogleTTSService#0 usage characters: 7
2025-11-10 21:07:41.437 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleTTSService#0 TTFB: 0.42201995849609375
2025-11-10 21:07:41.460 | DEBUG    | pipecat.transports.base_output:_bot_started_speaking:599 - Bot started speaking
2025-11-10 21:07:41.538 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_processing_metrics:152 - GoogleTTSService#0 processing time: 0.5234770774841309
2025-11-10 21:07:41.539 | DEBUG    | pipecat.services.google.tts:run_tts:646 - GoogleTTSService#0: Generating TTS [ The current time is 09:07 PM.]
2025-11-10 21:07:41.541 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:start_tts_usage_metrics:191 - GoogleTTSService#0 usage characters: 30
2025-11-10 21:07:41.822 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_ttfb_metrics:131 - GoogleTTSService#0 TTFB: 0.28356027603149414
2025-11-10 21:07:42.156 | DEBUG    | pipecat.processors.metrics.frame_processor_metrics:stop_processing_metrics:152 - GoogleTTSService#0 processing time: 0.6172769069671631
2025-11-10 21:07:44.328 | DEBUG    | pipecat.transports.base_output:_bot_stopped_speaking:617 - Bot stopped speaking
2025-11-10 21:07:46.042 | WARNING  | pipecat.transports.smallwebrtc.transport:read_audio_frame:361 - Received an unexpected media stream error while reading the audio.
2025-11-10 21:07:46.042 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_ended:346 - Track audio ended
2025-11-10 21:07:46.044 | DEBUG    | pipecat.transports.smallwebrtc.connection:on_iceconnectionstatechange:331 - ICE connection state is closed, connection is closed
2025-11-10 21:07:46.045 | DEBUG    | pipecat.transports.smallwebrtc.connection:_handle_new_connection_state:564 - Connection state changed to: closed
2025-11-10 21:07:46.045 | INFO     | pipecat.transports.smallwebrtc.request_handler:handle_disconnected:207 - Discarding peer connection for pc_id: SmallWebRTCConnection#0
2025-11-10 21:07:46.045 | DEBUG    | pipecat.transports.smallwebrtc.transport:on_closed:252 - Client connection closed.
2025-11-10 21:07:49.024 | DEBUG    | pipecat.transports.smallwebrtc.connection:_idle_watcher:169 - Disabling receiver for audio track after 3.01s idle
2025-11-10 21:07:49.024 | DEBUG    | pipecat.transports.smallwebrtc.connection:discard_old_frames:138 - Discarding old frames
2025-11-10 21:07:56.054 | DEBUG    | pipecat.services.google.stt:_process_responses:897 - GoogleSTTService#0 Stream aborted due to inactivity (no audio input). Reconnecting automatically...
2025-11-10 21:07:56.054 | WARNING  | pipecat.services.google.stt:_stream_audio:808 - GoogleSTTService#0 Reconnecting: 409 Stream timed out after receiving no more client requests. [type_url: "type.googleapis.com/util.StatusProto"
value: "\010\n\022\007generic\0329Stream timed out after receiving no more client requests.*D\013\020\206\326\215'\032;\0229Stream timed out after receiving no more client requests.\014"
]

gitraffe commented on Nov 11 '25, 02:11

Nice catch and great repro case 👏

We'll look into this.

markbackman commented on Nov 11 '25, 20:11

This is indeed a race condition. I found that this issue does not occur with websocket-based TTS services. So, if you're in a pinch, you can switch to CartesiaTTSService, ElevenLabsTTSService, or RimeTTSService, which are all websocket services with word/timestamp pairs.

The websocket services don't hit this issue because they pause processing while the audio is being generated. This ensures correct ordering of frames.

The HTTP TTS services run asynchronously, which leaves a window for this issue to occur.
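
To illustrate the ordering difference, here is a minimal, self-contained asyncio sketch (conceptual only, not Pipecat's frame machinery; all names in it are made up). The "HTTP-style" stage pushes the tool call result downstream without waiting for audio generation, so the result overtakes the spoken text, matching what the logs above show.

import asyncio


async def websocket_style_tts(downstream: asyncio.Queue) -> None:
    # A websocket TTS with word/timestamp pairs effectively pauses here until
    # the audio is produced, so the text always reaches the aggregator first.
    await asyncio.sleep(0.3)  # stand-in for audio generation
    await downstream.put("assistant text: banana")
    await downstream.put("tool call result: 09:02 PM")


async def http_style_tts(downstream: asyncio.Queue) -> None:
    async def generate_audio() -> None:
        await asyncio.sleep(0.3)  # audio generation runs in the background
        await downstream.put("assistant text: banana")

    audio_task = asyncio.create_task(generate_audio())
    # Nothing blocks the tool call result, so it reaches the aggregator first
    # and the next LLM request is built from a context missing the text turn.
    await downstream.put("tool call result: 09:02 PM")
    await audio_task


async def main() -> None:
    for label, tts in (("websocket-style", websocket_style_tts), ("http-style", http_style_tts)):
        queue: asyncio.Queue = asyncio.Queue()
        await tts(queue)
        print(label)
        while not queue.empty():
            print("  ", await queue.get())


asyncio.run(main())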

Here's a modified repro case where:

  • Just run the file; no interaction is needed to repro
  • Supports DailyTransport in addition to SmallWebRTCTransport
  • Includes a commented-out CartesiaTTSService for convenience

#
# Simple chat bot based on Pipecat quickstart example.
# Uses Google services (STT, LLM, TTS) and includes get_current_time() function.
#

"""Simple chat bot for testing function calling behavior.

Run the bot using::

    python playground/simple_chat_bot.py --transport webrtc

Then open http://localhost:7860 in your browser to connect.
"""

import os
from datetime import datetime

from dotenv import load_dotenv
from loguru import logger
from pipecat_whisker import WhiskerObserver

from pipecat.adapters.schemas.tools_schema import ToolsSchema
from pipecat.audio.turn.smart_turn.local_smart_turn_v3 import LocalSmartTurnAnalyzerV3
from pipecat.audio.vad.silero import SileroVADAnalyzer
from pipecat.audio.vad.vad_analyzer import VADParams
from pipecat.frames.frames import LLMRunFrame
from pipecat.pipeline.pipeline import Pipeline
from pipecat.pipeline.runner import PipelineRunner
from pipecat.pipeline.task import PipelineParams, PipelineTask
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
from pipecat.runner.types import RunnerArguments
from pipecat.runner.utils import create_transport
from pipecat.services.cartesia.tts import CartesiaTTSService
from pipecat.services.google.llm import GoogleLLMService
from pipecat.services.google.stt import GoogleSTTService
from pipecat.services.google.tts import GoogleTTSService
from pipecat.services.llm_service import FunctionCallParams
from pipecat.transports.base_transport import BaseTransport, TransportParams
from pipecat.transports.daily.transport import DailyParams

logger.info("✅ All components loaded successfully!")

load_dotenv(override=True)


async def get_current_time(params: FunctionCallParams, **kwargs):
    """Get the current time."""
    logger.info("⏰ Function get_current_time() called")

    current_time = datetime.now().strftime("%I:%M %p")
    result = f"The current time is {current_time}"

    logger.info(f"⏰ Function returning: {result}")
    await params.result_callback(result)


async def run_bot(transport: BaseTransport, runner_args: RunnerArguments):
    logger.info("Starting bot with Google services")

    stt = GoogleSTTService(
        credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
    )

    # Impacted: Double "banana" output
    tts = GoogleTTSService(
        voice_id="en-US-Chirp3-HD-Charon",
        credentials=os.getenv("GOOGLE_TEST_CREDENTIALS"),
    )

    # Not impacted
    # tts = CartesiaTTSService(
    #     api_key=os.getenv("CARTESIA_API_KEY"),
    #     voice_id="71a7ad14-091c-4e8e-a314-022ece01c121",  # British Reading Lady
    # )

    llm = GoogleLLMService(
        api_key=os.environ["GOOGLE_API_KEY"],
        model="gemini-2.0-flash-exp",
    )

    # Register the function
    llm.register_direct_function(get_current_time)
    tools = ToolsSchema(standard_tools=[get_current_time])  # type: ignore

    messages = [
        {
            "role": "user",
            "content": "Say the word 'Banana.' then get the current time?",
        },
    ]

    context = LLMContext(messages, tools)
    context_aggregator = LLMContextAggregatorPair(context)

    pipeline = Pipeline(
        [
            transport.input(),  # Transport user input
            stt,  # Speech to text
            context_aggregator.user(),  # User responses
            llm,  # LLM
            tts,  # TTS
            transport.output(),  # Transport bot output
            context_aggregator.assistant(),  # Assistant spoken responses
        ]
    )

    task = PipelineTask(
        pipeline,
        params=PipelineParams(
            enable_metrics=True,
            enable_usage_metrics=True,
        ),
        observers=[WhiskerObserver(pipeline)],
    )

    @transport.event_handler("on_client_connected")
    async def on_client_connected(transport, client):
        logger.info("Client connected")
        # Kick off the conversation.
        await task.queue_frames([LLMRunFrame()])

    @transport.event_handler("on_client_disconnected")
    async def on_client_disconnected(transport, client):
        logger.info("Client disconnected")
        await task.cancel()

    runner = PipelineRunner(handle_sigint=runner_args.handle_sigint)

    logger.info("💬 Bot ready! Open http://localhost:7860 to connect")

    await runner.run(task)


async def bot(runner_args: RunnerArguments):
    """Main bot entry point."""

    transport_params = {
        "daily": lambda: DailyParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
            turn_analyzer=LocalSmartTurnAnalyzerV3(),
        ),
        "webrtc": lambda: TransportParams(
            audio_in_enabled=True,
            audio_out_enabled=True,
            vad_analyzer=SileroVADAnalyzer(params=VADParams(stop_secs=0.2)),
            turn_analyzer=LocalSmartTurnAnalyzerV3(),
        ),
    }

    transport = await create_transport(runner_args, transport_params)

    await run_bot(transport, runner_args)


if __name__ == "__main__":
    from pipecat.runner.run import main

    main()

markbackman commented on Nov 11 '25, 21:11

We need to think about the best way to solve this case. We'll update once someone starts working on this.

markbackman commented on Nov 11 '25, 21:11

Interesting, thank you for the speedy investigation!

Do you know if there's an easy way to wrap the Google services in a websocket-like layer? Or maybe to add a lock that gets released as a hacky workaround?

gitraffe commented on Nov 11 '25, 21:11

I don't have a workaround other than using a different TTS service. We'll get to this soon.

markbackman commented on Nov 11 '25, 21:11

Perfect, thank you for the fast responses!

gitraffe avatar Nov 11 '25 21:11 gitraffe