speech_recognition icon indicating copy to clipboard operation
speech_recognition copied to clipboard

openai.Audio is no longer supported when using recognize_whisper_api

Open elieobeid7 opened this issue 1 year ago • 4 comments
trafficstars

From the official documentation examples folder

#!/usr/bin/env python3
"""Example: capture microphone audio and transcribe it with Whisper.

Tries the local Whisper model first, then the hosted Whisper API.
"""

# NOTE: this example requires PyAudio because it uses the Microphone class
import speech_recognition as sr

from config import OPENAI_API_KEY

# obtain audio from the microphone
r = sr.Recognizer()
with sr.Microphone() as source:
    print("Say something!")
    audio = r.listen(source)

# recognize speech using whisper
try:
    print("Whisper thinks you said " + r.recognize_whisper(audio, language="english"))
except sr.UnknownValueError:
    print("Whisper could not understand audio")
except sr.RequestError:
    # the original bound the exception as `e` but never used it
    print("Could not request results from Whisper")

# recognize speech using Whisper API
try:
    print(f"Whisper API thinks you said {r.recognize_whisper_api(audio, api_key=OPENAI_API_KEY)}")
except sr.RequestError:
    print("Could not request results from Whisper API")

I get this error


\venv\lib\site-packages\openai\lib\_old_api.py", line 39, in __call__

raise  APIRemovedInV1(symbol=self._symbol)

openai.lib._old_api.APIRemovedInV1:

  

You tried to access openai.Audio, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

  

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface.

  

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

  

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742

elieobeid7 avatar Dec 02 '23 11:12 elieobeid7

Same issue — is there a way to use Whisper together with a `with sr.Microphone() as source:` block?

Paracetamole1 avatar Jan 01 '24 16:01 Paracetamole1

I just use this with the OpenAI Python package and send the audio file to the Whisper API that way:

import time
import os
from pathlib import Path
from openai import OpenAI
import speech_recognition as sr
from pydub import AudioSegment
import dotenv
dotenv.load_dotenv()
import logging


# gets OPENAI_API_KEY from your environment variables
# Module-level client, reused by whisper() below.
openai = OpenAI()

# Define the base path for audio files
# NOTE(review): resolves two directories above this file — confirm the
# "Assets/audio/" folder actually lives there in this project layout.
audio_base_path = Path(__file__).parent.parent / "Assets/audio/"

def whisper(audio_file_name: str) -> str:
    """Transcribe an audio file under ``audio_base_path`` with the Whisper API.

    :param audio_file_name: file name relative to ``audio_base_path``
    :returns: the transcribed text
    """
    audio_file_path = audio_base_path / audio_file_name
    # Open the file ourselves in binary mode: a file object is accepted by
    # every openai>=1.0 release, whereas passing a bare Path is not.
    # (Also fixed the return annotation: this function returns a str, not None.)
    with open(audio_file_path, "rb") as audio_file:
        transcription = openai.audio.transcriptions.create(model="whisper-1", file=audio_file)
    return transcription.text

def record_audio_sr():
    """Continuously listen on the microphone, save detected speech to a WAV
    file under ``audio_base_path``, and print its Whisper transcription.

    Loops until a listen times out with no voice activity, then returns.
    """
    logging.basicConfig(level=logging.INFO)
    start = time.perf_counter()
    recognizer = sr.Recognizer()  # Initialize recognizer
    # Higher = less sensitive. Default is 300; useful range is roughly 50-4000.
    recognizer.energy_threshold = 300
    recognizer.pause_threshold = 1.0
    #recognizer.dynamic_energy_threshold = True
    #recognizer.dynamic_energy_adjustment_damping = 0.15
    while True:
        with sr.Microphone() as source:
            # BUG FIX: adjust_for_ambient_noise needs an open source; in the
            # original it ran before `source` existed, raising NameError.
            recognizer.adjust_for_ambient_noise(source, duration=0.5)
            print("Listening for audio...")
            try:
                audio = recognizer.listen(source, timeout=5.0, phrase_time_limit=10)

                # duration (s) = bytes / (sample_rate * sample_width);
                # require at least 2.5 s so short noise bursts are ignored
                # (the original comment wrongly said 0.5 seconds).
                if len(audio.frame_data) / audio.sample_rate / audio.sample_width >= 2.5:
                    audio_file_name = "transcript.wav"
                    audio_file_path = audio_base_path / audio_file_name
                    with open(audio_file_path, "wb") as file:
                        file.write(audio.get_wav_data())

                    print(f"Recording saved as '{audio_file_path}'")
                    transcript = whisper(audio_file_name)
                    print(transcript)
                    end = time.perf_counter()
                    print(end - start)
                else:
                    print("Detected audio too short, likely just noise.")
            except sr.WaitTimeoutError:
                print("No voice activity detected.")
                break

Joshua-Shepherd avatar Jan 11 '24 03:01 Joshua-Shepherd

same issue here

MiskaWasTaken avatar Jan 13 '24 12:01 MiskaWasTaken

@Paracetamole1 @MiskaWasTaken See https://github.com/Uberi/speech_recognition/pull/729#issue-2080645611

You can copy the change into your local file to have a temporary fix.

yackermann avatar Jan 14 '24 09:01 yackermann

Thanks for reporting this, and I'm very sorry for my late response.

Fixed at 3.10.2, thanks to @herrjemand

ftnext avatar Mar 28 '24 14:03 ftnext