speech_recognition issue: `openai.Audio` is no longer supported when using `recognize_whisper_api`.

The following example is from the official documentation examples folder:
#!/usr/bin/env python3
"""Microphone -> Whisper transcription example (from the speech_recognition docs).

NOTE: this example requires PyAudio because it uses the Microphone class.
"""
import speech_recognition as sr

from config import OPENAI_API_KEY

# Obtain audio from the microphone.
r = sr.Recognizer()
with sr.Microphone() as source:
    print("Say something!")
    audio = r.listen(source)

# Recognize speech using the local Whisper model.
try:
    print("Whisper thinks you said " + r.recognize_whisper(audio, language="english"))
except sr.UnknownValueError:
    print("Whisper could not understand audio")
except sr.RequestError:
    print("Could not request results from Whisper")

# Recognize speech using the hosted Whisper API.
# NOTE(review): recognize_whisper_api relies on the removed `openai.Audio`
# interface and raises APIRemovedInV1 on openai>=1.0.0 — either pin
# `openai==0.28` or apply the patch from speech_recognition PR #729.
try:
    print(f"Whisper API thinks you said {r.recognize_whisper_api(audio, api_key=OPENAI_API_KEY)}")
except sr.RequestError:
    print("Could not request results from Whisper API")
I get this error:
\venv\lib\site-packages\openai\lib\_old_api.py", line 39, in __call__
raise APIRemovedInV1(symbol=self._symbol)
openai.lib._old_api.APIRemovedInV1:
You tried to access openai.Audio, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.
You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface.
Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`
A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742
Same issue — is there a way to use Whisper together with `with sr.Microphone() as source:`?
I just use this with the OpenAI python package and just send the audio file to the whisper API that way:
import time
import os
from pathlib import Path
from openai import OpenAI
import speech_recognition as sr
from pydub import AudioSegment
import dotenv
# Load .env so OPENAI_API_KEY (and any other settings) land in the environment.
dotenv.load_dotenv()
import logging
# gets OPENAI_API_KEY from your environment variables
openai = OpenAI()  # NOTE: shadows the `openai` package name with a client instance
# Define the base path for audio files
audio_base_path = Path(__file__).parent.parent / "Assets/audio/"
def whisper(audio_file_name: str) -> str:
    """Transcribe an audio file with the OpenAI Whisper API.

    Args:
        audio_file_name: File name resolved relative to ``audio_base_path``.

    Returns:
        The transcribed text.  (The original annotation said ``-> None`` but
        the function has always returned ``transcription.text``.)
    """
    audio_file_path = audio_base_path / audio_file_name
    # Open the file ourselves as a binary handle: it is closed
    # deterministically and works across all openai>=1.0 SDK versions.
    with open(audio_file_path, "rb") as audio_file:
        transcription = openai.audio.transcriptions.create(
            model="whisper-1", file=audio_file
        )
    return transcription.text
def record_audio_sr():
    """Listen on the microphone, save each utterance to a WAV file, transcribe it.

    Loops until ``listen`` times out with no voice activity; every clip of at
    least ~2.5 seconds is written to ``transcript.wav`` under
    ``audio_base_path`` and sent through :func:`whisper`, and the elapsed time
    since the function started is printed after each transcript.
    """
    logging.basicConfig(level=logging.INFO)
    start = time.perf_counter()

    recognizer = sr.Recognizer()
    # Higher = less sensitive; default is 300, useful range is roughly 50-4000.
    recognizer.energy_threshold = 300
    recognizer.pause_threshold = 1.0
    # recognizer.dynamic_energy_threshold = True
    # recognizer.dynamic_energy_adjustment_damping = 0.15

    while True:
        with sr.Microphone() as source:
            # BUG FIX: adjust_for_ambient_noise needs the *open* microphone
            # source, so it must run inside this `with` block — the original
            # called it before `source` existed, which raises NameError.
            recognizer.adjust_for_ambient_noise(source, duration=0.5)
            print("Listening for audio...")
            try:
                audio = recognizer.listen(source, timeout=5.0, phrase_time_limit=10)
                # Skip clips shorter than ~2.5 s of speech — likely just noise.
                # (The original comment said "0.5 seconds" but the code has
                # always compared against 2.5.)
                if len(audio.frame_data) / audio.sample_rate / audio.sample_width >= 2.5:
                    audio_file_name = "transcript.wav"
                    audio_file_path = audio_base_path / audio_file_name
                    with open(audio_file_path, "wb") as file:
                        file.write(audio.get_wav_data())
                    print(f"Recording saved as '{audio_file_path}'")
                    transcript = whisper(audio_file_name)
                    print(transcript)
                    end = time.perf_counter()
                    print(end - start)
                else:
                    print("Detected audio too short, likely just noise.")
            except sr.WaitTimeoutError:
                # No speech within the 5 s timeout: stop listening entirely.
                print("No voice activity detected.")
                break
same issue here
@Paracetamole1 @MiskaWasTaken See https://github.com/Uberi/speech_recognition/pull/729#issue-2080645611
You can copy the change into your local copy of the file as a temporary fix.