whisper-cpp-python
whisper-cpp-python copied to clipboard
Segmentation fault
I have this minimal script that records one second of audio and then transcribes it:
import os
import numpy as np
import speech_recognition as sr
import whisper_cpp_python # Import the whisper-cpp-python library
import tempfile
import wave
from tqdm import tqdm
import urllib.request
from time import sleep
def download_model(model_name="base"):
    """Make sure the ggml model file exists locally and return its path.

    The file is looked up at ``models/ggml-<model_name>.bin`` (relative to the
    current working directory). When it is missing, it is downloaded from the
    whisper.cpp repository on Hugging Face while a percentage progress bar is
    shown.

    Args:
        model_name: Model variant used to build the file name (default "base").

    Returns:
        The relative path of the model file.

    Raises:
        Whatever ``urllib.request.urlretrieve`` or the file-system calls raise;
        a partially downloaded file is removed before the error propagates.
    """
    print(f"Downloading model {model_name} if not already downloaded...")
    base_url = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/"
    file_name = f"ggml-{model_name}.bin"
    model_path = os.path.join("models", file_name)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs("models", exist_ok=True)
    if not os.path.isfile(model_path):
        # Download next to the final location and rename only on success, so
        # an interrupted transfer can never be mistaken for a complete model
        # by the isfile() check on a later run.
        partial_path = model_path + ".part"
        try:
            with tqdm(total=100, desc="Downloading Model", unit='%') as progress_bar:
                def reporthook(block_num, block_size, total_size):
                    # total_size is not positive when the size is unknown;
                    # in that case the bar is simply left untouched.
                    if total_size > 0:
                        downloaded = block_num * block_size
                        progress_bar.n = min(100, downloaded * 100 / total_size)
                        progress_bar.refresh()

                download_url = base_url + file_name
                urllib.request.urlretrieve(download_url, partial_path, reporthook)
            os.replace(partial_path, model_path)
        except BaseException:
            # Also covers Ctrl-C: never leave a truncated file behind.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
        print(f"\nModel {model_name} downloaded to {model_path}.")
    return model_path
def record_audio(duration=1):
    """Record from the microphone and return the raw audio bytes.

    The microphone is opened at 16 kHz, the recognizer adjusts for ambient
    noise for half a second, and then ``duration`` seconds are recorded.

    Args:
        duration: Length of the recording in seconds (default 1).

    Returns:
        Raw audio bytes, explicitly requested as 16 kHz / 16-bit samples so
        they match the WAV header written by ``save_audio_to_file``.
    """
    # Report the requested duration instead of a hard-coded "1 second".
    unit = "second" if duration == 1 else "seconds"
    print(f"Recording audio for {duration} {unit}...")
    recognizer = sr.Recognizer()
    with sr.Microphone(sample_rate=16000) as source:
        recognizer.adjust_for_ambient_noise(source, duration=0.5)
        audio = recognizer.record(source, duration=duration)
    print("Recording complete.")
    # Ask for the exact format the WAV writer assumes rather than relying on
    # whatever rate/width the recording happens to have.
    return audio.get_raw_data(convert_rate=16000, convert_width=2)
def save_audio_to_file(audio_data, sample_rate=16000, sample_width=2, channels=1):
    """Write raw PCM bytes to a temporary ``.wav`` file and return its path.

    The file is created with ``delete=False``, so the caller is responsible
    for removing it once it is no longer needed.

    Args:
        audio_data: Raw PCM frames to write.
        sample_rate: Frame rate stored in the WAV header (default 16000).
        sample_width: Bytes per sample (default 2, i.e. 16-bit).
        channels: Number of channels (default 1, mono).

    Returns:
        The path of the temporary WAV file.

    Raises:
        wave.Error / OSError: if the file cannot be written; the temporary
            file is removed before the error propagates.
    """
    print("Saving audio to a temporary file...")
    temp_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    try:
        with temp_file:
            with wave.open(temp_file, 'wb') as wf:
                wf.setnchannels(channels)
                wf.setsampwidth(sample_width)
                wf.setframerate(sample_rate)
                wf.writeframes(audio_data)
    except BaseException:
        # The caller never receives the path on failure, so it cannot clean
        # up; remove the file here to avoid leaking it.
        try:
            os.remove(temp_file.name)
        except OSError:
            pass
        raise
    print(f"Audio saved to {temp_file.name}")
    return temp_file.name
def transcribe_audio(file_path, model_path):
    """Transcribe an audio file with a whisper.cpp model and print the text.

    Args:
        file_path: Path of the audio file handed to ``Whisper.transcribe``.
        model_path: Path of the ggml model file used to build the model.
    """
    print(f"Transcribing audio using model at {model_path}...")
    model = whisper_cpp_python.Whisper(model_path)
    # The result is indexed by 'text'; surrounding whitespace is trimmed
    # before printing.
    transcription = model.transcribe(file_path)['text'].strip()
    print(f"Transcription: {transcription}")
def main():
    """Run the demo: fetch the model, record, save to WAV, transcribe.

    Python-level errors are reported on stdout and the temporary WAV file is
    removed afterwards. A crash inside native code (such as the segmentation
    fault shown in the log below this script) terminates the interpreter, so
    neither the ``except`` nor the ``finally`` branch gets to run.
    """
    # Initialised up front so the cleanup can test it directly instead of
    # inspecting locals().
    audio_file_path = None
    try:
        # Step 1: Download the model
        model_path = download_model("base")
        print(f"Model path: {model_path}")
        # Step 2: Record audio
        audio_data = record_audio(duration=1)
        print(f"Audio data length: {len(audio_data)}")
        # Step 3: Save audio to a file
        audio_file_path = save_audio_to_file(audio_data)
        print(f"Audio file path: {audio_file_path}")
        # Step 4: Transcribe the audio
        transcribe_audio(audio_file_path, model_path)
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Clean up
        if audio_file_path is not None and os.path.exists(audio_file_path):
            print(f"Removing temporary file: {audio_file_path}")
            os.remove(audio_file_path)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Running it ends in a segmentation fault:
poetry run python test_whisper_cpp_python.py
Downloading model base if not already downloaded...
Model path: models/ggml-base.bin
Recording audio for 1 second...
Recording complete.
Audio data length: 30720
Saving audio to a temporary file...
Audio saved to /var/folders/rk/v7pd4lnx2c514f1qjkkb5s7c0000gn/T/tmpamg7etjh.wav
Audio file path: /var/folders/rk/v7pd4lnx2c514f1qjkkb5s7c0000gn/T/tmpamg7etjh.wav
Transcribing audio using model at models/ggml-base.bin...
whisper_init_from_file_no_state: loading model from 'models/ggml-base.bin'
whisper_model_load: loading model
whisper_model_load: n_vocab = 51865
whisper_model_load: n_audio_ctx = 1500
whisper_model_load: n_audio_state = 512
whisper_model_load: n_audio_head = 8
whisper_model_load: n_audio_layer = 6
whisper_model_load: n_text_ctx = 448
whisper_model_load: n_text_state = 512
whisper_model_load: n_text_head = 8
whisper_model_load: n_text_layer = 6
whisper_model_load: n_mels = 80
whisper_model_load: ftype = 1
whisper_model_load: qntvr = 0
whisper_model_load: type = 2
whisper_model_load: mem required = 310.00 MB (+ 6.00 MB per decoder)
whisper_model_load: adding 1608 extra tokens
whisper_model_load: model ctx = 140.66 MB
whisper_model_load: model size = 140.54 MB
whisper_init_state: kv self size = 5.25 MB
whisper_init_state: kv cross size = 17.58 MB
zsh: segmentation fault poetry run python test_whisper_cpp_python.py
I'm on macOS (Apple M1), and I installed whisper.cpp roughly like this:
# Rough install steps for whisper.cpp as described in the report.
# Install ffmpeg through Homebrew.
brew install ffmpeg
# Fetch the whisper.cpp sources.
# NOTE(review): "[email protected]" looks like an e-mail-obfuscation artifact of
# the SSH remote (presumably git@github.com) - confirm before running.
git clone [email protected]:ggerganov/whisper.cpp.git
# Work from inside the checkout; abort if the directory is missing.
cd whisper.cpp || exit 1
# Create a virtual environment if it doesn't exist
if [ ! -d "venv" ]; then
# NOTE(review): "--version 3.10 --no-default" are not standard CPython
# options; presumably "python" is a wrapper/shim here - verify.
python --version 3.10 --no-default -m venv venv
# The environment is activated only on the run that creates it; when "venv"
# already exists this branch is skipped and nothing is activated.
source venv/bin/activate
fi
# First build: Makefile build with WHISPER_COREML=1 set.
WHISPER_COREML=1 make -j
# Makes the Core ML model generator executable; the visible steps never
# actually run it.
chmod +x ./models/generate-coreml-model.sh
# NOTE(review): "add-to-path" is not a standard command; presumably a
# user-defined helper that extends PATH - verify.
add-to-path /Applications/Xcode.app/Contents/Developer/usr/bin
# Second build: out-of-tree CMake build in ./build. No options are passed to
# cmake here, so WHISPER_COREML=1 from the make step above is not given to it.
mkdir build
cd build
cmake ..
make
# Points WHISPER_CPP_LIB at the library produced by the CMake build.
# NOTE(review): "add-to-profile" is not a standard command and WHISPER_PATH
# is not set anywhere in the visible steps - confirm both.
add-to-profile WHISPER_CPP_LIB "$WHISPER_PATH/build/libwhisper.dylib"
Any ideas on next steps, or additional info I should share?
P.S. I previously ran into a different issue, which I resolved by switching to a CMake build and pointing WHISPER_CPP_LIB at the resulting ".dylib". That fixed the earlier problem, but it made running whisper the classic way (direct execution) a little weird. I haven't yet gone through a debug cycle to check whether direct execution still works, but I can do that if you suspect it is related.
Thanks for the project 🥳