manim-voiceover
manim-voiceover copied to clipboard
Assertion error when trying to run with a transcription model
Description of bug / unexpected behavior
After installing the packages required to run a transcription model, manim-voiceover raises an `AssertionError` as soon as the transcription model is used.
Expected behavior
The transcription model should run without errors.
How to reproduce the issue
Code for reproducing the problem
from manim import *
from manim_voiceover import VoiceoverScene
from manim_voiceover.services.gtts import GTTSService
class BugScene(VoiceoverScene):
    """Minimal scene reproducing the AssertionError seen with a transcription model."""

    def construct(self):
        # Passing transcription_model is what distinguishes this setup from a
        # plain GTTSService(); the failure occurs in the transcription step.
        self.set_speech_service(
            GTTSService(transcription_model="base")
        )
        # The error is raised while entering this context manager (see the
        # traceback), so the body is intentionally empty.
        with self.voiceover("Voice") as trk:
            pass
Additional media files
Images/GIFs
Logs
Terminal output
(venv) oz@Ozz:~/repos/GPU_Programming$ manim -pql manim_scripts/temp.py -v DEBUG
Manim Community v0.18.1
Detected language: english
0%| | 0/0.96 [00:00<?, ?sec/s]
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/manim/cli/render/commands.py:12 │
│ 0 in render │
│ │
│ 117 │ │ │ try: │
│ 118 │ │ │ │ with tempconfig({}): │
│ 119 │ │ │ │ │ scene = SceneClass() │
│ ❱ 120 │ │ │ │ │ scene.render() │
│ 121 │ │ │ except Exception: │
│ 122 │ │ │ │ error_console.print_exception() │
│ 123 │ │ │ │ sys.exit(1) │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/manim/scene/scene.py:229 in │
│ render │
│ │
│ 226 │ │ """ │
│ 227 │ │ self.setup() │
│ 228 │ │ try: │
│ ❱ 229 │ │ │ self.construct() │
│ 230 │ │ except EndSceneEarlyException: │
│ 231 │ │ │ pass │
│ 232 │ │ except RerunSceneException as e: │
│ │
│ /home/oz/repos/GPU_Programming/manim_scripts/temp.py:39 in construct │
│ │
│ 36 │ self.set_speech_service( │
│ 37 │ │ GTTSService(transcription_model="base") │
│ 38 │ │ ) │
│ ❱ 39 │ with self.voiceover("Voice") as trk: │
│ 40 │ pass │
│ 41 │
│ │
│ /usr/lib/python3.11/contextlib.py:137 in __enter__ │
│ │
│ 134 │ │ # they are only needed for recreation, which is not possible anymore │
│ 135 │ │ del self.args, self.kwds, self.func │
│ 136 │ │ try: │
│ ❱ 137 │ │ │ return next(self.gen) │
│ 138 │ │ except StopIteration: │
│ 139 │ │ │ raise RuntimeError("generator didn't yield") from None │
│ 140 │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/manim_voiceover/voiceover_scene │
│ .py:186 in voiceover │
│ │
│ 183 │ │ │
│ 184 │ │ try: │
│ 185 │ │ │ if text is not None: │
│ ❱ 186 │ │ │ │ yield self.add_voiceover_text(text, **kwargs) │
│ 187 │ │ │ elif ssml is not None: │
│ 188 │ │ │ │ yield self.add_voiceover_ssml(ssml, **kwargs) │
│ 189 │ │ finally: │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/manim_voiceover/voiceover_scene │
│ .py:69 in add_voiceover_text │
│ │
│ 66 │ │ │ │ "You need to call init_voiceover() before adding a voiceover." │
│ 67 │ │ │ ) │
│ 68 │ │ │
│ ❱ 69 │ │ dict_ = self.speech_service._wrap_generate_from_text(text, **kwargs) │
│ 70 │ │ tracker = VoiceoverTracker(self, dict_, self.speech_service.cache_dir) │
│ 71 │ │ self.add_sound(str(Path(self.speech_service.cache_dir) / dict_["final_audio"])) │
│ 72 │ │ self.current_tracker = tracker │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/manim_voiceover/services/base.p │
│ y:95 in _wrap_generate_from_text │
│ │
│ 92 │ │ │
│ 93 │ │ # Check whether word boundaries exist and if not run stt │
│ 94 │ │ if "word_boundaries" not in dict_ and self._whisper_model is not None: │
│ ❱ 95 │ │ │ transcription_result = self._whisper_model.transcribe( │
│ 96 │ │ │ │ str(Path(self.cache_dir) / original_audio), **self.transcription_kwargs │
│ 97 │ │ │ ) │
│ 98 │ │ │ logger.info("Transcription: " + transcription_result.text) │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/stable_whisper/whisper_word_lev │
│ el.py:575 in transcribe_stable │
│ │
│ 572 │ │ │ if word_timestamps: │
│ 573 │ │ │ │ if end_timestamp_pos > 0: │
│ 574 │ │ │ │ │ num_samples = min(round(end_timestamp_pos * N_SAMPLES_PER_TOKEN), nu │
│ ❱ 575 │ │ │ │ add_word_timestamps_stable( │
│ 576 │ │ │ │ │ segments=current_segments, │
│ 577 │ │ │ │ │ model=model, │
│ 578 │ │ │ │ │ tokenizer=tokenizer, │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/stable_whisper/timing.py:259 in │
│ add_word_timestamps_stable │
│ │
│ 256 │ │ │ │ │ ) │
│ 257 │ │ │ │ ) │
│ 258 │ │
│ ❱ 259 │ align() │
│ 260 │ if ( │
│ 261 │ │ │ gap_padding is not None and │
│ 262 │ │ │ any( │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/stable_whisper/timing.py:225 in │
│ align │
│ │
│ 222 │ │ text_tokens, token_split, seg_indices = split_word_tokens(segments, tokenizer, │
│ 223 │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ │ padding=gap_padding, s │
│ 224 │ │ │
│ ❱ 225 │ │ alignment = find_alignment_stable(model, tokenizer, text_tokens, mel, num_sample │
│ 226 │ │ │ │ │ │ │ │ │ │ **kwargs, │
│ 227 │ │ │ │ │ │ │ │ │ │ token_split=token_split, │
│ 228 │ │ │ │ │ │ │ │ │ │ audio_features=audio_features, │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/stable_whisper/timing.py:79 in │
│ find_alignment_stable │
│ │
│ 76 │ weights = (weights * qk_scale).softmax(dim=-1) │
│ 77 │ std, mean = torch.std_mean(weights, dim=-2, keepdim=True, unbiased=False) │
│ 78 │ weights = (weights - mean) / std │
│ ❱ 79 │ weights = median_filter(weights, medfilt_width) │
│ 80 │ │
│ 81 │ matrix = weights.mean(axis=0) │
│ 82 │ matrix = matrix[len(tokenizer.sot_sequence): -1] │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/whisper/timing.py:38 in │
│ median_filter │
│ │
│ 35 │ x = F.pad(x, (filter_width // 2, filter_width // 2, 0, 0), mode="reflect") │
│ 36 │ if x.is_cuda: │
│ 37 │ │ try: │
│ ❱ 38 │ │ │ from .triton_ops import median_filter_cuda │
│ 39 │ │ │ │
│ 40 │ │ │ result = median_filter_cuda(x, filter_width) │
│ 41 │ │ except (RuntimeError, subprocess.CalledProcessError): │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/whisper/triton_ops.py:7 in │
│ <module> │
│ │
│ 4 import torch │
│ 5 │
│ 6 try: │
│ ❱ 7 │ import triton │
│ 8 │ import triton.language as tl │
│ 9 except ImportError: │
│ 10 │ raise RuntimeError("triton import failed; try `pip install --pre triton`") │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/triton/__init__.py:20 in │
│ <module> │
│ │
│ 17 │ reinterpret, │
│ 18 │ TensorWrapper, │
│ 19 ) │
│ ❱ 20 from .runtime import ( │
│ 21 │ autotune, │
│ 22 │ Config, │
│ 23 │ heuristics, │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/triton/runtime/__init__.py:1 in │
│ <module> │
│ │
│ ❱ 1 from .autotuner import Config, Heuristics, autotune, heuristics │
│ 2 from .jit import JITFunction, KernelInterface, version_key │
│ 3 │
│ 4 __all__ = [ │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/triton/runtime/autotuner.py:7 │
│ in <module> │
│ │
│ 4 import time │
│ 5 from typing import Dict │
│ 6 │
│ ❱ 7 from ..compiler import OutOfResources │
│ 8 from ..testing import do_bench │
│ 9 from .jit import KernelInterface │
│ 10 │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/triton/compiler.py:22 in │
│ <module> │
│ │
│ 19 from sysconfig import get_paths │
│ 20 from typing import Any, Callable, Dict, Tuple, Union │
│ 21 │
│ ❱ 22 import setuptools │
│ 23 import torch │
│ 24 from filelock import FileLock │
│ 25 │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/setuptools/__init__.py:8 in │
│ <module> │
│ │
│ 5 import re │
│ 6 import warnings │
│ 7 │
│ ❱ 8 import _distutils_hack.override # noqa: F401 │
│ 9 │
│ 10 import distutils.core │
│ 11 from distutils.errors import DistutilsOptionError │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/_distutils_hack/override.py:1 │
│ in <module> │
│ │
│ ❱ 1 __import__('_distutils_hack').do_override() │
│ 2 │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/_distutils_hack/__init__.py:77 │
│ in do_override │
│ │
│ 74 │ """ │
│ 75 │ if enabled(): │
│ 76 │ │ warn_distutils_present() │
│ ❱ 77 │ │ ensure_local_distutils() │
│ 78 │
│ 79 │
│ 80 class _TrivialRe: │
│ │
│ /home/oz/repos/GPU_Programming/venv/lib/python3.11/site-packages/_distutils_hack/__init__.py:64 │
│ in ensure_local_distutils │
│ │
│ 61 │ │
│ 62 │ # check that submodules load as expected │
│ 63 │ core = importlib.import_module('distutils.core') │
│ ❱ 64 │ assert '_distutils' in core.__file__, core.__file__ │
│ 65 │ assert 'setuptools._distutils.log' not in sys.modules │
│ 66 │
│ 67 │
╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
AssertionError: /usr/lib/python3.11/distutils/core.py
System specifications
System Details
- OS (with version, e.g., Windows 10 v2004 or macOS 10.15 (Catalina)):
- RAM:
- Python version (`python/py/python3 --version`):
- Installed modules (provide output from `pip list`):
OS: Debian 12 (kernel 6.1.0-22-amd64)
RAM: 64 GB DDR5
Python version: 3.11.2
Installed modules (`pip list`):
Package Version
------------------------ -----------
attrs 23.2.0
basedpyright 1.13.3
cattrs 23.2.3
certifi 2024.7.4
charset-normalizer 3.3.2
click 8.1.7
cloup 3.0.5
cmake 3.30.1
decorator 5.1.1
docstring-to-markdown 0.15
evdev 1.7.1
ffmpeg-python 0.2.0
filelock 3.15.4
fsspec 2024.6.1
future 1.0.0
glcontext 2.5.0
gTTS 2.5.1
huggingface-hub 0.24.1
idna 3.7
isosurfaces 0.1.2
jedi 0.19.1
jedi-language-server 0.41.4
Jinja2 3.1.4
lit 18.1.8
llvmlite 0.43.0
lsprotocol 2023.0.1
manim 0.18.1
manim-ml 0.0.24
manim-voiceover 0.3.6.post0
ManimPango 0.5.0
mapbox-earcut 1.0.1
markdown-it-py 3.0.0
MarkupSafe 2.1.5
mdurl 0.1.2
moderngl 5.10.0
moderngl-window 2.4.6
more-itertools 10.3.0
mpmath 1.3.0
multipledispatch 1.0.0
mutagen 1.47.0
networkx 3.3
nodejs-wheel-binaries 20.15.1
numba 0.60.0
numpy 1.26.4
nvidia-cublas-cu11 11.10.3.66
nvidia-cuda-cupti-cu11 11.7.101
nvidia-cuda-nvrtc-cu11 11.7.99
nvidia-cuda-runtime-cu11 11.7.99
nvidia-cudnn-cu11 8.5.0.96
nvidia-cufft-cu11 10.9.0.58
nvidia-curand-cu11 10.2.10.91
nvidia-cusolver-cu11 11.4.0.1
nvidia-cusparse-cu11 11.7.4.91
nvidia-nccl-cu11 2.14.3
nvidia-nvtx-cu11 11.7.91
openai-whisper 20230314
packaging 24.1
pandas 2.2.2
parso 0.8.4
pillow 10.4.0
pip 23.0.1
PyAudio 0.2.14
pycairo 1.26.1
pydub 0.25.1
pyglet 2.0.15
pygls 1.3.1
Pygments 2.18.0
pynput 1.7.7
pyrr 0.10.3
python-dateutil 2.9.0.post0
python-dotenv 0.21.1
python-slugify 8.0.4
python-xlib 0.33
pytz 2024.1
PyYAML 6.0.1
regex 2024.5.15
requests 2.32.3
rich 13.7.1
safetensors 0.4.3
scipy 1.14.0
screeninfo 0.8.1
setuptools 66.1.1
six 1.16.0
skia-pathops 0.8.0.post1
sox 1.5.0
srt 3.5.3
stable-ts 2.11.1
svgelements 1.9.6
sympy 1.13.1
text-unidecode 1.3
tiktoken 0.3.1
tokenizers 0.19.1
torch 2.0.1
torchaudio 2.0.2
tqdm 4.66.4
transformers 4.43.1
triton 2.0.0
typing_extensions 4.12.2
tzdata 2024.1
urllib3 2.2.2
watchdog 4.0.1
wheel 0.43.0
LaTeX details
- LaTeX distribution (e.g. TeX Live 2020):
- Installed LaTeX packages:
FFMPEG
Output of `ffmpeg -version`:
PASTE HERE