bark Got `IndexError: index out of range in self` when turning on `SUNO_ENABLE_MPS`

File ~/Code/Miniforge3/lib/python3.9/site-packages/bark/api.py:66, in semantic_to_waveform(semantic_tokens, history_prompt, temp, silent, output_full)
     54 coarse_tokens = generate_coarse(
     55     semantic_tokens,
     56     history_prompt=history_prompt,
   (...)
     59     use_kv_caching=True
     60 )
     61 fine_tokens = generate_fine(
     62     coarse_tokens,
     63     history_prompt=history_prompt,
     64     temp=0.5,
     65 )
---> 66 audio_arr = codec_decode(fine_tokens)
     67 if output_full:
     68     full_generation = {
     69         "semantic_prompt": semantic_tokens,
     70         "coarse_prompt": coarse_tokens,
     71         "fine_prompt": fine_tokens,
     72     }

File ~/Code/Miniforge3/lib/python3.9/site-packages/bark/generation.py:860, in codec_decode(fine_tokens)
    858 arr = arr.to(device)
    859 arr = arr.transpose(0, 1)
--> 860 emb = model.quantizer.decode(arr)
    861 out = model.decoder(emb)
    862 audio_arr = out.detach().cpu().numpy().squeeze()

File ~/Code/Miniforge3/lib/python3.9/site-packages/encodec/quantization/vq.py:112, in ResidualVectorQuantizer.decode(self, codes)
    109 def decode(self, codes: torch.Tensor) -> torch.Tensor:
    110     """Decode the given codes to the quantized representation.
    111     """
--> 112     quantized = self.vq.decode(codes)
    113     return quantized

File ~/Code/Miniforge3/lib/python3.9/site-packages/encodec/quantization/core_vq.py:361, in ResidualVectorQuantization.decode(self, q_indices)
    359 for i, indices in enumerate(q_indices):
    360     layer = self.layers[i]
--> 361     quantized = layer.decode(indices)
    362     quantized_out = quantized_out + quantized
    363 return quantized_out

File ~/Code/Miniforge3/lib/python3.9/site-packages/encodec/quantization/core_vq.py:288, in VectorQuantization.decode(self, embed_ind)
    287 def decode(self, embed_ind):
--> 288     quantize = self._codebook.decode(embed_ind)
    289     quantize = self.project_out(quantize)
    290     quantize = rearrange(quantize, "b n d -> b d n")

File ~/Code/Miniforge3/lib/python3.9/site-packages/encodec/quantization/core_vq.py:202, in EuclideanCodebook.decode(self, embed_ind)
    201 def decode(self, embed_ind):
--> 202     quantize = self.dequantize(embed_ind)
    203     return quantize

File ~/Code/Miniforge3/lib/python3.9/site-packages/encodec/quantization/core_vq.py:188, in EuclideanCodebook.dequantize(self, embed_ind)
    187 def dequantize(self, embed_ind):
--> 188     quantize = F.embedding(embed_ind, self.embed)
    189     return quantize

File ~/Code/Miniforge3/lib/python3.9/site-packages/torch/nn/functional.py:2210, in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   2204     # Note [embedding_renorm set_grad_enabled]
   2205     # XXX: equivalent to
   2206     # with torch.no_grad():
   2207     #   torch.embedding_renorm_
   2208     # remove once script supports set_grad_enabled
   2209     _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2210 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)

IndexError: index out of range in self

My device: M1 Pro MBP torch.__version__: 2.0.0

Originally posted by @hywhuangyuwei in https://github.com/suno-ai/bark/issues/22#issuecomment-1523963160

Apr 26 '23 19:04 hywhuangyuwei

try torch nightly

Apr 26 '23 22:04 gkucsko

Having the same problem with IndexError: index out of range in self (m1 pro). Running with torch nightly gives me:

---------------------------------------------------------------------------
OSError                                   Traceback (most recent call last)
Cell In[2], line 1
----> 1 from bark import SAMPLE_RATE, generate_audio, preload_models
      2 from IPython.display import Audio
      4 # download and load all models

File ~/Documents/Code/bark-ai/bark/bark/__init__.py:1
----> 1 from .api import generate_audio, text_to_semantic, semantic_to_waveform, save_as_prompt
      2 from .generation import SAMPLE_RATE, preload_models

File ~/Documents/Code/bark-ai/bark/bark/api.py:5
      1 from typing import Optional
      3 import numpy as np
----> 5 from .generation import codec_decode, generate_coarse, generate_fine, generate_text_semantic
      8 def text_to_semantic(
      9     text: str,
     10     history_prompt: Optional[str] = None,
     11     temp: float = 0.7,
     12     silent: bool = False,
     13 ):
     14     """Generate semantic array from text.
     15 
     16     Args:
   (...)
     23         numpy semantic array to be fed into `semantic_to_waveform`
     24     """

File ~/Documents/Code/bark-ai/bark/bark/generation.py:8
      5 import re
      6 import requests
----> 8 from encodec import EncodecModel
      9 import funcy
     10 import logging

File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/encodec/__init__.py:12
      8 """EnCodec neural audio codec."""
     10 __version__ = "0.1.1"
---> 12 from .model import EncodecModel
     13 from .compress import compress, decompress

File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/encodec/model.py:19
     17 from . import quantization as qt
     18 from . import modules as m
---> 19 from .utils import _check_checksum, _linear_overlap_add, _get_checkpoint_url
     22 ROOT_URL = 'https://dl.fbaipublicfiles.com/encodec/v0/'
     24 EncodedFrame = tp.Tuple[torch.Tensor, tp.Optional[torch.Tensor]]

File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/encodec/utils.py:14
     11 import typing as tp
     13 import torch
---> 14 import torchaudio
     17 def _linear_overlap_add(frames: tp.List[torch.Tensor], stride: int):
     18     # Generic overlap add, with linear fade-in/fade-out, supporting complex scenario
     19     # e.g., more than 2 frames per position.
   (...)
     34     #   - if more than 2 frames overlap at a given point, we hope that by induction
     35     #      something sensible happens.
     36     assert len(frames)

File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torchaudio/__init__.py:1
----> 1 from torchaudio import (  # noqa: F401
      2     _extension,
      3     compliance,
      4     datasets,
      5     functional,
      6     io,
      7     kaldi_io,
      8     models,
      9     pipelines,
     10     sox_effects,
     11     transforms,
     12     utils,
     13 )
     15 from torchaudio.backend import get_audio_backend, list_audio_backends, set_audio_backend
     17 try:

File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torchaudio/_extension/__init__.py:43
     41 _IS_KALDI_AVAILABLE = False
     42 if _IS_TORCHAUDIO_EXT_AVAILABLE:
---> 43     _load_lib("libtorchaudio")
     45     import torchaudio.lib._torchaudio  # noqa
     47     _check_cuda_version()

File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torchaudio/_extension/utils.py:61, in _load_lib(lib)
     59 if not path.exists():
     60     return False
---> 61 torch.ops.load_library(path)
     62 torch.classes.load_library(path)
     63 return True

File ~/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torch/_ops.py:787, in _Ops.load_library(self, path)
    782 path = _utils_internal.resolve_library_path(path)
    783 with dl_open_guard():
    784     # Import the shared library into the process, thus running its
    785     # static (global) initialization code in order to register custom
    786     # operators with the JIT.
--> 787     ctypes.CDLL(path)
    788 self.loaded_libraries.add(path)

File /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/ctypes/__init__.py:366, in CDLL.__init__(self, name, mode, handle, use_errno, use_last_error, winmode)
    363 self._FuncPtr = _FuncPtr
    365 if handle is None:
--> 366     self._handle = _dlopen(self._name, mode)
    367 else:
    368     self._handle = handle

OSError: dlopen(/Users/ph/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torchaudio/lib/libtorchaudio.so, 0x0006): Symbol not found: __ZN2at4_ops9fft_irfft4callERKNS_6TensorEN3c108optionalIxEExNS6_INS5_17basic_string_viewIcEEEE
  Referenced from: <BBBCC85A-CF40-37F8-B811-463E29724353> /Users/ph/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torchaudio/lib/libtorchaudio.so
  Expected in:     <6BE2250B-52C1-3362-ABF0-734C19B4356D> /Users/ph/Documents/Code/bark-ai/bark/venv/lib/python3.9/site-packages/torch/lib/libtorch_cpu.dylib

Apr 28 '23 10:04 pstuerner

Are you sure you ran `pip uninstall torch torchaudio torchvision` beforehand? This looks like a torch issue that has nothing to do with bark.

Apr 28 '23 20:04 gkucsko

Hi! Same problem here. Using SUNO_ENABLE_MPS = True and torch 2.0.0 results in the following error:

Traceback (most recent call last):
  File "test.py", line 16, in <module>
    audio_array = generate_audio(text_prompt)
  File "/Users/user/workspace/ai/py/bark/bark/api.py", line 113, in generate_audio
    out = semantic_to_waveform(
  File "/Users/user/workspace/ai/py/bark/bark/api.py", line 66, in semantic_to_waveform
    audio_arr = codec_decode(fine_tokens)
  File "/Users/user/workspace/ai/py/bark/bark/generation.py", line 824, in codec_decode
    emb = model.quantizer.decode(arr)
  File "/opt/homebrew/anaconda3/envs/torch-mps/lib/python3.8/site-packages/encodec/quantization/vq.py", line 112, in decode
    quantized = self.vq.decode(codes)
  File "/opt/homebrew/anaconda3/envs/torch-mps/lib/python3.8/site-packages/encodec/quantization/core_vq.py", line 361, in decode
    quantized = layer.decode(indices)
  File "/opt/homebrew/anaconda3/envs/torch-mps/lib/python3.8/site-packages/encodec/quantization/core_vq.py", line 288, in decode
    quantize = self._codebook.decode(embed_ind)
  File "/opt/homebrew/anaconda3/envs/torch-mps/lib/python3.8/site-packages/encodec/quantization/core_vq.py", line 202, in decode
    quantize = self.dequantize(embed_ind)
  File "/opt/homebrew/anaconda3/envs/torch-mps/lib/python3.8/site-packages/encodec/quantization/core_vq.py", line 188, in dequantize
    quantize = F.embedding(embed_ind, self.embed)
  File "/opt/homebrew/anaconda3/envs/torch-mps/lib/python3.8/site-packages/torch/nn/functional.py", line 2210, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
IndexError: index out of range in self

And using torch nightly:

Traceback (most recent call last):
  File "test.py", line 4, in <module>
    from bark import SAMPLE_RATE, generate_audio, preload_models
  File "/Users/user/workspace/ai/py/bark/bark/__init__.py", line 1, in <module>
    from .api import generate_audio, text_to_semantic, semantic_to_waveform, save_as_prompt
  File "/Users/user/workspace/ai/py/bark/bark/api.py", line 5, in <module>
    from .generation import codec_decode, generate_coarse, generate_fine, generate_text_semantic
  File "/Users/user/workspace/ai/py/bark/bark/generation.py", line 6, in <module>
    from encodec import EncodecModel
  File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/encodec/__init__.py", line 12, in <module>
    from .model import EncodecModel
  File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/encodec/model.py", line 19, in <module>
    from .utils import _check_checksum, _linear_overlap_add, _get_checkpoint_url
  File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/encodec/utils.py", line 14, in <module>
    import torchaudio
  File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torchaudio/__init__.py", line 1, in <module>
    from torchaudio import (  # noqa: F401
  File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torchaudio/_extension/__init__.py", line 43, in <module>
    _load_lib("libtorchaudio")
  File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torchaudio/_extension/utils.py", line 61, in _load_lib
    torch.ops.load_library(path)
  File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torch/_ops.py", line 787, in load_library
    ctypes.CDLL(path)
  File "/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/ctypes/__init__.py", line 373, in __init__
    self._handle = _dlopen(self._name, mode)
OSError: dlopen(/opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torchaudio/lib/libtorchaudio.so, 0x0006): Symbol not found: __ZN2at4_ops9fft_irfft4callERKNS_6TensorEN3c108optionalIxEExNS6_INS5_17basic_string_viewIcEEEE
  Referenced from: <6D80FE4D-3DC4-3CA2-85A9-C02FE346BA28> /opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torchaudio/lib/libtorchaudio.so
  Expected in:     <F328CA24-D854-3CF5-9375-EF3FE2A68B8C> /opt/homebrew/anaconda3/envs/torch-nightly/lib/python3.8/site-packages/torch/lib/libtorch_cpu.dylib

May 02 '23 00:05 fluxa

This looks like a torchaudio issue. When you install the nightly build, make sure to first uninstall torch, torchvision AND torchaudio, and then reinstall all three from nightly. I'll try to find some time in the next few days to troubleshoot this. It was running fine for me yesterday, but our setups are probably slightly different.

May 02 '23 00:05 gkucsko

Hi @gkucsko, I did run `pip uninstall torch torchvision torchaudio` before installing the nightly build. I also deleted the entire virtual environment (`rm -rf`), created a new one, and installed only the nightly build to make sure that everything was set up properly - still the same error.

May 02 '23 09:05 pstuerner

Just tried again with the latest master branch and for some reason it worked. The steps were: `git clone` the repo, `pip install .` the package, `pip uninstall torch torchvision torchaudio`, and then `pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu`.

May 02 '23 12:05 pstuerner

great! gonna close for now, feel free to reopen if needed

May 02 '23 13:05 gkucsko

bark
bark copied to clipboard

Got `IndexError: index out of range in self` when turning on `SUNO_ENABLE_MPS`

bark bark copied to clipboard

Got `IndexError: index out of range in self` when turning on `SUNO_ENABLE_MPS`

bark
bark copied to clipboard