parler-tts
error regarding some tokenizer issue
When I run the sample script I keep getting this error message, among others. I'm not sure how serious it is or whether it even impacts performance:
You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
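For reference, the script is essentially the quick-start example from the parler-tts README; the sketch below is reconstructed from that example and the traceback further down, so the exact lines may differ slightly from what I'm actually running:

```python
import torch
import soundfile as sf
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer

device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Loading the model works; it is the AutoTokenizer call that emits the
# `add_prefix_space` warning (and, for some users, the ValueError below).
model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler_tts_mini_v0.1").to(device)
tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler_tts_mini_v0.1")

prompt = "Hey, how are you doing today?"
description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."

input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)
audio_arr = generation.cpu().numpy().squeeze()
sf.write("parler_tts_out.wav", audio_arr, model.config.sampling_rate)
```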
I got the same issue, with this error:
ValueError Traceback (most recent call last)
Cell In[9], line 9
6 device = "cuda:0" if torch.cuda.is_available() else "cpu"
8 model = ParlerTTSForConditionalGeneration.from_pretrained("parler-tts/parler_tts_mini_v0.1").to(device)
----> 9 tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler_tts_mini_v0.1")
11 prompt = "Hey, how are you doing today?"
12 description = "A female speaker with a slightly low-pitched voice delivers her words quite expressively, in a very confined sounding environment with clear audio quality. She speaks very fast."
File ~/.virtualenvs/voice-cloning-tts/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py:837, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
833 if tokenizer_class is None:
834 raise ValueError(
835 f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
836 )
--> 837 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
839 # Otherwise we have to be creative.
840 # if model is an encoder decoder, the encoder tokenizer class is used by default
841 if isinstance(config, EncoderDecoderConfig):
File ~/.virtualenvs/voice-cloning-tts/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2086, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)
2083 else:
2084 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 2086 return cls._from_pretrained(
2087 resolved_vocab_files,
2088 pretrained_model_name_or_path,
2089 init_configuration,
2090 *init_inputs,
2091 token=token,
2092 cache_dir=cache_dir,
2093 local_files_only=local_files_only,
2094 _commit_hash=commit_hash,
2095 _is_local=is_local,
2096 trust_remote_code=trust_remote_code,
2097 **kwargs,
2098 )
File ~/.virtualenvs/voice-cloning-tts/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2325, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, trust_remote_code, *init_inputs, **kwargs)
2323 # Instantiate the tokenizer.
2324 try:
-> 2325 tokenizer = cls(*init_inputs, **init_kwargs)
2326 except OSError:
2327 raise OSError(
2328 "Unable to load vocabulary from file. "
2329 "Please check that the provided vocabulary is accessible and not corrupted."
2330 )
File ~/.virtualenvs/voice-cloning-tts/lib/python3.10/site-packages/transformers/models/t5/tokenization_t5_fast.py:146, in T5TokenizerFast.__init__(self, vocab_file, tokenizer_file, eos_token, unk_token, pad_token, extra_ids, additional_special_tokens, add_prefix_space, **kwargs)
141 logger.warning_once(
142 "You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers"
143 )
144 kwargs["from_slow"] = True
--> 146 super().__init__(
147 vocab_file,
148 tokenizer_file=tokenizer_file,
149 eos_token=eos_token,
150 unk_token=unk_token,
151 pad_token=pad_token,
152 extra_ids=extra_ids,
153 additional_special_tokens=additional_special_tokens,
154 **kwargs,
155 )
157 self.vocab_file = vocab_file
158 self._extra_ids = extra_ids
File ~/.virtualenvs/voice-cloning-tts/lib/python3.10/site-packages/transformers/tokenization_utils_fast.py:102, in PreTrainedTokenizerFast.__init__(self, *args, **kwargs)
99 added_tokens_decoder = kwargs.pop("added_tokens_decoder", {})
101 if from_slow and slow_tokenizer is None and self.slow_tokenizer_class is None:
--> 102 raise ValueError(
103 "Cannot instantiate this tokenizer from a slow version. If it's based on sentencepiece, make sure you "
104 "have sentencepiece installed."
105 )
107 if tokenizer_object is not None:
108 fast_tokenizer = copy.deepcopy(tokenizer_object)
ValueError: Cannot instantiate this tokenizer from a slow version. If it's based on sentencepiece, make sure you have sentencepiece installed.
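The ValueError message itself points at the likely cause: setting `add_prefix_space` forces the fast T5 tokenizer to be rebuilt from the slow, sentencepiece-based one, which fails when `sentencepiece` is missing from the virtualenv. A minimal check along those lines (just a sketch, assuming the missing-package case):

```python
# Verify sentencepiece is importable in this virtualenv before retrying
# the tokenizer load; its absence is the usual trigger for this ValueError.
import importlib.util

if importlib.util.find_spec("sentencepiece") is None:
    print("sentencepiece is not installed in this virtualenv; "
          "`pip install sentencepiece` and restart the kernel.")
else:
    from transformers import AutoTokenizer
    # With sentencepiece available, the slow->fast conversion that
    # `add_prefix_space` forces should no longer raise this ValueError
    # (the warning itself may still be printed).
    tokenizer = AutoTokenizer.from_pretrained("parler-tts/parler_tts_mini_v0.1")
    print(type(tokenizer).__name__)
```

If `sentencepiece` turns out to be missing, installing it into the same virtualenv and re-running the script is usually enough.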
Hey there, thanks for opening this issue. Could you send your `transformers`, `tokenizers` and `sentencepiece` versions? Thanks!
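A quick way to collect those version numbers from inside the same virtualenv (standard library only, nothing parler-tts specific):

```python
# Print the installed versions of the packages relevant to this error.
from importlib.metadata import PackageNotFoundError, version

for pkg in ("transformers", "tokenizers", "sentencepiece"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")
```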