Error during training on a private dataset
Morning, I used your notebook Speech Emotion Recognition (Wav2Vec 2.0) with another dataset and got an error during training. Could you help me, please? The code and the error are below.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir=finetune_output_dir,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    evaluation_strategy="steps",  # "epoch"
    gradient_accumulation_steps=1,
    num_train_epochs=50,
    fp16=True,
    save_steps=10,   # n_steps
    eval_steps=10,   # n_steps
    logging_steps=10,
    learning_rate=1e-4,
    save_total_limit=10,
)
# CTCTrainer, model, data_collator, compute_metrics, train_dataset, eval_dataset
# and processor are all defined in earlier cells of the notebook.
trainer = CTCTrainer(
    model=model,
    data_collator=data_collator,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=processor.feature_extractor,
)

trainer.train()
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-32-3435b262f1ae> in <module>
----> 1 trainer.train()
/anaconda/envs/azureml_py38_pytorch/lib/python3.8/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1330 tr_loss_step = self.training_step(model, inputs)
1331 else:
-> 1332 tr_loss_step = self.training_step(model, inputs)
1333
1334 if (
<ipython-input-29-878b4353167f> in training_step(self, model, inputs)
43 if self.use_amp:
44 with autocast():
---> 45 loss = self.compute_loss(model, inputs)
46 else:
47 loss = self.compute_loss(model, inputs)
/anaconda/envs/azureml_py38_pytorch/lib/python3.8/site-packages/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
1921 else:
1922 labels = None
-> 1923 outputs = model(**inputs)
1924 # Save past state if it exists
1925 # TODO: this needs to be fixed and made cleaner later.
/anaconda/envs/azureml_py38_pytorch/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
<ipython-input-16-dd9fe3ea0f13> in forward(self, input_values, attention_mask, output_attentions, output_hidden_states, return_dict, labels)
70 ):
71 return_dict = return_dict if return_dict is not None else self.config.use_return_dict
---> 72 outputs = self.wav2vec2(
73 input_values,
74 attention_mask=attention_mask,
/anaconda/envs/azureml_py38_pytorch/lib/python3.8/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/anaconda/envs/azureml_py38_pytorch/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py in forward(self, input_values, attention_mask, mask_time_indices, output_attentions, output_hidden_states, return_dict)
1285
1286 hidden_states, extract_features = self.feature_projection(extract_features)
-> 1287 hidden_states = self._mask_hidden_states(
1288 hidden_states, mask_time_indices=mask_time_indices, attention_mask=attention_mask
1289 )
/anaconda/envs/azureml_py38_pytorch/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py in _mask_hidden_states(self, hidden_states, mask_time_indices, attention_mask)
1228 hidden_states[mask_time_indices] = self.masked_spec_embed.to(hidden_states.dtype)
1229 elif self.config.mask_time_prob > 0 and self.training:
-> 1230 mask_time_indices = _compute_mask_indices(
1231 (batch_size, sequence_length),
1232 mask_prob=self.config.mask_time_prob,
/anaconda/envs/azureml_py38_pytorch/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py in _compute_mask_indices(shape, mask_prob, mask_length, attention_mask, min_masks)
240
241 # get random indices to mask
--> 242 spec_aug_mask_idx = np.random.choice(
243 np.arange(input_length - (mask_length - 1)), num_masked_span, replace=False
244 )
mtrand.pyx in numpy.random.mtrand.RandomState.choice()
ValueError: Cannot take a larger sample than population when 'replace=False'
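If I read the traceback correctly, _compute_mask_indices ends up asking np.random.choice for more mask start positions than actually exist for a given clip. I can reproduce the same ValueError outside the trainer with this toy snippet (the numbers are my own guesses for a very short clip, not values taken from my data; I believe mask_time_length defaults to 10 for Wav2Vec 2.0):

import numpy as np

# Hypothetical numbers for a very short clip: the feature encoder produces fewer
# frames than mask_time_length, so there is no valid start position left to sample.
input_length = 6      # frames after the wav2vec2 feature encoder (assumption)
mask_length = 10      # mask_time_length (default for Wav2Vec 2.0, I believe)
num_masked_span = 2   # number of spans the model tries to mask (assumption)

candidates = np.arange(input_length - (mask_length - 1))  # empty array in this case
np.random.choice(candidates, num_masked_span, replace=False)
# ValueError: Cannot take a larger sample than population when 'replace=False'

Does that mean some of my clips are simply too short (fewer encoder frames than mask_time_length), and that I should filter them out, or lower mask_time_prob / mask_time_length, before training?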