fairseq
fairseq copied to clipboard
wav2vec_seq2seq prev_output_tokens error in finetuning
❓ Questions and Help
What is your question?
Hi, I'm trying to fine-tune wav2vec2 using the wav2vec_seq2seq transformer decoder, but training stops because of one line of code.
Why is prev_output_tokens a list? When I print prev_output_tokens, I get a list of tensors like [tensor([a1, a2, a3, ..., ax, ay, az]), tensor([b1, b2, b3, ..., bx, by, bz]), ..., tensor([c1, c2, c3, ..., cx, cy, cz])]. This value should be a tensor, but it is a list.
This is the last error message, raised in {temp_path}/fairseq/models/wav2vec/wav2vec2_asr.py at roughly line 630: AttributeError: 'list' object has no attribute 'long'
Here are my training configs. If I change the criterion to something else, will this error be resolved?
{ "_name": null, "common": { "_name": null, "no_progress_bar": false, "log_interval": 100, "log_format": "json", "log_file": null, "aim_repo": null, "aim_run_hash": null, "tensorboard_logdir": null, "wandb_project": null, "azureml_logging": false, "seed": 1, "cpu": false, "tpu": false, "bf16": false, "memory_efficient_bf16": false, "fp16": false, "memory_efficient_fp16": false, "fp16_no_flatten_grads": false, "fp16_init_scale": 128, "fp16_scale_window": null, "fp16_scale_tolerance": 0.0, "on_cpu_convert_precision": false, "min_loss_scale": 0.0001, "threshold_loss_scale": null, "amp": false, "amp_batch_retries": 2, "amp_init_scale": 128, "amp_scale_window": null, "user_dir": null, "empty_cache_freq": 0, "all_gather_list_size": 16384, "model_parallel_size": 1, "quantization_config_path": null, "profile": false, "reset_logging": false, "suppress_crashes": false, "use_plasma_view": false, "plasma_path": "/tmp/plasma" }, "common_eval": { "_name": null, "path": null, "post_process": null, "quiet": false, "model_overrides": "{}", "results_path": null }, "distributed_training": { "_name": null, "distributed_world_size": 1, "distributed_num_procs": 1, "distributed_rank": 0, "distributed_backend": "nccl", "distributed_init_method": "env://", "distributed_port": 12345, "device_id": 0, "distributed_no_spawn": false, "ddp_backend": "legacy_ddp", "ddp_comm_hook": "none", "bucket_cap_mb": 25, "fix_batches_to_gpus": false, "find_unused_parameters": false, "gradient_as_bucket_view": false, "fast_stat_sync": false, "heartbeat_timeout": -1, "broadcast_buffers": false, "slowmo_momentum": null, "slowmo_base_algorithm": "localsgd", "localsgd_frequency": 3, "nprocs_per_node": 1, "pipeline_model_parallel": false, "pipeline_balance": null, "pipeline_devices": null, "pipeline_chunks": 0, "pipeline_encoder_balance": null, "pipeline_encoder_devices": null, "pipeline_decoder_balance": null, "pipeline_decoder_devices": null, "pipeline_checkpoint": "never", "zero_sharding": "none", "fp16": 
false, "memory_efficient_fp16": false, "tpu": false, "no_reshard_after_forward": false, "fp32_reduce_scatter": false, "cpu_offload": false, "use_sharded_state": false, "not_fsdp_flatten_parameters": false }, "dataset": { "_name": null, "num_workers": 1, "skip_invalid_size_inputs_valid_test": true, "max_tokens": 4800000, "batch_size": null, "required_batch_size_multiple": 8, "required_seq_len_multiple": 1, "dataset_impl": null, "data_buffer_size": 10, "train_subset": "train", "valid_subset": "valid", "combine_valid_subsets": null, "ignore_unused_valid_subsets": false, "validate_interval": 1, "validate_interval_updates": 0, "validate_after_updates": 0, "fixed_validation_seed": null, "disable_validation": false, "max_tokens_valid": 4800000, "batch_size_valid": null, "max_valid_steps": null, "curriculum": 0, "gen_subset": "test", "num_shards": 1, "shard_id": 0, "grouped_shuffling": false, "update_epoch_batch_itr": false, "update_ordered_indices_seed": false }, "optimization": { "_name": null, "max_epoch": 0, "max_update": 3200000, "stop_time_hours": 0.0, "clip_norm": 0.0, "sentence_avg": true, "update_freq": [ 1 ], "lr": [ 0.0005 ], "stop_min_lr": -1.0, "use_bmuf": false, "skip_remainder_batch": false }, "checkpoint": { "_name": null, "save_dir": "checkpoints", "restore_file": "checkpoint_last.pt", "continue_once": null, "finetune_from_model": null, "reset_dataloader": false, "reset_lr_scheduler": false, "reset_meters": false, "reset_optimizer": true, "optimizer_overrides": "{}", "save_interval": 1, "save_interval_updates": 0, "keep_interval_updates": -1, "keep_interval_updates_pattern": -1, "keep_last_epochs": -1, "keep_best_checkpoints": -1, "no_save": false, "no_epoch_checkpoints": false, "no_last_checkpoints": false, "no_save_optimizer_state": false, "best_checkpoint_metric": "wer", "maximize_best_checkpoint_metric": false, "patience": -1, "checkpoint_suffix": "", "checkpoint_shard_count": 1, "load_checkpoint_on_all_dp_ranks": false, 
"write_checkpoints_asynchronously": false, "model_parallel_size": 1 }, "bmuf": { "_name": null, "block_lr": 1.0, "block_momentum": 0.875, "global_sync_iter": 50, "warmup_iterations": 500, "use_nbm": false, "average_sync": false, "distributed_world_size": 1 }, "generation": { "_name": null, "beam": 5, "nbest": 1, "max_len_a": 0.0, "max_len_b": 200, "min_len": 1, "match_source_len": false, "unnormalized": false, "no_early_stop": false, "no_beamable_mm": false, "lenpen": 1.0, "unkpen": 0.0, "replace_unk": null, "sacrebleu": false, "score_reference": false, "prefix_size": 0, "no_repeat_ngram_size": 0, "sampling": false, "sampling_topk": -1, "sampling_topp": -1.0, "constraints": null, "temperature": 1.0, "diverse_beam_groups": -1, "diverse_beam_strength": 0.5, "diversity_rate": -1.0, "print_alignment": null, "print_step": false, "lm_path": null, "lm_weight": 0.0, "iter_decode_eos_penalty": 0.0, "iter_decode_max_iter": 10, "iter_decode_force_max_iter": false, "iter_decode_with_beam": 1, "iter_decode_with_external_reranker": false, "retain_iter_history": false, "retain_dropout": false, "retain_dropout_modules": null, "decoding_format": null, "no_seed_provided": false, "eos_token": null }, "eval_lm": { "_name": null, "output_word_probs": false, "output_word_stats": false, "context_window": 0, "softmax_batch": 9223372036854775807 }, "interactive": { "_name": null, "buffer_size": 0, "input": "-" }, "model": { "_name": "wav2vec_seq2seq", "w2v_path": "{some_file}", "no_pretrained_weights": false, "dropout_input": 0.0, "final_dropout": 0.0, "dropout": 0.0, "attention_dropout": 0.0, "activation_dropout": 0.1, "conv_feature_layers": "[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]", "encoder_embed_dim": 768, "apply_mask": true, "mask_length": 10, "mask_prob": 0.5, "mask_selection": "static", "mask_other": 0.0, "no_mask_overlap": false, "mask_min_space": 1, "require_same_masks": true, "mask_dropout": 0.0, "mask_channel_length": 64, "mask_channel_prob": 0.1, 
"mask_channel_selection": "static", "mask_channel_other": 0.0, "no_mask_channel_overlap": false, "freeze_finetune_updates": 0, "feature_grad_mult": 0.0, "layerdrop": 0.1, "mask_channel_min_space": 1, "mask_channel_before": false, "normalize": false, "data": "{some_data}", "w2v_args": null, "offload_activations": false, "min_params_to_wrap": 100000000, "checkpoint_activations": false, "ddp_backend": "lagecy_ddp", "decoder_embed_dim": 768, "decoder_ffn_embed_dim": 3072, "decoder_layers": 6, "decoder_layerdrop": 0.0, "decoder_attention_heads": 4, "decoder_learned_pos": false, "decoder_normalize_before": false, "no_token_positional_embeddings": false, "decoder_dropout": 0.0, "decoder_attention_dropout": 0.0, "decoder_activation_dropout": 0.0, "max_target_positions": 2048, "share_decoder_input_output_embed": false, "autoregressive": true }, "task": { "_name": "audio_finetuning", "data": "{some_data_path}", "labels": "ltr", "binarized_dataset": false, "sample_rate": 8000, "normalize": false, "enable_padding": false, "max_sample_size": null, "min_sample_size": null, "num_batch_buckets": 0, "precompute_mask_indices": false, "inferred_w2v_config": null, "tpu": false, "text_compression_level": "none", "eval_wer": false, "eval_wer_config": { "_name": null, "beam": 5, "nbest": 1, "max_len_a": 0.0, "max_len_b": 200, "min_len": 1, "match_source_len": false, "unnormalized": false, "no_early_stop": false, "no_beamable_mm": false, "lenpen": 1.0, "unkpen": 0.0, "replace_unk": null, "sacrebleu": false, "score_reference": false, "prefix_size": 0, "no_repeat_ngram_size": 0, "sampling": false, "sampling_topk": -1, "sampling_topp": -1.0, "constraints": null, "temperature": 1.0, "diverse_beam_groups": -1, "diverse_beam_strength": 0.5, "diversity_rate": -1.0, "print_alignment": null, "print_step": false, "lm_path": null, "lm_weight": 0.0, "iter_decode_eos_penalty": 0.0, "iter_decode_max_iter": 10, "iter_decode_force_max_iter": false, "iter_decode_with_beam": 1, 
"iter_decode_with_external_reranker": false, "retain_iter_history": false, "retain_dropout": false, "retain_dropout_modules": null, "decoding_format": null, "no_seed_provided": false, "eos_token": null }, "eval_wer_tokenizer": null, "eval_wer_post_process": "letter", "eval_bleu": false, "eval_bleu_detok": null, "eval_bleu_detok_args": "{}", "eval_tokenized_bleu": false, "eval_bleu_remove_bpe": null, "eval_bleu_args": "{}", "eval_bleu_print_samples": false, "autoregressive": true }, "criterion": { "_name": "ctc", "zero_infinity": true, "sentence_avg": true, "post_process": "letter", "wer_kenlm_model": null, "wer_lexicon": null, "wer_lm_weight": 2.0, "wer_word_score": -1.0, "wer_args": null }, "optimizer": { "_name": "adam", "adam_betas": "(0.9,0.98)", "adam_eps": 1e-08, "weight_decay": 0.0, "use_old_adam": false, "fp16_adam_stats": false, "tpu": false, "lr": [ 0.0005 ] }, "lr_scheduler": { "_name": "tri_stage", "warmup_steps": 0, "hold_steps": 0, "decay_steps": 0, "phase_ratio": [ 0.1, 0.4, 0.5 ], "init_lr_scale": 0.01, "final_lr_scale": 0.05, "max_update": 3200000.0, "lr": [ 0.0005 ] }, "scoring": null, "bpe": null, "tokenizer": null, "ema": { "_name": null, "store_ema": false, "ema_decay": 0.9999, "ema_start_update": 0, "ema_seed_model": null, "ema_update_freq": 1, "ema_fp32": false }, "job_logging_cfg": { "version": 1, "formatters": { "simple": { "format": "[%(asctime)s][%(name)s][%(levelname)s] - %(message)s" } }, "handlers": { "console": { "class": "logging.StreamHandler", "formatter": "simple", "stream": "ext://sys.stdout" }, "file": { "class": "logging.FileHandler", "formatter": "simple", "filename": "hydra_train.log" } }, "root": { "level": "INFO", "handlers": [ "console", "file" ] }, "disable_existing_loggers": false } }
Code
prev_output_tokens = prev_output_tokens.long()
What's your environment?
- fairseq Version (e.g., 1.0 or main): 0.12.2
- PyTorch Version (e.g., 1.0): 1.12.1+cu113
- OS (e.g., Linux): ubuntu 20.04
- How you installed fairseq (`pip`, source): pip
- Build command you used (if compiling from source):
- Python version: 3.8
- CUDA/cuDNN version: 11.3 /
- GPU models and configuration: a6000
- Any other relevant information: