
Error "No valid path is found for training"

Open Looong01 opened this issue 2 years ago • 16 comments

(vall-e) loong@Loong-Surface:~/Codes/vall-e$ python -m vall_e.train yaml=config/test/ar.yml
1it [00:00, 3407.23it/s]
Traceback (most recent call last):
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/train.py", line 128, in <module>
    main()
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/train.py", line 33, in main
    train_dl, subtrain_dl, val_dl = create_train_val_dataloader()
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/data.py", line 266, in create_train_val_dataloader
    train_dataset, val_dataset = create_datasets()
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/data.py", line 247, in create_datasets
    train_dataset = VALLEDatset(
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/data.py", line 105, in __init__
    raise ValueError("No valid path is found for training.")
ValueError: No valid path is found for training.

(Screenshots attached in the original issue.)

Env: Python 3.10, WSL2 (Ubuntu 22.04) on Win 11 Pro

Looong01 avatar Jan 31 '23 08:01 Looong01

Did you change test.normalized.txt? If you changed the test file, you need to check vall_e/config.py for min_phones: int = 10 and max_phones: int = 50.
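For a quick sanity check, a sketch like the following (not part of the repo; it assumes the .phn.txt file next to your wav is a space-separated list of phone symbols, which is what _validate() counts via get_phones()) shows whether your transcript falls inside those bounds:

from pathlib import Path

# Rough sketch: count the phones produced for the test transcript and compare the
# number against the min_phones/max_phones defaults mentioned above.
phones = Path("data/test/test.phn.txt").read_text().split()
print(len(phones), "phones; must be between min_phones and max_phones to pass _validate()")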

Check this in data.py:

class VALLEDatset(Dataset):
    def __init__(
        self,
        paths,
        phone_symmap=None,
        spkr_symmap=None,
        min_phones=cfg.min_phones,
        max_phones=cfg.max_phones,
        training=False,
        extra_paths_by_spkr_name: dict[str, list] = {},
    ):
        super().__init__()
        self._head = None
        self.min_phones = min_phones
        self.max_phones = max_phones
        print(self.min_phones)
        print(self.max_phones)
        for path in paths:
            print(_validate(path, self.min_phones, self.max_phones))
        self.paths = [
            path for path in paths if _validate(path, self.min_phones, self.max_phones)
        ]
        print(self.paths)
        self.spkr_symmap = spkr_symmap or self._get_spkr_symmap()
        self.phone_symmap = phone_symmap or self._get_phone_symmap()
        self.training = training
        print(self.training)

        self.paths_by_spkr_name = self._get_paths_by_spkr_name(extra_paths_by_spkr_name)

        self.paths = [
            p for p in self.paths if len(self.paths_by_spkr_name[cfg.get_spkr(p)]) > 1
        ]
        print(self.paths)
        if len(self.paths) == 0 and training:
            raise ValueError("No valid path is found for training.")

        if training:
            self.sampler = Sampler(self.paths, [cfg.get_spkr])
        else:
            self.sampler = None

Veria70 avatar Jan 31 '23 19:01 Veria70

@cache
def _validate(path, min_phones, max_phones):
    phones = get_phones(path)
    unique_phones = list(set(phones))
    print(len(unique_phones))
    print(min_phones)
    print(max_phones)
    if len(unique_phones) == 0:
        return False
    if len(unique_phones) == 1 and unique_phones[0] == "":
        return False
    if len(phones) < min_phones:
        return False
    if len(phones) > max_phones:
        return False
    return True

Veria70 avatar Jan 31 '23 19:01 Veria70

The test files are my own. I have found where the problem is, but I don't know how to fix it. I changed max_phones from 50 to 50000 and added these print() calls to the code, like this:

class VALLEDatset(Dataset):
    def __init__(
        self,
        paths,
        phone_symmap=None,
        spkr_symmap=None,
        min_phones=cfg.min_phones,
        max_phones=cfg.max_phones,
        training=False,
        extra_paths_by_spkr_name: dict[str, list] = {},
    ):
        super().__init__()
        self._head = None
        self.min_phones = min_phones
        self.max_phones = max_phones
        print(self.min_phones)
        print(self.max_phones)
        for path in paths:
            print(_validate(path, self.min_phones, self.max_phones))
        self.paths = [
            path for path in paths if _validate(path, self.min_phones, self.max_phones)
        ]
        print(self.paths)
        self.spkr_symmap = spkr_symmap or self._get_spkr_symmap()
        self.phone_symmap = phone_symmap or self._get_phone_symmap()
        self.training = training
        print(self.training)
        self.paths_by_spkr_name = self._get_paths_by_spkr_name(extra_paths_by_spkr_name)
        print(self.paths_by_spkr_name)
        for p in self.paths:
            print(p)
            print(cfg.get_spkr(p))
            print(len(self.paths_by_spkr_name[cfg.get_spkr(p)]))
        self.paths = [
            p for p in self.paths if len(self.paths_by_spkr_name[cfg.get_spkr(p)]) > 1  # problem is here: len(self.paths_by_spkr_name[cfg.get_spkr(p)]) == 1, not > 1
        ]
        print(self.paths)
        if len(self.paths) == 0 and training:
            raise ValueError("No valid path is found for training.")

        if training:
            self.sampler = Sampler(self.paths, [cfg.get_spkr])
        else:
            self.sampler = None

And it returns:

(vall-e) loong@Loong-Surface:~/Codes/vall-e$ python -m vall_e.train yaml=config/test/ar.yml
1it [00:00, 3916.25it/s]
10
50000
43
10
50000
160
True
[PosixPath('data/test/test.qnt.pt')]
True
{'test': [PosixPath('data/test/test.qnt.pt')]}
data/test/test.qnt.pt
test
1
[]
Traceback (most recent call last):
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/train.py", line 128, in <module>
    main()
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/train.py", line 33, in main
    train_dl, subtrain_dl, val_dl = create_train_val_dataloader()
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/data.py", line 281, in create_train_val_dataloader
    train_dataset, val_dataset = create_datasets()
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/data.py", line 262, in create_datasets
    train_dataset = VALLEDatset(
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/data.py", line 120, in __init__
    raise ValueError("No valid path is found for training.")
ValueError: No valid path is found for training.
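
So the filter only keeps paths whose speaker has more than one utterance, because sample_prompts() later needs a different utterance from the same speaker. A minimal sketch (hypothetical helper, not part of the repo; it assumes the data/<speaker>/<utterance>.qnt.pt layout shown above and the default spkr_name_getter "lambda p: p.parts[-2]", i.e. the parent folder name) to check how many utterances each speaker has:

from collections import Counter
from pathlib import Path

# Count quantized utterances per speaker folder under data/.
counts = Counter(p.parts[-2] for p in Path("data").glob("*/*.qnt.pt"))
for spkr, n in sorted(counts.items()):
    # The dataset filter keeps a path only if its speaker has more than one utterance,
    # so every speaker used for training needs at least 2 here.
    flag = "" if n > 1 else "  <-- needs at least 2 utterances"
    print(f"{spkr}: {n}{flag}")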

Looong01 avatar Feb 01 '23 02:02 Looong01

I tried changing "p for p in self.paths if len(self.paths_by_spkr_name[cfg.get_spkr(p)]) > 1" to "p for p in self.paths if len(self.paths_by_spkr_name[cfg.get_spkr(p)]) > 0", and it returns:

(vall-e) loong@Loong-Surface:~/Codes/vall-e$ python -m vall_e.train yaml=config/test/ar.yml
2it [00:00, 6892.86it/s]
10
50000
43
10
50000
160
True
43
10
50000
160
True
[PosixPath('data/test/test.qnt.pt'), PosixPath('data/test/test2.qnt.pt')]
True
{'test': [PosixPath('data/test/test.qnt.pt'), PosixPath('data/test/test2.qnt.pt')]}
data/test/test.qnt.pt
test
2
data/test/test2.qnt.pt
test
2
[PosixPath('data/test/test.qnt.pt'), PosixPath('data/test/test2.qnt.pt')]
10
50000
[]
False
{'test': [PosixPath('data/test/test.qnt.pt'), PosixPath('data/test/test2.qnt.pt')]}
[]
2023-02-01 10:13:51 - vall_e.data - INFO - GR=0;LR=0 -
{'</s>': 1, '<s>': 2, 'AA1': 3, 'AE1': 4, 'AE2': 5, 'AH0': 6, 'AH1': 7, 'AW1': 8, 'AY1': 9, 'B': 10, 'CH': 11, 'D': 12, 'DH': 13, 'EH0': 14, 'EH1': 15, 'EH2': 16, 'ER0': 17, 'EY1': 18, 'F': 19, 'HH': 20, 'IH0': 21, 'IH1': 22, 'IY0': 23, 'IY1': 24, 'JH': 25, 'K': 26, 'L': 27, 'M': 28, 'N': 29, 'NG': 30, 'OW1': 31, 'OW2': 32, 'P': 33, 'R': 34, 'S': 35, 'T': 36, 'UH1': 37, 'UW1': 38, 'V': 39, 'W': 40, 'Y': 41, 'Z': 42, '_': 43}
2023-02-01 10:13:51 - vall_e.data - INFO - GR=0;LR=0 -
{'test': 0}
2023-02-01 10:13:51 - vall_e.data - INFO - GR=0;LR=0 -
#samples (train): 2.
2023-02-01 10:13:51 - vall_e.data - INFO - GR=0;LR=0 -
#samples (val): 0.
[2023-02-01 10:13:51,848] [INFO] [comm.py:657:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
2023-02-01 10:13:53 - torch.distributed.distributed_c10d - INFO - GR=0;LR=0 -
Added key: store_based_barrier_key:1 to store for rank: 0
2023-02-01 10:13:53 - torch.distributed.distributed_c10d - INFO - GR=0;LR=0 -
Rank 0: Completed store-based barrier for key:store_based_barrier_key:1 with 1 nodes.
2023-02-01 10:13:55 - torch.distributed.distributed_c10d - INFO - GR=0;LR=0 -
Added key: store_based_barrier_key:2 to store for rank: 0
2023-02-01 10:13:55 - torch.distributed.distributed_c10d - INFO - GR=0;LR=0 -
Rank 0: Completed store-based barrier for key:store_based_barrier_key:2 with 1 nodes.
[2023-02-01 10:13:55,409] [INFO] [logging.py:68:log_dist] [Rank 0] DeepSpeed Flops Profiler Enabled: False
Using /home/loong/.cache/torch_extensions/py310_cu117 as PyTorch extensions root...
Traceback (most recent call last):
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/train.py", line 128, in <module>
    main()
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/train.py", line 119, in main
    trainer.train(
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/utils/trainer.py", line 125, in train
    engines = engines_loader()
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/train.py", line 21, in load_engines
    model=trainer.Engine(
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/vall_e/utils/engines.py", line 22, in __init__
    super().__init__(None, *args, **kwargs)
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 340, in __init__
    self._configure_optimizer(optimizer, model_parameters)
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 1276, in _configure_optimizer
    basic_optimizer = self._configure_basic_optimizer(model_parameters)
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/deepspeed/runtime/engine.py", line 1353, in _configure_basic_optimizer
    optimizer = FusedAdam(
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/deepspeed/ops/adam/fused_adam.py", line 73, in __init__
    fused_adam_cuda = get_accelerator().create_op_builder(FusedAdamBuilder).load()
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py", line 462, in load
    return self.jit_load(verbose)
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/deepspeed/ops/op_builder/builder.py", line 497, in jit_load
    op_module = load(
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1284, in load
    return _jit_compile(
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1508, in _jit_compile
    _write_ninja_file_and_build_library(
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 1597, in _write_ninja_file_and_build_library
    get_compiler_abi_compatibility_and_version(compiler)
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 336, in get_compiler_abi_compatibility_and_version
    if not check_compiler_ok_for_platform(compiler):
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/torch/utils/cpp_extension.py", line 290, in check_compiler_ok_for_platform
    which = subprocess.check_output(['which', compiler], stderr=subprocess.STDOUT)
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/subprocess.py", line 421, in check_output
    return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
  File "/home/loong/miniconda3/envs/vall-e/lib/python3.10/subprocess.py", line 526, in run
    raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['which', 'c++']' returned non-zero exit status 1.

Is my CUDA not installed successfully? I installed PyTorch and CUDA in an Anaconda env using this:

conda install pytorch torchvision torchaudio pytorch-cuda=11.7 -c pytorch -c nvidia

I did not install CUDA at the operating-system level; I only installed it in the conda env.
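
(For reference, a minimal check, independent of vall-e, that the conda-installed PyTorch build actually sees CUDA:)

import torch

# Minimal sketch: confirm the conda-installed PyTorch was built with CUDA and can see a GPU.
# If this prints True, the CUDA runtime from pytorch-cuda is fine; the error above comes
# from a missing host C++ compiler rather than from CUDA itself.
print(torch.__version__, torch.version.cuda)
print(torch.cuda.is_available())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))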

Looong01 avatar Feb 01 '23 02:02 Looong01

raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['which', 'c++']' returned non-zero exit status 1.

It looks like the subprocess module called which c++ and was unable to find a suitable compiler.
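
A minimal sketch to reproduce that lookup outside of DeepSpeed:

import shutil

# torch's extension builder runs `which c++` via subprocess (see the traceback above);
# shutil.which does the same PATH lookup without raising. None means no compiler with
# that name is on PATH, e.g. because g++/build tools are not installed.
for name in ("c++", "g++", "gcc"):
    print(name, "->", shutil.which(name))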

christopherwoodall avatar Feb 03 '23 14:02 christopherwoodall

raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['which', 'c++']' returned non-zero exit status 1.

It looks like the subprocess module called which c++ and was unable to find a suitable compiler.

How can I solve this problem? Do I need to use "sudo apt install c++"?

Looong01 avatar Feb 03 '23 16:02 Looong01

did you find any solution to this?

prasoons075 avatar Feb 08 '23 15:02 prasoons075

did you find any solution to this?

Maybe this will work:

  1. Open the file /home/loong/miniconda3/envs/vall-e/lib/python3.10/site-packages/torch/utils/cpp_extension.py
  2. Change every compiler = os.environ.get('CXX', 'c++') to compiler = os.environ.get('CXX', 'g++') (on lines 868, 1552, 1596 and 2114; see the sketch after this list).
  3. Change if os.path.basename(compiler_path) == 'c++' and 'gcc version' in version_string: to if os.path.basename(compiler_path) == 'g++' and 'gcc version' in version_string: on line 308.
  4. Run sudo apt install g++ and make sure your g++/gcc version is 11 (12 is not supported).
  5. Run sudo apt install ninja-build.
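
An alternative sketch (untested): since the default in step 2 is read via os.environ.get('CXX', 'c++'), pointing the CXX environment variable at g++ before launching may have the same effect without editing torch at all:

import os

# Untested sketch: set CXX before torch/DeepSpeed JIT-compiles the FusedAdam op, so
# cpp_extension picks up g++ instead of the missing 'c++'. g++ still has to be
# installed (step 4 above).
os.environ.setdefault("CXX", "g++")

# Equivalent from the shell:
#   CXX=g++ python -m vall_e.train yaml=config/test/ar.yml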

If these steps work for you, or you run into other errors, please let me know either way! Thank you very much!

Looong01 avatar Feb 08 '23 16:02 Looong01

Actually, I was quickly testing this out on google colab, do you have any suggestions if you have a workaround there as well?

prasoons075 avatar Feb 09 '23 08:02 prasoons075

Actually, I was quickly testing this out on google colab, do you have any suggestions if you have a workaround there as well?

Yes.

Looong01 avatar Feb 10 '23 10:02 Looong01

@Looong01 Can you please describe that approach?

prasoons075 avatar Feb 24 '23 04:02 prasoons075

@Looong01 Can you please describe that approach?

What do you mean?

Have you successfully done it?

Looong01 avatar Feb 24 '23 04:02 Looong01

Actually, I was quickly testing this out on google colab, do you have any suggestions if you have a workaround there as well?

Yes.

@Looong01 You said that you also have a workaround for Colab. I was asking you to describe how we can bypass/resolve this issue on Colab.

prasoons075 avatar Feb 24 '23 04:02 prasoons075

Actually, I was quickly testing this out on google colab, do you have any suggestions if you have a workaround there as well?

Yes.

@Looong01 You said that you also have a workaround for Colab. I was asking you to describe how we can bypass/resolve this issue on Colab.

Well, maybe I misunderstood what you meant about Colab. I did not try it on Colab; I only tried it on my own device.

Looong01 avatar Feb 24 '23 04:02 Looong01

Hi guys, I am facing this error:

  "git_commit": "3476d393d2133fa9b50d5ad999ca13b95fc22060",
  "git_status": "On branch main\nYour branch is up to date with 'origin/main'.\n\nChanges not staged for commit:\n  (use \"git add/rm <file>...\" to update what will be committed)\n  (use \"git restore <file>...\" to discard changes in working directory)\n\tmodified:   config/test/ar.yml\n\tmodified:   config/test/nar.yml\n\tmodified:   data/test/test.normalized.txt\n\tmodified:   data/test/test.phn.txt\n\tmodified:   data/test/test.qnt.pt\n\tmodified:   data/test/test.wav\n\tdeleted:    data/test/test2.phn.txt\n\tdeleted:    data/test/test2.qnt.pt\n\tmodified:   scripts/plot.py\n\tmodified:   scripts/run.sh\n\tmodified:   vall_e/config.py\n\tmodified:   vall_e/data.py\n\tmodified:   vall_e/train.py\n\nUntracked files:\n  (use \"git add <file>...\" to include in what will be committed)\n\ttoy.wav\n\tzoo/\n\nno changes added to commit (use \"git add\" and/or \"git commit -a\")",
  "gradient_accumulation_steps": 1,
  "gradient_clipping": 100.0,
  "log_dir": "logs/your_data/ar/1678831659",
  "log_root": "logs",
  "max_grad_norm": null,
  "max_iter": 1000,
  "max_num_val": 20,
  "max_phones": 50000,
  "max_prompts": 3,
  "max_val_ar_steps": 300,
  "min_phones": 10,
  "model": "ar-quarter",
  "nj": 8,
  "num_tokens": 1024,
  "p_additional_prompt": 0.8,
  "relpath": "your_data/ar",
  "sample_rate": 24000,
  "sampling_temperature": 1.0,
  "save_artifacts_every": 100,
  "save_ckpt_every": 500,
  "save_on_oom": true,
  "save_on_quit": true,
  "spkr_name_getter": "lambda p: p.parts[-2]",
  "start_time": 1678831659,
  "token_dim": 256,
  "use_fp16": true,
  "warmup_max_lr": 0.0002,
  "warmup_min_lr": 1e-06,
  "warmup_num_steps": 1000
}
2023-03-14 22:09:11 - vall_e.utils.trainer - INFO - GR=0;LR=0 - 
New epoch starts.
/usr/local/lib/python3.10/site-packages/torch/utils/data/dataloader.py:554: UserWarning: This DataLoader will create 8 worker processes in total. Our suggested max number of worker in current system is 2, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.
  warnings.warn(_create_warning_msg(
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/local/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/content/drive/MyDrive/Colab Notebooks/VALL_E/vall-e/vall_e/train.py", line 129, in <module>
    main()
  File "/content/drive/MyDrive/Colab Notebooks/VALL_E/vall-e/vall_e/train.py", line 120, in main
    trainer.train(
  File "/content/drive/MyDrive/Colab Notebooks/VALL_E/vall-e/vall_e/utils/trainer.py", line 150, in train
    for batch in _make_infinite_epochs(train_dl):
  File "/content/drive/MyDrive/Colab Notebooks/VALL_E/vall-e/vall_e/utils/trainer.py", line 103, in _make_infinite_epochs
    yield from dl
  File "/usr/local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 628, in __next__
    data = self._next_data()
  File "/usr/local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1333, in _next_data
    return self._process_data(data)
  File "/usr/local/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1359, in _process_data
    data.reraise()
  File "/usr/local/lib/python3.10/site-packages/torch/_utils.py", line 543, in reraise
    raise exception
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/local/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 58, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 58, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/content/drive/MyDrive/Colab Notebooks/VALL_E/vall-e/vall_e/data.py", line 179, in __getitem__
    proms = self.sample_prompts(spkr_name, ignore=path)
  File "/content/drive/MyDrive/Colab Notebooks/VALL_E/vall-e/vall_e/data.py", line 156, in sample_prompts
    raise ValueError(
ValueError: Failed to find another different utterance for test.

NaeemKhanNiazi avatar Mar 14 '23 22:03 NaeemKhanNiazi

Hi guys, I am facing this error:
[...]
ValueError: Failed to find another different utterance for test.

How can I reproduce it?

Looong01 avatar Mar 18 '23 16:03 Looong01