Hi,
Thanks for your help last time; it solved my problem.
However, now I have another error.
After running the training from ProteinNet input:
python /data/openfold/train_openfold.py /data/af_databases/pdb_mmcif/mmcif_files/ /home/ubuntu/ProteinNet_parsed/ProteinNet_lc/ /data/af_databases/pdb_mmcif/mmcif_files/ /home/ubuntu/OF_train_from_Protein_Net/try_1_Dec29_2021/ 2021-10-10 --template_release_dates_cache_path /data/af_databases/pdb_mmcif/mmcif_cache.json --precision 16 --replace_sampler_ddp=True --deepspeed_config /data/deepspeed_config.json --default_root_dir /home/ubuntu/OF_train_from_Protein_Net/try_1_Dec29_2021/ --gpus 1 --seed 44
I got this error:
###############
Epoch 0: 0%| | 0/50939 [00:00<?, ?it/s]Traceback (most recent call last):
File "/data/openfold/train_openfold.py", line 336, in <module>
main(args)
File "/data/openfold/train_openfold.py", line 196, in main
ckpt_path=ckpt_path,
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 736, in fit
self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 682, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 770, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1193, in _run
self._dispatch()
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1272, in _dispatch
self.training_type_plugin.start_training(self)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 202, in start_training
self._results = trainer.run_stage()
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1282, in run_stage
return self._run_train()
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/trainer/trainer.py", line 1312, in _run_train
self.fit_loop.run()
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 145, in run
self.advance(*args, **kwargs)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/loops/fit_loop.py", line 234, in advance
self.epoch_loop.run(data_fetcher)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/loops/base.py", line 140, in run
self.on_run_start(*args, **kwargs)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py", line 141, in on_run_start
self._dataloader_iter = _update_dataloader_iter(data_fetcher, self.batch_idx + 1)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/loops/utilities.py", line 121, in _update_dataloader_iter
dataloader_iter = enumerate(data_fetcher, batch_idx)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/utilities/fetching.py", line 199, in __iter__
self.prefetching(self.prefetch_batches)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/utilities/fetching.py", line 258, in prefetching
self._fetch_next_batch()
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/utilities/fetching.py", line 300, in _fetch_next_batch
batch = next(self.dataloader_iter)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/trainer/supporters.py", line 536, in __next__
return self.request_next_batch(self.loader_iters)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/trainer/supporters.py", line 548, in request_next_batch
return apply_to_collection(loader_iters, Iterator, next)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/pytorch_lightning/utilities/apply_func.py", line 92, in apply_to_collection
return function(data, *args, **kwargs)
File "/data/openfold/openfold/data/data_modules.py", line 350, in _batch_prop_gen
for batch in iterator:
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 521, in __next__
data = self._next_data()
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1203, in _next_data
return self._process_data(data)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 1229, in _process_data
data.reraise()
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/torch/_utils.py", line 434, in reraise
raise exception
FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
data = fetcher.fetch(index)
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/data/openfold/lib/conda/envs/openfold_venv/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
data = [self.dataset[idx] for idx in possibly_batched_index]
File "/data/openfold/openfold/data/data_modules.py", line 178, in __getitem__
chain_id=chain_id,
File "/data/openfold/openfold/data/data_pipeline.py", line 577, in process_pdb
with open(pdb_path, 'r') as f:
FileNotFoundError: [Errno 2] No such file or directory: '/data/af_databases/pdb_mmcif/mmcif_files/4l6v_9.pdb'
Epoch 0: 0%| | 0/50939 [00:00<?, ?it/s]
Thanks,
Oz
This looks like it's related to #4.
No, I don't have this file.
Indeed, it looks similar. I will try the fix you suggested in the other post.
Thanks
Since this is a .pdb file, this may be affected by a commit I just pushed (1c3d31b). Re-pull and try again.