Hi,
I am learning to train OpenFold-Multimer using only the alignments and mmCIF files provided by AlphaFold/OpenFold. I launched training with the following command, which I found to run fine when training the monomer version of OpenFold:
python train_openfold.py pdb_data/mmcif_files alignment_data/alignments/ pdb_data/mmcif_files/ multimer/ 2021-10-10 --train_chain_data_cache_path pdb_data/data_caches/chain_data_cache.json --template_release_dates_cache_path pdb_data/data_caches/mmcif_cache.json --config_preset "model_1_multimer_v3" --obsolete_pdbs_file_path pdb_data/obsolete.dat --num_nodes 4 --seed 121924 --gpus 16 --max_epochs 15 --precision bf16-mixed
However, I keep running into the following error:
Traceback (most recent call last):
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 703, in <module>
main(args)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 356, in main
data_module.setup()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 1139, in setup
self.train_dataset = OpenFoldMultimerDataset(
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 678, in __init__
super(OpenFoldMultimerDataset, self).__init__(datasets=datasets,
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 557, in __init__
self.reroll()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 660, in reroll
datapoint_idx = next(samples)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 725, in looped_samples
mmcif_data_cache = dataset.mmcif_data_cache
AttributeError: 'OpenFoldSingleMultimerDataset' object has no attribute 'mmcif_data_cache'. Did you mean: 'mmcif_data_cache_path'?
Traceback (most recent call last):
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 703, in <module>
main(args)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 356, in main
data_module.setup()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 1139, in setup
self.train_dataset = OpenFoldMultimerDataset(
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 678, in __init__
super(OpenFoldMultimerDataset, self).__init__(datasets=datasets,
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 557, in __init__
self.reroll()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 660, in reroll
datapoint_idx = next(samples)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 725, in looped_samples
mmcif_data_cache = dataset.mmcif_data_cache
AttributeError: 'OpenFoldSingleMultimerDataset' object has no attribute 'mmcif_data_cache'. Did you mean: 'mmcif_data_cache_path'?
srun: error: nid001388: task 5: Exited with exit code 1
srun: Terminating StepId=21060918.0
I changed dataset.mmcif_data_cache to dataset.mmcif_data_cache_path in openfold/data/data_modules.py, but then I ran into another error:
Traceback (most recent call last):
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 703, in <module>
main(args)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 356, in main
data_module.setup()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 1139, in setup
self.train_dataset = OpenFoldMultimerDataset(
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 678, in __init__
Traceback (most recent call last):
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 703, in <module>
super(OpenFoldMultimerDataset, self).__init__(datasets=datasets,
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 557, in __init__
self.reroll()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 660, in reroll
main(args)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 356, in main
datapoint_idx = next(samples)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 732, in looped_samples
data_module.setup()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 1139, in setup
mmcif_data_cache_entry = mmcif_data_cache[mmcif_id]
TypeError: 'NoneType' object is not subscriptable
self.train_dataset = OpenFoldMultimerDataset(
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 678, in __init__
super(OpenFoldMultimerDataset, self).__init__(datasets=datasets,
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 557, in __init__
self.reroll()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 660, in reroll
datapoint_idx = next(samples)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 732, in looped_samples
mmcif_data_cache_entry = mmcif_data_cache[mmcif_id]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 703, in <module>
main(args)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 356, in main
data_module.setup()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 1139, in setup
self.train_dataset = OpenFoldMultimerDataset(
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 678, in __init__
super(OpenFoldMultimerDataset, self).__init__(datasets=datasets,
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 557, in __init__
self.reroll()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 660, in reroll
datapoint_idx = next(samples)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 732, in looped_samples
mmcif_data_cache_entry = mmcif_data_cache[mmcif_id]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 703, in <module>
main(args)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 356, in main
data_module.setup()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 1139, in setup
self.train_dataset = OpenFoldMultimerDataset(
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 678, in __init__
super(OpenFoldMultimerDataset, self).__init__(datasets=datasets,
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 557, in __init__
self.reroll()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 660, in reroll
datapoint_idx = next(samples)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 732, in looped_samples
mmcif_data_cache_entry = mmcif_data_cache[mmcif_id]
TypeError: 'NoneType' object is not subscriptable
Traceback (most recent call last):
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 703, in <module>
main(args)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/train_openfold.py", line 356, in main
data_module.setup()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 1139, in setup
self.train_dataset = OpenFoldMultimerDataset(
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 678, in __init__
super(OpenFoldMultimerDataset, self).__init__(datasets=datasets,
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 557, in __init__
self.reroll()
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 660, in reroll
datapoint_idx = next(samples)
File "/lustre/scratch5/.mdt0/hungd238/Fold-21-Multimer/fold-21/openfold/data/data_modules.py", line 732, in looped_samples
mmcif_data_cache_entry = mmcif_data_cache[mmcif_id]
TypeError: 'NoneType' object is not subscriptable
srun: error: nid001385: tasks 2-3: Exited with exit code 1
srun: Terminating StepId=21060799.0
Do you have any suggestions on how to fix these issues? Thank you in advance.