Out of memory for partial hallucination
I ran partial hallucination using the following code and got an out-of-memory error. The complex is composed of two chains ['A', 'C']; chain A has 1505 residues. I need to repair the side-chain structure using partial hallucination. The residues whose structure needs repairing are listed in pos, 19 segments in total.
import os
from colabdesign import mk_afdesign_model, clear_mem

if __name__ == '__main__':
    pdb_files = ['NavPas_LqhaIT.pdb']
    data_dir = '/ai/cheng/data/zhongke/cryo-EM-mod'
    for pdb_file in pdb_files:
        pdb_path = os.path.join(data_dir, pdb_file)
        save_path = os.path.join(data_dir, pdb_file.replace('.pdb', '_repaired.pdb'))

        clear_mem()
        af_model = mk_afdesign_model(protocol="partial",
                                     use_templates=False,  # set True to constrain positions using template input
                                     data_dir='/ai/cheng/gitlab/software_git/ColabDesign/params')
        if pdb_file == 'NavPas_LqhaIT.pdb':
            af_model.prep_inputs(pdb_filename=pdb_path,
                                 chain="A",
                                 pos="295,297,300,302,304,320,321,323,324,1196,1197,1200,1201,1203,1204,1250,1251,1254,1258-1263",
                                 fix_seq=True,  # set True to constrain the sequence
                                 )
            af_model.rewire(loops=[6]*18)

        # initialize with wildtype seq, fill in the rest with soft_gumbel distribution
        af_model.restart(mode=["soft", "gumbel", "wildtype"])
        af_model.design_3stage(100, 100, 10)
        af_model.save_pdb(save_path)
When I run the code, it reports the following error. The memory consumption is unexpectedly large; could you kindly help me check why?
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/ai/cheng/gitlab/software_git/ColabDesign/partial_hallucination.py", line 35, in <module>
af_model.design_3stage(100, 100, 10)
File "/ai/cheng/gitlab/software_git/ColabDesign/colabdesign/af/design.py", line 356, in design_3stage
self.design_logits(soft_iters, e_soft=1,
File "/ai/cheng/gitlab/software_git/ColabDesign/colabdesign/af/design.py", line 334, in design_logits
self.design(iters, **kwargs)
File "/ai/cheng/gitlab/software_git/ColabDesign/colabdesign/af/design.py", line 328, in design
self.step(lr_scale=lr_scale, num_recycles=num_recycles,
File "/ai/cheng/gitlab/software_git/ColabDesign/colabdesign/af/design.py", line 195, in step
self.run(num_recycles=num_recycles, num_models=num_models, sample_models=sample_models,
File "/ai/cheng/gitlab/software_git/ColabDesign/colabdesign/af/design.py", line 96, in run
auxs.append(self._recycle(p, num_recycles=num_recycles, backprop=backprop))
File "/ai/cheng/gitlab/software_git/ColabDesign/colabdesign/af/design.py", line 180, in _recycle
aux = self._single(model_params, backprop)
File "/ai/cheng/gitlab/software_git/ColabDesign/colabdesign/af/design.py", line 140, in _single
(loss, aux), grad = self._model["grad_fn"](*flags)
jaxlib.xla_extension.XlaRuntimeError: RESOURCE_EXHAUSTED: Out of memory while trying to allocate 219121747088 bytes.
BufferAssignment OOM Debugging.
BufferAssignment stats:
parameter allocation: 1.14GiB
constant allocation: 86.3KiB
maybe_live_out allocation: 829.86MiB
preallocated temp allocation: 204.07GiB
total allocation: 206.03GiB
Additionally, the config is as follows. Our GPU is an A100 with 80 GB of memory.
prep_model self.opt is {
  'alpha': 2.0, 'cmap_cutoff': 10.0,
  'con': {'binary': False, 'cutoff': 14.0, 'num': 2, 'num_pos': inf, 'seqsep': 9},
  'dropout': True, 'fape_cutoff': 10.0,
  'fix_pos': array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
                    17, 18, 19, 20, 21, 22, 23]),
  'hard': 0.0,
  'i_con': {'binary': False, 'cutoff': 21.6875, 'num': 1, 'num_pos': inf},
  'learning_rate': 0.1, 'norm_seq_grad': True, 'num_models': 1, 'num_recycles': 0,
  'pos': array([ 242,  244,  247,  249,  251,  267,  268,  270,  271,  981,  982,
                 985,  986,  988,  989, 1035, 1036, 1039, 1043, 1044, 1045, 1046,
                1047, 1048]),
  'sample_models': True, 'soft': 0.0, 'temp': 1.0,
  'template': {'dropout': 0.0, 'rm_ic': False},
  'use_pssm': False,
  'weights': {'con': 1.0, 'dgram_cce': 1.0, 'exp_res': 0.0, 'fape': 0.0, 'helix': 0.0,
              'pae': 0.0, 'plddt': 0.0, 'rmsd': 0.0, 'seq_ent': 0.0}}
As you can see, it tries to allocate 206 GB of GPU memory. Generally speaking, going beyond ~600 amino acids is not possible for gradient-based optimisation. You can try the semigreedy protocol; this should work.
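A minimal sketch of that change, keeping the rest of the script above as-is (the design_semigreedy(100) call is an assumption here; check the exact signature and a sensible iteration budget in your ColabDesign version):

    # swap the gradient-based 3-stage schedule for semigreedy optimisation,
    # which scores candidate point mutations with forward passes instead of
    # backpropagating through the model, avoiding design_3stage's activation memory
    af_model.restart(mode=["soft", "gumbel", "wildtype"])
    af_model.design_semigreedy(100)  # assumed: first argument is the number of iterations
    af_model.save_pdb(save_path)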