Can you help me with this error? Training starts, but it crashes with a CUDA out-of-memory error on the first batch (full output and traceback below).
dataparallel = False
number_feature = 3
Start training.
0%| | 0/308 [00:00<?, ?it/s]
0%| | 0/308 [00:03<?, ?it/s]
Traceback (most recent call last):
File "/scratch/yp2285/Point-Transformer/main_training.py", line 87, in
trainer.train_all()
File "/scratch/yp2285/Point-Transformer/trainer/cls_trainer.py", line 316, in train_all
self._train_one_epoch(epoch + 1)
File "/scratch/yp2285/Point-Transformer/trainer/cls_trainer.py", line 236, in _train_one_epoch
logits = self.model(x=batch_x, pos=batch_pos)
File "/ext3/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/yp2285/Point-Transformer/model/point_transformer_net.py", line 95, in forward
x = pt_trans_blk(x, pos) # point attention transformer
File "/ext3/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/scratch/yp2285/Point-Transformer/model/point_transformer_layer.py", line 134, in forward
attn_emb = self.apply_module_with_bn(qk_rel_topk + rel_pos_topk_emb, self.attn_mlp)
File "/scratch/yp2285/Point-Transformer/model/point_transformer_layer.py", line 97, in apply_module_with_bn
rel_pos_emb = layer(rel_pos_emb)
File "/ext3/miniconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/ext3/miniconda3/lib/python3.10/site-packages/torch/nn/modules/batchnorm.py", line 168, in forward
return F.batch_norm(
File "/ext3/miniconda3/lib/python3.10/site-packages/torch/nn/functional.py", line 2438, in batch_norm
return torch.batch_norm(
RuntimeError: CUDA out of memory. Tried to allocate 2.00 GiB (GPU 0; 15.78 GiB total capacity; 12.07 GiB already allocated; 1.77 GiB free; 12.95 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF