CEFCON icon indicating copy to clipboard operation
CEFCON copied to clipboard

CUDA out of memory

Open Roger-GOAT opened this issue 4 months ago • 1 comment

Hi, great work! When I run the following:

prior_network = pd.read_csv('./network_mouse.csv')
data = cf.data_preparation(adata, prior_network)
[0] - Data loading and preprocessing...
Consider the input data with 1 lineages:
  Lineage - all:
    337 extra edges (Spearman correlation > 0.6) are added into the prior gene interaction network.
    Total number of edges: 3537148.
    n_genes × n_cells = 12335 × 1822

CUDA = '0'
%%time
cefcon_results_dict = {}
for li, data_li in data.items():
    # We suggest setting up multiple repeats to minimize the randomness of the computation.
    cefcon_GRN_model = cf.NetModel(epochs=350, repeats=3, seed=-1,cuda=CUDA)
    cefcon_GRN_model.run(data_li)

    cefcon_results = cefcon_GRN_model.get_cefcon_results(edge_threshold_avgDegree=8)
    cefcon_results_dict[li] = cefcon_results

I get an error like this:

[1] - Constructing cell-lineage-specific GRN...
Lineage - all: 
Warning: Auxiliary gene scores (e.g., differential expression level) are not considered!
0%|                                                                       | 0/350 [00:00<?, ?it/s]
---------------------------------------------------------------------------
OutOfMemoryError                          Traceback (most recent call last)
File <timed exec>:5

File ~/run/miniconda3/envs/CEFCON/lib/python3.10/site-packages/cefcon/cell_lineage_GRN.py:370, in NetModel.run(self, adata, showProgressBar)
 368 with trange(self.epochs, ncols=100) as t:
 369     for epoch in t:
--> 370         loss = self.__train(data, DGI_model, optimizer)
 371         t.set_description('  Iter: {}/{}'.format(rep + 1, self.repeats))
 372         if epoch < self.epochs - 1:

File ~/run/miniconda3/envs/CEFCON/lib/python3.10/site-packages/cefcon/cell_lineage_GRN.py:326, in NetModel.__train(data, model, optimizer)
 324 model.train()
 325 optimizer.zero_grad()
--> 326 pos_z, neg_z, summary = model(data)
 327 loss = model.loss(pos_z, neg_z, summary)
 328 loss.backward()

File ~/run/miniconda3/envs/CEFCON/lib/python3.10/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []

File ~/run/miniconda3/envs/CEFCON/lib/python3.10/site-packages/torch_geometric/nn/models/deep_graph_infomax.py:52, in DeepGraphInfomax.forward(self, *args, **kwargs)
  49 def forward(self, *args, **kwargs) -> Tuple[Tensor, Tensor, Tensor]:
  50     """Returns the latent space for the input arguments, their
  51     corruptions and their summary representation."""
---> 52     pos_z = self.encoder(*args, **kwargs)
  54     cor = self.corruption(*args, **kwargs)
  55     cor = cor if isinstance(cor, tuple) else (cor, )

File ~/run/miniconda3/envs/CEFCON/lib/python3.10/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []

File ~/run/miniconda3/envs/CEFCON/lib/python3.10/site-packages/cefcon/cell_lineage_GRN.py:222, in GRN_Encoder.forward(self, data)
 220 for norm, attn_in, attn_out, ffn in self.layers:
 221     x = norm(x)
--> 222     x_in, att_weights_in_ = attn_in(x, edge_index, x_auxiliary, return_attention_weights=True)
 223     x_out, att_weights_out_ = attn_out(x, edge_index, x_auxiliary, return_attention_weights=True)
 224     x = ffn(torch.cat((self.act(x_in), self.act(x_out)), 1))

File ~/run/miniconda3/envs/CEFCON/lib/python3.10/site-packages/torch/nn/modules/module.py:1194, in Module._call_impl(self, *input, **kwargs)
1190 # If we don't have any hooks, we want to skip the rest of the logic in
1191 # this function, and just call forward.
1192 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1193         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1194     return forward_call(*input, **kwargs)
1195 # Do not call functions when jit is used
1196 full_backward_hooks, non_full_backward_hooks = [], []

File ~/run/miniconda3/envs/CEFCON/lib/python3.10/site-packages/cefcon/cell_lineage_GRN.py:111, in GraphAttention_layer.forward(self, x, edge_index, x_auxiliary, return_attention_weights)
 109     x_norm_l = F.normalize(x_l, p=2., dim=-1)
 110     x_norm_r = F.normalize(x_r, p=2., dim=-1)
--> 111     out = self.propagate(edge_index, x=(x_l, x_r), x_norm=(x_norm_l, x_norm_r),
 112                          x_auxiliary=x_auxiliary, size=None)
 113 else:  # SD
 114     out = self.propagate(edge_index, x=(x_l, x_r), x_norm=None,
 115                          x_auxiliary=x_auxiliary, size=None)

File ~/run/miniconda3/envs/CEFCON/lib/python3.10/site-packages/torch_geometric/nn/conv/message_passing.py:455, in MessagePassing.propagate(self, edge_index, size, **kwargs)
 452     for arg in decomp_args:
 453         kwargs[arg] = decomp_kwargs[arg][i]
--> 455 coll_dict = self._collect(self._user_args, edge_index, size,
 456                           kwargs)
 458 msg_kwargs = self.inspector.distribute('message', coll_dict)
 459 for hook in self._message_forward_pre_hooks.values():

File ~/run/miniconda3/envs/CEFCON/lib/python3.10/site-packages/torch_geometric/nn/conv/message_passing.py:329, in MessagePassing._collect(self, args, edge_index, size, kwargs)
 327         if isinstance(data, Tensor):
 328             self._set_size(size, dim, data)
--> 329             data = self._lift(data, edge_index, dim)
 331         out[arg] = data
 333 if is_torch_sparse_tensor(edge_index):

File ~/run/miniconda3/envs/CEFCON/lib/python3.10/site-packages/torch_geometric/nn/conv/message_passing.py:276, in MessagePassing._lift(self, src, edge_index, dim)
 269     raise IndexError(
 270         f"Encountered an index error. Please ensure that all "
 271         f"indices in 'edge_index' point to valid indices in "
 272         f"the interval [0, {src.size(self.node_dim) - 1}] "
 273         f"(got interval "
 274         f"[{int(index.min())}, {int(index.max())}])")
 275 else:
--> 276     raise e
 278 if index.numel() > 0 and index.min() < 0:
 279     raise ValueError(
 280         f"Found negative indices in 'edge_index' (got "
 281         f"{index.min().item()}). Please ensure that all "
 282         f"indices in 'edge_index' point to valid indices "
 283         f"in the interval [0, {src.size(self.node_dim)}) in "
 284         f"your node feature matrix and try again.")

File ~/run/miniconda3/envs/CEFCON/lib/python3.10/site-packages/torch_geometric/nn/conv/message_passing.py:266, in MessagePassing._lift(self, src, edge_index, dim)
 264 try:
 265     index = edge_index[dim]
--> 266     return src.index_select(self.node_dim, index)
 267 except (IndexError, RuntimeError) as e:
 268     if index.min() < 0 or index.max() >= src.size(self.node_dim):

OutOfMemoryError: CUDA out of memory. Tried to allocate 6.77 GiB (GPU 0; 23.65 GiB total capacity; 20.62 GiB already allocated; 2.49 GiB free; 20.64 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

Roger-GOAT avatar Feb 25 '24 14:02 Roger-GOAT