cell2cell
cell2cell copied to clipboard
OutOfMemoryError when running run_tensor_cell2cell_pipeline on GPU
The memory at each iteration seems to accumulate. Is it possible to release it or chunk it differently to circumvent this issue?
Running Elbow Analysis
24%|██▍ | 6/25 [23:50<1:15:30, 238.44s/it]
---------------------------------------------------------------------------
OutOfMemoryError Traceback (most recent call last)
Cell In[10], line 1
----> 1 tensor2 = c2c.analysis.run_tensor_cell2cell_pipeline(tensor,
2 meta_tensor,
3 copy_tensor=True,
4 rank=None,
5 tf_optimization='robust',
6 random_state=0,
7 device='cuda',
8 elbow_metric='error',
9 smooth_elbow=False,
10 upper_rank=25,
11 tf_init='random',
12 tf_svd='numpy_svd',
13 cmaps=None,
14 sample_col='Element',
15 group_col='Category',
16 fig_fontsize=14,
17 output_folder=output_folder,
18 output_fig=True,
19 fig_format='pdf',
20 )
File /mambaforge/envs/scanpy/lib/python3.9/site-packages/cell2cell/analysis/tensor_pipelines.py:172, in run_tensor_cell2cell_pipeline(interaction_tensor, tensor_metadata, copy_tensor, rank, tf_optimization, random_state, backend, device, elbow_metric, smooth_elbow, upper_rank, tf_init, tf_svd, cmaps, sample_col, group_col, fig_fontsize, output_folder, output_fig, fig_format, **kwargs)
170 if rank is None:
171 print('Running Elbow Analysis')
--> 172 fig1, error = interaction_tensor.elbow_rank_selection(upper_rank=upper_rank,
173 runs=elbow_runs,
174 init=tf_init,
175 svd=tf_svd,
176 automatic_elbow=True,
177 metric=elbow_metric,
178 output_fig=output_fig,
179 smooth=smooth_elbow,
180 random_state=random_state,
181 fontsize=fig_fontsize,
182 filename=elbow_filename,
183 tol=tol, n_iter_max=n_iter_max,
184 **kwargs
185 )
187 rank = interaction_tensor.rank
189 # Factorization
File /mambaforge/envs/scanpy/lib/python3.9/site-packages/cell2cell/tensor/tensor.py:518, in BaseTensor.elbow_rank_selection(self, upper_rank, runs, tf_type, init, svd, metric, random_state, n_iter_max, tol, automatic_elbow, manual_elbow, smooth, mask, ci, figsize, fontsize, filename, output_fig, verbose, **kwargs)
516 fig = None
517 elif runs > 1:
--> 518 all_loss = _multiple_runs_elbow_analysis(tensor=self.tensor,
519 upper_rank=upper_rank,
520 runs=runs,
521 tf_type=tf_type,
522 init=init,
523 svd=svd,
524 metric=metric,
525 random_state=random_state,
526 mask=mask,
527 n_iter_max=n_iter_max,
528 tol=tol,
529 verbose=verbose,
530 **kwargs
531 )
533 # Same outputs as runs = 1
534 loss = np.nanmean(all_loss, axis=0).tolist()
File /mambaforge/envs/scanpy/lib/python3.9/site-packages/cell2cell/tensor/factorization.py:342, in _multiple_runs_elbow_analysis(tensor, upper_rank, runs, tf_type, init, svd, metric, random_state, mask, n_iter_max, tol, verbose, **kwargs)
340 else:
341 rs = None
--> 342 tl_object, errors = _compute_tensor_factorization(tensor=tensor,
343 rank=r,
344 tf_type=tf_type,
345 init=init,
346 svd=svd,
347 random_state=rs,
348 mask=mask,
349 n_iter_max=n_iter_max,
350 tol=tol,
351 verbose=verbose,
352 **kwargs)
354 if metric == 'error':
355 # This helps to obtain proper error when the mask is not None.
356 if mask is None:
File /mambaforge/envs/scanpy/lib/python3.9/site-packages/cell2cell/tensor/factorization.py:92, in _compute_tensor_factorization(tensor, rank, tf_type, init, svd, random_state, mask, n_iter_max, tol, verbose, **kwargs)
89 return_errors = kwargs['return_errors']
91 if tf_type == 'non_negative_cp':
---> 92 cp_tf = non_negative_parafac(tensor=tensor,
93 rank=rank,
94 init='random' if mask is not None else init,
95 svd=svd,
96 random_state=random_state,
97 mask=mask,
98 n_iter_max=n_iter_max,
99 tol=tol,
100 verbose=verbose,
101 **kwargs)
103 if return_errors:
104 cp_tf, errors = cp_tf
File /mambaforge/envs/scanpy/lib/python3.9/site-packages/tensorly/decomposition/_nn_cp.py:126, in non_negative_parafac(tensor, rank, n_iter_max, init, svd, tol, random_state, verbose, normalize_factors, return_errors, mask, cvg_criterion, fixed_modes)
124 accum = tl.reshape(weights, (-1, 1)) * accum * tl.reshape(weights, (1, -1))
125 if mask is not None:
--> 126 tensor = tensor * mask + tl.cp_to_tensor(
127 (weights, factors), mask=1 - mask
128 )
130 mttkrp = unfolding_dot_khatri_rao(tensor, (weights, factors), mode)
132 numerator = tl.clip(mttkrp, a_min=epsilon, a_max=None)
File /mambaforge/envs/scanpy/lib/python3.9/site-packages/tensorly/cp_tensor.py:483, in cp_to_tensor(cp_tensor, mask)
478 full_tensor = T.dot(
479 factors[0] * weights, T.transpose(khatri_rao(factors, skip_matrix=0))
480 )
481 else:
482 full_tensor = T.sum(
--> 483 khatri_rao([factors[0] * weights] + factors[1:], mask=mask), axis=1
484 )
486 return fold(full_tensor, 0, shape)
File /mambaforge/envs/scanpy/lib/python3.9/site-packages/tensorly/backend/__init__.py:206, in BackendManager.dispatch_backend_method.<locals>.wrapped_backend_method(*args, **kwargs)
202 def wrapped_backend_method(*args, **kwargs):
203 """A dynamically dispatched method
204
205 Returns the queried method from the currently set backend"""
--> 206 return getattr(
207 cls._THREAD_LOCAL_DATA.__dict__.get("backend", cls._backend), name
208 )(*args, **kwargs)
File /mambaforge/envs/scanpy/lib/python3.9/site-packages/tensorly/tenalg/core_tenalg/_khatri_rao.py:101, in khatri_rao(matrices, weights, skip_matrix, reverse, mask)
98 matrices = matrices[::-1]
99 # Note: we do NOT use .reverse() which would reverse matrices even outside this function
--> 101 return T.kr(matrices, weights=weights, mask=mask)
File /mambaforge/envs/scanpy/lib/python3.9/site-packages/tensorly/backend/__init__.py:206, in BackendManager.dispatch_backend_method.<locals>.wrapped_backend_method(*args, **kwargs)
202 def wrapped_backend_method(*args, **kwargs):
203 """A dynamically dispatched method
204
205 Returns the queried method from the currently set backend"""
--> 206 return getattr(
207 cls._THREAD_LOCAL_DATA.__dict__.get("backend", cls._backend), name
208 )(*args, **kwargs)
File /mambaforge/envs/scanpy/lib/python3.9/site-packages/tensorly/backend/core.py:1097, in Backend.kr(self, matrices, weights, mask)
1093 res = self.reshape(a * b, (-1, n_col))
1095 m = self.reshape(mask, (-1, 1)) if mask is not None else 1
-> 1097 return res * m
OutOfMemoryError: CUDA out of memory. Tried to allocate 4.20 GiB (GPU 0; 15.90 GiB total capacity; 7.36 GiB already allocated; 3.66 GiB free; 11.41 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
What size is your tensor? This depends a lot on the tensor's size, because unfolding it to perform the underlying calculations can demand a lot of memory.
I've experimented with different sizes, but still have the same issue. For example, a tensor of (42, 2628, 27, 27), which is roughly half the size I need, still runs out of memory. There is plenty of CPU memory available, so if it can be stored on CPU between iterations that would help. Seems to progressively accumulate each iteration. Otherwise, if it can be chunked differently for the GPU processes.
Can you try running the decomposition with a given number of factors (e.g. 10 factors) and see if you still get this error? If so, the issue is the memory that you have in your GPU, which from what I see is 16GB.
In my opinion, the issue is the size of your tensor and the limited memory in your GPU. I would recommend prioritizing ligand-receptor pairs (e.g. by signaling pathway, expression level, among other options) to reduce the number of interactions from 2,628 to ~200-500 pairs.