Palantir icon indicating copy to clipboard operation
Palantir copied to clipboard

Difference between index and start_cell / error when getting gene_trends

Open kxxxjo opened this issue 2 years ago • 3 comments

Hi all,

first of all thanks for providing useful tools!

I have some questions about running palantir.

First, when I assign terminal_states for running palantir.core.run_palantir, I'm confused about the index parameter and start_cell. I understand that the start_cell is chosen based on high expression of marker genes, but which cells should I set as the index? And what is the difference between index and start_cell?

Below is my code:

## Running Palantir 
tsne = palantir.utils.run_tsne(ms_data)
terminal_states = pd.Series(['Type I', 'Type II', 'Type III'], 
                           index=['#######1, '#######2', '#######3',

start_cell = '#######','

pr_res = palantir.core.run_palantir(ms_data, start_cell, num_waypoints=500, terminal_states=terminal_states.index)

pr_res.branch_probs.columns = terminal_states[pr_res.branch_probs.columns]

palantir.plot.plot_palantir_results(pr_res, umap)

cells = ['#######1', '#######2', '#######3', ]
palantir.plot.plot_terminal_state_probs(pr_res, cells) 

Second, when I run palantir.presults.compute_gene_trends to get gene_trends, the following error occurs.

## gene expression trends
genes = ['XXXX', 'YYYY']

imp_df = pd.DataFrame(adata[:, genes].layers['MAGIC_imputed_data'],
                     index=adata.obs_names, columns=genes)

gene_trends = palantir.presults.compute_gene_trends(pr_res, imp_df.loc[:, genes])

Type I
---------------------------------------------------------------------------
_RemoteTraceback                          Traceback (most recent call last)
_RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/Users/kj/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/externals/loky/process_executor.py", line 431, in _process_worker
    r = call_item()
  File "/Users/kj/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/externals/loky/process_executor.py", line 285, in __call__
    return self.fn(*self.args, **self.kwargs)
  File "/Users/kj/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/_parallel_backends.py", line 595, in __call__
    return self.func(*args, **kwargs)
  File "/Users/kj/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/parallel.py", line 262, in __call__
    return [func(*args, **kwargs)
  File "/Users/kj/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/parallel.py", line 262, in <listcomp>
    return [func(*args, **kwargs)
  File "/Users/kj/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/palantir/presults.py", line 146, in _gam_fit_predict
    y_pred = gam.predict(pred_x)
  File "/Users/kj/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/pygam/pygam.py", line 434, in predict
    return self.predict_mu(X)
  File "/Users/kj/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/pygam/pygam.py", line 412, in predict_mu
    X = check_X(X, n_feats=self.statistics_['m_features'],
  File "/Users/kj/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/pygam/utils.py", line 272, in check_X
    X = check_array(X, force_2d=True, n_feats=n_feats, min_samples=min_samples,
  File "/Users/kj/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/pygam/utils.py", line 171, in check_array
    raise ValueError('{} must not contain Inf nor NaN'.format(name))
ValueError: X data must not contain Inf nor NaN
"""

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
<ipython-input-90-db867106a17b> in <module>
----> 1 gene_trends = palantir.presults.compute_gene_trends(pr_res, imp_df.loc[:, genes])

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/palantir/presults.py in compute_gene_trends(pr_res, gene_exprs, lineages, n_jobs)
    108         weights = pr_res.branch_probs.loc[gene_exprs.index, branch].values
    109         bins = np.array(results[branch]["trends"].columns)
--> 110         res = Parallel(n_jobs=n_jobs)(
    111             delayed(_gam_fit_predict)(
    112                 pr_res.pseudotime[gene_exprs.index].values,

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/parallel.py in __call__(self, iterable)
   1052 
   1053             with self._backend.retrieval_context():
-> 1054                 self.retrieve()
   1055             # Make sure that we get a last message telling us we are done
   1056             elapsed_time = time.time() - self._start_time

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/parallel.py in retrieve(self)
    931             try:
    932                 if getattr(self._backend, 'supports_timeout', False):
--> 933                     self._output.extend(job.get(timeout=self.timeout))
    934                 else:
    935                     self._output.extend(job.get())

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
    540         AsyncResults.get from multiprocessing."""
    541         try:
--> 542             return future.result(timeout=timeout)
    543         except CfTimeoutError as e:
    544             raise TimeoutError from e

~/opt/miniconda3/envs/scanpy/lib/python3.8/concurrent/futures/_base.py in result(self, timeout)
    437                 raise CancelledError()
    438             elif self._state == FINISHED:
--> 439                 return self.__get_result()
    440             else:
    441                 raise TimeoutError()

~/opt/miniconda3/envs/scanpy/lib/python3.8/concurrent/futures/_base.py in __get_result(self)
    386     def __get_result(self):
    387         if self._exception:
--> 388             raise self._exception
    389         else:
    390             return self._result

ValueError: X data must not contain Inf nor NaN

Could you suggest a solution to my questions, please?

Thanks!

Best, KJ

kxxxjo avatar Jun 17 '22 06:06 kxxxjo

terminal_states should be a pandas.Series whose index contains the cell identifiers (i.e., entries in ad.obs_names) and whose values are the names of the branches.

For question 2, can you try with n_jobs=1? I hope this will give a more debuggable error message.

ManuSetty avatar Jun 17 '22 22:06 ManuSetty

Hi Setty, thanks for the reply!

In the tutorial (https://github.com/dpeerlab/Palantir/blob/master/notebooks/Palantir_sample_notebook.ipynb), 3 specific cells (index=['Run5_131097901611291', 'Run5_134936662236454', 'Run4_200562869397916']) were specified for the index, but I am curious what criteria were used to choose them.

I also tried running gene_trends = palantir.presults.compute_gene_trends(pr_res, imp_df.loc[:, genes], n_jobs=1), but the same error still occurred.

Type I
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-107-677ddefb558a> in <module>
----> 1 gene_trends = palantir.presults.compute_gene_trends(pr_res, imp_df.loc[:, genes], n_jobs=1)

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/palantir/presults.py in compute_gene_trends(pr_res, gene_exprs, lineages, n_jobs)
    108         weights = pr_res.branch_probs.loc[gene_exprs.index, branch].values
    109         bins = np.array(results[branch]["trends"].columns)
--> 110         res = Parallel(n_jobs=n_jobs)(
    111             delayed(_gam_fit_predict)(
    112                 pr_res.pseudotime[gene_exprs.index].values,

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/parallel.py in __call__(self, iterable)
   1039             # remaining jobs.
   1040             self._iterating = False
-> 1041             if self.dispatch_one_batch(iterator):
   1042                 self._iterating = self._original_iterator is not None
   1043 

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/parallel.py in dispatch_one_batch(self, iterator)
    857                 return False
    858             else:
--> 859                 self._dispatch(tasks)
    860                 return True
    861 

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/parallel.py in _dispatch(self, batch)
    775         with self._lock:
    776             job_idx = len(self._jobs)
--> 777             job = self._backend.apply_async(batch, callback=cb)
    778             # A job can complete so quickly than its callback is
    779             # called before we get here, causing self._jobs to

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/_parallel_backends.py in apply_async(self, func, callback)
    206     def apply_async(self, func, callback=None):
    207         """Schedule a func to be run"""
--> 208         result = ImmediateResult(func)
    209         if callback:
    210             callback(result)

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/_parallel_backends.py in __init__(self, batch)
    570         # Don't delay the application, to avoid keeping the input
    571         # arguments in memory
--> 572         self.results = batch()
    573 
    574     def get(self):

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/parallel.py in __call__(self)
    260         # change the default number of processes to -1
    261         with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262             return [func(*args, **kwargs)
    263                     for func, args, kwargs in self.items]
    264 

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/joblib/parallel.py in <listcomp>(.0)
    260         # change the default number of processes to -1
    261         with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 262             return [func(*args, **kwargs)
    263                     for func, args, kwargs in self.items]
    264 

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/palantir/presults.py in _gam_fit_predict(x, y, weights, pred_x)
    144     if pred_x is None:
    145         pred_x = x
--> 146     y_pred = gam.predict(pred_x)
    147 
    148     # Standard deviations

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/pygam/pygam.py in predict(self, X)
    432             containing predicted values under the model
    433         """
--> 434         return self.predict_mu(X)
    435 
    436     def _modelmat(self, X, term=-1):

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/pygam/pygam.py in predict_mu(self, X)
    410             raise AttributeError('GAM has not been fitted. Call fit first.')
    411 
--> 412         X = check_X(X, n_feats=self.statistics_['m_features'],
    413                     edge_knots=self.edge_knots_, dtypes=self.dtype,
    414                     features=self.feature, verbose=self.verbose)

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/pygam/utils.py in check_X(X, n_feats, min_samples, edge_knots, dtypes, features, verbose)
    270 
    271     # basic diagnostics
--> 272     X = check_array(X, force_2d=True, n_feats=n_feats, min_samples=min_samples,
    273                     name='X data', verbose=verbose)
    274 

~/opt/miniconda3/envs/scanpy/lib/python3.8/site-packages/pygam/utils.py in check_array(array, force_2d, n_feats, ndim, min_samples, name, verbose)
    169     # check finite
    170     if not(np.isfinite(array).all()):
--> 171         raise ValueError('{} must not contain Inf nor NaN'.format(name))
    172 
    173     # check ndim

ValueError: X data must not contain Inf nor NaN

I also tried with n_jobs=2,3... but it was the same.

Could you help me more to fix them, please?

Thanks.

Best, KJ

kxxxjo avatar Jun 19 '22 02:06 kxxxjo

  1. The criteria for determining the index / terminal cells are prior biological knowledge and the expression of known markers of terminal states. One can pick the extreme of the diffusion component with the highest expression of known markers as the terminal state.
  2. For the trend computation, it appears that you might have some NaN values in the imputed data. Perhaps you can check using np.isnan.

ManuSetty avatar Jun 21 '22 23:06 ManuSetty

I also get a similar error with the trend computation. I have checked for NAs, Infs, and zeros. I have also tried creating a DataFrame of random floats (no NAs, NaNs, Infs, or zeros) to use as input instead, and I still get the same result. Any idea what the problem could be? I did a fresh install from a git clone yesterday. All my data comes straight from 10x output. I am wondering whether the NA could be coming from the pr_res data. Thanks so much.


_RemoteTraceback Traceback (most recent call last) _RemoteTraceback: """ Traceback (most recent call last): File "/home/w/anaconda3/envs/env_palantir/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py", line 428, in _process_worker r = call_item() File "/home/w/anaconda3/envs/env_palantir/lib/python3.7/site-packages/joblib/externals/loky/process_executor.py", line 275, in call return self.fn(*self.args, **self.kwargs) File "/home/w/anaconda3/envs/env_palantir/lib/python3.7/site-packages/joblib/_parallel_backends.py", line 620, in call return self.func(*args, **kwargs) File "/home/w/anaconda3/envs/env_palantir/lib/python3.7/site-packages/joblib/parallel.py", line 289, in call for func, args, kwargs in self.items] File "/home/w/anaconda3/envs/env_palantir/lib/python3.7/site-packages/joblib/parallel.py", line 289, in for func, args, kwargs in self.items] File "/home/w/anaconda3/envs/env_palantir/lib/python3.7/site-packages/palantir/presults.py", line 162, in gam_fit_predict y_pred = gam.predict(pred_x) File "/home/w/anaconda3/envs/env_palantir/lib/python3.7/site-packages/pygam/pygam.py", line 434, in predict return self.predict_mu(X) File "/home/w/anaconda3/envs/env_palantir/lib/python3.7/site-packages/pygam/pygam.py", line 414, in predict_mu features=self.feature, verbose=self.verbose) File "/home/w/anaconda3/envs/env_palantir/lib/python3.7/site-packages/pygam/utils.py", line 273, in check_X name='X data', verbose=verbose) File "/home/w/anaconda3/envs/env_palantir/lib/python3.7/site-packages/pygam/utils.py", line 171, in check_array raise ValueError('{} must not contain Inf nor NaN'.format(name)) ValueError: X data must not contain Inf nor NaN """

The above exception was the direct cause of the following exception:

ValueError Traceback (most recent call last) /tmp/ipykernel_1341918/1218618491.py in ----> 1 gene_trends = palantir.presults.compute_gene_trends(pr_res, imp_df)

~/anaconda3/envs/env_palantir/lib/python3.7/site-packages/palantir/presults.py in compute_gene_trends(pr_res, gene_exprs, lineages, n_splines, spline_order, n_jobs) 120 spline_order 121 ) --> 122 for gene in gene_exprs.columns 123 ) 124

~/anaconda3/envs/env_palantir/lib/python3.7/site-packages/joblib/parallel.py in call(self, iterable) 1096 1097 with self._backend.retrieval_context(): -> 1098 self.retrieve() 1099 # Make sure that we get a last message telling us we are done 1100 elapsed_time = time.time() - self._start_time

~/anaconda3/envs/env_palantir/lib/python3.7/site-packages/joblib/parallel.py in retrieve(self) 973 try: 974 if getattr(self._backend, 'supports_timeout', False): --> 975 self._output.extend(job.get(timeout=self.timeout)) 976 else: 977 self._output.extend(job.get())

~/anaconda3/envs/env_palantir/lib/python3.7/site-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout) 565 AsyncResults.get from multiprocessing.""" 566 try: --> 567 return future.result(timeout=timeout) 568 except CfTimeoutError as e: 569 raise TimeoutError from e

~/anaconda3/envs/env_palantir/lib/python3.7/concurrent/futures/_base.py in result(self, timeout) 433 raise CancelledError() 434 elif self._state == FINISHED: --> 435 return self.__get_result() 436 else: 437 raise TimeoutError()

~/anaconda3/envs/env_palantir/lib/python3.7/concurrent/futures/_base.py in __get_result(self) 382 def __get_result(self): 383 if self._exception: --> 384 raise self._exception 385 else: 386 return self._result

ValueError: X data must not contain Inf nor NaN

widsquid avatar Feb 17 '23 01:02 widsquid