scanpy icon indicating copy to clipboard operation
scanpy copied to clipboard

rank_genes_groups_dotplot does not work when using reference and using rankby_abs, or setting values_to_plot='logfoldchanges'

Open Xparx opened this issue 2 years ago • 3 comments

  • [x] I have checked that this issue has not already been reported.
  • [x] I have confirmed this bug exists on the latest version of scanpy.
  • [ ] (optional) I have confirmed this bug exists on the master branch of scanpy.

Note: Please read this guide detailing how to provide the necessary information for us to reproduce your bug.

As the title says: a specific combination of keywords for ranking gene groups and then plotting them unexpectedly throws an error.

Minimal code sample (that we can copy&paste without having any data)

adata = sc.datasets.paul15()
sc.tl.rank_genes_groups(adata, groupby='paul15_clusters', key_added='GG', use_raw=False, reference='1Ery')
rax = sc.pl.rank_genes_groups_dotplot(adata, key='GG', # , rankby_abs= None,
                                      n_genes=3, cmap='PiYG_r', swap_axes=True,
                                      show=False, values_to_plot='logfoldchanges',
                                      vmin=None, vmax=None)
WARNING: In Scanpy 0.*, this returned logarithmized data. Now it returns non-logarithmized data.
... storing 'paul15_clusters' as categorical
Trying to set attribute `.uns` of view, copying.
WARNING: Default of the method has been changed to 't-test' from 't-test_overestim_var'
WARNING: It seems you use rank_genes_groups on the raw count data. Please logarithmize your data before calling rank_genes_groups.
ERROR: the given dot_color_df data frame has a different shape thanthe data frame used for the dot size. Both data frames needto have the same index and columns
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-110-708ec3ea001f> in <module>
      1 adata = sc.datasets.paul15()
      2 sc.tl.rank_genes_groups(adata, groupby='paul15_clusters', key_added='GG', use_raw=False, reference='1Ery')
----> 3 rax = sc.pl.rank_genes_groups_dotplot(adata, key='GG', # , rankby_abs= None,
      4                                       n_genes=3, cmap='PiYG_r', swap_axes=True,
      5                                       show=False, values_to_plot='logfoldchanges',

~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/scanpy/plotting/_tools/__init__.py in rank_genes_groups_dotplot(adata, groups, n_genes, groupby, values_to_plot, var_names, gene_symbols, min_logfoldchange, key, show, save, return_fig, **kwds)
    861     tl.rank_genes_groups
    862     """
--> 863     return _rank_genes_groups_plot(
    864         adata,
    865         plot_type='dotplot',

~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/scanpy/plotting/_tools/__init__.py in _rank_genes_groups_plot(adata, plot_type, groups, n_genes, groupby, values_to_plot, var_names, min_logfoldchange, key, show, save, return_fig, gene_symbols, **kwds)
    534             from .._dotplot import dotplot
    535 
--> 536             _pl = dotplot(
    537                 adata,
    538                 var_names,

~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py in dotplot(adata, var_names, groupby, use_raw, log, num_categories, expression_cutoff, mean_only_expressed, cmap, dot_max, dot_min, standard_scale, smallest_dot, title, colorbar_title, size_title, figsize, dendrogram, gene_symbols, var_group_positions, var_group_labels, var_group_rotation, layer, swap_axes, dot_color_df, show, save, ax, return_fig, vmin, vmax, vcenter, norm, **kwds)
    940         del kwds['color_map']
    941 
--> 942     dp = DotPlot(
    943         adata,
    944         var_names,

~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py in __init__(self, adata, var_names, groupby, use_raw, log, num_categories, categories_order, title, figsize, gene_symbols, var_group_positions, var_group_labels, var_group_rotation, layer, expression_cutoff, mean_only_expressed, standard_scale, dot_color_df, dot_size_df, ax, vmin, vmax, vcenter, norm, **kwds)
    215             # get the same order for rows and columns in the dot_color_df
    216             # using the order from the doc_size_df
--> 217             dot_color_df = dot_color_df.loc[dot_size_df.index][dot_size_df.columns]
    218 
    219         self.dot_color_df = dot_color_df

~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/pandas/core/indexing.py in __getitem__(self, key)
    893 
    894             maybe_callable = com.apply_if_callable(key, self.obj)
--> 895             return self._getitem_axis(maybe_callable, axis=axis)
    896 
    897     def _is_scalar_access(self, key: Tuple):

~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1111                     raise ValueError("Cannot index with multidimensional key")
   1112 
-> 1113                 return self._getitem_iterable(key, axis=axis)
   1114 
   1115             # nested tuple slicing

~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/pandas/core/indexing.py in _getitem_iterable(self, key, axis)
   1051 
   1052         # A collection of keys
-> 1053         keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
   1054         return self.obj._reindex_with_indexers(
   1055             {axis: [keyarr, indexer]}, copy=True, allow_dups=True

~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
   1264             keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
   1265 
-> 1266         self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
   1267         return keyarr, indexer
   1268 

~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing)
   1319 
   1320             with option_context("display.max_seq_items", 10, "display.width", 80):
-> 1321                 raise KeyError(
   1322                     "Passing list-likes to .loc or [] with any missing labels "
   1323                     "is no longer supported. "

KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: CategoricalIndex(['1Ery'], categories=['1Ery', '2Ery', '3Ery', '4Ery', '5Ery', '6Ery', '7MEP', '8Mk', ...], ordered=False, name='paul15_clusters', dtype='category'). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"

Versions

sc.logging.print_versions() WARNING: If you miss a compact list, please try print_header!

anndata 0.7.6 scanpy 1.8.2 sinfo 0.3.1

PIL 8.2.0 anndata 0.7.6 autoreload NA backcall 0.2.0 cffi 1.14.5 configobj 5.0.6 cycler 0.10.0 cython_runtime NA dateutil 2.8.1 decorator 4.4.2 git 3.1.14 gitdb 4.0.7 google NA gpytorch 1.4.1 h5py 3.2.1 igraph 0.9.6 inferelator NA ipykernel 5.5.3 ipython_genutils 0.2.0 ipywidgets 7.6.3 jedi 0.18.0 joblib 1.0.1 kiwisolver 1.3.1 leidenalg 0.8.4 llvmlite 0.36.0 matplotlib 3.4.1 mpl_toolkits NA natsort 7.1.1 numba 0.53.1 numexpr 2.7.3 numpy 1.20.2 packaging 20.9 pandas 1.2.4 parso 0.8.2 pexpect 4.8.0 pickleshare 0.7.5 pkg_resources NA prompt_toolkit 3.0.18 ptyprocess 0.7.0 pycparser 2.20 pygments 2.8.1 pynndescent 0.5.2 pyparsing 2.4.7 pytz 2021.1 scanpy 1.8.2 scipy 1.6.3 seaborn 0.11.1 sinfo 0.3.1 sitecustomize NA six 1.15.0 sklearn 0.24.2 smmap 4.0.0 statsmodels 0.12.2 storemagic NA supirfactor NA tables 3.6.1 texttable 1.6.3 torch 1.9.0+cu102 tornado 6.1 tqdm 4.60.0 traitlets 5.0.5 typing_extensions NA umap 0.5.1 wcwidth 0.2.5 zmq 22.0.3

IPython 7.22.0 jupyter_client 6.1.12 jupyter_core 4.7.1 notebook 6.3.0

Python 3.8.5 (default, Jan 27 2021, 15:41:15) [GCC 9.3.0] Linux-5.4.0-91-generic-x86_64-with-glibc2.29 12 logical CPU cores, x86_64

Session information updated at 2021-12-10 17:16

Xparx avatar Dec 10 '21 22:12 Xparx

Thanks for the report. I can broadly reproduce the error for passing values_to_plot. The error I get is a little different, but I expect that's due to pandas versions.

A more minimal example:

import scanpy as sc

adata = sc.datasets.pbmc3k_processed().raw.to_adata()
sc.tl.rank_genes_groups(adata, groupby="louvain", reference="B cells")

# Errors with any of  ['scores', 'logfoldchanges', 'pvals', 'pvals_adj','log10_pvals', 'log10_pvals_adj']
sc.pl.rank_genes_groups_dotplot(adata, values_to_plot='logfoldchanges')
Traceback
ERROR: the given dot_color_df data frame has a different shape thanthe data frame used for the dot size. Both data frames needto have the same index and columns
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/var/folders/bd/43q20k0n6z15tdfzxvd22r7c0000gn/T/ipykernel_62013/1545772980.py in <module>
      1 while len(possible_vals) > 0:
----> 2     sc.pl.rank_genes_groups_dotplot(adata, values_to_plot=possible_vals.pop())
      3 

~/github/scanpy/scanpy/plotting/_tools/__init__.py in rank_genes_groups_dotplot(adata, groups, n_genes, groupby, values_to_plot, var_names, gene_symbols, min_logfoldchange, key, show, save, return_fig, **kwds)
    861     tl.rank_genes_groups
    862     """
--> 863     return _rank_genes_groups_plot(
    864         adata,
    865         plot_type='dotplot',

~/github/scanpy/scanpy/plotting/_tools/__init__.py in _rank_genes_groups_plot(adata, plot_type, groups, n_genes, groupby, values_to_plot, var_names, min_logfoldchange, key, show, save, return_fig, gene_symbols, **kwds)
    534             from .._dotplot import dotplot
    535 
--> 536             _pl = dotplot(
    537                 adata,
    538                 var_names,

~/github/scanpy/scanpy/plotting/_dotplot.py in dotplot(adata, var_names, groupby, use_raw, log, num_categories, expression_cutoff, mean_only_expressed, cmap, dot_max, dot_min, standard_scale, smallest_dot, title, colorbar_title, size_title, figsize, dendrogram, gene_symbols, var_group_positions, var_group_labels, var_group_rotation, layer, swap_axes, dot_color_df, show, save, ax, return_fig, vmin, vmax, vcenter, norm, **kwds)
    940         del kwds['color_map']
    941 
--> 942     dp = DotPlot(
    943         adata,
    944         var_names,

~/github/scanpy/scanpy/plotting/_dotplot.py in __init__(self, adata, var_names, groupby, use_raw, log, num_categories, categories_order, title, figsize, gene_symbols, var_group_positions, var_group_labels, var_group_rotation, layer, expression_cutoff, mean_only_expressed, standard_scale, dot_color_df, dot_size_df, ax, vmin, vmax, vcenter, norm, **kwds)
    215             # get the same order for rows and columns in the dot_color_df
    216             # using the order from the doc_size_df
--> 217             dot_color_df = dot_color_df.loc[dot_size_df.index][dot_size_df.columns]
    218 
    219         self.dot_color_df = dot_color_df

/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py in __getitem__(self, key)
    929 
    930             maybe_callable = com.apply_if_callable(key, self.obj)
--> 931             return self._getitem_axis(maybe_callable, axis=axis)
    932 
    933     def _is_scalar_access(self, key: tuple):

/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
   1151                     raise ValueError("Cannot index with multidimensional key")
   1152 
-> 1153                 return self._getitem_iterable(key, axis=axis)
   1154 
   1155             # nested tuple slicing

/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_iterable(self, key, axis)
   1091 
   1092         # A collection of keys
-> 1093         keyarr, indexer = self._get_listlike_indexer(key, axis)
   1094         return self.obj._reindex_with_indexers(
   1095             {axis: [keyarr, indexer]}, copy=True, allow_dups=True

/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis)
   1312             keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
   1313 
-> 1314         self._validate_read_indexer(keyarr, indexer, axis)
   1315 
   1316         if needs_i8_conversion(ax.dtype) or isinstance(

/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis)
   1375 
   1376             not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
-> 1377             raise KeyError(f"{not_found} not in index")
   1378 
   1379 

KeyError: "['B cells'] not in index"

For rankby_abs it does error, but is that a valid argument to pass to this function?

ivirshup avatar Dec 10 '21 23:12 ivirshup

I "fixed" the issue I had by editing the DotPlot class in the _dotplot.py module, switching the top line for the bottom line:

# dot_color_df = dot_color_df.loc[dot_size_df.index][dot_size_df.columns]
dot_color_df = dot_color_df.reindex(dot_size_df.index).reindex(columns=dot_size_df.columns)

I'm not sure the output is what is desired, but at least for my case it is the same in the cases where it worked before.

Xparx avatar Dec 11 '21 19:12 Xparx

I will retract the above snippet. It lets the function work for what it worked for before, but the new results are nonsense.

Xparx avatar Dec 17 '21 19:12 Xparx