scanpy
scanpy copied to clipboard
rank_genes_groups_dotplot does not work when using reference and using rankby_abs, or setting values_to_plot='logfoldchanges'
- [x] I have checked that this issue has not already been reported.
- [x] I have confirmed this bug exists on the latest version of scanpy.
- [ ] (optional) I have confirmed this bug exists on the master branch of scanpy.
Note: Please read this guide detailing how to provide the necessary information for us to reproduce your bug.
As the title says: certain combinations of keyword arguments for ranking gene groups and plotting unexpectedly throw an error.
Minimal code sample (that we can copy&paste without having any data)
adata = sc.datasets.paul15()
sc.tl.rank_genes_groups(adata, groupby='paul15_clusters', key_added='GG', use_raw=False, reference='1Ery')
rax = sc.pl.rank_genes_groups_dotplot(adata, key='GG', # , rankby_abs= None,
n_genes=3, cmap='PiYG_r', swap_axes=True,
show=False, values_to_plot='logfoldchanges',
vmin=None, vmax=None)
WARNING: In Scanpy 0.*, this returned logarithmized data. Now it returns non-logarithmized data.
... storing 'paul15_clusters' as categorical
Trying to set attribute `.uns` of view, copying.
WARNING: Default of the method has been changed to 't-test' from 't-test_overestim_var'
WARNING: It seems you use rank_genes_groups on the raw count data. Please logarithmize your data before calling rank_genes_groups.
ERROR: the given dot_color_df data frame has a different shape thanthe data frame used for the dot size. Both data frames needto have the same index and columns
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-110-708ec3ea001f> in <module>
1 adata = sc.datasets.paul15()
2 sc.tl.rank_genes_groups(adata, groupby='paul15_clusters', key_added='GG', use_raw=False, reference='1Ery')
----> 3 rax = sc.pl.rank_genes_groups_dotplot(adata, key='GG', # , rankby_abs= None,
4 n_genes=3, cmap='PiYG_r', swap_axes=True,
5 show=False, values_to_plot='logfoldchanges',
~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/scanpy/plotting/_tools/__init__.py in rank_genes_groups_dotplot(adata, groups, n_genes, groupby, values_to_plot, var_names, gene_symbols, min_logfoldchange, key, show, save, return_fig, **kwds)
861 tl.rank_genes_groups
862 """
--> 863 return _rank_genes_groups_plot(
864 adata,
865 plot_type='dotplot',
~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/scanpy/plotting/_tools/__init__.py in _rank_genes_groups_plot(adata, plot_type, groups, n_genes, groupby, values_to_plot, var_names, min_logfoldchange, key, show, save, return_fig, gene_symbols, **kwds)
534 from .._dotplot import dotplot
535
--> 536 _pl = dotplot(
537 adata,
538 var_names,
~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py in dotplot(adata, var_names, groupby, use_raw, log, num_categories, expression_cutoff, mean_only_expressed, cmap, dot_max, dot_min, standard_scale, smallest_dot, title, colorbar_title, size_title, figsize, dendrogram, gene_symbols, var_group_positions, var_group_labels, var_group_rotation, layer, swap_axes, dot_color_df, show, save, ax, return_fig, vmin, vmax, vcenter, norm, **kwds)
940 del kwds['color_map']
941
--> 942 dp = DotPlot(
943 adata,
944 var_names,
~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/scanpy/plotting/_dotplot.py in __init__(self, adata, var_names, groupby, use_raw, log, num_categories, categories_order, title, figsize, gene_symbols, var_group_positions, var_group_labels, var_group_rotation, layer, expression_cutoff, mean_only_expressed, standard_scale, dot_color_df, dot_size_df, ax, vmin, vmax, vcenter, norm, **kwds)
215 # get the same order for rows and columns in the dot_color_df
216 # using the order from the doc_size_df
--> 217 dot_color_df = dot_color_df.loc[dot_size_df.index][dot_size_df.columns]
218
219 self.dot_color_df = dot_color_df
~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/pandas/core/indexing.py in __getitem__(self, key)
893
894 maybe_callable = com.apply_if_callable(key, self.obj)
--> 895 return self._getitem_axis(maybe_callable, axis=axis)
896
897 def _is_scalar_access(self, key: Tuple):
~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
1111 raise ValueError("Cannot index with multidimensional key")
1112
-> 1113 return self._getitem_iterable(key, axis=axis)
1114
1115 # nested tuple slicing
~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/pandas/core/indexing.py in _getitem_iterable(self, key, axis)
1051
1052 # A collection of keys
-> 1053 keyarr, indexer = self._get_listlike_indexer(key, axis, raise_missing=False)
1054 return self.obj._reindex_with_indexers(
1055 {axis: [keyarr, indexer]}, copy=True, allow_dups=True
~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis, raise_missing)
1264 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
1265
-> 1266 self._validate_read_indexer(keyarr, indexer, axis, raise_missing=raise_missing)
1267 return keyarr, indexer
1268
~/.virtualenvs/pytorch_latest/lib/python3.8/site-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis, raise_missing)
1319
1320 with option_context("display.max_seq_items", 10, "display.width", 80):
-> 1321 raise KeyError(
1322 "Passing list-likes to .loc or [] with any missing labels "
1323 "is no longer supported. "
KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: CategoricalIndex(['1Ery'], categories=['1Ery', '2Ery', '3Ery', '4Ery', '5Ery', '6Ery', '7MEP', '8Mk', ...], ordered=False, name='paul15_clusters', dtype='category'). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"
Versions
sc.logging.print_versions()
WARNING: If you miss a compact list, please try print_header
!
anndata 0.7.6 scanpy 1.8.2 sinfo 0.3.1
PIL 8.2.0 anndata 0.7.6 autoreload NA backcall 0.2.0 cffi 1.14.5 configobj 5.0.6 cycler 0.10.0 cython_runtime NA dateutil 2.8.1 decorator 4.4.2 git 3.1.14 gitdb 4.0.7 google NA gpytorch 1.4.1 h5py 3.2.1 igraph 0.9.6 inferelator NA ipykernel 5.5.3 ipython_genutils 0.2.0 ipywidgets 7.6.3 jedi 0.18.0 joblib 1.0.1 kiwisolver 1.3.1 leidenalg 0.8.4 llvmlite 0.36.0 matplotlib 3.4.1 mpl_toolkits NA natsort 7.1.1 numba 0.53.1 numexpr 2.7.3 numpy 1.20.2 packaging 20.9 pandas 1.2.4 parso 0.8.2 pexpect 4.8.0 pickleshare 0.7.5 pkg_resources NA prompt_toolkit 3.0.18 ptyprocess 0.7.0 pycparser 2.20 pygments 2.8.1 pynndescent 0.5.2 pyparsing 2.4.7 pytz 2021.1 scanpy 1.8.2 scipy 1.6.3 seaborn 0.11.1 sinfo 0.3.1 sitecustomize NA six 1.15.0 sklearn 0.24.2 smmap 4.0.0 statsmodels 0.12.2 storemagic NA supirfactor NA tables 3.6.1 texttable 1.6.3 torch 1.9.0+cu102 tornado 6.1 tqdm 4.60.0 traitlets 5.0.5 typing_extensions NA umap 0.5.1 wcwidth 0.2.5 zmq 22.0.3
IPython 7.22.0 jupyter_client 6.1.12 jupyter_core 4.7.1 notebook 6.3.0
Python 3.8.5 (default, Jan 27 2021, 15:41:15) [GCC 9.3.0] Linux-5.4.0-91-generic-x86_64-with-glibc2.29 12 logical CPU cores, x86_64
Session information updated at 2021-12-10 17:16
Thanks for the report. I can broadly reproduce the error for passing `values_to_plot`. The error I get is a little different, but I expect that's due to pandas versions.
A more minimal example:
import scanpy as sc
adata = sc.datasets.pbmc3k_processed().raw.to_adata()
sc.tl.rank_genes_groups(adata, groupby="louvain", reference="B cells")
# Errors with any of ['scores', 'logfoldchanges', 'pvals', 'pvals_adj','log10_pvals', 'log10_pvals_adj']
sc.pl.rank_genes_groups_dotplot(adata, values_to_plot='logfoldchanges')
Traceback
ERROR: the given dot_color_df data frame has a different shape thanthe data frame used for the dot size. Both data frames needto have the same index and columns
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
/var/folders/bd/43q20k0n6z15tdfzxvd22r7c0000gn/T/ipykernel_62013/1545772980.py in <module>
1 while len(possible_vals) > 0:
----> 2 sc.pl.rank_genes_groups_dotplot(adata, values_to_plot=possible_vals.pop())
3
~/github/scanpy/scanpy/plotting/_tools/__init__.py in rank_genes_groups_dotplot(adata, groups, n_genes, groupby, values_to_plot, var_names, gene_symbols, min_logfoldchange, key, show, save, return_fig, **kwds)
861 tl.rank_genes_groups
862 """
--> 863 return _rank_genes_groups_plot(
864 adata,
865 plot_type='dotplot',
~/github/scanpy/scanpy/plotting/_tools/__init__.py in _rank_genes_groups_plot(adata, plot_type, groups, n_genes, groupby, values_to_plot, var_names, min_logfoldchange, key, show, save, return_fig, gene_symbols, **kwds)
534 from .._dotplot import dotplot
535
--> 536 _pl = dotplot(
537 adata,
538 var_names,
~/github/scanpy/scanpy/plotting/_dotplot.py in dotplot(adata, var_names, groupby, use_raw, log, num_categories, expression_cutoff, mean_only_expressed, cmap, dot_max, dot_min, standard_scale, smallest_dot, title, colorbar_title, size_title, figsize, dendrogram, gene_symbols, var_group_positions, var_group_labels, var_group_rotation, layer, swap_axes, dot_color_df, show, save, ax, return_fig, vmin, vmax, vcenter, norm, **kwds)
940 del kwds['color_map']
941
--> 942 dp = DotPlot(
943 adata,
944 var_names,
~/github/scanpy/scanpy/plotting/_dotplot.py in __init__(self, adata, var_names, groupby, use_raw, log, num_categories, categories_order, title, figsize, gene_symbols, var_group_positions, var_group_labels, var_group_rotation, layer, expression_cutoff, mean_only_expressed, standard_scale, dot_color_df, dot_size_df, ax, vmin, vmax, vcenter, norm, **kwds)
215 # get the same order for rows and columns in the dot_color_df
216 # using the order from the doc_size_df
--> 217 dot_color_df = dot_color_df.loc[dot_size_df.index][dot_size_df.columns]
218
219 self.dot_color_df = dot_color_df
/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py in __getitem__(self, key)
929
930 maybe_callable = com.apply_if_callable(key, self.obj)
--> 931 return self._getitem_axis(maybe_callable, axis=axis)
932
933 def _is_scalar_access(self, key: tuple):
/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_axis(self, key, axis)
1151 raise ValueError("Cannot index with multidimensional key")
1152
-> 1153 return self._getitem_iterable(key, axis=axis)
1154
1155 # nested tuple slicing
/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py in _getitem_iterable(self, key, axis)
1091
1092 # A collection of keys
-> 1093 keyarr, indexer = self._get_listlike_indexer(key, axis)
1094 return self.obj._reindex_with_indexers(
1095 {axis: [keyarr, indexer]}, copy=True, allow_dups=True
/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py in _get_listlike_indexer(self, key, axis)
1312 keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)
1313
-> 1314 self._validate_read_indexer(keyarr, indexer, axis)
1315
1316 if needs_i8_conversion(ax.dtype) or isinstance(
/usr/local/lib/python3.9/site-packages/pandas/core/indexing.py in _validate_read_indexer(self, key, indexer, axis)
1375
1376 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
-> 1377 raise KeyError(f"{not_found} not in index")
1378
1379
KeyError: "['B cells'] not in index"
For `rankby_abs` it does error, but is that a valid argument to pass to this function?
I "fixed" the issue I had by editing the DotPlot class in the _dotplot.py module, switching the top line for the bottom line:
# dot_color_df = dot_color_df.loc[dot_size_df.index][dot_size_df.columns]
dot_color_df = dot_color_df.reindex(dot_size_df.index).reindex(columns=dot_size_df.columns)
I'm not sure the output is what is desired, but for my case at least it is the same for cases where it worked before.
I will retract the above snippet. It lets the function work for what it worked for before, but the new results are nonsense.