anndata icon indicating copy to clipboard operation
anndata copied to clipboard

Errors caused by X when subsetting AnnData object

Open jonas2612 opened this issue 1 year ago • 7 comments

Please make sure these conditions are met

  • [X] I have checked that this issue has not already been reported.
  • [X] I have confirmed this bug exists on the latest version of anndata.
  • [ ] (optional) I have confirmed this bug exists on the master branch of anndata.

Report

I haven't been able to reproduce the error(s) on a smaller example. The dataset can be downloaded and assembled from here: https://shendure-web.gs.washington.edu/content/members/cxqiu/public/nobackup/jax/download/adata/. X is stored as a CSR sparse matrix and experimental_batch is saved as a categorical column. I receive two different error messages depending on whether I subset by equality or by inequality.

Code:

import scanpy as sc

adata = sc.read('/lustre/groups/ml01/workspace/monge_velo/data/mouse_gastrulation_atlas.h5ad')
adata_tmp1 = adata[adata.obs["experimental_batch"]!='run_22']
adata_tmp1.X

Traceback:

File ~/mambaforge/envs/atlas_pca/lib/python3.10/site-packages/anndata/_core/anndata.py:612, in AnnData.X(self)
    609     X = None
    610 elif self.is_view:
    611     X = as_view(
--> 612         _subset(self._adata_ref.X, (self._oidx, self._vidx)),
    613         ElementRef(self, "X"),
    614     )
    615 else:
    616     X = self._X

File ~/mambaforge/envs/atlas_pca/lib/python3.10/functools.py:889, in singledispatch.<locals>.wrapper(*args, **kw)
    885 if not args:
    886     raise TypeError(f'{funcname} requires at least '
    887                     '1 positional argument')
--> 889 return dispatch(args[0].__class__)(*args, **kw)

File ~/mambaforge/envs/atlas_pca/lib/python3.10/site-packages/anndata/_core/index.py:140, in _subset_spmatrix(a, subset_idx)
    138 if len(subset_idx) > 1 and all(isinstance(x, cabc.Iterable) for x in subset_idx):
    139     subset_idx = (subset_idx[0].reshape(-1, 1), *subset_idx[1:])
--> 140 return a[subset_idx]

File ~/mambaforge/envs/atlas_pca/lib/python3.10/site-packages/scipy/sparse/_index.py:75, in IndexMixin.__getitem__(self, key)
     73         return self._get_arrayXint(row, col)
     74     elif isinstance(col, slice):
---> 75         return self._get_arrayXslice(row, col)
     76 else:  # row.ndim == 2
     77     if isinstance(col, INT_TYPES):

File ~/mambaforge/envs/atlas_pca/lib/python3.10/site-packages/scipy/sparse/_csr.py:335, in _csr_base._get_arrayXslice(self, row, col)
    333     col = np.arange(*col.indices(self.shape[1]))
    334     return self._get_arrayXarray(row, col)
--> 335 return self._major_index_fancy(row)._get_submatrix(minor=col)

File ~/mambaforge/envs/atlas_pca/lib/python3.10/site-packages/scipy/sparse/_compressed.py:705, in _cs_matrix._major_index_fancy(self, idx)
    702 np.cumsum(row_nnz, out=res_indptr[1:])
    704 nnz = res_indptr[-1]
--> 705 res_indices = np.empty(nnz, dtype=idx_dtype)
    706 res_data = np.empty(nnz, dtype=self.dtype)
    707 csr_row_index(M, indices, self.indptr, self.indices, self.data,
    708               res_indices, res_data)

ValueError: negative dimensions are not allowed

Code:

import scanpy as sc

adata = sc.read('/lustre/groups/ml01/workspace/monge_velo/data/mouse_gastrulation_atlas.h5ad')
adata_tmp2 = adata[adata.obs["experimental_batch"]=='run_22']
adata_tmp2.X

Traceback:

File ~/mambaforge/envs/atlas_pca/lib/python3.10/site-packages/anndata/_core/anndata.py:612, in AnnData.X(self)
    609     X = None
    610 elif self.is_view:
    611     X = as_view(
--> 612         _subset(self._adata_ref.X, (self._oidx, self._vidx)),
    613         ElementRef(self, "X"),
    614     )
    615 else:
    616     X = self._X

File ~/mambaforge/envs/atlas_pca/lib/python3.10/functools.py:889, in singledispatch.<locals>.wrapper(*args, **kw)
    885 if not args:
    886     raise TypeError(f'{funcname} requires at least '
    887                     '1 positional argument')
--> 889 return dispatch(args[0].__class__)(*args, **kw)

File ~/mambaforge/envs/atlas_pca/lib/python3.10/site-packages/anndata/_core/index.py:140, in _subset_spmatrix(a, subset_idx)
    138 if len(subset_idx) > 1 and all(isinstance(x, cabc.Iterable) for x in subset_idx):
    139     subset_idx = (subset_idx[0].reshape(-1, 1), *subset_idx[1:])
--> 140 return a[subset_idx]

File ~/mambaforge/envs/atlas_pca/lib/python3.10/site-packages/scipy/sparse/_index.py:75, in IndexMixin.__getitem__(self, key)
     73         return self._get_arrayXint(row, col)
     74     elif isinstance(col, slice):
---> 75         return self._get_arrayXslice(row, col)
     76 else:  # row.ndim == 2
     77     if isinstance(col, INT_TYPES):

File ~/mambaforge/envs/atlas_pca/lib/python3.10/site-packages/scipy/sparse/_csr.py:335, in _csr_base._get_arrayXslice(self, row, col)
    333     col = np.arange(*col.indices(self.shape[1]))
    334     return self._get_arrayXarray(row, col)
--> 335 return self._major_index_fancy(row)._get_submatrix(minor=col)

File ~/mambaforge/envs/atlas_pca/lib/python3.10/site-packages/scipy/sparse/_compressed.py:707, in _cs_matrix._major_index_fancy(self, idx)
    705 res_indices = np.empty(nnz, dtype=idx_dtype)
    706 res_data = np.empty(nnz, dtype=self.dtype)
--> 707 csr_row_index(M, indices, self.indptr, self.indices, self.data,
    708               res_indices, res_data)
    710 return self.__class__((res_data, res_indices, res_indptr),
    711                       shape=new_shape, copy=False)

ValueError: Output dtype not compatible with inputs.

Versions

-----
anndata             0.9.2
numpy               1.24.4
pandas              2.0.3
scanpy              1.9.3
scipy               1.11.2
session_info        1.0.0
-----
PIL                         10.0.0
asciitree                   NA
asttokens                   NA
backcall                    0.2.0
cloudpickle                 2.2.1
comm                        0.1.2
cycler                      0.10.0
cython_runtime              NA
cytoolz                     0.12.2
dask                        2023.8.1
dateutil                    2.8.2
debugpy                     1.5.1
decorator                   5.1.1
entrypoints                 0.4
executing                   0.8.3
fasteners                   0.18
h5py                        3.9.0
importlib_metadata          NA
ipykernel                   6.19.2
jedi                        0.18.1
jinja2                      3.1.2
joblib                      1.3.2
kiwisolver                  1.4.4
llvmlite                    0.40.1
lz4                         4.3.2
markupsafe                  2.1.3
matplotlib                  3.7.2
mpl_toolkits                NA
msgpack                     1.0.5
natsort                     8.4.0
numba                       0.57.1
numcodecs                   0.11.0
packaging                   23.1
parso                       0.8.3
pexpect                     4.8.0
pickleshare                 0.7.5
pkg_resources               NA
platformdirs                3.10.0
prompt_toolkit              3.0.36
psutil                      5.9.5
ptyprocess                  0.7.0
pure_eval                   0.2.2
pyarrow                     12.0.1
pydev_ipython               NA
pydevconsole                NA
pydevd                      2.6.0
pydevd_concurrency_analyser NA
pydevd_file_utils           NA
pydevd_plugins              NA
pydevd_tracing              NA
pygments                    2.11.2
pyparsing                   3.0.9
pytz                        2023.3
six                         1.16.0
sklearn                     1.3.0
stack_data                  0.2.0
tblib                       1.7.0
threadpoolctl               3.2.0
tlz                         0.12.2
toolz                       0.12.0
tornado                     6.3.3
traitlets                   5.7.1
typing_extensions           NA
wcwidth                     0.2.5
yaml                        6.0
zarr                        2.16.1
zipp                        NA
zmq                         23.2.0
zoneinfo                    NA
-----
IPython             8.8.0
jupyter_client      7.4.8
jupyter_core        5.1.1
-----
Python 3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:40:32) [GCC 12.3.0]
Linux-4.18.0-425.3.1.el8.x86_64-x86_64-with-glibc2.28

jonas2612 avatar Sep 13 '23 08:09 jonas2612