anndata
anndata copied to clipboard
Creating an AnnCollection from AnnData objects with DataFrames in obsm raises an error
Discovered while trying to answer this discourse question about concatenating backed AnnData
objects.
Ping @Koncopd
Example:
# Reproduction script: building an AnnCollection from backed AnnData objects
# whose ``obsm`` contains a pandas DataFrame.
#
# Steps:
#   1. Write three small .h5ad files, each holding a DataFrame in obsm["df"].
#   2. Re-open every file in backed ("r") mode.
#   3. Pass the backed objects to AnnCollection -- this last call is the one
#      that raises the TypeError reported in this issue.
from pathlib import Path
from shutil import rmtree

import anndata as ad
import numpy as np
import pandas as pd
from anndata.experimental.multi_files import AnnCollection

# Make data
# Start from an empty directory so files from a previous run are not globbed.
dataset_dir = Path("test_datasets")
if dataset_dir.is_dir():
    rmtree(dataset_dir)
dataset_dir.mkdir()

# Every generated file has the same shape.
n_obs, n_var = 30, 10
# Running count of observations written so far; keeps obs_names unique
# across files.
offset = 0
for i in range(3):
    # Giving unique obs_names
    obs_names = pd.Index([f"cell_{j}" for j in range(offset, offset + n_obs)])
    adata = ad.AnnData(
        X=np.zeros((n_obs, n_var), dtype="float32"),
        obs=pd.DataFrame(index=obs_names),
        # The DataFrame-valued obsm entry is what the issue is about.
        obsm={
            "df": pd.DataFrame({"a": np.arange(n_obs)}, index=obs_names),
        },
    )
    adata.write_h5ad(dataset_dir / f"adata_{i:02}.h5ad")
    offset += n_obs

# Sorted so the files are always opened in the same order.
pths = sorted(dataset_dir.glob("*.h5ad"))
adatas = [ad.read_h5ad(p, backed="r") for p in pths]
# Raises: TypeError: <lambda>() missing 2 required positional arguments:
# 'fill_value' and 'axis'
dataset = AnnCollection(adatas)
TypeError: <lambda>() missing 2 required positional arguments: 'fill_value' and 'axis'
Full traceback
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [3], in <cell line: 4>()
1 pths = sorted(dataset_dir.glob("*.h5ad"))
3 adatas = [ad.read_h5ad(p, backed="r") for p in pths]
----> 4 dataset = AnnCollection(adatas)
5 del adatas
File ~/github/anndata/anndata/experimental/multi_files/_anncollection.py:791, in AnnCollection.__init__(self, adatas, join_obs, join_obsm, join_vars, label, keys, index_unique, convert, harmonize_dtypes, indices_strict)
789 self._dtypes = None
790 if len(adatas) > 1 and harmonize_dtypes:
--> 791 self._dtypes = _harmonize_types(self._view_attrs_keys, self.adatas)
793 self.indices_strict = indices_strict
File ~/github/anndata/anndata/experimental/multi_files/_anncollection.py:62, in _harmonize_types(attrs_keys, adatas)
60 attrs_keys_types[attr] = {}
61 for key in keys:
---> 62 attrs_keys_types[attr][key] = check_type(attr, key)
64 attrs_keys_types["X"] = check_type("X")
66 return attrs_keys_types
File ~/github/anndata/anndata/experimental/multi_files/_anncollection.py:52, in _harmonize_types.<locals>.check_type(attr, key)
50 # hacky but numpy find_common_type doesn't work with categoricals
51 try:
---> 52 dtype = _merge([arr[:1] for arr in arrs]).dtype
53 except ValueError:
54 dtype = _merge([arr[:1, :1] for arr in arrs]).dtype
File ~/github/anndata/anndata/experimental/multi_files/_anncollection.py:23, in _merge(arrs)
21 def _merge(arrs):
22 rxers = [lambda x, fill_value, axis: x] * len(arrs)
---> 23 return concat_arrays(arrs, rxers)
File ~/github/anndata/anndata/_core/merge.py:433, in concat_arrays(arrays, reindexers, axis, index, fill_value)
428 raise NotImplementedError(
429 "Cannot concatenate a dataframe with other array types."
430 )
431 # TODO: behaviour here should be chosen through a merge strategy
432 df = pd.concat(
--> 433 [f(x) for f, x in zip(reindexers, arrays)], ignore_index=True, axis=axis
434 )
435 df.index = index
436 return df
File ~/github/anndata/anndata/_core/merge.py:433, in <listcomp>(.0)
428 raise NotImplementedError(
429 "Cannot concatenate a dataframe with other array types."
430 )
431 # TODO: behaviour here should be chosen through a merge strategy
432 df = pd.concat(
--> 433 [f(x) for f, x in zip(reindexers, arrays)], ignore_index=True, axis=axis
434 )
435 df.index = index
436 return df
TypeError: <lambda>() missing 2 required positional arguments: 'fill_value' and 'axis'
This doesn't error if there are only sparse and dense arrays in obsm.
Hi, I missed this; I will check.
This issue has been automatically marked as stale because it has not had recent activity. Please add a comment if you want to keep the issue open. Thank you for your contributions!
Hi @Koncopd did you end up checking this?
This issue has been automatically marked as stale because it has not had recent activity. Please add a comment if you want to keep the issue open. Thank you for your contributions!