anndata Creating AnnCollections from AnnDatas with dataframes in obsm errors

Discovered while trying to answer this discourse question about concatenating backed AnnData objects.

Ping @Koncopd

Example:

from pathlib import Path
from shutil import rmtree

import anndata as ad, pandas as pd, numpy as np

from anndata.experimental.multi_files import AnnCollection

# Start from an empty scratch directory so files from earlier runs cannot leak in.
dataset_dir = Path("test_datasets")
if dataset_dir.is_dir():
    rmtree(dataset_dir)
dataset_dir.mkdir()

# Write three small AnnData files, each carrying a DataFrame in obsm["df"].
n_obs, n_var = 30, 10
for i, offset in enumerate(range(0, 3 * n_obs, n_obs)):
    # obs_names stay unique across files: each file continues numbering
    # where the previous one stopped.
    obs_names = pd.Index([f"cell_{j}" for j in range(offset, offset + n_obs)])
    obsm_df = pd.DataFrame({"a": np.arange(n_obs)}, index=obs_names)

    adata = ad.AnnData(
        X=np.zeros((n_obs, n_var), dtype="float32"),
        obs=pd.DataFrame(index=obs_names),
        obsm={"df": obsm_df},
    )
    adata.write_h5ad(dataset_dir / f"adata_{i:02}.h5ad")

# Reopen the files in backed (read-only) mode, in filename order.
pths = sorted(dataset_dir.glob("*.h5ad"))
adatas = [ad.read_h5ad(pth, backed="r") for pth in pths]

# Building the collection is the step that fails with the TypeError reported here.
dataset = AnnCollection(adatas)

TypeError: <lambda>() missing 2 required positional arguments: 'fill_value' and 'axis'

Full traceback

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Input In [3], in <cell line: 4>()
      1 pths = sorted(dataset_dir.glob("*.h5ad"))
      3 adatas = [ad.read_h5ad(p, backed="r") for p in pths]
----> 4 dataset = AnnCollection(adatas)
      5 del adatas

File ~/github/anndata/anndata/experimental/multi_files/_anncollection.py:791, in AnnCollection.__init__(self, adatas, join_obs, join_obsm, join_vars, label, keys, index_unique, convert, harmonize_dtypes, indices_strict)
    789 self._dtypes = None
    790 if len(adatas) > 1 and harmonize_dtypes:
--> 791     self._dtypes = _harmonize_types(self._view_attrs_keys, self.adatas)
    793 self.indices_strict = indices_strict

File ~/github/anndata/anndata/experimental/multi_files/_anncollection.py:62, in _harmonize_types(attrs_keys, adatas)
     60     attrs_keys_types[attr] = {}
     61     for key in keys:
---> 62         attrs_keys_types[attr][key] = check_type(attr, key)
     64 attrs_keys_types["X"] = check_type("X")
     66 return attrs_keys_types

File ~/github/anndata/anndata/experimental/multi_files/_anncollection.py:52, in _harmonize_types.<locals>.check_type(attr, key)
     50 # hacky but numpy find_common_type doesn't work with categoricals
     51 try:
---> 52     dtype = _merge([arr[:1] for arr in arrs]).dtype
     53 except ValueError:
     54     dtype = _merge([arr[:1, :1] for arr in arrs]).dtype

File ~/github/anndata/anndata/experimental/multi_files/_anncollection.py:23, in _merge(arrs)
     21 def _merge(arrs):
     22     rxers = [lambda x, fill_value, axis: x] * len(arrs)
---> 23     return concat_arrays(arrs, rxers)

File ~/github/anndata/anndata/_core/merge.py:433, in concat_arrays(arrays, reindexers, axis, index, fill_value)
    428     raise NotImplementedError(
    429         "Cannot concatenate a dataframe with other array types."
    430     )
    431 # TODO: behaviour here should be chosen through a merge strategy
    432 df = pd.concat(
--> 433     [f(x) for f, x in zip(reindexers, arrays)], ignore_index=True, axis=axis
    434 )
    435 df.index = index
    436 return df

File ~/github/anndata/anndata/_core/merge.py:433, in <listcomp>(.0)
    428     raise NotImplementedError(
    429         "Cannot concatenate a dataframe with other array types."
    430     )
    431 # TODO: behaviour here should be chosen through a merge strategy
    432 df = pd.concat(
--> 433     [f(x) for f, x in zip(reindexers, arrays)], ignore_index=True, axis=axis
    434 )
    435 df.index = index
    436 return df

TypeError: <lambda>() missing 2 required positional arguments: 'fill_value' and 'axis'

This doesn't error if there are only sparse and dense arrays in obsm.

Apr 12 '22 21:04 ivirshup

Hi, I missed this; I will check.

Apr 22 '22 09:04 Koncopd

This issue has been automatically marked as stale because it has not had recent activity. Please add a comment if you want to keep the issue open. Thank you for your contributions!

Aug 14 '23 02:08 github-actions[bot]

Hi @Koncopd did you end up checking this?

Aug 14 '23 07:08 flying-sheep

This issue has been automatically marked as stale because it has not had recent activity. Please add a comment if you want to keep the issue open. Thank you for your contributions!

Oct 16 '23 02:10 github-actions[bot]

anndata anndata copied to clipboard

Creating AnnCollections from AnnDatas with dataframes in obsm errors

Example:

anndata
anndata copied to clipboard