spatialdata icon indicating copy to clipboard operation
spatialdata copied to clipboard

Renaming of feature_key column of points

Open LouisK92 opened this issue 3 months ago • 0 comments

I was trying to rename columns of a spatialdata points object and ran into issues regarding the column that is labeled as feature_key.

Here are reproducible examples of what I tried:

Sdata setup

import pandas as pd
import dask.dataframe as dd
import spatialdata as sd

df = pd.DataFrame({"gene": ["A", "B", "C"], "x": [1, 2, 3], "y": [1, 1, 1]})
df = dd.from_pandas(df, npartitions=1)

sdata = sd.SpatialData(points={"points": sd.models.PointsModel.parse(df, feature_key="gene")})

Example 1

sdata['points'] = sdata['points'].rename(columns={"gene": "gene_symbol"})

leads to

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~/miniconda3/envs/g3/lib/python3.11/site-packages/pandas/core/indexes/base.py:3812, in Index.get_loc(self, key)
   3811 try:
-> 3812     return self._engine.get_loc(casted_key)
   3813 except KeyError as err:

File pandas/_libs/index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()

File pandas/_libs/index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()

File pandas/_libs/hashtable_class_helper.pxi:7088, in pandas._libs.hashtable.PyObjectHashTable.get_item()

File pandas/_libs/hashtable_class_helper.pxi:7096, in pandas._libs.hashtable.PyObjectHashTable.get_item()

KeyError: 'gene'

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Cell In[60], line 9
      6 df = dd.from_pandas(df, npartitions=1)
      8 sdata = sd.SpatialData(points={"points": sd.models.PointsModel.parse(df, feature_key="gene")})
----> 9 sdata['points'] = sdata['points'].rename(columns={"gene": "gene_symbol"})
     12 sdata['points']['gene_symbol'] = sdata['points']['gene']
     13 del sdata['points']['gene']

File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/_core/spatialdata.py:2393, in SpatialData.__setitem__(self, key, value)
   2382 def __setitem__(self, key: str, value: SpatialElement | AnnData) -> None:
   2383     """
   2384     Add the element to the SpatialData object.
   2385 
   (...)
   2391         The element.
   2392     """
-> 2393     schema = get_model(value)
   2394     if schema in (Image2DModel, Image3DModel):
   2395         self.images[key] = value

File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/models/models.py:1193, in get_model(e)
   1191     return _validate_and_return(ShapesModel, e)
   1192 if isinstance(e, DaskDataFrame):
-> 1193     return _validate_and_return(PointsModel, e)
   1194 if isinstance(e, AnnData):
   1195     return _validate_and_return(TableModel, e)

File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/models/models.py:1178, in get_model.<locals>._validate_and_return(schema, e)
   1174 def _validate_and_return(
   1175     schema: Schema_t,
   1176     e: SpatialElement,
   1177 ) -> Schema_t:
-> 1178     schema().validate(e)
   1179     return schema

File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/models/models.py:653, in PointsModel.validate(cls, data)
    651 if ATTRS_KEY in data.attrs and "feature_key" in data.attrs[ATTRS_KEY]:
    652     feature_key = data.attrs[ATTRS_KEY][cls.FEATURE_KEY]
--> 653     if not isinstance(data[feature_key].dtype, CategoricalDtype):
    654         logger.info(f"Feature key `{feature_key}`could be of type `pd.Categorical`. Consider casting it.")

File ~/miniconda3/envs/g3/lib/python3.11/site-packages/dask/dataframe/core.py:4955, in DataFrame.__getitem__(self, key)
   4952         return self.loc[key]
   4954 # error is raised from pandas
-> 4955 meta = self._meta[_extract_meta(key)]
   4956 dsk = partitionwise_graph(operator.getitem, name, self, key)
...
   3822     #  InvalidIndexError. Otherwise we fall through and re-raise
   3823     #  the TypeError.
   3824     self._check_indexing_error(key)

KeyError: 'gene'

Example 2 (error occurs after writing and reading)

sdata['points']['gene_symbol'] = sdata['points']['gene']
del sdata['points']['gene']

sdata.write("sdata_test.zarr")
sd.read_zarr("sdata_test.zarr")

leads to

---------------------------------------------------------------------------
ValidationError                           Traceback (most recent call last)
Cell In[59], line 17
     14 #sdata['points'].attrs["spatialdata_attrs"]["feature_key"] = "gene_symbol"
     16 sdata.write("sdata_test.zarr")
---> 17 sd.read_zarr("sdata_test.zarr")
     20 #sdata = sd.SpatialData(
     21 #    transcripts=sdata['transcripts'],
     22 #    counts=sdata['counts'],
   (...)
     25 #)
     26 #sdata

File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/_io/io_zarr.py:229, in read_zarr(store, selection, on_bad_files)
    226 else:
    227     attrs = None
--> 229 sdata = SpatialData(
    230     images=images,
    231     labels=labels,
    232     points=points,
    233     shapes=shapes,
    234     tables=tables,
    235     attrs=attrs,
    236 )
    237 sdata.path = Path(store)
    238 return sdata

File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/_utils.py:270, in _deprecation_alias.<locals>.deprecation_decorator.<locals>.wrapper(*args, **kwargs)
    268     raise ValueError("version for deprecation must be specified")
    269 rename_kwargs(f.__name__, kwargs, alias_copy, class_name, library, version)
--> 270 return f(*args, **kwargs)

File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/_core/spatialdata.py:156, in SpatialData.__init__(self, images, labels, points, shapes, tables, attrs)
    151     duplicates = {x for x in element_names if element_names.count(x) > 1}
    152     raise KeyError(
    153         f"Element names must be unique. The following element names are used multiple times: {duplicates}"
    154     )
--> 156 with raise_validation_errors(
    157     title="Cannot construct SpatialData object, input contains invalid elements.\n"
    158     "For renaming, please see the discussion here https://github.com/scverse/spatialdata/discussions/707 .",
    159     exc_type=(ValueError, KeyError),
    160 ) as collect_error:
    161     if images is not None:
    162         for k, v in images.items():

File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/_core/validation.py:382, in raise_validation_errors.__exit__(self, exc_type, exc_val, exc_tb)
    380 # Exceptions were collected that we want to raise as a combined validation error.
    381 if self._collector.errors:
--> 382     raise ValidationError(title=self._message, errors=self._collector.errors)
    383 return True

ValidationError: Cannot construct SpatialData object, input contains invalid elements.
For renaming, please see the discussion here https://github.com/scverse/spatialdata/discussions/707 .
  points/points: gene

Solution

I got it working with a small adjustment to Example 2, namely also updating the feature_key entry in the element's attrs to the new column name:

sdata['points']['gene_symbol'] = sdata['points']['gene']
del sdata['points']['gene']
sdata['points'].attrs["spatialdata_attrs"]["feature_key"] = "gene_symbol"

sdata.write("sdata_test.zarr")
sd.read_zarr("sdata_test.zarr")

Expected behaviour

I think the expected behaviour would be that

  • sdata['points'] = sdata['points'].rename(columns={"gene": "gene_symbol"}) works out of the box and also updates the feature_key (sdata['points'].attrs["spatialdata_attrs"]["feature_key"]) to the new column name
  • del sdata['points']['gene'] deletes the column and also removes the entry in sdata['points'].attrs["spatialdata_attrs"]["feature_key"] (perhaps with a warning that the deleted column is the feature_key column rather than an ordinary column)

LouisK92 avatar Oct 10 '25 18:10 LouisK92