spatialdata icon indicating copy to clipboard operation
spatialdata copied to clipboard

exceeding rasterize's maximum label index

Open LouisK92 opened this issue 3 months ago • 1 comments

I am using sd.rasterize on cell polygons of >100k cells and run into the error.


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[14], line 1
----> 1 sdata["cell_labels"] = sd.rasterize(
...
      9 )

File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/_core/operations/rasterize.py:351, in rasterize(data, axes, min_coordinate, max_coordinate, target_coordinate_system, target_unit_to_pixels, target_width, target_height, target_depth, sdata, value_key, table_name, return_regions_as_labels, agg_func, return_single_channel)
    349     return rasterized
    350 if model in (PointsModel, ShapesModel):
--> 351     return rasterize_shapes_points(
...    368     )
    369 raise ValueError(f"Unsupported model {model}.")

File ~/miniconda3/envs/g3/lib/python3.11/site-packages/spatialdata/_core/operations/rasterize.py:723, in rasterize_shapes_points(data, axes, min_coordinate, max_coordinate, target_coordinate_system, target_unit_to_pixels, target_width, target_height, target_depth, element_name, sdata, value_key, table_name, return_regions_as_labels, agg_func, return_single_channel)
    721 max_uint16 = np.iinfo(np.uint16).max
    722 if max_label > max_uint16:
--> 723     raise ValueError(f"Maximum label index is {max_label}. Values higher than {max_uint16} are not supported.")
    724 agg = agg.astype(np.uint16)
    725 return Labels2DModel.parse(agg, transformations=transformations)

ValueError: Maximum label index is 117340. Values higher than 65535 are not supported.

I was wondering if the threshold needs to be that low or could be increased?

To reproduce the error:


import numpy as np
import spatialdata as sd

def get_n_coords(n):
    coords = np.random.rand(n, 2)
    return coords
    
sdata1 = sd.SpatialData(points={"points": PointsModel.parse(get_n_coords(65534))})
sdata2 = sd.SpatialData(points={"points": PointsModel.parse(get_n_coords(65536))})

# sdata1 would work
img_extent = sd.get_extent(sdata2['points'])
sd.rasterize(
    sdata2["points"],
    ["x", "y"],
    min_coordinate=[int(img_extent["x"][0]), int(img_extent["y"][0])],
    max_coordinate=[int(img_extent["x"][1]), int(img_extent["y"][1])],
    target_coordinate_system="global",
    target_unit_to_pixels=1,
    return_regions_as_labels=True,
)

LouisK92 avatar Sep 22 '25 14:09 LouisK92

A quick workaround (assuming that cell labels don't overlap) would be the following. Note that I used circles instead of points for the example, which is closer to what I actually wanted. In case of points the .iloc fails since we'd deal with a dask dataframe in that case.


import numpy as np
import spatialdata as sd
from spatialdata.models import ShapesModel

def get_n_coords(n):
    coords = np.random.rand(n, 2)
    return coords
    
sdata = sd.SpatialData(shapes={"shapes": ShapesModel.parse(get_n_coords(65536), geometry=0, radius=np.ones(65536))})

img_extent = sd.get_extent(sdata['shapes']) # NOTE: Here I actually use sd.get_extent(sdata['image'])

N = 65535 # number of shapes that are rasterised per step
n_cells = len(sdata['shapes'])
n_iter = n_cells // N + bool(n_cells % N)

rasterize_args = {
    "min_coordinate": [int(img_extent["x"][0]), int(img_extent["y"][0])],
    "max_coordinate": [int(img_extent["x"][1]), int(img_extent["y"][1])],
    "target_coordinate_system": "global",
    "target_unit_to_pixels": 1,
    "return_regions_as_labels": True,
}

for i in range(n_iter):
    labels_image_ = sd.rasterize(sdata['shapes'].iloc[i*N:min((i+1)*N, n_cells)], ["x", "y"], **rasterize_args)
    if i == 0:
        labels_image = labels_image_
    else:
        labels_image.values[labels_image_.values > 0] = labels_image_.values[labels_image_.values > 0]

sdata['labels'] = labels_image

LouisK92 avatar Sep 22 '25 19:09 LouisK92