Error during the prediction of segmentation
I was running segment method as follows in my script:
from segger.training.segger_data_module import SeggerDataModule
# from segger.prediction.predict import predict, load_model
from segger.models.segger_model import Segger
from segger.training.train import LitSegger
from torch_geometric.nn import to_hetero
from lightning.pytorch.loggers import CSVLogger
from lightning import Trainer
from pathlib import Path
from lightning.pytorch.plugins.environments import LightningEnvironment
from matplotlib import pyplot as plt
import seaborn as sns
# import pandas as pd
from segger.data.utils import calculate_gene_celltype_abundance_embedding
# import scanpy as sc
import os
from lightning import LightningModule
from path import Path
from segger.data.parquet.sample import STSampleParquet
from segger.data.utils import calculate_gene_celltype_abundance_embedding
import scanpy as sc
import pandas as pd
import math
import numpy as np
from segger.data.parquet._utils import get_polygons_from_xy
import spatialdata_io as sio
import spatialdata as sd
from segger.training.segger_data_module import SeggerDataModule
from segger.prediction.predict_parquet import segment, load_model
from pathlib import Path
from matplotlib import pyplot as plt
import seaborn as sns
import scanpy as sc
import os
import dask.dataframe as dd
import pandas as pd
from pathlib import Path
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
os.environ["CUPY_CACHE_DIR"] = "./.cupy"
data_xenium = '~/241204_Xenium_Run02/output-XETG00051__0029181__S188007__20241204__122024'
segger_data = '~/data/t2d/segger_out/S188007/'
segger_model = '~/data/t2d/segger_out/model/S188007/'
adata = sc.read_h5ad('~/data/t2d/reference_pancreas.h5ad')
sc.pp.pca(adata, 128)
weights = pd.DataFrame(adata.varm['PCs'])
weights.index = adata.var.index
xenium_data_dir = Path(data_xenium)
segger_data_dir = Path(segger_data)
models_dir = Path(segger_model)
XENIUM_DATA_DIR = Path(data_xenium)
transcripts_file = (XENIUM_DATA_DIR / "transcripts.parquet")
SEGGER_DATA_DIR = Path(segger_data)
# Initialize the Lightning data module
dm = SeggerDataModule(
data_dir=segger_data_dir,
batch_size=2,
num_workers=2,
)
dm.setup()
# # If you use custom gene embeddings, use the following two lines instead:
num_tx_tokens = (
dm.train[0].x_dict["tx"].shape[1]
) # Set the number of tokens to the number of genes
model_version = 0
output_dir = Path(
"~/data/t2d/segger_out/out/S188007/"
)
# # Initialize the Lightning data module
dm = SeggerDataModule(
data_dir=SEGGER_DATA_DIR,
batch_size=1,
num_workers=1,
)
dm.setup()
# Load in latest checkpoint
model_path = models_dir / "lightning_logs" / f"version_{model_version}"
model = load_model(model_path / "checkpoints")
seg_tag = None
receptive_field = {"k_bd": 4, "dist_bd": 15, "k_tx": 5, "dist_tx": 3}
segment(
model,
dm,
save_dir=output_dir,
seg_tag=seg_tag,
transcript_file=transcripts_file,
receptive_field=receptive_field,
min_transcripts=5,
score_cut=0.5,
cell_id_col="segger_cell_id",
save_transcripts= True,
save_anndata= True,
save_cell_masks= True, # Placeholder for future implementation
use_cc=False, # if one wants fragments (groups of similar transcripts not attached to any nuclei)
knn_method="kd_tree",
verbose=True,
gpu_ids=["0"],
# client=client
)
print('-------- Done ! -------')
but it stopped due to the following error:
Computing and saving cell masks anndata object...
2%|█ | 4168/197955 [1:34:16<1:22:13, 39.28it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(1)]]
7%|███▍ | 13077/197955 [1:37:02<1:02:55, 48.97it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(2)]]
7%|███▊ | 14504/197955 [1:37:29<1:02:48, 48.68it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(1)]]
15%|████████▏ | 29829/197955 [1:42:58<55:19, 50.64it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(1)]]
22%|████████████▏ | 44476/197955 [1:48:03<56:34, 45.21it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(1)]]
25%|█████████████▎ | 48786/197955 [1:49:24<57:47, 43.02it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(1)]]
25%|█████████████▌ | 49622/197955 [1:49:35<25:29, 96.99it/s]
A linearring requires at least 4 coordinates. [[np.int32(1), np.int32(2)]]
37%|███████████████████▉ | 73091/197955 [1:56:20<36:38, 56.78it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(1)]]
41%|█████████████████████▉ | 80320/197955 [1:58:33<29:56, 65.47it/s]
A linearring requires at least 4 coordinates. [[np.int32(1), np.int32(2)]]
41%|██████████████████████ | 81045/197955 [1:58:45<26:38, 73.14it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(1)]]
42%|██████████████████████▉ | 83863/197955 [1:59:41<32:17, 58.89it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(2)]]
44%|███████████████████████▌ | 86160/197955 [2:00:19<26:01, 71.60it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(1)]]
49%|██████████████████████████▌ | 97207/197955 [2:04:25<21:42, 77.36it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(2)]]
58%|██████████████████████████████▋ | 114669/197955 [2:10:14<17:08, 81.01it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(1)]]
59%|███████████████████████████████▍ | 117195/197955 [2:10:53<23:41, 56.81it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(2)]]
59%|███████████████████████████████▍ | 117407/197955 [2:10:55<17:35, 76.28it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(2)]]
63%|█████████████████████████████████▏ | 124006/197955 [2:12:33<19:32, 63.07it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(1)]]
68%|████████████████████████████████████ | 134637/197955 [2:15:07<12:21, 85.42it/s]
A linearring requires at least 4 coordinates. [[np.int32(0), np.int32(1)]]
68%|████████████████████████████████████▏ | 135338/197955 [2:15:15<14:36, 71.41it/s]
A linearring requires at least 4 coordinates. [[np.int32(1), np.int32(2)]]
70%|████████████████████████████████████▉ | 138078/197955 [2:15:51<58:54, 16.94it/s]
Traceback (most recent call last):
File "/ictstr01/home/icb/mostafa.shahhosseini/code/analysis/t2d/scripts/segger_script_predict.py",
line 89, in <module>
segment(
File "/ictstr01/home/icb/mostafa.shahhosseini/tools/segger/segger_dev/src/segger/prediction/predic
t_parquet.py", line 762, in segment
boundaries_gdf = generate_boundaries(transcripts_df_filtered)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/ictstr01/home/icb/mostafa.shahhosseini/tools/segger/segger_dev/src/segger/prediction/bounda
ry.py", line 332, in generate_boundaries
res.append({"cell_id": cell_id, "length": len(t), "geom": generate_boundary(t, x=x, y=y)})
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/ictstr01/home/icb/mostafa.shahhosseini/tools/segger/segger_dev/src/segger/prediction/bounda
ry.py", line 344, in generate_boundary
bi = BoundaryIdentification(t[[x, y]].values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/ictstr01/home/icb/mostafa.shahhosseini/tools/segger/segger_dev/src/segger/prediction/bounda
ry.py", line 89, in __init__
self.d = Delaunay(data)
^^^^^^^^^^^^^^
File "scipy/spatial/_qhull.pyx", line 1889, in scipy.spatial._qhull.Delaunay.__init__
File "scipy/spatial/_qhull.pyx", line 356, in scipy.spatial._qhull._Qhull.__init__
scipy.spatial._qhull.QhullError: QH6154 Qhull precision error: Initial simplex is flat (facet 1 is c
oplanar with the interior point)
While executing: | qhull d Q12 Qt Qz Qbb Qc
Options selected for Qhull 2020.2.r 2020/08/31:
run-id 818639731 delaunay Q12-allow-wide Qtriangulate Qz-infinity-point
Qbbound-last Qcoplanar-keep _pre-merge _zero-centrum Qinterior-keep
Pgood _max-width 0.98 Error-roundoff 5.9e-12 _one-merge 4.1e-11
File "/ictstr01/home/icb/mostafa.shahhosseini/code/analysis/t2d/scripts/segger_script_predict.py",
line 89, in <module>
segment(
File "/ictstr01/home/icb/mostafa.shahhosseini/tools/segger/segger_dev/src/segger/prediction/predic
t_parquet.py", line 762, in segment
boundaries_gdf = generate_boundaries(transcripts_df_filtered)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/ictstr01/home/icb/mostafa.shahhosseini/tools/segger/segger_dev/src/segger/prediction/bounda
ry.py", line 332, in generate_boundaries
res.append({"cell_id": cell_id, "length": len(t), "geom": generate_boundary(t, x=x, y=y)})
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/ictstr01/home/icb/mostafa.shahhosseini/tools/segger/segger_dev/src/segger/prediction/bounda
ry.py", line 344, in generate_boundary
bi = BoundaryIdentification(t[[x, y]].values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/ictstr01/home/icb/mostafa.shahhosseini/tools/segger/segger_dev/src/segger/prediction/bounda
ry.py", line 89, in __init__
self.d = Delaunay(data)
^^^^^^^^^^^^^^
File "scipy/spatial/_qhull.pyx", line 1889, in scipy.spatial._qhull.Delaunay.__init__
File "scipy/spatial/_qhull.pyx", line 356, in scipy.spatial._qhull._Qhull.__init__
scipy.spatial._qhull.QhullError: QH6154 Qhull precision error: Initial simplex is flat (facet 1 is c
oplanar with the interior point)
While executing: | qhull d Q12 Qt Qz Qbb Qc
Options selected for Qhull 2020.2.r 2020/08/31:
run-id 818639731 delaunay Q12-allow-wide Qtriangulate Qz-infinity-point
Qbbound-last Qcoplanar-keep _pre-merge _zero-centrum Qinterior-keep
Pgood _max-width 0.98 Error-roundoff 5.9e-12 _one-merge 4.1e-11
Visible-distance 1.2e-11 U-max-coplanar 1.2e-11 Width-outside 2.4e-11
_wide-facet 7.1e-11 _maxoutside 4.7e-11
The input to qhull appears to be less than 3 dimensional, or a
computation has overflowed.
Qhull could not construct a clearly convex simplex from points:
- p2(v4): 4.3e+03 1.7e+03 7.5
- p3(v3): 4.3e+03 1.7e+03 4.3e+03
- p1(v2): 4.3e+03 1.7e+03 17
- p0(v1): 4.3e+03 1.7e+03 0
The center point is coplanar with a facet, or a vertex is coplanar
with a neighboring facet. The maximum round off error for
computing distances is 5.9e-12. The center point, facets and distances
to the center point are as follows:
center point 4267 1700 1073
facet p3 p1 p0 distance= 0
facet p2 p1 p0 distance= 0
facet p2 p3 p0 distance= 0
facet p2 p3 p1 distance= 0
These points either have a maximum or minimum x-coordinate, or
they maximize the determinant for k coordinates. Trial points
are first selected from points that maximize a coordinate.
The min and max coordinates for each dimension are:
0: 4267 4268 difference= 0.9844
1: 1700 1700 difference= 0
2: 0 4268 difference= 4268
If the input should be full dimensional, you have several options that
may determine an initial simplex:
- use 'QJ' to joggle the input and make it full dimensional
- use 'QbB' to scale the points to the unit cube
- use 'QR0' to randomly rotate the input for different maximum points
- use 'Qs' to search all points for the initial simplex
- use 'En' to specify a maximum roundoff error less than 5.9e-12.
- trace execution with 'T3' to see the determinant for each point.
If the input is lower dimensional:
- use 'QJ' to joggle the input and make it full dimensional
- use 'Qbk:0Bk:0' to delete coordinate k from the input. You should
pick the coordinate with the least range. The hull will have the
correct topology.
- determine the flat containing the points, rotate the points
into a coordinate plane, and delete the other coordinates.
- add one or more points to make the input full dimensional.
@mossishahi this is the mask-creation step, that is your segmentation is already over and you have the anndata file and the transcripts file with segger_cell_id produced. in order to reproduce the error I'd need the data. Let me know if you wanna work on this together to reproduce and find the bug.
Hello, I am actually getting the same error. It'd be difficult to share the data, but I have a naive question: since I already have the anndata, is the newly segmented cell information there somewhere? It might be my inexperience with Xenium data but I only see which transcript was assigned to which segger_cell_id but I don't see the new boundaries of the cells.
Hello, @erkinacar5 as mentioned above based on your info, the data has been already segmented, that is transcripts are grouped into new segger_cell_ids. Your problem is at the mask-creation step, that is your segmentation is already over and you have the anndata file and the transcripts file with segger_cell_id produced. I will dig deeper into this error which is at the mask generation part (this is mainly only for visualisation purposes).
Thanks for your reply @EliHei2 . Is there any other way to visualise the segmentation from the main output (anndata and transcripts.parquet)? If it helps, my data is a TMA from 5k panel and it is Xenium v2. I am considering subsetting all the cores and processing them separately but not sure if that will be a solution to the issue at hand. Please let me know if I can somehow help
FYI, I got the same error as @erkinacar5 while running following (5k panel; Xenium v2 with multiple sections):
################################################################################################# import dask.dataframe as dd ddf = dd.read_parquet('/project/simmons_hts/jpark/0_tools/segger_dev/data_segger/RUNTRexBio_SLIDE1/benchmarks/segger_output_0.5_False_4_12_15_3_20250730/segger_transcripts.parquet').compute() ddf = ddf.dropna() ddf = ddf[ddf.segger_cell_id != "None"] ddf = ddf.sort_values("segger_cell_id")
from segger.prediction.boundary import generate_boundaries boundaries_gdf = generate_boundaries(ddf) cell_masks_save_path = "/project/simmons_hts/jpark/0_tools/segger_dev/data_segger/RUNTRexBio_SLIDE1/benchmarks/segger_output_0.5_False_4_12_15_3_20250730/segger_cell_boundaries.parquet" boundaries_gdf.to_parquet(cell_masks_save_path) #################################################################################################
#################################################################################################
`72%|███████▏ | 742581/1033895 [4:57:06<1:56:33, 41.66it/s]
Traceback (most recent call last):
File "/ceph/project/simmons_hts/jpark/0_tools/segger_dev/log/run_mask.py", line 32, in
While executing: | qhull d Qbb Qz Qc Qt Q12 Options selected for Qhull 2019.1.r 2019/06/21: run-id 2091629226 delaunay Qbbound-last Qz-infinity-point Qcoplanar-keep Qtriangulate Q12-allow-wide _pre-merge _zero-centrum Qinterior-keep Pgood _max-width 0.72 Error-roundoff 1.7e-11 _one-merge 1.2e-10 Visible-distance 3.5e-11 U-max-coplanar 3.5e-11 Width-outside 7e-11 _wide-facet 2.1e-10 _maxoutside 1.4e-10
precision problems (corrected unless 'Q0' or an error) 2 degenerate hyperplanes recomputed with gaussian elimination 2 nearly singular or axis-parallel hyperplanes 2 zero divisors during back substitute 2 zero divisors during gaussian elimination
The input to qhull appears to be less than 3 dimensional, or a computation has overflowed.
Qhull could not construct a clearly convex simplex from points:
- p2(v4): 7e+03 1.3e+04 7.8
- p3(v3): 7e+03 1.3e+04 1.3e+04
- p0(v2): 7e+03 1.3e+04 0
- p1(v1): 7e+03 1.3e+04 7.8
The center point is coplanar with a facet, or a vertex is coplanar with a neighboring facet. The maximum round off error for computing distances is 1.7e-11. The center point, facets and distances to the center point are as follows:
center point 7030 1.261e+04 3156
facet p3 p0 p1 distance= -7e-14 facet p2 p0 p1 distance= 0 facet p2 p3 p1 distance= -9.1e-13 facet p2 p3 p0 distance= 7e-14
These points either have a maximum or minimum x-coordinate, or they maximize the determinant for k coordinates. Trial points are first selected from points that maximize a coordinate.
The min and max coordinates for each dimension are: 0: 7030 7031 difference= 0.375 1: 1.261e+04 1.261e+04 difference= 0.7188 2: 0 1.261e+04 difference= 1.261e+04
If the input should be full dimensional, you have several options that may determine an initial simplex:
- use 'QJ' to joggle the input and make it full dimensional
- use 'QbB' to scale the points to the unit cube
- use 'QR0' to randomly rotate the input for different maximum points
- use 'Qs' to search all points for the initial simplex
- use 'En' to specify a maximum roundoff error less than 1.7e-11.
- trace execution with 'T3' to see the determinant for each point.
If the input is lower dimensional:
- use 'QJ' to joggle the input and make it full dimensional
- use 'Qbk:0Bk:0' to delete coordinate k from the input. You should pick the coordinate with the least range. The hull will have the correct topology.
- determine the flat containing the points, rotate the points into a coordinate plane, and delete the other coordinates.
- add one or more points to make the input full dimensional.` #################################################################################################
I am also keen on knowing possible other solution to visualise segmentation using either anndata/parquet until seg2explorer is fixed (if its possible)