[FEATURE] Easy-to-use tutorial for multiple classes
Is your feature request related to a problem? Please describe.
Currently, all tutorials are for single classes only.
Describe the solution you'd like
A tutorial that covers multiple classes.
Describe alternatives you've considered
N/A [I am assuming everyone would want something like this out of the box.]
Additional context
N/A
Here is my current attempt at a multi-class example:
import numpy as np
from rich import print as pprint

from panoptica import (
    InputType,
    Panoptica_Evaluator,
    ConnectedComponentsInstanceApproximator,
    NaiveThresholdMatching,
)
from panoptica.utils.label_group import LabelGroup
from panoptica.utils.segmentation_class import SegmentationClassGroups

# Define segmentation groups
groups = {
    "class_1": ([1], False),
    "class_2": ([2], False),
}
class_groups = SegmentationClassGroups(groups)

pred = np.zeros((30, 30))
gt = np.zeros((30, 30))

# Class 1 - Both pred and gt have the same region
pred[1:5, 1:5] = 1
gt[1:5, 1:5] = 1

# Class 1 - A second region, again identical in pred and gt
pred[7:11, 7:11] = 1
gt[7:11, 7:11] = 1

# Class 2 - Different regions in pred and gt
# (NumPy clips slices past the 30x30 bounds, so rows 25:32 become 25:30)
pred[25:32, 5:10] = 2
gt[25:32, 7:12] = 2  # Slight offset for partial overlap

# Convert to int32 for evaluation
pred = pred.astype(np.int32)
gt = gt.astype(np.int32)

# Create evaluator
evaluator = Panoptica_Evaluator(
    expected_input=InputType.SEMANTIC,
    instance_approximator=ConnectedComponentsInstanceApproximator(),
    instance_matcher=NaiveThresholdMatching(),
    segmentation_class_groups=class_groups,
)

# Evaluate
results = evaluator.evaluate(pred, gt, verbose=False)

# Display results for each class
print("\n===== Evaluation Results =====")
for class_name, result in results.items():
    print(f"\n--- {class_name} ---")

    # Print key metrics
    print(f"True Positives (TP): {result.tp}")
    print(f"False Positives (FP): {result.fp}")
    print(f"False Negatives (FN): {result.fn}")
    print(f"Segmentation Quality (SQ): {result.sq:.4f}")
    print(f"Recognition Quality (RQ): {result.rq:.4f}")
    print(f"Panoptic Quality (PQ): {result.pq:.4f}")
    print(f"Dice Score (DSC): {result.sq_dsc:.4f}")

    # Print additional details if needed
    print("\nDetailed metrics:")
    metrics_dict = {
        "Global Binary DSC": result.global_bin_dsc,
        "Number of reference instances": result.num_ref_instances,
        "Number of prediction instances": result.num_pred_instances,
    }
    for metric_name, value in metrics_dict.items():
        print(f"{metric_name}: {value}")

# Alternatively, you can convert all results to dictionaries
print("\n===== All Metrics as Dictionary =====")
for class_name, result in results.items():
    print(f"\n--- {class_name} ---")
    result_dict = result.to_dict()

    # Filter the dictionary to show only the most important metrics
    important_metrics = ["tp", "fp", "fn", "sq", "rq", "pq", "sq_dsc", "global_bin_dsc"]
    filtered_dict = {
        k: round(v, 4) if isinstance(v, float) else v
        for k, v in result_dict.items()
        if k in important_metrics
    }
    pprint(filtered_dict)
Is this the right way to go about it?
In addition, Panoptic Quality is inherently not calculated for individual classes; rather, a single score is reported across all classes. Is there a way to obtain such an aggregate score through simple API calls?
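For now, the only option I see is to aggregate the per-class values myself, e.g. a rough sketch like this that simply averages the pq attribute of the per-group results from my example above (just a workaround, not an official panoptica API):
# Uses `results` and numpy (np) from the example above; workaround sketch only.
pq_values = [result.pq for result in results.values()]
mean_pq = float(np.nanmean(pq_values))  # nanmean in case a class has no instances
print(f"Mean PQ across classes: {mean_pq:.4f}")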
Dear @aymuos15, as promised, I am sharing a script with you to do a multi-class BraTS evaluation; it also illustrates usage of the aggregator.
This script computes instance-wise and global metrics for ET, NET, ED, and the hierarchical groups WT and TC:
from auxiliary.nifti.io import read_nifti
from panoptica import (
    InputType,
    Panoptica_Evaluator,
    Panoptica_Aggregator,
    ConnectedComponentsInstanceApproximator,
    NaiveThresholdMatching,
)
from panoptica.utils.segmentation_class import SegmentationClassGroups
from panoptica.utils.label_group import LabelMergeGroup
from auxiliary.turbopath.turbopath import turbopath
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
import pathlib
from tqdm import tqdm
import cProfile
def evaluate_folder(prediction_folder, reference_folder, dataset, out_folder):
    os.makedirs(out_folder, exist_ok=True)
    prediction_folder = turbopath(prediction_folder)
    reference_folder = turbopath(reference_folder)

    evaluator = Panoptica_Evaluator(
        expected_input=InputType.SEMANTIC,
        instance_approximator=ConnectedComponentsInstanceApproximator(),
        instance_matcher=NaiveThresholdMatching(),
        segmentation_class_groups=SegmentationClassGroups(
            {
                "ET": (3, False),
                "NET": (1, False),
                "ED": (2, False),
                "TC": LabelMergeGroup([1, 3], False),
                "WT": LabelMergeGroup([1, 2, 3], False),
            }
        ),
    )

    aggregator = Panoptica_Aggregator(evaluator, out_folder.joinpath("results.tsv"))

    exams = prediction_folder.files("*.nii.gz")
    for exam in tqdm(exams[0:10]):
        exam_name = exam.name.split(".nii.gz")[0]
        print(exam_name)

        reference_path = reference_folder / f"{exam_name}-seg.nii.gz"
        ref_masks = read_nifti(reference_path)
        ref_masks = ref_masks.astype(int)

        pred_masks = read_nifti(exam)
        pred_masks = pred_masks.astype(int)

        aggregator.evaluate(pred_masks, ref_masks, subject_name=exam.name)

    # with ProcessPoolExecutor(max_workers=6) as executor:
    #     futures = {
    #         executor.submit(
    #             aggregator.evaluate,
    #             read_nifti(exam).astype(int),
    #             read_nifti(
    #                 reference_folder / f"{exam.name.split('.nii.gz')[0]}-seg.nii.gz"
    #             ).astype(int),
    #             subject_name=exam.name,
    #         ): exam
    #         for exam in tqdm(exams)
    #     }
    #     for future in tqdm(
    #         as_completed(futures), total=len(futures), desc="Panoptica Evaluation"
    #     ):
    #         result = future.result()
    #         if result is not None:
    #             print("Done")


if __name__ == "__main__":
    pred_dir = pathlib.Path("/home/josef/Data/Projects/brats_moda/result_files")
    out_dir = pathlib.Path(
        "/home/josef/Data/Projects/brats_moda/panoptica_results_updated"
    )
    datasets = ["predictions"]

    subfolders = [f.path for f in os.scandir(pred_dir) if f.is_dir()]
    for subfolder in subfolders:
        for dataset in datasets:
            pred_path = os.path.join(subfolder, dataset)
            out_path = out_dir / os.path.basename(subfolder) / dataset
            evaluate_folder(
                prediction_folder=pred_path,
                reference_folder=pred_dir.parent.joinpath("reference", dataset),
                dataset=dataset,
                out_folder=out_path,
            )
Fantastic. Thank you very much @neuronflow. Really appreciate it.
PS: The code above also implements parallel evaluation (in the commented-out block).
I reopened the issue, as I agree with you that we need a tutorial to point out these features :)
You can find the existing panoptica tutorials here: https://github.com/BrainLesion/tutorials/tree/main/panoptica
Please let us know if you have any questions.
@aymuos15 Here is some more example code to analyze a single exam:
from auxiliary.nifti.io import read_nifti
from panoptica import (
    InputType,
    Panoptica_Evaluator,
    Panoptica_Aggregator,
    ConnectedComponentsInstanceApproximator,
    NaiveThresholdMatching,
)
from panoptica.utils.segmentation_class import SegmentationClassGroups
from panoptica.utils.label_group import LabelMergeGroup
from auxiliary.turbopath.turbopath import turbopath
from concurrent.futures import ProcessPoolExecutor, as_completed
import os
from tqdm import tqdm
import pandas as pd
import tempfile
def analyze_exam(
    prediction_path: str,
    label_path: str,
    identifier: str,
    panoptica_config_path: str,
) -> dict:
    """
    Evaluate a single exam using Panoptica.

    Args:
        prediction_path (str): Path to the prediction segmentation (NIfTI file).
        label_path (str): Path to the reference segmentation (NIfTI file).
        identifier (str): Identifier for the exam, stored alongside the results.
        panoptica_config_path (str): Path to the Panoptica evaluator config (YAML).

    Returns:
        dict: A dictionary containing the evaluation results per class group.

    Note: this is very ugly, as the evaluator is instantiated again and again for
    every exam and panoptica's batch-processing capabilities (Aggregator) are not used.
    This should be improved in the future.
    """
    # file io
    ref_masks = read_nifti(label_path)
    ref_masks = ref_masks.astype(int)

    pred_masks = read_nifti(prediction_path)
    pred_masks = pred_masks.astype(int)

    # manually set BraTS config; uncomment for manual configuration
    # evaluator = Panoptica_Evaluator(
    #     expected_input=InputType.SEMANTIC,
    #     instance_approximator=ConnectedComponentsInstanceApproximator(),
    #     instance_matcher=NaiveThresholdMatching(),
    #     segmentation_class_groups=SegmentationClassGroups(
    #         {
    #             "ET": (3, False),
    #             "NET": (1, False),
    #             "ED": (2, False),
    #             "TC": LabelMergeGroup([1, 3], False),
    #             "WT": LabelMergeGroup([1, 2, 3], False),
    #         }
    #     ),
    # )
    # evaluator.save_to_config(panoptica_config_path)

    evaluator = Panoptica_Evaluator.load_from_config(panoptica_config_path)

    # call evaluate
    group2result = evaluator.evaluate(
        prediction_arr=pred_masks,
        reference_arr=ref_masks,
        # subject_name=identifier,
    )
    # for key, value in group2result.items():
    #     print(key, value)

    # convert to dict and add identifier
    results = {k: r.to_dict() for k, r in group2result.items()}
    results["subject_name"] = identifier
    return results


if __name__ == "__main__":
    results = analyze_exam(
        prediction_path="test_case/brats202x/zyx_2019.nii.gz",
        label_path="test_case/brats202x/simple.nii.gz",
        identifier="test_case",
        panoptica_config_path="brats_config.yaml",
    )
    print(results)
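Not part of the script above, but since pandas is already imported, here is a small sketch of how the nested per-group dictionaries returned by analyze_exam could be flattened into a one-row DataFrame (handy when collecting many exams into a single table):
# Sketch only: `results` maps group names (e.g. "ET", "WT") to metric dicts,
# plus the "subject_name" entry added above; `pd` is the pandas import from the script.
flat = {"subject_name": results.pop("subject_name")}
for group_name, metrics in results.items():
    for metric_name, value in metrics.items():
        flat[f"{group_name}-{metric_name}"] = value
df = pd.DataFrame([flat])
print(df)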
The config (brats_config.yaml):
!Panoptica_Evaluator
decision_metric: null
decision_threshold: null
edge_case_handler: !EdgeCaseHandler
  empty_list_std: !EdgeCaseResult NAN
  listmetric_zeroTP_handling:
    !Metric DSC: !MetricZeroTPEdgeCaseHandling {empty_prediction_result: !EdgeCaseResult ZERO,
      empty_reference_result: !EdgeCaseResult ZERO, no_instances_result: !EdgeCaseResult NAN,
      normal: !EdgeCaseResult ZERO}
    !Metric clDSC: !MetricZeroTPEdgeCaseHandling {empty_prediction_result: !EdgeCaseResult ZERO,
      empty_reference_result: !EdgeCaseResult ZERO, no_instances_result: !EdgeCaseResult NAN,
      normal: !EdgeCaseResult ZERO}
    !Metric IOU: !MetricZeroTPEdgeCaseHandling {empty_prediction_result: !EdgeCaseResult ZERO,
      empty_reference_result: !EdgeCaseResult ZERO, no_instances_result: !EdgeCaseResult NAN,
      normal: !EdgeCaseResult ZERO}
    !Metric ASSD: !MetricZeroTPEdgeCaseHandling {empty_prediction_result: !EdgeCaseResult INF,
      empty_reference_result: !EdgeCaseResult INF, no_instances_result: !EdgeCaseResult NAN,
      normal: !EdgeCaseResult INF}
    !Metric RVD: !MetricZeroTPEdgeCaseHandling {empty_prediction_result: !EdgeCaseResult NAN,
      empty_reference_result: !EdgeCaseResult NAN, no_instances_result: !EdgeCaseResult NAN,
      normal: !EdgeCaseResult NAN}
    !Metric RVAE: !MetricZeroTPEdgeCaseHandling {empty_prediction_result: !EdgeCaseResult NAN,
      empty_reference_result: !EdgeCaseResult NAN, no_instances_result: !EdgeCaseResult NAN,
      normal: !EdgeCaseResult NAN}
expected_input: !InputType SEMANTIC
global_metrics: [!Metric DSC]
instance_approximator: !ConnectedComponentsInstanceApproximator {cca_backend: null}
instance_matcher: !NaiveThresholdMatching {allow_many_to_one: false, matching_metric: !Metric IOU,
  matching_threshold: 0.5}
instance_metrics: [!Metric DSC, !Metric IOU, !Metric ASSD, !Metric RVD]
log_times: false
save_group_times: false
segmentation_class_groups: !SegmentationClassGroups
  groups:
    ed: !LabelGroup
      single_instance: false
      value_labels: [2]
    et: !LabelGroup
      single_instance: false
      value_labels: [3]
    net: !LabelGroup
      single_instance: false
      value_labels: [1]
    tc: !LabelMergeGroup
      single_instance: false
      value_labels: [1, 3]
    wt: !LabelMergeGroup
      single_instance: false
      value_labels: [1, 2, 3]
verbose: false
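As a usage note, a minimal sketch (assuming the YAML above is saved as brats_config.yaml next to the script) of round-tripping this config with the same load_from_config / save_to_config calls used in analyze_exam:
from panoptica import Panoptica_Evaluator

# Load the YAML config above into an evaluator (as in analyze_exam) ...
evaluator = Panoptica_Evaluator.load_from_config("brats_config.yaml")
# ... and write it back out, e.g. after adjusting the class groups.
evaluator.save_to_config("brats_config_copy.yaml")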