
[FEATURE] Easy-to-use tutorial for multiple classes

Open aymuos15 opened this issue 7 months ago • 6 comments

Is your feature request related to a problem? Please describe.
Currently, all tutorials are for single classes.

Describe the solution you'd like
A tutorial that considers multiple classes.

Describe alternatives you've considered
N/A [I am assuming everyone would want something off the bat]

Additional context
N/A

aymuos15 avatar Apr 23 '25 15:04 aymuos15

import numpy as np
from rich import print as pprint
from panoptica import (
    InputType,
    Panoptica_Evaluator,
    ConnectedComponentsInstanceApproximator,
    NaiveThresholdMatching,
)
from panoptica.utils.segmentation_class import SegmentationClassGroups

# Define segmentation groups: (value labels, single_instance flag)
groups = {
    "class_1": ([1], False),
    "class_2": ([2], False),
}
class_groups = SegmentationClassGroups(groups)

pred = np.zeros((30, 30))
gt = np.zeros((30, 30))

# Class 1 - Both pred and gt have the same region
pred[1:5, 1:5] = 1
gt[1:5, 1:5] = 1

# Class 1 - A second region, again identical in pred and gt
pred[7:11, 7:11] = 1
gt[7:11, 7:11] = 1

# Class 2 - Different regions in pred and gt
pred[25:30, 5:10] = 2
gt[25:30, 7:12] = 2  # Slight column offset for partial overlap

# Convert to int32 for evaluation
pred = pred.astype(np.int32)
gt = gt.astype(np.int32)

# Create evaluator
evaluator = Panoptica_Evaluator(
    expected_input=InputType.SEMANTIC,
    instance_approximator=ConnectedComponentsInstanceApproximator(),
    instance_matcher=NaiveThresholdMatching(),
    segmentation_class_groups=class_groups,
)

# Evaluate
results = evaluator.evaluate(pred, gt, verbose=False)

# Display results for each class
print("\n===== Evaluation Results =====")
for class_name, result in results.items():
    print(f"\n--- {class_name} ---")
    # Print key metrics
    print(f"True Positives (TP): {result.tp}")
    print(f"False Positives (FP): {result.fp}")
    print(f"False Negatives (FN): {result.fn}")
    print(f"Segmentation Quality (SQ): {result.sq:.4f}")
    print(f"Recognition Quality (RQ): {result.rq:.4f}")
    print(f"Panoptic Quality (PQ): {result.pq:.4f}")
    print(f"Dice Score (DSC): {result.sq_dsc:.4f}")
    
    # Print additional details if needed
    print("\nDetailed metrics:")
    metrics_dict = {
        "Global Binary DSC": result.global_bin_dsc,
        "Number of reference instances": result.num_ref_instances,
        "Number of prediction instances": result.num_pred_instances
    }
    for metric_name, value in metrics_dict.items():
        print(f"{metric_name}: {value}")

# Alternatively, you can convert all results to dictionaries
print("\n===== All Metrics as Dictionary =====")
for class_name, result in results.items():
    print(f"\n--- {class_name} ---")
    result_dict = result.to_dict()
    # Filter the dictionary to show only the most important metrics
    important_metrics = ['tp', 'fp', 'fn', 'sq', 'rq', 'pq', 'sq_dsc', 'global_bin_dsc']
    filtered_dict = {k: round(v, 4) if isinstance(v, float) else v 
                    for k, v in result_dict.items() if k in important_metrics}
    pprint(filtered_dict)

Is this the right way to go about it?

aymuos15 avatar Apr 23 '25 15:04 aymuos15

In addition, Panoptic Quality is inherently not calculated for individual classes; rather, a single score is reported across all classes. Is there any way to do this with simple API calls?
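
For reference, a rough sketch of manually averaging PQ over the per-class results returned above (assuming each per-group result exposes a pq attribute, as in the snippet earlier):

import numpy as np

# hypothetical manual aggregation: average the per-group PQ values
# (results is the group-name -> result dict returned by evaluator.evaluate above)
pq_per_class = {name: result.pq for name, result in results.items()}
mean_pq = np.nanmean(list(pq_per_class.values()))  # nanmean in case a group has no instances
print(f"Per-class PQ: {pq_per_class}")
print(f"Mean PQ across classes: {mean_pq:.4f}")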

aymuos15 avatar Apr 23 '25 17:04 aymuos15

Dear @aymuos15, as promised, I am sharing a script with you to do a multi-class BraTS evaluation; it also illustrates usage of the aggregator.

This script computes instance-wise and global metrics for ET, NET, ED, and the hierarchical groups WT and TC:

import os
import pathlib
from concurrent.futures import ProcessPoolExecutor, as_completed  # used by the commented-out parallel block below

from tqdm import tqdm

from auxiliary.nifti.io import read_nifti
from auxiliary.turbopath.turbopath import turbopath

from panoptica import (
    InputType,
    Panoptica_Evaluator,
    Panoptica_Aggregator,
    ConnectedComponentsInstanceApproximator,
    NaiveThresholdMatching,
)
from panoptica.utils.segmentation_class import SegmentationClassGroups
from panoptica.utils.label_group import LabelMergeGroup


def evaluate_folder(prediction_folder, reference_folder, dataset, out_folder):
    os.makedirs(out_folder, exist_ok=True)

    prediction_folder = turbopath(prediction_folder)
    reference_folder = turbopath(reference_folder)

    evaluator = Panoptica_Evaluator(
        expected_input=InputType.SEMANTIC,
        instance_approximator=ConnectedComponentsInstanceApproximator(),
        instance_matcher=NaiveThresholdMatching(),
        segmentation_class_groups=SegmentationClassGroups(
            {
                "ET": (3, False),
                "NET": (1, False),
                "ED": (2, False),
                "TC": LabelMergeGroup([1, 3], False),
                "WT": LabelMergeGroup([1, 2, 3], False),
            }
        ),
    )
    aggregator = Panoptica_Aggregator(evaluator, out_folder.joinpath("results.tsv"))

    exams = prediction_folder.files("*.nii.gz")

    for exam in tqdm(exams[0:10]):  # note: only the first 10 exams are evaluated here
        exam_name = exam.name.split(".nii.gz")[0]
        print(exam_name)
        reference_path = reference_folder / f"{exam_name}-seg.nii.gz"

        ref_masks = read_nifti(reference_path)
        ref_masks = ref_masks.astype(int)

        pred_masks = read_nifti(exam)
        pred_masks = pred_masks.astype(int)

        aggregator.evaluate(pred_masks, ref_masks, subject_name=exam.name)

    # with ProcessPoolExecutor(max_workers=6) as executor:
    #     futures = {
    #         executor.submit(
    #             aggregator.evaluate,
    #             read_nifti(exam).astype(int),
    #             read_nifti(
    #                 reference_folder / f"{exam.name.split('.nii.gz')[0]}-seg.nii.gz"
    #             ).astype(int),
    #             subject_name=exam.name,
    #         ): exam
    #         for exam in tqdm(exams)
    #     }
    #     for future in tqdm(
    #         as_completed(futures), total=len(futures), desc="Panoptica Evaluation"
    #     ):
    #         result = future.result()
    #         if result is not None:
    #             print("Done")


if __name__ == "__main__":
    pred_dir = pathlib.Path("/home/josef/Data/Projects/brats_moda/result_files")

    out_dir = pathlib.Path(
        "/home/josef/Data/Projects/brats_moda/panoptica_results_updated"
    )

    datasets = ["predictions"]

    subfolders = [f.path for f in os.scandir(pred_dir) if f.is_dir()]

    for subfolder in subfolders:
        for dataset in datasets:
            pred_path = os.path.join(subfolder, dataset)
            out_path = out_dir / os.path.basename(subfolder) / dataset
            evaluate_folder(
                prediction_folder=pred_path,
                reference_folder=pred_dir.parent.joinpath("reference", dataset),
                dataset=dataset,
                out_folder=out_path,
            )

neuronflow avatar Apr 23 '25 19:04 neuronflow

Fantastic. Thank you very much @neuronflow. Really appreciate it.

aymuos15 avatar Apr 23 '25 19:04 aymuos15

PS: The code above also implements parallel evaluation (commented code)
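
For convenience, a condensed, uncommented sketch of that parallel block (it assumes the aggregator, exams, and reference_folder from evaluate_folder above, and that aggregator.evaluate can be submitted to worker processes as in the commented code):

from concurrent.futures import ProcessPoolExecutor, as_completed

from tqdm import tqdm

from auxiliary.nifti.io import read_nifti


def evaluate_parallel(aggregator, exams, reference_folder, max_workers=6):
    # submit one aggregator.evaluate call per exam, mirroring the commented-out block above
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(
                aggregator.evaluate,
                read_nifti(exam).astype(int),
                read_nifti(
                    reference_folder / f"{exam.name.split('.nii.gz')[0]}-seg.nii.gz"
                ).astype(int),
                subject_name=exam.name,
            ): exam
            for exam in exams
        }
        for future in tqdm(
            as_completed(futures), total=len(futures), desc="Panoptica Evaluation"
        ):
            future.result()  # re-raise any exception from a worker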

I reopened the issue, as I agree with you that we need a tutorial to point out these features :)

Here you can find the existing panoptica tutorials: https://github.com/BrainLesion/tutorials/tree/main/panoptica

Please let us know if you have any questions.

neuronflow avatar Apr 23 '25 21:04 neuronflow

@aymuos15 Here is some more example code to analyze a single exam:

from auxiliary.nifti.io import read_nifti

# InputType, ConnectedComponentsInstanceApproximator, NaiveThresholdMatching,
# SegmentationClassGroups and LabelMergeGroup are only needed for the
# commented-out manual configuration below
from panoptica import (
    InputType,
    Panoptica_Evaluator,
    ConnectedComponentsInstanceApproximator,
    NaiveThresholdMatching,
)
from panoptica.utils.segmentation_class import SegmentationClassGroups
from panoptica.utils.label_group import LabelMergeGroup


def analyze_exam(
    prediction_path: str,
    label_path: str,
    identifier: str,
    panoptica_config_path: str,
) -> dict:
    """
    Evaluate a single exam using Panoptica.

    Args:
        prediction_path (str): _description_
        label_path (str): _description_
        identifier (str): _description_
        panoptica_config_path (str): _description_

    Returns:
        dict: _description_

        Note: The function returns a dictionary containing the evaluation results.
        Note this is very ugly as the evaluator is instantiated again and again for every exam and panoptica's batch processing capabilities (Aggregator) are not used.
        This should be improved in the future.
    """

    # file io

    ref_masks = read_nifti(label_path)
    ref_masks = ref_masks.astype(int)

    pred_masks = read_nifti(prediction_path)
    pred_masks = pred_masks.astype(int)

    # manually set BraTS config; uncomment for manual configuration
    # evaluator = Panoptica_Evaluator(
    #     expected_input=InputType.SEMANTIC,
    #     instance_approximator=ConnectedComponentsInstanceApproximator(),
    #     instance_matcher=NaiveThresholdMatching(),
    #     segmentation_class_groups=SegmentationClassGroups(
    #         {
    #             "ET": (3, False),
    #             "NET": (1, False),
    #             "ED": (2, False),
    #             "TC": LabelMergeGroup([1, 3], False),
    #             "WT": LabelMergeGroup([1, 2, 3], False),
    #         }
    #     ),
    # )

    # evaluator.save_to_config(panoptica_config_path)

    evaluator = Panoptica_Evaluator.load_from_config(panoptica_config_path)

    # call evaluate
    group2result = evaluator.evaluate(
        prediction_arr=pred_masks,
        reference_arr=ref_masks,
        # subject_name=identifier,
    )

    # for key, value in result.items():
    #     print(key, value)

    # convert to dict and add identifier
    results = {k: r.to_dict() for k, r in group2result.items()}
    results["subject_name"] = identifier
    return results


if __name__ == "__main__":
    results = analyze_exam(
        prediction_path="test_case/brats202x/zyx_2019.nii.gz",
        label_path="test_case/brats202x/simple.nii.gz",
        identifier="test_case",
        panoptica_config_path="brats_config.yaml",
    )

    print(results)

The config (brats_config.yaml):

!Panoptica_Evaluator
decision_metric: null
decision_threshold: null
edge_case_handler: !EdgeCaseHandler
  empty_list_std: !EdgeCaseResult NAN
  listmetric_zeroTP_handling:
    !Metric DSC: !MetricZeroTPEdgeCaseHandling {empty_prediction_result: !EdgeCaseResult ZERO,
      empty_reference_result: !EdgeCaseResult ZERO, no_instances_result: !EdgeCaseResult NAN,
      normal: !EdgeCaseResult ZERO}
    !Metric clDSC: !MetricZeroTPEdgeCaseHandling {empty_prediction_result: !EdgeCaseResult ZERO,
      empty_reference_result: !EdgeCaseResult ZERO, no_instances_result: !EdgeCaseResult NAN,
      normal: !EdgeCaseResult ZERO}
    !Metric IOU: !MetricZeroTPEdgeCaseHandling {empty_prediction_result: !EdgeCaseResult ZERO,
      empty_reference_result: !EdgeCaseResult ZERO, no_instances_result: !EdgeCaseResult NAN,
      normal: !EdgeCaseResult ZERO}
    !Metric ASSD: !MetricZeroTPEdgeCaseHandling {empty_prediction_result: !EdgeCaseResult INF,
      empty_reference_result: !EdgeCaseResult INF, no_instances_result: !EdgeCaseResult NAN,
      normal: !EdgeCaseResult INF}
    !Metric RVD: !MetricZeroTPEdgeCaseHandling {empty_prediction_result: !EdgeCaseResult NAN,
      empty_reference_result: !EdgeCaseResult NAN, no_instances_result: !EdgeCaseResult NAN,
      normal: !EdgeCaseResult NAN}
    !Metric RVAE: !MetricZeroTPEdgeCaseHandling {empty_prediction_result: !EdgeCaseResult NAN,
      empty_reference_result: !EdgeCaseResult NAN, no_instances_result: !EdgeCaseResult NAN,
      normal: !EdgeCaseResult NAN}
expected_input: !InputType SEMANTIC
global_metrics: [!Metric DSC]
instance_approximator: !ConnectedComponentsInstanceApproximator {cca_backend: null}
instance_matcher: !NaiveThresholdMatching {allow_many_to_one: false, matching_metric: !Metric IOU,
  matching_threshold: 0.5}
instance_metrics: [!Metric DSC, !Metric IOU, !Metric ASSD, !Metric RVD]
log_times: false
save_group_times: false
segmentation_class_groups: !SegmentationClassGroups
  groups:
    ed: !LabelGroup
      single_instance: false
      value_labels: [2]
    et: !LabelGroup
      single_instance: false
      value_labels: [3]
    net: !LabelGroup
      single_instance: false
      value_labels: [1]
    tc: !LabelMergeGroup
      single_instance: false
      value_labels: [1, 3]
    wt: !LabelMergeGroup
      single_instance: false
      value_labels: [1, 2, 3]
verbose: false
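
For completeness, a minimal sketch of loading this config and applying it to in-memory arrays (assuming it is saved as brats_config.yaml; load_from_config and evaluate are used exactly as in the script above):

import numpy as np

from panoptica import Panoptica_Evaluator

evaluator = Panoptica_Evaluator.load_from_config("brats_config.yaml")

# toy 3D volumes with a single ET (label 3) lesion, slightly offset between prediction and reference
pred = np.zeros((30, 30, 30), dtype=np.int32)
ref = np.zeros((30, 30, 30), dtype=np.int32)
pred[5:10, 5:10, 5:10] = 3
ref[5:10, 6:11, 5:10] = 3

group2result = evaluator.evaluate(prediction_arr=pred, reference_arr=ref)
for group, result in group2result.items():
    print(group, result.to_dict())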

neuronflow avatar Apr 24 '25 06:04 neuronflow