segmentation_models
segmentation_models copied to clipboard
[OPEN QUESTION] How to evaluate segmentation model with empty masks ?
I created multiple models that are able to do binary segmentation on images and I would like to evaluate them.
I would like to know for each model how good he is:
- in finding an object on the images
- in finding a precise border of objects
- in finding no object when there is no object to find
Actually, I'm using these scores for that purpose:
- iou (intersection over union of predicted mask and ground truth mask)
- dice score
- pixel accuracy
- precision iou
- recall iou
A good intuitive explanation of these scores can be found @ https://www.jeremyjordan.me/evaluating-image-segmentation-models/
The scores that I listed above are well-suited to verify that the model predicts correctly segmented objects with a precise border. However, my test dataset contains also images with no objects on it and I would like to evaluate how good my model is to know that there is nothing to predict.
For the moment, I get an IoU and a Dice score of 0 when there is no object to find mask and my model correctly predicted an empty mask which is wrong.
Here is my code:
class SegmentationEvaluator():
def __init__(self, model: SegmentationModel, dataset: Dataset2D, batch_size: int = 10, iou_thresholds: List[float] = [0.1, 0.5, 0.7, 0.8], dice_thresholds: List[float] = [0.1, 0.5, 0.7, 0.8]):
self.model = model
self.dataset = dataset
self.batch_size = batch_size
self.batch_loader = BatchLoader(self.dataset, batch_size=batch_size, shuffle=False)
self.ious = []
self.dices = []
self.accuracies = []
self.true_positive_iou = [0 for i in range(len(iou_thresholds))]
self.false_positive_iou = [0 for i in range(len(iou_thresholds))]
self.false_negative_iou = [0 for i in range(len(iou_thresholds))]
self.true_positive_dice = [0 for i in range(len(dice_thresholds))]
self.false_positive_dice = [0 for i in range(len(dice_thresholds))]
self.false_negative_dice = [0 for i in range(len(dice_thresholds))]
self.iou_thresholds = iou_thresholds
self.dice_thresholds = dice_thresholds
def evaluate(self, metrics: List[str] = ["iou", "accuracy", "dice", "precision_iou", "recall_iou", "precision_dice", \
"recall_dice", "tp_iou", "fp_iou", "fn_iou"]) -> List[Any]:
# add metrics iou or dice
for batch in tqdm(self.batch_loader):
images, masks = batch
predicted_masks = self.model.predict_masks(images)
for metric in ["iou", "accuracy", "dice"]:
# we don't need to go through all metrics
for true_mask, predicted_mask in zip(masks, predicted_masks):
if metric == "iou":
iou = get_iou(true_mask, predicted_mask)
self.update_tp_tn_fp_fn_iou(iou)
self.ious.append(iou)
elif metric == "dice":
dice = get_dice(true_mask, predicted_mask)
self.update_tp_tn_fp_fn_dice(dice)
self.dices.append(dice)
elif metric == "accuracy":
self.accuracies.append(get_accuracy(true_mask, predicted_mask))
### Prepare the results to display ###
results = defaultdict(float)
# IOU MEDIAN + MEAN
if "iou" in metrics:
results["mean_iou:"] = self.get_mean_iou()
results["median iou:"] = self.get_median_iou()
# DICE MEDIAN + MEAN
if "dice" in metrics:
results["mean dice:"] = self.get_mean_dice()
results["median dice:"] = self.get_median_dice()
# PRECISION IOU
if "precision_iou" in metrics:
precision_iou = self.get_precision_iou()
for threshold in self.iou_thresholds:
results[f"precision_iou: {threshold}"] = precision_iou[threshold]
# PRECISION DICE
if "precision_dice" in metrics:
precision_dice = self.get_precision_dice()
for threshold in self.dice_thresholds:
results[f"precision_dice: {threshold}"] = precision_dice[threshold]
# RECALL IOU
if "recall_iou" in metrics:
recall_iou = self.get_recall_iou()
for threshold in self.iou_thresholds:
results[f"recall_iou: {threshold}"] = recall_iou[threshold]
# RECALL DICE
if "recall_dice" in metrics:
recall_dice = self.get_recall_dice()
for threshold in self.dice_thresholds:
results[f"recall_dice: {threshold}"] = recall_dice[threshold]
# ACCURACY
if "accuracy" in metrics:
results["accuracy"] = self.get_accuracy()
# TRUE POSITIVES
if "tp_iou" in metrics:
for i, threshold in enumerate(self.iou_thresholds):
results[f"tp_iou {threshold}:"] = self.true_positive_iou[i]
# FALSE NEGATIVES
if "fn_iou" in metrics:
for i, threshold in enumerate(self.iou_thresholds):
results[f"fn_iou {threshold}:"] = self.false_negative_iou[i]
# FALSE POSITIVES
if "fp_iou" in metrics:
for i, threshold in enumerate(self.iou_thresholds):
results[f"fp_iou {threshold}:"] = self.false_positive_iou[i]
return results.items()
def get_accuracy(self):
"""Computes the pixel accuracy on the test dataset."""
return sum(self.accuracies) / len(self.accuracies)
def get_median_dice(self) -> float:
"""Computes the median dice on test dataset."""
return statistics.median(self.dices)
def get_mean_dice(self) -> float:
"""Computes the mean dice on test dataset."""
return sum(self.dices) / len(self.dices)
def get_median_iou(self) -> float:
"""Computes the median iou on test dataset."""
return statistics.median(self.ious)
def get_mean_iou(self) -> float:
"""Computes the mean iou on test dataset."""
return sum(self.ious) / len(self.ious)
def update_tp_tn_fp_fn_dice(self, dice: float) -> None:
for i, threshold in enumerate(self.dice_thresholds):
if dice >= threshold:
self.true_positive_dice[i] += 1
else:
self.false_negative_dice[i] += 1
self.false_positive_dice[i] += 1
def update_tp_tn_fp_fn_iou(self, iou: float) -> None:
for i, threshold in enumerate(self.iou_thresholds):
if iou >= threshold:
self.true_positive_iou[i] += 1
else:
self.false_negative_iou[i] += 1
self.false_positive_iou[i] += 1
def __show_result(self, image: NDArray[(Any, Any), float], true_mask: NDArray[(Any, Any), int]) -> None:
predicted_mask = self.model.predict_masks([image])[0]
show_image_color(image=image, true_mask=true_mask, predicted_mask=predicted_mask)
image = image.astype("uint8")
show_mask_on_image(image=image, mask=true_mask)
print(f"IOU: {get_iou(true_mask, predicted_mask)}")
print(f"DICE: {get_dice(true_mask, predicted_mask)}")
def show_results(self, display: int) -> None:
j = 1
for batch in self.batch_loader:
images, masks = batch
for image, mask in zip(images, masks):
if j <= display:
self.__show_result(image, mask)
j += 1
else: return None
def get_precision_iou(self) -> float:
precision_iou = defaultdict(float)
for i, iou_threshold in enumerate(self.iou_thresholds):
precision_iou[iou_threshold] = self.true_positive_iou[i] / (self.true_positive_iou[i] + self.false_positive_iou[i])
return precision_iou
def get_recall_iou(self) -> float:
recall_iou = defaultdict(float)
for i, iou_threshold in enumerate(self.iou_thresholds):
recall_iou[iou_threshold] = self.true_positive_iou[i] / (self.true_positive_iou[i] + self.false_negative_iou[i])
return recall_iou
def get_precision_dice(self) -> float:
precision_dice = defaultdict(float)
for i, dice_threshold in enumerate(self.dice_thresholds):
precision_dice[dice_threshold] = self.true_positive_dice[i] / (self.true_positive_dice[i] + self.false_positive_dice[i])
return precision_dice
def get_recall_dice(self) -> float:
recall_dice = defaultdict(float)
for i, dice_threshold in enumerate(self.dice_thresholds):
recall_dice[dice_threshold] = self.true_positive_dice[i] / (self.true_positive_dice[i] + self.false_negative_dice[i])
return recall_dice
def get_accuracy(true_mask: NDArray[(Any, Any), float], predicted_mask: NDArray[(Any, Any), float]) -> float:
true_positive, true_negative, false_positive, false_negative = get_pixel_tp_tn_fp_fn(true_mask, predicted_mask)
return (true_positive + true_negative) / (true_negative + true_positive + false_negative + false_positive)
def get_pixel_tp_tn_fp_fn(true_mask: NDArray[(Any, Any), float], predicted_mask: NDArray[(Any, Any), float]) -> Tuple[int, int, int, int]:
# true positive represents a pixel that is correctly predicted to belong to the tumor class
true_positive = np.sum(np.logical_and(true_mask, predicted_mask))
# true negative represents a pixel that is correctly identified as non-tumoral
inversed_true_mask = 1 - true_mask
inversed_predicted_mask = 1 - predicted_mask
true_negative = np.sum(np.logical_and(inversed_true_mask, inversed_predicted_mask))
# false positive represents a pixel that is wrongly predicted to belong to the tumor class
diff_mask = true_mask - predicted_mask
diff_mask_copy = np.copy(diff_mask)
diff_mask[diff_mask == 1] = 0
false_positive = - np.sum(diff_mask)
# false negative represents a pixel that should have been predicted as tumoral but wasn't
diff_mask_copy[diff_mask_copy == -1] = 0
false_negative = np.sum(diff_mask_copy)
return true_positive, true_negative, false_positive, false_negative
def get_iou(true_mask: NDArray[(Any, Any), float], predicted_mask: NDArray[(Any, Any), float]) -> float:
"""
Computes the iou score for binary segmentation.
"""
#true_mask = true_mask#.astype(np.bool)
# print(true_mask)
# predicted_mask = predicted_mask#.astype(np.bool)
intersection = np.logical_and(true_mask, predicted_mask).astype("uint8")
union = np.logical_or(true_mask, predicted_mask).astype("uint8")
return np.sum(intersection) / np.sum(union)
def get_dice(true_mask: NDArray[(Any, Any), float], predicted_mask: NDArray[(Any, Any), float]) -> float:
"""
Computes the dice score for binary segmentation.
"""
true_mask = true_mask.astype(np.bool)
predicted_mask = predicted_mask.astype(np.bool)
masks_sum = np.sum(true_mask) + np.sum(predicted_mask)
return 2 * np.sum(np.logical_and(true_mask, predicted_mask)) / masks_sum
I tried also to inverse the masks when the masks are empty but it gives to high iou and dice scores even if the predicted mask isn't completly empty.
def get_iou(true_mask: NDArray[(Any, Any), float], predicted_mask: NDArray[(Any, Any), float]) -> float:
"""
Computes the iou score for binary segmentation.
"""
#true_mask = true_mask#.astype(np.bool)
# print(true_mask)
# predicted_mask = predicted_mask#.astype(np.bool)
if np.sum(true_mask) == 0:
true_mask = 1 - true_mask
predicted_mask = 1 - predicted_mask
intersection = np.logical_and(true_mask, predicted_mask).astype("uint8")
union = np.logical_or(true_mask, predicted_mask).astype("uint8")
return np.sum(intersection) / np.sum(union)
def get_dice(true_mask: NDArray[(Any, Any), float], predicted_mask: NDArray[(Any, Any), float]) -> float:
"""
Computes the dice score for binary segmentation.
"""
if np.sum(true_mask) == 0:
true_mask = 1 - true_mask
predicted_mask = 1 - predicted_mask
true_mask = true_mask.astype(np.bool)
predicted_mask = predicted_mask.astype(np.bool)
masks_sum = np.sum(true_mask) + np.sum(predicted_mask)
return 2 * np.sum(np.logical_and(true_mask, predicted_mask)) / masks_sum
So, my questions are:
- How to take into account empty masks when computing IoU and Dice score ?
- Is there a better score to evaluate the models in my case ?
Thank you in advance for your help and ideas !
I think when both ground truth mask and prediction mask are empty then the metric should evaluate to Null and later we can exclude the nulls while reducing to a single value metric.