Implemented micro and macro averaged metrics

Signed-off-by: Jim Martens <github@2martens.de>
commit a5555c5f73
parent 3b2f9fe4da
2019-07-18 13:40:19 +02:00
2 changed files with 121 additions and 18 deletions


@@ -312,16 +312,25 @@ def _ssd_evaluate(args: argparse.Namespace) -> None:
     true_positives, false_positives, \
     cum_true_positives, cum_false_positives, \
-    open_set_error, cumulative_open_set_error = evaluate.match_predictions(predictions_per_class, labels,
-                                                                           bounding_box_utils.iou,
-                                                                           nr_classes, iou_threshold)
+    open_set_error, cumulative_open_set_error, \
+    cum_true_positives_overall, cum_false_positives_overall = evaluate.match_predictions(predictions_per_class,
+                                                                                         labels,
+                                                                                         bounding_box_utils.iou,
+                                                                                         nr_classes, iou_threshold)
-    cum_precisions, cum_recalls = evaluate.get_precision_recall(number_gt_per_class,
-                                                                cum_true_positives,
-                                                                cum_false_positives,
-                                                                nr_classes)
+    cum_precisions, cum_recalls, \
+    cum_precisions_micro, cum_recalls_micro, \
+    cum_precisions_macro, cum_recalls_macro = evaluate.get_precision_recall(number_gt_per_class,
+                                                                            cum_true_positives,
+                                                                            cum_false_positives,
+                                                                            cum_true_positives_overall,
+                                                                            cum_false_positives_overall,
+                                                                            nr_classes)
-    f1_scores = evaluate.get_f1_score(cum_precisions, cum_recalls, nr_classes)
+    f1_scores, f1_scores_micro, f1_scores_macro = evaluate.get_f1_score(cum_precisions, cum_recalls,
+                                                                        cum_precisions_micro, cum_recalls_micro,
+                                                                        cum_precisions_macro, cum_recalls_macro,
+                                                                        nr_classes)
     average_precisions = evaluate.get_mean_average_precisions(cum_precisions, cum_recalls, nr_classes)
     mean_average_precision = evaluate.get_mean_average_precision(average_precisions)
@@ -329,9 +338,17 @@ def _ssd_evaluate(args: argparse.Namespace) -> None:
                                          false_positives,
                                          cum_true_positives,
                                          cum_false_positives,
+                                         cum_true_positives_overall,
+                                         cum_false_positives_overall,
                                          cum_precisions,
                                          cum_recalls,
+                                         cum_precisions_micro,
+                                         cum_recalls_micro,
+                                         cum_precisions_macro,
+                                         cum_recalls_macro,
                                          f1_scores,
+                                         f1_scores_micro,
+                                         f1_scores_macro,
                                          average_precisions,
                                          mean_average_precision,
                                          open_set_error,
@@ -868,9 +885,17 @@ def _ssd_evaluate_get_results(true_positives: Sequence[np.ndarray],
                               false_positives: Sequence[np.ndarray],
                               cum_true_positives: Sequence[np.ndarray],
                               cum_false_positives: Sequence[np.ndarray],
+                              cum_true_positives_micro: np.ndarray,
+                              cum_false_positives_micro: np.ndarray,
                               cum_precisions: Sequence[np.ndarray],
                               cum_recalls: Sequence[np.ndarray],
+                              cum_precision_micro: np.ndarray,
+                              cum_recall_micro: np.ndarray,
+                              cum_precision_macro: np.ndarray,
+                              cum_recall_macro: np.ndarray,
                               f1_scores: Sequence[np.ndarray],
+                              f1_scores_micro: np.ndarray,
+                              f1_scores_macro: np.ndarray,
                               average_precisions: Sequence[float],
                               mean_average_precision: float,
                               open_set_error: np.ndarray,
@@ -881,9 +906,17 @@ def _ssd_evaluate_get_results(true_positives: Sequence[np.ndarray],
         "false_positives": false_positives,
         "cumulative_true_positives": cum_true_positives,
         "cumulative_false_positives": cum_false_positives,
+        "cumulative_true_positives_micro": cum_true_positives_micro,
+        "cumulative_false_positives_micro": cum_false_positives_micro,
         "cumulative_precisions": cum_precisions,
         "cumulative_recalls": cum_recalls,
+        "cumulative_precision_micro": cum_precision_micro,
+        "cumulative_recall_micro": cum_recall_micro,
+        "cumulative_precision_macro": cum_precision_macro,
+        "cumulative_recall_macro": cum_recall_macro,
         "f1_scores": f1_scores,
+        "f1_scores_micro": f1_scores_micro,
+        "f1_scores_macro": f1_scores_macro,
         "mean_average_precisions": average_precisions,
         "mean_average_precision": mean_average_precision,
         "open_set_error": open_set_error,


@@ -101,6 +101,7 @@ def match_predictions(predictions: Sequence[Sequence[Tuple[int, float, float, in
                       border_pixels: str = "include",
                       sorting_algorithm: str = "quicksort") -> Tuple[List[np.ndarray], List[np.ndarray],
                                                                      List[np.ndarray], List[np.ndarray],
+                                                                     np.ndarray, np.ndarray,
                                                                      np.ndarray, np.ndarray]:
     """
     Matches predictions to ground truth boxes.
@@ -126,7 +127,8 @@ def match_predictions(predictions: Sequence[Sequence[Tuple[int, float, float, in
     Returns:
         true positives, false positives, cumulative true positives, and cumulative false positives for
-        each class, open set error as defined by Miller et al, cumulative open set error
+        each class, open set error as defined by Miller et al, cumulative open set error,
+        cumulative true positives and cumulative false positives over all classes
     """
     true_positives = [[]]  # The true positives for each class, sorted by descending confidence.
     false_positives = [[]]  # The false positives for each class, sorted by descending confidence.
@@ -140,7 +142,9 @@ def match_predictions(predictions: Sequence[Sequence[Tuple[int, float, float, in
            most_predictions = nr_predictions
     open_set_error = np.zeros(most_predictions, dtype=np.int)
+    true_positives_micro = np.zeros(most_predictions, dtype=np.int)
+    false_positives_micro = np.zeros(most_predictions, dtype=np.int)
     for class_id in range(1, nr_classes + 1):
         predictions_class = predictions[class_id]
@@ -198,6 +202,7 @@ def match_predictions(predictions: Sequence[Sequence[Tuple[int, float, float, in
                 # If the image doesn't contain any objects of this class,
                 # the prediction becomes a false positive.
                 false_pos[i] = 1
+                false_positives_micro[i] += 1
                 open_set_error[i] += 1
                 continue
@@ -219,12 +224,14 @@ def match_predictions(predictions: Sequence[Sequence[Tuple[int, float, float, in
                 # Those predictions whose matched overlap is below the threshold become
                 # false positives.
                 false_pos[i] = 1
+                false_positives_micro[i] += 1
             else:
                 if image_id not in gt_matched:
                     # True positive:
                     # If the matched ground truth box for this prediction hasn't been matched to a
                     # different prediction already, we have a true positive.
                     true_pos[i] = 1
+                    true_positives_micro[i] += 1
                     gt_matched[image_id] = np.zeros(shape=(gt.shape[0]), dtype=np.bool)
                     gt_matched[image_id][gt_match_index] = True
                 elif not gt_matched[image_id][gt_match_index]:
@@ -232,6 +239,7 @@ def match_predictions(predictions: Sequence[Sequence[Tuple[int, float, float, in
                     # True positive:
                     # If the matched ground truth box for this prediction hasn't been matched to a
                     # different prediction already, we have a true positive.
                     true_pos[i] = 1
+                    true_positives_micro[i] += 1
                     gt_matched[image_id][gt_match_index] = True
                 else:
@@ -239,6 +247,7 @@ def match_predictions(predictions: Sequence[Sequence[Tuple[int, float, float, in
                     # to a different prediction previously, it is a duplicate detection for an
                     # already detected object, which counts as a false positive.
                     false_pos[i] = 1
+                    false_positives_micro[i] += 1

         true_positives.append(true_pos)
         false_positives.append(false_pos)
@@ -250,17 +259,24 @@ def match_predictions(predictions: Sequence[Sequence[Tuple[int, float, float, in
         cumulative_false_positives.append(cumulative_false_pos)

     cumulative_open_set_error = np.cumsum(open_set_error)
+    cumulative_false_positives_micro = np.cumsum(false_positives_micro)
+    cumulative_true_positives_micro = np.cumsum(true_positives_micro)

     return (
         true_positives, false_positives, cumulative_true_positives, cumulative_false_positives,
-        open_set_error, cumulative_open_set_error
+        open_set_error, cumulative_open_set_error,
+        cumulative_true_positives_micro, cumulative_false_positives_micro
     )


 def get_precision_recall(number_gt_per_class: np.ndarray,
                          cumulative_true_positives: Sequence[np.ndarray],
                          cumulative_false_positives: Sequence[np.ndarray],
-                         nr_classes: int) -> Tuple[List[np.ndarray], List[np.ndarray]]:
+                         cumulative_true_positives_micro: np.ndarray,
+                         cumulative_false_positives_micro: np.ndarray,
+                         nr_classes: int) -> Tuple[List[np.ndarray], List[np.ndarray],
+                                                   np.ndarray, np.ndarray,
+                                                   np.ndarray, np.ndarray]:
     """
     Computes the precision and recall values and returns them.
@@ -268,13 +284,23 @@ def get_precision_recall(number_gt_per_class: np.ndarray,
         number_gt_per_class: number of ground truth bounding boxes per class
         cumulative_true_positives: cumulative true positives per class
         cumulative_false_positives: cumulative false positives per class
+        cumulative_true_positives_micro: cumulative true positives over all classes
+        cumulative_false_positives_micro: cumulative false positives over all classes
         nr_classes: number of classes

     Returns:
-        cumulative precisions and cumulative recalls per class
+        cumulative precisions and cumulative recalls per class,
+        micro averaged precision/recall, and
+        macro averaged precision/recall
     """
     cumulative_precisions = [[]]
     cumulative_recalls = [[]]
+    cumulative_precision_micro = np.zeros_like(cumulative_true_positives_micro)
+    cumulative_recall_micro = np.zeros_like(cumulative_true_positives_micro)
+    # float dtype needed: the in-place addition of fractional precision/recall
+    # values below fails on the integer zeros that np.zeros_like of the
+    # integer count arrays would produce.
+    cumulative_precision_macro = np.zeros_like(cumulative_precision_micro, dtype=np.float)
+    cumulative_recall_macro = np.zeros_like(cumulative_recall_micro, dtype=np.float)
+    total_number_gt = 0
+    number_of_nonzero_classes = 0

     # Iterate over all classes.
     for class_id in range(1, nr_classes + 1):
@@ -288,27 +314,66 @@ def get_precision_recall(number_gt_per_class: np.ndarray,
         fp = cumulative_false_positives[class_id]
         cumulative_precision = np.where(tp + fp > 0, tp / (tp + fp), 0)  # 1D array with shape `(num_predictions,)`
-        cumulative_recall = tp / number_gt_per_class[class_id]  # 1D array with shape `(num_predictions,)`
+        number_gt = number_gt_per_class[class_id]
+        total_number_gt += number_gt
+        cumulative_recall = tp / number_gt  # 1D array with shape `(num_predictions,)`
         cumulative_precisions.append(cumulative_precision)
         cumulative_recalls.append(cumulative_recall)
+        diff_to_largest_class = cumulative_precision_micro.shape[0] - cumulative_precision.shape[0]
+        if diff_to_largest_class:
+            repeated_last_precision = np.tile(cumulative_precision[-1], diff_to_largest_class)
+            repeated_last_recall = np.tile(cumulative_recall[-1], diff_to_largest_class)
+            extended_precision = np.concatenate((cumulative_precision, repeated_last_precision))
+            extended_recall = np.concatenate((cumulative_recall, repeated_last_recall))
+            cumulative_precision_macro += extended_precision
+            cumulative_recall_macro += extended_recall
+        else:
+            cumulative_precision_macro += cumulative_precision
+            cumulative_recall_macro += cumulative_recall
+        number_of_nonzero_classes += 1

+    # calculate micro averaged precision and recall
+    tp = cumulative_true_positives_micro
+    fp = cumulative_false_positives_micro
+    cumulative_precision_micro = np.where(tp + fp > 0, tp / (tp + fp), 0)
+    cumulative_recall_micro = tp / total_number_gt

-    return cumulative_precisions, cumulative_recalls
+    # calculate macro averaged precision and recall
+    cumulative_precision_macro /= number_of_nonzero_classes
+    cumulative_recall_macro /= number_of_nonzero_classes

+    return (cumulative_precisions, cumulative_recalls,
+            cumulative_precision_micro, cumulative_recall_micro,
+            cumulative_precision_macro, cumulative_recall_macro
+            )


 def get_f1_score(cumulative_precisions: List[np.ndarray],
                  cumulative_recalls: List[np.ndarray],
-                 nr_classes: int) -> List[np.ndarray]:
+                 cumulative_precision_micro: np.ndarray,
+                 cumulative_recall_micro: np.ndarray,
+                 cumulative_precision_macro: np.ndarray,
+                 cumulative_recall_macro: np.ndarray,
+                 nr_classes: int) -> Tuple[List[np.ndarray],
+                                           np.ndarray, np.ndarray]:
     """
     Computes the F1 score for every class.

     Args:
         cumulative_precisions: cumulative precisions for each class
         cumulative_recalls: cumulative recalls for each class
+        cumulative_precision_micro: cumulative precision micro averaged
+        cumulative_recall_micro: cumulative recall micro averaged
+        cumulative_precision_macro: cumulative precision macro averaged
+        cumulative_recall_macro: cumulative recall macro averaged
         nr_classes: number of classes

     Returns:
-        cumulative F1 score per class
+        cumulative F1 score per class,
+        cumulative F1 score micro averaged, cumulative F1 score macro averaged
     """
     cumulative_f1_scores = [[]]
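Because each class usually has a different number of predictions, the per-class cumulative curves differ in length; the macro branch above therefore extends each shorter curve by repeating its last value before summing, via the np.tile/np.concatenate pair. A standalone sketch of that alignment step with two made-up curves:

import numpy as np

# Hypothetical cumulative precision curves; class B has fewer predictions.
curve_a = np.array([1.0, 0.5, 0.67, 0.75])  # 4 predictions
curve_b = np.array([1.0, 1.0])              # 2 predictions

# Pad the shorter curve by repeating its final value, then average
# elementwise, mirroring the macro branch in get_precision_recall.
diff = curve_a.shape[0] - curve_b.shape[0]
curve_b_ext = np.concatenate((curve_b, np.tile(curve_b[-1], diff)))
macro_curve = (curve_a + curve_b_ext) / 2  # [1.0, 0.75, 0.835, 0.875]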
@@ -321,8 +386,13 @@ def get_f1_score(cumulative_precisions: List[np.ndarray],
             continue
         f1_score = 2 * ((cumulative_precision * cumulative_recall) / (cumulative_precision + cumulative_recall + 0.001))
         cumulative_f1_scores.append(f1_score)
+    f1_score_micro = 2 * ((cumulative_precision_micro * cumulative_recall_micro) /
+                          (cumulative_precision_micro + cumulative_recall_micro + 0.001))
+    f1_score_macro = 2 * ((cumulative_precision_macro * cumulative_recall_macro) /
+                          (cumulative_precision_macro + cumulative_recall_macro + 0.001))

-    return cumulative_f1_scores
+    return cumulative_f1_scores, f1_score_micro, f1_score_macro


 def get_mean_average_precisions(cumulative_precisions: List[np.ndarray],
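One detail of the F1 computation above: the constant 0.001 in the denominator keeps positions where precision and recall are both zero from dividing by zero, at the cost of slightly deflating every score. A standalone sketch of the same formula on made-up micro-averaged curves:

import numpy as np

# Hypothetical micro-averaged cumulative precision/recall curves.
precision_micro = np.array([1.0, 0.5, 0.67])
recall_micro = np.array([0.0, 0.1, 0.2])

# Harmonic mean of precision and recall with the 0.001 stabiliser.
f1_micro = 2 * ((precision_micro * recall_micro) /
                (precision_micro + recall_micro + 0.001))  # [0.0, 0.166, 0.308]

An exact F1 would instead guard the zero case with np.where; the additive constant is simpler, and the bias it introduces is negligible at this scale.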