From ef05d3f58ea4c0a1ee91e69cfb226d7753b773f1 Mon Sep 17 00:00:00 2001
From: Jim Martens
Date: Wed, 15 May 2019 16:42:02 +0200
Subject: [PATCH] Added evaluate module

Signed-off-by: Jim Martens
---
 src/twomartens/masterthesis/__init__.py |   1 +
 src/twomartens/masterthesis/evaluate.py | 342 ++++++++++++++++++++++++
 2 files changed, 343 insertions(+)
 create mode 100644 src/twomartens/masterthesis/evaluate.py

diff --git a/src/twomartens/masterthesis/__init__.py b/src/twomartens/masterthesis/__init__.py
index f8d37e4..c1c379e 100644
--- a/src/twomartens/masterthesis/__init__.py
+++ b/src/twomartens/masterthesis/__init__.py
@@ -24,6 +24,7 @@ Subpackages:
 Modules:
     ``data``: provides functionality to load data sets
     ``definitions``: contains mapper between COCO classes and WordNet IDs
+    ``evaluate``: provides functionality to evaluate networks
     ``main``: main entrance point of application
     ``ssd``: provides functionality to use the SSD models
 """
diff --git a/src/twomartens/masterthesis/evaluate.py b/src/twomartens/masterthesis/evaluate.py
new file mode 100644
index 0000000..26934b5
--- /dev/null
+++ b/src/twomartens/masterthesis/evaluate.py
@@ -0,0 +1,342 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2018 Timon Brüning, Inga Kempfert, Anne Kunstmann, Jim Martens,
+# Marius Pierenkemper, Yanneck Reiss
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Functionality to evaluate results of networks.
+
+Functions:
+    get_number_gt_per_class(...): calculates the number of ground truth boxes per class
+    prepare_predictions(...): prepares the predictions for further processing
+    match_predictions(...): matches predictions to ground truth boxes
+    get_precision_recall(...): computes the cumulative precision and recall values per class
+    get_mean_average_precisions(...): computes the average precision for each class
+    get_mean_average_precision(...): computes the mean average precision over all classes
+"""
+from typing import Sequence, Union, Tuple, List
+
+import numpy as np
+
+from twomartens.masterthesis.ssd_keras.bounding_box_utils import bounding_box_utils
+
+
+def get_number_gt_per_class(labels: Sequence[Sequence[Sequence[int]]],
+                            nr_classes: int) -> np.ndarray:
+    """
+    Calculates the number of ground truth boxes per class and returns the result.
+
+    Args:
+        labels: list of labels per image
+        nr_classes: number of classes
+
+    Returns:
+        numpy array with the respective counts
+    """
+    number_gt_per_class = np.zeros(shape=(nr_classes + 1), dtype=int)
+    label_range = range(len(labels))
+
+    # iterate over images
+    for i in label_range:
+        boxes = np.asarray(labels[i])
+
+        # iterate over boxes in image
+        for j in range(boxes.shape[0]):
+            class_id = boxes[j, 0]
+            number_gt_per_class[class_id] += 1
+
+    return number_gt_per_class
+
+
+def prepare_predictions(predictions: Sequence[Sequence[Sequence[Union[int, float]]]],
+                        nr_classes: int) -> \
+        List[List[Tuple[int, float, int, int, int, int]]]:
+    """
+    Prepares the predictions for further processing.
+
+    Args:
+        predictions: list of predictions per image
+        nr_classes: number of classes
+
+    Returns:
+        list of predictions per class
+    """
+    results = [list() for _ in range(nr_classes + 1)]
+
+    for i, batch_item in enumerate(predictions):
+        image_id = i
+
+        for box in batch_item:
+            class_id = int(box[0])
+            # Round the box coordinates to reduce the required memory.
+            confidence = box[1]
+            xmin = round(box[2])
+            ymin = round(box[3])
+            xmax = round(box[4])
+            ymax = round(box[5])
+            prediction = (image_id, confidence, xmin, ymin, xmax, ymax)
+            # Append the predicted box to the results list for its class.
+            results[class_id].append(prediction)
+
+    return results
+
+
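+# Illustrative sketch (not part of the module's logic): how the two helpers
+# above behave on a hypothetical toy input with two classes, one image, one
+# ground truth box of class 1, and one matching detection.
+#
+#     labels = [[[1, 10, 20, 30, 40]]]
+#     predictions = [[[1, 0.9, 10.2, 20.7, 30.1, 40.9]]]
+#
+#     get_number_gt_per_class(labels, nr_classes=2)
+#     # -> array([0, 1, 0])
+#     prepare_predictions(predictions, nr_classes=2)
+#     # -> [[], [(0, 0.9, 10, 21, 30, 41)], []]
+
+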
+def match_predictions(predictions: Sequence[Sequence[Tuple[int, float, int, int, int, int]]],
+                      labels: Sequence[Sequence[Sequence[int]]],
+                      nr_classes: int,
+                      iou_threshold: float = 0.5,
+                      border_pixels: str = "include",
+                      sorting_algorithm: str = "quicksort") -> Tuple[List[np.ndarray], List[np.ndarray],
+                                                                     List[np.ndarray], List[np.ndarray]]:
+    """
+    Matches predictions to ground truth boxes.
+
+    Args:
+        predictions: list of predictions
+        labels: list of labels per image
+        nr_classes: number of classes
+        iou_threshold: only matches higher than this value will be considered
+        border_pixels: How to treat the border pixels of the bounding boxes.
+            Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
+            to the boxes. If 'exclude', the border pixels do not belong to the boxes.
+            If 'half', then one of each of the two horizontal and vertical borders belongs
+            to the boxes, but not the other.
+        sorting_algorithm: Which sorting algorithm the matching algorithm should use. This
+            argument accepts any valid sorting algorithm for Numpy's `argsort()` function.
+            You will usually want to choose between 'quicksort' (fastest and most memory efficient,
+            but not stable) and 'mergesort' (slightly slower and less memory efficient, but stable).
+            The official Matlab evaluation algorithm uses a stable sorting algorithm, so this algorithm
+            is only guaranteed to behave identically if you choose 'mergesort' as the sorting algorithm,
+            but it will almost always behave identically even if you choose 'quicksort' (but no guarantees).
+
+    Returns:
+        true positives, false positives, cumulative true positives, and cumulative false positives for
+        each class
+    """
+    true_positives = [[]]  # The true positives for each class, sorted by descending confidence.
+    false_positives = [[]]  # The false positives for each class, sorted by descending confidence.
+    cumulative_true_positives = [[]]
+    cumulative_false_positives = [[]]
+
+    for class_id in range(1, nr_classes + 1):
+        predictions_class = predictions[class_id]
+
+        # Store the matching results in these lists:
+        true_pos = np.zeros(len(predictions_class),
+                            dtype=int)  # 1 for every prediction that is a true positive, 0 otherwise
+        false_pos = np.zeros(len(predictions_class),
+                             dtype=int)  # 1 for every prediction that is a false positive, 0 otherwise
+
+        # In case there are no predictions at all for this class, we're done here.
+        # Append the (empty) cumulative sums as well so that all four returned
+        # lists stay aligned with the class indices.
+        if len(predictions_class) == 0:
+            true_positives.append(true_pos)
+            false_positives.append(false_pos)
+            cumulative_true_positives.append(np.cumsum(true_pos))
+            cumulative_false_positives.append(np.cumsum(false_pos))
+            continue
+
+        # Convert the predictions list for this class into a structured array so that we can sort it by confidence.
+
+        # Create the data type for the structured array.
+        preds_data_type = np.dtype([('image_id', np.int32),
+                                    ('confidence', 'f4'),
+                                    ('xmin', 'f4'),
+                                    ('ymin', 'f4'),
+                                    ('xmax', 'f4'),
+                                    ('ymax', 'f4')])
+        # Create the structured array
+        predictions_class = np.array(predictions_class, dtype=preds_data_type)
+        # Sort the detections by decreasing confidence.
+        descending_indices = np.argsort(-predictions_class['confidence'], kind=sorting_algorithm)
+        predictions_sorted = predictions_class[descending_indices]
+
+        # Keep track of which ground truth boxes were already matched to a detection.
+        gt_matched = {}
+
+        for i in range(len(predictions_class)):
+            prediction = predictions_sorted[i]
+            image_id = prediction['image_id']
+            # Convert the structured array element to a regular array
+            pred_box = np.asarray(list(prediction[['xmin', 'ymin', 'xmax', 'ymax']]))
+
+            # Get the relevant ground truth boxes for this prediction,
+            # i.e. all ground truth boxes that match the prediction's
+            # image ID and class ID.
+
+            gt = labels[image_id]
+            gt = np.asarray(gt)
+            class_mask = gt[:, 0] == class_id
+            gt = gt[class_mask]
+
+            if gt.size == 0:
+                # If the image doesn't contain any objects of this class,
+                # the prediction becomes a false positive.
+                false_pos[i] = 1
+                continue
+
+            # Compute the IoU of this prediction with all ground truth boxes of the same class.
+            overlaps = bounding_box_utils.iou(boxes1=gt[:, [1, 2, 3, 4]],
+                                              boxes2=pred_box,
+                                              coords='corners',
+                                              mode='element-wise',
+                                              border_pixels=border_pixels)
+
+            # For each detection, match the ground truth box with the highest overlap.
+            # It's possible that the same ground truth box will be matched to multiple
+            # detections.
+            gt_match_index = np.argmax(overlaps)
+            gt_match_overlap = overlaps[gt_match_index]
+
+            if gt_match_overlap < iou_threshold:
+                # False positive, IoU threshold violated:
+                # Those predictions whose matched overlap is below the threshold become
+                # false positives.
+                false_pos[i] = 1
+            else:
+                # The overlap meets the IoU threshold; check whether the matched
+                # ground truth box has already been claimed by another detection.
+                if image_id not in gt_matched:
+                    # True positive:
+                    # If the matched ground truth box for this prediction hasn't been matched to a
+                    # different prediction already, we have a true positive.
+                    true_pos[i] = 1
+                    gt_matched[image_id] = np.zeros(shape=(gt.shape[0]), dtype=bool)
+                    gt_matched[image_id][gt_match_index] = True
+                elif not gt_matched[image_id][gt_match_index]:
+                    # True positive:
+                    # If the matched ground truth box for this prediction hasn't been matched to a
+                    # different prediction already, we have a true positive.
+                    true_pos[i] = 1
+                    gt_matched[image_id][gt_match_index] = True
+                else:
+                    # False positive, duplicate detection:
+                    # If the matched ground truth box for this prediction has already been matched
+                    # to a different prediction previously, it is a duplicate detection for an
+                    # already detected object, which counts as a false positive.
+                    false_pos[i] = 1
+
+        true_positives.append(true_pos)
+        false_positives.append(false_pos)
+
+        cumulative_true_pos = np.cumsum(true_pos)  # Cumulative sums of the true positives
+        cumulative_false_pos = np.cumsum(false_pos)  # Cumulative sums of the false positives
+
+        cumulative_true_positives.append(cumulative_true_pos)
+        cumulative_false_positives.append(cumulative_false_pos)
+
+    return true_positives, false_positives, cumulative_true_positives, cumulative_false_positives
+
+
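+# Illustrative sketch (hypothetical values): continuing the toy input from the
+# comment above, the single confident detection overlaps its ground truth box
+# well beyond the default IoU threshold of 0.5, so it counts as a true positive
+# for class 1.
+#
+#     preds_per_class = prepare_predictions(predictions, nr_classes=2)
+#     tp, fp, cum_tp, cum_fp = match_predictions(preds_per_class, labels,
+#                                                nr_classes=2)
+#     # tp[1] -> array([1]), fp[1] -> array([0])
+#     # cum_tp[1] -> array([1]), cum_fp[1] -> array([0])
+
+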
+def get_precision_recall(number_gt_per_class: np.ndarray,
+                         cumulative_true_positives: Sequence[np.ndarray],
+                         cumulative_false_positives: Sequence[np.ndarray],
+                         nr_classes: int) -> Tuple[List[np.ndarray], List[np.ndarray]]:
+    """
+    Computes the precision and recall values and returns them.
+
+    Args:
+        number_gt_per_class: number of ground truth bounding boxes per class
+        cumulative_true_positives: cumulative true positives per class
+        cumulative_false_positives: cumulative false positives per class
+        nr_classes: number of classes
+
+    Returns:
+        cumulative precisions and cumulative recalls per class
+    """
+    cumulative_precisions = [[]]
+    cumulative_recalls = [[]]
+
+    # Iterate over all classes.
+    for class_id in range(1, nr_classes + 1):
+
+        tp = cumulative_true_positives[class_id]
+        fp = cumulative_false_positives[class_id]
+
+        cumulative_precision = np.where(tp + fp > 0, tp / (tp + fp), 0)  # 1D array with shape `(num_predictions,)`
+        cumulative_recall = tp / number_gt_per_class[class_id]  # 1D array with shape `(num_predictions,)`
+
+        cumulative_precisions.append(cumulative_precision)
+        cumulative_recalls.append(cumulative_recall)
+
+    return cumulative_precisions, cumulative_recalls
+
+
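+# Illustrative sketch (hypothetical values): for the toy class above with one
+# ground truth box and one true positive detection, precision and recall are
+# both 1.0 at the single confidence cut-off.
+#
+#     num_gt = get_number_gt_per_class(labels, nr_classes=2)
+#     cum_prec, cum_rec = get_precision_recall(num_gt, cum_tp, cum_fp,
+#                                              nr_classes=2)
+#     # cum_prec[1] -> array([1.]), cum_rec[1] -> array([1.])
+
+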
+def get_mean_average_precisions(cumulative_precisions: List[np.ndarray],
+                                cumulative_recalls: List[np.ndarray],
+                                nr_classes: int) -> List[float]:
+    """
+    Computes the average precision for each class and returns them.
+
+    Args:
+        cumulative_precisions: cumulative precisions for each class
+        cumulative_recalls: cumulative recalls for each class
+        nr_classes: number of classes
+
+    Returns:
+        average precision per class
+    """
+    average_precisions = [0.0]
+
+    # Iterate over all classes.
+    for class_id in range(1, nr_classes + 1):
+
+        cumulative_precision = cumulative_precisions[class_id]
+        cumulative_recall = cumulative_recalls[class_id]
+
+        # We will compute the precision at all unique recall values.
+        unique_recalls, unique_recall_indices = np.unique(cumulative_recall,
+                                                          return_index=True)
+
+        # Store the maximal precision for each recall value and the absolute difference
+        # between any two unique recall values in the lists below. The products of these
+        # two numbers constitute the rectangular areas whose sum will be our numerical
+        # integral.
+        maximal_precisions = np.zeros_like(unique_recalls)
+        recall_deltas = np.zeros_like(unique_recalls)
+
+        # Iterate over all unique recall values in reverse order. This saves a lot of computation:
+        # For each unique recall value `r`, we want to get the maximal precision value obtained
+        # for any recall value `r* >= r`. Once we know the maximal precision for the last `k` recall
+        # values after a given iteration, then in the next iteration, in order to compute the maximal
+        # precisions for the last `l > k` recall values, we only need to compute the maximal precision
+        # for `l - k` recall values and then take the maximum between that and the previously computed
+        # maximum instead of computing the maximum over all `l` values.
+        # We skip the very last recall value, since the precision after the last recall value
+        # 1.0 is defined to be zero.
+        for i in range(len(unique_recalls) - 2, -1, -1):
+            begin = unique_recall_indices[i]
+            end = unique_recall_indices[i + 1]
+            # When computing the maximal precisions, use the maximum of the previous iteration to
+            # avoid unnecessary repeated computation over the same precision values.
+            # The maximal precisions are the heights of the rectangle areas of our integral under
+            # the precision-recall curve.
+            maximal_precisions[i] = np.maximum(np.amax(cumulative_precision[begin:end]),
+                                               maximal_precisions[i + 1])
+            # The differences between two adjacent recall values are the widths of our rectangle areas.
+            recall_deltas[i] = unique_recalls[i + 1] - unique_recalls[i]
+
+        average_precision = np.sum(maximal_precisions * recall_deltas)
+        average_precisions.append(average_precision)
+
+    return average_precisions
+
+
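+# Illustrative sketch (hypothetical values): with cumulative precisions
+# [1.0, 1.0] at cumulative recalls [0.5, 1.0] for a single class, the integral
+# consists of one rectangle of height 1.0 spanning the recall interval
+# [0.5, 1.0], so the returned average precision for that class is 0.5.
+#
+#     aps = get_mean_average_precisions([[], np.array([1.0, 1.0])],
+#                                       [[], np.array([0.5, 1.0])],
+#                                       nr_classes=1)
+#     # aps -> [0.0, 0.5]
+
+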
+def get_mean_average_precision(average_precisions: List[float]) -> float:
+    """
+    Computes the mean average precision over all classes and returns it.
+
+    Args:
+        average_precisions: list of average precisions per class; the entry at
+            index 0 (background) is ignored
+
+    Returns:
+        mean average precision over all classes
+    """
+    return float(np.average(average_precisions[1:]))
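
Usage sketch (illustrative only; `labels` and `predictions` stand for ground
truth in `[class_id, xmin, ymin, xmax, ymax]` format and decoded SSD-style
detections in `[class_id, confidence, xmin, ymin, xmax, ymax]` format,
respectively):

    from twomartens.masterthesis import evaluate

    num_gt = evaluate.get_number_gt_per_class(labels, nr_classes)
    preds = evaluate.prepare_predictions(predictions, nr_classes)
    tp, fp, cum_tp, cum_fp = evaluate.match_predictions(preds, labels, nr_classes)
    cum_prec, cum_rec = evaluate.get_precision_recall(num_gt, cum_tp, cum_fp, nr_classes)
    aps = evaluate.get_mean_average_precisions(cum_prec, cum_rec, nr_classes)
    mean_ap = evaluate.get_mean_average_precision(aps)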