# masterthesis/src/twomartens/masterthesis/ssd.py
# -*- coding: utf-8 -*-
#
# Copyright 2019 Jim Martens
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Provides functionality to use the SSD Keras implementation.

Functions:
    compile_model(...): compiles an SSD model
    get_loss_func(...): returns the SSD loss function
    get_model(...): returns the correct SSD model and the corresponding predictor sizes
    predict(...): runs the trained SSD/DropoutSSD on a given data set
    train(...): trains the SSD/DropoutSSD on a given data set
"""
import functools
import os
import pickle
from typing import Generator
from typing import List
from typing import Optional
from typing import Sequence
from typing import Tuple
from typing import Union
import math
import numpy as np
import tensorflow as tf
from attributedict.collections import AttributeDict
from twomartens.masterthesis import config
from twomartens.masterthesis import debug
from twomartens.masterthesis.ssd_keras.bounding_box_utils import bounding_box_utils
from twomartens.masterthesis.ssd_keras.data_generator import object_detection_2d_misc_utils
from twomartens.masterthesis.ssd_keras.eval_utils import coco_utils
from twomartens.masterthesis.ssd_keras.keras_loss_function import keras_ssd_loss
from twomartens.masterthesis.ssd_keras.ssd_encoder_decoder import ssd_output_decoder
K = tf.keras.backend
tfe = tf.contrib.eager
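
# Note: tf.contrib.eager and tf.train.AdamOptimizer tie this module to TensorFlow 1.x.
#
# Typical call sequence (sketch only; build_vanilla_ssd, build_bayesian_ssd, conf, the weight
# paths and the data generators are placeholders for the project-specific model builders,
# configuration and input pipeline, not names defined in this module):
#
#     model, predictor_sizes = get_model(use_bayesian=False,
#                                        bayesian_model=build_bayesian_ssd,
#                                        vanilla_model=build_vanilla_ssd,
#                                        conf_obj=conf,
#                                        mode="training")
#     compile_model(model, learning_rate=0.001, loss_func=get_loss_func())
#     train(train_generator, steps_per_epoch_train, val_generator, steps_per_epoch_val,
#           model, weights_prefix="weights/ssd", iteration=0, initial_epoch=0,
#           nr_epochs=80, tensorboard_callback=None)
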
def get_model(use_bayesian: bool,
bayesian_model: callable, vanilla_model: callable,
conf_obj: config.Config,
mode: str,
pre_trained_weights_file: Optional[str] = None) -> Tuple[tf.keras.models.Model, np.ndarray]:
"""
Returns the correct SSD model and the corresponding predictor sizes.
Args:
use_bayesian: True if Bayesian variant should be used, False otherwise
bayesian_model: function to build Bayesian SSD model
vanilla_model: function to build vanilla SSD model
conf_obj: configuration object
mode: one of "training", "inference", "inference_fast"
pre_trained_weights_file: path to h5 file with pre-trained weights
Returns:
SSD model, predictor_sizes
"""
image_size = (conf_obj.parameters.ssd_image_size, conf_obj.parameters.ssd_image_size, 3)
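    # anchor box scaling factors for the SSD300 predictor layers; in the ssd_keras convention
    # the list is one element longer than the number of predictor layers, the last value being
    # used only for the second square box of the final layer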
scales = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
if use_bayesian:
model, predictor_sizes = bayesian_model(
image_size=image_size,
n_classes=conf_obj.parameters.nr_classes,
mode=mode,
iou_threshold=conf_obj.parameters.ssd_iou_threshold,
dropout_rate=conf_obj.parameters.ssd_dropout_rate,
top_k=conf_obj.parameters.ssd_top_k,
scales=scales,
return_predictor_sizes=True,
coords="corners",
use_dropout=conf_obj.parameters.ssd_use_dropout
)
else:
model, predictor_sizes = vanilla_model(
image_size=image_size,
n_classes=conf_obj.parameters.nr_classes,
mode=mode,
iou_threshold=conf_obj.parameters.ssd_iou_threshold,
top_k=conf_obj.parameters.ssd_top_k,
scales=scales,
return_predictor_sizes=True,
coords="corners"
)
if mode == "training":
# set non-classifier layers to non-trainable
classifier_names = ['conv4_3_norm_mbox_conf',
'fc7_mbox_conf',
'conv6_2_mbox_conf',
'conv7_2_mbox_conf',
'conv8_2_mbox_conf',
'conv9_2_mbox_conf']
for layer in model.layers:
if layer.name not in classifier_names:
layer.trainable = False
if pre_trained_weights_file is not None:
model.load_weights(pre_trained_weights_file, by_name=True)
return model, predictor_sizes
def get_loss_func() -> callable:
"""Returns loss function for SSD."""
return keras_ssd_loss.SSDLoss().compute_loss
def compile_model(model: tf.keras.models.Model, learning_rate: float, loss_func: callable) -> None:
"""
Compiles an SSD model.
Args:
model: SSD model
learning_rate: the learning rate
loss_func: loss function to minimize
"""
learning_rate_var = K.variable(learning_rate)
# compile the model
model.compile(
optimizer=tf.train.AdamOptimizer(learning_rate=learning_rate_var,
beta1=0.9, beta2=0.999),
loss=loss_func,
metrics=[
"categorical_accuracy"
]
)
def predict(generator: callable,
model: tf.keras.models.Model,
conf_obj: config.Config,
steps_per_epoch: int,
use_bayesian: bool,
nr_digits: int,
output_path: str) -> None:
"""
    Runs the trained SSD on the given data set.
    The prediction results are saved to the output path.
Args:
generator: generator of test data
model: compiled and trained Keras model
conf_obj: configuration object
steps_per_epoch: number of batches per epoch
use_bayesian: if True, multiple forward passes and observations will be used
nr_digits: number of digits needed to print largest batch number
output_path: the path in which the results should be saved
"""
output_file, label_output_file = _predict_prepare_paths(output_path, use_bayesian)
_predict_loop(
generator=generator,
use_bayesian=use_bayesian,
conf_obj=conf_obj,
steps_per_epoch=steps_per_epoch,
callables=AttributeDict({
"dropout_step": functools.partial(
_predict_dropout_step,
model=model,
batch_size=conf_obj.parameters.batch_size,
forward_passes_per_image=conf_obj.parameters.ssd_forward_passes_per_image
),
"vanilla_step": functools.partial(_predict_vanilla_step, model=model),
"save_images": functools.partial(
_predict_save_images,
save_images=debug.save_ssd_train_images,
get_coco_cat_maps_func=coco_utils.get_coco_category_maps,
output_path=output_path,
conf_obj=conf_obj
),
"decode_func": functools.partial(
_decode_predictions,
decode_func=ssd_output_decoder.decode_detections,
image_size=conf_obj.parameters.ssd_image_size,
confidence_threshold=conf_obj.parameters.ssd_confidence_threshold,
iou_threshold=conf_obj.parameters.ssd_iou_threshold,
top_k=conf_obj.parameters.ssd_top_k
),
"decode_func_dropout": functools.partial(
_decode_predictions_dropout,
decode_func=ssd_output_decoder.decode_detections_dropout,
image_size=conf_obj.parameters.ssd_image_size,
confidence_threshold=conf_obj.parameters.ssd_confidence_threshold,
),
"apply_entropy_threshold_func": functools.partial(
_apply_entropy_filtering,
confidence_threshold=conf_obj.parameters.ssd_confidence_threshold,
nr_classes=conf_obj.parameters.nr_classes,
iou_threshold=conf_obj.parameters.ssd_iou_threshold,
use_nms=conf_obj.parameters.ssd_use_nms
),
"apply_top_k_func": functools.partial(
_apply_top_k,
top_k=conf_obj.parameters.ssd_top_k
),
"get_observations_func": _get_observations,
"transform_func": functools.partial(
_transform_predictions,
inverse_transform_func=object_detection_2d_misc_utils.apply_inverse_transforms),
"save_func": functools.partial(
_save_predictions,
output_file=output_file,
label_output_file=label_output_file,
nr_digits=nr_digits
)
})
)
def train(train_generator: callable,
steps_per_epoch_train: int,
val_generator: callable,
steps_per_epoch_val: int,
ssd_model: tf.keras.models.Model,
weights_prefix: str,
iteration: int,
initial_epoch: int,
nr_epochs: int,
tensorboard_callback: Optional[tf.keras.callbacks.TensorBoard]) -> tf.keras.callbacks.History:
"""
Trains the SSD on the given data set using Keras functionality.
Args:
train_generator: generator of training data
steps_per_epoch_train: number of batches per training epoch
val_generator: generator of validation data
steps_per_epoch_val: number of batches per validation epoch
ssd_model: compiled SSD model
weights_prefix: prefix for weights directory
iteration: identifier for current training run
initial_epoch: the epoch to start training in
nr_epochs: number of epochs to train
        tensorboard_callback: initialised TensorBoard callback
    Returns:
        Keras History object of the training run
    """
checkpoint_dir = os.path.join(weights_prefix, str(iteration))
os.makedirs(checkpoint_dir, exist_ok=True)
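    # keep only checkpoints that improve the validation loss and abort training if the loss becomes NaN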
callbacks = [
tf.keras.callbacks.ModelCheckpoint(
filepath=f"{checkpoint_dir}/ssd300-{{epoch:02d}}_loss-{{loss:.4f}}_val_loss-{{val_loss:.4f}}.h5",
monitor="val_loss",
verbose=1,
save_best_only=True,
save_weights_only=False
),
tf.keras.callbacks.TerminateOnNaN(),
# tf.keras.callbacks.EarlyStopping(patience=2, min_delta=0.001, monitor="val_loss")
]
if tensorboard_callback is not None:
callbacks.append(tensorboard_callback)
history = ssd_model.fit_generator(generator=train_generator,
epochs=nr_epochs,
steps_per_epoch=steps_per_epoch_train,
validation_data=val_generator,
validation_steps=steps_per_epoch_val,
callbacks=callbacks,
initial_epoch=initial_epoch)
ssd_model.save(f"{checkpoint_dir}/ssd300.h5")
ssd_model.save_weights(f"{checkpoint_dir}/ssd300_weights.h5")
return history
def _predict_prepare_paths(output_path: str, use_dropout: bool) -> Tuple[str, str]:
filename = "ssd_predictions"
label_filename = "ssd_labels"
if use_dropout:
filename = f"dropout-{filename}"
output_file = os.path.join(output_path, filename)
label_output_file = os.path.join(output_path, label_filename)
return output_file, label_output_file
def _predict_loop(generator: Generator,
use_bayesian: bool,
conf_obj: config.Config,
steps_per_epoch: int,
callables: AttributeDict) -> None:
batch_counter = 0
saved_images_prediction = False
saved_images_decoding = False
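    # sweep entropy thresholds in steps of 0.1 from ssd_entropy_threshold_min (inclusive)
    # up to ssd_entropy_threshold_max (exclusive)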
if conf_obj.parameters.ssd_use_entropy_threshold:
nr_steps = math.floor(
(conf_obj.parameters.ssd_entropy_threshold_max - conf_obj.parameters.ssd_entropy_threshold_min) * 10
)
entropy_thresholds = [round(i / 10 + conf_obj.parameters.ssd_entropy_threshold_min, 1) for i in range(nr_steps)]
else:
entropy_thresholds = [0]
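    # For every batch: run the network (multiple stochastic forward passes in the Bayesian case),
    # decode the raw predictions, optionally filter them by entropy threshold, transform the
    # boxes back to the original image coordinates and pickle the results.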
for inputs, filenames, inverse_transforms, original_labels in generator:
if use_bayesian:
predictions = callables.dropout_step(inputs)
else:
predictions = callables.vanilla_step(inputs)
if not saved_images_prediction:
callables.save_images(inputs, predictions, custom_string="after-prediction")
saved_images_prediction = True
if use_bayesian:
decoded_predictions = callables.decode_func_dropout(predictions)
observations = callables.get_observations_func(decoded_predictions)
for entropy_threshold in entropy_thresholds:
if use_bayesian:
callables.save_func(observations, original_labels, filenames,
batch_nr=batch_counter, entropy_threshold=entropy_threshold,
suffix="_prediction")
decoded_predictions = callables.apply_entropy_threshold_func(observations,
entropy_threshold=entropy_threshold)
callables.save_func(decoded_predictions, original_labels, filenames,
batch_nr=batch_counter, entropy_threshold=entropy_threshold,
suffix="_entropy")
decoded_predictions = callables.apply_top_k_func(decoded_predictions)
else:
decoded_predictions = callables.decode_func(predictions, entropy_threshold=entropy_threshold)
if not saved_images_decoding:
custom_string = f"after-decoding-{entropy_threshold}" \
if conf_obj.parameters.ssd_use_entropy_threshold else "after-decoding"
callables.save_images(inputs, decoded_predictions, custom_string=custom_string)
transformed_predictions = callables.transform_func(decoded_predictions,
inverse_transforms)
callables.save_func(transformed_predictions, original_labels, filenames,
batch_nr=batch_counter, entropy_threshold=entropy_threshold,
suffix="_transformed")
if not saved_images_decoding:
saved_images_decoding = True
batch_counter += 1
if batch_counter == steps_per_epoch:
break
def _predict_dropout_step(inputs: np.ndarray, model: tf.keras.models.Model,
batch_size: int, forward_passes_per_image: int) -> np.ndarray:
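    # Run the model forward_passes_per_image times on the same batch and stack the raw
    # predictions per image: 8732 is the number of SSD300 default boxes and 73 the per-box
    # output width for this configuration (class scores plus the box, anchor and variance
    # entries of the ssd_keras output format).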
detections = np.zeros((batch_size, 8732 * forward_passes_per_image, 73))
for forward_pass in range(forward_passes_per_image):
predictions = model.predict_on_batch(inputs)
for i in range(batch_size):
detections[i][forward_pass * 8732:forward_pass * 8732 + 8732] = predictions[i]
return detections
def _predict_vanilla_step(inputs: np.ndarray, model: tf.keras.models.Model) -> np.ndarray:
return np.asarray(model.predict_on_batch(inputs))
def _decode_predictions(predictions: np.ndarray,
decode_func: callable,
image_size: int,
entropy_threshold: float,
confidence_threshold: float,
iou_threshold: float,
top_k: int) -> np.ndarray:
return decode_func(
y_pred=predictions,
img_width=image_size,
img_height=image_size,
input_coords="corners",
entropy_thresh=entropy_threshold,
confidence_thresh=confidence_threshold,
iou_threshold=iou_threshold,
top_k=top_k
)
def _decode_predictions_dropout(predictions: np.ndarray,
decode_func: callable,
image_size: int,
confidence_threshold: float,
) -> List[np.ndarray]:
return decode_func(
y_pred=predictions,
img_width=image_size,
img_height=image_size,
input_coords="corners",
confidence_thresh=confidence_threshold
)
def _apply_entropy_filtering(observations: Sequence[np.ndarray],
entropy_threshold: float,
confidence_threshold: float,
iou_threshold: float,
nr_classes: int,
use_nms: bool = True) -> List[np.ndarray]:
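    # Keep only observations whose predictive entropy (last column) lies below the threshold.
    # The survivors are filtered per class by the confidence threshold, optionally reduced via
    # greedy non-maximum suppression, and every remaining row is prefixed with its class id.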
final_observations = []
batch_size = len(observations)
for i in range(batch_size):
if not observations[i].size:
final_observations.append(observations[i])
continue
filtered_image_observations = observations[i][observations[i][:, -1] < entropy_threshold]
final_image_observations = []
for class_id in range(1, nr_classes):
single_class = filtered_image_observations[:, [class_id, -5, -4, -3, -2]]
threshold_met = single_class[single_class[:, 0] > confidence_threshold]
if threshold_met.shape[0] > 0:
if use_nms:
maxima = ssd_output_decoder._greedy_nms(threshold_met, iou_threshold=iou_threshold)
maxima_output = np.zeros((maxima.shape[0], maxima.shape[1] + 1))
else:
maxima_output = np.zeros((threshold_met.shape[0], threshold_met.shape[1] + 1))
maxima_output[:, 0] = class_id
maxima_output[:, 1:] = maxima if use_nms else threshold_met
final_image_observations.append(maxima_output)
if final_image_observations:
final_image_observations = np.concatenate(final_image_observations, axis=0)
else:
final_image_observations = np.array(final_image_observations)
final_observations.append(final_image_observations)
return final_observations
def _apply_top_k(detections: Sequence[np.ndarray], top_k: float) -> List[np.ndarray]:
final_detections = []
batch_size = len(detections)
data_type = np.dtype([('class_id', np.int32),
('confidence', 'f4'),
('xmin', 'f4'),
('ymin', 'f4'),
('xmax', 'f4'),
('ymax', 'f4')])
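    # Sort the detections by confidence in descending order via a structured view; if more than
    # top_k rows remain, np.argpartition selects the top_k highest-confidence rows (after the
    # descending sort these are simply the first top_k rows, returned in arbitrary order).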
for i in range(batch_size):
image_detections = detections[i]
if not image_detections.size:
final_detections.append(image_detections)
continue
image_detections_structured = np.core.records.fromarrays(image_detections.transpose(),
dtype=data_type)
descending_indices = np.argsort(-image_detections_structured['confidence'])
image_detections_sorted = image_detections[descending_indices]
if image_detections_sorted.shape[0] > top_k:
top_k_indices = np.argpartition(image_detections_sorted[:, 1],
kth=image_detections_sorted.shape[0] - top_k,
axis=0)[image_detections_sorted.shape[0] - top_k:]
final_detections.append(image_detections_sorted[top_k_indices])
else:
final_detections.append(image_detections_sorted)
return final_detections
def _transform_predictions(decoded_predictions: np.ndarray, inverse_transforms: Sequence[np.ndarray],
inverse_transform_func: callable) -> np.ndarray:
return inverse_transform_func(decoded_predictions, inverse_transforms)
def _save_predictions(transformed_predictions: Union[np.ndarray, Sequence[np.ndarray]],
original_labels: np.ndarray, filenames: Sequence[str],
output_file: str, label_output_file: str,
batch_nr: int, nr_digits: int, entropy_threshold: float,
suffix: str) -> None:
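    # One predictions pickle file is written per batch (and per suffix/entropy threshold),
    # plus one pickle file per batch holding the ground-truth labels and the file names.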
counter_str = str(batch_nr).zfill(nr_digits)
filename = f"{output_file}{suffix}-{counter_str}"
filename = f"{filename}-{entropy_threshold}" if entropy_threshold else filename
label_filename = f"{label_output_file}-{counter_str}.bin"
with open(filename, "wb") as file, open(label_filename, "wb") as label_file:
pickle.dump(transformed_predictions, file)
pickle.dump({"labels": original_labels, "filenames": filenames}, label_file)
def _predict_save_images(inputs: np.ndarray, predictions: np.ndarray,
save_images: callable,
get_coco_cat_maps_func: callable,
output_path: str,
conf_obj: config.Config,
custom_string: str) -> None:
save_images(inputs, predictions,
output_path, conf_obj.paths.coco, conf_obj.parameters.ssd_image_size,
get_coco_cat_maps_func, custom_string)
def _get_observations(detections: Sequence[np.ndarray]) -> List[np.ndarray]:
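    # Cluster the decoded detections of each image into "observations": detections whose boxes
    # mutually overlap with an IoU of at least 0.95 are treated as repeated detections of the
    # same object (e.g. stemming from different dropout forward passes). Every observation is
    # reduced to the mean of its member detections, and the entropy of the averaged class
    # distribution is written into its last column.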
batch_size = len(detections)
observations = [[] for _ in range(batch_size)]
final_observations = []
# iterate over images
for i in range(batch_size):
detections_image = np.asarray(detections[i])
if not detections_image.size:
final_observations.append(detections_image)
continue
overlaps = bounding_box_utils.iou(detections_image[:, -5:-1],
detections_image[:, -5:-1],
mode="outer_product",
border_pixels="include")
image_observations = []
used_boxes = None
for j in range(overlaps.shape[0]):
# check if box is already in existing observation
if used_boxes is not None and j in used_boxes:
continue
box_overlaps = overlaps[j]
overlap_detections = np.nonzero(box_overlaps >= 0.95)
if not len(overlap_detections[0]):
continue
observation_set = np.unique(overlap_detections, axis=0)
for k in overlap_detections[0]:
# check if box was already removed from observation, then skip
if k not in observation_set:
continue
# check if other found detections are also overlapping with this
# detection
second_overlaps = overlaps[k]
second_detections = np.unique(np.nonzero(second_overlaps >= 0.95), axis=0)
difference = np.setdiff1d(observation_set, second_detections, assume_unique=True)
observation_set = np.setdiff1d(observation_set, difference)
if used_boxes is None:
used_boxes = observation_set
else:
used_boxes = np.unique(np.concatenate([used_boxes, observation_set],
axis=0), axis=0)
image_observations.append(observation_set)
for observation in image_observations:
observation_detections = detections_image[observation]
            # average the grouped detections (class probabilities and box coordinates alike)
observation_mean = np.mean(observation_detections, axis=0)
observations[i].append(observation_mean)
final_observations.append(np.asarray(observations[i]))
if not len(observations[i]):
continue
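        # predictive entropy H = -sum_c p_c * log(p_c) over the averaged class probabilities;
        # the last five columns hold the four box coordinates and the entropy value itself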
final_observations[i][:, -1] = -np.sum(final_observations[i][:, :-5] * np.log(final_observations[i][:, :-5]),
axis=-1)
return final_observations
def _set_difference(first_array: np.ndarray, second_array: np.ndarray) -> np.ndarray:
"""
Removes all elements from first_array that are present in second_array.
Args:
first_array: the first array
second_array: the second array
Returns:
set difference between first_array and second_array
"""
max2 = second_array.max(axis=0)
max1 = first_array.max(axis=0)
dims = np.maximum(max2,
max1) + 1
rmi2 = np.ravel_multi_index(second_array.T, dims)
rmi1 = np.ravel_multi_index(first_array.T, dims)
in1d = np.in1d(rmi2, rmi1)
return second_array[~in1d]