Modified data module to work with SSD data generator

Signed-off-by: Jim Martens <github@2martens.de>
This commit is contained in:
2019-06-13 13:30:02 +02:00
parent 693594e75c
commit 87133dd885

View File

@@ -23,15 +23,19 @@ Functions:
load_scenenet_data(...): loads the SceneNet RGB-D data into a Tensorflow data set load_scenenet_data(...): loads the SceneNet RGB-D data into a Tensorflow data set
prepare_scenenet_data(...): prepares the SceneNet RGB-D data and returns it in Python format prepare_scenenet_data(...): prepares the SceneNet RGB-D data and returns it in Python format
""" """
import functools
from typing import Callable, List, Mapping, Tuple from typing import Callable, List, Mapping, Tuple
from typing import Sequence from typing import Sequence
import math
import numpy as np import numpy as np
import scipy import scipy
import tensorflow as tf import tensorflow as tf
from scipy import ndimage from scipy import ndimage
from twomartens.masterthesis.ssd_keras.data_generator import object_detection_2d_data_generator, \
data_augmentation_chain_original_ssd, object_detection_2d_photometric_ops, object_detection_2d_geometric_ops
from twomartens.masterthesis.ssd_keras.ssd_encoder_decoder import ssd_input_encoder
def load_coco_train(data_path: str, category: int, def load_coco_train(data_path: str, category: int,
num_epochs: int, batch_size: int = 32, num_epochs: int, batch_size: int = 32,
@@ -230,10 +234,10 @@ def _load_images_callback(resized_shape: Sequence[int]) -> Callable[
def load_scenenet_data(photo_paths: Sequence[Sequence[str]], def load_scenenet_data(photo_paths: Sequence[Sequence[str]],
instances: Sequence[Sequence[Sequence[dict]]], instances: Sequence[Sequence[Sequence[dict]]],
coco_path: str, coco_path: str, predictor_sizes: np.ndarray,
num_epochs: int = 1, batch_size: int = 32, batch_size: int,
resized_shape: Sequence[int] = (256, 256), resized_shape: Sequence[int],
mode: str = "inference") -> Tuple[tf.data.Dataset, int, int]: mode: str) -> Tuple[callable, int]:
""" """
Loads the SceneNet RGB-D data and returns a data set. Loads the SceneNet RGB-D data and returns a data set.
@@ -241,14 +245,13 @@ def load_scenenet_data(photo_paths: Sequence[Sequence[str]],
photo_paths: contains a list of image paths per trajectory photo_paths: contains a list of image paths per trajectory
instances: instance data per frame per trajectory instances: instance data per frame per trajectory
coco_path: path to the COCO data set coco_path: path to the COCO data set
num_epochs: number of epochs to use predictor_sizes: sizes of the predictor layers
batch_size: size of every batch batch_size: size of every batch
resized_shape: shape of input images to SSD resized_shape: shape of input images to SSD
mode: one of "inference" or "training" mode: one of "validation" or "training"
Returns: Returns:
scenenet data set scenenet data set generator
number of digits required to print largest batch number
length of dataset length of dataset
""" """
trajectories = zip(photo_paths, instances) trajectories = zip(photo_paths, instances)
@@ -282,31 +285,43 @@ def load_scenenet_data(photo_paths: Sequence[Sequence[str]],
final_image_paths.append(image_path) final_image_paths.append(image_path)
final_labels.append(labels) final_labels.append(labels)
empty_label = [[-1.0, 0.0, 0.0, 0.0, 0.0]] data_generator = object_detection_2d_data_generator.DataGenerator(
real_final_labels = [] filenames=final_image_paths,
for labels in final_labels: labels=final_labels
_labels = labels[:] )
len_labels = len(labels)
if len_labels < max_nr_labels:
_labels.extend(empty_label * (max_nr_labels - len_labels))
real_final_labels.append(_labels)
length_dataset = len(final_image_paths) if mode == "training":
shuffle = True
transformations = [data_augmentation_chain_original_ssd.SSDDataAugmentation(
img_width=resized_shape[0],
img_height=resized_shape[1]
)]
else:
shuffle = False
transformations = [
object_detection_2d_photometric_ops.ConvertTo3Channels(),
object_detection_2d_geometric_ops.Resize(height=resized_shape[0],
width=resized_shape[1])
]
path_dataset = tf.data.Dataset.from_tensor_slices(final_image_paths) generator = functools.partial(
label_dataset = tf.data.Dataset.from_tensor_slices(real_final_labels) data_generator.generate,
dataset = tf.data.Dataset.zip((path_dataset, label_dataset)) batch_size=batch_size,
if mode == "inference": shuffle=shuffle,
dataset = dataset.repeat(num_epochs) transformations=transformations,
elif mode == "training": label_encoder=ssd_input_encoder.SSDInputEncoder(
dataset = dataset.apply(tf.data.experimental.shuffle_and_repeat(length_dataset, num_epochs)) img_height=resized_shape[0],
dataset = dataset.batch(batch_size=batch_size) img_width=resized_shape[1],
dataset = dataset.map(_load_images_ssd_callback(resized_shape)) n_classes=len(cats_to_classes), # 80
dataset = dataset.prefetch(1) predictor_sizes=predictor_sizes
),
returns={'processed_images', 'encoded_labels'},
keep_images_without_gt=False
)
nr_digits = math.ceil(math.log10(math.ceil((length_dataset * num_epochs) / batch_size))) length_dataset = data_generator.dataset_size
return dataset, nr_digits, length_dataset return generator, length_dataset
def _load_images_ssd_callback(resized_shape: Sequence[int]) \ def _load_images_ssd_callback(resized_shape: Sequence[int]) \