diff --git a/src/twomartens/masterthesis/data.py b/src/twomartens/masterthesis/data.py index 1ad6d63..6b46704 100644 --- a/src/twomartens/masterthesis/data.py +++ b/src/twomartens/masterthesis/data.py @@ -18,19 +18,20 @@ Functionality to load data into Tensorflow data sets. Functions: - load_coco(...): loads the COCO data into a Tensorflow data set + load_coco_train(...): loads the COCO training data into a Tensorflow data set + load_coco_val(...): loads the COCO validation data into a Tensorflow data set load_scenenet(...): loads the SceneNet RGB-D data into a Tensorflow data set """ -from typing import List +from typing import Callable, List, Mapping, Tuple from typing import Sequence import tensorflow as tf from pycocotools import coco -def load_coco(data_path: str, category: int, - num_epochs: int, batch_size: int = 32, - resized_shape: Sequence[int] = (256, 256)) -> tf.data.Dataset: +def load_coco_train(data_path: str, category: int, + num_epochs: int, batch_size: int = 32, + resized_shape: Sequence[int] = (256, 256)) -> tf.data.Dataset: """ Loads the COCO trainval35k data and returns a data set. 
@@ -39,12 +40,14 @@ def load_coco(data_path: str, category: int, category: id of the inlying class num_epochs: number of epochs batch_size: batch size (default: 32) - resized_shape: shape of images after resizing them (default: (300,300)) + resized_shape: shape of images after resizing them (default: (256, 256)) + Returns: Tensorflow data set """ annotation_file_train = f"{data_path}/annotations/instances_train2014.json" annotation_file_val = f"{data_path}/annotations/instances_valminusminival2014.json" + # load training images coco_train = coco.COCO(annotation_file_train) img_ids = coco_train.getImgIds(catIds=[category]) # return all image IDs belonging to given category @@ -60,17 +63,85 @@ def load_coco(data_path: str, category: int, annotation_ids = coco_val.getAnnIds(img_ids, catIds=[category]) annotations_val = coco_val.loadAnns(annotation_ids) # load all image annotations file_names_val = {image['id']: f"{data_path}/val2014/{image['file_name']}" for image in images_val} - + images.extend(images_val) annotations.extend(annotations_val) + file_names.update(file_names_val) ids_to_images = {image['id']: image for image in images} + + checked_file_names, checked_bboxes = _clean_dataset(annotations, file_names, ids_to_images) + length_dataset = len(checked_file_names) + + # build image data set + path_dataset = tf.data.Dataset.from_tensor_slices(checked_file_names) + label_dataset = tf.data.Dataset.from_tensor_slices(checked_bboxes) + dataset = tf.data.Dataset.zip((path_dataset, label_dataset)) + dataset = dataset.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=length_dataset, count=num_epochs)) + dataset = dataset.batch(batch_size=batch_size) + dataset = dataset.map(_load_images_callback(resized_shape)) + + return dataset + +def load_coco_val(data_path: str, category: int, + num_epochs: int = 1, batch_size: int = 32, + resized_shape: Sequence[int] = (256, 256)) -> tf.data.Dataset: + """ + Loads the COCO minival2014/val2017 data and returns a data set. 
+ + Args: + data_path: path to the COCO data set + category: id of the inlying class + num_epochs: number of epochs (default: 1) + batch_size: batch size (default: 32) + resized_shape: shape of images after resizing them (default: (256, 256)) + + Returns: + Tensorflow data set + """ + annotation_file_minival = f"{data_path}/annotations/instances_val2017.json" + + coco_val = coco.COCO(annotation_file_minival) + img_ids = coco_val.getImgIds(catIds=[category]) # return all image IDs belonging to given category + images = coco_val.loadImgs(img_ids) # load all images + annotation_ids = coco_val.getAnnIds(img_ids, catIds=[category]) + annotations = coco_val.loadAnns(annotation_ids) # load all image annotations + file_names = {image['id']: f"{data_path}/val2017/{image['file_name']}" for image in images} + ids_to_images = {image['id']: image for image in images} + + checked_file_names, checked_bboxes = _clean_dataset(annotations, file_names, ids_to_images) + length_dataset = len(checked_file_names) + + # build image data set + path_dataset = tf.data.Dataset.from_tensor_slices(checked_file_names) + label_dataset = tf.data.Dataset.from_tensor_slices(checked_bboxes) + dataset = tf.data.Dataset.zip((path_dataset, label_dataset)) + dataset = dataset.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=length_dataset, count=num_epochs)) + dataset = dataset.batch(batch_size=batch_size) + dataset = dataset.map(_load_images_callback(resized_shape)) + + return dataset + + +def _clean_dataset(annotations: Sequence[dict], file_names: Mapping[str, str], + ids_to_images: Mapping[str, dict]) -> Tuple[List[str], List[List[float]]]: + """ + Cleans a given data set from problematic cases and returns cleaned version. 
+ + Args: + annotations: list of annotation dictionaries + file_names: mapping of fileID -> file name + ids_to_images: mapping of imageID -> image dictionary + + Returns: + cleaned file names, corresponding clean bounding boxes + """ checked_file_names = [] checked_bboxes = [] for annotation in annotations: img_id = annotation['image_id'] image = ids_to_images[img_id] - file_name = file_names[img_id] if img_id in file_names else file_names_val[img_id] + file_name = file_names[img_id] bbox = annotation['bbox'] target_height = round(bbox[3]) target_width = round(bbox[2]) @@ -97,15 +168,38 @@ def load_coco(data_path: str, category: int, continue bbox[2] = target_width bbox[3] = target_height - + checked_file_names.append(file_name) checked_bboxes.append(bbox) - - length_dataset = len(checked_file_names) - - def _load_image(paths: Sequence[str], labels: Sequence[Sequence[float]]): - _images = tf.map_fn(lambda path: tf.read_file(path), paths) + return checked_file_names, checked_bboxes + + +def _load_images_callback(resized_shape: Sequence[int]) -> Callable[ + [Sequence[str], Sequence[Sequence[float]]], tf.Tensor]: + """ + Returns the callback function to load images. + + Args: + resized_shape: shape of resized image (height, width) + + Returns: + callback function + """ + + def _load_images(paths: Sequence[str], labels: Sequence[Sequence[float]]) -> tf.Tensor: + """ + Callback function to load images. 
+ + Args: + paths: list of file paths + labels: list of bounding boxes + + Returns: + loaded images + """ + _images = tf.map_fn(lambda path: tf.read_file(path), paths) + def _get_images(image_data: Sequence[tf.Tensor]) -> List[tf.Tensor]: image = tf.image.decode_image(image_data[0], channels=3, dtype=tf.float32) image_shape = tf.shape(image) @@ -116,24 +210,16 @@ def load_coco(data_path: str, category: int, tf.cast(tf.floor(label[3]), dtype=tf.int32), tf.cast(tf.floor(label[2]), dtype=tf.int32)) image_resized = tf.image.resize_image_with_pad(image_cut, resized_shape[0], resized_shape[1]) - + return [image_resized, label] - + processed = tf.map_fn(_get_images, [_images, labels], dtype=[tf.float32, tf.float32]) processed_images = processed[0] processed_images = tf.reshape(processed_images, [-1, resized_shape[0], resized_shape[1], 3]) - + return processed_images - # build image data set - path_dataset = tf.data.Dataset.from_tensor_slices(checked_file_names) - label_dataset = tf.data.Dataset.from_tensor_slices(checked_bboxes) - dataset = tf.data.Dataset.zip((path_dataset, label_dataset)) - dataset = dataset.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=length_dataset, count=num_epochs)) - dataset = dataset.batch(batch_size=batch_size) - dataset = dataset.map(_load_image) - - return dataset + return _load_images def load_scenenet(data_path: str, num_epochs: int, batch_size: int = 32) -> tf.data.Dataset: diff --git a/src/twomartens/masterthesis/main.py b/src/twomartens/masterthesis/main.py index 8514b41..8ac9649 100644 --- a/src/twomartens/masterthesis/main.py +++ b/src/twomartens/masterthesis/main.py @@ -122,8 +122,8 @@ def _use(args: argparse.Namespace) -> None: category = args.category category_trained = args.category_trained batch_size = 16 - coco_data = data.load_coco(coco_path, category, num_epochs=1, - batch_size=batch_size, resized_shape=(256, 256)) + coco_data = data.load_coco_train(coco_path, category, num_epochs=1, + batch_size=batch_size, 
resized_shape=(256, 256)) use_summary_writer = summary_ops_v2.create_file_writer( f"{args.summary_path}/use/category-{category}/{args.iteration}" ) @@ -143,8 +143,8 @@ def _auto_encoder_train(args: argparse.Namespace) -> None: coco_path = args.coco_path category = args.category batch_size = 16 - coco_data = data.load_coco(coco_path, category, num_epochs=args.num_epochs, batch_size=batch_size, - resized_shape=(256, 256)) + coco_data = data.load_coco_train(coco_path, category, num_epochs=args.num_epochs, batch_size=batch_size, + resized_shape=(256, 256)) train_summary_writer = summary_ops_v2.create_file_writer( f"{args.summary_path}/train/category-{category}/{args.iteration}" )