Implemented ground truth preparation for SceneNet RGB-D
Signed-off-by: Jim Martens <github@2martens.de>
This commit is contained in:
@ -22,11 +22,13 @@ Functions:
|
|||||||
load_coco_val(...): loads the COCO validation data into a Tensorflow data set
|
load_coco_val(...): loads the COCO validation data into a Tensorflow data set
|
||||||
load_scenenet(...): loads the SceneNet RGB-D data into a Tensorflow data set
|
load_scenenet(...): loads the SceneNet RGB-D data into a Tensorflow data set
|
||||||
"""
|
"""
|
||||||
from typing import Callable, List, Mapping, Tuple
|
from typing import Callable, List, Mapping, Tuple, Any, Dict
|
||||||
from typing import Sequence
|
from typing import Sequence
|
||||||
|
|
||||||
|
import cv2
|
||||||
|
import numpy as np
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from pycocotools import coco
|
from scipy import ndimage
|
||||||
|
|
||||||
|
|
||||||
def load_coco_train(data_path: str, category: int,
|
def load_coco_train(data_path: str, category: int,
|
||||||
@ -49,6 +51,7 @@ def load_coco_train(data_path: str, category: int,
|
|||||||
annotation_file_val = f"{data_path}/annotations/instances_valminusminival2014.json"
|
annotation_file_val = f"{data_path}/annotations/instances_valminusminival2014.json"
|
||||||
|
|
||||||
# load training images
|
# load training images
|
||||||
|
from pycocotools import coco
|
||||||
coco_train = coco.COCO(annotation_file_train)
|
coco_train = coco.COCO(annotation_file_train)
|
||||||
img_ids = coco_train.getImgIds(catIds=[category]) # return all image IDs belonging to given category
|
img_ids = coco_train.getImgIds(catIds=[category]) # return all image IDs belonging to given category
|
||||||
images = coco_train.loadImgs(img_ids) # load all images
|
images = coco_train.loadImgs(img_ids) # load all images
|
||||||
@ -100,7 +103,8 @@ def load_coco_val(data_path: str, category: int,
|
|||||||
Tensorflow data set
|
Tensorflow data set
|
||||||
"""
|
"""
|
||||||
annotation_file_minival = f"{data_path}/annotations/instances_minival2014.json"
|
annotation_file_minival = f"{data_path}/annotations/instances_minival2014.json"
|
||||||
|
|
||||||
|
from pycocotools import coco
|
||||||
coco_val = coco.COCO(annotation_file_minival)
|
coco_val = coco.COCO(annotation_file_minival)
|
||||||
img_ids = coco_val.getImgIds(catIds=[category]) # return all image IDs belonging to given category
|
img_ids = coco_val.getImgIds(catIds=[category]) # return all image IDs belonging to given category
|
||||||
images = coco_val.loadImgs(img_ids) # load all images
|
images = coco_val.loadImgs(img_ids) # load all images
|
||||||
@ -222,15 +226,74 @@ def _load_images_callback(resized_shape: Sequence[int]) -> Callable[
|
|||||||
return _load_images
|
return _load_images
|
||||||
|
|
||||||
|
|
||||||
def prepare_scenenet_val(data_path: str, protobuf_path: str) -> Tuple[List[List[str]],
                                                                      List[List[str]],
                                                                      List[Dict[int, dict]]]:
    """
    Prepares the SceneNet RGB-D validation data and returns it in Python format.

    Args:
        data_path: path to the SceneNet RGB-D val data set
        protobuf_path: path to the SceneNet RGB-D val protobuf file

    Returns:
        file names photos, file names instances, instances
        (one outer list entry per trajectory)
    """
    from twomartens.masterthesis import definitions
    from twomartens.masterthesis import scenenet_pb2

    trajectories = scenenet_pb2.Trajectories()
    with open(protobuf_path, 'rb') as file:
        trajectories.ParseFromString(file.read())

    file_names_photos = []
    file_names_instances = []
    instances = []
    for trajectory in trajectories.trajectories:
        path = f"{data_path}/{trajectory.render_path}"
        file_names_photos_traj = []
        file_names_instances_traj = []
        instances_traj = {}

        # collect the static per-instance metadata of this trajectory
        for instance in trajectory.instances:
            instance_type = instance.instance_type
            instance_id = instance.instance_id
            instance_dict = {}
            if instance_type != scenenet_pb2.Instance.BACKGROUND:
                wnid = instance.semantic_wordnet_id
                instance_dict['wordnet_id'] = wnid
                if wnid in definitions.WNID_TO_COCO:
                    instance_dict['coco_id'] = definitions.WNID_TO_COCO[wnid]
                else:
                    instance_dict['coco_id'] = 0  # if no COCO id is found, the correct COCO class is background
                if instance_type == scenenet_pb2.Instance.LIGHT_OBJECT:
                    instance_dict['light_type'] = instance.light_type
                if instance_type == scenenet_pb2.Instance.RANDOM_OBJECT:
                    instance_dict['object_info'] = instance.object_info

            instances_traj[instance_id] = instance_dict

        # iterate through images/frames
        for view in trajectory.views:
            frame_num = view.frame_num
            # SceneNet RGB-D stores instance maps as 16-bit PNGs (photos are JPGs);
            # IMREAD_UNCHANGED keeps the 16-bit single channel so instance ids
            # above 255 are not truncated to 8 bit
            instance_file = f"{path}/instance/{frame_num}.png"
            file_names_photos_traj.append(f"{path}/photo/{frame_num}.jpg")
            file_names_instances_traj.append(instance_file)

            # load instance file
            instance_image = np.array(cv2.imread(instance_file, cv2.IMREAD_UNCHANGED))
            for instance_id in instances_traj:
                # binary mask of the current instance; avoids the in-place
                # double masking which breaks when instance_id == 0
                mask = (instance_image == instance_id).astype(np.uint8)
                objects = ndimage.find_objects(mask)
                if not objects or objects[0] is None:
                    continue  # instance is not visible in this frame
                # find_objects returns slices in array index order: rows (y) first,
                # columns (x) second
                y_slice, x_slice = objects[0][0], objects[0][1]
                xmin, xmax = x_slice.start, x_slice.stop
                ymin, ymax = y_slice.start, y_slice.stop
                # NOTE(review): the bbox is overwritten on every frame, so only
                # the last visible frame's bbox per instance survives — confirm
                # this is intended (per-frame bboxes would need a per-view store)
                instances_traj[instance_id]['bbox'] = (xmin, ymin, xmax, ymax)

        file_names_photos.append(file_names_photos_traj)
        file_names_instances.append(file_names_instances_traj)
        instances.append(instances_traj)

    return file_names_photos, file_names_instances, instances
|
|||||||
@ -37,11 +37,13 @@ def main() -> None:
|
|||||||
sub_parsers = parser.add_subparsers(dest="action")
|
sub_parsers = parser.add_subparsers(dest="action")
|
||||||
sub_parsers.required = True
|
sub_parsers.required = True
|
||||||
|
|
||||||
|
prepare_parser = sub_parsers.add_parser("prepare", help="Prepare SceneNet RGB-D ground truth")
|
||||||
train_parser = sub_parsers.add_parser("train", help="Train a network")
|
train_parser = sub_parsers.add_parser("train", help="Train a network")
|
||||||
test_parser = sub_parsers.add_parser("test", help="Test a network")
|
test_parser = sub_parsers.add_parser("test", help="Test a network")
|
||||||
val_parser = sub_parsers.add_parser("val", help="Validate a network")
|
val_parser = sub_parsers.add_parser("val", help="Validate a network")
|
||||||
|
|
||||||
# build sub parsers
|
# build sub parsers
|
||||||
|
_build_prepare(prepare_parser)
|
||||||
_build_train(train_parser)
|
_build_train(train_parser)
|
||||||
_build_val(val_parser)
|
_build_val(val_parser)
|
||||||
|
|
||||||
@ -53,8 +55,16 @@ def main() -> None:
|
|||||||
_test(args)
|
_test(args)
|
||||||
elif args.action == "val":
|
elif args.action == "val":
|
||||||
_val(args)
|
_val(args)
|
||||||
|
elif args.action == "prepare":
|
||||||
|
_prepare(args)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_prepare(parser: argparse.ArgumentParser) -> None:
|
||||||
|
parser.add_argument("scenenet_path", type=str, help="the path to the SceneNet RGB-D validation data set")
|
||||||
|
parser.add_argument("protobuf_path", type=str, help="the path to the SceneNet RGB-D validation protobuf file")
|
||||||
|
parser.add_argument("ground_truth_path", type=str, help="the path where the ground truth should be stored")
|
||||||
|
|
||||||
|
|
||||||
def _build_train(parser: argparse.ArgumentParser) -> None:
|
def _build_train(parser: argparse.ArgumentParser) -> None:
|
||||||
sub_parsers = parser.add_subparsers(dest="network")
|
sub_parsers = parser.add_subparsers(dest="network")
|
||||||
sub_parsers.required = True
|
sub_parsers.required = True
|
||||||
@ -175,5 +185,19 @@ def _bayesian_ssd_train(args: argparse.Namespace) -> None:
|
|||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare(args: argparse.Namespace) -> None:
    """
    Runs the SceneNet RGB-D ground truth preparation and pickles the results.

    Args:
        args: parsed arguments of the prepare action (scenenet_path,
              protobuf_path, ground_truth_path)
    """
    import pickle

    from twomartens.masterthesis import data

    prepared = data.prepare_scenenet_val(args.scenenet_path, args.protobuf_path)
    file_names_photos, file_names_instances, instances = prepared

    # (output file name, payload) pairs written below the ground truth path
    outputs = (
        ("photo_paths.bin", file_names_photos),
        ("instance_paths.bin", file_names_instances),
        ("instances.bin", instances),
    )
    for file_name, payload in outputs:
        with open(f"{args.ground_truth_path}/{file_name}", "wb") as file:
            pickle.dump(payload, file)
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
Reference in New Issue
Block a user