Implemented ground truth preparation for SceneNet RGB-D
Signed-off-by: Jim Martens <github@2martens.de>
This commit is contained in:
@ -22,11 +22,13 @@ Functions:
|
||||
load_coco_val(...): loads the COCO validation data into a Tensorflow data set
|
||||
load_scenenet(...): loads the SceneNet RGB-D data into a Tensorflow data set
|
||||
"""
|
||||
from typing import Callable, List, Mapping, Tuple
|
||||
from typing import Callable, List, Mapping, Tuple, Any, Dict
|
||||
from typing import Sequence
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from pycocotools import coco
|
||||
from scipy import ndimage
|
||||
|
||||
|
||||
def load_coco_train(data_path: str, category: int,
|
||||
@ -49,6 +51,7 @@ def load_coco_train(data_path: str, category: int,
|
||||
annotation_file_val = f"{data_path}/annotations/instances_valminusminival2014.json"
|
||||
|
||||
# load training images
|
||||
from pycocotools import coco
|
||||
coco_train = coco.COCO(annotation_file_train)
|
||||
img_ids = coco_train.getImgIds(catIds=[category]) # return all image IDs belonging to given category
|
||||
images = coco_train.loadImgs(img_ids) # load all images
|
||||
@ -100,7 +103,8 @@ def load_coco_val(data_path: str, category: int,
|
||||
Tensorflow data set
|
||||
"""
|
||||
annotation_file_minival = f"{data_path}/annotations/instances_minival2014.json"
|
||||
|
||||
|
||||
from pycocotools import coco
|
||||
coco_val = coco.COCO(annotation_file_minival)
|
||||
img_ids = coco_val.getImgIds(catIds=[category]) # return all image IDs belonging to given category
|
||||
images = coco_val.loadImgs(img_ids) # load all images
|
||||
@ -222,15 +226,74 @@ def _load_images_callback(resized_shape: Sequence[int]) -> Callable[
|
||||
return _load_images
|
||||
|
||||
|
||||
def prepare_scenenet_val(data_path: str, protobuf_path: str) -> Tuple[List[List[str]],
                                                                      List[List[str]],
                                                                      List[Dict[int, dict]]]:
    """
    Prepares the SceneNet RGB-D data and returns it in Python format.

    Walks every trajectory in the protobuf, collects the photo and instance
    file names per frame, and builds a per-trajectory dictionary mapping
    instance ids to metadata (wordnet id, COCO id, light/object info, and a
    bounding box computed from the last frame the instance is visible in).

    Args:
        data_path: path to the SceneNet RGB-D val data set
        protobuf_path: path to the SceneNet RGB-D val protobuf

    Returns:
        file names photos, file names instances, instances
    """
    # local imports: keep the heavy project dependencies out of module import
    from twomartens.masterthesis import definitions
    from twomartens.masterthesis import scenenet_pb2

    trajectories = scenenet_pb2.Trajectories()
    with open(protobuf_path, 'rb') as file:
        trajectories.ParseFromString(file.read())

    file_names_photos = []
    file_names_instances = []
    instances = []
    for trajectory in trajectories.trajectories:
        path = f"{data_path}/{trajectory.render_path}"
        file_names_photos_traj = []
        file_names_instances_traj = []
        instances_traj = {}

        # collect per-instance metadata for this trajectory
        for instance in trajectory.instances:
            instance_type = instance.instance_type
            instance_id = instance.instance_id
            instance_dict = {}
            if instance_type != scenenet_pb2.Instance.BACKGROUND:
                wnid = instance.semantic_wordnet_id
                instance_dict['wordnet_id'] = wnid
                if wnid in definitions.WNID_TO_COCO:
                    instance_dict['coco_id'] = definitions.WNID_TO_COCO[wnid]
                else:
                    instance_dict['coco_id'] = 0  # if no COCO id is found, the correct COCO class is background
                if instance_type == scenenet_pb2.Instance.LIGHT_OBJECT:
                    instance_dict['light_type'] = instance.light_type
                if instance_type == scenenet_pb2.Instance.RANDOM_OBJECT:
                    instance_dict['object_info'] = instance.object_info

            instances_traj[instance_id] = instance_dict

        # iterate through images/frames
        for view in trajectory.views:
            frame_num = view.frame_num
            # NOTE(review): SceneNet RGB-D instance maps are commonly stored
            # as 16-bit PNGs — confirm the ".jpg" extension and the default
            # imread flags against the actual data layout.
            instance_file = f"{path}/instance/{frame_num}.jpg"
            file_names_photos_traj.append(f"{path}/photo/{frame_num}.jpg")
            file_names_instances_traj.append(instance_file)

            # load instance file and derive a bounding box per instance id
            instance_image = cv2.imread(instance_file)
            if instance_image is None:
                # unreadable/missing frame: keep the file names, skip boxes
                continue
            instance_image = np.array(instance_image)
            for instance_id in instances_traj:
                instance_local = np.copy(instance_image)
                instance_local[instance_local != instance_id] = 0
                instance_local[instance_local == instance_id] = 1
                objects = ndimage.find_objects(instance_local)
                if not objects or objects[0] is None:
                    # instance not visible in this frame
                    continue
                coordinates = objects[0]
                # find_objects returns slices in array index order:
                # axis 0 is rows (y), axis 1 is columns (x)
                rows = coordinates[0]
                cols = coordinates[1]
                ymin, ymax = rows.start, rows.stop
                xmin, xmax = cols.start, cols.stop
                instances_traj[instance_id]['bbox'] = (xmin, ymin, xmax, ymax)

        file_names_photos.append(file_names_photos_traj)
        file_names_instances.append(file_names_instances_traj)
        instances.append(instances_traj)

    return file_names_photos, file_names_instances, instances
|
||||
|
||||
@ -37,11 +37,13 @@ def main() -> None:
|
||||
sub_parsers = parser.add_subparsers(dest="action")
|
||||
sub_parsers.required = True
|
||||
|
||||
prepare_parser = sub_parsers.add_parser("prepare", help="Prepare SceneNet RGB-D ground truth")
|
||||
train_parser = sub_parsers.add_parser("train", help="Train a network")
|
||||
test_parser = sub_parsers.add_parser("test", help="Test a network")
|
||||
val_parser = sub_parsers.add_parser("val", help="Validate a network")
|
||||
|
||||
# build sub parsers
|
||||
_build_prepare(prepare_parser)
|
||||
_build_train(train_parser)
|
||||
_build_val(val_parser)
|
||||
|
||||
@ -53,8 +55,16 @@ def main() -> None:
|
||||
_test(args)
|
||||
elif args.action == "val":
|
||||
_val(args)
|
||||
elif args.action == "prepare":
|
||||
_prepare(args)
|
||||
|
||||
|
||||
def _build_prepare(parser: argparse.ArgumentParser) -> None:
|
||||
parser.add_argument("scenenet_path", type=str, help="the path to the SceneNet RGB-D validation data set")
|
||||
parser.add_argument("protobuf_path", type=str, help="the path to the SceneNet RGB-D validation protobuf file")
|
||||
parser.add_argument("ground_truth_path", type=str, help="the path where the ground truth should be stored")
|
||||
|
||||
|
||||
def _build_train(parser: argparse.ArgumentParser) -> None:
|
||||
sub_parsers = parser.add_subparsers(dest="network")
|
||||
sub_parsers.required = True
|
||||
@ -175,5 +185,19 @@ def _bayesian_ssd_train(args: argparse.Namespace) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def _prepare(args: argparse.Namespace) -> None:
    """Runs the SceneNet RGB-D ground truth preparation and pickles the results."""
    import pickle

    from twomartens.masterthesis import data

    prepared = data.prepare_scenenet_val(args.scenenet_path, args.protobuf_path)
    file_names_photos, file_names_instances, instances = prepared

    # persist each result under a fixed file name in the ground truth directory
    outputs = (
        ("photo_paths.bin", file_names_photos),
        ("instance_paths.bin", file_names_instances),
        ("instances.bin", instances),
    )
    for file_name, payload in outputs:
        with open(f"{args.ground_truth_path}/{file_name}", "wb") as file:
            pickle.dump(payload, file)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user