diff --git a/src/twomartens/masterthesis/data.py b/src/twomartens/masterthesis/data.py index 5904f9a..d42c0a2 100644 --- a/src/twomartens/masterthesis/data.py +++ b/src/twomartens/masterthesis/data.py @@ -22,11 +22,13 @@ Functions: load_coco_val(...): loads the COCO validation data into a Tensorflow data set load_scenenet(...): loads the SceneNet RGB-D data into a Tensorflow data set """ -from typing import Callable, List, Mapping, Tuple +from typing import Callable, List, Mapping, Tuple, Any, Dict from typing import Sequence +import cv2 +import numpy as np import tensorflow as tf -from pycocotools import coco +from scipy import ndimage def load_coco_train(data_path: str, category: int, @@ -49,6 +51,7 @@ def load_coco_train(data_path: str, category: int, annotation_file_val = f"{data_path}/annotations/instances_valminusminival2014.json" # load training images + from pycocotools import coco coco_train = coco.COCO(annotation_file_train) img_ids = coco_train.getImgIds(catIds=[category]) # return all image IDs belonging to given category images = coco_train.loadImgs(img_ids) # load all images @@ -100,7 +103,8 @@ def load_coco_val(data_path: str, category: int, Tensorflow data set """ annotation_file_minival = f"{data_path}/annotations/instances_minival2014.json" - + + from pycocotools import coco coco_val = coco.COCO(annotation_file_minival) img_ids = coco_val.getImgIds(catIds=[category]) # return all image IDs belonging to given category images = coco_val.loadImgs(img_ids) # load all images @@ -222,15 +226,74 @@ def _load_images_callback(resized_shape: Sequence[int]) -> Callable[ return _load_images -def load_scenenet(data_path: str, num_epochs: int, batch_size: int = 32) -> tf.data.Dataset: +def prepare_scenenet_val(data_path: str, protobuf_path: str) -> Tuple[List[List[str]], + List[List[str]], + List[Dict[int, dict]]]: """ - Loads the SceneNet RGB-D data and returns a data set. + Prepares the SceneNet RGB-D data and returns it in Python format. 
Args: - data_path: path to the SceneNet RGB-D data set - num_epochs: number of epochs - batch_size: batch size + data_path: path to the SceneNet RGB-D val data set + protobuf_path: path to the SceneNet RGB-D val protobuf Returns: - Tensorflow data set + file names photos, file names instances, instances """ - pass + from twomartens.masterthesis import definitions + from twomartens.masterthesis import scenenet_pb2 + + trajectories = scenenet_pb2.Trajectories() + with open(protobuf_path, 'rb') as file: + trajectories.ParseFromString(file.read()) + + file_names_photos = [] + file_names_instances = [] + instances = [] + for trajectory in trajectories.trajectories: + path = f"{data_path}/{trajectory.render_path}" + file_names_photos_traj = [] + file_names_instances_traj = [] + instances_traj = {} + + for instance in trajectory.instances: + instance_type = instance.instance_type + instance_id = instance.instance_id + instance_dict = {} + if instance_type != scenenet_pb2.Instance.BACKGROUND: + wnid = instance.semantic_wordnet_id + instance_dict['wordnet_id'] = wnid + if wnid in definitions.WNID_TO_COCO: + instance_dict['coco_id'] = definitions.WNID_TO_COCO[wnid] + else: + instance_dict['coco_id'] = 0 # if no COCO id is found, the correct COCO class is background + if instance_type == scenenet_pb2.Instance.LIGHT_OBJECT: + instance_dict['light_type'] = instance.light_type + if instance_type == scenenet_pb2.Instance.RANDOM_OBJECT: + instance_dict['object_info'] = instance.object_info + + instances_traj[instance_id] = instance_dict + + # iterate through images/frames + for view in trajectory.views: + frame_num = view.frame_num + instance_file = f"{path}/instance/{frame_num}.png" + file_names_photos_traj.append(f"{path}/photo/{frame_num}.jpg") + file_names_instances_traj.append(instance_file) + + # load instance file (IMREAD_UNCHANGED keeps the 16-bit single-channel instance map; default flags would down-convert to 8-bit BGR and corrupt instance ids > 255) + instance_image = np.array(cv2.imread(instance_file, cv2.IMREAD_UNCHANGED)) + for instance_id in instances_traj: + instance_local = np.copy(instance_image) + 
instance_local[instance_local != instance_id] = 0 + instance_local[instance_local == instance_id] = 1 + coordinates = ndimage.find_objects(instance_local)[0] + y = coordinates[0] + x = coordinates[1] + xmin, xmax = x.start, x.stop + ymin, ymax = y.start, y.stop + instances_traj[instance_id]['bbox'] = (xmin, ymin, xmax, ymax) + + file_names_photos.append(file_names_photos_traj) + file_names_instances.append(file_names_instances_traj) + instances.append(instances_traj) + + return file_names_photos, file_names_instances, instances diff --git a/src/twomartens/masterthesis/main.py b/src/twomartens/masterthesis/main.py index cd3ba88..85fb07c 100644 --- a/src/twomartens/masterthesis/main.py +++ b/src/twomartens/masterthesis/main.py @@ -37,11 +37,13 @@ def main() -> None: sub_parsers = parser.add_subparsers(dest="action") sub_parsers.required = True + prepare_parser = sub_parsers.add_parser("prepare", help="Prepare SceneNet RGB-D ground truth") train_parser = sub_parsers.add_parser("train", help="Train a network") test_parser = sub_parsers.add_parser("test", help="Test a network") val_parser = sub_parsers.add_parser("val", help="Validate a network") # build sub parsers + _build_prepare(prepare_parser) _build_train(train_parser) _build_val(val_parser) @@ -53,8 +55,16 @@ def main() -> None: _test(args) elif args.action == "val": _val(args) + elif args.action == "prepare": + _prepare(args) +def _build_prepare(parser: argparse.ArgumentParser) -> None: + parser.add_argument("scenenet_path", type=str, help="the path to the SceneNet RGB-D validation data set") + parser.add_argument("protobuf_path", type=str, help="the path to the SceneNet RGB-D validation protobuf file") + parser.add_argument("ground_truth_path", type=str, help="the path where the ground truth should be stored") + + def _build_train(parser: argparse.ArgumentParser) -> None: sub_parsers = parser.add_subparsers(dest="network") sub_parsers.required = True @@ -175,5 +185,19 @@ def _bayesian_ssd_train(args: 
argparse.Namespace) -> None: raise NotImplementedError +def _prepare(args: argparse.Namespace) -> None: + import pickle + + from twomartens.masterthesis import data + + file_names_photos, file_names_instances, instances = data.prepare_scenenet_val(args.scenenet_path, args.protobuf_path) + with open(f"{args.ground_truth_path}/photo_paths.bin", "wb") as file: + pickle.dump(file_names_photos, file) + with open(f"{args.ground_truth_path}/instance_paths.bin", "wb") as file: + pickle.dump(file_names_instances, file) + with open(f"{args.ground_truth_path}/instances.bin", "wb") as file: + pickle.dump(instances, file) + + if __name__ == "__main__": main()