Implemented ground truth preparation for SceneNet RGB-D
Signed-off-by: Jim Martens <github@2martens.de>
This commit is contained in:
@ -22,11 +22,13 @@ Functions:
|
||||
load_coco_val(...): loads the COCO validation data into a Tensorflow data set
|
||||
load_scenenet(...): loads the SceneNet RGB-D data into a Tensorflow data set
|
||||
"""
|
||||
from typing import Callable, List, Mapping, Tuple
|
||||
from typing import Callable, List, Mapping, Tuple, Any, Dict
|
||||
from typing import Sequence
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from pycocotools import coco
|
||||
from scipy import ndimage
|
||||
|
||||
|
||||
def load_coco_train(data_path: str, category: int,
|
||||
@ -49,6 +51,7 @@ def load_coco_train(data_path: str, category: int,
|
||||
annotation_file_val = f"{data_path}/annotations/instances_valminusminival2014.json"
|
||||
|
||||
# load training images
|
||||
from pycocotools import coco
|
||||
coco_train = coco.COCO(annotation_file_train)
|
||||
img_ids = coco_train.getImgIds(catIds=[category]) # return all image IDs belonging to given category
|
||||
images = coco_train.loadImgs(img_ids) # load all images
|
||||
@ -100,7 +103,8 @@ def load_coco_val(data_path: str, category: int,
|
||||
Tensorflow data set
|
||||
"""
|
||||
annotation_file_minival = f"{data_path}/annotations/instances_minival2014.json"
|
||||
|
||||
|
||||
from pycocotools import coco
|
||||
coco_val = coco.COCO(annotation_file_minival)
|
||||
img_ids = coco_val.getImgIds(catIds=[category]) # return all image IDs belonging to given category
|
||||
images = coco_val.loadImgs(img_ids) # load all images
|
||||
@ -222,15 +226,74 @@ def _load_images_callback(resized_shape: Sequence[int]) -> Callable[
|
||||
return _load_images
|
||||
|
||||
|
||||
def prepare_scenenet_val(data_path: str, protobuf_path: str) -> Tuple[List[List[str]],
                                                                      List[List[str]],
                                                                      List[Dict[int, dict]]]:
    """
    Prepares the SceneNet RGB-D data and returns it in Python format.

    Walks every trajectory in the protobuf, collects the photo and instance
    file names per frame, and builds a per-trajectory dictionary mapping
    instance ids to metadata (wordnet id, COCO id, light/object info, and a
    bounding box computed from the last frame the instance is visible in).

    Args:
        data_path: path to the SceneNet RGB-D val data set
        protobuf_path: path to the SceneNet RGB-D val protobuf

    Returns:
        file names photos, file names instances, instances
    """
    # local imports: keep the heavy project dependencies out of module import
    from twomartens.masterthesis import definitions
    from twomartens.masterthesis import scenenet_pb2

    trajectories = scenenet_pb2.Trajectories()
    with open(protobuf_path, 'rb') as file:
        trajectories.ParseFromString(file.read())

    file_names_photos = []
    file_names_instances = []
    instances = []
    for trajectory in trajectories.trajectories:
        path = f"{data_path}/{trajectory.render_path}"
        file_names_photos_traj = []
        file_names_instances_traj = []
        instances_traj = {}

        # collect per-instance metadata for this trajectory
        for instance in trajectory.instances:
            instance_type = instance.instance_type
            instance_id = instance.instance_id
            instance_dict = {}
            if instance_type != scenenet_pb2.Instance.BACKGROUND:
                wnid = instance.semantic_wordnet_id
                instance_dict['wordnet_id'] = wnid
                if wnid in definitions.WNID_TO_COCO:
                    instance_dict['coco_id'] = definitions.WNID_TO_COCO[wnid]
                else:
                    instance_dict['coco_id'] = 0  # if no COCO id is found, the correct COCO class is background
                if instance_type == scenenet_pb2.Instance.LIGHT_OBJECT:
                    instance_dict['light_type'] = instance.light_type
                if instance_type == scenenet_pb2.Instance.RANDOM_OBJECT:
                    instance_dict['object_info'] = instance.object_info

            instances_traj[instance_id] = instance_dict

        # iterate through images/frames
        for view in trajectory.views:
            frame_num = view.frame_num
            # NOTE(review): SceneNet RGB-D instance maps are commonly stored
            # as 16-bit PNGs — confirm the ".jpg" extension and the default
            # imread flags against the actual data layout.
            instance_file = f"{path}/instance/{frame_num}.jpg"
            file_names_photos_traj.append(f"{path}/photo/{frame_num}.jpg")
            file_names_instances_traj.append(instance_file)

            # load instance file and derive a bounding box per instance id
            instance_image = cv2.imread(instance_file)
            if instance_image is None:
                # unreadable/missing frame: keep the file names, skip boxes
                continue
            instance_image = np.array(instance_image)
            for instance_id in instances_traj:
                instance_local = np.copy(instance_image)
                instance_local[instance_local != instance_id] = 0
                instance_local[instance_local == instance_id] = 1
                objects = ndimage.find_objects(instance_local)
                if not objects or objects[0] is None:
                    # instance not visible in this frame
                    continue
                coordinates = objects[0]
                # find_objects returns slices in array index order:
                # axis 0 is rows (y), axis 1 is columns (x)
                rows = coordinates[0]
                cols = coordinates[1]
                ymin, ymax = rows.start, rows.stop
                xmin, xmax = cols.start, cols.stop
                instances_traj[instance_id]['bbox'] = (xmin, ymin, xmax, ymax)

        file_names_photos.append(file_names_photos_traj)
        file_names_instances.append(file_names_instances_traj)
        instances.append(instances_traj)

    return file_names_photos, file_names_instances, instances
|
||||
|
||||
@ -37,11 +37,13 @@ def main() -> None:
|
||||
sub_parsers = parser.add_subparsers(dest="action")
|
||||
sub_parsers.required = True
|
||||
|
||||
prepare_parser = sub_parsers.add_parser("prepare", help="Prepare SceneNet RGB-D ground truth")
|
||||
train_parser = sub_parsers.add_parser("train", help="Train a network")
|
||||
test_parser = sub_parsers.add_parser("test", help="Test a network")
|
||||
val_parser = sub_parsers.add_parser("val", help="Validate a network")
|
||||
|
||||
# build sub parsers
|
||||
_build_prepare(prepare_parser)
|
||||
_build_train(train_parser)
|
||||
_build_val(val_parser)
|
||||
|
||||
@ -53,8 +55,16 @@ def main() -> None:
|
||||
_test(args)
|
||||
elif args.action == "val":
|
||||
_val(args)
|
||||
elif args.action == "prepare":
|
||||
_prepare(args)
|
||||
|
||||
|
||||
def _build_prepare(parser: argparse.ArgumentParser) -> None:
|
||||
parser.add_argument("scenenet_path", type=str, help="the path to the SceneNet RGB-D validation data set")
|
||||
parser.add_argument("protobuf_path", type=str, help="the path to the SceneNet RGB-D validation protobuf file")
|
||||
parser.add_argument("ground_truth_path", type=str, help="the path where the ground truth should be stored")
|
||||
|
||||
|
||||
def _build_train(parser: argparse.ArgumentParser) -> None:
|
||||
sub_parsers = parser.add_subparsers(dest="network")
|
||||
sub_parsers.required = True
|
||||
@ -175,5 +185,19 @@ def _bayesian_ssd_train(args: argparse.Namespace) -> None:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def _prepare(args: argparse.Namespace) -> None:
    """Runs the SceneNet RGB-D ground truth preparation and pickles the results."""
    import pickle

    from twomartens.masterthesis import data

    prepared = data.prepare_scenenet_val(args.scenenet_path, args.protobuf_path)
    file_names_photos, file_names_instances, instances = prepared

    # persist each result under a fixed file name in the ground truth directory
    outputs = (
        ("photo_paths.bin", file_names_photos),
        ("instance_paths.bin", file_names_instances),
        ("instances.bin", instances),
    )
    for file_name, payload in outputs:
        with open(f"{args.ground_truth_path}/{file_name}", "wb") as file:
            pickle.dump(payload, file)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user