Added labels to data set

Padded the labels with zero entries to make the label tensors homogeneous. The zero entries can be removed later when evaluating. They are part of the data set primarily to preserve the mapping between each prediction result and its corresponding labels. Signed-off-by: Jim Martens <github@2martens.de>
2019-05-10 15:36:20 +02:00
parent 50fd116bc7
commit c77d7bc157
3 changed files with 23 additions and 6 deletions
--- a/src/twomartens/masterthesis/data.py
+++ b/src/twomartens/masterthesis/data.py
@ -255,6 +255,7 @@ def load_scenenet_val(photo_paths: Sequence[Sequence[str]],
    annotation_file_train = f"{coco_path}/annotations/instances_train2014.json"
    cats_to_classes, _, _, _ = coco_utils.get_coco_category_maps(annotation_file_train)
    max_nr_labels = -1
    for trajectory in trajectories:
        traj_image_paths, traj_instances = trajectory
@ -269,16 +270,26 @@ def load_scenenet_val(photo_paths: Sequence[Sequence[str]],
                    bbox[2],
                    bbox[3]
                ])
            len_labels = len(labels)
            if len_labels > max_nr_labels:
                max_nr_labels = len_labels
            final_image_paths.append(image_path)
            final_labels.append(labels)
    empty_label = [0, 0, 0, 0, 0]
    for labels in final_labels:
        len_labels = len(labels)
        if len_labels < max_nr_labels:
            labels += empty_label * (max_nr_labels - len_labels)
    length_dataset = len(final_image_paths)
    path_dataset = tf.data.Dataset.from_tensor_slices(final_image_paths)
-    # label_dataset = tf.data.Dataset.from_tensor_slices(final_labels)
+    label_dataset = tf.data.Dataset.from_tensor_slices(final_labels)
-    # dataset = tf.data.Dataset.zip((path_dataset, label_dataset))
+    dataset = tf.data.Dataset.zip((path_dataset, label_dataset))
-    dataset = path_dataset.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=length_dataset, count=num_epochs))
+    dataset = dataset.repeat(num_epochs)
    dataset = dataset.batch(batch_size=batch_size)
    dataset = dataset.map(_load_images_ssd_callback(resized_shape))
    dataset = dataset.prefetch(1)
--- a/src/twomartens/masterthesis/main.py
+++ b/src/twomartens/masterthesis/main.py
@ -196,7 +196,8 @@ def _ssd_val(args: argparse.Namespace) -> None:
        instances = pickle.load(file)
    scenenet_data, nr_digits = data.load_scenenet_val(file_names_photos, instances, args.coco_path,
-                                                      batch_size=batch_size, resized_shape=(image_size, image_size))
+                                                      batch_size=batch_size,
                                                      resized_shape=(image_size, image_size))
    del file_names_photos, instances
    use_summary_writer = summary_ops_v2.create_file_writer(
--- a/src/twomartens/masterthesis/ssd.py
+++ b/src/twomartens/masterthesis/ssd.py
@ -174,9 +174,11 @@ def _predict_one_epoch(dataset: tf.data.Dataset,
    # prepare filename
    filename = 'ssd_predictions'
    label_filename = 'ssd_labels'
    if use_dropout:
        filename = f"dropout-{filename}"
    output_file = os.path.join(output_path, filename)
    label_output_file = os.path.join(output_path, label_filename)
    # go through the data set
    counter = 0
@ -184,7 +186,7 @@ def _predict_one_epoch(dataset: tf.data.Dataset,
    from tensorflow.python.eager import context
-    for inputs in dataset:
+    for inputs, labels in dataset:
        decoded_predictions_batch = []
        if use_dropout:
            for _ in range(forward_passes_per_image):
@ -200,14 +202,17 @@ def _predict_one_epoch(dataset: tf.data.Dataset,
        if nr_digits is not None:
            counter_str = str(counter).zfill(nr_digits)
            filename = f"{output_file}-{counter_str}.npy"
            label_filename = f"{label_output_file}-{counter_str}.npy"
        else:
            filename = f"{output_file}-{counter:d}.npy"
            label_filename = f"{label_output_file}-{counter:d}.npy"
-        with open(filename, 'wb') as file:
+        with open(filename, 'wb') as file, open(label_filename, 'wb') as label_file:
            decoded_predictions_batch_np = np.array(decoded_predictions_batch)
            del decoded_predictions_batch
            np.save(file, decoded_predictions_batch_np, allow_pickle=False, fix_imports=False)
            del decoded_predictions_batch_np
            np.save(label_file, labels, allow_pickle=False, fix_imports=False)
        counter += 1