From c77d7bc157a9e062a56f25ed5aeab98575d3dec8 Mon Sep 17 00:00:00 2001 From: Jim Martens Date: Fri, 10 May 2019 15:36:20 +0200 Subject: [PATCH] Added labels to data set Padded them to make the tensors homogeneous. The zero tensors can be removed later when evaluating. They are part of the data set primarily to ensure mapping between prediction result and corresponding labels. Signed-off-by: Jim Martens --- src/twomartens/masterthesis/data.py | 17 ++++++++++++++--- src/twomartens/masterthesis/main.py | 3 ++- src/twomartens/masterthesis/ssd.py | 9 +++++++-- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/twomartens/masterthesis/data.py b/src/twomartens/masterthesis/data.py index 6351b20..34b0287 100644 --- a/src/twomartens/masterthesis/data.py +++ b/src/twomartens/masterthesis/data.py @@ -255,6 +255,7 @@ def load_scenenet_val(photo_paths: Sequence[Sequence[str]], annotation_file_train = f"{coco_path}/annotations/instances_train2014.json" cats_to_classes, _, _, _ = coco_utils.get_coco_category_maps(annotation_file_train) + max_nr_labels = -1 for trajectory in trajectories: traj_image_paths, traj_instances = trajectory @@ -269,16 +270,26 @@ def load_scenenet_val(photo_paths: Sequence[Sequence[str]], bbox[2], bbox[3] ]) + + len_labels = len(labels) + if len_labels > max_nr_labels: + max_nr_labels = len_labels final_image_paths.append(image_path) final_labels.append(labels) + empty_label = [0, 0, 0, 0, 0] + for labels in final_labels: + len_labels = len(labels) + if len_labels < max_nr_labels: + labels += empty_label * (max_nr_labels - len_labels) + length_dataset = len(final_image_paths) path_dataset = tf.data.Dataset.from_tensor_slices(final_image_paths) - # label_dataset = tf.data.Dataset.from_tensor_slices(final_labels) - # dataset = tf.data.Dataset.zip((path_dataset, label_dataset)) - dataset = path_dataset.apply(tf.data.experimental.shuffle_and_repeat(buffer_size=length_dataset, count=num_epochs)) + label_dataset = 
tf.data.Dataset.from_tensor_slices(final_labels) + dataset = tf.data.Dataset.zip((path_dataset, label_dataset)) + dataset = dataset.repeat(num_epochs) dataset = dataset.batch(batch_size=batch_size) dataset = dataset.map(_load_images_ssd_callback(resized_shape)) dataset = dataset.prefetch(1) diff --git a/src/twomartens/masterthesis/main.py b/src/twomartens/masterthesis/main.py index 549a6ad..690e0b9 100644 --- a/src/twomartens/masterthesis/main.py +++ b/src/twomartens/masterthesis/main.py @@ -196,7 +196,8 @@ def _ssd_val(args: argparse.Namespace) -> None: instances = pickle.load(file) scenenet_data, nr_digits = data.load_scenenet_val(file_names_photos, instances, args.coco_path, - batch_size=batch_size, resized_shape=(image_size, image_size)) + batch_size=batch_size, + resized_shape=(image_size, image_size)) del file_names_photos, instances use_summary_writer = summary_ops_v2.create_file_writer( diff --git a/src/twomartens/masterthesis/ssd.py b/src/twomartens/masterthesis/ssd.py index b464bdf..6f60414 100644 --- a/src/twomartens/masterthesis/ssd.py +++ b/src/twomartens/masterthesis/ssd.py @@ -174,9 +174,11 @@ def _predict_one_epoch(dataset: tf.data.Dataset, # prepare filename filename = 'ssd_predictions' + label_filename = 'ssd_labels' if use_dropout: filename = f"dropout-{filename}" output_file = os.path.join(output_path, filename) + label_output_file = os.path.join(output_path, label_filename) # go through the data set counter = 0 @@ -184,7 +186,7 @@ def _predict_one_epoch(dataset: tf.data.Dataset, from tensorflow.python.eager import context - for inputs in dataset: + for inputs, labels in dataset: decoded_predictions_batch = [] if use_dropout: for _ in range(forward_passes_per_image): @@ -200,14 +202,17 @@ def _predict_one_epoch(dataset: tf.data.Dataset, if nr_digits is not None: counter_str = str(counter).zfill(nr_digits) filename = f"{output_file}-{counter_str}.npy" + label_filename = f"{label_output_file}-{counter_str}.npy" else: filename = 
f"{output_file}-{counter:d}.npy" + label_filename = f"{label_output_file}-{counter:d}.npy" - with open(filename, 'wb') as file: + with open(filename, 'wb') as file, open(label_filename, 'wb') as label_file: decoded_predictions_batch_np = np.array(decoded_predictions_batch) del decoded_predictions_batch np.save(file, decoded_predictions_batch_np, allow_pickle=False, fix_imports=False) del decoded_predictions_batch_np + np.save(label_file, labels, allow_pickle=False, fix_imports=False) counter += 1