Improved formatting of numpy results

Signed-off-by: Jim Martens <github@2martens.de>
This commit is contained in:
2019-04-29 11:36:33 +02:00
parent 64a0253834
commit 7f84fafbb2
3 changed files with 27 additions and 16 deletions

View File

@@ -25,6 +25,7 @@ Functions:
from typing import Callable, List, Mapping, Tuple
from typing import Sequence
import math
import numpy as np
import scipy
import tensorflow as tf
@@ -230,7 +231,7 @@ def load_scenenet_val(photo_paths: Sequence[Sequence[str]],
instances: Sequence[Sequence[Sequence[dict]]],
coco_path: str,
num_epochs: int = 1, batch_size: int = 32,
resized_shape: Sequence[int] = (256, 256)) -> tf.data.Dataset:
resized_shape: Sequence[int] = (256, 256)) -> Tuple[tf.data.Dataset, int]:
"""
Loads the SceneNet RGB-D data and returns a data set.
@@ -244,6 +245,7 @@ def load_scenenet_val(photo_paths: Sequence[Sequence[str]],
Returns:
scenenet val data set
number of digits required to print largest batch number
"""
trajectories = zip(photo_paths, instances)
final_image_paths = []
@@ -280,7 +282,9 @@ def load_scenenet_val(photo_paths: Sequence[Sequence[str]],
dataset = dataset.batch(batch_size=batch_size)
dataset = dataset.map(_load_images_ssd_callback(resized_shape))
return dataset
nr_digits = math.ceil((length_dataset * num_epochs) / batch_size)
return dataset, nr_digits
def _load_images_ssd_callback(resized_shape: Sequence[int]) \

View File

@@ -185,10 +185,9 @@ def _ssd_val(args: argparse.Namespace) -> None:
with open(f"{args.ground_truth_path}/instances.bin", "rb") as file:
instances = pickle.load(file)
scenenet_data = data.load_scenenet_val(file_names_photos, instances, args.coco_path,
scenenet_data, nr_digits = data.load_scenenet_val(file_names_photos, instances, args.coco_path,
batch_size=batch_size, resized_shape=(image_size, image_size))
use_summary_writer = summary_ops_v2.create_file_writer(
f"{args.summary_path}/val/ssd/{args.iteration}"
)

View File

@@ -113,7 +113,8 @@ def predict(dataset: tf.data.Dataset,
weights_path: Optional[str] = None,
checkpoint_path: Optional[str] = None,
verbose: Optional[bool] = False,
forward_passes_per_image: Optional[int] = 42) -> None:
forward_passes_per_image: Optional[int] = 42,
nr_digits: Optional[int] = None) -> None:
"""
Run trained SSD on the given data set.
@@ -131,6 +132,7 @@ def predict(dataset: tf.data.Dataset,
verbose: if True, progress is printed to the standard output
forward_passes_per_image: specifies number of forward passes per image
used by DropoutSSD
nr_digits: number of digits needed to print largest batch number
"""
if weights_path is None and checkpoint_path is None:
raise ValueError("Either 'weights_path' or 'checkpoint_path' must be given.")
@@ -151,7 +153,8 @@ def predict(dataset: tf.data.Dataset,
checkpoint = tf.train.Checkpoint(**checkpointables)
checkpoint.restore(latest_checkpoint)
outputs = _predict_one_epoch(dataset, use_dropout, output_path, forward_passes_per_image, **checkpointables)
outputs = _predict_one_epoch(dataset, use_dropout, output_path, forward_passes_per_image,
nr_digits, **checkpointables)
if verbose:
print((
@@ -164,6 +167,7 @@ def _predict_one_epoch(dataset: tf.data.Dataset,
use_dropout: bool,
output_path: str,
forward_passes_per_image: int,
nr_digits: int,
ssd: tf.keras.Model) -> Dict[str, float]:
epoch_start_time = time.time()
@@ -176,7 +180,6 @@ def _predict_one_epoch(dataset: tf.data.Dataset,
# go through the data set
counter = 0
nr_digits = math.ceil(math.log10(len(dataset)))
for inputs in dataset:
decoded_predictions_batch = []
if use_dropout:
@@ -187,7 +190,12 @@ def _predict_one_epoch(dataset: tf.data.Dataset,
decoded_predictions_batch.append(np.array(ssd(inputs)))
# save predictions batch-wise to prevent memory problems
with open(f"{output_file}-{counter:{nr_digits}d}.npy", 'wb') as file:
if nr_digits is not None:
filename = f"{output_file}-{counter:{nr_digits}d}.npy"
else:
filename = f"{output_file}-{counter:d}.npy"
with open(filename, 'wb') as file:
np.save(file, decoded_predictions_batch, allow_pickle=False, fix_imports=False)
counter += 1