diff --git a/src/twomartens/masterthesis/aae/run.py b/src/twomartens/masterthesis/aae/run.py index 0fc8f54..4ad3164 100644 --- a/src/twomartens/masterthesis/aae/run.py +++ b/src/twomartens/masterthesis/aae/run.py @@ -42,7 +42,8 @@ def run_simple(dataset: tf.data.Dataset, channels: int = 1, zsize: int = 32, batch_size: int = 128, - verbose: bool = True) -> None: + verbose: bool = False, + debug: bool = False) -> None: """ Runs the trained auto-encoder for given data set. @@ -55,7 +56,8 @@ def run_simple(dataset: tf.data.Dataset, channels: number of channels in input image (default: 1) zsize: size of the intermediary z (default: 32) batch_size: size of each batch (default: 128) - verbose: if True prints train progress info to console (default: True) + verbose: if True training progress is printed to console (default: False) + debug: if True summaries are collected (default: False) """ # checkpointed tensors and variables @@ -77,6 +79,7 @@ def run_simple(dataset: tf.data.Dataset, outputs = _run_one_epoch_simple(dataset, batch_size=batch_size, global_step=global_step, + debug=debug, **checkpointables) if verbose: @@ -88,6 +91,7 @@ def run_simple(dataset: tf.data.Dataset, def _run_one_epoch_simple(dataset: tf.data.Dataset, batch_size: int, + debug: bool, encoder: model.Encoder, decoder: model.Decoder, global_step: tf.Variable) -> Dict[str, float]: @@ -98,12 +102,12 @@ def _run_one_epoch_simple(dataset: tf.data.Dataset, for x in dataset: reconstruction_loss, x_decoded, z = _run_enc_dec_step_simple(encoder=encoder, - decoder=decoder, - inputs=x, - global_step=global_step) + decoder=decoder, + inputs=x, + global_step=global_step) enc_dec_loss_avg(reconstruction_loss) - if int(global_step % train.LOG_FREQUENCY) == 0: + if int(global_step % train.LOG_FREQUENCY) == 0 and debug: comparison = K.concatenate([x[:int(batch_size / 2)], x_decoded[:int(batch_size / 2), z[:int(batch_size / 2)]]], axis=0) grid = util.prepare_image(comparison.cpu(), nrow=int(batch_size / 2)) @@ -126,7 +130,8 @@ def _run_one_epoch_simple(dataset: tf.data.Dataset, def _run_enc_dec_step_simple(encoder: model.Encoder, decoder: model.Decoder, inputs: tf.Tensor, - global_step: tf.Variable) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + global_step: tf.Variable, + debug: bool) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """ Runs the encoder and decoder jointly for one step (one batch). @@ -135,6 +140,7 @@ def _run_enc_dec_step_simple(encoder: model.Encoder, decoder: model.Decoder, decoder: instance of decoder model inputs: inputs from data set global_step: the global step variable + debug: if True summaries are collected Returns: tuple of reconstruction loss, reconstructed input, latent space value @@ -144,7 +150,7 @@ def _run_enc_dec_step_simple(encoder: model.Encoder, decoder: model.Decoder, reconstruction_loss = tf.losses.log_loss(inputs, x_decoded) - if int(global_step % train.LOG_FREQUENCY) == 0: + if int(global_step % train.LOG_FREQUENCY) == 0 and debug: summary_ops_v2.scalar(name='reconstruction_loss', tensor=reconstruction_loss, step=global_step) diff --git a/src/twomartens/masterthesis/aae/train.py b/src/twomartens/masterthesis/aae/train.py index fc773ba..43e666c 100644 --- a/src/twomartens/masterthesis/aae/train.py +++ b/src/twomartens/masterthesis/aae/train.py @@ -52,7 +52,8 @@ def train_simple(dataset: tf.data.Dataset, lr: float = 0.002, train_epoch: int = 80, batch_size: int = 128, - verbose: bool = True) -> None: + verbose: bool = False, + debug: bool = False) -> None: """ Trains auto-encoder for given data set. @@ -72,7 +73,8 @@ def train_simple(dataset: tf.data.Dataset, lr: initial learning rate (default: 0.002) train_epoch: number of epochs to train (default: 80) batch_size: size of each batch (default: 128) - verbose: if True prints train progress info to console (default: True) + verbose: if True training progress is printed to console (default: False) + debug: if True summaries are collected (default: False) """ # checkpointed tensors and variables @@ -115,6 +117,7 @@ def train_simple(dataset: tf.data.Dataset, _epoch = epoch + previous_epochs outputs = _train_one_epoch_simple(_epoch, dataset, verbose=verbose, + debug=debug, batch_size=batch_size, **checkpointables) @@ -138,6 +141,7 @@ def train_simple(dataset: tf.data.Dataset, def _train_one_epoch_simple(epoch: int, dataset: tf.data.Dataset, verbose: bool, + debug: bool, batch_size: int, learning_rate_var: tf.Variable, decoder: model.Decoder, @@ -166,10 +170,11 @@ def _train_one_epoch_simple(epoch: int, optimizer=enc_dec_optimizer, inputs=x, global_step_enc_dec=global_step_enc_dec, - global_step=global_step) + global_step=global_step, + debug=debug) enc_dec_loss_avg(reconstruction_loss) - if int(global_step % LOG_FREQUENCY) == 0 and verbose: + if int(global_step % LOG_FREQUENCY) == 0 and debug: comparison = K.concatenate([x[:int(batch_size / 2)], x_decoded[:int(batch_size / 2)], z[:int(batch_size/2)]], axis=0) grid = util.prepare_image(comparison.cpu(), nrow=int(batch_size/2)) @@ -194,17 +199,22 @@ def _train_enc_dec_step_simple(encoder: model.Encoder, decoder: model.Decoder, optimizer: tf.train.Optimizer, inputs: tf.Tensor, global_step: tf.Variable, - global_step_enc_dec: tf.Variable) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + global_step_enc_dec: tf.Variable, + debug: bool) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """ Trains the encoder and decoder jointly for one step (one batch). - - :param encoder: instance of encoder model - :param decoder: instance of decoder model - :param optimizer: instance of chosen optimizer - :param inputs: inputs from data set - :param global_step: the global step variable - :param global_step_enc_dec: global step variable for enc_dec - :return: tuple of reconstruction loss, reconstructed input, z value + + Args: + encoder: instance of encoder model + decoder: instance of decoder model + optimizer: instance of chosen optimizer + inputs: inputs from data set + global_step: the global step variable + global_step_enc_dec: global step variable for enc_dec + debug: if True summaries are collected + + Returns: + tuple of reconstruction loss, reconstructed input, z value """ with tf.GradientTape() as tape: z = encoder(inputs) @@ -214,7 +224,7 @@ def _train_enc_dec_step_simple(encoder: model.Encoder, decoder: model.Decoder, enc_dec_grads = tape.gradient(reconstruction_loss, encoder.trainable_variables + decoder.trainable_variables) - if int(global_step % LOG_FREQUENCY) == 0: + if int(global_step % LOG_FREQUENCY) == 0 and debug: summary_ops_v2.scalar(name='reconstruction_loss', tensor=reconstruction_loss, step=global_step) for grad, variable in zip(enc_dec_grads, encoder.trainable_variables + decoder.trainable_variables): diff --git a/src/twomartens/masterthesis/main.py b/src/twomartens/masterthesis/main.py index 8ac9649..39d8645 100644 --- a/src/twomartens/masterthesis/main.py +++ b/src/twomartens/masterthesis/main.py @@ -32,6 +32,7 @@ def main() -> None: ) parser.add_argument("--verbose", default=False, action="store_true", help="provide to get extra output") + parser.add_argument("--debug", default=False, action="store_true", help="provide to collect tensorboard summaries") parser.add_argument('--version', action='version', version='2martens Masterthesis 0.1.0') sub_parsers = parser.add_subparsers(dest="action") sub_parsers.required = True @@ -128,7 +129,7 @@ def _use(args: argparse.Namespace) -> None: f"{args.summary_path}/use/category-{category}/{args.iteration}" ) with use_summary_writer.as_default(): - run.run_simple(coco_data, iteration=args.iteration_trained, + run.run_simple(coco_data, iteration=args.iteration_trained, debug=args.debug, weights_prefix=f"{args.weights_path}/category-{category_trained}", zsize=64, verbose=args.verbose, channels=3, batch_size=batch_size) @@ -151,7 +152,7 @@ def _auto_encoder_train(args: argparse.Namespace) -> None: with train_summary_writer.as_default(): train.train_simple(coco_data, iteration=args.iteration, weights_prefix=f"{args.weights_path}/category-{category}", - zsize=64, lr=0.0001, verbose=args.verbose, + zsize=64, lr=0.0001, verbose=args.verbose, debug=args.debug, channels=3, train_epoch=args.num_epochs, batch_size=batch_size)