From d58179fa2af8f0ad7af87f8e06ef0056914ff2a8 Mon Sep 17 00:00:00 2001 From: Jim Martens Date: Mon, 2 Sep 2019 13:10:12 +0200 Subject: [PATCH] Removed obsolete auto-encoder code Signed-off-by: Jim Martens --- src/twomartens/masterthesis/__init__.py | 1 - src/twomartens/masterthesis/aae/__init__.py | 29 - src/twomartens/masterthesis/aae/data.py | 110 ---- src/twomartens/masterthesis/aae/model.py | 193 ------- src/twomartens/masterthesis/aae/run.py | 152 ----- src/twomartens/masterthesis/aae/train.py | 247 -------- src/twomartens/masterthesis/aae/train_aae.py | 569 ------------------- src/twomartens/masterthesis/aae/util.py | 172 ------ src/twomartens/masterthesis/cli.py | 86 +-- src/twomartens/masterthesis/main.py | 21 - 10 files changed, 2 insertions(+), 1578 deletions(-) delete mode 100644 src/twomartens/masterthesis/aae/__init__.py delete mode 100644 src/twomartens/masterthesis/aae/data.py delete mode 100644 src/twomartens/masterthesis/aae/model.py delete mode 100644 src/twomartens/masterthesis/aae/run.py delete mode 100644 src/twomartens/masterthesis/aae/train.py delete mode 100644 src/twomartens/masterthesis/aae/train_aae.py delete mode 100644 src/twomartens/masterthesis/aae/util.py diff --git a/src/twomartens/masterthesis/__init__.py b/src/twomartens/masterthesis/__init__.py index c1c379e..d82eb81 100644 --- a/src/twomartens/masterthesis/__init__.py +++ b/src/twomartens/masterthesis/__init__.py @@ -18,7 +18,6 @@ Masterthesis package. Subpackages: - ``aae``: provides an implementation of Adversarial Auto Encoders ``ssd_keras``: provides an implementation of SSD Modules: diff --git a/src/twomartens/masterthesis/aae/__init__.py b/src/twomartens/masterthesis/aae/__init__.py deleted file mode 100644 index 7e5b620..0000000 --- a/src/twomartens/masterthesis/aae/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Jim Martens -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Provides an AAE implementation. - -Modules: - ``model``: provides the keras models of the AAE implementation - - ``train``: provides functionality to train the AAE - - ``util``: provides helper functionality for visualization - -Todos: - - make the implementation compatible with the YCB Video dataset -""" diff --git a/src/twomartens/masterthesis/aae/data.py b/src/twomartens/masterthesis/aae/data.py deleted file mode 100644 index 7058d58..0000000 --- a/src/twomartens/masterthesis/aae/data.py +++ /dev/null @@ -1,110 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Jim Martens -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Data functionality for my AAE implementation. - -This module provides a function to prepare the training data. - -Functions: - prepare_training_data(...): prepares the mnist training data -""" -import pickle -from typing import Sequence -from typing import Tuple - -import numpy as np -import tensorflow as tf - -K = tf.keras.backend - - -def prepare_training_data(test_fold_id: int, - inlier_classes: Sequence[int], - total_classes: int, - fold_prefix: str = 'data/data_fold_', - batch_size: int = 128, - folds: int = 5) -> Tuple[tf.data.Dataset, tf.data.Dataset]: - """ - Prepares the MNIST training data. - - Args: - test_fold_id: id of test fold - inlier_classes: list of class ids that are considered inliers - total_classes: total number of classes - fold_prefix: the prefix for the fold pickle files (default: 'data/data_fold_') - batch_size: size of batch (default: 128) - folds: number of folds (default: 5) - - Returns: - A tuple (train dataset, valid dataset) - """ - # prepare data - mnist_train = [] - mnist_valid = [] - - for i in range(folds): - if i != test_fold_id: # exclude testing fold, representing 20% of each class - with open(f"{fold_prefix}{i:d}.pkl", 'rb') as pkl: - fold = pickle.load(pkl) - if len(mnist_valid) == 0: # single out one fold, comprising 20% of each class - mnist_valid = fold - else: # form train set from remaining folds, comprising 60% of each class - mnist_train += fold - - outlier_classes = [] - for i in range(total_classes): - if i not in inlier_classes: - outlier_classes.append(i) - - # keep only train classes - mnist_train = [x for x in mnist_train if x[0] in inlier_classes] - - def _list_of_pairs_to_numpy(list_of_pairs: Sequence[Tuple[int, np.ndarray]]) -> Tuple[np.ndarray, np.ndarray]: - """ - Converts a list of pairs to a numpy array. - - Args: - list_of_pairs: list of pairs - - Returns: - tuple (feature array, label array) - """ - return np.asarray([x[1] for x in list_of_pairs], np.float32), np.asarray([x[0] for x in list_of_pairs], np.int) - - mnist_train_x, mnist_train_y = _list_of_pairs_to_numpy(mnist_train) - mnist_valid_x, mnist_valid_y = _list_of_pairs_to_numpy(mnist_valid) - - # get dataset - train_dataset = tf.data.Dataset.from_tensor_slices((mnist_train_x, mnist_train_y)) - train_dataset = train_dataset.shuffle(mnist_train_x.shape[0]).batch(batch_size, - drop_remainder=True).map(_normalize) - valid_dataset = tf.data.Dataset.from_tensor_slices((mnist_valid_x, mnist_valid_y)) - valid_dataset = valid_dataset.shuffle(mnist_valid_x.shape[0]).batch(batch_size, - drop_remainder=True).map(_normalize) - - return train_dataset, valid_dataset - - -def _normalize(feature: tf.Tensor, label: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: - """ - Normalizes a tensor from a 0-255 range to a 0-1 range and adds one dimension. - - :param feature: tensor to be normalized - :param label: label tensor - :return: normalized tensor - """ - return K.expand_dims(tf.divide(feature, 255.0)), label diff --git a/src/twomartens/masterthesis/aae/model.py b/src/twomartens/masterthesis/aae/model.py deleted file mode 100644 index bd6f90f..0000000 --- a/src/twomartens/masterthesis/aae/model.py +++ /dev/null @@ -1,193 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Jim Martens -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Provides the models of my AAE implementation. - -Classes: - ``Encoder``: encodes an image input to a latent space - - ``Decoder``: decodes data from a latent space to resemble input data - - ``XDiscriminator``: differentiates between real input data and decoded input data - - ``ZDiscriminator``: differentiates between z values drawn from a normal distribution (real) and the encoded input - (fake) - -""" -import tensorflow as tf - -# shortcuts for tensorflow - quasi imports -keras = tf.keras -k = tf.keras.backend - - -class Encoder(keras.Model): - """ - Encodes input to a latent space. - - Args: - zsize: size of the latent space - """ - - def __init__(self, zsize: int) -> None: - super().__init__(name='encoder') - weight_init = keras.initializers.RandomNormal(mean=0, stddev=0.02) - self.conv1 = keras.layers.Conv2D(filters=zsize * 4, kernel_size=3, strides=2, name='conv1', - padding='same', kernel_initializer=weight_init, - activation=keras.activations.sigmoid) - self.conv2 = keras.layers.Conv2D(filters=zsize * 2, kernel_size=3, strides=2, name='conv2', - padding='same', kernel_initializer=weight_init) - self.conv2_a = keras.layers.ReLU() - self.conv3 = keras.layers.Conv2D(filters=zsize, kernel_size=3, strides=2, name='conv3', - padding='same', kernel_initializer=weight_init) - self.conv3_a = keras.layers.ReLU() - self.flatten = keras.layers.Flatten(name='flatten') - self.latent = keras.layers.Dense(units=zsize * (2 ** 5), name='latent') - - def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor: - """See base class.""" - result = self.conv1(inputs) - result = self.conv2(result) - result = self.conv2_a(result) - result = self.conv3(result) - result = self.conv3_a(result) - result = self.flatten(result) - result = self.latent(result) - - return result - - -class Decoder(keras.Model): - """ - Generates input data from latent space values. - """ - - def __init__(self, channels: int, zsize: int, image_size: int) -> None: - """ - Initializes the Decoder class. - - Args: - channels: number of channels in the input image - zsize: size of the latent space - image_size: size of height/width of input image - """ - super().__init__(name='decoder') - weight_init = keras.initializers.RandomNormal(mean=0, stddev=0.02) - # calculate dimension of last conv layer in encoder - conv_image_size = image_size / (2 ** 3) - dimensions = zsize * conv_image_size * conv_image_size - self.conv_shape = (-1, conv_image_size, conv_image_size, zsize) - self.transform = keras.layers.Dense(units=dimensions, name='input_transform') - self.deconv1 = keras.layers.Conv2DTranspose(filters=zsize, kernel_size=3, strides=1, name='deconv1', - padding='same', kernel_initializer=weight_init) - self.deconv1_a = keras.layers.ReLU() - self.deconv2 = keras.layers.Conv2DTranspose(filters=zsize * 2, kernel_size=3, strides=2, name='deconv2', - padding='same', kernel_initializer=weight_init) - self.deconv2_a = keras.layers.ReLU() - self.deconv3 = keras.layers.Conv2DTranspose(filters=zsize * 4, kernel_size=3, strides=2, name='deconv3', - padding='same', kernel_initializer=weight_init) - self.deconv3_a = keras.layers.ReLU() - self.deconv4 = keras.layers.Conv2DTranspose(filters=channels, kernel_size=3, strides=2, name='deconv4', - padding='same', kernel_initializer=weight_init) - - def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor: - """See base class.""" - result = self.transform(inputs) - result = tf.reshape(result, self.conv_shape) - result = self.deconv1(result) - result = self.deconv1_a(result) - result = self.deconv2(result) - result = self.deconv2_a(result) - result = self.deconv3(result) - result = self.deconv3_a(result) - result = self.deconv4(result) - result = k.sigmoid(result) - - return result - - -class ZDiscriminator(keras.Model): - """ - Discriminates between encoded inputs and latent space distribution. - - The latent space value is drawn from a normal distribution with ``0`` mean - and a variance of ``1``. - """ - - def __init__(self) -> None: - super().__init__(name='zdiscriminator') - weight_init = keras.initializers.RandomNormal(mean=0, stddev=0.02) - self.zd1 = keras.layers.Dense(units=128, name='zd1', kernel_initializer=weight_init) - self.zd1_a = keras.layers.LeakyReLU(alpha=0.2) - self.zd2 = keras.layers.Dense(units=128, name='zd2', kernel_initializer=weight_init) - self.zd2_a = keras.layers.LeakyReLU(alpha=0.2) - self.zd3 = keras.layers.Dense(units=1, name='zd3', activation='sigmoid', - kernel_initializer=weight_init) - - def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor: - """See base class.""" - result = self.zd1(inputs) - result = self.zd1_a(result) - result = self.zd2(result) - result = self.zd2_a(result) - result = self.zd3(result) - - return result - - -class XDiscriminator(keras.Model): - """ - Discriminates between generated inputs and the actual inputs. - """ - - def __init__(self) -> None: - super().__init__(name='xdiscriminator') - weight_init = keras.initializers.RandomNormal(mean=0, stddev=0.02) - self.x_padded = keras.layers.ZeroPadding2D(padding=1) - self.xd1 = keras.layers.Conv2D(filters=64, kernel_size=4, strides=2, name='xd1', - padding='valid', kernel_initializer=weight_init) - self.xd1_a = keras.layers.LeakyReLU(alpha=0.2) - self.xd1_a_padded = keras.layers.ZeroPadding2D(padding=1) - self.xd2 = keras.layers.Conv2D(filters=256, kernel_size=4, strides=2, name='xd2', - padding='valid', kernel_initializer=weight_init) - self.xd2_bn = keras.layers.BatchNormalization() - self.xd2_a = keras.layers.LeakyReLU(alpha=0.2) - self.xd2_a_padded = keras.layers.ZeroPadding2D(padding=1) - self.xd3 = keras.layers.Conv2D(filters=512, kernel_size=4, strides=2, name='xd3', - padding='valid', kernel_initializer=weight_init) - self.xd3_bn = keras.layers.BatchNormalization() - self.xd3_a = keras.layers.LeakyReLU(alpha=0.2) - self.xd4 = keras.layers.Conv2D(filters=1, kernel_size=4, strides=1, name='xd4', - padding='valid', kernel_initializer=weight_init, - activation='sigmoid') - - def call(self, inputs: tf.Tensor, **kwargs) -> tf.Tensor: - """See base class.""" - result = self.x_padded(inputs) - result = self.xd1(result) - result = self.xd1_a(result) - result = self.xd1_a_padded(result) - result = self.xd2(result) - result = self.xd2_bn(result) - result = self.xd2_a(result) - result = self.xd2_a_padded(result) - result = self.xd3(result) - result = self.xd3_bn(result) - result = self.xd3_a(result) - result = self.xd4(result) - - return result diff --git a/src/twomartens/masterthesis/aae/run.py b/src/twomartens/masterthesis/aae/run.py deleted file mode 100644 index c6c59ff..0000000 --- a/src/twomartens/masterthesis/aae/run.py +++ /dev/null @@ -1,152 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Jim Martens -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Functionality to run my auto-encoder implementation. - -This module provides a function to run a trained simple auto-encoder. - -Functions: - run_simple(...): runs a trained simple auto-encoder -""" -import os -import time -from typing import Dict, Tuple - -import tensorflow as tf -from tensorflow.python.ops import summary_ops_v2 - -# shortcuts for tensorflow sub packages and classes -from twomartens.masterthesis.aae import model, train, util - -K = tf.keras.backend -tfe = tf.contrib.eager - - -def run_simple(dataset: tf.data.Dataset, - iteration: int, - weights_prefix: str, - image_size: int, - channels: int = 3, - zsize: int = 64, - batch_size: int = 16, - verbose: bool = False) -> None: - """ - Runs the trained auto-encoder for given data set. - - This function runs the trained auto-encoder - - Args: - dataset: run dataset - iteration: identifier for the used training run - weights_prefix: prefix for trained weights directory - image_size: height/width of input image - channels: number of channels in input image (default: 3) - zsize: size of the intermediary z (default: 64) - batch_size: size of each batch (default: 16) - verbose: if True training progress is printed to console (default: False) - """ - - # checkpointed tensors and variables - checkpointables = { - # get models - 'encoder': model.Encoder(zsize), - 'decoder': model.Decoder(channels, zsize, image_size), - } - - global_step = tf.train.get_or_create_global_step() - - # checkpoint - checkpoint_dir = os.path.join(weights_prefix, str(iteration) + '/') - os.makedirs(checkpoint_dir, exist_ok=True) - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) - checkpoint = tf.train.Checkpoint(**checkpointables) - checkpoint.restore(latest_checkpoint) - - outputs = _run_one_epoch_simple(dataset, - batch_size=batch_size, - global_step=global_step, - **checkpointables) - - if verbose: - print(( - f"run time: {outputs['time']:.2f}, " - f"Encoder + Decoder loss: {outputs['enc_dec_loss']:.3f}" - )) - - -def _run_one_epoch_simple(dataset: tf.data.Dataset, - batch_size: int, - encoder: model.Encoder, - decoder: model.Decoder, - global_step: tf.Variable) -> Dict[str, float]: - with summary_ops_v2.always_record_summaries(): - start_time = time.time() - enc_dec_loss_avg = tfe.metrics.Mean(name='encoder_decoder_loss', - dtype=tf.float32) - - for x in dataset: - reconstruction_loss, x_decoded = _run_enc_dec_step_simple(encoder=encoder, - decoder=decoder, - inputs=x, - global_step=global_step) - enc_dec_loss_avg(reconstruction_loss) - - if int(global_step % train.LOG_FREQUENCY) == 0: - comparison = K.concatenate([x[:int(batch_size / 2)], x_decoded[:int(batch_size / 2)]], axis=0) - grid = util.prepare_image(comparison.cpu(), nrow=int(batch_size / 2)) - summary_ops_v2.image(name='reconstruction', - tensor=K.expand_dims(grid, axis=0), max_images=1, - step=global_step) - global_step.assign_add(1) - - end_time = time.time() - run_time = end_time - start_time - - # final losses of epoch - outputs = { - 'enc_dec_loss': enc_dec_loss_avg.result(False), - 'run_time': run_time - } - - return outputs - - -def _run_enc_dec_step_simple(encoder: model.Encoder, decoder: model.Decoder, - inputs: tf.Tensor, - global_step: tf.Variable) -> Tuple[tf.Tensor, tf.Tensor]: - """ - Runs the encoder and decoder jointly for one step (one batch). - - Args: - encoder: instance of encoder model - decoder: instance of decoder model - inputs: inputs from data set - global_step: the global step variable - - Returns: - tuple of reconstruction loss, reconstructed input, latent space value - """ - z = encoder(inputs) - x_decoded = decoder(z) - - reconstruction_loss = tf.losses.log_loss(inputs, x_decoded) - - if int(global_step % train.LOG_FREQUENCY) == 0: - summary_ops_v2.scalar(name='reconstruction_loss', tensor=reconstruction_loss, - step=global_step) - - return reconstruction_loss, x_decoded diff --git a/src/twomartens/masterthesis/aae/train.py b/src/twomartens/masterthesis/aae/train.py deleted file mode 100644 index d3a75f6..0000000 --- a/src/twomartens/masterthesis/aae/train.py +++ /dev/null @@ -1,247 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Jim Martens -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Training functionality for my AAE implementation. - -This module provides a function to train a simple auto-encoder. - -Attributes: - LOG_FREQUENCY: number of steps that must pass before logging happens - -Functions: - train_simple(...): trains a simple auto-encoder only with reconstruction loss - -""" -import os -import time -from typing import Dict -from typing import Tuple - -import tensorflow as tf -from tensorflow.python.ops import summary_ops_v2 - -from twomartens.masterthesis.aae import model -from twomartens.masterthesis.aae import util - -# shortcuts for tensorflow sub packages and classes -K = tf.keras.backend -tfe = tf.contrib.eager - -LOG_FREQUENCY: int = 10 - - -def train_simple(dataset: tf.data.Dataset, - iteration: int, - weights_prefix: str, - image_size: int, - channels: int = 3, - zsize: int = 64, - lr: float = 0.0001, - train_epoch: int = 1, - batch_size: int = 16, - verbose: bool = False) -> None: - """ - Trains auto-encoder for given data set. - - This function creates checkpoints after every - epoch as well as after finishing training (or stopping early). When starting - this function with the same ``iteration`` then the training will try to - continue where it ended last time by restoring a saved checkpoint. - The loss values are provided as scalar summaries. Reconstruction images are - provided as summary images. - - Args: - dataset: train dataset - iteration: identifier for the current training run - weights_prefix: prefix for weights directory - image_size: height/width of input image - channels: number of channels in input image (default: 3) - zsize: size of the intermediary z (default: 64) - lr: initial learning rate (default: 0.0001) - train_epoch: number of epochs to train (default: 1) - batch_size: size of each batch (default: 16) - verbose: if True training progress is printed to console (default: False) - """ - - # checkpointed tensors and variables - checkpointables = { - 'learning_rate_var': K.variable(lr), - } - checkpointables.update({ - # get models - 'encoder': model.Encoder(zsize), - 'decoder': model.Decoder(channels, zsize, image_size), - # define optimizers - 'enc_dec_optimizer': tf.train.AdamOptimizer(learning_rate=checkpointables['learning_rate_var']), - # global step counter - 'epoch_var': K.variable(-1, dtype=tf.int64), - 'global_step': tf.train.get_or_create_global_step(), - 'global_step_enc_dec': K.variable(0, dtype=tf.int64), - }) - - # checkpoint - checkpoint_dir = os.path.join(weights_prefix, str(iteration) + '/') - os.makedirs(checkpoint_dir, exist_ok=True) - checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt') - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) - checkpoint = tf.train.Checkpoint(**checkpointables) - checkpoint.restore(latest_checkpoint) - - def _get_last_epoch(epoch_var: tf.Variable, **kwargs) -> int: - return int(epoch_var) - - last_epoch = _get_last_epoch(**checkpointables) - previous_epochs = 0 - if last_epoch != -1: - previous_epochs = last_epoch + 1 - - with summary_ops_v2.always_record_summaries(): - summary_ops_v2.scalar(name='learning_rate', tensor=checkpointables['learning_rate_var'], - step=checkpointables['global_step']) - - for epoch in range(train_epoch - previous_epochs): - _epoch = epoch + previous_epochs - outputs = _train_one_epoch_simple(_epoch, dataset, - verbose=verbose, - batch_size=batch_size, - **checkpointables) - - if verbose: - print(( - f"[{_epoch + 1:d}/{train_epoch:d}] - " - f"train time: {outputs['per_epoch_time']:.2f}, " - f"Encoder + Decoder loss: {outputs['enc_dec_loss']:.3f}" - )) - - # save weights at end of epoch - checkpoint.save(checkpoint_prefix) - - if verbose: - print("Training finish!... save model weights") - - # save trained models - checkpoint.save(checkpoint_prefix) - - -def _train_one_epoch_simple(epoch: int, - dataset: tf.data.Dataset, - verbose: bool, - batch_size: int, - learning_rate_var: tf.Variable, - decoder: model.Decoder, - encoder: model.Encoder, - enc_dec_optimizer: tf.train.Optimizer, - global_step: tf.Variable, - global_step_enc_dec: tf.Variable, - epoch_var: tf.Variable) -> Dict[str, float]: - with summary_ops_v2.always_record_summaries(): - epoch_var.assign(epoch) - epoch_start_time = time.time() - # define loss variables - enc_dec_loss_avg = tfe.metrics.Mean(name='encoder_decoder_loss', dtype=tf.float32) - - # update learning rate - if (epoch + 1) % 30 == 0: - learning_rate_var.assign(learning_rate_var.value() / 4) - summary_ops_v2.scalar(name='learning_rate', tensor=learning_rate_var, - step=global_step) - if verbose: - print("learning rate change!") - - for x in dataset: - reconstruction_loss, x_decoded = _train_enc_dec_step_simple(encoder=encoder, - decoder=decoder, - optimizer=enc_dec_optimizer, - inputs=x, - global_step_enc_dec=global_step_enc_dec, - global_step=global_step) - enc_dec_loss_avg(reconstruction_loss) - - if int(global_step % LOG_FREQUENCY) == 0: - comparison = K.concatenate([x[:int(batch_size / 2)], x_decoded[:int(batch_size / 2)]], axis=0) - grid = util.prepare_image(comparison.cpu(), nrow=int(batch_size/2)) - summary_ops_v2.image(name='reconstruction', - tensor=K.expand_dims(grid, axis=0), max_images=1, - step=global_step) - global_step.assign_add(1) - - epoch_end_time = time.time() - per_epoch_time = epoch_end_time - epoch_start_time - - # final losses of epoch - outputs = { - 'enc_dec_loss': enc_dec_loss_avg.result(False), - 'per_epoch_time': per_epoch_time, - } - - return outputs - - -def _train_enc_dec_step_simple(encoder: model.Encoder, decoder: model.Decoder, - optimizer: tf.train.Optimizer, - inputs: tf.Tensor, - global_step: tf.Variable, - global_step_enc_dec: tf.Variable) -> Tuple[tf.Tensor, tf.Tensor]: - """ - Trains the encoder and decoder jointly for one step (one batch). - - Args: - encoder: instance of encoder model - decoder: instance of decoder model - optimizer: instance of chosen optimizer - inputs: inputs from data set - global_step: the global step variable - global_step_enc_dec: global step variable for enc_dec - - Returns: - tuple of reconstruction loss, reconstructed input, z value - """ - with tf.GradientTape() as tape: - z = encoder(inputs) - x_decoded = decoder(z) - - reconstruction_loss = tf.losses.log_loss(inputs, x_decoded) - - enc_dec_grads = tape.gradient(reconstruction_loss, - encoder.trainable_variables + decoder.trainable_variables) - if int(global_step % LOG_FREQUENCY) == 0: - summary_ops_v2.scalar(name='reconstruction_loss', tensor=reconstruction_loss, - step=global_step) - for grad, variable in zip(enc_dec_grads, encoder.trainable_variables + decoder.trainable_variables): - summary_ops_v2.histogram(name='gradients/' + variable.name, tensor=tf.math.l2_normalize(grad), - step=global_step) - summary_ops_v2.histogram(name='variables/' + variable.name, tensor=tf.math.l2_normalize(variable), - step=global_step) - optimizer.apply_gradients(zip(enc_dec_grads, - encoder.trainable_variables + decoder.trainable_variables), - global_step=global_step_enc_dec) - - return reconstruction_loss, x_decoded - - -if __name__ == "__main__": - from twomartens.masterthesis.aae.data import prepare_training_data - tf.enable_eager_execution() - inlier_classes = [8] - iteration = 2 - train_dataset, _ = prepare_training_data(test_fold_id=0, inlier_classes=inlier_classes, - total_classes=10) - train_summary_writer = summary_ops_v2.create_file_writer( - './summaries/train/number-' + str(inlier_classes[0]) + '/' + str(iteration)) - with train_summary_writer.as_default(): - train_simple(dataset=train_dataset, iteration=iteration, - weights_prefix='weights/' + str(inlier_classes[0]) + '/') diff --git a/src/twomartens/masterthesis/aae/train_aae.py b/src/twomartens/masterthesis/aae/train_aae.py deleted file mode 100644 index 4927f2a..0000000 --- a/src/twomartens/masterthesis/aae/train_aae.py +++ /dev/null @@ -1,569 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Jim Martens -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Training functionality for my AAE implementation. - -This module provides functions to train the Adversarial Auto Encoder. - -Attributes: - GRACE: specifies the number of epochs that the training loss can stagnate or worsen - before the training is stopped early - TOTAL_LOSS_GRACE_CAP: upper limit for total loss, grace countdown only enabled if total loss higher - -Functions: - prepare_training_data(...): prepares the mnist training data - train(...): trains the AAE models - -Todos: - - fix early stopping - - fix losses reaching exactly zero - -""" -import functools -import os -import time -from typing import Callable -from typing import Dict -from typing import Tuple - -import math -import tensorflow as tf -from tensorflow.python.ops import summary_ops_v2 - -from twomartens.masterthesis.aae import model -from twomartens.masterthesis.aae import util -from twomartens.masterthesis.aae.train import LOG_FREQUENCY - -# shortcuts for tensorflow sub packages and classes -K = tf.keras.backend -tfe = tf.contrib.eager - -GRACE: int = 10 -TOTAL_LOSS_GRACE_CAP: int = 6 - - -def train(dataset: tf.data.Dataset, - iteration: int, - weights_prefix: str, - channels: int = 1, - zsize: int = 32, - lr: float = 0.002, - batch_size: int = 128, - train_epoch: int = 80, - verbose: bool = True, - early_stopping: bool = False) -> None: - """ - Trains AAE for given data set. - - This function provides early stopping and creates checkpoints after every - epoch as well as after finishing training (or stopping early). When starting - this function with the same ``iteration`` then the training will try to - continue where it ended last time by restoring a saved checkpoint. - The loss values are provided as scalar summaries. Reconstruction and sample - images are provided as summary images. - - Args: - dataset: train dataset - iteration: identifier for the current training run - weights_prefix: prefix for weights directory - channels: number of channels in input image (default: 1) - zsize: size of the intermediary z (default: 32) - lr: initial learning rate (default: 0.002) - batch_size: the size of each batch (default: 128) - train_epoch: number of epochs to train (default: 80) - verbose: if True prints train progress info to console (default: True) - early_stopping: if True the early stopping mechanic is enabled (default: False) - - Notes: - The training stops early if for ``GRACE`` number of epochs the loss is not - decreasing. Specifically all individual losses are accounted for and any one - of those not decreasing triggers a ``strike``. If the total loss, which is - a sum of all individual losses, is also not decreasing and has a total - value of more than ``TOTAL_LOSS_GRACE_CAP``, the counter for the remaining grace period is - decreased. If in any epoch afterwards all losses are decreasing the grace - period is reset to ``GRACE``. Lastly the training loop will be stopped early - if the grace counter reaches ``0`` at the end of an epoch. - """ - - # non-preserved tensors - y_real = K.ones(batch_size) - y_fake = K.zeros(batch_size) - sample = K.expand_dims(K.expand_dims(K.random_normal((64, zsize)), axis=1), axis=1) - # z generator function - z_generator = functools.partial(_get_z_variable, batch_size=batch_size, zsize=zsize) - - # non-preserved python variables - encoder_lowest_loss = math.inf - decoder_lowest_loss = math.inf - enc_dec_lowest_loss = math.inf - zd_lowest_loss = math.inf - xd_lowest_loss = math.inf - total_lowest_loss = math.inf - grace_period = GRACE - - # checkpointed tensors and variables - checkpointables = { - 'learning_rate_var': K.variable(lr), - } - checkpointables.update({ - # get models - 'encoder': model.Encoder(zsize), - 'decoder': model.Decoder(channels, zsize), - 'z_discriminator': model.ZDiscriminator(), - 'x_discriminator': model.XDiscriminator(), - # define optimizers - 'decoder_optimizer': tf.train.AdamOptimizer(learning_rate=checkpointables['learning_rate_var'], - beta1=0.5, beta2=0.999), - 'enc_dec_optimizer': tf.train.AdamOptimizer(learning_rate=checkpointables['learning_rate_var'], - beta1=0.5, beta2=0.999), - 'z_discriminator_optimizer': tf.train.AdamOptimizer(learning_rate=checkpointables['learning_rate_var'], - beta1=0.5, beta2=0.999), - 'x_discriminator_optimizer': tf.train.AdamOptimizer(learning_rate=checkpointables['learning_rate_var'], - beta1=0.5, beta2=0.999), - # global step counter - 'epoch_var': K.variable(-1, dtype=tf.int64), - 'global_step': tf.train.get_or_create_global_step(), - 'global_step_decoder': K.variable(0, dtype=tf.int64), - 'global_step_enc_dec': K.variable(0, dtype=tf.int64), - 'global_step_xd': K.variable(0, dtype=tf.int64), - 'global_step_zd': K.variable(0, dtype=tf.int64), - }) - - # checkpoint - checkpoint_dir = os.path.join(weights_prefix, str(iteration) + '/') - os.makedirs(checkpoint_dir, exist_ok=True) - checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt') - latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir) - checkpoint = tf.train.Checkpoint(**checkpointables) - checkpoint.restore(latest_checkpoint) - - def _get_last_epoch(epoch_var: tf.Variable, **kwargs) -> int: - return int(epoch_var) - - last_epoch = _get_last_epoch(**checkpointables) - previous_epochs = 0 - if last_epoch != -1: - previous_epochs = last_epoch + 1 - - with summary_ops_v2.always_record_summaries(): - summary_ops_v2.scalar(name='learning_rate', tensor=checkpointables['learning_rate_var'], - step=checkpointables['global_step']) - - for epoch in range(train_epoch - previous_epochs): - _epoch = epoch + previous_epochs - outputs = _train_one_epoch(_epoch, dataset, targets_real=y_real, - targets_fake=y_fake, z_generator=z_generator, - verbose=verbose, batch_size=batch_size, - **checkpointables) - - if verbose: - print(( - f"[{_epoch + 1:d}/{train_epoch:d}] - " - f"train time: {outputs['per_epoch_time']:.2f}, " - f"Decoder loss: {outputs['decoder_loss']:.3f}, " - f"X Discriminator loss: {outputs['xd_loss']:.3f}, " - f"Z Discriminator loss: {outputs['zd_loss']:.3f}, " - f"Encoder + Decoder loss: {outputs['enc_dec_loss']:.3f}, " - f"Encoder loss: {outputs['encoder_loss']:.3f}" - )) - - # save sample image summary - def _save_sample(decoder: model.Decoder, global_step: tf.Variable, **kwargs) -> None: - resultsample = decoder(sample).cpu() - grid = util.prepare_image(resultsample) - summary_ops_v2.image(name='sample', tensor=K.expand_dims(grid, axis=0), - max_images=1, step=global_step) - - with summary_ops_v2.always_record_summaries(): - _save_sample(**checkpointables) - - # save weights at end of epoch - checkpoint.save(checkpoint_prefix) - - # check for improvements in error reduction - otherwise early stopping - if early_stopping: - strike = False - total_strike = False - total_loss = outputs['encoder_loss'] + outputs['decoder_loss'] + outputs['enc_dec_loss'] + \ - outputs['xd_loss'] + outputs['zd_loss'] - if total_loss < total_lowest_loss: - total_lowest_loss = total_loss - elif total_loss > TOTAL_LOSS_GRACE_CAP: - total_strike = True - if outputs['encoder_loss'] < encoder_lowest_loss: - encoder_lowest_loss = outputs['encoder_loss'] - else: - strike = True - if outputs['decoder_loss'] < decoder_lowest_loss: - decoder_lowest_loss = outputs['decoder_loss'] - else: - strike = True - if outputs['enc_dec_loss'] < enc_dec_lowest_loss: - enc_dec_lowest_loss = outputs['enc_dec_loss'] - else: - strike = True - if outputs['xd_loss'] < xd_lowest_loss: - xd_lowest_loss = outputs['xd_loss'] - else: - strike = True - if outputs['zd_loss'] < zd_lowest_loss: - zd_lowest_loss = outputs['zd_loss'] - else: - strike = True - - if strike and total_strike: - grace_period -= 1 - elif strike: - pass - else: - grace_period = GRACE - - if grace_period == 0: - break - - if verbose: - if grace_period > 0: - print("Training finish!... save model weights") - if grace_period == 0: - print("Training stopped early!... save model weights") - - # save trained models - checkpoint.save(checkpoint_prefix) - - -def _train_one_epoch(epoch: int, - dataset: tf.data.Dataset, - targets_real: tf.Tensor, - verbose: bool, - batch_size: int, - targets_fake: tf.Tensor, - z_generator: Callable[[], tf.Variable], - learning_rate_var: tf.Variable, - decoder: model.Decoder, - encoder: model.Encoder, - x_discriminator: model.XDiscriminator, - z_discriminator: model.ZDiscriminator, - decoder_optimizer: tf.train.Optimizer, - x_discriminator_optimizer: tf.train.Optimizer, - z_discriminator_optimizer: tf.train.Optimizer, - enc_dec_optimizer: tf.train.Optimizer, - global_step: tf.Variable, - global_step_xd: tf.Variable, - global_step_zd: tf.Variable, - global_step_decoder: tf.Variable, - global_step_enc_dec: tf.Variable, - epoch_var: tf.Variable) -> Dict[str, float]: - with summary_ops_v2.always_record_summaries(): - epoch_var.assign(epoch) - epoch_start_time = time.time() - # define loss variables - encoder_loss_avg = tfe.metrics.Mean(name='encoder_loss', dtype=tf.float32) - decoder_loss_avg = tfe.metrics.Mean(name='decoder_loss', dtype=tf.float32) - enc_dec_loss_avg = tfe.metrics.Mean(name='encoder_decoder_loss', dtype=tf.float32) - zd_loss_avg = tfe.metrics.Mean(name='z_discriminator_loss', dtype=tf.float32) - xd_loss_avg = tfe.metrics.Mean(name='x_discriminator_loss', dtype=tf.float32) - - # update learning rate - if (epoch + 1) % 30 == 0: - learning_rate_var.assign(learning_rate_var.value() / 4) - summary_ops_v2.scalar(name='learning_rate', tensor=learning_rate_var, - step=global_step) - if verbose: - print("learning rate change!") - - for x, _ in dataset: - # x discriminator - _xd_train_loss = _train_xdiscriminator_step(x_discriminator=x_discriminator, - decoder=decoder, - optimizer=x_discriminator_optimizer, - inputs=x, - targets_real=targets_real, - targets_fake=targets_fake, - global_step_xd=global_step_xd, - global_step=global_step, - z_generator=z_generator) - xd_loss_avg(_xd_train_loss) - - # -------- - # decoder - _decoder_train_loss = _train_decoder_step(decoder=decoder, - x_discriminator=x_discriminator, - optimizer=decoder_optimizer, - targets=targets_real, - global_step_decoder=global_step_decoder, - global_step=global_step, - z_generator=z_generator) - decoder_loss_avg(_decoder_train_loss) - - # --------- - # z discriminator - _zd_train_loss = _train_zdiscriminator_step(z_discriminator=z_discriminator, - encoder=encoder, - optimizer=z_discriminator_optimizer, - inputs=x, - targets_real=targets_real, - targets_fake=targets_fake, - global_step_zd=global_step_zd, - global_step=global_step, - z_generator=z_generator) - zd_loss_avg(_zd_train_loss) - - # ----------- - # encoder + decoder - encoder_loss, reconstruction_loss, x_decoded = _train_enc_dec_step(encoder=encoder, - decoder=decoder, - z_discriminator=z_discriminator, - optimizer=enc_dec_optimizer, - inputs=x, - targets=targets_real, - global_step_enc_dec=global_step_enc_dec, - global_step=global_step) - enc_dec_loss_avg(reconstruction_loss) - encoder_loss_avg(encoder_loss) - - if int(global_step % LOG_FREQUENCY) == 0: - comparison = K.concatenate([x[:batch_size/2], x_decoded[:batch_size/2]], axis=0) - grid = util.prepare_image(comparison.cpu(), nrow=int(batch_size/2)) - summary_ops_v2.image(name='reconstruction', - tensor=K.expand_dims(grid, axis=0), max_images=1, - step=global_step) - global_step.assign_add(1) - - epoch_end_time = time.time() - per_epoch_time = epoch_end_time - epoch_start_time - - # final losses of epoch - outputs = { - 'decoder_loss': decoder_loss_avg.result(False), - 'encoder_loss': encoder_loss_avg.result(False), - 'enc_dec_loss': enc_dec_loss_avg.result(False), - 'xd_loss': xd_loss_avg.result(False), - 'zd_loss': zd_loss_avg.result(False), - 'per_epoch_time': per_epoch_time, - } - - return outputs - - -def _train_xdiscriminator_step(x_discriminator: model.XDiscriminator, - decoder: model.Decoder, - optimizer: tf.train.Optimizer, - inputs: tf.Tensor, - targets_real: tf.Tensor, - targets_fake: tf.Tensor, - global_step: tf.Variable, - global_step_xd: tf.Variable, - z_generator: Callable[[], tf.Variable]) -> tf.Tensor: - """ - Trains the x discriminator model for one step (one batch). - - :param x_discriminator: instance of x discriminator model - :param decoder: instance of decoder model - :param optimizer: instance of chosen optimizer - :param inputs: inputs from dataset - :param targets_real: target tensor for real loss calculation - :param targets_fake: target tensor for fake loss calculation - :param global_step: the global step variable - :param global_step_xd: global step variable for xd - :param z_generator: callable function that returns a z variable - :return: the calculated loss - """ - with tf.GradientTape() as tape: - xd_result_1 = tf.squeeze(x_discriminator(inputs)) - xd_real_loss = tf.losses.log_loss(targets_real, xd_result_1) - - z = z_generator() - x_fake = decoder(z) - xd_result_2 = tf.squeeze(x_discriminator(x_fake)) - xd_fake_loss = tf.losses.log_loss(targets_fake, xd_result_2) - - _xd_train_loss = xd_real_loss + xd_fake_loss - - xd_grads = tape.gradient(_xd_train_loss, x_discriminator.trainable_variables) - if int(global_step % LOG_FREQUENCY) == 0: - summary_ops_v2.scalar(name='x_discriminator_real_loss', tensor=xd_real_loss, - step=global_step) - summary_ops_v2.scalar(name='x_discriminator_fake_loss', tensor=xd_fake_loss, - step=global_step) - summary_ops_v2.scalar(name='x_discriminator_loss', tensor=_xd_train_loss, - step=global_step) - for grad, variable in zip(xd_grads, x_discriminator.trainable_variables): - summary_ops_v2.histogram(name='gradients/' + variable.name, tensor=tf.math.l2_normalize(grad), - step=global_step) - summary_ops_v2.histogram(name='variables/' + variable.name, tensor=tf.math.l2_normalize(variable), - step=global_step) - optimizer.apply_gradients(zip(xd_grads, x_discriminator.trainable_variables), - global_step=global_step_xd) - - return _xd_train_loss - - -def _train_decoder_step(decoder: model.Decoder, - x_discriminator: model.XDiscriminator, - optimizer: tf.train.Optimizer, - targets: tf.Tensor, - global_step: tf.Variable, - global_step_decoder: tf.Variable, - z_generator: Callable[[], tf.Variable]) -> tf.Tensor: - """ - Trains the decoder model for one step (one batch). - - :param decoder: instance of decoder model - :param x_discriminator: instance of the x discriminator model - :param optimizer: instance of chosen optimizer - :param targets: target tensor for loss calculation - :param global_step: the global step variable - :param global_step_decoder: global step variable for decoder - :param z_generator: callable function that returns a z variable - :return: the calculated loss - """ - with tf.GradientTape() as tape: - z = z_generator() - - x_fake = decoder(z) - xd_result = tf.squeeze(x_discriminator(x_fake)) - _decoder_train_loss = tf.losses.log_loss(targets, xd_result) - - grads = tape.gradient(_decoder_train_loss, decoder.trainable_variables) - if int(global_step % LOG_FREQUENCY) == 0: - summary_ops_v2.scalar(name='decoder_loss', tensor=_decoder_train_loss, - step=global_step) - for grad, variable in zip(grads, decoder.trainable_variables): - summary_ops_v2.histogram(name='gradients/' + variable.name, tensor=tf.math.l2_normalize(grad), - step=global_step) - summary_ops_v2.histogram(name='variables/' + variable.name, tensor=tf.math.l2_normalize(variable), - step=global_step) - optimizer.apply_gradients(zip(grads, decoder.trainable_variables), - global_step=global_step_decoder) - - return _decoder_train_loss - - -def _train_zdiscriminator_step(z_discriminator: model.ZDiscriminator, - encoder: model.Encoder, - optimizer: tf.train.Optimizer, - inputs: tf.Tensor, - targets_real: tf.Tensor, - targets_fake: tf.Tensor, - global_step: tf.Variable, - global_step_zd: tf.Variable, - z_generator: Callable[[], tf.Variable]) -> tf.Tensor: - """ - Trains the z discriminator one step (one batch). - - :param z_discriminator: instance of z discriminator model - :param encoder: instance of encoder model - :param optimizer: instance of chosen optimizer - :param inputs: inputs from dataset - :param targets_real: target tensor for real loss calculation - :param targets_fake: target tensor for fake loss calculation - :param global_step: the global step variable - :param global_step_zd: global step variable for zd - :param z_generator: callable function that returns a z variable - :return: the calculated loss - """ - with tf.GradientTape() as tape: - z = z_generator() - - zd_result = tf.squeeze(z_discriminator(z)) - zd_real_loss = tf.losses.log_loss(targets_real, zd_result) - - z = tf.squeeze(encoder(inputs)) - zd_result = tf.squeeze(z_discriminator(z)) - zd_fake_loss = tf.losses.log_loss(targets_fake, zd_result) - - _zd_train_loss = zd_real_loss + zd_fake_loss - - zd_grads = tape.gradient(_zd_train_loss, z_discriminator.trainable_variables) - if int(global_step % LOG_FREQUENCY) == 0: - summary_ops_v2.scalar(name='z_discriminator_real_loss', tensor=zd_real_loss, - step=global_step) - summary_ops_v2.scalar(name='z_discriminator_fake_loss', tensor=zd_fake_loss, - step=global_step) - summary_ops_v2.scalar(name='z_discriminator_loss', tensor=_zd_train_loss, - step=global_step) - for grad, variable in zip(zd_grads, z_discriminator.trainable_variables): - summary_ops_v2.histogram(name='gradients/' + variable.name, tensor=tf.math.l2_normalize(grad), - step=global_step) - summary_ops_v2.histogram(name='variables/' + variable.name, tensor=tf.math.l2_normalize(variable), - step=global_step) - optimizer.apply_gradients(zip(zd_grads, z_discriminator.trainable_variables), - global_step=global_step_zd) - - return _zd_train_loss - - -def _train_enc_dec_step(encoder: model.Encoder, decoder: model.Decoder, - z_discriminator: model.ZDiscriminator, - optimizer: tf.train.Optimizer, - inputs: tf.Tensor, - targets: tf.Tensor, - global_step: tf.Variable, - global_step_enc_dec: tf.Variable) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: - """ - Trains the encoder and decoder jointly for one step (one batch). - - :param encoder: instance of encoder model - :param decoder: instance of decoder model - :param z_discriminator: instance of z discriminator model - :param optimizer: instance of chosen optimizer - :param inputs: inputs from dataset - :param targets: target tensor for loss calculation - :param global_step: the global step variable - :param global_step_enc_dec: global step variable for enc_dec - :return: tuple of encoder loss, reconstruction loss, reconstructed input - """ - with tf.GradientTape() as tape: - z = encoder(inputs) - x_decoded = decoder(z) - - zd_result = tf.squeeze(z_discriminator(tf.squeeze(z))) - encoder_loss = tf.losses.log_loss(targets, zd_result) * 2.0 - reconstruction_loss = tf.losses.log_loss(inputs, x_decoded) - _enc_dec_train_loss = encoder_loss + reconstruction_loss - - enc_dec_grads = tape.gradient(_enc_dec_train_loss, - encoder.trainable_variables + decoder.trainable_variables) - if int(global_step % LOG_FREQUENCY) == 0: - summary_ops_v2.scalar(name='encoder_loss', tensor=encoder_loss, - step=global_step) - summary_ops_v2.scalar(name='reconstruction_loss', tensor=reconstruction_loss, - step=global_step) - summary_ops_v2.scalar(name='encoder_decoder_loss', tensor=_enc_dec_train_loss, - step=global_step) - for grad, variable in zip(enc_dec_grads, encoder.trainable_variables + decoder.trainable_variables): - summary_ops_v2.histogram(name='gradients/' + variable.name, tensor=tf.math.l2_normalize(grad), - step=global_step) - summary_ops_v2.histogram(name='variables/' + variable.name, tensor=tf.math.l2_normalize(variable), - step=global_step) - optimizer.apply_gradients(zip(enc_dec_grads, - encoder.trainable_variables + decoder.trainable_variables), - global_step=global_step_enc_dec) - - return encoder_loss, reconstruction_loss, x_decoded - - -def _get_z_variable(batch_size: int, zsize: int) -> tf.Variable: - """ - Creates and returns a z variable taken from a normal distribution. - - :param batch_size: size of the batch - :param zsize: size of the z latent space - :return: created variable - """ - z = K.reshape(K.random_normal((batch_size, zsize)), (-1, 1, 1, zsize)) - return K.variable(z) diff --git a/src/twomartens/masterthesis/aae/util.py b/src/twomartens/masterthesis/aae/util.py deleted file mode 100644 index c10ff8b..0000000 --- a/src/twomartens/masterthesis/aae/util.py +++ /dev/null @@ -1,172 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2019 Jim Martens -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Utility functionality for visualizing predictions. - -Functions: - prepare_image(...): prepares a tensor to be visualized as an image - -""" -import math -from typing import Sequence -from typing import Tuple -from typing import Union - -import numpy as np -import tensorflow as tf - -k = tf.keras.backend - - -def prepare_image(tensor: Union[tf.Tensor, Sequence[tf.Tensor]], nrow: int = 8, - padding: int = 2, - normalize: bool = False, range_value: Tuple[float, float] = None, - scale_each: bool = False, pad_value: float = 0.0) -> tf.Tensor: - """ - Prepares a tensor to be saved as image and returns it. - - Args: - tensor: Image to be saved. - given a mini-batch tensor, saves the tensor as a grid of images by calling make_grid. - nrow: Number of images displayed in each row of the grid. - The Final grid size is (B / nrow, nrow). Default is 8. - padding: amount of padding. Default is 2. - normalize: If True, shift the image to the range (0, 1), - by subtracting the minimum and dividing by the maximum pixel value. - range_value: tuple (min, max) where min and max are numbers, - then these numbers are used to normalize the image. By default, min and max - are computed from the tensor. - scale_each: If True, scale each image in the batch of - images separately rather than the (min, max) over all images. - pad_value: Value for the padded pixels. - - Returns: - the prepared tensor - """ - grid = _make_grid(tensor, nrow, padding, normalize, range_value, - scale_each, pad_value) - min_pixel_value = 0 - max_pixel_value = 255 - grid *= max_pixel_value - grid = tf.clip_by_value(grid, min_pixel_value, max_pixel_value) - grid = tf.cast(grid, tf.uint8) - - return grid - - -def _make_grid(tensor: Union[tf.Tensor, Sequence[tf.Tensor]], nrow: int = 8, padding: int = 2, - normalize: bool = False, range_value: Tuple[float, float] = None, - scale_each: bool = False, pad_value: float = 0.0) -> tf.Tensor: - """ - Make a grid of images. - - Example: - See this notebook `here `_ - - :param tensor: 4D mini-batch Tensor of shape (B x C x H x W) - or a list of images all of the same size. - :param nrow: Number of images displayed in each row of the grid. - The Final grid size is (B / nrow, nrow). Default is 8. - :param padding: amount of padding. Default is 2. - :param normalize: If True, shift the image to the range (0, 1), - by subtracting the minimum and dividing by the maximum pixel value. - :param range_value: tuple (min, max) where min and max are numbers, - then these numbers are used to normalize the image. By default, min and max - are computed from the tensor. - :param scale_each: If True, scale each image in the batch of - images separately rather than the (min, max) over all images. - :param pad_value: Value for the padded pixels. - :return: tensor containing image grid - """ - if not (tf.contrib.framework.is_tensor(tensor) or - (isinstance(tensor, list) and all(tf.contrib.framework.is_tensor(t) for t in tensor))): - raise TypeError('tensor or list of tensors expected, got {}'.format(type(tensor))) - - # if list of tensors, convert to a 4D mini-batch Tensor - if isinstance(tensor, list): - tensor = k.stack(tensor, axis=0) - tensor_shape = tf.shape(tensor).numpy() - tensor_rank = tf.rank(tensor).numpy() - - if tensor_rank == 2: # single image H x W - tensor = k.reshape(tensor, (tensor_shape[0], tensor_shape[1], 1)) - if tensor_rank == 3: # single image - if tensor_shape[2] == 1: # if single-channel, convert to 3-channel - tensor = k.concatenate((tensor, tensor, tensor), axis=2) - tensor = k.reshape(tensor, (1, tensor_shape[0], tensor_shape[1], tensor_shape[2])) - - if tensor_rank == 4 and tensor_shape[3] == 1: # single-channel images - tensor = k.concatenate((tensor, tensor, tensor), axis=3) - - if normalize is True: - if range_value is not None: - assert isinstance(range_value, tuple), \ - "range_value has to be a tuple (min, max) if specified. min and max are numbers" - - def norm_ip(img: tf.Tensor, min_v: float, max_v: float) -> tf.Tensor: - """ - Internal function to clip given tensor to given min and max values. - :param img: tensor to be clipped - :param min_v: min value - :param max_v: max value - :return: clipped tensor - """ - img = tf.clip_by_value(img, min_v, max_v) - img = tf.add(img, -min_v) - return tf.divide(img, max_v - min_v + 1e-5) - - def norm_range(t: tf.Tensor, range_v: Tuple[float, float] = None) -> tf.Tensor: - """ - Internal function to normalize a tensor to a given range. - :param t: tensor to be normalized - :param range_v: tuple with (min, max) range values - :return: normalized tensor - """ - if range_v is not None: - return norm_ip(t, range_v[0], range_v[1]) - else: - return norm_ip(t, float(k.min(t)), float(k.max(t))) - - if scale_each is True: - updated_tensors = [] - for t in tensor: # loop over mini-batch dimension - updated_tensors.append(norm_range(t, range_value)) - tensor = k.constant(np.array(updated_tensors)) - else: - tensor = norm_range(tensor, range_value) - - if tensor_shape[0] == 1: - return tf.squeeze(tensor) - - # make the mini-batch of images into a grid - nmaps = tensor_shape[0] - xmaps = min(nrow, nmaps) - ymaps = int(math.ceil(float(nmaps) / xmaps)) - height, width = int(tensor_shape[1] + padding), int(tensor_shape[2] + padding) - grid = tf.fill((height * ymaps + padding, width * xmaps + padding, 3), pad_value).numpy() - tensor_numpy = tensor.numpy() - i = 0 - for y in range(ymaps): - for x in range(xmaps): - if i >= nmaps: - break - start_height = y * height + padding - start_width = x * width + padding - np.copyto(grid[start_height: start_height + height - padding, - start_width:start_width + width - padding], tensor_numpy[i, :, :, :]) - i = i + 1 - return k.constant(grid) diff --git a/src/twomartens/masterthesis/cli.py b/src/twomartens/masterthesis/cli.py index 41f3b5c..fdc8209 100644 --- a/src/twomartens/masterthesis/cli.py +++ b/src/twomartens/masterthesis/cli.py @@ -69,14 +69,12 @@ def prepare(args: argparse.Namespace) -> None: def train(args: argparse.Namespace) -> None: - _train_execute_action(args, _ssd_train, _auto_encoder_train) + _train_execute_action(args, _ssd_train) def test(args: argparse.Namespace) -> None: if args.network == "ssd" or args.network == "bayesian_ssd": _ssd_test(args) - elif args.network == "auto_encoder": - _auto_encoder_test(args) def evaluate(args: argparse.Namespace) -> None: @@ -152,11 +150,9 @@ def _config_execute_action(args: argparse.Namespace, on_get: callable, on_list() -def _train_execute_action(args: argparse.Namespace, on_ssd: callable, on_auto_encoder: callable) -> None: +def _train_execute_action(args: argparse.Namespace, on_ssd: callable) -> None: if args.network == "ssd" or args.network == "bayesian_ssd": on_ssd(args) - elif args.network == "auto_encoder": - on_auto_encoder(args) def _ssd_train(args: argparse.Namespace) -> None: @@ -1060,81 +1056,3 @@ def _visualise_ose_f1(open_set_error: np.ndarray, f1_scores: np.ndarray, pyplot.savefig(f"{output_path}/ose-f1-{file_suffix}.png") pyplot.close(figure) - - -def _auto_encoder_train(args: argparse.Namespace) -> None: - import os - - from tensorflow.python.ops import summary_ops_v2 - - from twomartens.masterthesis import data - from twomartens.masterthesis.aae import train - - tf.enable_eager_execution() - coco_path = args.coco_path - category = args.category - batch_size = 16 - image_size = 256 - coco_data = data.load_coco_train(coco_path, category, num_epochs=args.num_epochs, batch_size=batch_size, - resized_shape=(image_size, image_size)) - summary_path = conf.get_property("Paths.summary") - summary_path = f"{summary_path}/{args.network}/train/category-{category}/{args.iteration}" - train_summary_writer = summary_ops_v2.create_file_writer( - summary_path - ) - os.makedirs(summary_path, exist_ok=True) - - weights_path = conf.get_property("Paths.weights") - weights_path = f"{weights_path}/{args.network}/category-{category}" - os.makedirs(weights_path, exist_ok=True) - if args.debug: - with train_summary_writer.as_default(): - train.train_simple(coco_data, iteration=args.iteration, - weights_prefix=weights_path, - zsize=16, lr=0.0001, verbose=args.verbose, image_size=image_size, - channels=3, train_epoch=args.num_epochs, batch_size=batch_size) - else: - train.train_simple(coco_data, iteration=args.iteration, - weights_prefix=weights_path, - zsize=16, lr=0.0001, verbose=args.verbose, image_size=image_size, - channels=3, train_epoch=args.num_epochs, batch_size=batch_size) - - -def _auto_encoder_test(args: argparse.Namespace) -> None: - import os - - from tensorflow.python.ops import summary_ops_v2 - - from twomartens.masterthesis import data - from twomartens.masterthesis.aae import run - - tf.enable_eager_execution() - coco_path = conf.get_property("Paths.coco") - category = args.category - category_trained = args.category_trained - batch_size = 16 - image_size = 256 - coco_data = data.load_coco_val(coco_path, category, num_epochs=1, - batch_size=batch_size, resized_shape=(image_size, image_size)) - - summary_path = conf.get_property("Paths.summary") - summary_path = f"{summary_path}/{args.network}/val/category-{category}/{args.iteration}" - os.makedirs(summary_path, exist_ok=True) - use_summary_writer = summary_ops_v2.create_file_writer( - summary_path - ) - - weights_path = conf.get_property("Paths.weights") - weights_path = f"{weights_path}/{args.network}/category-{category_trained}" - os.makedirs(weights_path, exist_ok=True) - if args.debug: - with use_summary_writer.as_default(): - run.run_simple(coco_data, iteration=args.iteration_trained, - weights_prefix=weights_path, - zsize=16, verbose=args.verbose, channels=3, batch_size=batch_size, - image_size=image_size) - else: - run.run_simple(coco_data, iteration=args.iteration_trained, - weights_prefix=weights_path, - zsize=16, verbose=args.verbose, channels=3, batch_size=batch_size, - image_size=image_size) diff --git a/src/twomartens/masterthesis/main.py b/src/twomartens/masterthesis/main.py index 9b540b6..9634176 100644 --- a/src/twomartens/masterthesis/main.py +++ b/src/twomartens/masterthesis/main.py @@ -155,13 +155,9 @@ def _build_train(parser: argparse.ArgumentParser) -> None: sub_parsers.required = True ssd_parser = sub_parsers.add_parser("ssd", help="SSD") - # ssd_bayesian_parser = sub_parsers.add_parser("bayesian_ssd", help="SSD with dropout layers") - auto_encoder_parser = sub_parsers.add_parser("auto_encoder", help="Auto-encoder network") # build sub parsers _build_ssd_train(ssd_parser) - # _build_bayesian_ssd(ssd_bayesian_parser) - _build_auto_encoder_train(auto_encoder_parser) def _build_ssd_train(parser: argparse.ArgumentParser) -> None: @@ -169,36 +165,21 @@ def _build_ssd_train(parser: argparse.ArgumentParser) -> None: parser.add_argument("iteration", type=int, help="the training iteration") -def _build_auto_encoder_train(parser: argparse.ArgumentParser) -> None: - parser.add_argument("category", type=int, help="the COCO category to use") - parser.add_argument("num_epochs", type=int, help="the number of epochs to train", default=80) - parser.add_argument("iteration", type=int, help="the training iteration") - - def _build_test(parser: argparse.ArgumentParser) -> None: sub_parsers = parser.add_subparsers(dest="network") sub_parsers.required = True ssd_bayesian_parser = sub_parsers.add_parser("bayesian_ssd", help="SSD with dropout layers") ssd_parser = sub_parsers.add_parser("ssd", help="SSD") - auto_encoder_parser = sub_parsers.add_parser("auto_encoder", help="Auto-encoder network") # build sub parsers _build_ssd_test(ssd_bayesian_parser) _build_ssd_test(ssd_parser) - _build_auto_encoder_test(auto_encoder_parser) def _build_ssd_test(parser: argparse.ArgumentParser) -> None: parser.add_argument("iteration", type=int, help="the validation iteration") parser.add_argument("train_iteration", type=int, help="the train iteration") - - -def _build_auto_encoder_test(parser: argparse.ArgumentParser) -> None: - parser.add_argument("category", type=int, help="the COCO category to validate") - parser.add_argument("category_trained", type=int, help="the trained COCO category") - parser.add_argument("iteration", type=int, help="the validation iteration") - parser.add_argument("iteration_trained", type=int, help="the training iteration") def _build_evaluate(parser: argparse.ArgumentParser) -> None: @@ -228,11 +209,9 @@ def _build_visualise_metrics(parser: argparse.ArgumentParser) -> None: ssd_bayesian_parser = sub_parsers.add_parser("bayesian_ssd", help="SSD with dropout layers") ssd_parser = sub_parsers.add_parser("ssd", help="SSD") - auto_encoder_parser = sub_parsers.add_parser("auto_encoder", help="Auto-encoder network") ssd_bayesian_parser.add_argument("iteration", type=int, help="the validation iteration to use") ssd_parser.add_argument("iteration", type=int, help="the validation iteration to use") - auto_encoder_parser.add_argument("iteration", type=int, help="the validation iteration to use") def _build_measure(parser: argparse.ArgumentParser) -> None: