% masterthesis-latex/ma.bib

% Encoding: UTF-8
@InProceedings{Japkowicz1995,
title = {A novelty detection approach to classification},
author = {Japkowicz, Nathalie and Myers, Catherine and Gluck, Mark and others},
booktitle = {Proceedings of the 14th International Joint Conference on Artificial Intelligence ({IJCAI})},
year = {1995},
pages = {518--523},
volume = {1},
abstract = {Novelty Detection techniques are concept-learning methods that proceed by recognizing positive instances of a concept rather than differentiating between its positive and negative instances. Novelty Detection approaches consequently require very few, if any, negative training instances. This paper presents a particular Novelty Detection approach to classification that uses a Redundancy Compression and Non-Redundancy Differentiation technique based on the [Gluck & Myers, 1993] model of the hippocampus, a part of the brain critically involved in learning and memory. In particular, this approach consists of training an autoencoder to reconstruct positive input instances at the output layer and then using this autoencoder to recognize novel instances. Classification is possible, after training, because positive instances are expected to be reconstructed accurately while negative instances are not. The purpose of this paper is to compare HIPPO, the system that implements this technique, to C4.5 and feedforward neural network classification on several applications.
System:
* an autoencoder is trained on positive examples only; after training it yields a reconstruction error for every input
* a threshold-determination component is fed the reconstruction errors of both positive and negative examples and produces a discriminator
* noiseless case: the reconstruction error of positive examples is always low and that of negative examples always high
* noisy case: the reconstruction error of positive examples is sometimes high, and that of negative examples sometimes low},
owner = {jim},
timestamp = {2018.12.08},
}
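% A minimal sketch (not the paper's code) of the reconstruction-error scheme in
% the notes above: an autoencoder `ae` (any callable returning reconstructions)
% is trained on positive examples only, and a threshold turns its error into a
% discriminator. All names here are hypothetical.
%
%   import numpy as np
%
%   def reconstruction_error(ae, x):
%       # Per-sample mean squared error between input and reconstruction.
%       return np.mean((x - ae(x)) ** 2, axis=-1)
%
%   def fit_threshold(ae, x_pos, x_neg):
%       # The "threshold determination component": here simply the midpoint
%       # between the mean errors of positive and negative examples.
%       return 0.5 * (reconstruction_error(ae, x_pos).mean()
%                     + reconstruction_error(ae, x_neg).mean())
%
%   def is_positive(ae, x, threshold):
%       # Low error: the instance resembles the learned (positive) concept.
%       return reconstruction_error(ae, x) < threshold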
@InProceedings{Richter2017,
author = {Richter, Charles and Roy, Nicholas},
title = {Safe visual navigation via deep learning and novelty detection},
booktitle = {Robotics: Science and Systems},
year = {2017},
publisher = {Robotics: Science and Systems Foundation},
abstract = {Robots that use learned perceptual models in the real world must be able to safely handle cases where they are forced to make decisions in scenarios that are unlike any of their training examples. However, state-of-the-art deep learning methods are known to produce erratic or unsafe predictions when faced with novel inputs. Furthermore, recent ensemble, bootstrap and dropout methods for quantifying neural network uncertainty may not efficiently provide accurate uncertainty estimates when queried with inputs that are very different from their training data. Rather than unconditionally trusting the predictions of a neural network for unpredictable real-world data, we use an autoencoder to recognize when a query is novel, and revert to a safe prior behavior. With this capability, we can deploy an autonomous deep learning system in arbitrary environments, without concern for whether it has received the appropriate training. We demonstrate our method with a vision-guided robot that can leverage its deep neural network to navigate 50% faster than a safe baseline policy in familiar types of environments, while reverting to the prior behavior in novel environments so that it can safely collect additional training data and continually improve. A video illustrating our approach is available at: http://groups.csail.mit.edu/rrg/videos/safe visual navigation.
LIDAR: a surveying method that measures distance to a target by illuminating it with pulsed laser light and measuring the reflected pulses with a sensor
SLAM: simultaneous localization and mapping
general idea: the robot drives in an environment, the autoencoder flags the input as novel, the robot switches to a conservative (hardcoded?) behaviour, collects the new input and learns the new environment self-supervised, and can then navigate the now-familiar environment faster
},
owner = {jim},
timestamp = {2018.12.08},
}
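% The "general idea" in the notes amounts to a runtime gate around the learned
% policy. A sketch assuming a `novelty_score` function (e.g. autoencoder
% reconstruction error) and two policy callables; all names are hypothetical,
% not from the paper's implementation.
%
%   def act(obs, learned_policy, prior_policy, novelty_score, threshold):
%       # Trust the learned policy only on familiar inputs; on novel ones,
%       # revert to the conservative prior behaviour (and keep the observation
%       # as fresh training data for self-supervised retraining).
%       if novelty_score(obs) > threshold:
%           return prior_policy(obs)
%       return learned_policy(obs)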
@Article{Blundell2015,
author = {Charles Blundell and Julien Cornebise and Koray Kavukcuoglu and Daan Wierstra},
title = {Weight Uncertainty in Neural Networks},
journal = {arXiv e-prints},
year = {2015},
date = {2015-05-21},
eprint = {1505.05424v2},
eprintclass = {stat.ML},
eprinttype = {arXiv},
abstract = {We introduce a new, efficient, principled and backpropagation-compatible algorithm for learning a probability distribution on the weights of a neural network, called Bayes by Backprop. It regularises the weights by minimising a compression cost, known as the variational free energy or the expected lower bound on the marginal likelihood. We show that this principled kind of regularisation yields comparable performance to dropout on MNIST classification. We then demonstrate how the learnt uncertainty in the weights can be used to improve generalisation in non-linear regression problems, and how this weight uncertainty can be used to drive the exploration-exploitation trade-off in reinforcement learning.
comparable regularisation performance to dropout on MNIST
uncertainty in the weights: every weight is represented by a probability distribution over possible values instead of a single point estimate},
file = {:http\://arxiv.org/pdf/1505.05424v2:PDF},
keywords = {stat.ML, cs.LG},
owner = {jim},
timestamp = {2019.01.02},
}
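% A sketch of the "weights as distributions" idea in Bayes by Backprop: a
% linear layer with a factorised Gaussian over its weights, sampled with the
% reparameterisation trick so gradients reach the variational parameters
% (mu, rho). Minimal PyTorch for illustration; the KL term of the variational
% free energy and the bias are omitted.
%
%   import torch
%   import torch.nn as nn
%   import torch.nn.functional as F
%
%   class BayesLinear(nn.Module):
%       def __init__(self, d_in, d_out):
%           super().__init__()
%           self.mu = nn.Parameter(torch.zeros(d_out, d_in))
%           self.rho = nn.Parameter(torch.full((d_out, d_in), -5.0))
%
%       def forward(self, x):
%           sigma = F.softplus(self.rho)   # ensures sigma > 0
%           eps = torch.randn_like(sigma)  # reparameterisation noise
%           w = self.mu + sigma * eps      # fresh weight sample per forward pass
%           return x @ w.t()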
@Article{Bishop1994,
author = {Bishop, Christopher M.},
title = {Novelty detection and neural network validation},
journal = {{IEE} Proceedings - Vision, Image, and Signal Processing},
year = {1994},
volume = {141},
number = {4},
pages = {217--222},
doi = {10.1049/ip-vis:19941330},
abstract = {One of the key factors which limits the use of neural networks in many industrial applications has been the difficulty of demonstrating that a trained network will continue to generate reliable outputs once it is in routine use. An important potential source of errors is novel input data; that is, input data which differ significantly from the data used to train the network. The author investigates the relationship between the degree of novelty of input data and the corresponding reliability of the outputs from the network. He describes a quantitative procedure for assessing novelty, and demonstrates its performance by using an application which involves monitoring oil flow in multiphase pipelines.},
owner = {jim},
publisher = {Institution of Engineering and Technology ({IET})},
timestamp = {2019.01.02},
}
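% Bishop's "quantitative procedure for assessing novelty" boils down to a
% density estimate over the training inputs: network outputs are trusted only
% where the input density is high. A sketch using a kernel density estimate as
% one plausible density model (scikit-learn; an illustration, not the paper's
% exact estimator).
%
%   import numpy as np
%   from sklearn.neighbors import KernelDensity
%
%   kde = KernelDensity(bandwidth=0.5).fit(X_train)  # X_train: (n, d) array
%
%   def novelty(x):
%       # Low log-density under the training distribution => novel input,
%       # so the network's output for x should not be relied upon.
%       return -kde.score_samples(np.atleast_2d(x))[0]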
@InProceedings{Gal2016,
author = {Yarin Gal and Zoubin Ghahramani},
title = {Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning},
booktitle = {Proceedings of The 33rd International Conference on Machine Learning},
year = {2016},
date = {2016-06-20/2016-06-22},
editor = {Maria Florina Balcan and Kilian Q. Weinberger},
volume = {48},
series = {Proceedings of Machine Learning Research},
publisher = {PMLR},
pages = {1050--1059},
abstract = {Deep learning tools have gained tremendous attention in applied machine learning. However such tools for regression and classification do not capture model uncertainty. In comparison, Bayesian models offer a mathematically grounded framework to reason about model uncertainty, but usually come with a prohibitive computational cost. In this paper we develop a new theoretical framework casting dropout training in deep neural networks (NNs) as approximate Bayesian inference in deep Gaussian processes. A direct result of this theory gives us tools to model uncertainty with dropout NNs extracting information from existing models that has been thrown away so far. This mitigates the problem of representing uncertainty in deep learning without sacrificing either computational complexity or test accuracy. We perform an extensive study of the properties of dropout's uncertainty. Various network architectures and non-linearities are assessed on tasks of regression and classification, using MNIST as an example. We show a considerable improvement in predictive log-likelihood and RMSE compared to existing state-of-the-art methods, and finish by using dropout's uncertainty in deep reinforcement learning.
dropout as a Bayesian approximation},
address = {New York, New York, USA},
file = {gal16.pdf:http\://proceedings.mlr.press/v48/gal16.pdf:PDF},
owner = {jim},
timestamp = {2019.01.02},
}
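% MC dropout in practice: keep dropout sampling at test time and average T
% stochastic forward passes; the spread of the samples approximates model
% uncertainty. A PyTorch sketch (our illustration, not the paper's code).
%
%   import torch
%
%   def mc_dropout_predict(model, x, T=50):
%       model.train()  # keeps dropout active (note: also affects batch norm)
%       with torch.no_grad():
%           samples = torch.stack([model(x) for _ in range(T)])
%       return samples.mean(dim=0), samples.var(dim=0)  # predictive mean, variance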
@PhdThesis{Gal2017,
author = {Gal, Yarin},
title = {Uncertainty in Deep Learning},
institution = {University of Cambridge},
year = {2017},
owner = {jim},
timestamp = {2019.01.02},
}
@InCollection{Krizhevsky2012,
author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.},
title = {ImageNet Classification with Deep Convolutional Neural Networks},
booktitle = {Advances in Neural Information Processing Systems},
year = {2012},
editor = {F. Pereira and C. J. C. Burges and L. Bottou and K. Q. Weinberger},
volume = {25},
publisher = {Curran Associates, Inc.},
pages = {1097--1105},
url = {http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf},
abstract = {We trained a large, deep convolutional neural network to classify the 1.2 million high-resolution images in the ImageNet LSVRC-2010 contest into the 1000 different classes. On the test data, we achieved top-1 and top-5 error rates of 37.5% and 17.0% which is considerably better than the previous state-of-the-art. The neural network, which has 60 million parameters and 650,000 neurons, consists of five convolutional layers, some of which are followed by max-pooling layers, and three fully-connected layers with a final 1000-way softmax. To make training faster, we used non-saturating neurons and a very efficient GPU implementation of the convolution operation. To reduce overfitting in the fully-connected layers we employed a recently-developed regularization method called “dropout” that proved to be very effective. We also entered a variant of this model in the ILSVRC-2012 competition and achieved a winning top-5 test error rate of 15.3%, compared to 26.2% achieved by the second-best entry.},
owner = {jim},
timestamp = {2019.01.02},
}
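% The architecture described above (five convolutional layers plus three
% fully-connected layers and a 1000-way softmax) ships with recent torchvision,
% which is a quick way to inspect it:
%
%   from torchvision.models import alexnet
%
%   model = alexnet(weights=None)  # the AlexNet topology, randomly initialised
%   print(model)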
@InCollection{Lakshminarayanan2017,
author = {Lakshminarayanan, Balaji and Pritzel, Alexander and Blundell, Charles},
title = {Simple and Scalable Predictive Uncertainty Estimation using Deep Ensembles},
booktitle = {Advances in Neural Information Processing Systems},
year = {2017},
editor = {I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
volume = {30},
publisher = {Curran Associates, Inc.},
pages = {6402--6413},
url = {http://papers.nips.cc/paper/7219-simple-and-scalable-predictive-uncertainty-estimation-using-deep-ensembles.pdf},
abstract = {Deep neural networks (NNs) are powerful black box predictors that have recently achieved impressive performance on a wide spectrum of tasks. Quantifying predictive uncertainty in NNs is a challenging and yet unsolved problem. Bayesian NNs, which learn a distribution over weights, are currently the state-of-the-art for estimating predictive uncertainty; however these require significant modifications to the training procedure and are computationally expensive compared to standard (non-Bayesian) NNs. We propose an alternative to Bayesian NNs that is simple to implement, readily parallelizable, requires very little hyperparameter tuning, and yields high quality predictive uncertainty estimates. Through a series of experiments on classification and regression benchmarks, we demonstrate that our method produces well-calibrated uncertainty estimates which are as good or better than approximate Bayesian NNs. To assess robustness to dataset shift, we evaluate the predictive uncertainty on test examples from known and unknown distributions, and show that our method is able to express higher uncertainty on out-of-distribution examples. We demonstrate the scalability of our method by evaluating predictive uncertainty estimates on ImageNet.},
owner = {jim},
timestamp = {2019.01.02},
}
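% The deep-ensembles recipe is deliberately simple: train M identical networks
% from different random initialisations and average their predictive
% distributions. A classification sketch; `make_model` and `train` (which
% returns the fitted model) are assumed helpers, not from the paper.
%
%   import torch
%
%   models = [train(make_model(), train_loader) for _ in range(5)]  # M = 5
%
%   def ensemble_predict(x):
%       with torch.no_grad():
%           probs = torch.stack([m(x).softmax(dim=-1) for m in models])
%       return probs.mean(dim=0)  # averaged predictive distribution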
@InProceedings{Thompson2002,
author = {Thompson, B. B. and Marks, R. J. and Choi, J. J. and El-Sharkawi, M. A. and Huang, Ming-Yuh and Bunje, C.},
title = {Implicit learning in autoencoder novelty assessment},
booktitle = {Proceedings of the 2002 International Joint Conference on Neural Networks ({IJCNN})},
year = {2002},
volume = {3},
publisher = {{IEEE}},
pages = {2878--2883},
doi = {10.1109/ijcnn.2002.1007605},
abstract = {When the situation arises that only "normal" behavior is known about a system, it is desirable to develop a system based solely on that behavior which enables the user to determine when that system behavior falls outside of that range of normality. A new method is proposed for detecting such novel behavior through the use of autoassociative neural network encoders, which can be shown to implicitly learn the nature of the underlying "normal" system behavior.},
owner = {jim},
timestamp = {2019.01.02},
}
@InProceedings{Ilg2018,
author = {Eddy Ilg and Özgün {\c{C}}i{\c{c}}ek and Silvio Galesso and Aaron Klein and Osama Makansi and Frank Hutter and Thomas Brox},
title = {Uncertainty Estimates and Multi-hypotheses Networks for Optical Flow},
booktitle = {Computer Vision {\textendash} {ECCV} 2018},
year = {2018},
editor = {Ferrari, V. and Hebert, M. and Sminchisescu, C. and Weiss, Y.},
publisher = {Springer, Cham},
pages = {677--693},
doi = {10.1007/978-3-030-01234-2_40},
owner = {jim},
timestamp = {2019.01.03},
}
@InCollection{Sensoy2018,
author = {Sensoy, Murat and Kaplan, Lance and Kandemir, Melih},
title = {Evidential Deep Learning to Quantify Classification Uncertainty},
booktitle = {Advances in Neural Information Processing Systems},
year = {2018},
editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},
volume = {31},
publisher = {Curran Associates, Inc.},
pages = {3183--3193},
url = {http://papers.nips.cc/paper/7580-evidential-deep-learning-to-quantify-classification-uncertainty.pdf},
abstract = {Deterministic neural nets have been shown to learn effective predictors on a wide range of machine learning problems. However, as the standard approach is to train the network to minimize a prediction loss, the resultant model remains ignorant to its prediction confidence. Orthogonally to Bayesian neural nets that indirectly infer prediction uncertainty through weight uncertainties, we propose explicit modeling of the same using the theory of subjective logic. By placing a Dirichlet distribution on the class probabilities, we treat predictions of a neural net as subjective opinions and learn the function that collects the evidence leading to these opinions by a deterministic neural net from data. The resultant predictor for a multi-class classification problem is another Dirichlet distribution whose parameters are set by the continuous output of a neural net. We provide a preliminary analysis on how the peculiarities of our new loss function drive improved uncertainty estimation. We observe that our method achieves unprecedented success on detection of out-of-distribution queries and endurance against adversarial perturbations.},
owner = {jim},
timestamp = {2019.01.03},
}
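% The core mapping in evidential deep learning: the network outputs
% non-negative per-class "evidence", which parameterises a Dirichlet over the
% class probabilities; total uncertainty is K divided by the Dirichlet
% strength. A sketch of that mapping (illustration only).
%
%   import torch.nn.functional as F
%
%   def dirichlet_from_logits(logits):
%       evidence = F.relu(logits)                  # non-negative evidence per class
%       alpha = evidence + 1.0                     # Dirichlet parameters
%       strength = alpha.sum(dim=-1, keepdim=True)
%       prob = alpha / strength                    # expected class probabilities
%       uncertainty = logits.shape[-1] / strength  # high when evidence is scarce
%       return prob, uncertainty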
@InProceedings{Wu2019,
author = {Anqi Wu and Sebastian Nowozin and Edward Meeds and Richard E. Turner and José Miguel Hernández-Lobato and Alexander L. Gaunt},
title = {Deterministic Variational Inference for Robust Bayesian Neural Networks},
booktitle = {International Conference on Learning Representations},
year = {2019},
url = {https://openreview.net/forum?id=B1l08oAct7},
urldate = {2019-01-03},
abstract = {Bayesian neural networks (BNNs) hold great promise as a flexible and principled solution to deal with uncertainty when learning from finite data. Among approaches to realize probabilistic inference in deep neural networks, variational Bayes (VB) is theoretically grounded, generally applicable, and computationally efficient. With wide recognition of potential advantages, why is it that variational Bayes has seen very limited practical use for BNNs in real applications? We argue that variational inference in neural networks is fragile: successful implementations require careful initialization and tuning of prior variances, as well as controlling the variance of Monte Carlo gradient estimates. We provide two innovations that aim to turn VB into a robust inference tool for Bayesian neural networks: first, we introduce a novel deterministic method to approximate moments in neural networks, eliminating gradient variance; second, we introduce a hierarchical prior for parameters and a novel Empirical Bayes procedure for automatically selecting prior variances. Combining these two innovations, the resulting method is highly efficient and robust. On the application of heteroscedastic regression we demonstrate good predictive performance over alternative approaches.},
owner = {jim},
timestamp = {2019.01.03},
}
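% The first innovation above replaces Monte Carlo sampling with deterministic
% moment propagation. One concrete ingredient: for a ReLU with Gaussian
% pre-activation a ~ N(mu, s^2), the output mean and variance have closed
% forms (the standard rectified-Gaussian moments; a sketch of the flavour of
% computation involved, not the paper's full algorithm).
%
%   from scipy.stats import norm
%
%   def relu_moments(mu, s):
%       z = mu / s
%       mean = mu * norm.cdf(z) + s * norm.pdf(z)
%       second = (mu**2 + s**2) * norm.cdf(z) + mu * s * norm.pdf(z)
%       return mean, second - mean**2  # E[relu(a)], Var[relu(a)]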
@Article{Geifman2018,
author = {Yonatan Geifman and Guy Uziel and Ran El-Yaniv},
title = {Bias-Reduced Uncertainty Estimation for Deep Neural Classifiers},
journal = {arXiv e-prints},
date = {2018-09-30},
eprint = {1805.08206v3},
eprintclass = {cs.LG},
eprinttype = {arXiv},
abstract = {We consider the problem of uncertainty estimation in the context of (non-Bayesian) deep neural classification. In this context, all known methods are based on extracting uncertainty signals from a trained network optimized to solve the classification problem at hand. We demonstrate that such techniques tend to introduce biased estimates for instances whose predictions are supposed to be highly confident. We argue that this deficiency is an artifact of the dynamics of training with SGD-like optimizers, and it has some properties similar to overfitting. Based on this observation, we develop an uncertainty estimation algorithm that selectively estimates the uncertainty of highly confident points, using earlier snapshots of the trained model, before their estimates are jittered (and way before they are ready for actual classification). We present extensive experiments indicating that the proposed algorithm provides uncertainty estimates that are consistently better than all known methods.},
file = {:http\://arxiv.org/pdf/1805.08206v3:PDF},
keywords = {cs.LG, stat.ML},
owner = {jim},
timestamp = {2019.01.03},
}
@Article{Mukhoti2018,
author = {Jishnu Mukhoti and Yarin Gal},
title = {Evaluating Bayesian Deep Learning Methods for Semantic Segmentation},
journal = {arXiv e-prints},
date = {2018-11-30},
eprint = {1811.12709v1},
eprintclass = {cs.CV},
eprinttype = {arXiv},
abstract = {Deep learning has been revolutionary for computer vision and semantic segmentation in particular, with Bayesian Deep Learning (BDL) used to obtain uncertainty maps from deep models when predicting semantic classes. This information is critical when using semantic segmentation for autonomous driving for example. Standard semantic segmentation systems have well-established evaluation metrics. However, with BDL's rising popularity in computer vision we require new metrics to evaluate whether a BDL method produces better uncertainty estimates than another method. In this work we propose three such metrics to evaluate BDL models designed specifically for the task of semantic segmentation. We modify DeepLab-v3+, one of the state-of-the-art deep neural networks, and create its Bayesian counterpart using MC dropout and Concrete dropout as inference techniques. We then compare and test these two inference techniques on the well-known Cityscapes dataset using our suggested metrics. Our results provide new benchmarks for researchers to compare and evaluate their improved uncertainty quantification in pursuit of safer semantic segmentation.},
file = {:http\://arxiv.org/pdf/1811.12709v1:PDF},
keywords = {cs.CV},
owner = {jim},
timestamp = {2019.01.03},
}
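% For segmentation, BDL uncertainty maps are per-pixel statistics over T
% stochastic softmax samples (e.g. from MC dropout). A sketch of two common
% quantities such metrics build on, predictive entropy and mutual information
% (our illustration; the paper defines its metrics on top of maps like these).
%
%   import torch
%
%   def uncertainty_maps(probs):
%       # probs: (T, H, W, C) softmax samples for one image.
%       mean_p = probs.mean(dim=0)
%       pred_entropy = -(mean_p * (mean_p + 1e-12).log()).sum(dim=-1)
%       exp_entropy = -(probs * (probs + 1e-12).log()).sum(dim=-1).mean(dim=0)
%       return pred_entropy, pred_entropy - exp_entropy  # total, epistemic (MI)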
@Article{Jankowiak2018,
author = {Martin Jankowiak},
title = {Closed Form Variational Objectives For Bayesian Neural Networks with a Single Hidden Layer},
journal = {arXiv e-prints},
date = {2018-12-02},
eprint = {1811.00686v2},
eprintclass = {stat.ML},
eprinttype = {arXiv},
abstract = {In this note we consider setups in which variational objectives for Bayesian neural networks can be computed in closed form. In particular we focus on single-layer networks in which the activation function is piecewise polynomial (e.g. ReLU). In this case we show that for a Normal likelihood and structured Normal variational distributions one can compute a variational lower bound in closed form. In addition we compute the predictive mean and variance in closed form. Finally, we also show how to compute approximate lower bounds for other likelihoods (e.g. softmax classification). In experiments we show how the resulting variational objectives can help improve training and provide fast test time predictions.},
file = {:http\://arxiv.org/pdf/1811.00686v2:PDF},
keywords = {stat.ML, cs.LG},
owner = {jim},
timestamp = {2019.01.03},
}
@Comment{jabref-meta: databaseType:biblatex;}