% masterthesis-latex/ma.bib
% Encoding: UTF-8
@InProceedings{Japkowicz1995,
author = {Japkowicz, Nathalie and Myers, Catherine and Gluck, Mark and others},
title = {A novelty detection approach to classification},
booktitle = {Proceedings of the 14th International Joint Conference on Artificial Intelligence ({IJCAI})},
year = {1995},
volume = {1},
pages = {518--523},
abstract = {Novelty Detection techniques are concept-learning methods that proceed by recognizing positive instances of a concept rather than differentiating between its positive and negative instances. Novelty Detection approaches consequently require very few, if any, negative training instances. This paper presents a particular Novelty Detection approach to classification that uses a Redundancy Compression and Non-Redundancy Differentiation technique based on the [Gluck \& Myers, 1993] model of the hippocampus, a part of the brain critically involved in learning and memory. In particular, this approach consists of training an autoencoder to reconstruct positive input instances at the output layer and then using this autoencoder to recognize novel instances. Classification is possible, after training, because positive instances are expected to be reconstructed accurately while negative instances are not. The purpose of this paper is to compare HIPPO, the system that implements this technique, to C4.5 and feedforward neural network classification on several applications.
System (see the @Comment sketch after this entry):
* an autoencoder is trained with positive examples only, resulting in a specialized autoencoder that produces a reconstruction error
* a threshold-determination component is fed the reconstruction errors of all examples (positive and negative) and produces a discriminator
* noiseless case: the reconstruction error of positive examples is always low and of negative examples always high
* noisy case: the reconstruction error of positive examples is sometimes high, and sometimes low for negative examples},
file = {:/home/jim/Documents/Studium/MA/Literatur/01_novelty-detection-classification_japkowicz.pdf:PDF},
owner = {jim},
timestamp = {2018.12.08},
}
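@Comment{jim-note: the system notes above describe the pipeline only in words. A minimal Python sketch of the general idea (not the paper's exact HIPPO model): train an autoencoder on positive examples only, then flag inputs whose reconstruction error exceeds a threshold calibrated on known data. All names and the quantile rule are illustrative assumptions.

import numpy as np
from sklearn.neural_network import MLPRegressor

def fit_autoencoder(X_pos):
    # Bottleneck network trained to reproduce its own (positive-only) inputs.
    ae = MLPRegressor(hidden_layer_sizes=(8,), max_iter=2000)
    return ae.fit(X_pos, X_pos)

def reconstruction_error(ae, X):
    # Per-sample mean squared reconstruction error.
    return np.mean((ae.predict(X) - X) ** 2, axis=1)

def make_discriminator(ae, X_calib, q=0.95):
    # Threshold determination from reconstruction errors of calibration data.
    tau = np.quantile(reconstruction_error(ae, X_calib), q)
    return lambda X: reconstruction_error(ae, X) > tau  # True means novel
}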
@InProceedings{Richter2017,
author = {Richter, Charles and Roy, Nicholas},
title = {Safe visual navigation via deep learning and novelty detection},
booktitle = {Robotics: Science and Systems},
year = {2017},
publisher = {Robotics: Science and Systems Foundation},
abstract = {Robots that use learned perceptual models in the real world must be able to safely handle cases where they are forced to make decisions in scenarios that are unlike any of their training examples. However, state-of-the-art deep learning methods are known to produce erratic or unsafe predictions when faced with novel inputs. Furthermore, recent ensemble, bootstrap and dropout methods for quantifying neural network uncertainty may not efficiently provide accurate uncertainty estimates when queried with inputs that are very different from their training data. Rather than unconditionally trusting the predictions of a neural network for unpredictable real-world data, we use an autoencoder to recognize when a query is novel, and revert to a safe prior behavior. With this capability, we can deploy an autonomous deep learning system in arbitrary environments, without concern for whether it has received the appropriate training. We demonstrate our method with a vision-guided robot that can leverage its deep neural network to navigate 50% faster than a safe baseline policy in familiar types of environments, while reverting to the prior behavior in novel environments so that it can safely collect additional training data and continually improve. A video illustrating our approach is available at: http://groups.csail.mit.edu/rrg/videos/safe visual navigation.
LIDAR: Lidar is a surveying method that measures distance to a target by illuminating the target with pulsed laser light and measuring the reflected pulses with a sensor.
SLAM: Simultaneous localization and mapping
general idea (see the @Comment sketch after this entry): the robot drives in an environment, the autoencoder flags the input as novel, the robot switches to a conservative (hardcoded?) behaviour, collects the new input and learns the new environment self-supervised, and can then navigate faster as it becomes familiar with it
},
file = {:/home/jim/Documents/Studium/MA/Literatur/02_safe-visual-navigation_richter-roy.pdf:PDF},
owner = {jim},
timestamp = {2018.12.08},
}
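@Comment{jim-note: hedged sketch of the "general idea" note above: gate the learned policy with a novelty score and revert to a safe prior behaviour on novel inputs. The names (learned_policy, safe_prior, novelty_score, tau, log_for_training) are assumptions, not the paper's API.

def act(observation, learned_policy, safe_prior, novelty_score, tau, log_for_training):
    # Revert to the conservative prior whenever the input looks unlike the training data.
    if novelty_score(observation) > tau:
        # Collect the unfamiliar input so the perception model can be
        # retrained self-supervised, as in the paper's improvement loop.
        log_for_training(observation)
        return safe_prior(observation)
    return learned_policy(observation)
}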
@Article{Blundell2015,
author = {Charles Blundell and Julien Cornebise and Koray Kavukcuoglu and Daan Wierstra},
title = {Weight Uncertainty in Neural Networks},
journal = {arXiv preprint},
year = {2015},
date = {2015-05-21},
eprint = {1505.05424v2},
eprintclass = {stat.ML},
eprinttype = {arXiv},
abstract = {We introduce a new, efficient, principled and backpropagation-compatible algorithm for learning a probability distribution on the weights of a neural network, called Bayes by Backprop. It regularises the weights by minimising a compression cost, known as the variational free energy or the expected lower bound on the marginal likelihood. We show that this principled kind of regularisation yields comparable performance to dropout on MNIST classification. We then demonstrate how the learnt uncertainty in the weights can be used to improve generalisation in non-linear regression problems, and how this weight uncertainty can be used to drive the exploration-exploitation trade-off in reinforcement learning.
comparable performance to dropout
uncertainty in weights: all weights are represented by probability distributions over possible values (see the @Comment sketch after this entry)},
file = {:/home/jim/Documents/Studium/MA/Literatur/04_weight-uncertainty_blundell.pdf:PDF},
keywords = {stat.ML, cs.LG},
owner = {jim},
timestamp = {2019.01.02},
}
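@Comment{jim-note: toy numpy illustration of the note above (weights as probability distributions): each weight has a learned mean and standard deviation, and predictions are averaged over weight samples. This sketches only the representation, not the Bayes by Backprop training objective; all values are made up.

import numpy as np

rng = np.random.default_rng(0)

# Variational parameters of a single linear layer (illustrative values).
w_mu = np.array([0.5, -1.2])
w_sigma = np.array([0.1, 0.3])

def predict(x, n_samples=100):
    # Each forward pass draws a fresh weight sample from N(mu, sigma^2).
    ws = rng.normal(w_mu, w_sigma, size=(n_samples, w_mu.size))
    ys = ws.dot(x)
    return ys.mean(), ys.std()  # predictive mean and uncertainty

mean, std = predict(np.array([1.0, 2.0]))
}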
@Article{Bishop1994,
author = {Bishop, Christopher M.},
title = {Novelty detection and neural network validation},
journal = {{IEE} Proceedings - Vision, Image, and Signal Processing},
year = {1994},
volume = {141},
number = {4},
pages = {217--222},
doi = {10.1049/ip-vis:19941330},
abstract = {One of the key factors which limits the use of neural networks in many industrial applications has been the difficulty of demonstrating that a trained network will continue to generate reliable outputs once it is in routine use. An important potential source of errors is novel input data; that is, input data which differ significantly from the data used to train the network. The author investigates the relationship between the degree of novelty of input data and the corresponding reliability of the outputs from the network. He describes a quantitative procedure for assessing novelty, and demonstrates its performance by using an application which involves monitoring oil flow in multiphase pipelines.},
file = {:/home/jim/Documents/Studium/MA/Literatur/03_Bishop-Novelty-Detection_IEE-Proceedings-94b.pdf:PDF},
owner = {jim},
publisher = {Institution of Engineering and Technology ({IET})},
timestamp = {2019.01.02},
}
@InProceedings{Gal2016,
author = {Yarin Gal and Zoubin Ghahramani},
title = {Dropout as a Bayesian Approximation: Representing Model Uncertainty in Deep Learning},
booktitle = {Proceedings of The 33rd International Conference on Machine Learning},
year = {2016},
date = {2016-06-20/2016-06-22},
editor = {Maria Florina Balcan and Kilian Q. Weinberger},
volume = {48},
series = {Proceedings of Machine Learning Research},
publisher = {PMLR},
pages = {1050--1059},
abstract = {Deep learning tools have gained tremendous attention in applied machine learning. However such tools for regression and classification do not capture model uncertainty. In comparison, Bayesian models offer a mathematically grounded framework to reason about model uncertainty, but usually come with a prohibitive computational cost. In this paper we develop a new theoretical framework casting dropout training in deep neural networks (NNs) as approximate Bayesian inference in deep Gaussian processes. A direct result of this theory gives us tools to model uncertainty with dropout NNs extracting information from existing models that has been thrown away so far. This mitigates the problem of representing uncertainty in deep learning without sacrificing either computational complexity or test accuracy. We perform an extensive study of the properties of dropout's uncertainty. Various network architectures and non-linearities are assessed on tasks of regression and classification, using MNIST as an example. We show a considerable improvement in predictive log-likelihood and RMSE compared to existing state-of-the-art methods, and finish by using dropout's uncertainty in deep reinforcement learning.
dropout as Bayesian approximation (see the @Comment sketch after this entry)},
address = {New York, New York, USA},
file = {:/home/jim/Documents/Studium/MA/Literatur/05_dropout-bayesian-approximation_gal-ghahramani.pdf:PDF},
owner = {jim},
timestamp = {2019.01.02},
}
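@Comment{jim-note: minimal MC dropout sketch for the note above: keep dropout stochastic at test time and run T forward passes; the spread of the outputs serves as a model-uncertainty estimate. Layer sizes, p and T are arbitrary assumptions, written in PyTorch.

import torch
import torch.nn as nn

model = nn.Sequential(nn.Linear(4, 32), nn.ReLU(), nn.Dropout(p=0.5), nn.Linear(32, 1))

def mc_dropout_predict(model, x, T=50):
    model.train()  # keeps the Dropout layer sampling at test time
    with torch.no_grad():
        ys = torch.stack([model(x) for _ in range(T)])
    return ys.mean(dim=0), ys.std(dim=0)

mean, std = mc_dropout_predict(model, torch.randn(1, 4))
}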
@PhdThesis{Gal2017,
author = {Gal, Yarin},
title = {Uncertainty in Deep Learning},
institution = {University of Cambridge},
year = {2017},
file = {:/home/jim/Documents/Studium/MA/Literatur/06_uncertainty-deep-learning-phd-thesis.pdf:PDF},
owner = {jim},
timestamp = {2019.01.02},
}
@InCollection{Krizhevsky2012,
author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E.},
title = {ImageNet Classification with Deep Convolutional Neural Networks},
booktitle = {Advances in Neural Information Processing Systems},
year = {2012},
editor = {F. Pereira and C. J. C. Burges and L. Bottou and K. Q. Weinberger},
volume = {25},
publisher = {Curran Associates, Inc.},
pages = {1097--1105},
url = {http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf},
abstract = {We trained a large, deep convolutional neural network to classify the 1.2 million high-resolution images in the ImageNet LSVRC-2010 contest into the 1000 different classes. On the test data, we achieved top-1 and top-5 error rates of 37.5\% and 17.0\% which is considerably better than the previous state-of-the-art. The neural network, which has 60 million parameters and 650,000 neurons, consists of five convolutional layers, some of which are followed by max-pooling layers, and three fully-connected layers with a final 1000-way softmax. To make training faster, we used non-saturating neurons and a very efficient GPU implementation of the convolution operation. To reduce overfitting in the fully-connected layers we employed a recently-developed regularization method called “dropout” that proved to be very effective. We also entered a variant of this model in the ILSVRC-2012 competition and achieved a winning top-5 test error rate of 15.3\%, compared to 26.2\% achieved by the second-best entry.},
file = {:/home/jim/Documents/Studium/MA/Literatur/07_imagenet-classification-with-deep-convolutional-neural-networks_krizhevsky.pdf:PDF},
owner = {jim},
timestamp = {2019.01.02},
}
@InCollection{Lakshminarayanan2017,
author = {Lakshminarayanan, Balaji and Pritzel, Alexander and Blundell, Charles},
title = {Simple and Scalable Predictive Uncertainty Estimation using Deep Ensembles},
booktitle = {Advances in Neural Information Processing Systems},
year = {2017},
editor = {I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
volume = {30},
publisher = {Curran Associates, Inc.},
pages = {6402--6413},
url = {http://papers.nips.cc/paper/7219-simple-and-scalable-predictive-uncertainty-estimation-using-deep-ensembles.pdf},
abstract = {Deep neural networks (NNs) are powerful black box predictors that have recently achieved impressive performance on a wide spectrum of tasks. Quantifying predictive uncertainty in NNs is a challenging and yet unsolved problem. Bayesian NNs, which learn a distribution over weights, are currently the state-of-the-art for estimating predictive uncertainty; however these require significant modifications to the training procedure and are computationally expensive compared to standard (non-Bayesian) NNs. We propose an alternative to Bayesian NNs that is simple to implement, readily parallelizable, requires very little hyperparameter tuning, and yields high quality predictive uncertainty estimates. Through a series of experiments on classification and regression benchmarks, we demonstrate that our method produces well-calibrated uncertainty estimates which are as good or better than approximate Bayesian NNs. To assess robustness to dataset shift, we evaluate the predictive uncertainty on test examples from known and unknown distributions, and show that our method is able to express higher uncertainty on out-of-distribution examples. We demonstrate the scalability of our method by evaluating predictive uncertainty estimates on ImageNet.},
file = {:/home/jim/Documents/Studium/MA/Literatur/08_simple-and-scalable-predictive-uncertainty-estimation-using-deep-ensembles_lakshminarayanan.pdf:PDF},
owner = {jim},
timestamp = {2019.01.02},
}
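@Comment{jim-note: sketch of the deep-ensembles recipe described in the abstract above: train M networks independently (different random initialisations) and read predictive uncertainty off the disagreement of their outputs. The regressor choice and M are illustrative assumptions; the paper additionally uses proper scoring rules and adversarial training, which this sketch omits.

import numpy as np
from sklearn.neural_network import MLPRegressor

def fit_ensemble(X, y, M=5):
    # Independently initialised members trained on the same data.
    members = []
    for m in range(M):
        net = MLPRegressor(hidden_layer_sizes=(32,), random_state=m, max_iter=2000)
        members.append(net.fit(X, y))
    return members

def predict_with_uncertainty(members, X):
    preds = np.stack([net.predict(X) for net in members])
    return preds.mean(axis=0), preds.std(axis=0)  # disagreement as uncertainty
}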
@InProceedings{Thompson2002,
author = {Thompson, B. B. and Marks, R. J. and Choi, J. J. and El-Sharkawi, M. A. and Huang, Ming-Yuh and Bunje, C.},
title = {Implicit learning in autoencoder novelty assessment},
booktitle = {Proceedings of the 2002 International Joint Conference on Neural Networks ({IJCNN})},
year = {2002},
volume = {3},
publisher = {{IEEE}},
pages = {2878--2883},
doi = {10.1109/ijcnn.2002.1007605},
abstract = {When the situation arises that only "normal" behavior is known about a system, it is desirable to develop a system based solely on that behavior which enables the user to determine when that system behavior falls outside of that range of normality. A new method is proposed for detecting such novel behavior through the use of autoassociative neural network encoders, which can be shown to implicitly learn the nature of the underlying "normal" system behavior.},
file = {:/home/jim/Documents/Studium/MA/Literatur/09_implicit-learning-autoencoder-novelty-assenssment_thompson.pdf:PDF},
owner = {jim},
timestamp = {2019.01.02},
}
@InProceedings{Ilg2018,
author = {Eddy Ilg and Özgün {\c{C}}i{\c{c}}ek and Silvio Galesso and Aaron Klein and Osama Makansi and Frank Hutter and Thomas Brox},
title = {Uncertainty Estimates and Multi-hypotheses Networks for Optical Flow},
booktitle = {Computer Vision {\textendash} {ECCV} 2018},
year = {2018},
editor = {Ferrari, V. and Hebert, M. and Sminchisescu, C. and Weiss, Y.},
publisher = {Springer International Publishing},
pages = {677--693},
doi = {10.1007/978-3-030-01234-2_40},
file = {:/home/jim/Documents/Studium/MA/Literatur/10_uncertainty-estimates-multi-hypotheses-networks-optical-flow.pdf:PDF},
owner = {jim},
timestamp = {2019.01.03},
}
@InCollection{Sensoy2018,
author = {Sensoy, Murat and Kaplan, Lance and Kandemir, Melih},
title = {Evidential Deep Learning to Quantify Classification Uncertainty},
booktitle = {Advances in Neural Information Processing Systems},
year = {2018},
editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},
volume = {31},
publisher = {Curran Associates, Inc.},
pages = {3183--3193},
url = {http://papers.nips.cc/paper/7580-evidential-deep-learning-to-quantify-classification-uncertainty.pdf},
abstract = {Deterministic neural nets have been shown to learn effective predictors on a wide range of machine learning problems. However, as the standard approach is to train the network to minimize a prediction loss, the resultant model remains ignorant to its prediction confidence. Orthogonally to Bayesian neural nets that indirectly infer prediction uncertainty through weight uncertainties, we propose explicit modeling of the same using the theory of subjective logic. By placing a Dirichlet distribution on the class probabilities, we treat predictions of a neural net as subjective opinions and learn the function that collects the evidence leading to these opinions by a deterministic neural net from data. The resultant predictor for a multi-class classification problem is another Dirichlet distribution whose parameters are set by the continuous output of a neural net. We provide a preliminary analysis on how the peculiarities of our new loss function drive improved uncertainty estimation. We observe that our method achieves unprecedented success on detection of out-of-distribution queries and endurance against adversarial perturbations.},
file = {:/home/jim/Documents/Studium/MA/Literatur/11_evidential-deep-learning-to-quantify-classification-uncertainty_sensoy.pdf:PDF},
owner = {jim},
timestamp = {2019.01.03},
}
@InProceedings{Wu2019,
author = {Anqi Wu and Sebastian Nowozin and Edward Meeds and Richard E. Turner and José Miguel Hernández-Lobato and Alexander L. Gaunt},
title = {Deterministic Variational Inference for Robust Bayesian Neural Networks},
booktitle = {International Conference on Learning Representations},
year = {2019},
url = {https://openreview.net/forum?id=B1l08oAct7},
urldate = {2019-01-03},
abstract = {Bayesian neural networks (BNNs) hold great promise as a flexible and principled solution to deal with uncertainty when learning from finite data. Among approaches to realize probabilistic inference in deep neural networks, variational Bayes (VB) is theoretically grounded, generally applicable, and computationally efficient. With wide recognition of potential advantages, why is it that variational Bayes has seen very limited practical use for BNNs in real applications? We argue that variational inference in neural networks is fragile: successful implementations require careful initialization and tuning of prior variances, as well as controlling the variance of Monte Carlo gradient estimates. We provide two innovations that aim to turn VB into a robust inference tool for Bayesian neural networks: first, we introduce a novel deterministic method to approximate moments in neural networks, eliminating gradient variance; second, we introduce a hierarchical prior for parameters and a novel Empirical Bayes procedure for automatically selecting prior variances. Combining these two innovations, the resulting method is highly efficient and robust. On the application of heteroscedastic regression we demonstrate good predictive performance over alternative approaches.},
file = {:/home/jim/Documents/Studium/MA/Literatur/12a_deterministic-variational-inference-robust-bayesian-neural-networks_wu.pdf:PDF},
owner = {jim},
timestamp = {2019.01.03},
}
@Article{Geifman2018,
author = {Yonatan Geifman and Guy Uziel and Ran El-Yaniv},
title = {Bias-Reduced Uncertainty Estimation for Deep Neural Classifiers},
journal = {arXiv preprint},
date = {2018-09-30},
eprint = {1805.08206v3},
eprintclass = {cs.LG},
eprinttype = {arXiv},
abstract = {We consider the problem of uncertainty estimation in the context of (non-Bayesian) deep neural classification. In this context, all known methods are based on extracting uncertainty signals from a trained network optimized to solve the classification problem at hand. We demonstrate that such techniques tend to introduce biased estimates for instances whose predictions are supposed to be highly confident. We argue that this deficiency is an artifact of the dynamics of training with SGD-like optimizers, and it has some properties similar to overfitting. Based on this observation, we develop an uncertainty estimation algorithm that selectively estimates the uncertainty of highly confident points, using earlier snapshots of the trained model, before their estimates are jittered (and way before they are ready for actual classification). We present extensive experiments indicating that the proposed algorithm provides uncertainty estimates that are consistently better than all known methods.},
file = {:/home/jim/Documents/Studium/MA/Literatur/13_bias-reduced-uncertainty-estimation-deep-neural-classifiers_geifman.pdf:PDF},
keywords = {cs.LG, stat.ML},
owner = {jim},
timestamp = {2019.01.03},
}
@Article{Mukhoti2018,
author = {Jishnu Mukhoti and Yarin Gal},
title = {Evaluating Bayesian Deep Learning Methods for Semantic Segmentation},
journal = {arXiv preprint},
date = {2018-11-30},
eprint = {1811.12709v1},
eprintclass = {cs.CV},
eprinttype = {arXiv},
abstract = {Deep learning has been revolutionary for computer vision and semantic segmentation in particular, with Bayesian Deep Learning (BDL) used to obtain uncertainty maps from deep models when predicting semantic classes. This information is critical when using semantic segmentation for autonomous driving for example. Standard semantic segmentation systems have well-established evaluation metrics. However, with BDL's rising popularity in computer vision we require new metrics to evaluate whether a BDL method produces better uncertainty estimates than another method. In this work we propose three such metrics to evaluate BDL models designed specifically for the task of semantic segmentation. We modify DeepLab-v3+, one of the state-of-the-art deep neural networks, and create its Bayesian counterpart using MC dropout and Concrete dropout as inference techniques. We then compare and test these two inference techniques on the well-known Cityscapes dataset using our suggested metrics. Our results provide new benchmarks for researchers to compare and evaluate their improved uncertainty quantification in pursuit of safer semantic segmentation.},
file = {:/home/jim/Documents/Studium/MA/Literatur/16_evaluating-bayesian-deep-learning-methods-semantic-segmentation_mukhoti.pdf:PDF},
keywords = {cs.CV},
owner = {jim},
timestamp = {2019.01.03},
}
@Article{Jankowiak2018,
author = {Martin Jankowiak},
title = {Closed Form Variational Objectives For Bayesian Neural Networks with a Single Hidden Layer},
journal = {arXiv preprint},
date = {2018-12-02},
eprint = {1811.00686v2},
eprintclass = {stat.ML},
eprinttype = {arXiv},
abstract = {In this note we consider setups in which variational objectives for Bayesian neural networks can be computed in closed form. In particular we focus on single-layer networks in which the activation function is piecewise polynomial (e.g. ReLU). In this case we show that for a Normal likelihood and structured Normal variational distributions one can compute a variational lower bound in closed form. In addition we compute the predictive mean and variance in closed form. Finally, we also show how to compute approximate lower bounds for other likelihoods (e.g. softmax classification). In experiments we show how the resulting variational objectives can help improve training and provide fast test time predictions.},
file = {:/home/jim/Documents/Studium/MA/Literatur/14_closed-form-variational-objectives-bayesian-neural-networks-single-hidden-layer_jankowiak.pdf:PDF},
keywords = {stat.ML, cs.LG},
owner = {jim},
timestamp = {2019.01.03},
}
@InCollection{Pomerleau1993,
author = {Pomerleau, Dean A.},
title = {Input Reconstruction Reliability Estimation},
booktitle = {Advances in Neural Information Processing Systems},
year = {1993},
publisher = {Curran Associates, Inc.},
pages = {279--286},
abstract = {This paper describes a technique called Input Reconstruction Reliability Estimation (IRRE) for determining the response reliability of a restricted class of multi-layer perceptrons (MLPs). The technique uses a network's ability to accurately encode the input pattern in its internal representation as a measure of its reliability. The more accurately a network is able to reconstruct the input pattern from its internal representation, the more reliable the network is considered to be. IRRE provides a good estimate of the reliability of MLPs trained for autonomous driving. Results are presented in which the reliability estimates provided by IRRE are used to select between networks trained for different driving situations.},
file = {:/home/jim/Documents/Studium/MA/Literatur/15_input-reconstruction-reliability-estimation_pomerleau.pdf:PDF},
owner = {jim},
timestamp = {2019.01.05},
}
@Article{Hodge2004,
author = {Victoria Hodge and Jim Austin},
title = {A Survey of Outlier Detection Methodologies},
journal = {Artificial Intelligence Review},
year = {2004},
volume = {22},
number = {2},
pages = {85--126},
doi = {10.1023/b:aire.0000045502.10941.a9},
abstract = {Outlier detection has been used for centuries to detect and, where appropriate, remove anomalous observations from data. Outliers arise due to mechanical faults, changes in system behaviour, fraudulent behaviour, human error, instrument error or simply through natural deviations in populations. Their detection can identify system faults and fraud before they escalate with potentially catastrophic consequences. It can identify errors and remove their contaminating effect on the data set and as such to purify the data for processing. The original outlier detection methods were arbitrary but now, principled and systematic techniques are used, drawn from the full gamut of Computer Science and Statistics. In this paper, we introduce a survey of contemporary techniques for outlier detection. We identify their respective motivations and distinguish their advantages and disadvantages in a comparative review.},
file = {:/home/jim/Documents/Studium/MA/Literatur/17_survey-outlier-detection-methodologies_hodge.pdf:PDF},
owner = {jim},
publisher = {Springer Nature},
timestamp = {2019.01.05},
}
@InProceedings{Yadav2014,
author = {Balvant Yadav and V. Susheela Devi},
title = {Novelty detection applied to the classification problem using Probabilistic Neural Network},
booktitle = {2014 {IEEE} Symposium on Computational Intelligence and Data Mining ({CIDM})},
year = {2014},
publisher = {{IEEE}},
month = {dec},
doi = {10.1109/cidm.2014.7008677},
file = {:/home/jim/Documents/Studium/MA/Literatur/18_novelty-detection-applied-classification-problem-using-probabilistic-neural-network_yadav.pdf:PDF},
owner = {jim},
timestamp = {2019.01.05},
}
@InCollection{Pidhorskyi2018,
author = {Stanislav Pidhorskyi and Ranya Almohsen and Donald A. Adjeroh and Gianfranco Doretto},
title = {Generative Probabilistic Novelty Detection with Adversarial Autoencoders},
booktitle = {Advances in Neural Information Processing Systems},
year = {2018},
editor = {S. Bengio and H. Wallach and H. Larochelle and K. Grauman and N. Cesa-Bianchi and R. Garnett},
volume = {31},
publisher = {Curran Associates, Inc.},
pages = {6823--6834},
abstract = {Novelty detection is the problem of identifying whether a new data point is considered to be an inlier or an outlier. We assume that training data is available to describe only the inlier distribution. Recent approaches primarily leverage deep encoder-decoder network architectures to compute a reconstruction error that is used to either compute a novelty score or to train a one-class classifier. While we too leverage a novel network of that kind, we take a probabilistic approach and effectively compute how likely is that a sample was generated by the inlier distribution. We achieve this with two main contributions. First, we make the computation of the novelty probability feasible because we linearize the parameterized manifold capturing the underlying structure of the inlier distribution, and show how the probability factorizes and can be computed with respect to local coordinates of the manifold tangent space. Second, we improved the training of the autoencoder network. An extensive set of results show that the approach achieves state-of-the-art results on several benchmark datasets.},
file = {:/home/jim/Documents/Studium/MA/Literatur/19_generative-probabilistic-novelty-detection-adversarial-autoencoders_pidhorskyi.pdf:PDF},
keywords = {cs.CV},
owner = {jim},
timestamp = {2019.01.05},
}
@InProceedings{Xia2015,
author = {Yan Xia and Xudong Cao and Fang Wen and Gang Hua and Jian Sun},
title = {Learning Discriminative Reconstructions for Unsupervised Outlier Removal},
booktitle = {2015 {IEEE} International Conference on Computer Vision ({ICCV})},
year = {2015},
publisher = {{IEEE}},
month = {dec},
doi = {10.1109/iccv.2015.177},
abstract = {We study the problem of automatically removing outliers from noisy data, with application for removing outlier images from an image collection. We address this problem by utilizing the reconstruction errors of an autoencoder. We observe that when data are reconstructed from low-dimensional representations, the inliers and the outliers can be well separated according to their reconstruction errors. Based on this basic observation, we gradually inject discriminative information in the learning process of an autoencoder to make the inliers and the outliers more separable. Experiments on a variety of image datasets validate our approach.},
file = {:/home/jim/Documents/Studium/MA/Literatur/20_learning-discriminative-reconstructions-for-unsupervised-outlier-removal_xia.pdf:PDF},
owner = {jim},
timestamp = {2019.01.05},
}
@Article{You2017,
author = {Chong You and Daniel P. Robinson and René Vidal},
title = {Provable Self-Representation Based Outlier Detection in a Union of Subspaces},
journal = {arXiv preprint},
date = {2017-04-12},
eprint = {1704.03925v1},
eprintclass = {cs.CV},
eprinttype = {arXiv},
abstract = {Many computer vision tasks involve processing large amounts of data contaminated by outliers, which need to be detected and rejected. While outlier detection methods based on robust statistics have existed for decades, only recently have methods based on sparse and low-rank representation been developed along with guarantees of correct outlier detection when the inliers lie in one or more low-dimensional subspaces. This paper proposes a new outlier detection method that combines tools from sparse representation with random walks on a graph. By exploiting the property that data points can be expressed as sparse linear combinations of each other, we obtain an asymmetric affinity matrix among data points, which we use to construct a weighted directed graph. By defining a suitable Markov Chain from this graph, we establish a connection between inliers/outliers and essential/inessential states of the Markov chain, which allows us to detect outliers by using random walks. We provide a theoretical analysis that justifies the correctness of our method under geometric and connectivity assumptions. Experimental results on image databases demonstrate its superiority with respect to state-of-the-art sparse and low-rank outlier detection methods.},
file = {:/home/jim/Documents/Studium/MA/Literatur/21_provable-self-representation-based-outlier-detection-union-of-subspaces_you.pdf:PDF},
keywords = {cs.CV, stat.ML},
owner = {jim},
timestamp = {2019.01.05},
}
@Article{Pimentel2014,
author = {Pimentel, Marco A. F. and Clifton, David A. and Clifton, Lei and Tarassenko, Lionel},
title = {A review of novelty detection},
journal = {Signal Processing},
year = {2014},
volume = {99},
pages = {215--249},
abstract = {Novelty detection is the task of classifying test data that differ in some respect from the data that are available during training. This may be seen as “one-class classification”, in which a model is constructed to describe “normal” training data. The novelty detection approach is typically used when the quantity of available “abnormal” data is insufficient to construct explicit models for non-normal classes. Application includes inference in datasets from critical systems, where the quantity of available normal data is very large, such that “normality” may be accurately modelled. In this review we aim to provide an updated and structured investigation of novelty detection research papers that have appeared in the machine learning literature during the last decade.},
file = {:/home/jim/Documents/Studium/MA/Literatur/22_review-novelty-detection_pimentel.pdf:PDF},
owner = {jim},
timestamp = {2019.01.05},
}
@Article{Ravanbakhsh2017,
author = {Mahdyar Ravanbakhsh and Moin Nabi and Enver Sangineto and Lucio Marcenaro and Carlo Regazzoni and Nicu Sebe},
title = {Abnormal Event Detection in Videos using Generative Adversarial Nets},
journal = {arXiv preprint},
date = {2017-08-31},
eprint = {1708.09644v1},
eprintclass = {cs.CV},
eprinttype = {arXiv},
abstract = {In this paper we address the abnormality detection problem in crowded scenes. We propose to use Generative Adversarial Nets (GANs), which are trained using normal frames and corresponding optical-flow images in order to learn an internal representation of the scene normality. Since our GANs are trained with only normal data, they are not able to generate abnormal events. At testing time the real data are compared with both the appearance and the motion representations reconstructed by our GANs and abnormal areas are detected by computing local differences. Experimental results on challenging abnormality detection datasets show the superiority of the proposed method compared to the state of the art in both frame-level and pixel-level abnormality detection tasks.},
file = {:/home/jim/Documents/Studium/MA/Literatur/23_abnormal-event-detection-videos-using-generative-adversarial-nets_ravanbakhsh.pdf:PDF},
keywords = {cs.CV, cs.MM},
owner = {jim},
timestamp = {2019.01.05},
}
@Article{Sabokrou2018,
author = {Mohammad Sabokrou and Mohammad Khalooei and Mahmood Fathy and Ehsan Adeli},
title = {Adversarially Learned One-Class Classifier for Novelty Detection},
journal = {arXiv preprint},
date = {2018-04-24},
eprint = {1802.09088v2},
eprintclass = {cs.CV},
eprinttype = {arXiv},
abstract = {Novelty detection is the process of identifying the observation(s) that differ in some respect from the training observations (the target class). In reality, the novelty class is often absent during training, poorly sampled or not well defined. Therefore, one-class classifiers can efficiently model such problems. However, due to the unavailability of data from the novelty class, training an end-to-end deep network is a cumbersome task. In this paper, inspired by the success of generative adversarial networks for training deep models in unsupervised and semi-supervised settings, we propose an end-to-end architecture for one-class classification. Our architecture is composed of two deep networks, each of which trained by competing with each other while collaborating to understand the underlying concept in the target class, and then classify the testing samples. One network works as the novelty detector, while the other supports it by enhancing the inlier samples and distorting the outliers. The intuition is that the separability of the enhanced inliers and distorted outliers is much better than deciding on the original samples. The proposed framework applies to different related applications of anomaly and outlier detection in images and videos. The results on MNIST and Caltech-256 image datasets, along with the challenging UCSD Ped2 dataset for video anomaly detection illustrate that our proposed method learns the target class effectively and is superior to the baseline and state-of-the-art methods.},
file = {:/home/jim/Documents/Studium/MA/Literatur/24_adversarially-learned-one-class-classifier-novelty-detection_sabokrou.pdf:PDF},
keywords = {cs.CV},
owner = {jim},
timestamp = {2019.01.05},
}
@Article{Makhzani2015,
author = {Alireza Makhzani and Jonathon Shlens and Navdeep Jaitly and Ian Goodfellow and Brendan Frey},
title = {Adversarial Autoencoders},
journal = {arXiv preprint},
date = {2016-05-25},
eprint = {1511.05644v2},
eprintclass = {cs.LG},
eprinttype = {arXiv},
abstract = {In this paper, we propose the "adversarial autoencoder" (AAE), which is a probabilistic autoencoder that uses the recently proposed generative adversarial networks (GAN) to perform variational inference by matching the aggregated posterior of the hidden code vector of the autoencoder with an arbitrary prior distribution. Matching the aggregated posterior to the prior ensures that generating from any part of prior space results in meaningful samples. As a result, the decoder of the adversarial autoencoder learns a deep generative model that maps the imposed prior to the data distribution. We show how the adversarial autoencoder can be used in applications such as semi-supervised classification, disentangling style and content of images, unsupervised clustering, dimensionality reduction and data visualization. We performed experiments on MNIST, Street View House Numbers and Toronto Face datasets and show that adversarial autoencoders achieve competitive results in generative modeling and semi-supervised classification tasks.},
file = {:/home/jim/Documents/Studium/MA/Literatur/25_adversarial-autoencoders_makhzani.pdf:PDF},
keywords = {cs.LG},
owner = {jim},
timestamp = {2019.01.05},
}
@InProceedings{Hasan2016,
author = {Mahmudul Hasan and Jonghyun Choi and Jan Neumann and Amit K. Roy-Chowdhury and Larry S. Davis},
title = {Learning Temporal Regularity in Video Sequences},
booktitle = {2016 {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
year = {2016},
publisher = {{IEEE}},
doi = {10.1109/cvpr.2016.86},
abstract = {Perceiving meaningful activities in a long video sequence is a challenging problem due to ambiguous definition of 'meaningfulness' as well as clutters in the scene. We approach this problem by learning a generative model for regular motion patterns (termed as regularity) using multiple sources with very limited supervision. Specifically, we propose two methods that are built upon the autoencoders for their ability to work with little to no supervision. We first leverage the conventional handcrafted spatio-temporal local features and learn a fully connected autoencoder on them. Second, we build a fully convolutional feed-forward autoencoder to learn both the local features and the classifiers as an end-to-end learning framework. Our model can capture the regularities from multiple datasets. We evaluate our methods in both qualitative and quantitative ways - showing the learned regularity of videos in various aspects and demonstrating competitive performance on anomaly detection datasets as an application.},
file = {:/home/jim/Documents/Studium/MA/Literatur/26_learning-temporal-regularity-video-sequences_hasan.pdf:PDF},
owner = {jim},
timestamp = {2019.01.05},
}
@Article{Xu2015,
author = {Dan Xu and Elisa Ricci and Yan Yan and Jingkuan Song and Nicu Sebe},
title = {Learning Deep Representations of Appearance and Motion for Anomalous Event Detection},
journal = {arXiv preprint},
date = {2015-10-06},
eprint = {1510.01553v1},
eprintclass = {cs.CV},
eprinttype = {arXiv},
abstract = {We present a novel unsupervised deep learning framework for anomalous event detection in complex video scenes. While most existing works merely use hand-crafted appearance and motion features, we propose Appearance and Motion DeepNet (AMDN) which utilizes deep neural networks to automatically learn feature representations. To exploit the complementary information of both appearance and motion patterns, we introduce a novel double fusion framework, combining both the benefits of traditional early fusion and late fusion strategies. Specifically, stacked denoising autoencoders are proposed to separately learn both appearance and motion features as well as a joint representation (early fusion). Based on the learned representations, multiple one-class SVM models are used to predict the anomaly scores of each input, which are then integrated with a late fusion strategy for final anomaly detection. We evaluate the proposed method on two publicly available video surveillance datasets, showing competitive performance with respect to state of the art approaches.},
file = {:/home/jim/Documents/Studium/MA/Literatur/27_learning-deep-representations-of-appearance-and-motion-for-anomalous-event-detection_xu.pdf:PDF},
keywords = {cs.CV},
owner = {jim},
timestamp = {2019.01.05},
}
@InCollection{Goodfellow2014,
author = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
title = {Generative adversarial nets},
booktitle = {Advances in Neural Information Processing Systems},
year = {2014},
editor = {Z. Ghahramani and M. Welling and C. Cortes and N. D. Lawrence and K. Q. Weinberger},
volume = {27},
publisher = {Curran Associates, Inc.},
pages = {2672--2680},
abstract = {We propose a new framework for estimating generative models via adversarial nets, in which we simultaneously train two models: a generative model G that captures the data distribution, and a discriminative model D that estimates the probability that a sample came from the training data rather than G. The training procedure for G is to maximize the probability of D making a mistake. This framework corresponds to a minimax two-player game. In the space of arbitrary functions G and D, a unique solution exists, with G recovering the training data distribution and D equal to 1/2 everywhere. In the case where G and D are defined by multilayer perceptrons, the entire system can be trained with backpropagation. There is no need for any Markov chains or unrolled approximate inference networks during either training or generation of samples. Experiments demonstrate the potential of the framework through qualitative and quantitative evaluation of the generated samples.},
file = {:/home/jim/Documents/Studium/MA/Literatur/28_generative-adversarial-nets_goodfellow.pdf:PDF},
owner = {jim},
timestamp = {2019.01.05},
}
@Article{Wang2018,
author = {Huan-gang Wang and Xin Li and Tao Zhang},
title = {Generative adversarial network based novelty detection using minimized reconstruction error},
journal = {Frontiers of Information Technology {\&} Electronic Engineering},
year = {2018},
volume = {19},
number = {1},
month = {jan},
pages = {116--125},
doi = {10.1631/fitee.1700786},
abstract = {Generative adversarial network (GAN) is the most exciting machine learning breakthrough in recent years, and it trains the learning model by finding the Nash equilibrium of a two-player zero-sum game. GAN is composed of a generator and a discriminator, both trained with the adversarial learning mechanism. In this paper, we introduce and investigate the use of GAN for novelty detection. In training, GAN learns from ordinary data. Then, using previously unknown data, the generator and the discriminator with the designed decision boundaries can both be used to separate novel patterns from ordinary patterns. The proposed GAN-based novelty detection method demonstrates a competitive performance on the MNIST digit database and the Tennessee Eastman (TE) benchmark process compared with the PCA-based novelty detection methods using Hotelling's T2 and squared prediction error statistics.},
file = {:/home/jim/Documents/Studium/MA/Literatur/29_generative-adversarial-network-based-novelty-detection-using-minimized-reconstruction-error_wang.pdf:PDF},
owner = {jim},
publisher = {Zhejiang University Press},
timestamp = {2019.01.05},
}
@InProceedings{Fan2017,
author = {Haoqiang Fan and Hao Su and Leonidas Guibas},
title = {A Point Set Generation Network for 3D Object Reconstruction from a Single Image},
booktitle = {2017 {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
year = {2017},
publisher = {{IEEE}},
month = {jul},
doi = {10.1109/cvpr.2017.264},
abstract = {Generation of 3D data by deep neural networks has been attracting increasing attention in the research community. The majority of extant works resort to regular representations such as volumetric grids or collections of images; however, these representations obscure the natural invariance of 3D shapes under geometric transformations, and also suffer from a number of other issues. In this paper we address the problem of 3D reconstruction from a single image, generating a straight-forward form of output - point cloud coordinates. Along with this problem arises a unique and interesting issue, that the groundtruth shape for an input image may be ambiguous. Driven by this unorthodox output form and the inherent ambiguity in groundtruth, we design architecture, loss function and learning paradigm that are novel and effective. Our final solution is a conditional shape sampler, capable of predicting multiple plausible 3D point clouds from an input image. In experiments not only can our system outperform state-of-the-art methods on single image based 3d reconstruction benchmarks; but it also shows strong performance for 3D shape completion and promising ability in making multiple plausible predictions.},
file = {:/home/jim/Documents/Studium/MA/Literatur/30_a-point-set-generation-network-for-3D-object-reconstruction-from-a-single-image_fan.pdf:PDF},
owner = {jim},
timestamp = {2019.01.05},
}
@InProceedings{Miller2018,
author = {Dimity Miller and Lachlan Nicholson and Feras Dayoub and Niko Sünderhauf},
title = {Dropout Sampling for Robust Object Detection in Open-Set Conditions},
booktitle = {2018 {IEEE} International Conference on Robotics and Automation ({ICRA})},
year = {2018},
publisher = {{IEEE}},
month = {may},
doi = {10.1109/icra.2018.8460700},
abstract = {Dropout Variational Inference, or Dropout Sampling, has been recently proposed as an approximation technique for Bayesian Deep Learning and evaluated for image classification and regression tasks. This paper investigates the utility of Dropout Sampling for object detection for the first time. We demonstrate how label uncertainty can be extracted from a state-of-the-art object detection system via Dropout Sampling. We evaluate this approach on a large synthetic dataset of 30,000 images, and a real-world dataset captured by a mobile robot in a versatile campus environment. We show that this uncertainty can be utilized to increase object detection performance under the open-set conditions that are typically encountered in robotic vision. A Dropout Sampling network is shown to achieve a 12.3 \% increase in recall (for the same precision score as a standard network) and a 15.1 \% increase in precision (for the same recall score as the standard network).},
file = {:/home/jim/Documents/Studium/MA/Literatur/31_dropout-sampling-for-robust-object-detection-in-open-set-conditions_miller.pdf:PDF},
owner = {jim},
timestamp = {2019.01.05},
}
@InProceedings{Charles2017,
author = {Charles R. Qi and Hao Su and Kaichun Mo and Leonidas J. Guibas},
title = {{PointNet}: Deep Learning on Point Sets for 3D Classification and Segmentation},
booktitle = {2017 {IEEE} Conference on Computer Vision and Pattern Recognition ({CVPR})},
year = {2017},
publisher = {{IEEE}},
month = {jul},
doi = {10.1109/cvpr.2017.16},
abstract = {Point cloud is an important type of geometric data structure. Due to its irregular format, most researchers transform such data to regular 3D voxel grids or collections of images. This, however, renders data unnecessarily voluminous and causes issues. In this paper, we design a novel type of neural network that directly consumes point clouds, which well respects the permutation invariance of points in the input. Our network, named PointNet, provides a unified architecture for applications ranging from object classification, part segmentation, to scene semantic parsing. Though simple, PointNet is highly efficient and effective. Empirically, it shows strong performance on par or even better than state of the art. Theoretically, we provide analysis towards understanding of what the network has learnt and why the network is robust with respect to input perturbation and corruption.},
file = {:/home/jim/Documents/Studium/MA/Literatur/32_PointNet_qi.pdf:PDF},
owner = {jim},
timestamp = {2019.01.05},
}
@InProceedings{Qi2018,
author = {Charles R. Qi and Wei Liu and Chenxia Wu and Hao Su and Leonidas J. Guibas},
title = {Frustum {PointNets} for 3D Object Detection from {RGB}-D Data},
booktitle = {2018 {IEEE}/{CVF} Conference on Computer Vision and Pattern Recognition},
year = {2018},
publisher = {{IEEE}},
month = {jun},
doi = {10.1109/cvpr.2018.00102},
abstract = {In this work, we study 3D object detection from RGBD data in both indoor and outdoor scenes. While previous methods focus on images or 3D voxels, often obscuring natural 3D patterns and invariances of 3D data, we directly operate on raw point clouds by popping up RGB-D scans. However, a key challenge of this approach is how to efficiently localize objects in point clouds of large-scale scenes (region proposal). Instead of solely relying on 3D proposals, our method leverages both mature 2D object detectors and advanced 3D deep learning for object localization, achieving efficiency as well as high recall for even small objects. Benefited from learning directly in raw point clouds, our method is also able to precisely estimate 3D bounding boxes even under strong occlusion or with very sparse points. Evaluated on KITTI and SUN RGB-D 3D detection benchmarks, our method outperforms the state of the art by remarkable margins while having real-time capability.},
file = {:/home/jim/Documents/Studium/MA/Literatur/33_FrustumPointNet_qi.pdf:PDF},
owner = {jim},
timestamp = {2019.01.05},
}
@InCollection{Liu2016,
author = {Wei Liu and Dragomir Anguelov and Dumitru Erhan and Christian Szegedy and Scott Reed and Cheng-Yang Fu and Alexander C. Berg},
title = {{SSD}: Single Shot {MultiBox} Detector},
booktitle = {Computer Vision {\textendash} {ECCV} 2016},
year = {2016},
publisher = {Springer International Publishing},
pages = {21--37},
doi = {10.1007/978-3-319-46448-0_2},
abstract = {We present a method for detecting objects in images using a single deep neural network. Our approach, named SSD, discretizes the output space of bounding boxes into a set of default boxes over different aspect ratios and scales per feature map location. At prediction time, the network generates scores for the presence of each object category in each default box and produces adjustments to the box to better match the object shape. Additionally, the network combines predictions from multiple feature maps with different resolutions to naturally handle objects of various sizes. SSD is simple relative to methods that require object proposals because it completely eliminates proposal generation and subsequent pixel or feature resampling stages and encapsulates all computation in a single network. This makes SSD easy to train and straightforward to integrate into systems that require a detection component. Experimental results on the PASCAL VOC, COCO, and ILSVRC datasets confirm that SSD has competitive accuracy to methods that utilize an additional object proposal step and is much faster, while providing a unified framework for both training and inference. For 300×300 input, SSD achieves 74.3 \% mAP on VOC2007 test at 59 FPS on a Nvidia Titan X and for 512×512 input, SSD achieves 76.9 \% mAP, outperforming a comparable state of the art Faster R-CNN model. Compared to other single stage methods, SSD has much better accuracy even with a smaller input image size. Code is available at https://github.com/weiliu89/caffe/tree/ssd.},
file = {:/home/jim/Documents/Studium/MA/Literatur/34_SSD_Liu.pdf:PDF},
owner = {jim},
timestamp = {2019.01.05},
}
@Article{Simonyan2014,
author = {Karen Simonyan and Andrew Zisserman},
title = {Very Deep Convolutional Networks for Large-Scale Image Recognition},
journal = {arXiv preprint},
date = {2014-09-04},
eprint = {1409.1556v6},
eprintclass = {cs.CV},
eprinttype = {arXiv},
abstract = {In this work we investigate the effect of the convolutional network depth on its accuracy in the large-scale image recognition setting. Our main contribution is a thorough evaluation of networks of increasing depth using an architecture with very small (3x3) convolution filters, which shows that a significant improvement on the prior-art configurations can be achieved by pushing the depth to 16-19 weight layers. These findings were the basis of our ImageNet Challenge 2014 submission, where our team secured the first and the second places in the localisation and classification tracks respectively. We also show that our representations generalise well to other datasets, where they achieve state-of-the-art results. We have made our two best-performing ConvNet models publicly available to facilitate further research on the use of deep visual representations in computer vision.},
file = {:/home/jim/Documents/Studium/MA/Literatur/35_VGG.pdf:PDF},
keywords = {cs.CV},
owner = {jim},
timestamp = {2019.02.19},
}
@Article{Friedman1996,
author = {Friedman, Batya and Nissenbaum, Helen},
title = {Bias in Computer Systems},
journal = {{ACM} Transactions on Information Systems},
year = {1996},
volume = {14},
number = {3},
pages = {330--347},
doi = {10.1145/230538.230561},
abstract = {From an analysis of actual cases, three categories of bias in computer systems have been developed: preexisting, technical, and emergent. Preexisting bias has its roots in social institutions, practices, and attitudes. Technical bias arises from technical constraints or considerations. Emergent bias arises in a context of use. Although others have pointed to bias in particular computer systems and have noted the general problem, we know of no comparable work that examines this phenomenon comprehensively and which offers a framework for understanding and remedying it. We conclude by suggesting that freedom from bias should be counted among the select set of criteria—including reliability, accuracy, and efficiency—according to which the quality of systems in use in society should be judged.},
address = {New York, NY, USA},
file = {:/home/jim/Documents/Studium/WS2018_19/IWT/prüfungsrelevant/06_Friedman.pdf:PDF},
owner = {jim},
publisher = {ACM},
timestamp = {2019.02.20},
}
@Report{Diakopoulos2014,
author = {Diakopoulos, Nicholas},
title = {Algorithmic Accountability Reporting: On the Investigation of Black Boxes},
type = {resreport},
institution = {Tow Center for Digital Journalism, Columbia University},
year = {2014},
doi = {10.7916/d8zk5tw2},
abstract = {How can we characterize the power that various algorithms may exert on us? And how can we better understand when algorithms might be wronging us? What should be the role of journalists in holding that power to account? In this report I discuss what algorithms are and how they encode power. I then describe the idea of algorithmic accountability, first examining how algorithms problematize and sometimes stand in tension with transparency. Next, I describe how reverse engineering can provide an alternative way to characterize algorithmic power by delineating a conceptual model that captures different investigative scenarios based on reverse engineering algorithms input-output relationships. I then provide a number of illustrative cases and methodological details on how algorithmic accountability reporting might be realized in practice. I conclude with a discussion about broader issues of human resources, legality, ethics, and transparency.},
file = {:/home/jim/Documents/Studium/MA/Literatur/36_Algorithmic-Accountability.pdf:PDF},
keywords = {Journalism, Journalism--Methodology, Journalism--Data processing},
owner = {jim},
publisher = {Columbia University},
timestamp = {2019.02.20},
}
@InProceedings{McCormac2017,
author = {McCormac, John and Handa, Ankur and Leutenegger, Stefan and Davison, Andrew J.},
title = {{SceneNet} {RGB}-D: Can 5M Synthetic Images Beat Generic {ImageNet} Pre-training on Indoor Segmentation?},
booktitle = {2017 {IEEE} International Conference on Computer Vision ({ICCV})},
year = {2017},
publisher = {{IEEE}},
doi = {10.1109/iccv.2017.292},
abstract = {We introduce SceneNet RGB-D, a dataset providing pixel-perfect ground truth for scene understanding problems such as semantic segmentation, instance segmentation, and object detection. It also provides perfect camera poses and depth data, allowing investigation into geometric computer vision problems such as optical flow, camera pose estimation, and 3D scene labelling tasks. Random sampling permits virtually unlimited scene configurations, and here we provide 5M rendered RGB-D images from 16K randomly generated 3D trajectories in synthetic layouts, with random but physically simulated object configurations. We compare the semantic segmentation performance of network weights produced from pre-training on RGB images from our dataset against generic VGG-16 ImageNet weights. After fine-tuning on the SUN RGB-D and NYUv2 real-world datasets we find in both cases that the synthetically pre-trained network outperforms the VGG-16 weights. When synthetic pre-training includes a depth channel (something ImageNet cannot natively provide) the performance is greater still. This suggests that large-scale high-quality synthetic RGB datasets with task-specific labels can be more useful for pre-training than real-world generic pre-training such as ImageNet. We host the dataset at http://robotvault.bitbucket.io/scenenet-rgbd.html.},
file = {:/home/jim/Documents/Studium/MA/Literatur/37_SceneNet.pdf:PDF},
owner = {jim},
timestamp = {2019.02.20},
}
@InCollection{Lin2014,
author = {Tsung-Yi Lin and Michael Maire and Serge Belongie and James Hays and Pietro Perona and Deva Ramanan and Piotr Doll{\'{a}}r and C. Lawrence Zitnick},
title = {Microsoft {COCO}: Common Objects in Context},
booktitle = {Computer Vision {\textendash} {ECCV} 2014},
year = {2014},
publisher = {Springer International Publishing},
pages = {740--755},
doi = {10.1007/978-3-319-10602-1_48},
abstract = {We present a new dataset with the goal of advancing the state-of-the-art in object recognition by placing the question of object recognition in the context of the broader question of scene understanding. This is achieved by gathering images of complex everyday scenes containing common objects in their natural context. Objects are labeled using per-instance segmentations to aid in precise object localization. Our dataset contains photos of 91 objects types that would be easily recognizable by a 4 year old. With a total of 2.5 million labeled instances in 328k images, the creation of our dataset drew upon extensive crowd worker involvement via novel user interfaces for category detection, instance spotting and instance segmentation. We present a detailed statistical analysis of the dataset in comparison to PASCAL, ImageNet, and SUN. Finally, we provide baseline performance analysis for bounding box and segmentation detection results using a Deformable Parts Model.},
file = {:/home/jim/Documents/Studium/MA/Literatur/38_MSCOCO.pdf:PDF},
owner = {jim},
timestamp = {2019.02.20},
}
@Comment{jabref-meta: databaseType:biblatex;}