427 lines
14 KiB
TeX
427 lines
14 KiB
TeX
|
\documentclass{beamer}
|
||
|
\usepackage[T1]{fontenc}
|
||
|
\usepackage[utf8]{inputenc}
|
||
|
\usepackage[english]{babel}
|
||
|
%\usepackage{paralist}
|
||
|
%\useoutertheme{infolines}
|
||
|
\usepackage{graphicx}
|
||
|
\usepackage{hyperref}
|
||
|
\usepackage{listings}
|
||
|
\usepackage{color}
|
||
|
\usepackage{textcomp}
|
||
|
\usepackage{csquotes}
|
||
|
\usetheme{Warsaw}
|
||
|
\usecolortheme{crane}
|
||
|
\pagenumbering{arabic}
|
||
|
\def\thesection{\arabic{section})}
|
||
|
\def\thesubsection{\alph{subsection})}
|
||
|
\def\thesubsubsection{(\roman{subsubsection})}
|
||
|
\setbeamertemplate{navigation symbols}{}
|
||
|
\graphicspath{ {src/} {/home/jim/Pictures/} }
|
||
|
|
||
|
\definecolor{mygreen}{rgb}{0,0.6,0}
|
||
|
\definecolor{mygray}{rgb}{0.5,0.5,0.5}
|
||
|
\definecolor{mymauve}{rgb}{0.58,0,0.82}
|
||
|
|
||
|
\usepackage[
|
||
|
backend=biber,
|
||
|
bibstyle=authoryear,
|
||
|
citestyle=authoryear,
|
||
|
minnames=1,
|
||
|
maxnames=2
|
||
|
]{biblatex}
|
||
|
|
||
|
\addbibresource{ma.bib}
|
||
|
|
||
|
\MakeOuterQuote{"}
|
||
|
|
||
|
%\definecolor{craneorange}{RGB}{61,61,61}
|
||
|
%\definecolor{craneblue}{RGB}{255,255,255}
|
||
|
|
||
|
\lstset{ %
|
||
|
backgroundcolor=\color{white}, % choose the background color; you must add \usepackage{color} or \usepackage{xcolor}
|
||
|
basicstyle=\footnotesize, % the size of the fonts that are used for the code
|
||
|
breakatwhitespace=false, % sets if automatic breaks should only happen at whitespace
|
||
|
breaklines=true, % sets automatic line breaking
|
||
|
captionpos=b, % sets the caption-position to bottom
|
||
|
commentstyle=\color{mygray}, % comment style
|
||
|
deletekeywords={}, % if you want to delete keywords from the given language
|
||
|
escapeinside={\%*}{*)}, % if you want to add LaTeX within your code
|
||
|
extendedchars=true, % lets you use non-ASCII characters; for 8-bits encodings only, does not work with UTF-8
|
||
|
keepspaces=true, % keeps spaces in text, useful for keeping indentation of code (possibly needs columns=flexible)
|
||
|
keywordstyle=\color{blue}, % keyword style
|
||
|
language=PHP, % the language of the code
|
||
|
morekeywords={class, function, return, protected, public, private, const, static, new, extends, namespace, null}, % if you want to add more keywords to the set
|
||
|
numbers=left, % where to put the line-numbers; possible values are (none, left, right)
|
||
|
numbersep=5pt, % how far the line-numbers are from the code
|
||
|
numberstyle=\tiny\color{mygray}, % the style that is used for the line-numbers
|
||
|
rulecolor=\color{black}, % if not set, the frame-color may be changed on line-breaks within not-black text (e.g. comments (green here))
|
||
|
showspaces=false, % show spaces everywhere adding particular underscores; it overrides 'showstringspaces'
|
||
|
showstringspaces=false, % underline spaces within strings only
|
||
|
showtabs=false, % show tabs within strings adding particular underscores
|
||
|
stepnumber=2, % the step between two line-numbers. If it's 1, each line will be numbered
|
||
|
stringstyle=\color{mygreen}, % string literal style
|
||
|
tabsize=2, % sets default tabsize to 2 spaces
|
||
|
title=\lstname % show the filename of files included with \lstinputlisting; also try caption instead of title
|
||
|
}
|
||
|
|
||
|
\hypersetup{
|
||
|
pdfauthor=Jim Martens,
|
||
|
pdfstartview=Fit
|
||
|
}
|
||
|
|
||
|
\expandafter\def\expandafter\insertshorttitle\expandafter{%
|
||
|
\raggedleft \insertframenumber\,/\,\inserttotalframenumber\;}
|
||
|
|
||
|
\begin{document}
|
||
|
\author{Jim Martens}
|
||
|
\title{Novelty detection for object detection in open set conditions}
|
||
|
\date{October 18th, 2019}
|
||
|
|
||
|
\begin{frame}
|
||
|
\titlepage
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}{Contents}
|
||
|
\tableofcontents
|
||
|
\end{frame}
|
||
|
|
||
|
\section{Motivation}
|
||
|
\begin{frame}{Problem}
|
||
|
\begin{itemize}
|
||
|
\item network trained with one set of objects
|
||
|
\vfill
|
||
|
\item encounters other objects in reality
|
||
|
\vfill
|
||
|
\item cannot properly classify or detect unknown
|
||
|
object types
|
||
|
\end{itemize}
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}{Open Set}
|
||
|
\centering
|
||
|
\begin{figure}[h]
|
||
|
\includegraphics[scale=0.8]{images/open-set.pdf}
|
||
|
\caption{Open set problem: test set contains classes that were not present during training. Resembles figure 1 of \cite{Miller2018}}
|
||
|
\end{figure}
|
||
|
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}{Approaches}
|
||
|
\begin{itemize}
|
||
|
\item somehow detect or identify novel input
|
||
|
\vfill
|
||
|
\item \cite{Pimentel2014} provide overview of different approaches
|
||
|
\vfill
|
||
|
\item will look at reconstruction-based novelty detection
|
||
|
\vfill
|
||
|
\item in particular, dropout sampling
|
||
|
\end{itemize}
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}{Hypothesis}
|
||
|
\centering
|
||
|
Dropout sampling delivers better object detection performance under open set conditions compared to object detection without it.
|
||
|
\end{frame}
|
||
|
|
||
|
\section{Background}
|
||
|
\begin{frame}{Related Works}
|
||
|
\begin{itemize}
|
||
|
\item Bayesian networks can be used to model uncertainty (\cite{Ghahramani2015})
|
||
|
\vfill
|
||
|
\item \cite{Gal2017} shows that dropout training resembles a general approximate Bayesian model
|
||
|
\vfill
|
||
|
\item \cite{Teye2018} argue that most modern networks have adopted other regularisation techniques
|
||
|
\vfill
|
||
|
\item \cite{Ioffe2015} introduce batch normalisation
|
||
|
\end{itemize}
|
||
|
\end{frame}
|
||
|
|
||
|
\section{Methods}
|
||
|
|
||
|
\subsection{Structure}
|
||
|
|
||
|
\begin{frame}{Vanilla SSD}
|
||
|
\centering
|
||
|
\begin{figure}[h]
|
||
|
\includegraphics{images/vanilla-ssd.pdf}
|
||
|
\caption{Vanilla SSD network as defined by \cite{Liu2016}}
|
||
|
\end{figure}
|
||
|
\begin{itemize}
|
||
|
\item detects objects of differing sizes
|
||
|
\end{itemize}
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}{Bayesian SSD}
|
||
|
\centering
|
||
|
\begin{figure}[h]
|
||
|
\includegraphics[scale=0.95]{images/bayesian-ssd.pdf}
|
||
|
\caption{Bayesian SSD network as defined by \cite{Miller2018}}
|
||
|
\end{figure}
|
||
|
\begin{itemize}
|
||
|
\item adds dropout layers and multiple forward passes
|
||
|
\end{itemize}
|
||
|
\end{frame}
|
||
|
|
||
|
\subsection{Decoding Pipelines}
|
||
|
|
||
|
\begin{frame}{Vanilla SSD}
|
||
|
\begin{enumerate}
|
||
|
\item (batch\_size, \#nr\_boxes, \#nr\_classes + 12)
|
||
|
\vfill
|
||
|
\item transforming relative into absolute coordinates
|
||
|
\vfill
|
||
|
\item per-class confidence threshold
|
||
|
\vfill
|
||
|
\item per-class non-maximum suppression
|
||
|
\vfill
|
||
|
\item top \(k\)
|
||
|
\end{enumerate}
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}{Vanilla SSD with Entropy Threshold}
|
||
|
\begin{enumerate}
|
||
|
\item (batch\_size, \#nr\_boxes, \#nr\_classes + 12)
|
||
|
\vfill
|
||
|
\item transforming relative into absolute coordinates
|
||
|
\vfill
|
||
|
\item \textbf{entropy thresholding}
|
||
|
\vfill
|
||
|
\item per-class confidence threshold
|
||
|
\vfill
|
||
|
\item per-class non-maximum suppression
|
||
|
\vfill
|
||
|
\item top \(k\)
|
||
|
\end{enumerate}
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}{Bayesian SSD}
|
||
|
\begin{enumerate}
|
||
|
\item (batch\_size, \#nr\_boxes \(\cdot\) \#nr\_forward\_passes, \#nr\_classes + 12)
|
||
|
\vfill
|
||
|
\item transforming relative into absolute coordinates
|
||
|
\vfill
|
||
|
\item confidence threshold for class with highest confidence
|
||
|
\vfill
|
||
|
\item discard predictions with background 0.8 or higher
|
||
|
\vfill
|
||
|
\item partition into observations
|
||
|
\vfill
|
||
|
\item entropy thresholding
|
||
|
\vfill
|
||
|
\item per-class confidence threshold
|
||
|
\vfill
|
||
|
\item per-class non-maximum suppression
|
||
|
\vfill
|
||
|
\item top \(k\)
|
||
|
\end{enumerate}
|
||
|
\end{frame}
|
||
|
|
||
|
\section{Results and Interpretation}
|
||
|
|
||
|
\subsection{Data}
|
||
|
|
||
|
\begin{frame}{MS COCO}
|
||
|
\begin{table}[htbp]
|
||
|
\centering
|
||
|
\begin{tabular}{rcc}
|
||
|
\hline
|
||
|
class & number of & percentage of \\
|
||
|
& detections & total \\
|
||
|
\hline
|
||
|
total & 31,991 & 100\% \\
|
||
|
persons & 10,988 & 34.3\% \\
|
||
|
cars & 1,932 & 6\% \\
|
||
|
chairs & 1,791 & 5.6\% \\
|
||
|
bottles & 1,021 & 3.2\% \\
|
||
|
cups & 898 & 2.8\% \\
|
||
|
\hline
|
||
|
\end{tabular}
|
||
|
|
||
|
\caption{Number of ground truth detections per class (top 5).}
|
||
|
\end{table}
|
||
|
\end{frame}
|
||
|
|
||
|
\subsection{Results}
|
||
|
|
||
|
\begin{frame}[allowframebreaks]{Micro Averaging}
|
||
|
|
||
|
\begin{table}[ht]
|
||
|
\begin{tabular}{rcccc}
|
||
|
\hline
|
||
|
& max & abs OSE & Recall & Precision\\
|
||
|
& \(F_1\) Score & \multicolumn{3}{c}{at max \(F_1\) point} \\
|
||
|
\hline
|
||
|
0.01 conf & 0.255 & 3176 & 0.214 & 0.318 \\
|
||
|
0.2 conf & \textbf{0.376} & 2939 & \textbf{0.382} & 0.372 \\
|
||
|
entropy - 0.01 conf & 0.255 & 3168 & 0.214 & 0.318 \\
|
||
|
% entropy thresh: 2.4 for vanilla SSD is best
|
||
|
\hline
|
||
|
\end{tabular}
|
||
|
\caption{Vanilla SSD rounded results for micro averaging.}
|
||
|
\end{table}
|
||
|
|
||
|
\begin{table}[ht]
|
||
|
\begin{tabular}{rcccc}
|
||
|
\hline
|
||
|
& max & abs OSE & Recall & Precision\\
|
||
|
& \(F_1\) Score & \multicolumn{3}{c}{at max \(F_1\) point} \\
|
||
|
\hline
|
||
|
no DO - 0.2 conf & 0.209 & 2709 & 0.300 & 0.161 \\
|
||
|
no DO - 0.2 conf - NMS& 0.371 & \textbf{2335} & 0.365 & \textbf{0.378} \\
|
||
|
0.9 keep - 0.2 conf - NMS & 0.359 & 2584 & 0.363 & 0.357 \\
|
||
|
0.5 keep - 0.2 conf - NMS & 0.325 & 2759 & 0.342 & 0.311 \\
|
||
|
\hline
|
||
|
\end{tabular}
|
||
|
\caption{Bayesian SSD rounded results for micro averaging. All variants use 10 forward passes.}
|
||
|
\end{table}
|
||
|
\end{frame}
|
||
|
|
||
|
|
||
|
\begin{frame}[allowframebreaks]{Macro Averaging}
|
||
|
|
||
|
\begin{table}[ht]
|
||
|
\begin{tabular}{rcccc}
|
||
|
\hline
|
||
|
& max & abs OSE & Recall & Precision\\
|
||
|
& \(F_1\) Score & \multicolumn{3}{c}{at max \(F_1\) point} \\
|
||
|
\hline
|
||
|
0.01 conf & 0.370 & 1426 & 0.328 & 0.424 \\
|
||
|
0.2 conf & \textbf{0.375} & 1218 & \textbf{0.338} & 0.424 \\
|
||
|
entropy - 0.01 conf & 0.370 & 1373 & 0.329 & \textbf{0.425} \\
|
||
|
\hline
|
||
|
\end{tabular}
|
||
|
\caption{Vanilla SSD rounded results for macro averaging.}
|
||
|
\end{table}
|
||
|
|
||
|
\begin{table}[ht]
|
||
|
\begin{tabular}{rcccc}
|
||
|
\hline
|
||
|
& max & abs OSE & Recall & Precision\\
|
||
|
& \(F_1\) Score & \multicolumn{3}{c}{at max \(F_1\) point} \\
|
||
|
\hline
|
||
|
no DO - 0.2 conf & 0.226 & \textbf{809} & 0.229 & 0.224 \\
|
||
|
no DO - 0.2 conf - NMS & 0.363 & 1057 & 0.321 & 0.420 \\
|
||
|
0.9 keep - 0.2 conf - NMS & 0.355 & 1137 & 0.320 & 0.399 \\
|
||
|
0.5 keep - 0.2 conf - NMS & 0.322 & 1264 & 0.307 & 0.340 \\
|
||
|
\hline
|
||
|
\end{tabular}
|
||
|
\caption{Bayesian SSD rounded results for macro averaging. All variants use 10 forward passes.}
|
||
|
\end{table}
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}{Example: Vanilla SSD}
|
||
|
\centering
|
||
|
\begin{figure}[h]
|
||
|
\includegraphics[width=\textwidth]{images/COCO_val2014_000000336587_bboxes_vanilla.png}
|
||
|
\end{figure}
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}{Example: Bayesian SSD 0.9 keep ratio}
|
||
|
\centering
|
||
|
\begin{figure}[h]
|
||
|
\includegraphics[width=\textwidth]{images/COCO_val2014_000000336587_bboxes_bayesian.png}
|
||
|
\end{figure}
|
||
|
\end{frame}
|
||
|
|
||
|
\subsection{Interpretation}
|
||
|
|
||
|
\begin{frame}[allowframebreaks]{Effect of NMS}
|
||
|
\begin{table}[tbp]
|
||
|
\centering
|
||
|
\begin{tabular}{rccc}
|
||
|
\hline
|
||
|
variant & before & after & after \\
|
||
|
& entropy/NMS & entropy/NMS & top \(k\) \\
|
||
|
\hline
|
||
|
no DO, no NMS & 155,251 & 122,868 & 72,207 \\
|
||
|
no DO, NMS & 155,250 & 36,061 & 33,827 \\
|
||
|
\hline
|
||
|
\end{tabular}
|
||
|
|
||
|
\caption{Comparison of Bayesian SSD variants without dropout with
|
||
|
respect to the number of detections before the entropy threshold,
|
||
|
after it and/or NMS, and after top \(k\). The
|
||
|
entropy threshold 1.5 was used for both.}
|
||
|
\end{table}
|
||
|
|
||
|
\begin{table}[htbp]
|
||
|
\centering
|
||
|
\begin{tabular}{rccc}
|
||
|
\hline
|
||
|
variant & before & after & after \\
|
||
|
& entropy/NMS & entropy/NMS & top \(k\) \\
|
||
|
\hline
|
||
|
no DO, no NMS & 19,014 & 48,484 & 27,707 \\
|
||
|
no DO, NMS & 19,014 & 14,542 & 13,486 \\
|
||
|
\hline
|
||
|
\end{tabular}
|
||
|
|
||
|
\caption{Comparison of Bayesian SSD variants without dropout with
|
||
|
respect to the number of detections before the entropy threshold,
|
||
|
after it and/or NMS, and after top \(k\). The
|
||
|
entropy threshold 1.5 was used for both. The numbers are for
|
||
|
the persons class.}
|
||
|
\end{table}
|
||
|
|
||
|
\begin{table}[htbp]
|
||
|
\centering
|
||
|
\begin{tabular}{rccc}
|
||
|
\hline
|
||
|
variant & before & after & after \\
|
||
|
& entropy/NMS & entropy/NMS & top \(k\) \\
|
||
|
\hline
|
||
|
no DO, no NMS & 1,011 & 1,785 & 1,458 \\
|
||
|
no DO, NMS & 1,011 & 426 & 425 \\
|
||
|
\hline
|
||
|
\end{tabular}
|
||
|
|
||
|
\caption{Comparison of Bayesian SSD variants without dropout with
|
||
|
respect to the number of detections before the entropy threshold,
|
||
|
after it and/or NMS, and after top \(k\). The
|
||
|
entropy threshold 1.5 was used for both. The numbers are for
|
||
|
the dogs class.}
|
||
|
\end{table}
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}{Effect of Dropout on Observations}
|
||
|
\begin{table}[tbp]
|
||
|
\centering
|
||
|
\begin{tabular}{rcc}
|
||
|
\hline
|
||
|
variant & after & after \\
|
||
|
& prediction & observation grouping \\
|
||
|
\hline
|
||
|
no DO, NMS & 1,677,050 & 155,250 \\
|
||
|
keep rate 0.9, NMS & 1,617,675 & 549,166 \\
|
||
|
\hline
|
||
|
\end{tabular}
|
||
|
|
||
|
\caption{Comparison of Bayesian SSD variants without dropout and with
|
||
|
0.9 keep ratio of dropout with
|
||
|
respect to the number of detections directly after the network
|
||
|
predictions and after the observation grouping.}
|
||
|
\end{table}
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}{Conclusion}
|
||
|
\begin{itemize}
|
||
|
\item hypothesis was not confirmed
|
||
|
\vfill
|
||
|
\item worse performance than reported by \cite{Miller2018}
|
||
|
\vfill
|
||
|
\item reason for difference should be investigated
|
||
|
\end{itemize}
|
||
|
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}{Questions}
|
||
|
\centering
|
||
|
Time for questions!
|
||
|
\end{frame}
|
||
|
|
||
|
\begin{frame}[t,allowframebreaks]{References}
|
||
|
\printbibliography
|
||
|
\end{frame}
|
||
|
\end{document}
|