% Beamer slides: "Deep Sliding Shapes: A Review".
% Preamble: encoding, theme, colours, bibliography (biblatex/biber) and
% listings defaults. Body: title, contents and the "Motivation" section.
\RequirePackage{pdf14}
\documentclass{beamer}

% --- Encoding and language -------------------------------------------------
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
%\usepackage{paralist}
%\useoutertheme{infolines}

% --- General packages ------------------------------------------------------
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{listings}
\usepackage{color}
\usepackage{textcomp}
\usepackage{csquotes}

% --- Theme -----------------------------------------------------------------
\usetheme{Warsaw}
\usecolortheme{crane}
\pagenumbering{arabic}

% Numbering format: "1)", "a)", "(i)" for section / subsection / subsubsection.
\def\thesection{\arabic{section})}
\def\thesubsection{\alph{subsection})}
\def\thesubsubsection{(\roman{subsubsection})}

\setbeamertemplate{navigation symbols}{}

% Image search path. NOTE(review): the second entry is an absolute,
% machine-specific directory -- confirm it is still needed.
\graphicspath{{src/}{/home/jim/Pictures/}}

% Colours referenced by the listings style below.
\definecolor{mygreen}{rgb}{0,0.6,0}
\definecolor{mygray}{rgb}{0.5,0.5,0.5}
\definecolor{mymauve}{rgb}{0.58,0,0.82}

% --- Bibliography ----------------------------------------------------------
\usepackage[
  backend=biber,
  bibstyle=ieee,
  citestyle=ieee,
  minnames=1,
  maxnames=2
]{biblatex}
\addbibresource{bib.bib}

% Make the plain double quote behave as an outer quotation mark (csquotes).
\MakeOuterQuote{"}

%\definecolor{craneorange}{RGB}{61,61,61}
%\definecolor{craneblue}{RGB}{255,255,255}

% --- Listings defaults -----------------------------------------------------
\lstset{%
  backgroundcolor=\color{white},   % background colour of the listing
  basicstyle=\footnotesize,        % font size used for the code
  breakatwhitespace=false,         % allow automatic breaks anywhere, not only at whitespace
  breaklines=true,                 % enable automatic line breaking
  captionpos=b,                    % place the caption at the bottom
  commentstyle=\color{mygray},     % style of comments
  deletekeywords={},               % keywords to remove from the chosen language
  escapeinside={\%*}{*)},          % delimiters for escaping to LaTeX inside code
  extendedchars=true,              % allow non-ASCII characters (8-bit encodings only)
  keepspaces=true,                 % keep spaces, preserving code indentation
  keywordstyle=\color{blue},       % style of keywords
  language=PHP,                    % language of the code
  morekeywords={class, function, return, protected, public, private, const, static, new, extends, namespace, null}, % extra keywords
  numbers=left,                    % line-number position (none, left, right)
  numbersep=5pt,                   % distance between line numbers and code
  numberstyle=\tiny\color{mygray}, % style of the line numbers
  rulecolor=\color{black},         % fixed frame colour, independent of the text colour
  showspaces=false,                % do not mark spaces; overrides showstringspaces
  showstringspaces=false,          % do not mark spaces inside strings
  showtabs=false,                  % do not mark tabs inside strings
  stepnumber=2,                    % number every second line
  stringstyle=\color{mygreen},     % style of string literals
  tabsize=2,                       % default tab size in spaces
  title=\lstname                   % show the file name of \lstinputlisting files
}

\hypersetup{
  pdfauthor={Jim Martens},
  pdfstartview=Fit
}

% Redefine \insertshorttitle so that it prints a right-aligned
% "frame / total" counter instead of the short title. The \expandafter chain
% expands the first token of the body (\raggedleft) once at definition time.
\expandafter\def\expandafter\insertshorttitle\expandafter{%
  \raggedleft \insertframenumber\,/\,\inserttotalframenumber\;}

\begin{document}

\author{Jim Martens}
\title{Deep Sliding Shapes: A Review}
\date{\today}

\begin{frame}
  \titlepage
\end{frame}

\begin{frame}{Contents}
  \tableofcontents
\end{frame}

\section{Motivation}

\begin{frame}{Task}
  \begin{itemize}
    \item object detection is a central task for neural networks
    \vfill
    \item combination of classification and localization tasks
    \vfill
    \item outputs are usually bounding boxes and classifications
  \end{itemize}
\end{frame}

\begin{frame}{Field}
  \begin{itemize}
    \item 2D object detection very mature with Single Shot MultiBox Detector~\cite{Liu2016}
    \vfill
    \item with more availability of depth data, usage of depth becomes more important
    \vfill
    \item early approaches use depth as fourth channel in 2D object detection, for example Depth RCNN~\cite{Gupta2015}
    \vfill
    \item Deep Sliding Shapes~\cite{Song2016} uses 3D data for actual 3D deep learning and uses 2D object detectors
  \end{itemize}
\end{frame}

\section{Method}
% Frames of the "Method" section (the \section command itself precedes this
% block), followed by the "Experimental Results" and "Review" sections and the
% bibliography frame. \vfill between items spreads them over the frame height.
\begin{frame}{Method}
  \begin{enumerate}
    \item encoding 3D representation and normalization
    \vfill
    \item multi-scale 3D region proposal network
    \vfill
    \item joint amodal object recognition network
  \end{enumerate}
\end{frame}

\begin{frame}{Representation and Normalization}
  \begin{itemize}
    \item raw 3D space divided into equally spaced 3D voxel grid
    \vfill
    \item data encoded by Truncated Signed Distance Function
    \vfill
    \item each voxel stores distance from its center to surface of input depth map and direction of each surface point
    \vfill
    \item every scene is rotated to align with gravity direction
    \vfill
    \item major room directions are used for proposal orientations
  \end{itemize}
\end{frame}

\begin{frame}{Region Proposal Network}
  \begin{itemize}
    \item proposes a few interesting regions for the object recognition network
    \vfill
    \item each region proposal corresponds to one anchor box
    \vfill
    \item two scales are used since anchor box size varies a lot (from 0.3 to 2 meters)
    \vfill
    \item a full 3D convolutional architecture is used
    \vfill
    \item after the calculation of the region proposals, several criteria have to be met for a region to be proposed
    \vfill
    \item in the end only the top 2000 regions move on (after the convolution, dropping only the regions with a point density lower than 0.005 points per cubic centimeter leaves a total of 107674 regions on average)
  \end{itemize}
\end{frame}

\begin{frame}{Object Recognition Network}
  \begin{itemize}
    \item starts with both 3D and 2D object recognition networks
    \vfill
    \item VGGnet pretrained on ImageNet is used for extracting colour features
    \vfill
    \item resulting feature vectors of both networks are concatenated
    \vfill
    \item at the end two separate fully connected layers predict object label and 3D bounding box
    \vfill
    \item some outlier protection measures are applied
  \end{itemize}
\end{frame}

\section{Experimental Results}

\begin{frame}{Evaluation}
  \begin{itemize}
    \item evaluated on NYUv2~\cite{Silberman2012} and SUN RGB-D~\cite{Song2015}
    \vfill
    \item threshold of 0.25 used for average recall of proposal generation and average precision of detection
    \vfill
    \item ground truth bounding boxes obtained from SUN RGB-D
    \vfill
    \item single-scale RPN, multi-scale RPN and multi-scale RPN with RGB colour usage (RGB colour encoded in 3D TSDF) were compared against each other and the baselines using the NYU data set
    \vfill
    \item 3D selective search and naive 2D to 3D conversion used as baselines
    \vfill
    \item second experiment tested ORN with different region proposals
  \end{itemize}
\end{frame}

\begin{frame}{Results}
  \begin{itemize}
    \item works well on non-planar objects with depth information
    \vfill
    \item 2D component helps in distinguishing similarly shaped objects
    \vfill
    \item 3D Deep Sliding Shapes outperforms chosen state-of-the-art methods
  \end{itemize}
\end{frame}

\section{Review}

\begin{frame}{Review}
  \begin{itemize}
    \item idea to use 3D data directly very intriguing
    \vfill
    \item high-level structure of region proposal followed by object recognition is visible in more recent approaches like Frustum Pointnet~\cite{Qi2017} as well
    \vfill
    \item motivations for used data sets NYUv2 and SUN RGB-D unclear
    \vfill
    \item no information on process of "obtaining" ground truth bounding boxes from SUN RGB-D data set
    \vfill
    \item no implementation details provided
  \end{itemize}
\end{frame}

% allowframebreaks lets the bibliography continue on further frames if needed.
\begin{frame}[allowframebreaks]{References}
  \printbibliography
\end{frame}

\end{document}