mirror of https://github.com/2martens/uni.git
239 lines
8.8 KiB
TeX
239 lines
8.8 KiB
TeX
\RequirePackage{pdf14}
|
|
\documentclass{beamer}
|
|
\usepackage[T1]{fontenc}
|
|
\usepackage[utf8]{inputenc}
|
|
\usepackage[english]{babel}
|
|
%\usepackage{paralist}
|
|
%\useoutertheme{infolines}
|
|
\usepackage{graphicx}
|
|
\usepackage{hyperref}
|
|
\usepackage{listings}
|
|
\usepackage{color}
|
|
\usepackage{textcomp}
|
|
\usepackage{csquotes}
|
|
\usetheme{Warsaw}
|
|
\usecolortheme{crane}
|
|
% Number pages with arabic numerals.
\pagenumbering{arabic}
|
|
% Section labels: arabic number followed by a closing parenthesis, e.g. "1)".
\def\thesection{\arabic{section})}
|
|
% Subsection labels: lowercase letter followed by a closing parenthesis, e.g. "a)".
\def\thesubsection{\alph{subsection})}
|
|
% Subsubsection labels: lowercase roman numeral in parentheses, e.g. "(i)".
\def\thesubsubsection{(\roman{subsubsection})}
|
|
% Replace beamer's navigation symbols template with an empty one.
\setbeamertemplate{navigation symbols}{}
|
|
% Directories searched by \includegraphics.
% NOTE(review): /home/jim/Pictures/ is an absolute, machine-specific path;
% confirm it is still needed before building on another machine.
\graphicspath{ {src/} {/home/jim/Pictures/} }
|
|
|
|
% Custom colours. mygreen and mygray are referenced by the \lstset call in
% this file (string style, comment style and line-number style).
% mymauve is not referenced in the visible source.
\definecolor{mygreen}{rgb}{0,0.6,0}
|
|
\definecolor{mygray}{rgb}{0.5,0.5,0.5}
|
|
\definecolor{mymauve}{rgb}{0.58,0,0.82}
|
|
|
|
\usepackage[
|
|
backend=biber,
|
|
bibstyle=ieee,
|
|
citestyle=ieee,
|
|
minnames=1,
|
|
maxnames=2
|
|
]{biblatex}
|
|
|
|
\addbibresource{bib.bib}
|
|
|
|
% csquotes: make the straight double quote an active outer quotation mark,
% so that "text" in the slides is typeset like \enquote{text}.
\MakeOuterQuote{"}
|
|
|
|
%\definecolor{craneorange}{RGB}{61,61,61}
|
|
%\definecolor{craneblue}{RGB}{255,255,255}
|
|
|
|
% Global defaults for the listings package; each key is explained inline.
% NOTE(review): language=PHP and the PHP-oriented morekeywords look inherited
% from a template; no listing is used in the visible slides. Confirm whether
% this block is still needed.
\lstset{ %
|
|
backgroundcolor=\color{white}, % choose the background color; you must add \usepackage{color} or \usepackage{xcolor}
|
|
basicstyle=\footnotesize, % the size of the fonts that are used for the code
|
|
breakatwhitespace=false, % sets if automatic breaks should only happen at whitespace
|
|
breaklines=true, % sets automatic line breaking
|
|
captionpos=b, % sets the caption-position to bottom
|
|
commentstyle=\color{mygray}, % comment style
|
|
deletekeywords={}, % if you want to delete keywords from the given language
|
|
escapeinside={\%*}{*)}, % if you want to add LaTeX within your code
|
|
extendedchars=true, % lets you use non-ASCII characters; for 8-bits encodings only, does not work with UTF-8
|
|
keepspaces=true, % keeps spaces in text, useful for keeping indentation of code (possibly needs columns=flexible)
|
|
keywordstyle=\color{blue}, % keyword style
|
|
language=PHP, % the language of the code
|
|
morekeywords={class, function, return, protected, public, private, const, static, new, extends, namespace, null}, % if you want to add more keywords to the set
|
|
numbers=left, % where to put the line-numbers; possible values are (none, left, right)
|
|
numbersep=5pt, % how far the line-numbers are from the code
|
|
numberstyle=\tiny\color{mygray}, % the style that is used for the line-numbers
|
|
rulecolor=\color{black}, % if not set, the frame-color may be changed on line-breaks within not-black text (e.g. comments (green here))
|
|
showspaces=false, % show spaces everywhere adding particular underscores; it overrides 'showstringspaces'
|
|
showstringspaces=false, % underline spaces within strings only
|
|
showtabs=false, % show tabs within strings adding particular underscores
|
|
stepnumber=2, % the step between two line-numbers. If it's 1, each line will be numbered
|
|
stringstyle=\color{mygreen}, % string literal style
|
|
tabsize=2, % sets default tabsize to 2 spaces
|
|
title=\lstname % show the filename of files included with \lstinputlisting; also try caption instead of title
|
|
}
|
|
|
|
% PDF metadata and viewer settings (hyperref):
% pdfauthor sets the author field of the PDF document information,
% pdfstartview=Fit asks the viewer to open with the whole page fitted to the window.
\hypersetup{
|
|
pdfauthor=Jim Martens,
|
|
pdfstartview=Fit
|
|
}
|
|
|
|
% Redefine \insertshorttitle so that it produces the right-aligned frame
% counter "current / total" instead of the short title.
% Presumably this puts the frame counter into the theme's footline where the
% short title would normally appear -- confirm against the Warsaw theme.
% NOTE(review): the \expandafter chain looks like a leftover from an
% "append to existing macro" idiom; confirm whether a plain redefinition
% would be sufficient here.
\expandafter\def\expandafter\insertshorttitle\expandafter{%
|
|
\raggedleft \insertframenumber\,/\,\inserttotalframenumber\;}
|
|
|
|
\begin{document}
|
|
\author{Jim 2martens}
|
|
\title{Deep Sliding Shapes: A Review}
|
|
\date{July 4th, 2018}
|
|
|
|
\begin{frame}
|
|
\titlepage
|
|
\end{frame}
|
|
|
|
\begin{frame}{Contents}
|
|
\tableofcontents
|
|
\end{frame}
|
|
|
|
\section{Motivation}
|
|
\begin{frame}{Task}
|
|
\begin{itemize}
|
|
\item object detection is a central task for neural networks
|
|
\vfill
|
|
\item combination of classification and localization tasks
|
|
\vfill
|
|
\item outputs are usually bounding boxes and classifications
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Field}
|
|
\begin{itemize}
|
|
\item 2D object detection very mature with Single Shot MultiBox Detector~\cite{Liu2016}
|
|
\vfill
|
|
\item with more availability of depth data, usage of depth becomes more
|
|
important
|
|
\vfill
|
|
\item early approaches use depth as fourth channel in 2D object detection,
|
|
for example Depth RCNN~\cite{Gupta2015}
|
|
\vfill
|
|
\item Deep Sliding Shapes~\cite{Song2016} uses 3D data for actual 3D deep
|
|
learning and uses 2D object detectors
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\section{Method}
|
|
\begin{frame}{Method}
|
|
\begin{enumerate}
|
|
\item encoding 3D representation and normalization
|
|
\vfill
|
|
\item multi-scale 3D region proposal network
|
|
\vfill
|
|
\item joint amodal object recognition network
|
|
\end{enumerate}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Representation and Normalization}
|
|
\begin{itemize}
|
|
\item raw 3D space divided into equally spaced 3D voxel grid
|
|
\vfill
|
|
\item data encoded by Truncated Signed Distance Function
|
|
\vfill
|
|
\item each voxel stores distance from its center to surface of input depth
|
|
map and direction of each surface point
|
|
\vfill
|
|
\item every scene is rotated to align with gravity direction
|
|
\vfill
|
|
\item major room directions are used for proposal orientations
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Region Proposal Network}
|
|
\begin{itemize}
|
|
\item proposes a few interesting regions for the object recognition network
|
|
\vfill
|
|
\item each region proposal corresponds to one anchor box
|
|
\vfill
|
|
\item two scales are used since anchor box size varies a lot (from 0.3
|
|
to 2 meters)
|
|
\vfill
|
|
\item a full 3D convolutional architecture is used
|
|
\vfill
|
|
\item after the region proposals are calculated, regions have to meet
|
|
multiple criteria in order to be proposed
|
|
\vfill
|
|
\item in the end only the top 2000 regions move on (when, after the
|
|
convolution, only regions with a point density lower than 0.005 points
|
|
per cubic centimeter are dropped, 107674 regions remain on average)
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Object Recognition Network}
|
|
\begin{itemize}
|
|
\item starts with both 3D and 2D object recognition networks
|
|
\vfill
|
|
\item VGGnet pretrained on ImageNet is used for extracting colour features
|
|
\vfill
|
|
\item resulting feature vectors of both networks are concatenated
|
|
\vfill
|
|
\item at the end two separate fully connected layers predict object label
|
|
and 3D bounding box
|
|
\vfill
|
|
\item some outlier protection measures are applied
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\section{Experimental Results}
|
|
\begin{frame}{Evaluation}
|
|
\begin{itemize}
|
|
\item evaluated on NYUv2~\cite{Silberman2012} and SUN RGB-D~\cite{Song2015}
|
|
\vfill
|
|
\item threshold of 0.25 used for average recall of proposal generation and
|
|
average precision of detection
|
|
\vfill
|
|
\item ground truth bounding boxes obtained from SUN RGB-D
|
|
\vfill
|
|
\item single-scale RPN, multi-scale RPN and multi-scale RPN with RGB colour
|
|
usage (RGB colour encoded in 3D TSDF) were compared against each
|
|
other and the baselines using the NYU data set
|
|
\vfill
|
|
\item 3D selective search and naive 2D to 3D conversion used as baselines
|
|
\vfill
|
|
\item second experiment tested ORN with different region proposals
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}{Results}
|
|
\begin{itemize}
|
|
\item works well on non-planar objects with depth information
|
|
\vfill
|
|
\item 2D component helps in distinguishing similarly shaped objects
|
|
\vfill
|
|
\item 3D Deep Sliding Shapes outperforms chosen state-of-the-art methods
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\section{Review}
|
|
\begin{frame}{Review}
|
|
\begin{itemize}
|
|
\item idea to use 3D data directly very intriguing
|
|
\vfill
|
|
\item high-level structure of region proposal followed by object recognition
|
|
is visible in more recent approaches like Frustum PointNets~\cite{Qi2017}
|
|
as well
|
|
\vfill
|
|
\item motivations for used data sets NYUv2 and SUN RGB-D unclear
|
|
\vfill
|
|
\item no information on process of "obtaining" ground truth bounding boxes
|
|
from SUN RGB-D data set
|
|
\vfill
|
|
\item no implementation details provided
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\section{Conclusion}
|
|
\begin{frame}{Conclusion}
|
|
\begin{itemize}
|
|
\item 3D Deep Sliding Shapes should be compared to other 3D-centric approaches
|
|
like Frustum PointNets
|
|
\vfill
|
|
\item structural comparison with other 3D approaches is interesting
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}[allowframebreaks]{References}
|
|
\printbibliography
|
|
\end{frame}
|
|
\end{document}
|