From e2a7fcb16c7526282e8e3eb6f1d96958ada9476f Mon Sep 17 00:00:00 2001 From: Jim Martens Date: Thu, 26 Sep 2019 13:39:17 +0200 Subject: [PATCH] Added class-specific results Signed-off-by: Jim Martens --- body.tex | 153 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) diff --git a/body.tex b/body.tex index 199428c..7b84745 100644 --- a/body.tex +++ b/body.tex @@ -861,6 +861,159 @@ are included. All plotted variants show a similar behaviour that is in line with previously reported figures, such as the ones in Miller et al.~\cite{Miller2018} +\subsection{Class-specific results} + +As mentioned before, the data set is imbalanced with respect to its +classes: four classes make up roughly 50\% of all ground truth +detections. Therefore, it is interesting to see the performance +of the tested variants with respect to these classes: persons, cars, +chairs, and bottles. Additionally, the results of the giraffe class are +presented as these are exceptionally good, although the class makes up +only 0.7\% of the ground truth. + +\begin{table}[htbp] + \begin{tabular}{rccc} + \hline + Forward & max & Recall & Precision\\ + Passes & \(F_1\) Score & \multicolumn{2}{c}{at max \(F_1\) point} \\ + \hline + vanilla SSD - 0.01 conf & 0.460 & \textbf{0.405} & 0.532 \\ + vanilla SSD - 0.2 conf & \textbf{0.460} & \textbf{0.405} & \textbf{0.533} \\ + SSD with Entropy test - 0.01 conf & 0.460 & 0.405 & 0.532 \\ + % entropy thresh: 1.7 for vanilla SSD is best + \hline + Bay. SSD - no DO - 0.2 conf - no NMS \; 10 & 0.272 & 0.292 & 0.256 \\ + no dropout - 0.2 conf - NMS \; 10 & 0.451 & 0.403 & 0.514 \\ + 0.9 keep ratio - 0.2 conf - NMS \; 10 & 0.447 & 0.401 & 0.505 \\ + 0.5 keep ratio - 0.2 conf - NMS \; 10 & 0.410 & 0.368 & 0.465 \\ + % entropy thresh: 1.2 for Bayesian - 2 is best, 0.4 for 3 + % entropy thresh: 0.7 for Bayesian - 6 is best, 1.5 for 7 + % 1.7 for 8, 2.0 for 9 + \hline + \end{tabular} + \caption{Rounded results for persons class. 
SSD with Entropy test and Bayesian SSD are represented with + their best performing macro averaging entropy threshold with respect to \(F_1\) score.} + \label{tab:results-persons} +\end{table} + +It is clearly visible that the overall trend continues in the individual +classes (see tables~\ref{tab:results-persons} through~\ref{tab:results-giraffes}). However, the two vanilla SSD variants with a confidence +threshold of only 0.01 perform better than in the averaged results presented earlier. +Only in the chairs class does a Bayesian SSD variant perform better (in +precision) than any of the vanilla SSD variants. Moreover, there are +multiple classes where two or all of the vanilla SSD variants perform +equally well. When compared with the macro-averaged results, +giraffes and persons perform better across the board. Cars have a higher +precision than average but lower recall values for all but the Bayesian +SSD variant without NMS and dropout. Chairs and bottles perform +worse than average. + +The giraffe class illustrates the difference between macro and micro +averaging very well: in macro averaging, the persons class and the giraffe +class have the same impact. With micro averaging, the outliers of the +giraffe class are negligible. + +\begin{table}[htbp] + \begin{tabular}{rccc} + \hline + Forward & max & Recall & Precision\\ + Passes & \(F_1\) Score & \multicolumn{2}{c}{at max \(F_1\) point} \\ + \hline + vanilla SSD - 0.01 conf & 0.364 & \textbf{0.305} & 0.452 \\ + vanilla SSD - 0.2 conf & 0.363 & 0.294 & \textbf{0.476} \\ + SSD with Entropy test - 0.01 conf & \textbf{0.364} & \textbf{0.305} & 0.453 \\ + % entropy thresh: 1.7 for vanilla SSD is best + \hline + Bay. 
SSD - no DO - 0.2 conf - no NMS \; 10 & 0.236 & 0.244 & 0.229 \\ + no dropout - 0.2 conf - NMS \; 10 & 0.336 & 0.266 & 0.460 \\ + 0.9 keep ratio - 0.2 conf - NMS \; 10 & 0.332 & 0.262 & 0.454 \\ + 0.5 keep ratio - 0.2 conf - NMS \; 10 & 0.309 & 0.264 & 0.374 \\ + % entropy thresh: 1.2 for Bayesian - 2 is best, 0.4 for 3 + % entropy thresh: 0.7 for Bayesian - 6 is best, 1.5 for 7 + % 1.7 for 8, 2.0 for 9 + \hline + \end{tabular} + \caption{Rounded results for cars class. SSD with Entropy test and Bayesian SSD are represented with + their best performing macro averaging entropy threshold with respect to \(F_1\) score. } + \label{tab:results-cars} +\end{table} + +\begin{table}[htbp] + \begin{tabular}{rccc} + \hline + Forward & max & Recall & Precision\\ + Passes & \(F_1\) Score & \multicolumn{2}{c}{at max \(F_1\) point} \\ + \hline + vanilla SSD - 0.01 conf & 0.287 & \textbf{0.251} & 0.335 \\ + vanilla SSD - 0.2 conf & 0.283 & 0.242 & 0.341 \\ + SSD with Entropy test - 0.01 conf & \textbf{0.288} & \textbf{0.251} & 0.338 \\ + % entropy thresh: 1.7 for vanilla SSD is best + \hline + Bay. SSD - no DO - 0.2 conf - no NMS \; 10 & 0.172 & 0.168 & 0.178 \\ + no dropout - 0.2 conf - NMS \; 10 & 0.280 & 0.229 & \textbf{0.360} \\ + 0.9 keep ratio - 0.2 conf - NMS \; 10 & 0.274 & 0.228 & 0.343 \\ + 0.5 keep ratio - 0.2 conf - NMS \; 10 & 0.240 & 0.220 & 0.265 \\ + % entropy thresh: 1.2 for Bayesian - 2 is best, 0.4 for 3 + % entropy thresh: 0.7 for Bayesian - 6 is best, 1.5 for 7 + % 1.7 for 8, 2.0 for 9 + \hline + \end{tabular} + \caption{Rounded results for chairs class. SSD with Entropy test and Bayesian SSD are represented with + their best performing macro averaging entropy threshold with respect to \(F_1\) score. 
} + \label{tab:results-chairs} +\end{table} + + +\begin{table}[htbp] + \begin{tabular}{rccc} + \hline + Forward & max & Recall & Precision\\ + Passes & \(F_1\) Score & \multicolumn{2}{c}{at max \(F_1\) point} \\ + \hline + vanilla SSD - 0.01 conf & 0.233 & \textbf{0.175} & 0.348 \\ + vanilla SSD - 0.2 conf & 0.231 & 0.173 & \textbf{0.350} \\ + SSD with Entropy test - 0.01 conf & \textbf{0.233} & \textbf{0.175} & 0.350 \\ + % entropy thresh: 1.7 for vanilla SSD is best + \hline + Bay. SSD - no DO - 0.2 conf - no NMS \; 10 & 0.160 & 0.140 & 0.188 \\ + no dropout - 0.2 conf - NMS \; 10 & 0.224 & 0.170 & 0.328 \\ + 0.9 keep ratio - 0.2 conf - NMS \; 10 & 0.220 & 0.170 & 0.311 \\ + 0.5 keep ratio - 0.2 conf - NMS \; 10 & 0.202 & 0.172 & 0.245 \\ + % entropy thresh: 1.2 for Bayesian - 2 is best, 0.4 for 3 + % entropy thresh: 0.7 for Bayesian - 6 is best, 1.5 for 7 + % 1.7 for 8, 2.0 for 9 + \hline + \end{tabular} + \caption{Rounded results for bottles class. SSD with Entropy test and Bayesian SSD are represented with + their best performing macro averaging entropy threshold with respect to \(F_1\) score. } + \label{tab:results-bottles} +\end{table} + +\begin{table}[htbp] + \begin{tabular}{rccc} + \hline + Forward & max & Recall & Precision\\ + Passes & \(F_1\) Score & \multicolumn{2}{c}{at max \(F_1\) point} \\ + \hline + vanilla SSD - 0.01 conf & \textbf{0.650} & \textbf{0.647} & \textbf{0.655} \\ + vanilla SSD - 0.2 conf & \textbf{0.650} & \textbf{0.647} & \textbf{0.655} \\ + SSD with Entropy test - 0.01 conf & \textbf{0.650} & \textbf{0.647} & \textbf{0.655} \\ + % entropy thresh: 1.7 for vanilla SSD is best + \hline + Bay. 
SSD - no DO - 0.2 conf - no NMS \; 10 & 0.415 & 0.414 & 0.417 \\ + no dropout - 0.2 conf - NMS \; 10 & 0.647 & 0.642 & 0.654 \\ + 0.9 keep ratio - 0.2 conf - NMS \; 10 & 0.637 & 0.634 & 0.642 \\ + 0.5 keep ratio - 0.2 conf - NMS \; 10 & 0.586 & 0.578 & 0.596 \\ + % entropy thresh: 1.2 for Bayesian - 2 is best, 0.4 for 3 + % entropy thresh: 0.7 for Bayesian - 6 is best, 1.5 for 7 + % 1.7 for 8, 2.0 for 9 + \hline + \end{tabular} + \caption{Rounded results for giraffe class. SSD with Entropy test and Bayesian SSD are represented with + their best performing macro averaging entropy threshold with respect to \(F_1\) score. } + \label{tab:results-giraffes} +\end{table} + \subsection*{Qualitative Analysis} % TODO: expand