diff --git a/body.tex b/body.tex index d729a91..342f7ab 100644 --- a/body.tex +++ b/body.tex @@ -669,6 +669,31 @@ However, in case of a class imbalance the macro averaging favours classes with few detections whereas micro averaging benefits classes with many detections. +\begin{table}[ht] + \begin{tabular}{rcccc} + \hline + Forward & max & abs OSE & Recall & Precision\\ + Passes & \(F_1\) Score & \multicolumn{3}{c}{at max \(F_1\) point} \\ + \hline + vanilla SSD - 0.01 conf & 0.255 & 3176 & 0.214 & 0.318 \\ + vanilla SSD - 0.2 conf & \textbf{0.376} & 2939 & \textbf{0.382} & 0.372 \\ + SSD with Entropy test - 0.01 conf & 0.255 & 3168 & 0.214 & 0.318 \\ + % entropy thresh: 2.4 for vanilla SSD is best + \hline + Bayesian SSD - no DO - 0.2 conf - no NMS \; 10 & 0.006 & 164 & 0.004 & 0.005 \\ % NOTE(review): max F1 (0.006) exceeds both recall (0.004) and precision (0.005), which a micro-averaged F1 cannot do -- verify this row + no dropout - 0.2 conf - NMS \; 10 & 0.371 & \textbf{2335} & 0.365 & \textbf{0.378} \\ + % entropy thresh: 1.2 for Bayesian - 2 is best, 0.4 for 3 + % 0.5 for Bayesian - 6, 1.4 for 7 + \hline + \end{tabular} + \caption{Results for micro averaging. SSD with Entropy test and Bayesian SSD are represented with + their best performing entropy threshold. Vanilla SSD with Entropy test performed best with an + entropy threshold of 2.4, Bayesian SSD without non-maximum suppression performed best for 0.5, + and Bayesian SSD with non-maximum suppression performed best for 1.4 as entropy + threshold.} + \label{tab:results-micro} +\end{table} + In both cases, vanilla SSD with a per-class confidence threshold of 0.2 performs best (see tables \ref{tab:results-micro} and \ref{tab:results-macro}) with a maximum \(F_1\) score of 0.376/0.375 (always micro/macro) compared to both vanilla SSD with a per-class @@ -681,50 +706,27 @@ not very uncertain. The best performing entropy threshold is not any better than the corresponding vanilla SSD without entropy threshold. Therefore, in this case the per-class confidence score is far more important for the result. 
-\begin{table} - \begin{tabular}{rcccc} - \hline - Forward & max & abs OSE & Recall & Precision\\ - Passes & \(F_1\) Score & \multicolumn{3}{c}{at max \(F_1\) point} \\ - \hline - vanilla SSD - 0.01 conf & 0.255 & 3176 & 0.214 & 0.318 \\ - vanilla SSD - 0.2 conf & \textbf{0.376} & \textbf{2939} & \textbf{0.382} & \textbf{0.372} \\ - SSD with Entropy test - 0.01 conf & 0.255 & 3168 & 0.214 & 0.318 \\ - % entropy thresh: 2.4 for vanilla SSD is best - \hline - Bayesian SSD - no bg - 0.2 conf \; 10 & 0.003 & 2145 & 0.005 & 0.002 \\ - no bg > 0.8 conf - 0.2 conf \; 10 & 0.003 & 151 & 0.004 & 0.003 \\ - % entropy thresh: 1.2 for Bayesian - 2 is best, 0.4 for 3 - \hline - \end{tabular} - \caption{Results for micro averaging. SSD with Entropy test and Bayesian SSD are represented with - their best performing entropy threshold. Vanilla SSD with Entropy test performed best with an - entropy threshold of 2.4, Bayesian SSD with no background performed best for 1.2, - and Bayesian SSD with no background prediction higher than 0.8 performed best for 0.4 as entropy - threshold.} - \label{tab:results-micro} -\end{table} - -\begin{table} +\begin{table}[ht] \begin{tabular}{rcccc} \hline Forward & max & abs OSE & Recall & Precision\\ Passes & \(F_1\) Score & \multicolumn{3}{c}{at max \(F_1\) point} \\ \hline vanilla SSD - 0.01 conf & 0.370 & 1426 & 0.328 & 0.424 \\ - vanilla SSD - 0.2 conf & \textbf{0.375} & \textbf{1218} & \textbf{0.338} & 0.424 \\ + vanilla SSD - 0.2 conf & \textbf{0.375} & 1218 & \textbf{0.338} & 0.424 \\ SSD with Entropy test - 0.01 conf & 0.370 & 1373 & 0.329 & \textbf{0.425} \\ % entropy thresh: 1.7 for vanilla SSD is best \hline - Bayesian SSD - no bg - 0.2 conf \; 10 & 0.002 & 1784 & 0.005 & 0.002 \\ - no bg > 0.8 conf - 0.2 conf \; 10 & 0.002 & 122 & 0.003 & 0.002 \\ + Bayesian SSD - no DO - 0.2 conf - no NMS \; 10 & 0.006 & 1453 & 0.009 & 0.005 \\ + no dropout - 0.2 conf - NMS \; 10 & 0.363 & \textbf{1057} & 0.321 & 0.420 \\ % entropy thresh: 1.2 for 
Bayesian - 2 is best, 0.4 for 3 + % entropy thresh: 0.7 for Bayesian - 6 is best, 1.5 for 7 \hline \end{tabular} \caption{Results for macro averaging. SSD with Entropy test and Bayesian SSD are represented with their best performing entropy threshold. Vanilla SSD with Entropy test performed best with an - entropy threshold of 1.7, Bayesian SSD with no background performed best for 1.2, - and Bayesian SSD with no background prediction higher than 0.8 performed best for 0.4 as entropy + entropy threshold of 1.7, Bayesian SSD without non-maximum suppression performed best for 0.7, + and Bayesian SSD with non-maximum suppression performed best for 1.5 as entropy threshold.} \label{tab:results-macro} \end{table}