Added papers for SceneNet and MS COCO

Signed-off-by: Jim Martens <github@2martens.de>
Jim Martens 2019-02-20 15:32:59 +01:00
parent dac7c946b2
commit 43239a7f84
1 changed file with 42 additions and 15 deletions

ma.bib

@@ -535,21 +535,20 @@ to construct explicit models for non-normal classes. Application includes infere
}
@Article{Friedman1996,
author = {Friedman, Batya and Nissenbaum, Helen},
title = {Bias in Computer Systems},
journal = {{ACM} Transactions on Information Systems},
year = {1996},
volume = {14},
number = {3},
pages = {330--347},
doi = {10.1145/230538.230561},
__markedentry = {[jim:]},
abstract = {From an analysis of actual cases, three categories of bias in computer systems have been developed: preexisting, technical, and emergent. Preexisting bias has its roots in social institutions, practices, and attitudes. Technical bias arises from technical constraints or considerations. Emergent bias arises in a context of use. Although others have pointed to bias in particular computer systems and have noted the general problem, we know of no comparable work that examines this phenomenon comprehensively and which offers a framework for understanding and remedying it. We conclude by suggesting that freedom from bias should be counted among the select set of criteria—including reliability, accuracy, and efficiency—according to which the quality of systems in use in society should be judged.},
address = {New York, NY, USA},
file = {:/home/jim/Documents/Studium/WS2018_19/IWT/prüfungsrelevant/06_Friedman.pdf:PDF},
owner = {jim},
publisher = {ACM},
timestamp = {2019.02.20},
author = {Friedman, Batya and Nissenbaum, Helen},
title = {Bias in Computer Systems},
journal = {{ACM} Transactions on Information Systems},
year = {1996},
volume = {14},
number = {3},
pages = {330--347},
doi = {10.1145/230538.230561},
abstract = {From an analysis of actual cases, three categories of bias in computer systems have been developed: preexisting, technical, and emergent. Preexisting bias has its roots in social institutions, practices, and attitudes. Technical bias arises from technical constraints or considerations. Emergent bias arises in a context of use. Although others have pointed to bias in particular computer systems and have noted the general problem, we know of no comparable work that examines this phenomenon comprehensively and which offers a framework for understanding and remedying it. We conclude by suggesting that freedom from bias should be counted among the select set of criteria—including reliability, accuracy, and efficiency—according to which the quality of systems in use in society should be judged.},
address = {New York, NY, USA},
file = {:/home/jim/Documents/Studium/WS2018_19/IWT/prüfungsrelevant/06_Friedman.pdf:PDF},
owner = {jim},
publisher = {ACM},
timestamp = {2019.02.20},
}
@Report{Diakopoulos2014,
@@ -567,4 +566,32 @@ to construct explicit models for non-normal classes. Application includes infere
timestamp = {2019.02.20},
}
@InProceedings{McCormac2017,
author = {McCormac, John and Handa, Ankur and Leutenegger, Stefan and Davison, Andrew J.},
title = {{SceneNet} {RGB}-D: Can 5M Synthetic Images Beat Generic {ImageNet} Pre-training on Indoor Segmentation?},
booktitle = {2017 {IEEE} International Conference on Computer Vision ({ICCV})},
year = {2017},
publisher = {{IEEE}},
doi = {10.1109/iccv.2017.292},
__markedentry = {[jim:]},
abstract = {We introduce SceneNet RGB-D, a dataset providing pixel-perfect ground truth for scene understanding problems such as semantic segmentation, instance segmentation, and object detection. It also provides perfect camera poses and depth data, allowing investigation into geometric computer vision problems such as optical flow, camera pose estimation, and 3D scene labelling tasks. Random sampling permits virtually unlimited scene configurations, and here we provide 5M rendered RGB-D images from 16K randomly generated 3D trajectories in synthetic layouts, with random but physically simulated object configurations. We compare the semantic segmentation performance of network weights produced from pre-training on RGB images from our dataset against generic VGG-16 ImageNet weights. After fine-tuning on the SUN RGB-D and NYUv2 real-world datasets we find in both cases that the synthetically pre-trained network outperforms the VGG-16 weights. When synthetic pre-training includes a depth channel (something ImageNet cannot natively provide) the performance is greater still. This suggests that large-scale high-quality synthetic RGB datasets with task-specific labels can be more useful for pre-training than real-world generic pre-training such as ImageNet. We host the dataset at http://robotvault.bitbucket.io/scenenet-rgbd.html.},
file = {:/home/jim/Documents/Studium/MA/Literatur/37_SceneNet.pdf:PDF},
owner = {jim},
timestamp = {2019.02.20},
}
@InCollection{Lin2014,
author = {Tsung-Yi Lin and Michael Maire and Serge Belongie and James Hays and Pietro Perona and Deva Ramanan and Piotr Doll{\'{a}}r and C. Lawrence Zitnick},
title = {Microsoft {COCO}: Common Objects in Context},
booktitle = {Computer Vision {\textendash} {ECCV} 2014},
year = {2014},
publisher = {Springer International Publishing},
pages = {740--755},
doi = {10.1007/978-3-319-10602-1_48},
abstract = {We present a new dataset with the goal of advancing the state-of-the-art in object recognition by placing the question of object recognition in the context of the broader question of scene understanding. This is achieved by gathering images of complex everyday scenes containing common objects in their natural context. Objects are labeled using per-instance segmentations to aid in precise object localization. Our dataset contains photos of 91 object types that would be easily recognizable by a 4 year old. With a total of 2.5 million labeled instances in 328k images, the creation of our dataset drew upon extensive crowd worker involvement via novel user interfaces for category detection, instance spotting and instance segmentation. We present a detailed statistical analysis of the dataset in comparison to PASCAL, ImageNet, and SUN. Finally, we provide baseline performance analysis for bounding box and segmentation detection results using a Deformable Parts Model.},
file = {:/home/jim/Documents/Studium/MA/Literatur/38_MSCOCO.pdf:PDF},
owner = {jim},
timestamp = {2019.02.20},
}
@Comment{jabref-meta: databaseType:biblatex;}
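
For context, a minimal sketch of how the newly added entries could be cited from a LaTeX document with biblatex (the database type declared in the JabRef comment above). The surrounding prose and the backend/style options are illustrative assumptions, not part of this commit:

% Illustrative only: load ma.bib via biblatex and cite the two new entries.
\documentclass{article}
\usepackage[backend=biber, style=numeric]{biblatex}
\addbibresource{ma.bib}

\begin{document}
SceneNet RGB-D provides large-scale synthetic RGB-D training data \autocite{McCormac2017},
while MS COCO supplies real-world images of common objects in context \autocite{Lin2014}.
\printbibliography
\end{document}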