{ "id": "2004.05821", "version": "v1", "published": "2020-04-13T08:32:03.000Z", "updated": "2020-04-13T08:32:03.000Z", "title": "Monocular Depth Estimation with Self-supervised Instance Adaptation", "authors": [ "Robert McCraith", "Lukas Neumann", "Andrew Zisserman", "Andrea Vedaldi" ], "comment": "IROS submission, 7 pages", "categories": [ "cs.CV", "cs.LG" ], "abstract": "Recent advances in self-supervised learning have demonstrated that it is possible to learn accurate monocular depth reconstruction from raw video data, without using any 3D ground truth for supervision. However, in robotics applications, multiple views of a scene may or may not be available, depending on the actions of the robot, switching between monocular and multi-view reconstruction. To address this mixed setting, we proposed a new approach that extends any off-the-shelf self-supervised monocular depth reconstruction system to use more than one image at test time. Our method builds on a standard prior learned to perform monocular reconstruction, but uses self-supervision at test time to further improve the reconstruction accuracy when multiple images are available. When used to update the correct components of the model, this approach is highly-effective. On the standard KITTI benchmark, our self-supervised method consistently outperforms all the previous methods with an average 25% reduction in absolute error for the three common setups (monocular, stereo and monocular+stereo), and comes very close in accuracy when compared to the fully-supervised state-of-the-art methods.", "revisions": [ { "version": "v1", "updated": "2020-04-13T08:32:03.000Z" } ], "analyses": { "keywords": [ "monocular depth estimation", "self-supervised instance adaptation", "test time", "learn accurate monocular depth reconstruction", "off-the-shelf self-supervised monocular depth reconstruction system" ], "note": { "typesetting": "TeX", "pages": 7, "language": "en", "license": "arXiv", "status": "editable" } } }