{
  "id": "1909.09803",
  "version": "v1",
  "published": "2019-09-21T10:00:21.000Z",
  "updated": "2019-09-21T10:00:21.000Z",
  "title": "Visual Odometry Revisited: What Should Be Learnt?",
  "authors": [
    "Huangying Zhan",
    "Chamara Saroj Weerasekera",
    "Jiawang Bian",
    "Ian Reid"
  ],
  "comment": "Demo video: https://youtu.be/Nl8mFU4SJKY",
  "categories": [
    "cs.CV"
  ],
  "abstract": "In this work we present a monocular visual odometry (VO) algorithm which leverages geometry-based methods and deep learning. Most existing VO/SLAM systems with superior performance are based on geometry and have to be carefully designed for different application scenarios. Moreover, most monocular systems suffer from the scale-drift issue. Some recent deep learning works learn VO in an end-to-end manner but the performance of these deep systems is still not comparable to geometry-based methods. In this work, we revisit the basics of VO and explore the right way to integrate deep learning with epipolar geometry and the Perspective-n-Point (PnP) method. Specifically, we train two convolutional neural networks (CNNs) for estimating single-view depths and two-view optical flows as intermediate outputs. With the deep predictions, we design a simple but robust frame-to-frame VO algorithm (DF-VO) which outperforms pure deep learning-based and geometry-based methods. More importantly, our system does not suffer from the scale-drift issue, being aided by a scale-consistent single-view depth CNN. Extensive experiments on the KITTI dataset show the robustness of our system and a detailed ablation study shows the effect of different factors in our system.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2019-09-21T10:00:21.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "visual odometry",
      "geometry-based methods",
      "scale consistent single-view depth cnn",
      "robust frame-to-frame vo algorithm",
      "scale-drift issue"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}