{
  "id": "1802.03934",
  "version": "v1",
  "published": "2018-02-12T08:44:39.000Z",
  "updated": "2018-02-12T08:44:39.000Z",
  "title": "Object Detection with Mask-based Feature Encoding",
  "authors": [
    "Xiaochuan Fan",
    "Hao Guo",
    "Kang Zheng",
    "Wei Feng",
    "Song Wang"
  ],
  "comment": "8 pages",
  "categories": [
    "cs.CV"
  ],
  "abstract": "Region-based Convolutional Neural Networks (R-CNNs) have achieved great success in the field of object detection. The existing R-CNNs usually divide a Region-of-Interest (ROI) into grids, and then localize objects by utilizing the spatial information reflected by the relative position of each grid in the ROI. In this paper, we propose a novel feature-encoding approach, where spatial information is represented through the spatial distributions of visual patterns. In particular, we design a Mask Weight Network (MWN) to learn a set of masks and then apply channel-wise masking operations to ROI feature map, followed by a global pooling and a cheap fully-connected layer. We integrate the newly designed feature encoder into the Faster R-CNN architecture. The resulting new Faster R-CNNs can preserve the object-detection accuracy of the standard Faster R-CNNs by using substantially fewer parameters. Compared to R-FCNs using state-of-art PS ROI pooling and deformable PS ROI pooling, the new Faster R-CNNs can produce higher object-detection accuracy with good run-time efficiency. We also show that a specifically designed and learned MWN can capture global contextual information and further improve the object-detection accuracy. Validation experiments are conducted on both PASCAL VOC and MS COCO datasets.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2018-02-12T08:44:39.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "object detection",
      "mask-based feature encoding",
      "capture global contextual information",
      "produce higher object-detection accuracy",
      "ps roi pooling"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 8,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}