{
  "id": "1411.6447",
  "version": "v1",
  "published": "2014-11-24T13:30:07.000Z",
  "updated": "2014-11-24T13:30:07.000Z",
  "title": "The Application of Two-level Attention Models in Deep Convolutional Neural Network for Fine-grained Image Classification",
  "authors": [
    "Tianjun Xiao",
    "Yichong Xu",
    "Kuiyuan Yang",
    "Jiaxing Zhang",
    "Yuxin Peng",
    "Zheng Zhang"
  ],
  "categories": [
    "cs.CV"
  ],
  "abstract": "Fine-grained classification is challenging because categories can only be discriminated by subtle and local differences. Variances in the pose, scale or rotation usually make the problem more difficult. Most fine-grained classification systems follow the pipeline of finding foreground object or object parts (where) to extract discriminative features (what). In this paper, we propose to apply visual attention to fine-grained classification task using deep neural network. Our pipeline integrates three types of attention: the bottom-up attention that propose candidate patches, the object-level top-down attention that selects relevant patches to a certain object, and the part-level top-down attention that localizes discriminative parts. We combine these attentions to train domain-specific deep nets, then use it to improve both the what and where aspects. Importantly, we avoid using expensive annotations like bounding box or part information from end-to-end. The weak supervision constraint makes our work easier to generalize. We have verified the effectiveness of the method on the subsets of ILSVRC2012 dataset and CUB200_2011 dataset. Our pipeline delivered significant improvements and achieved the best accuracy under the weakest supervision condition. The performance is competitive against other methods that rely on additional annotations.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2014-11-24T13:30:07.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "deep convolutional neural network",
      "two-level attention models",
      "fine-grained image classification",
      "fine-grained classification",
      "application"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable",
      "adsabs": "2014arXiv1411.6447X"
    }
  }
}