{
  "id": "2008.05700",
  "version": "v1",
  "published": "2020-08-13T05:51:35.000Z",
  "updated": "2020-08-13T05:51:35.000Z",
  "title": "What leads to generalization of object proposals?",
  "authors": [
    "Rui Wang",
    "Dhruv Mahajan",
    "Vignesh Ramanathan"
  ],
  "categories": [
    "cs.CV"
  ],
  "abstract": "Object proposal generation is often the first step in many detection models. It is lucrative to train a good proposal model, that generalizes to unseen classes. This could help scaling detection models to larger number of classes with fewer annotations. Motivated by this, we study how a detection model trained on a small set of source classes can provide proposals that generalize to unseen classes. We systematically study the properties of the dataset - visual diversity and label space granularity - required for good generalization. We show the trade-off between using fine-grained labels and coarse labels. We introduce the idea of prototypical classes: a set of sufficient and necessary classes required to train a detection model to obtain generalized proposals in a more data-efficient way. On the Open Images V4 dataset, we show that only 25% of the classes can be selected to form such a prototypical set. The resulting proposals from a model trained with these classes is only 4.3% worse than using all the classes, in terms of average recall (AR). We also demonstrate that Faster R-CNN model leads to better generalization of proposals compared to a single-stage network like RetinaNet.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2020-08-13T05:51:35.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "generalization",
      "open images v4 dataset",
      "unseen classes",
      "object proposal generation",
      "faster r-cnn model"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}