{
  "id": "2207.14227",
  "version": "v1",
  "published": "2022-07-28T16:55:11.000Z",
  "updated": "2022-07-28T16:55:11.000Z",
  "title": "Visual Recognition by Request",
  "authors": [
    "Chufeng Tang",
    "Lingxi Xie",
    "Xiaopeng Zhang",
    "Xiaolin Hu",
    "Qi Tian"
  ],
  "categories": [
    "cs.CV"
  ],
  "abstract": "In this paper, we present a novel protocol of annotation and evaluation for visual recognition. Different from traditional settings, the protocol does not require the labeler/algorithm to annotate/recognize all targets (objects, parts, etc.) at once, but instead raises a number of recognition instructions and the algorithm recognizes targets by request. This mechanism brings two beneficial properties to reduce the burden of annotation, namely, (i) variable granularity: different scenarios can have different levels of annotation, in particular, object parts can be labeled only in large and clear instances, (ii) being open-domain: new concepts can be added to the database in minimal costs. To deal with the proposed setting, we maintain a knowledge base and design a query-based visual recognition framework that constructs queries on-the-fly based on the requests. We evaluate the recognition system on two mixed-annotated datasets, CPP and ADE20K, and demonstrate its promising ability of learning from partially labeled data as well as adapting to new concepts with only text labels.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2022-07-28T16:55:11.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "annotation",
      "algorithm recognizes targets",
      "query-based visual recognition framework",
      "recognition instructions",
      "novel protocol"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}