{
  "id": "2304.10539",
  "version": "v1",
  "published": "2023-04-20T20:05:08.000Z",
  "updated": "2023-04-20T20:05:08.000Z",
  "title": "Learning in Imperfect Environment: Multi-Label Classification with Long-Tailed Distribution and Partial Labels",
  "authors": [
    "Wenqiao Zhang",
    "Changshuo Liu",
    "Lingze Zeng",
    "Beng Chin Ooi",
    "Siliang Tang",
    "Yueting Zhuang"
  ],
  "categories": [
    "cs.LG",
    "cs.CV"
  ],
  "abstract": "Conventional multi-label classification (MLC) methods assume that all samples are fully labeled and identically distributed. Unfortunately, this assumption is unrealistic in large-scale MLC data that has long-tailed (LT) distribution and partial labels (PL). To address the problem, we introduce a novel task, Partial labeling and Long-Tailed Multi-Label Classification (PLT-MLC), to jointly consider the above two imperfect learning environments. Not surprisingly, we find that most LT-MLC and PL-MLC approaches fail to solve the PLT-MLC, resulting in significant performance degradation on the two proposed PLT-MLC benchmarks. Therefore, we propose an end-to-end learning framework: \\textbf{CO}rrection $\\rightarrow$ \\textbf{M}odificat\\textbf{I}on $\\rightarrow$ balan\\textbf{C}e, abbreviated as \\textbf{\\method{}}. Our bootstrapping philosophy is to simultaneously correct the missing labels (Correction) with convinced prediction confidence over a class-aware threshold and to learn from these recall labels during training. We next propose a novel multi-focal modifier loss that simultaneously addresses head-tail imbalance and positive-negative imbalance to adaptively modify the attention to different samples (Modification) under the LT class distribution. In addition, we develop a balanced training strategy by distilling the model's learning effect from head and tail samples, and thus design a balanced classifier (Balance) conditioned on the head and tail learning effect to maintain stable performance for all samples. Our experimental study shows that the proposed \\method{} significantly outperforms general MLC, LT-MLC and PL-MLC methods in terms of effectiveness and robustness on our newly created PLT-MLC datasets.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2023-04-20T20:05:08.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "multi-label classification",
      "partial labels",
      "imperfect environment",
      "long-tailed distribution",
      "novel multi-focal modifier loss"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}