{
  "id": "2303.07269",
  "version": "v1",
  "published": "2023-03-13T16:45:41.000Z",
  "updated": "2023-03-13T16:45:41.000Z",
  "title": "InPL: Pseudo-labeling the Inliers First for Imbalanced Semi-supervised Learning",
  "authors": [
    "Zhuoran Yu",
    "Yin Li",
    "Yong Jae Lee"
  ],
  "comment": "Accepted by ICLR 2023",
  "categories": [
    "cs.CV",
    "cs.LG"
  ],
  "abstract": "Recent state-of-the-art methods in imbalanced semi-supervised learning (SSL) rely on confidence-based pseudo-labeling with consistency regularization. To obtain high-quality pseudo-labels, a high confidence threshold is typically adopted. However, it has been shown that softmax-based confidence scores in deep networks can be arbitrarily high for samples far from the training data, and thus, the pseudo-labels for even high-confidence unlabeled samples may still be unreliable. In this work, we present a new perspective of pseudo-labeling for imbalanced SSL. Without relying on model confidence, we propose to measure whether an unlabeled sample is likely to be ``in-distribution''; i.e., close to the current training data. To decide whether an unlabeled sample is ``in-distribution'' or ``out-of-distribution'', we adopt the energy score from out-of-distribution detection literature. As training progresses and more unlabeled samples become in-distribution and contribute to training, the combined labeled and pseudo-labeled data can better approximate the true class distribution to improve the model. Experiments demonstrate that our energy-based pseudo-labeling method, \\textbf{InPL}, albeit conceptually simple, significantly outperforms confidence-based methods on imbalanced SSL benchmarks. For example, it produces around 3\\% absolute accuracy improvement on CIFAR10-LT. When combined with state-of-the-art long-tailed SSL methods, further improvements are attained. In particular, in one of the most challenging scenarios, InPL achieves a 6.9\\% accuracy improvement over the best competitor.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2023-03-13T16:45:41.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "imbalanced semi-supervised learning",
      "inliers first",
      "unlabeled sample",
      "pseudo-labeling",
      "true class distribution"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}