{ "id": "2403.08947", "version": "v1", "published": "2024-03-13T20:26:50.000Z", "updated": "2024-03-13T20:26:50.000Z", "title": "Robust COVID-19 Detection in CT Images with CLIP", "authors": [ "Li Lin", "Yamini Sri Krubha", "Zhenhuan Yang", "Cheng Ren", "Xin Wang", "Shu Hu" ], "categories": [ "eess.IV", "cs.CV" ], "abstract": "In the realm of medical imaging, particularly for COVID-19 detection, deep learning models face substantial challenges such as the necessity for extensive computational resources, the paucity of well-annotated datasets, and a significant amount of unlabeled data. In this work, we introduce the first lightweight detector designed to overcome these obstacles, leveraging a frozen CLIP image encoder and a trainable multilayer perception (MLP). Enhanced with Conditional Value at Risk (CVaR) for robustness and a loss landscape flattening strategy for improved generalization, our model is tailored for high efficacy in COVID-19 detection. Furthermore, we integrate a teacher-student framework to capitalize on the vast amounts of unlabeled data, enabling our model to achieve superior performance despite the inherent data limitations. Experimental results on the COV19-CT-DB dataset demonstrate the effectiveness of our approach, surpassing baseline by up to 10.6% in `macro' F1 score in supervised learning. The code is available at https://github.com/Purdue-M2/COVID-19_Detection_M2_PURDUE.", "revisions": [ { "version": "v1", "updated": "2024-03-13T20:26:50.000Z" } ], "analyses": { "keywords": [ "ct images", "learning models face substantial challenges", "deep learning models face substantial", "achieve superior performance despite", "frozen clip image encoder" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }