{ "id": "2207.07027", "version": "v1", "published": "2022-07-14T15:59:03.000Z", "updated": "2022-07-14T15:59:03.000Z", "title": "MedFuse: Multi-modal fusion with clinical time-series data and chest X-ray images", "authors": [ "Nasir Hayat", "Krzysztof J. Geras", "Farah E. Shamout" ], "categories": [ "eess.IV", "cs.CV", "cs.LG" ], "abstract": "Multi-modal fusion approaches aim to integrate information from different data sources. Unlike natural datasets, such as in audio-visual applications, where samples consist of \"paired\" modalities, data in healthcare is often collected asynchronously. Hence, requiring the presence of all modalities for a given sample is not realistic for clinical tasks and significantly limits the size of the dataset during training. In this paper, we propose MedFuse, a conceptually simple yet promising LSTM-based fusion module that can accommodate uni-modal as well as multi-modal input. We evaluate the fusion method and introduce new benchmark results for in-hospital mortality prediction and phenotype classification, using clinical time-series data in the MIMIC-IV dataset and corresponding chest X-ray images in MIMIC-CXR. Compared to more complex multi-modal fusion strategies, MedFuse provides a performance improvement by a large margin on the fully paired test set. It also remains robust across the partially paired test set containing samples with missing chest X-ray images. We release our code for reproducibility and to enable the evaluation of competing models in the future.", "revisions": [ { "version": "v1", "updated": "2022-07-14T15:59:03.000Z" } ], "analyses": { "keywords": [ "chest x-ray images", "clinical time-series data", "multi-modal fusion", "test set containing samples", "paired test set containing" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }