{
  "id": "2308.07505",
  "version": "v1",
  "published": "2023-08-15T00:08:43.000Z",
  "updated": "2023-08-15T00:08:43.000Z",
  "title": "Data Race Detection Using Large Language Models",
  "authors": [
    "Le Chen",
    "Xianzhong Ding",
    "Murali Emani",
    "Tristan Vanderbruggen",
    "Pei-hung Lin",
    "Chuanhua Liao"
  ],
  "categories": [
    "cs.LG",
    "cs.CL"
  ],
  "abstract": "Large language models (LLMs) are demonstrating significant promise as an alternate strategy to facilitate analyses and optimizations of high-performance computing programs, circumventing the need for resource-intensive manual tool creation. In this paper, we explore a novel LLM-based data race detection approach combining prompting engineering and fine-tuning techniques. We create a dedicated dataset named DRB-ML, which is derived from DataRaceBench, with fine-grain labels showing the presence of data race pairs and their associated variables, line numbers, and read/write information. DRB-ML is then used to evaluate representative LLMs and fine-tune open-source ones. Our experiment shows that LLMs can be a viable approach to data race detection. However, they still cannot compete with traditional data race detection tools when we need detailed information about variable pairs causing data races.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2023-08-15T00:08:43.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "large language models",
      "data race detection tools",
      "data race detection approach",
      "pairs causing data races",
      "llm-based data race detection"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}