{
  "id": "2405.08252",
  "version": "v1",
  "published": "2024-05-14T00:57:02.000Z",
  "updated": "2024-05-14T00:57:02.000Z",
  "title": "Smart Sampling: Self-Attention and Bootstrapping for Improved Ensembled Q-Learning",
  "authors": [
    "Muhammad Junaid Khan",
    "Syed Hammad Ahmed",
    "Gita Sukthankar"
  ],
  "comment": "FLAIRS-37 (2024)",
  "categories": [
    "cs.LG",
    "cs.AI"
  ],
  "abstract": "We present a novel method aimed at enhancing the sample efficiency of ensemble Q learning. Our proposed approach integrates multi-head self-attention into the ensembled Q networks while bootstrapping the state-action pairs ingested by the ensemble. This not only results in performance improvements over the original REDQ (Chen et al. 2021) and its variant DroQ (Hi-raoka et al. 2022), thereby enhancing Q predictions, but also effectively reduces both the average normalized bias and standard deviation of normalized bias within Q-function ensembles. Importantly, our method also performs well even in scenarios with a low update-to-data (UTD) ratio. Notably, the implementation of our proposed method is straightforward, requiring minimal modifications to the base model.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2024-05-14T00:57:02.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "smart sampling",
      "ensembled q-learning",
      "approach integrates multi-head self-attention",
      "bootstrapping",
      "base model"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}