{ "id": "2405.08252", "version": "v1", "published": "2024-05-14T00:57:02.000Z", "updated": "2024-05-14T00:57:02.000Z", "title": "Smart Sampling: Self-Attention and Bootstrapping for Improved Ensembled Q-Learning", "authors": [ "Muhammad Junaid Khan", "Syed Hammad Ahmed", "Gita Sukthankar" ], "comment": "FLAIRS-37 (2024)", "categories": [ "cs.LG", "cs.AI" ], "abstract": "We present a novel method aimed at enhancing the sample efficiency of ensemble Q learning. Our proposed approach integrates multi-head self-attention into the ensembled Q networks while bootstrapping the state-action pairs ingested by the ensemble. This not only results in performance improvements over the original REDQ (Chen et al. 2021) and its variant DroQ (Hiraoka et al. 2022), thereby enhancing Q predictions, but also effectively reduces both the average normalized bias and standard deviation of normalized bias within Q-function ensembles. Importantly, our method also performs well even in scenarios with a low update-to-data (UTD) ratio. Notably, the implementation of our proposed method is straightforward, requiring minimal modifications to the base model.", "revisions": [ { "version": "v1", "updated": "2024-05-14T00:57:02.000Z" } ], "analyses": { "keywords": [ "smart sampling", "ensembled q-learning", "approach integrates multi-head self-attention", "bootstrapping", "base model" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }