{ "id": "2207.01115", "version": "v1", "published": "2022-07-03T20:25:06.000Z", "updated": "2022-07-03T20:25:06.000Z", "title": "USHER: Unbiased Sampling for Hindsight Experience Replay", "authors": [ "Liam Schramm", "Yunfu Deng", "Edgar Granados", "Abdeslam Boularias" ], "categories": [ "cs.LG", "cs.AI", "cs.RO" ], "abstract": "Dealing with sparse rewards is a long-standing challenge in reinforcement learning (RL). Hindsight Experience Replay (HER) addresses this problem by reusing failed trajectories for one goal as successful trajectories for another. This allows for both a minimum density of reward and for generalization across multiple goals. However, this strategy is known to result in a biased value function, as the update rule underestimates the likelihood of bad outcomes in a stochastic environment. We propose an asymptotically unbiased importance-sampling-based algorithm to address this problem without sacrificing performance on deterministic environments. We show its effectiveness on a range of robotic systems, including challenging high dimensional stochastic environments.", "revisions": [ { "version": "v1", "updated": "2022-07-03T20:25:06.000Z" } ], "analyses": { "keywords": [ "hindsight experience replay", "unbiased sampling", "challenging high dimensional stochastic environments", "update rule underestimates", "multiple goals" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }