{ "id": "2211.04878", "version": "v1", "published": "2022-11-09T13:34:45.000Z", "updated": "2022-11-09T13:34:45.000Z", "title": "Foundation Models for Semantic Novelty in Reinforcement Learning", "authors": [ "Tarun Gupta", "Peter Karkus", "Tong Che", "Danfei Xu", "Marco Pavone" ], "comment": "Foundation Models for Decision Making Workshop at Neural Information Processing Systems, 2022", "categories": [ "cs.LG", "cs.AI" ], "abstract": "Effectively exploring the environment is a key challenge in reinforcement learning (RL). We address this challenge by defining a novel intrinsic reward based on a foundation model, such as contrastive language image pretraining (CLIP), which can encode a wealth of domain-independent semantic visual-language knowledge about the world. Specifically, our intrinsic reward is defined based on pre-trained CLIP embeddings without any fine-tuning or learning on the target RL task. We demonstrate that CLIP-based intrinsic rewards can drive exploration towards semantically meaningful states and outperform state-of-the-art methods in challenging sparse-reward procedurally-generated environments.", "revisions": [ { "version": "v1", "updated": "2022-11-09T13:34:45.000Z" } ], "analyses": { "keywords": [ "foundation model", "semantic novelty", "reinforcement learning", "domain-independent semantic visual-language knowledge", "intrinsic reward" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }