{ "id": "2211.04878", "version": "v1", "published": "2022-11-09T13:34:45.000Z", "updated": "2022-11-09T13:34:45.000Z", "title": "Foundation Models for Semantic Novelty in Reinforcement Learning", "authors": [ "Tarun Gupta", "Peter Karkus", "Tong Che", "Danfei Xu", "Marco Pavone" ], "comment": "Foundation Models for Decision Making Workshop at Neural Information Processing Systems, 2022", "categories": [ "cs.LG", "cs.AI" ], "abstract": "Effectively exploring the environment is a key challenge in reinforcement learning (RL). We address this challenge by defining a novel intrinsic reward based on a foundation model, such as contrastive language image pretraining (CLIP), which can encode a wealth of domain-independent semantic visual-language knowledge about the world. Specifically, our intrinsic reward is defined based on pre-trained CLIP embeddings without any fine-tuning or learning on the target RL task. We demonstrate that CLIP-based intrinsic rewards can drive exploration towards semantically meaningful states and outperform state-of-the-art methods in challenging sparse-reward procedurally-generated environments.", "revisions": [ { "version": "v1", "updated": "2022-11-09T13:34:45.000Z" } ], "analyses": { "keywords": [ "foundation model", "semantic novelty", "reinforcement learning", "domain-independent semantic visual-language knowledge", "intrinsic reward" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }