{
  "id": "2311.01329",
  "version": "v1",
  "published": "2023-11-02T15:41:09.000Z",
  "updated": "2023-11-02T15:41:09.000Z",
  "title": "A Simple Solution for Offline Imitation from Observations and Examples with Possibly Incomplete Trajectories",
  "authors": [
    "Kai Yan",
    "Alexander G. Schwing",
    "Yu-Xiong Wang"
  ],
  "comment": "35 pages; Accepted as a poster for NeurIPS2023",
  "categories": [
    "cs.LG",
    "cs.AI"
  ],
  "abstract": "Offline imitation from observations aims to solve MDPs where only task-specific expert states and task-agnostic non-expert state-action pairs are available. Offline imitation is useful in real-world scenarios where arbitrary interactions are costly and expert actions are unavailable. The state-of-the-art \"DIstribution Correction Estimation\" (DICE) methods minimize divergence of state occupancy between expert and learner policies and retrieve a policy with weighted behavior cloning; however, their results are unstable when learning from incomplete trajectories, due to a non-robust optimization in the dual domain. To address the issue, in this paper, we propose Trajectory-Aware Imitation Learning from Observations (TAILO). TAILO uses a discounted sum along the future trajectory as the weight for weighted behavior cloning. The terms for the sum are scaled by the output of a discriminator, which aims to identify expert states. Despite simplicity, TAILO works well if there exist trajectories or segments of expert behavior in the task-agnostic data, a common assumption in prior work. In experiments across multiple testbeds, we find TAILO to be more robust and effective, particularly with incomplete trajectories.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2023-11-02T15:41:09.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "trajectory",
      "offline imitation",
      "possibly incomplete trajectories",
      "simple solution",
      "observations"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 35,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}