{ "id": "2306.11541", "version": "v1", "published": "2023-06-20T13:53:05.000Z", "updated": "2023-06-20T13:53:05.000Z", "title": "Audio-Driven 3D Facial Animation from In-the-Wild Videos", "authors": [ "Liying Lu", "Tianke Zhang", "Yunfei Liu", "Xuangeng Chu", "Yu Li" ], "categories": [ "cs.CV" ], "abstract": "Given an arbitrary audio clip, audio-driven 3D facial animation aims to generate lifelike lip motions and facial expressions for a 3D head. Existing methods typically rely on training their models using limited public 3D datasets that contain a restricted number of audio-3D scan pairs. Consequently, their generalization capability remains limited. In this paper, we propose a novel method that leverages in-the-wild 2D talking-head videos to train our 3D facial animation model. The abundance of easily accessible 2D talking-head videos equips our model with a robust generalization capability. By combining these videos with existing 3D face reconstruction methods, our model excels in generating consistent and high-fidelity lip synchronization. Additionally, our model proficiently captures the speaking styles of different individuals, allowing it to generate 3D talking-heads with distinct personal styles. Extensive qualitative and quantitative experimental results demonstrate the superiority of our method.", "revisions": [ { "version": "v1", "updated": "2023-06-20T13:53:05.000Z" } ], "analyses": { "keywords": [ "in-the-wild videos", "3d face reconstruction methods", "in-the-wild 2d talking-head videos", "2d talking-head videos equips", "audio-driven 3d facial animation aims" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }