{ "id": "2306.11541", "version": "v1", "published": "2023-06-20T13:53:05.000Z", "updated": "2023-06-20T13:53:05.000Z", "title": "Audio-Driven 3D Facial Animation from In-the-Wild Videos", "authors": [ "Liying Lu", "Tianke Zhang", "Yunfei Liu", "Xuangeng Chu", "Yu Li" ], "categories": [ "cs.CV" ], "abstract": "Given an arbitrary audio clip, audio-driven 3D facial animation aims to generate lifelike lip motions and facial expressions for a 3D head. Existing methods typically rely on training their models using limited public 3D datasets that contain a restricted number of audio-3D scan pairs. Consequently, their generalization capability remains limited. In this paper, we propose a novel method that leverages in-the-wild 2D talking-head videos to train our 3D facial animation model. The abundance of easily accessible 2D talking-head videos equips our model with a robust generalization capability. By combining these videos with existing 3D face reconstruction methods, our model excels in generating consistent and high-fidelity lip synchronization. Additionally, our model proficiently captures the speaking styles of different individuals, allowing it to generate 3D talking-heads with distinct personal styles. Extensive qualitative and quantitative experimental results demonstrate the superiority of our method.", "revisions": [ { "version": "v1", "updated": "2023-06-20T13:53:05.000Z" } ], "analyses": { "keywords": [ "in-the-wild videos", "3d face reconstruction methods", "in-the-wild 2d talking-head videos", "2d talking-head videos equips", "audio-driven 3d facial animation aims" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }