{
  "id": "2408.08086",
  "version": "v1",
  "published": "2024-08-15T11:27:18.000Z",
  "updated": "2024-08-15T11:27:18.000Z",
  "title": "Single-image coherent reconstruction of objects and humans",
  "authors": [
    "Sarthak Batra",
    "Partha P. Chakrabarti",
    "Simon Hadfield",
    "Armin Mustafa"
  ],
  "comment": "Accepted at AI for 3D Generation, CVPR Workshop",
  "categories": [
    "cs.CV"
  ],
  "abstract": "Existing methods for reconstructing objects and humans from a monocular image suffer from severe mesh collisions and performance limitations for interacting occluding objects. This paper introduces a method to obtain a globally consistent 3D reconstruction of interacting objects and people from a single image. Our contributions include: 1) an optimization framework, featuring a collision loss, tailored to handle human-object and human-human interactions, ensuring spatially coherent scene reconstruction; and 2) a novel technique to robustly estimate 6 degrees of freedom (DOF) poses, specifically for heavily occluded objects, exploiting image inpainting. Notably, our proposed method operates effectively on images from real-world scenarios, without necessitating scene or object-level 3D supervision. Extensive qualitative and quantitative evaluation against existing methods demonstrates a significant reduction in collisions in the final reconstructions of scenes with multiple interacting humans and objects and a more coherent scene reconstruction.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2024-08-15T11:27:18.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "single-image coherent reconstruction",
      "existing methods",
      "object-level 3d supervision",
      "globally consistent 3d reconstruction",
      "severe mesh collisions"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}