{ "id": "1806.02920", "version": "v1", "published": "2018-06-07T22:57:16.000Z", "updated": "2018-06-07T22:57:16.000Z", "title": "GAIN: Missing Data Imputation using Generative Adversarial Nets", "authors": [ "Jinsung Yoon", "James Jordon", "Mihaela van der Schaar" ], "comment": "10 pages, 3 figures, 2018 International Conference of Machine Learning", "categories": [ "cs.LG", "stat.ML" ], "abstract": "We propose a novel method for imputing missing data by adapting the well-known Generative Adversarial Nets (GAN) framework. Accordingly, we call our method Generative Adversarial Imputation Nets (GAIN). The generator (G) observes some components of a real data vector, imputes the missing components conditioned on what is actually observed, and outputs a completed vector. The discriminator (D) then takes a completed vector and attempts to determine which components were actually observed and which were imputed. To ensure that D forces G to learn the desired distribution, we provide D with some additional information in the form of a hint vector. The hint reveals to D partial information about the missingness of the original sample, which is used by D to focus its attention on the imputation quality of particular components. This hint ensures that G does in fact learn to generate according to the true data distribution. We tested our method on various datasets and found that GAIN significantly outperforms state-of-the-art imputation methods.", "revisions": [ { "version": "v1", "updated": "2018-06-07T22:57:16.000Z" } ], "analyses": { "keywords": [ "generative adversarial nets", "missing data imputation", "outperforms state-of-the-art imputation methods", "significantly outperforms state-of-the-art imputation", "method generative adversarial imputation nets" ], "tags": [ "conference paper" ], "note": { "typesetting": "TeX", "pages": 10, "language": "en", "license": "arXiv", "status": "editable" } } }