{ "id": "2301.09633", "version": "v1", "published": "2023-01-23T18:59:28.000Z", "updated": "2023-01-23T18:59:28.000Z", "title": "Prediction-Powered Inference", "authors": [ "Anastasios N. Angelopoulos", "Stephen Bates", "Clara Fannjiang", "Michael I. Jordan", "Tijana Zrnic" ], "comment": "Code is available at https://github.com/aangelopoulos/prediction-powered-inference", "categories": [ "stat.ML", "cs.AI", "cs.LG", "q-bio.QM", "stat.ME" ], "abstract": "We introduce prediction-powered inference $\\unicode{x2013}$ a framework for performing valid statistical inference when an experimental data set is supplemented with predictions from a machine-learning system such as AlphaFold. Our framework yields provably valid conclusions without making any assumptions on the machine-learning algorithm that supplies the predictions. Higher accuracy of the predictions translates to smaller confidence intervals, permitting more powerful inference. Prediction-powered inference yields simple algorithms for computing valid confidence intervals for statistical objects such as means, quantiles, and linear and logistic regression coefficients. We demonstrate the benefits of prediction-powered inference with data sets from proteomics, genomics, electronic voting, remote sensing, census analysis, and ecology.", "revisions": [ { "version": "v1", "updated": "2023-01-23T18:59:28.000Z" } ], "analyses": { "keywords": [ "data set", "framework yields provably valid conclusions", "prediction-powered inference yields simple algorithms", "logistic regression coefficients", "computing valid confidence intervals" ], "tags": [ "github project" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }