{ "id": "1802.09583", "version": "v1", "published": "2018-02-26T20:14:03.000Z", "updated": "2018-02-26T20:14:03.000Z", "title": "Data-dependent PAC-Bayes priors via differential privacy", "authors": [ "Gintare Karolina Dziugaite", "Daniel M. Roy" ], "comment": "17 pages, 2 figures; subsumes and extends some results first reported in arXiv:1712.09376", "categories": [ "cs.LG", "stat.ML" ], "abstract": "The Probably Approximately Correct (PAC) Bayes framework (McAllester, 1999) can incorporate knowledge about the learning algorithm and data distribution through the use of distribution-dependent priors, yielding tighter generalization bounds on data-dependent posteriors. Using this flexibility, however, is difficult, especially when the data distribution is presumed to be unknown. We show how an {\\epsilon}-differentially private data-dependent prior yields a valid PAC-Bayes bound, and then show how non-private mechanisms for choosing priors obtain the same generalization bound provided they converge weakly to the private mechanism. As an application of this result, we show that a Gaussian prior mean chosen via stochastic gradient Langevin dynamics (SGLD; Welling and Teh, 2011) leads to a valid PAC-Bayes bound, despite SGLD only converging weakly to an {\\epsilon}-differentially private mechanism. As the bounds are data-dependent, we study the bounds empirically on synthetic data and standard neural network benchmarks in order to illustrate the gains of data-dependent priors over existing distribution-dependent PAC-Bayes bound.", "revisions": [ { "version": "v1", "updated": "2018-02-26T20:14:03.000Z" } ], "analyses": { "keywords": [ "data-dependent pac-bayes priors", "differential privacy", "valid pac-bayes bound", "generalization bound", "data distribution" ], "note": { "typesetting": "TeX", "pages": 17, "language": "en", "license": "arXiv", "status": "editable" } } }