{
  "id": "1802.09583",
  "version": "v1",
  "published": "2018-02-26T20:14:03.000Z",
  "updated": "2018-02-26T20:14:03.000Z",
  "title": "Data-dependent PAC-Bayes priors via differential privacy",
  "authors": [
    "Gintare Karolina Dziugaite",
    "Daniel M. Roy"
  ],
  "comment": "17 pages, 2 figures; subsumes and extends some results first reported in arXiv:1712.09376",
  "categories": [
    "cs.LG",
    "stat.ML"
  ],
  "abstract": "The Probably Approximately Correct (PAC) Bayes framework (McAllester, 1999) can incorporate knowledge about the learning algorithm and data distribution through the use of distribution-dependent priors, yielding tighter generalization bounds on data-dependent posteriors. Using this flexibility, however, is difficult, especially when the data distribution is presumed to be unknown. We show how an $\\epsilon$-differentially private data-dependent prior yields a valid PAC-Bayes bound, and then show how non-private mechanisms for choosing priors obtain the same generalization bound provided they converge weakly to the private mechanism. As an application of this result, we show that a Gaussian prior mean chosen via stochastic gradient Langevin dynamics (SGLD; Welling and Teh, 2011) leads to a valid PAC-Bayes bound, despite SGLD only converging weakly to an $\\epsilon$-differentially private mechanism. As the bounds are data-dependent, we study the bounds empirically on synthetic data and standard neural network benchmarks in order to illustrate the gains of data-dependent priors over existing distribution-dependent PAC-Bayes bounds.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2018-02-26T20:14:03.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "data-dependent pac-bayes priors",
      "differential privacy",
      "valid pac-bayes bound",
      "generalization bound",
      "data distribution"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 17,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}