{ "id": "1701.07953", "version": "v1", "published": "2017-01-27T06:17:14.000Z", "updated": "2017-01-27T06:17:14.000Z", "title": "The Price of Differential Privacy For Online Learning", "authors": [ "Naman Agarwal", "Karan Singh" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "We design differentially private algorithms for the problem of online linear optimization in the full information and bandit settings with optimal $\\tilde{O}(\\sqrt{T})$ regret bounds. In the full-information setting, our results demonstrate that $(\\epsilon, \\delta)$-differential privacy may be ensured for free - in particular, the regret bounds scale as $O(\\sqrt{T})+\\tilde{O}\\big(\\frac{1}{\\epsilon}\\log \\frac{1}{\\delta}\\big)$. For bandit linear optimization, and as a special case, for non-stochastic multi-armed bandits, the proposed algorithm achieves a regret of $O\\Big(\\frac{\\sqrt{T\\log T}}{\\epsilon}\\log \\frac{1}{\\delta}\\Big)$, while the previously best known bound was $\\tilde{O}\\Big(\\frac{T^{\\frac{3}{4}}}{\\epsilon}\\Big)$.", "revisions": [ { "version": "v1", "updated": "2017-01-27T06:17:14.000Z" } ], "analyses": { "keywords": [ "differential privacy", "online learning", "design differentially private algorithms", "bandit linear optimization", "regret bounds scale" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }