{ "id": "1701.07953", "version": "v1", "published": "2017-01-27T06:17:14.000Z", "updated": "2017-01-27T06:17:14.000Z", "title": "The Price of Differential Privacy For Online Learning", "authors": [ "Naman Agarwal", "Karan Singh" ], "categories": [ "cs.LG", "stat.ML" ], "abstract": "We design differentially private algorithms for the problem of online linear optimization in the full information and bandit settings with optimal $\\tilde{O}(\\sqrt{T})$ regret bounds. In the full-information setting, our results demonstrate that $(\\epsilon, \\delta)$-differential privacy may be ensured for free - in particular, the regret bounds scale as $O(\\sqrt{T})+\\tilde{O}\\big(\\frac{1}{\\epsilon}\\log \\frac{1}{\\delta}\\big)$. For bandit linear optimization, and as a special case, for non-stochastic multi-armed bandits, the proposed algorithm achieves a regret of $O\\Big(\\frac{\\sqrt{T\\log T}}{\\epsilon}\\log \\frac{1}{\\delta}\\Big)$, while the previously best known bound was $\\tilde{O}\\Big(\\frac{T^{\\frac{3}{4}}}{\\epsilon}\\Big)$.", "revisions": [ { "version": "v1", "updated": "2017-01-27T06:17:14.000Z" } ], "analyses": { "keywords": [ "differential privacy", "online learning", "design differentially private algorithms", "bandit linear optimization", "regret bounds scale" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }