{ "id": "2107.00469", "version": "v1", "published": "2021-06-29T16:07:50.000Z", "updated": "2021-06-29T16:07:50.000Z", "title": "Never Go Full Batch (in Stochastic Convex Optimization)", "authors": [ "Idan Amir", "Yair Carmon", "Tomer Koren", "Roi Livni" ], "categories": [ "math.OC", "cs.LG" ], "abstract": "We study the generalization performance of $\\text{full-batch}$ optimization algorithms for stochastic convex optimization: these are first-order methods that only access the exact gradient of the empirical risk (rather than gradients with respect to individual data points), that include a wide range of algorithms such as gradient descent, mirror descent, and their regularized and/or accelerated variants. We provide a new separation result showing that, while algorithms such as stochastic gradient descent can generalize and optimize the population risk to within $\\epsilon$ after $O(1/\\epsilon^2)$ iterations, full-batch methods either need at least $\\Omega(1/\\epsilon^4)$ iterations or exhibit a dimension-dependent sample complexity.", "revisions": [ { "version": "v1", "updated": "2021-06-29T16:07:50.000Z" } ], "analyses": { "keywords": [ "stochastic convex optimization", "full batch", "individual data points", "stochastic gradient descent", "dimension-dependent sample complexity" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }