{ "id": "2107.00469", "version": "v1", "published": "2021-06-29T16:07:50.000Z", "updated": "2021-06-29T16:07:50.000Z", "title": "Never Go Full Batch (in Stochastic Convex Optimization)", "authors": [ "Idan Amir", "Yair Carmon", "Tomer Koren", "Roi Livni" ], "categories": [ "math.OC", "cs.LG" ], "abstract": "We study the generalization performance of $\\text{full-batch}$ optimization algorithms for stochastic convex optimization: these are first-order methods that only access the exact gradient of the empirical risk (rather than gradients with respect to individual data points), that include a wide range of algorithms such as gradient descent, mirror descent, and their regularized and/or accelerated variants. We provide a new separation result showing that, while algorithms such as stochastic gradient descent can generalize and optimize the population risk to within $\\epsilon$ after $O(1/\\epsilon^2)$ iterations, full-batch methods either need at least $\\Omega(1/\\epsilon^4)$ iterations or exhibit a dimension-dependent sample complexity.", "revisions": [ { "version": "v1", "updated": "2021-06-29T16:07:50.000Z" } ], "analyses": { "keywords": [ "stochastic convex optimization", "full batch", "individual data points", "stochastic gradient descent", "dimension-dependent sample complexity" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }