{ "id": "2210.07931", "version": "v1", "published": "2022-10-14T16:30:23.000Z", "updated": "2022-10-14T16:30:23.000Z", "title": "Sequential Learning Of Neural Networks for Prequential MDL", "authors": [ "Jorg Bornschein", "Yazhe Li", "Marcus Hutter" ], "categories": [ "stat.ML", "cs.LG" ], "abstract": "Minimum Description Length (MDL) provides a framework and an objective for principled model evaluation. It formalizes Occam's Razor and can be applied to data from non-stationary sources. In the prequential formulation of MDL, the objective is to minimize the cumulative next-step log-loss when sequentially going through the data and using previous observations for parameter estimation. It thus closely resembles a continual- or online-learning problem. In this study, we evaluate approaches for computing prequential description lengths for image classification datasets with neural networks. Considering the computational cost, we find that online-learning with rehearsal has favorable performance compared to the previously widely used block-wise estimation. We propose forward-calibration to better align the models predictions with the empirical observations and introduce replay-streams, a minibatch incremental training technique to efficiently implement approximate random replay while avoiding large in-memory replay buffers. As a result, we present description lengths for a suite of image classification datasets that improve upon previously reported results by large margins.", "revisions": [ { "version": "v1", "updated": "2022-10-14T16:30:23.000Z" } ], "analyses": { "keywords": [ "neural networks", "prequential mdl", "implement approximate random replay", "description length", "image classification datasets" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }