{ "id": "2210.07931", "version": "v1", "published": "2022-10-14T16:30:23.000Z", "updated": "2022-10-14T16:30:23.000Z", "title": "Sequential Learning Of Neural Networks for Prequential MDL", "authors": [ "Jorg Bornschein", "Yazhe Li", "Marcus Hutter" ], "categories": [ "stat.ML", "cs.LG" ], "abstract": "Minimum Description Length (MDL) provides a framework and an objective for principled model evaluation. It formalizes Occam's Razor and can be applied to data from non-stationary sources. In the prequential formulation of MDL, the objective is to minimize the cumulative next-step log-loss when sequentially going through the data and using previous observations for parameter estimation. It thus closely resembles a continual- or online-learning problem. In this study, we evaluate approaches for computing prequential description lengths for image classification datasets with neural networks. Considering the computational cost, we find that online-learning with rehearsal has favorable performance compared to the previously widely used block-wise estimation. We propose forward-calibration to better align the models predictions with the empirical observations and introduce replay-streams, a minibatch incremental training technique to efficiently implement approximate random replay while avoiding large in-memory replay buffers. As a result, we present description lengths for a suite of image classification datasets that improve upon previously reported results by large margins.", "revisions": [ { "version": "v1", "updated": "2022-10-14T16:30:23.000Z" } ], "analyses": { "keywords": [ "neural networks", "prequential mdl", "implement approximate random replay", "description length", "image classification datasets" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }