{ "id": "2407.19353", "version": "v1", "published": "2024-07-28T00:07:20.000Z", "updated": "2024-07-28T00:07:20.000Z", "title": "A spring-block theory of feature learning in deep neural networks", "authors": [ "Cheng Shi", "Liming Pan", "Ivan Dokmanić" ], "categories": [ "cond-mat.dis-nn", "cond-mat.stat-mech", "cs.LG", "stat.ML" ], "abstract": "A central question in deep learning is how deep neural networks (DNNs) learn features. DNN layers progressively collapse data into a regular low-dimensional geometry. This collective effect of non-linearity, noise, learning rate, width, depth, and numerous other parameters, has eluded first-principles theories which are built from microscopic neuronal dynamics. Here we present a noise-non-linearity phase diagram that highlights where shallow or deep layers learn features more effectively. We then propose a macroscopic mechanical theory of feature learning that accurately reproduces this phase diagram, offering a clear intuition for why and how some DNNs are “lazy” and some are “active”, and relating the distribution of feature learning over layers with test accuracy.", "revisions": [ { "version": "v1", "updated": "2024-07-28T00:07:20.000Z" } ], "analyses": { "keywords": [ "deep neural networks", "feature learning", "spring-block theory", "dnn layers progressively collapse data", "phase diagram" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }