{ "id": "2407.19353", "version": "v1", "published": "2024-07-28T00:07:20.000Z", "updated": "2024-07-28T00:07:20.000Z", "title": "A spring-block theory of feature learning in deep neural networks", "authors": [ "Cheng Shi", "Liming Pan", "Ivan Dokmanić" ], "categories": [ "cond-mat.dis-nn", "cond-mat.stat-mech", "cs.LG", "stat.ML" ], "abstract": "A central question in deep learning is how deep neural networks (DNNs) learn features. DNN layers progressively collapse data into a regular low-dimensional geometry. This collective effect of non-linearity, noise, learning rate, width, depth, and numerous other parameters, has eluded first-principles theories which are built from microscopic neuronal dynamics. Here we present a noise-non-linearity phase diagram that highlights where shallow or deep layers learn features more effectively. We then propose a macroscopic mechanical theory of feature learning that accurately reproduces this phase diagram, offering a clear intuition for why and how some DNNs are “lazy” and some are “active”, and relating the distribution of feature learning over layers with test accuracy.", "revisions": [ { "version": "v1", "updated": "2024-07-28T00:07:20.000Z" } ], "analyses": { "keywords": [ "deep neural networks", "feature learning", "spring-block theory", "dnn layers progressively collapse data", "phase diagram" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }