{ "id": "2305.18425", "version": "v1", "published": "2023-05-28T21:10:22.000Z", "updated": "2023-05-28T21:10:22.000Z", "title": "Efficient Storage of Fine-Tuned Models via Low-Rank Approximation of Weight Residuals", "authors": [ "Simo Ryu", "Seunghyun Seo", "Jaejun Yoo" ], "comment": "16 pages, 8 figures", "categories": [ "cs.LG", "cs.AI" ], "abstract": "In this paper, we present an efficient method for storing fine-tuned models by leveraging the low-rank properties of weight residuals. Our key observation is that weight residuals in large overparameterized models exhibit even stronger low-rank characteristics. Based on this insight, we propose Efficient Residual Encoding (ERE), a novel approach that achieves efficient storage of fine-tuned model weights by approximating the low-rank weight residuals. Furthermore, we analyze the robustness of weight residuals and push the limit of storage efficiency by utilizing additional quantization and layer-wise rank allocation. Our experimental results demonstrate that our method significantly reduces memory footprint while preserving performance in various tasks and modalities. We release our code.", "revisions": [ { "version": "v1", "updated": "2023-05-28T21:10:22.000Z" } ], "analyses": { "subjects": [ "I.2.6" ], "keywords": [ "fine-tuned model", "low-rank approximation", "method significantly reduces memory footprint", "low-rank weight residuals", "stronger low-rank characteristics" ], "note": { "typesetting": "TeX", "pages": 16, "language": "en", "license": "arXiv", "status": "editable" } } }