{ "id": "2305.18425", "version": "v1", "published": "2023-05-28T21:10:22.000Z", "updated": "2023-05-28T21:10:22.000Z", "title": "Efficient Storage of Fine-Tuned Models via Low-Rank Approximation of Weight Residuals", "authors": [ "Simo Ryu", "Seunghyun Seo", "Jaejun Yoo" ], "comment": "16 pages, 8 figures", "categories": [ "cs.LG", "cs.AI" ], "abstract": "In this paper, we present an efficient method for storing fine-tuned models by leveraging the low-rank properties of weight residuals. Our key observation is that weight residuals in large overparameterized models exhibit even stronger low-rank characteristics. Based on this insight, we propose Efficient Residual Encoding (ERE), a novel approach that achieves efficient storage of fine-tuned model weights by approximating the low-rank weight residuals. Furthermore, we analyze the robustness of weight residuals and push the limit of storage efficiency by utilizing additional quantization and layer-wise rank allocation. Our experimental results demonstrate that our method significantly reduces memory footprint while preserving performance in various tasks and modalities. We release our code.", "revisions": [ { "version": "v1", "updated": "2023-05-28T21:10:22.000Z" } ], "analyses": { "subjects": [ "I.2.6" ], "keywords": [ "fine-tuned model", "low-rank approximation", "method significantly reduces memory footprint", "low-rank weight residuals", "stronger low-rank characteristics" ], "note": { "typesetting": "TeX", "pages": 16, "language": "en", "license": "arXiv", "status": "editable" } } }