{ "id": "1406.0067", "version": "v2", "published": "2014-05-31T11:02:37.000Z", "updated": "2015-05-10T08:31:14.000Z", "title": "Optimization via Low-rank Approximation for Community Detection in Networks", "authors": [ "Can M. Le", "Elizaveta Levina", "Roman Vershynin" ], "comment": "45 pages, 7 figures; added discussions about computational complexity and extension to more than two communities", "categories": [ "stat.ML", "cs.SI", "math.ST", "physics.soc-ph", "stat.TH" ], "abstract": "Community detection is one of the fundamental problems of network analysis, for which a number of methods have been proposed. Most model-based or criteria-based methods have to solve an optimization problem over a discrete set of labels to find communities, which is computationally infeasible. Some fast spectral algorithms have been proposed for specific methods or models, but only on a case-by-case basis. Here we propose a general approach for maximizing a function of a network adjacency matrix over discrete labels by projecting the set of labels onto a subspace approximating the leading eigenvectors of the expected adjacency matrix. This projection onto a low-dimensional space makes the feasible set of labels much smaller and the optimization problem much easier. We prove a general result about this method and show how to apply it to several previously proposed community detection criteria, establishing its consistency for label estimation in each case and demonstrating the fundamental connection between spectral properties of the network and various model-based approaches to community detection. Simulations and applications to real-world data are included to demonstrate our method performs well for multiple problems over a wide range of parameters.", "revisions": [ { "version": "v1", "updated": "2014-05-31T11:02:37.000Z", "title": "Optimization via Low-rank Approximation, with Applications to Community Detection in Networks", "abstract": "Community detection is one of the fundamental problems of network analysis. A number of methods for community detection have been proposed, including spectral clustering, modularity, and likelihood-based methods. Most of these methods have to solve an optimization problem over a discrete set of labels, which is computationally infeasible. Some fast algorithms have been proposed for specific methods or models have been proposed, but only on a case by case basis. Here we propose a general approach for maximizing a function of a network adjacency matrix over discrete labels by projecting the set of labels onto a subspace spanned by leading eigenvectors of the adjacency matrix. The main idea is that projection onto a low-dimensional space makes the feasible set of labels much smaller and the optimization problem much easier. We prove a general result on this method and show how to apply it to several previously proposed community detection criteria, establishing its consistency for label estimation in each case. Simulations and applications to real-world data are included to demonstrate our method performs well for multiple problems over a wide range of parameters.", "comment": "41 pages, 7 figures", "journal": null, "doi": null }, { "version": "v2", "updated": "2015-05-10T08:31:14.000Z" } ], "analyses": { "subjects": [ "62E10", "62G05" ], "keywords": [ "low-rank approximation", "applications", "optimization problem", "network adjacency matrix", "community detection criteria" ], "note": { "typesetting": "TeX", "pages": 45, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2014arXiv1406.0067L" } } }