{ "id": "1610.02918", "version": "v1", "published": "2016-10-10T13:57:00.000Z", "updated": "2016-10-10T13:57:00.000Z", "title": "Phase transitions and optimal algorithms in high-dimensional Gaussian mixture clustering", "authors": [ "Thibault Lesieur", "Caterina De Bacco", "Jess Banks", "Florent Krzakala", "Cris Moore", "Lenka Zdeborová" ], "comment": "8 pages, 3 figures, conference", "categories": [ "stat.ML", "cond-mat.dis-nn", "cs.IT", "math.IT" ], "abstract": "We consider the problem of Gaussian mixture clustering in the high-dimensional limit where the data consists of $m$ points in $n$ dimensions, $n,m \\rightarrow \\infty$ and $\\alpha = m/n$ stays finite. Using exact but non-rigorous methods from statistical physics, we determine the critical value of $\\alpha$ and the distance between the clusters at which it becomes information-theoretically possible to reconstruct the membership into clusters better than chance. We also determine the accuracy achievable by the Bayes-optimal estimation algorithm. In particular, we find that when the number of clusters is sufficiently large, $r > 4 + 2 \\sqrt{\\alpha}$, there is a gap between the threshold for information-theoretically optimal performance and the threshold at which known algorithms succeed.", "revisions": [ { "version": "v1", "updated": "2016-10-10T13:57:00.000Z" } ], "analyses": { "keywords": [ "high-dimensional gaussian mixture clustering", "phase transitions", "optimal algorithms", "bayes-optimal estimation algorithm", "data consists" ], "tags": [ "conference paper" ], "note": { "typesetting": "TeX", "pages": 8, "language": "en", "license": "arXiv", "status": "editable" } } }