{ "id": "1912.00458", "version": "v1", "published": "2019-12-01T18:05:49.000Z", "updated": "2019-12-01T18:05:49.000Z", "title": "On the optimality of kernels for high-dimensional clustering", "authors": [ "Leena Chennuru Vankadara", "Debarghya Ghoshdastidar" ], "categories": [ "stat.ML", "cs.LG" ], "abstract": "This paper studies the optimality of kernel methods in high-dimensional data clustering. Recent works have studied the large sample performance of kernel clustering in the high-dimensional regime, where Euclidean distance becomes less informative. However, it is unknown whether popular methods, such as kernel k-means, are optimal in this regime. We consider the problem of high-dimensional Gaussian clustering and show that, with the exponential kernel function, the sufficient conditions for partial recovery of clusters using the NP-hard kernel k-means objective matches the known information-theoretic limit up to a factor of $\\sqrt{2}$ for large $k$. It also exactly matches the known upper bounds for the non-kernel setting. We also show that a semi-definite relaxation of the kernel k-means procedure matches up to constant factors, the spectral threshold, below which no polynomial-time algorithm is known to succeed. This is the first work that provides such optimality guarantees for the kernel k-means as well as its convex relaxation. Our proofs demonstrate the utility of the less known polynomial concentration results for random variables with exponentially decaying tails in a higher-order analysis of kernel methods.", "revisions": [ { "version": "v1", "updated": "2019-12-01T18:05:49.000Z" } ], "analyses": { "keywords": [ "optimality", "high-dimensional clustering", "kernel k-means procedure matches", "kernel methods", "np-hard kernel k-means objective matches" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }