{ "id": "1912.01098", "version": "v1", "published": "2019-12-02T22:12:16.000Z", "updated": "2019-12-02T22:12:16.000Z", "title": "Using Dimensionality Reduction to Optimize t-SNE", "authors": [ "Rikhav Shah", "Sandeep Silwal" ], "comment": "11th Annual Workshop on Optimization for Machine Learning (OPT2019)", "categories": [ "cs.LG", "stat.ML" ], "abstract": "t-SNE is a popular tool for embedding multi-dimensional datasets into two or three dimensions. However, it has a large computational cost, especially when the input data has many dimensions. Many use t-SNE to embed the output of a neural network, which is generally of much lower dimension than the original data. This limits the use of t-SNE in unsupervised scenarios. We propose using \textit{random} projections to embed high dimensional datasets into relatively few dimensions, and then using t-SNE to obtain a two dimensional embedding. We show that random projections preserve the desirable clustering achieved by t-SNE, while dramatically reducing the runtime of finding the embedding.", "revisions": [ { "version": "v1", "updated": "2019-12-02T22:12:16.000Z" } ], "analyses": { "keywords": [ "dimensionality reduction", "optimize t-sne", "large computational cost", "embed high dimensional datasets", "random projections preserve" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }