{ "id": "1806.01547", "version": "v1", "published": "2018-06-05T08:23:42.000Z", "updated": "2018-06-05T08:23:42.000Z", "title": "ClusterNet : Semi-Supervised Clustering using Neural Networks", "authors": [ "Ankita Shukla", "Gullal Singh Cheema", "Saket Anand" ], "comment": "9 Pages", "categories": [ "cs.LG", "cs.CV", "stat.ML" ], "abstract": "Clustering using neural networks has recently demonstrated promising performance in machine learning and computer vision applications. However, the performance of current approaches is limited either by unsupervised learning or their dependence on large set of labeled data samples. In this paper, we propose ClusterNet that uses pairwise semantic constraints from very few labeled data samples (< 5% of total data) and exploits the abundant unlabeled data to drive the clustering approach. We define a new loss function that uses pairwise semantic similarity between objects combined with constrained k-means clustering to efficiently utilize both labeled and unlabeled data in the same framework. The proposed network uses convolution autoencoder to learn a latent representation that groups data into k specified clusters, while also learning the cluster centers simultaneously. We evaluate and compare the performance of ClusterNet on several datasets and state of the art deep clustering approaches.", "revisions": [ { "version": "v1", "updated": "2018-06-05T08:23:42.000Z" } ], "analyses": { "keywords": [ "neural networks", "clusternet", "labeled data", "semi-supervised clustering", "performance" ], "note": { "typesetting": "TeX", "pages": 9, "language": "en", "license": "arXiv", "status": "editable" } } }