{ "id": "2311.15887", "version": "v1", "published": "2023-11-27T14:55:16.000Z", "updated": "2023-11-27T14:55:16.000Z", "title": "FLASC: A Flare-Sensitive Clustering Algorithm: Extending HDBSCAN* for Detecting Branches in Clusters", "authors": [ "D. M. Bot", "J. Peeters", "J. Liesenborgs", "J. Aerts" ], "comment": "20 pages, 11 figures, submitted to ACM TKDD", "categories": [ "cs.LG", "cs.DB" ], "abstract": "We present FLASC, an algorithm for flare-sensitive clustering. Our algorithm builds upon HDBSCAN* -- which provides high-quality density-based clustering performance -- through a post-processing step that differentiates branches within the detected clusters' manifold, adding a type of pattern that can be discovered. Two variants of the algorithm are presented, which trade computational cost for noise robustness. We show that both variants scale similarly to HDBSCAN* in terms of computational cost and provide stable outputs using synthetic data sets, resulting in an efficient flare-sensitive clustering algorithm. In addition, we demonstrate the algorithm's benefit in data exploration over HDBSCAN* clustering on two real-world data sets.", "revisions": [ { "version": "v1", "updated": "2023-11-27T14:55:16.000Z" } ], "analyses": { "subjects": [ "I.5.3", "H.3.3" ], "keywords": [ "detecting branches", "synthetic data sets", "real-world data sets", "trade computational cost", "high-quality density-based clustering performance" ], "note": { "typesetting": "TeX", "pages": 20, "language": "en", "license": "arXiv", "status": "editable" } } }