{ "id": "1804.07134", "version": "v1", "published": "2018-04-19T13:12:03.000Z", "updated": "2018-04-19T13:12:03.000Z", "title": "varrank: an R package for variable ranking based on mutual information with applications to observed systemic datasets", "authors": [ "Gilles Kratzer", "Reinhard Furrer" ], "comment": "18 pages, 4 figures", "categories": [ "stat.ML", "cs.LG" ], "abstract": "This article describes the R package varrank. It has a flexible implementation of heuristic approaches which perform variable ranking based on mutual information. The package is particularly suitable for exploring multivariate datasets requiring a holistic analysis. The core functionality is a general implementation of the minimum redundancy maximum relevance (mRMRe) model. This approach is based on information theory metrics. It is compatible with discrete and continuous data which are discretised using a large choice of possible rules. The two main problems that can be addressed by this package are the selection of the most representative variables for modeling a collection of variables of interest, i.e., dimension reduction, and variable ranking with respect to a set of variables of interest.", "revisions": [ { "version": "v1", "updated": "2018-04-19T13:12:03.000Z" } ], "analyses": { "keywords": [ "mutual information", "variable ranking", "systemic datasets", "applications", "minimum redundancy maximum relevance" ], "tags": [ "research tool" ], "note": { "typesetting": "TeX", "pages": 18, "language": "en", "license": "arXiv", "status": "editable" } } }