{ "id": "1402.2300", "version": "v1", "published": "2014-02-10T21:05:58.000Z", "updated": "2014-02-10T21:05:58.000Z", "title": "Feature and Variable Selection in Classification", "authors": [ "Aaron Karper" ], "comment": "Part of master seminar in document analysis held by Marcus Eichenberger-Liwicki", "categories": [ "cs.LG", "cs.AI", "stat.ML" ], "abstract": "The amount of information in the form of features and variables avail- able to machine learning algorithms is ever increasing. This can lead to classifiers that are prone to overfitting in high dimensions, high di- mensional models do not lend themselves to interpretable results, and the CPU and memory resources necessary to run on high-dimensional datasets severly limit the applications of the approaches. Variable and feature selection aim to remedy this by finding a subset of features that in some way captures the information provided best. In this paper we present the general methodology and highlight some specific approaches.", "revisions": [ { "version": "v1", "updated": "2014-02-10T21:05:58.000Z" } ], "analyses": { "keywords": [ "variable selection", "classification", "feature selection aim", "high-dimensional datasets severly limit", "memory resources necessary" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2014arXiv1402.2300K" } } }