{ "id": "1903.05179", "version": "v1", "published": "2019-03-12T19:52:53.000Z", "updated": "2019-03-12T19:52:53.000Z", "title": "Unbiased Measurement of Feature Importance in Tree-Based Methods", "authors": [ "Zhengze Zhou", "Giles Hooker" ], "categories": [ "stat.ML", "cs.LG" ], "abstract": "We propose a modification that corrects for split-improvement variable importance measures in Random Forests and other tree-based methods. These methods have been shown to be biased towards increasing the importance of features with more potential splits. We show that by appropriately incorporating split-improvement as measured on out of sample data, this bias can be corrected yielding better summaries and screening tools.", "revisions": [ { "version": "v1", "updated": "2019-03-12T19:52:53.000Z" } ], "analyses": { "keywords": [ "tree-based methods", "feature importance", "unbiased measurement", "split-improvement variable importance measures", "random forests" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }