{ "id": "1612.08580", "version": "v1", "published": "2016-12-27T11:30:51.000Z", "updated": "2016-12-27T11:30:51.000Z", "title": "Concentration Inequalities for Random Sets", "authors": [ "Erel Segal-Halevi", "Avinatan Hassidim" ], "categories": [ "math.PR" ], "abstract": "In a large, possibly infinite population, each subject is colored red with probability $p$, independently of the others. Then, a finite sub-population is selected, possibly as a function of the coloring. The imbalance in the sub-population is defined as the difference between the number of reds in it and p times its size. This paper presents high-probability upper bounds (tail-bounds) on this imbalance. To present the upper bounds we define the *UI dimension* --- a new measure for the richness of a set-family. We present three simple rules for upper-bounding the UI dimension of a set-family. Our upper bounds on the imbalance in a sub-population depend only on the size of the sub-population and on the UI dimension of its support. We relate our results to known concepts from machine learning, particularly the VC dimension and Rademacher complexity.", "revisions": [ { "version": "v1", "updated": "2016-12-27T11:30:51.000Z" } ], "analyses": { "keywords": [ "concentration inequalities", "random sets", "ui dimension", "high-probability upper bounds", "vc dimension" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }