{ "id": "1404.6810", "version": "v2", "published": "2014-04-27T19:21:24.000Z", "updated": "2014-11-28T23:16:23.000Z", "title": "Information Measures: the Curious Case of the Binary Alphabet", "authors": [ "Jiantao Jiao", "Thomas Courtade", "Albert No", "Kartik Venkat", "Tsachy Weissman" ], "comment": "to appear in IEEE Transactions on Information Theory", "categories": [ "cs.IT", "math.IT" ], "abstract": "Four problems related to information divergence measures defined on finite alphabets are considered. In three of the cases we consider, we illustrate a contrast which arises between the binary-alphabet and larger-alphabet settings. This is surprising in some instances, since characterizations for the larger-alphabet settings do not generalize their binary-alphabet counterparts. Specifically, we show that $f$-divergences are not the unique decomposable divergences on binary alphabets that satisfy the data processing inequality, thereby clarifying claims that have previously appeared in the literature. We also show that KL divergence is the unique Bregman divergence which is also an $f$-divergence for any alphabet size. We show that KL divergence is the unique Bregman divergence which is invariant to statistically sufficient transformations of the data, even when non-decomposable divergences are considered. Like some of the problems we consider, this result holds only when the alphabet size is at least three.", "revisions": [ { "version": "v1", "updated": "2014-04-27T19:21:24.000Z", "abstract": "Four problems related to information divergence measures defined on finite alphabets are considered. In three of the cases we consider, we illustrate a contrast which arises between the binary-alphabet and larger-alphabet settings. This is surprising in some instances, since characterizations for the larger-alphabet settings do not generalize their binary-alphabet counterparts. \nSpecifically, we show that $f$-divergences are not the unique decomposable divergences on binary alphabets that satisfy the data processing inequality, despite contrary claims in the literature. We also show that Kullback-Leibler (KL) divergence is the unique Bregman divergence which is invariant to statistically sufficient transforms of the data, only when the alphabet size is at least three. Finally, we show KL divergence is the unique Bregman divergence which is also an $f$-divergence for any alphabet size.", "comment": "submitted to IEEE Transactions on Information Theory", "journal": null, "doi": null }, { "version": "v2", "updated": "2014-11-28T23:16:23.000Z" } ], "analyses": { "keywords": [ "binary alphabet", "information measures", "curious case", "unique bregman divergence", "larger-alphabet settings" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable", "adsabs": "2014arXiv1404.6810J" } } }