{ "id": "1806.08301", "version": "v1", "published": "2018-06-21T15:57:17.000Z", "updated": "2018-06-21T15:57:17.000Z", "title": "Online Saddle Point Problem with Applications to Constrained Online Convex Optimization", "authors": [ "Adrian Rivera", "He Wang", "Huan Xu" ], "categories": [ "stat.ML", "cs.LG", "math.OC" ], "abstract": "We study an online saddle point problem where at each iteration a pair of actions need to be chosen without knowledge of the future (convex-concave) payoff functions. The objective is to minimize the gap between the cumulative payoffs and the saddle point value of the aggregate payoff function, which we measure using a metric called \"SP-regret\". The problem generalizes the online convex optimization framework and can be interpreted as finding the Nash equilibrium for the aggregate of a sequence of two-player zero-sum games. We propose an algorithm that achieves $\\tilde{O}(\\sqrt{T})$ SP-regret in the general case, and $O(\\log T)$ SP-regret for the strongly convex-concave case. We then consider a constrained online convex optimization problem motivated by a variety of applications in dynamic pricing, auctions, and crowdsourcing. We relate this problem to an online saddle point problem and establish $O(\\sqrt{T})$ regret using a primal-dual algorithm.", "revisions": [ { "version": "v1", "updated": "2018-06-21T15:57:17.000Z" } ], "analyses": { "keywords": [ "online saddle point problem", "applications", "constrained online convex optimization problem", "payoff function", "online convex optimization framework" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }