{
  "id": "2106.05165",
  "version": "v1",
  "published": "2021-06-09T16:12:07.000Z",
  "updated": "2021-06-09T16:12:07.000Z",
  "title": "A Lyapunov-Based Methodology for Constrained Optimization with Bandit Feedback",
  "authors": [
    "Semih Cayci",
    "Yilin Zheng",
    "Atilla Eryilmaz"
  ],
  "categories": [
    "cs.LG",
    "math.OC",
    "stat.ML"
  ],
  "abstract": "In a wide variety of applications including online advertising, contractual hiring, and wireless scheduling, the controller is constrained by a stringent budget constraint on the available resources, which are consumed in a random amount by each action, and a stochastic feasibility constraint that may impose important operational limitations on decision-making. In this work, we consider a general model to address such problems, where each action returns a random reward, cost, and penalty from an unknown joint distribution, and the decision-maker aims to maximize the total reward under a budget constraint $B$ on the total cost and a stochastic constraint on the time-average penalty. We propose a novel low-complexity algorithm based on Lyapunov optimization methodology, named ${\\tt LyOn}$, and prove that it achieves $O(\\sqrt{B\\log B})$ regret and $O(\\log B/B)$ constraint-violation. The low computational cost and sharp performance bounds of ${\\tt LyOn}$ suggest that Lyapunov-based algorithm design methodology can be effective in solving constrained bandit optimization problems.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2021-06-09T16:12:07.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "bandit feedback",
      "lyapunov-based methodology",
      "constrained optimization",
      "impose important operational limitations",
      "budget constraint"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}