{ "id": "2109.03670", "version": "v1", "published": "2021-09-08T14:16:31.000Z", "updated": "2021-09-08T14:16:31.000Z", "title": "YAHPO Gym -- Design Criteria and a new Multifidelity Benchmark for Hyperparameter Optimization", "authors": [ "Florian Pfisterer", "Lennart Schneider", "Julia Moosbauer", "Martin Binder", "Bernd Bischl" ], "comment": "Preprint. Under review. 17 pages, 4 tables, 5 figures", "categories": [ "cs.LG", "stat.ML" ], "abstract": "When developing and analyzing new hyperparameter optimization (HPO) methods, it is vital to empirically evaluate and compare them on well-curated benchmark suites. In this work, we list desirable properties and requirements for such benchmarks and propose a new set of challenging and relevant multifidelity HPO benchmark problems motivated by these requirements. For this, we revisit the concept of surrogate-based benchmarks and empirically compare them to more widely-used tabular benchmarks, showing that the latter ones may induce bias in performance estimation and ranking of HPO methods. We present a new surrogate-based benchmark suite for multifidelity HPO methods consisting of 9 benchmark collections that constitute over 700 multifidelity HPO problems in total. All our benchmarks also allow for querying of multiple optimization targets, enabling the benchmarking of multi-objective HPO. We examine and compare our benchmark suite with respect to the defined requirements and show that our benchmarks provide viable additions to existing suites.", "revisions": [ { "version": "v1", "updated": "2021-09-08T14:16:31.000Z" } ], "analyses": { "keywords": [ "hyperparameter optimization", "design criteria", "yahpo gym", "multifidelity benchmark", "benchmark suite" ], "note": { "typesetting": "TeX", "pages": 17, "language": "en", "license": "arXiv", "status": "editable" } } }