{ "id": "2109.03670", "version": "v1", "published": "2021-09-08T14:16:31.000Z", "updated": "2021-09-08T14:16:31.000Z", "title": "YAHPO Gym -- Design Criteria and a new Multifidelity Benchmark for Hyperparameter Optimization", "authors": [ "Florian Pfisterer", "Lennart Schneider", "Julia Moosbauer", "Martin Binder", "Bernd Bischl" ], "comment": "Preprint. Under review. 17 pages, 4 tables, 5 figures", "categories": [ "cs.LG", "stat.ML" ], "abstract": "When developing and analyzing new hyperparameter optimization (HPO) methods, it is vital to empirically evaluate and compare them on well-curated benchmark suites. In this work, we list desirable properties and requirements for such benchmarks and propose a new set of challenging and relevant multifidelity HPO benchmark problems motivated by these requirements. For this, we revisit the concept of surrogate-based benchmarks and empirically compare them to more widely-used tabular benchmarks, showing that the latter ones may induce bias in performance estimation and ranking of HPO methods. We present a new surrogate-based benchmark suite for multifidelity HPO methods consisting of 9 benchmark collections that constitute over 700 multifidelity HPO problems in total. All our benchmarks also allow for querying of multiple optimization targets, enabling the benchmarking of multi-objective HPO. We examine and compare our benchmark suite with respect to the defined requirements and show that our benchmarks provide viable additions to existing suites.", "revisions": [ { "version": "v1", "updated": "2021-09-08T14:16:31.000Z" } ], "analyses": { "keywords": [ "hyperparameter optimization", "design criteria", "yahpo gym", "multifidelity benchmark", "benchmark suite" ], "note": { "typesetting": "TeX", "pages": 17, "language": "en", "license": "arXiv", "status": "editable" } } }