{
  "id": "2106.07263",
  "version": "v1",
  "published": "2021-06-14T09:35:54.000Z",
  "updated": "2021-06-14T09:35:54.000Z",
  "title": "Machine Learning for Variance Reduction in Online Experiments",
  "authors": [
    "Yongyi Guo",
    "Dominic Coey",
    "Mikael Konutgan",
    "Wenting Li",
    "Chris Schoener",
    "Matt Goldman"
  ],
  "categories": [
    "stat.ML",
    "cs.LG"
  ],
  "abstract": "We consider the problem of variance reduction in randomized controlled trials, through the use of covariates correlated with the outcome but independent of the treatment. We propose a machine learning regression-adjusted treatment effect estimator, which we call MLRATE. MLRATE uses machine learning predictors of the outcome to reduce estimator variance. It employs cross-fitting to avoid overfitting biases, and we prove consistency and asymptotic normality under general conditions. MLRATE is robust to poor predictions from the machine learning step: if the predictions are uncorrelated with the outcomes, the estimator performs asymptotically no worse than the standard difference-in-means estimator, while if predictions are highly correlated with outcomes, the efficiency gains are large. In A/A tests, for a set of 48 outcome metrics commonly monitored in Facebook experiments the estimator has over 70\\% lower variance than the simple difference-in-means estimator, and about 19\\% lower variance than the common univariate procedure which adjusts only for pre-experiment values of the outcome.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2021-06-14T09:35:54.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "machine learning",
      "variance reduction",
      "online experiments",
      "regression-adjusted treatment effect estimator",
      "learning regression-adjusted treatment effect"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}