{
  "id": "2309.17347",
  "version": "v1",
  "published": "2023-09-27T11:47:05.000Z",
  "updated": "2023-09-27T11:47:05.000Z",
  "title": "Demographic Parity: Mitigating Biases in Real-World Data",
  "authors": [
    "Orestis Loukas",
    "Ho-Ryun Chung"
  ],
  "comment": "24 pages, 16 Figures, Python code attached",
  "categories": [
    "cs.LG",
    "cs.CY"
  ],
  "abstract": "Computer-based decision systems are widely used to automate decisions in many aspects of everyday life, which include sensitive areas like hiring, loaning and even criminal sentencing. A decision pipeline heavily relies on large volumes of historical real-world data for training its models. However, historical training data often contains gender, racial or other biases which are propagated to the trained models influencing computer-based decisions. In this work, we propose a robust methodology that guarantees the removal of unwanted biases while maximally preserving classification utility. Our approach can always achieve this in a model-independent way by deriving from real-world data the asymptotic dataset that uniquely encodes demographic parity and realism. As a proof-of-principle, we deduce from public census records such an asymptotic dataset from which synthetic samples can be generated to train well-established classifiers. Benchmarking the generalization capability of these classifiers trained on our synthetic data, we confirm the absence of any explicit or implicit bias in the computer-aided decision.",
  "revisions": [
    {
      "version": "v1",
      "updated": "2023-09-27T11:47:05.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "real-world data",
      "mitigating biases",
      "asymptotic dataset",
      "decision pipeline heavily relies",
      "uniquely encodes demographic parity"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 24,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}