{ "id": "2509.25035", "version": "v2", "published": "2025-09-29T16:55:44.000Z", "updated": "2025-10-01T17:45:09.000Z", "title": "Ultra-Fast Language Generation via Discrete Diffusion Divergence Instruct", "authors": [ "Haoyang Zheng", "Xinyang Liu", "Cindy Xiangrui Kong", "Nan Jiang", "Zheyuan Hu", "Weijian Luo", "Wei Deng", "Guang Lin" ], "comment": "56 pages, 7 figures, 7 tables", "categories": [ "cs.CL", "cs.AI", "cs.LG" ], "abstract": "Fast and high-quality language generation is the holy grail that people pursue in the age of AI. In this work, we introduce Discrete Diffusion Divergence Instruct (DiDi-Instruct), a training-based method that initializes from a pre-trained (masked) discrete diffusion language model (dLLM) and distills a few-step student for fast generation. The resulting DiDi-Instruct model achieves comparable or superior performance to its dLLM teacher and the GPT-2 baseline while enabling up to 64$\\times$ acceleration. The theoretical foundation of DiDi-Instruct is a novel framework based on integral KL-divergence minimization, which yields a practical training algorithm. We further introduce grouped reward normalization, intermediate-state matching, and the reward-guided ancestral sampler that significantly improve training stability, model coverage, and inference quality. On OpenWebText, DiDi-Instruct achieves perplexity from 62.2 (8 NFEs) to 18.4 (128 NFEs), which outperforms prior accelerated dLLMs and GPT-2 baseline. These gains come with a negligible entropy loss (around $1\\%$) and reduce additional training wall-clock time by more than $20\\times$ compared to competing dLLM distillation methods. We further validate the robustness and effectiveness of DiDi-Instruct through extensive ablation studies, model scaling, and the generation of discrete protein sequences. In conclusion, DiDi-Instruct is an efficient yet effective distillation method, enabling language generation in the blink of an eye. We will release both code and models at github.com/haoyangzheng-ai/didi-instruct.", "revisions": [ { "version": "v2", "updated": "2025-10-01T17:45:09.000Z" } ], "analyses": { "keywords": [ "discrete diffusion divergence instruct", "ultra-fast language generation", "additional training wall-clock time", "didi-instruct model achieves comparable" ], "note": { "typesetting": "TeX", "pages": 56, "language": "en", "license": "arXiv", "status": "editable" } } }