{ "id": "2205.10068", "version": "v1", "published": "2022-05-20T10:29:46.000Z", "updated": "2022-05-20T10:29:46.000Z", "title": "Understanding and Mitigating the Uncertainty in Zero-Shot Translation", "authors": [ "Wenxuan Wang", "Wenxiang Jiao", "Shuo Wang", "Zhaopeng Tu", "Michael R. Lyu" ], "comment": "work in progress", "categories": [ "cs.CL", "cs.AI", "cs.LG" ], "abstract": "Zero-shot translation is a promising direction for building a comprehensive multilingual neural machine translation (MNMT) system. However, its quality is still not satisfactory due to off-target issues. In this paper, we aim to understand and alleviate the off-target issues from the perspective of uncertainty in zero-shot translation. By carefully examining the translation output and model confidence, we identify two uncertainties that are responsible for the off-target issues, namely, extrinsic data uncertainty and intrinsic model uncertainty. Based on the observations, we propose two light-weight and complementary approaches to denoise the training data for model training, and mask out the vocabulary of the off-target languages in inference. Extensive experiments on both balanced and unbalanced datasets show that our approaches significantly improve the performance of zero-shot translation over strong MNMT baselines. Qualitative analyses provide insights into where our approaches reduce off-target translations", "revisions": [ { "version": "v1", "updated": "2022-05-20T10:29:46.000Z" } ], "analyses": { "keywords": [ "zero-shot translation", "off-target issues", "understand", "comprehensive multilingual neural machine translation", "approaches reduce off-target translations" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }