{ "id": "2505.05763", "version": "v1", "published": "2025-05-09T03:53:10.000Z", "updated": "2025-05-09T03:53:10.000Z", "title": "BMMDetect: A Multimodal Deep Learning Framework for Comprehensive Biomedical Misconduct Detection", "authors": [ "Yize Zhou", "Jie Zhang", "Meijie Wang", "Lun Yu" ], "categories": [ "cs.LG", "cs.CL" ], "abstract": "Academic misconduct detection in biomedical research remains challenging due to algorithmic narrowness in existing methods and fragmented analytical pipelines. We present BMMDetect, a multimodal deep learning framework that integrates journal metadata (SJR, institutional data), semantic embeddings (PubMedBERT), and GPT-4o-mined textual attributes (methodological statistics, data anomalies) for holistic manuscript evaluation. Key innovations include: (1) multimodal fusion of domain-specific features to reduce detection bias; (2) quantitative evaluation of feature importance, identifying journal authority metrics (e.g., SJR-index) and textual anomalies (e.g., statistical outliers) as dominant predictors; and (3) the BioMCD dataset, a large-scale benchmark with 13,160 retracted articles and 53,411 controls. BMMDetect achieves 74.33% AUC, outperforming single-modality baselines by 8.6%, and demonstrates transferability across biomedical subfields. This work advances scalable, interpretable tools for safeguarding research integrity.", "revisions": [ { "version": "v1", "updated": "2025-05-09T03:53:10.000Z" } ], "analyses": { "keywords": [ "multimodal deep learning framework", "comprehensive biomedical misconduct detection", "identifying journal authority metrics", "reduce detection bias", "academic misconduct detection" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }