{ "id": "2107.10314", "version": "v1", "published": "2021-07-21T19:23:56.000Z", "updated": "2021-07-21T19:23:56.000Z", "title": "Small-text: Active Learning for Text Classification in Python", "authors": [ "Christopher Schröder", "Lydia Müller", "Andreas Niekler", "Martin Potthast" ], "comment": "preprint", "categories": [ "cs.LG", "cs.CL" ], "abstract": "We present small-text, a simple modular active learning library, which offers pool-based active learning for text classification in Python. It comes with various pre-implemented state-of-the-art query strategies, including some which can leverage the GPU. Clearly defined interfaces allow to combine a multitude of such query strategies with different classifiers, thereby facilitating a quick mix and match, and enabling a rapid development of both active learning experiments and applications. To make various classifiers accessible in a consistent way, it integrates several well-known machine learning libraries, namely, scikit-learn, PyTorch, and huggingface transformers -- for which the latter integrations are available as optionally installable extensions. The library is available under the MIT License at https://github.com/webis-de/small-text.", "revisions": [ { "version": "v1", "updated": "2021-07-21T19:23:56.000Z" } ], "analyses": { "keywords": [ "text classification", "small-text", "pre-implemented state-of-the-art query strategies", "simple modular active learning library", "well-known machine learning libraries" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }