{ "id": "1711.07245", "version": "v1", "published": "2017-11-20T10:33:29.000Z", "updated": "2017-11-20T10:33:29.000Z", "title": "Optical Character Recognition (OCR) for Telugu: Database, Algorithm and Application", "authors": [ "Konkimalla Chandra Prakash", "Y. M. Srikar", "Gayam Trishal", "Souraj Mandal", "Sumohana S. Channappayya" ], "comment": "Submitted to NCC 2018", "categories": [ "cs.CV" ], "abstract": "Telugu is a Dravidian language spoken by more than 80 million people worldwide. The optical character recognition (OCR) of the Telugu script has wide ranging applications including education, health-care, administration etc. The beautiful Telugu script however is very different from Germanic scripts like English and German. This makes the use of transfer learning of Germanic OCR solutions to Telugu a non-trivial task. To address the challenge of OCR for Telugu, we make three contributions in this work: (i) a database of Telugu characters, (ii) a deep learning based OCR algorithm, and (iii) a client server solution for the online deployment of the algorithm. For the benefit of the Telugu people and the research community, we will make our code freely available at https://gayamtrishal.github.io/OCR_Telugu.github.io/", "revisions": [ { "version": "v1", "updated": "2017-11-20T10:33:29.000Z" } ], "analyses": { "keywords": [ "optical character recognition", "application", "million people worldwide", "client server solution", "dravidian language spoken" ], "note": { "typesetting": "TeX", "pages": 0, "language": "en", "license": "arXiv", "status": "editable" } } }