{
  "id": "1711.07245",
  "version": "v1",
  "published": "2017-11-20T10:33:29.000Z",
  "updated": "2017-11-20T10:33:29.000Z",
  "title": "Optical Character Recognition (OCR) for Telugu: Database, Algorithm and Application",
  "authors": [
    "Konkimalla Chandra Prakash",
    "Y. M. Srikar",
    "Gayam Trishal",
    "Souraj Mandal",
    "Sumohana S. Channappayya"
  ],
  "comment": "Submitted to NCC 2018",
  "categories": [
    "cs.CV"
  ],
  "abstract": "Telugu is a Dravidian language spoken by more than 80 million people worldwide. The optical character recognition (OCR) of the Telugu script has wide-ranging applications including education, health-care, administration, etc. The beautiful Telugu script, however, is very different from Germanic scripts like English and German. This makes the use of transfer learning of Germanic OCR solutions to Telugu a non-trivial task. To address the challenge of OCR for Telugu, we make three contributions in this work: (i) a database of Telugu characters, (ii) a deep-learning-based OCR algorithm, and (iii) a client-server solution for the online deployment of the algorithm. For the benefit of the Telugu people and the research community, we will make our code freely available at https://gayamtrishal.github.io/OCR_Telugu.github.io/",
  "revisions": [
    {
      "version": "v1",
      "updated": "2017-11-20T10:33:29.000Z"
    }
  ],
  "analyses": {
    "keywords": [
      "optical character recognition",
      "application",
      "million people worldwide",
      "client server solution",
      "dravidian language spoken"
    ],
    "note": {
      "typesetting": "TeX",
      "pages": 0,
      "language": "en",
      "license": "arXiv",
      "status": "editable"
    }
  }
}