import json
import os
import subprocess
import sys
import uuid

import requests
from indic_transliteration import sanscript
from indic_transliteration.sanscript import SCHEMES, SchemeMap, transliterate
from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator
from libindic.transliteration import getInstance
from om_transliterator import Transliterator as om_Transliterator
from transliterate import translit  # , get_available_language_codes

from MNF.settings import BasePath
from .indictrans.indictrans import Transliterator

# Backends whose imports are currently disabled:
# import polyglot
# from polyglot.transliteration import Transliterator as poly
# from polyglot.text import Text
# import pinyin
# from anyascii import anyascii

t = getInstance()
basePath = BasePath()


# Script name -> script code sent to the Azure transliterate endpoint.
# Names that are not in the table are forwarded to the service unchanged.
# NOTE(review): "knda" is lower-case while every other code is capitalised;
# kept exactly as before -- confirm against the service before normalising.
_AZURE_SCRIPT_CODES = {
    "Devanagari": "Deva",
    "Arabic": "Arab",
    "Latin": "Latn",
    "Kannada": "knda",
    "Tamil": "Taml",
    "Bengali": "Beng",
    "Telugu": "Telu",
    "Malayalam": "Mlym",
    "Cyrillic": "Cyrl",
    "Gurmukhi": "Guru",
    "Gujarati": "Gujr",
    "Oriya": "Orya",
    "Sinhala": "Sinh",
    "Hanji": "Hans",
    "Thai": "Thai",
    "Hebrew": "Hebr",
}

_AZURE_TRANSLITERATE_URL = (
    "https://api.cognitive.microsofttranslator.com/transliterate?api-version=3.0"
)
_AZURE_REGION = "eastus"
# Upper bound for one HTTP round trip; without it a stalled connection would
# block the caller indefinitely.
_AZURE_TIMEOUT_SECONDS = 30
# SECURITY: this key is committed to source control and should be rotated.
# Set the AZURE_TRANSLATOR_KEY environment variable to override it; the
# literal only remains so existing deployments keep working.
_AZURE_LEGACY_SUBSCRIPTION_KEY = "959354878e73458e898a69f1f5887b69"


# -> Directly usable Azure API for transliteration
def azure_transliteration(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` through the Microsoft Translator REST endpoint.

    Args:
        text: Text to transliterate.
        source_lang: Language code sent as the ``language`` query parameter.
        source_script: Script name of ``text``. Names listed in
            ``_AZURE_SCRIPT_CODES`` are converted to their code; any other
            value is sent as given.
        dest_script: Script name to convert to; mapped like ``source_script``.

    Returns:
        The ``text`` field of the first element of the JSON response. If the
        request or the response handling fails for any reason (network
        error, timeout, unexpected payload), the error is printed and the
        input ``text`` is returned unchanged.
    """
    source_script = _AZURE_SCRIPT_CODES.get(source_script, source_script)
    dest_script = _AZURE_SCRIPT_CODES.get(dest_script, dest_script)

    headers = {
        'Ocp-Apim-Subscription-Key': os.environ.get(
            "AZURE_TRANSLATOR_KEY", _AZURE_LEGACY_SUBSCRIPTION_KEY
        ),
        'Ocp-Apim-Subscription-Region': _AZURE_REGION,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4()),
    }

    print("source_script", source_script)
    print("dest_script", dest_script)
    print("source_lang", source_lang)
    print("text", text)

    params = {
        'language': source_lang,
        'fromScript': source_script,
        'toScript': dest_script,
    }
    body = [{'text': text}]
    try:
        response = requests.post(
            _AZURE_TRANSLITERATE_URL,
            params=params,
            headers=headers,
            json=body,
            timeout=_AZURE_TIMEOUT_SECONDS,
        )
        out = response.json()[0]['text']
    except Exception as e:
        # Deliberate best effort: fall back to the untouched input rather
        # than propagating service failures to the caller.
        print("The error was ", e)
        out = text
    return out


# -> Directly Usable Polyglot api for transliteration
# def polyglot_trans(text, source_script, dest_script):
#     # from polyglot.downloader import downloader
#     if source_script=="Latin":
#         source_script="en"
#     elif source_script=="Arabic":
#         source_script="ar"
#     elif source_script=="Hanji":
#         source_script="zh"
#     if dest_script=="Latin":
#         dest_script="en"
#     elif dest_script=="Arabic":
#         dest_script="ar"
#     elif source_script=="Hanji":
#         source_script="zh"
#     new_text = ""
#     text_break = Text(text)
#     for x in text_break.transliterate(dest_script):
#         new_text = new_text + str(x)
#     return new_text


# Script name -> language code passed to the indictrans ``Transliterator``.
# One table serves both directions so the source and target codes cannot
# drift apart (the target side used to map Bengali to "Ben").
_INDICTRANS_LANG_CODES = {
    "Devanagari": "hin",
    "Arabic": "urd",
    "Kannada": "kan",
    "Tamil": "tam",
    "Latin": "eng",
    "Bengali": "ben",
    "Telugu": "tel",
    "Malayalam": "mal",
    "Oriya": "ori",
    "Gujarati": "guj",
    "Gurmukhi": "pan",
}


# -> Directly usable indic_trans API for transliteration
def indic_trans(text, source_script, dest_script):
    """Transliterate ``text`` with the bundled indictrans ``Transliterator``.

    Args:
        text: Text to transliterate.
        source_script: Script name of ``text``; names in
            ``_INDICTRANS_LANG_CODES`` are converted to their language code,
            anything else is passed through unchanged.
        dest_script: Script name to convert to; mapped the same way.

    Returns:
        Whatever ``Transliterator.transform`` returns for ``text``.
    """
    source_code = _INDICTRANS_LANG_CODES.get(source_script, source_script)
    target_code = _INDICTRANS_LANG_CODES.get(dest_script, dest_script)
    trn = Transliterator(source=source_code, target=target_code, build_lookup=True)
    return trn.transform(text)


# -> Directly usable om_translator API for transliteration
def om_transliterator(text):
    """Return ``text`` converted by ``om_Transliterator.knda_to_latn``."""
    return om_Transliterator().knda_to_latn(text)


# -> Directly Usable libindic api for
# transliteration

# Script name -> two-letter language prefix; "_IN" is appended in libindic().
# Gurmukhi used to map to "gu", which is the Gujarati prefix, so Gurmukhi
# requests were sent with the Gujarati code; "pa" (Punjabi) is used instead.
_LIBINDIC_LANG_PREFIXES = {
    "Devanagari": "hi",
    "Latin": "en",
    "Malayalam": "ml",
    "Gujarati": "gu",
    "Oriya": "or",
    "Telugu": "te",
    "Bengali": "bn",
    "Tamil": "ta",
    "Kannada": "kn",
    "Gurmukhi": "pa",
}


def libindic(text, dest_script):
    """Transliterate ``text`` with the module-level libindic instance ``t``.

    Args:
        text: Text to transliterate.
        dest_script: Target script name. Known names are converted to a
            language prefix; any other value is used as the prefix as-is.

    Returns:
        The result of ``t.transliterate(text, "<prefix>_IN")``.
    """
    prefix = _LIBINDIC_LANG_PREFIXES.get(dest_script, dest_script)
    return t.transliterate(text, prefix + '_IN')


# -> Directly usable indic_transliteration_IAST API for transliteration
def indic_transliteration_IAST(text):
    """Transliterate ``text`` from the IAST scheme to Devanagari."""
    return transliterate(text, sanscript.IAST, sanscript.DEVANAGARI)


# -> Directly usable indic_transliteration_ITRANS API for transliteration
def indic_transliteration_ITRANS(text):
    """Transliterate ``text`` from the ITRANS scheme to Devanagari."""
    return transliterate(text, sanscript.ITRANS, sanscript.DEVANAGARI)


# -> Directly usable sheetal API for transliteration
def sheetal(text):
    """Run ``dev-rom-sheetal.py`` on ``text`` and return its standard output.

    The script is started with the current interpreter and receives ``text``
    as its only argument.

    Returns:
        The script's stdout decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status.
    """
    script = rf"{basePath}/conversion/translation/dev-rom-sheetal.py"
    raw = subprocess.check_output([sys.executable, script, text])
    return raw.decode('utf-8')


# -> Directly usable ritwik code for transliteration
def ritwik(text):
    """Run ``dev-rom-ritwik.py`` on ``text`` and return its standard output.

    The script is started with the current interpreter and receives ``text``
    as its only argument.

    Returns:
        The script's stdout decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status.
    """
    script = rf"{basePath}/conversion/translation/dev-rom-ritwik.py"
    raw = subprocess.check_output([sys.executable, script, text])
    return raw.decode('utf-8')


# -> Directly usable indic_transliteration_GURMUKHI API for transliteration
def indic_transliteration_GURMUKHI(text):
    """Transliterate ``text`` from the IAST scheme to Gurmukhi."""
    return transliterate(text, sanscript.IAST, sanscript.GURMUKHI)


# -> Directly usable unicode_transliteration_GURMUKHI API for transliteration
def unicode_transliteration_GURMUKHI(text):
    """Convert IAST ``text`` to Devanagari, then map it from "hi" to "pa".

    The second step uses ``UnicodeIndicTransliterator.transliterate``.
    """
    devanagari = transliterate(text, sanscript.IAST, sanscript.DEVANAGARI)
    return UnicodeIndicTransliterator.transliterate(devanagari, "hi", "pa")


# -> Directly usable transliteration_LATIN_CYRILLIC API for transliteration
def transliteration_LATIN_CYRILLIC(text):
    """Return ``translit(text, 'bg')`` (forward direction of the 'bg' pack)."""
    return translit(text, 'bg')


# -> Directly usable translit_CHINESE_LATIN API for transliteration
def translit_CHINESE_LATIN(text):
    """Return ``text`` as space-delimited pinyin in the "strip" format.

    Raises:
        ImportError: If the ``pinyin`` package is not installed.
    """
    # The module-level ``import pinyin`` is commented out, which left this
    # name undefined; importing here keeps the dependency optional.
    import pinyin

    return pinyin.get(text, format="strip", delimiter=" ")


def translit_th_sin_mng_heb_to_latin(text):
    """Return ``anyascii(text)``.

    Raises:
        ImportError: If the ``anyascii`` package is not installed.
    """
    # Same situation as pinyin: the module-level import is commented out.
    from anyascii import anyascii

    return anyascii(text)


# -> Directly usable indic_transliteration_TELUGU API for transliteration
def indic_transliteration_TELUGU(text):
    """Transliterate ``text`` from the IAST scheme to Telugu."""
    return transliterate(text, sanscript.IAST, sanscript.TELUGU)


# -> Directly usable indic_transliteration_GURMUKHI_LATIN API for transliteration
def indic_transliteration_GURMUKHI_LATIN(text):
    """Transliterate ``text`` from Gurmukhi to the ITRANS scheme."""
    return transliterate(text, sanscript.GURMUKHI, sanscript.ITRANS)


# -> Directly usable unicode_transliteration_GURMUKHI_LATIN API for transliteration
def unicode_transliteration_GURMUKHI_LATIN(text):
    """Convert IAST ``text`` to Devanagari, then map it from "hi" to "pa".

    NOTE(review): this performs exactly the same steps as
    ``unicode_transliteration_GURMUKHI``; the name suggests the opposite
    direction was intended -- confirm with the callers before changing it.
    """
    devanagari = transliterate(text, sanscript.IAST, sanscript.DEVANAGARI)
    return UnicodeIndicTransliterator.transliterate(devanagari, "hi", "pa")


# -> Directly usable transliteration_CYRILIC_LATIN API for transliteration
def transliteration_CYRILIC_LATIN(text):
    """Return ``translit(text, 'bg', reversed=True)`` (reverse of the 'bg' pack)."""
    return translit(text, 'bg', reversed=True)


# Lower-case Cyrillic letter -> Latin replacement.
_CYRILLIC_LOWER_TO_LATIN = {
    "а": "a", "б": "b", "в": "v", "г": "g", "д": "d", "е": "e", "ё": "yo",
    "ж": "zh", "з": "z", "и": "i", "й": "j", "к": "k", "л": "l", "м": "m",
    "н": "n", "о": "o", "п": "p", "р": "r", "с": "s", "т": "t", "у": "u",
    "ф": "f", "х": "h", "ц": "c", "ч": "ch", "ш": "sh", "щ": "sch",
    "ъ": "j", "ы": "i", "ь": "j", "э": "e", "ю": "yu", "я": "ya",
}
# Upper-case letters map to the same replacement with only its first letter
# capitalised ("Щ" -> "Sch"). No replacement contains a Cyrillic letter, so
# a single translate pass gives the same result as sequential replacements.
_CYRILLIC_TO_LATIN = str.maketrans({
    **_CYRILLIC_LOWER_TO_LATIN,
    **{
        cyr.upper(): lat.capitalize()
        for cyr, lat in _CYRILLIC_LOWER_TO_LATIN.items()
    },
})


# -> Replace Cyrillic letters with Latin equivalents
def readonly(str):
    """Return the text with every Cyrillic letter replaced by Latin letters.

    Characters without an entry in the table are left untouched.

    Args:
        str: Text to convert. The parameter name shadows the builtin and is
            kept only so existing keyword callers continue to work.
    """
    return str.translate(_CYRILLIC_TO_LATIN)


# -> Code to convert letters to Latin script
def ConvertToLatin(source):
    """Apply ``readonly`` to each item of ``source`` and join the results."""
    return "".join(readonly(letter) for letter in source)


# Script name -> name of the matching scheme constant on ``sanscript``.
# The attribute is resolved at call time, after the name has been validated.
_SANSCRIPT_SCHEME_ATTRS = {
    "Malayalam": "MALAYALAM",
    "Gujarati": "GUJARATI",
    "Telugu": "TELUGU",
    "Oriya": "ORIYA",
    "Bengali": "BENGALI",
    "Kannada": "KANNADA",
    "Gurmukhi": "GURMUKHI",
    "Tamil": "TAMIL",
}
# Scripts each family of functions below accepts.
_DEVANAGARI_PARTNERS = tuple(_SANSCRIPT_SCHEME_ATTRS)
_KANNADA_PARTNERS = ("Malayalam", "Telugu", "Tamil", "Bengali")


def _sanscript_scheme_for(script, supported, caller):
    """Return the sanscript scheme for ``script`` or raise ``ValueError``.

    An unsupported script used to surface as ``UnboundLocalError`` at the
    final ``return out``; failing here names the offending value instead.
    """
    if script not in supported:
        raise ValueError(
            f"{caller}: unsupported script {script!r}; "
            f"expected one of {', '.join(supported)}"
        )
    return getattr(sanscript, _SANSCRIPT_SCHEME_ATTRS[script])


# -> Directly usable indic_transliteration_OTHER_DEVANAGRI API for transliteration
def indic_transliteration_OTHER_DEVANAGRI(text, src_script):
    """Transliterate ``text`` from ``src_script`` to Devanagari.

    Args:
        text: Text written in ``src_script``.
        src_script: One of Malayalam, Gujarati, Telugu, Oriya, Bengali,
            Kannada, Gurmukhi or Tamil.

    Raises:
        ValueError: If ``src_script`` is not one of the names above.
    """
    source = _sanscript_scheme_for(
        src_script, _DEVANAGARI_PARTNERS, "indic_transliteration_OTHER_DEVANAGRI"
    )
    return transliterate(text, source, sanscript.DEVANAGARI)


# -> Directly usable indic_transliteration_DEVANAGRI_OTHER API for transliteration
def indic_transliteration_DEVANAGRI_OTHER(text, dest_script):
    """Transliterate Devanagari ``text`` to ``dest_script``.

    Args:
        text: Text written in Devanagari.
        dest_script: One of Malayalam, Gujarati, Telugu, Oriya, Bengali,
            Kannada, Gurmukhi or Tamil.

    Raises:
        ValueError: If ``dest_script`` is not one of the names above.
    """
    target = _sanscript_scheme_for(
        dest_script, _DEVANAGARI_PARTNERS, "indic_transliteration_DEVANAGRI_OTHER"
    )
    return transliterate(text, sanscript.DEVANAGARI, target)


# -> Directly usable indic_transliteration_KANNADA_OTHER API for transliteration
def indic_transliteration_KANNADA_OTHER(text, dest_script):
    """Transliterate Kannada ``text`` to ``dest_script``.

    Args:
        text: Text written in Kannada.
        dest_script: One of Malayalam, Telugu, Tamil or Bengali.

    Raises:
        ValueError: If ``dest_script`` is not one of the names above.
    """
    target = _sanscript_scheme_for(
        dest_script, _KANNADA_PARTNERS, "indic_transliteration_KANNADA_OTHER"
    )
    return transliterate(text, sanscript.KANNADA, target)


# -> Directly usable indic_transliteration_OTHER_KANNADA API for transliteration
def indic_transliteration_OTHER_KANNADA(text, src_script):
    """Transliterate ``text`` from ``src_script`` to Kannada.

    Args:
        text: Text written in ``src_script``.
        src_script: One of Malayalam, Telugu, Tamil or Bengali.

    Raises:
        ValueError: If ``src_script`` is not one of the names above.
    """
    source = _sanscript_scheme_for(
        src_script, _KANNADA_PARTNERS, "indic_transliteration_OTHER_KANNADA"
    )
    return transliterate(text, source, sanscript.KANNADA)


# -> Directly Usable
# indic_transliteration_TAMIL_OTHER api for transliteration

def _resolve_sanscript_scheme(script, attr_by_script, caller):
    """Return the sanscript scheme for ``script`` or raise ``ValueError``.

    ``attr_by_script`` maps each accepted script name to the name of the
    scheme constant on ``sanscript``. The name is validated before
    ``sanscript`` is touched; an unsupported script used to surface as
    ``UnboundLocalError`` at the final ``return out``.
    """
    try:
        attr = attr_by_script[script]
    except KeyError:
        raise ValueError(
            f"{caller}: unsupported script {script!r}; "
            f"expected one of {', '.join(attr_by_script)}"
        ) from None
    return getattr(sanscript, attr)


def indic_transliteration_TAMIL_OTHER(text, dest_script):
    """Transliterate Tamil ``text`` to ``dest_script`` (Malayalam or Telugu).

    Raises:
        ValueError: If ``dest_script`` is neither "Malayalam" nor "Telugu".
    """
    target = _resolve_sanscript_scheme(
        dest_script,
        {"Malayalam": "MALAYALAM", "Telugu": "TELUGU"},
        "indic_transliteration_TAMIL_OTHER",
    )
    return transliterate(text, sanscript.TAMIL, target)


# -> Directly usable indic_transliteration_OTHER_TAMIL API for transliteration
def indic_transliteration_OTHER_TAMIL(text, src_script):
    """Transliterate ``text`` from ``src_script`` (Malayalam or Telugu) to Tamil.

    Raises:
        ValueError: If ``src_script`` is neither "Malayalam" nor "Telugu".
    """
    source = _resolve_sanscript_scheme(
        src_script,
        {"Malayalam": "MALAYALAM", "Telugu": "TELUGU"},
        "indic_transliteration_OTHER_TAMIL",
    )
    return transliterate(text, source, sanscript.TAMIL)


# -> Directly usable indic_transliteration_TELUGU_OTHER API for transliteration
def indic_transliteration_TELUGU_OTHER(text, desc_script):
    """Transliterate Telugu ``text`` to ``desc_script`` (Malayalam only).

    Raises:
        ValueError: If ``desc_script`` is not "Malayalam".
    """
    target = _resolve_sanscript_scheme(
        desc_script,
        {"Malayalam": "MALAYALAM"},
        "indic_transliteration_TELUGU_OTHER",
    )
    return transliterate(text, sanscript.TELUGU, target)


# -> Directly usable indic_transliteration_MALAYALAM_OTHER API for transliteration
def indic_transliteration_MALAYALAM_OTHER(text, desc_script):
    """Transliterate Malayalam ``text`` to ``desc_script`` (Telugu only).

    Raises:
        ValueError: If ``desc_script`` is not "Telugu".
    """
    target = _resolve_sanscript_scheme(
        desc_script,
        {"Telugu": "TELUGU"},
        "indic_transliteration_MALAYALAM_OTHER",
    )
    return transliterate(text, sanscript.MALAYALAM, target)


# -> Directly usable indic_transliteration_OTHER_GUJARATI API for transliteration
def indic_transliteration_OTHER_GUJARATI(text, src_script):
    """Transliterate ``text`` from ``src_script`` (Gurmukhi or Oriya) to Gujarati.

    Raises:
        ValueError: If ``src_script`` is neither "Gurmukhi" nor "Oriya".
    """
    source = _resolve_sanscript_scheme(
        src_script,
        {"Gurmukhi": "GURMUKHI", "Oriya": "ORIYA"},
        "indic_transliteration_OTHER_GUJARATI",
    )
    return transliterate(text, source, sanscript.GUJARATI)


# -> Directly usable indic_transliteration_OTHER_GURMUKHI API for transliteration
def indic_transliteration_OTHER_GURMUKHI(text, src_script):
    """Transliterate ``text`` from ``src_script`` (Gujarati or Oriya) to Gurmukhi.

    Raises:
        ValueError: If ``src_script`` is neither "Gujarati" nor "Oriya".
    """
    source = _resolve_sanscript_scheme(
        src_script,
        {"Gujarati": "GUJARATI", "Oriya": "ORIYA"},
        "indic_transliteration_OTHER_GURMUKHI",
    )
    return transliterate(text, source, sanscript.GURMUKHI)


# -> Directly usable indic_transliteration_OTHER_ORIYA API for transliteration
def indic_transliteration_OTHER_ORIYA(text, src_script):
    """Transliterate ``text`` from ``src_script`` (Gujarati or Gurmukhi) to Oriya.

    Raises:
        ValueError: If ``src_script`` is neither "Gujarati" nor "Gurmukhi".
    """
    source = _resolve_sanscript_scheme(
        src_script,
        {"Gujarati": "GUJARATI", "Gurmukhi": "GURMUKHI"},
        "indic_transliteration_OTHER_ORIYA",
    )
    return transliterate(text, source, sanscript.ORIYA)