Conversion_Kitchen_Code/kitchen_counter/conversion/translation/transliteration_resources.py

536 lines
18 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import subprocess
import sys
import os
import requests, uuid, json
from .indictrans.indictrans import Transliterator
from om_transliterator import Transliterator as om_Transliterator
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
from libindic.transliteration import getInstance
t = getInstance()
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator
from transliterate import translit #, get_available_language_codes
from indic_transliteration.sanscript import SchemeMap, SCHEMES, transliterate
# import polyglot
# from polyglot.transliteration import Transliterator as poly
# from polyglot.text import Text
# import pinyin
# from anyascii import anyascii
from MNF.settings import BasePath
basePath = BasePath()
# -> Directly Usable azure api for transliteration
# Script names accepted by azure_transliteration() -> script codes sent to the
# Translator API. Names not listed here are forwarded unchanged.
# "Kannada" used to be sent as "knda"; it is normalised here to the ISO 15924
# casing "Knda" so it matches the casing of every other code in the table.
_AZURE_SCRIPT_CODES = {
    "Devanagari": "Deva",
    "Arabic": "Arab",
    "Latin": "Latn",
    "Kannada": "Knda",
    "Tamil": "Taml",
    "Bengali": "Beng",
    "Telugu": "Telu",
    "Malayalam": "Mlym",
    "Cyrillic": "Cyrl",
    "Gurmukhi": "Guru",
    "Gujarati": "Gujr",
    "Oriya": "Orya",
    "Sinhala": "Sinh",
    "Hanji": "Hans",
    "Thai": "Thai",
    "Hebrew": "Hebr",
}


def azure_transliteration(text, source_lang, source_script, dest_script):
    """Transliterate *text* with the Microsoft Translator ``transliterate`` endpoint.

    Args:
        text: The text to transliterate.
        source_lang: Language code sent as the ``language`` query parameter.
        source_script: Script name (e.g. "Devanagari") or a script code; names
            found in ``_AZURE_SCRIPT_CODES`` are converted, anything else is
            sent as given.
        dest_script: Target script, handled the same way as ``source_script``.

    Returns:
        The ``text`` field of the first item in the JSON response. If the
        request or the parsing of the response fails for any reason, the
        error is printed and the original ``text`` is returned unchanged.
    """
    source_script = _AZURE_SCRIPT_CODES.get(source_script, source_script)
    dest_script = _AZURE_SCRIPT_CODES.get(dest_script, dest_script)

    # NOTE(review): this secret is committed to source control; it should be
    # rotated and loaded from configuration instead of being hard-coded.
    subscription_key = "959354878e73458e898a69f1f5887b69"
    location = "eastus"
    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Ocp-Apim-Subscription-Region': location,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4()),
    }
    url = "https://api.cognitive.microsofttranslator.com/transliterate?api-version=3.0"

    print("source_script", source_script)
    print("dest_script", dest_script)
    print("source_lang", source_lang)
    print("text", text)

    params = {'language': source_lang, 'fromScript': source_script, 'toScript': dest_script}
    body = [{'text': text}]
    try:
        # A timeout keeps a stalled connection from blocking the caller
        # forever; a timeout error takes the same fallback path as any other.
        reply = requests.post(url, params=params, headers=headers, json=body, timeout=30)
        out = reply.json()[0]['text']
    except Exception as e:
        # Deliberate best effort: on any failure hand back the input text.
        print("The error was ", e)
        out = text
    return out
# -> Directly Usable Polyglot api for transliteration
# def polyglot_trans(text, source_script, dest_script):
# # from polyglot.downloader import downloader
# if source_script=="Latin":
# source_script="en"
# elif source_script=="Arabic":
# source_script="ar"
# elif source_script=="Hanji":
# source_script="zh"
# if dest_script=="Latin":
# dest_script="en"
# elif dest_script=="Arabic":
# dest_script="ar"
# elif source_script=="Hanji":
# source_script="zh"
# new_text = ""
# text_break = Text(text)
# for x in text_break.transliterate(dest_script):
# new_text = new_text + str(x)
# return new_text
# -> Directly Usable indic_trans api for transliteration
# Script names accepted by indic_trans() -> three-letter codes handed to the
# indictrans ``Transliterator``. Names not listed are forwarded unchanged.
_INDIC_TRANS_CODES = {
    "Devanagari": "hin",
    "Arabic": "urd",
    "Kannada": "kan",
    "Tamil": "tam",
    "Latin": "eng",
    "Bengali": "ben",
    "Telugu": "tel",
    "Malayalam": "mal",
    "Oriya": "ori",
    "Gujarati": "guj",
    "Gurmukhi": "pan",
}


def indic_trans(text, source_script, dest_script):
    """Transliterate *text* with the bundled ``indictrans`` ``Transliterator``.

    Both script arguments are translated through the same table. The previous
    destination chain contained a copy-pasted branch that tested
    ``source_script`` and several unreachable duplicates, and it produced
    ``"Ben"`` for Bengali where the source side produced ``"ben"``; a single
    shared table removes those inconsistencies.

    Args:
        text: The text to transliterate.
        source_script: Script name (e.g. "Devanagari") or an indictrans code.
        dest_script: Target script name or indictrans code.

    Returns:
        Whatever ``Transliterator.transform`` returns for ``text``.
    """
    source_code = _INDIC_TRANS_CODES.get(source_script, source_script)
    target_code = _INDIC_TRANS_CODES.get(dest_script, dest_script)
    trn = Transliterator(source=source_code, target=target_code, build_lookup=True)
    return trn.transform(text)
# -> Directly Usable om_translator api for transliteration
def om_transliterator(text):
    """Return ``knda_to_latn(text)`` from a fresh ``om_transliterator`` instance."""
    return om_Transliterator().knda_to_latn(text)
# -> Directly Usable libindic api for transliteration
# Script names accepted by libindic() -> two-letter language codes. Names not
# listed are used as given.
_LIBINDIC_LANG_CODES = {
    "Devanagari": "hi",
    "Latin": "en",
    "Malayalam": "ml",
    "Gujarati": "gu",
    "Oriya": "or",
    "Telugu": "te",
    "Bengali": "bn",
    "Tamil": "ta",
    "Kannada": "kn",
    # Gurmukhi is the Punjabi script; it was previously mapped to "gu",
    # which is the Gujarati code and selected the wrong target.
    "Gurmukhi": "pa",
}


def libindic(text, dest_script):
    """Transliterate *text* into *dest_script* with the libindic instance ``t``.

    Args:
        text: The text to transliterate.
        dest_script: Script name (e.g. "Devanagari") or a language code.

    Returns:
        The result of ``t.transliterate(text, "<code>_IN")``, where ``<code>``
        is the mapped language code.
    """
    code = _LIBINDIC_LANG_CODES.get(dest_script, dest_script) + '_IN'
    return t.transliterate(text, code)
# -> Directly Usable indic_transliteration_IAST api for transliteration
def indic_transliteration_IAST(text):
    """Transliterate *text* from the IAST scheme to Devanagari."""
    return transliterate(text, sanscript.IAST, sanscript.DEVANAGARI)
# -> Directly Usable indic_transliteration_ITRANS api for transliteration
def indic_transliteration_ITRANS(text):
    """Transliterate *text* from the ITRANS scheme to Devanagari."""
    return transliterate(text, sanscript.ITRANS, sanscript.DEVANAGARI)
# -> Directly Usable sheetal api for transliteration
def sheetal(text):
    """Run ``dev-rom-sheetal.py`` on *text* and return its output.

    The script is executed in a child process using the current Python
    interpreter, with ``text`` as its only argument. The captured standard
    output is decoded as UTF-8 and returned.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero status.
    """
    script_path = rf"{basePath}/conversion/translation/dev-rom-sheetal.py"
    captured = subprocess.check_output([sys.executable, script_path, text])
    return captured.decode('utf-8')
# -> Directly Usable ritwik code for transliteration
def ritwik(text):
    """Run ``dev-rom-ritwik.py`` on *text* and return its output.

    The script is executed in a child process using the current Python
    interpreter, with ``text`` as its only argument. The captured standard
    output is decoded as UTF-8 and returned.

    Raises:
        subprocess.CalledProcessError: if the script exits with a non-zero status.
    """
    script_path = rf"{basePath}/conversion/translation/dev-rom-ritwik.py"
    captured = subprocess.check_output([sys.executable, script_path, text])
    return captured.decode('utf-8')
# -> Directly Usable indic_transliteration_GURMUKHI api for transliteration
def indic_transliteration_GURMUKHI(text):
    """Transliterate *text* from the IAST scheme to Gurmukhi."""
    return transliterate(text, sanscript.IAST, sanscript.GURMUKHI)
# -> Directly Usable unicode_transliteration_GURMUKHI api for transliteration
def unicode_transliteration_GURMUKHI(text):
    """Convert IAST *text* to Devanagari, then transliterate it from "hi" to "pa".

    The first step uses ``indic_transliteration``; the second uses
    ``UnicodeIndicTransliterator`` from ``indicnlp``.
    """
    devanagari_text = transliterate(text, sanscript.IAST, sanscript.DEVANAGARI)
    return UnicodeIndicTransliterator.transliterate(devanagari_text, "hi", "pa")
# -> Directly Usable transliteration_LATIN_CYRILLIC api for transliteration
def transliteration_LATIN_CYRILLIC(text):
    """Return ``translit(text, 'bg')`` from the ``transliterate`` package."""
    return translit(text, 'bg')
# -> Directly Usable translit_CHINESE_LATIN api for transliteration
def translit_CHINESE_LATIN(text):
    """Return ``pinyin.get(text, format="strip", delimiter=" ")``.

    The module-level ``import pinyin`` is commented out, so the name was
    undefined here and every call raised ``NameError``. The package is now
    imported inside the function, which also keeps this module importable on
    machines where ``pinyin`` is not installed.

    Raises:
        ImportError: if the ``pinyin`` package is not installed.
    """
    import pinyin  # third-party; intentionally imported lazily

    return pinyin.get(text, format="strip", delimiter=" ")
def translit_th_sin_mng_heb_to_latin(text):
    """Return ``anyascii(text)``.

    The module-level ``from anyascii import anyascii`` is commented out, so
    the name was undefined here and every call raised ``NameError``. The
    function is now imported locally, which also keeps this module importable
    on machines where ``anyascii`` is not installed.

    Raises:
        ImportError: if the ``anyascii`` package is not installed.
    """
    from anyascii import anyascii  # third-party; intentionally imported lazily

    return anyascii(text)
# -> Directly Usable indic_transliteration_TELUGU api for transliteration
def indic_transliteration_TELUGU(text):
    """Transliterate *text* from the IAST scheme to Telugu."""
    return transliterate(text, sanscript.IAST, sanscript.TELUGU)
# -> Directly Usable indic_transliteration_GURMUKHI_LATIN api for transliteration
def indic_transliteration_GURMUKHI_LATIN(text):
    """Transliterate *text* from Gurmukhi to the ITRANS scheme."""
    return transliterate(text, sanscript.GURMUKHI, sanscript.ITRANS)
# -> Directly Usable unicode_transliteration_GURMUKHI_LATIN api for transliteration
def unicode_transliteration_GURMUKHI_LATIN(text):
    """Convert IAST *text* to Devanagari, then transliterate it from "hi" to "pa".

    NOTE(review): the name suggests a Gurmukhi -> Latin conversion, but the
    body performs the same IAST -> Devanagari -> "pa" steps as
    ``unicode_transliteration_GURMUKHI``. Confirm the intended direction with
    the callers before relying on it.
    """
    devanagari_text = transliterate(text, sanscript.IAST, sanscript.DEVANAGARI)
    return UnicodeIndicTransliterator.transliterate(devanagari_text, "hi", "pa")
# -> Directly Usable transliteration_CYRILIC_LATIN api for transliteration
def transliteration_CYRILIC_LATIN(text):
    """Return ``translit(text, 'bg', reversed=True)`` from the ``transliterate`` package."""
    return translit(text, 'bg', reversed=True)
# -> Helper that replaces Cyrillic letters with Latin equivalents
# Cyrillic letter -> Latin replacement used by readonly(). Built at module
# level because readonly()'s parameter is itself named `str` and hides the
# builtin inside the function.
_CYRILLIC_TO_LATIN = str.maketrans({
    "а": "a", "б": "b", "в": "v", "г": "g", "д": "d", "е": "e",
    "ё": "yo", "ж": "zh", "з": "z", "и": "i", "й": "j", "к": "k",
    "л": "l", "м": "m", "н": "n", "о": "o", "п": "p", "р": "r",
    "с": "s", "т": "t", "у": "u", "ф": "f", "х": "h", "ц": "c",
    "ч": "ch", "ш": "sh", "щ": "sch", "ъ": "j", "ы": "i", "ь": "j",
    "э": "e", "ю": "yu", "я": "ya",
    "А": "A", "Б": "B", "В": "V", "Г": "G", "Д": "D", "Е": "E",
    "Ё": "Yo", "Ж": "Zh", "З": "Z", "И": "I", "Й": "J", "К": "K",
    "Л": "L", "М": "M", "Н": "N", "О": "O", "П": "P", "Р": "R",
    "С": "S", "Т": "T", "У": "U", "Ф": "F", "Х": "H", "Ц": "C",
    "Ч": "Ch", "Ш": "Sh", "Щ": "Sch", "Ъ": "J", "Ы": "I", "Ь": "J",
    "Э": "E", "Ю": "Yu", "Я": "Ya",
})


def readonly(str):
    """Replace every Cyrillic letter in *str* with its Latin equivalent.

    All other characters are left untouched. The result is the same as the
    former chain of 66 ``replace`` calls, because every replacement is plain
    ASCII and can never be matched by a later Cyrillic rule; ``translate``
    simply does the work in a single pass.

    Args:
        str: The text to convert. The name shadows the builtin but is kept
            so existing keyword callers continue to work.

    Returns:
        The converted string.
    """
    return str.translate(_CYRILLIC_TO_LATIN)
# -> Code to Convert Letters to Latin Script
def ConvertToLatin(source):
    """Pass each item of *source* through ``readonly`` and concatenate the results."""
    return ''.join(readonly(symbol) for symbol in source)
# -> Directly Usable indic_transliteration_OTHER_DEVANAGRI api for transliteration
def indic_transliteration_OTHER_DEVANAGRI(text, src_script):
    """Transliterate *text* from *src_script* into Devanagari.

    Args:
        text: Text written in ``src_script``.
        src_script: One of "Malayalam", "Gujarati", "Telugu", "Oriya",
            "Bengali", "Kannada", "Gurmukhi" or "Tamil".

    Returns:
        The result of ``transliterate`` for the selected scheme pair.

    Raises:
        ValueError: if ``src_script`` is not one of the supported names.
            Previously this case failed with ``UnboundLocalError``.
    """
    if src_script == "Malayalam":
        source_scheme = sanscript.MALAYALAM
    elif src_script == "Gujarati":
        source_scheme = sanscript.GUJARATI
    elif src_script == "Telugu":
        source_scheme = sanscript.TELUGU
    elif src_script == "Oriya":
        source_scheme = sanscript.ORIYA
    elif src_script == "Bengali":
        source_scheme = sanscript.BENGALI
    elif src_script == "Kannada":
        source_scheme = sanscript.KANNADA
    elif src_script == "Gurmukhi":
        source_scheme = sanscript.GURMUKHI
    elif src_script == "Tamil":
        source_scheme = sanscript.TAMIL
    else:
        raise ValueError(
            f"Unsupported source script for Devanagari transliteration: {src_script!r}"
        )
    return transliterate(text, source_scheme, sanscript.DEVANAGARI)
# -> Directly Usable indic_transliteration_DEVANAGRI_OTHER api for transliteration
def indic_transliteration_DEVANAGRI_OTHER(text, dest_script):
    """Transliterate Devanagari *text* into *dest_script*.

    Args:
        text: Text written in Devanagari.
        dest_script: One of "Malayalam", "Gujarati", "Telugu", "Oriya",
            "Bengali", "Kannada", "Gurmukhi" or "Tamil".

    Returns:
        The result of ``transliterate`` for the selected scheme pair.

    Raises:
        ValueError: if ``dest_script`` is not one of the supported names.
            Previously this case failed with ``UnboundLocalError``.
    """
    if dest_script == "Malayalam":
        target_scheme = sanscript.MALAYALAM
    elif dest_script == "Gujarati":
        target_scheme = sanscript.GUJARATI
    elif dest_script == "Telugu":
        target_scheme = sanscript.TELUGU
    elif dest_script == "Oriya":
        target_scheme = sanscript.ORIYA
    elif dest_script == "Bengali":
        target_scheme = sanscript.BENGALI
    elif dest_script == "Kannada":
        target_scheme = sanscript.KANNADA
    elif dest_script == "Gurmukhi":
        target_scheme = sanscript.GURMUKHI
    elif dest_script == "Tamil":
        target_scheme = sanscript.TAMIL
    else:
        raise ValueError(
            f"Unsupported destination script for Devanagari transliteration: {dest_script!r}"
        )
    return transliterate(text, sanscript.DEVANAGARI, target_scheme)
# -> Directly Usable indic_transliteration_KANNADA_OTHER api for transliteration
def indic_transliteration_KANNADA_OTHER(text, dest_script):
    """Transliterate Kannada *text* into *dest_script*.

    Args:
        text: Text written in Kannada.
        dest_script: One of "Malayalam", "Telugu", "Tamil" or "Bengali".

    Returns:
        The result of ``transliterate`` for the selected scheme pair.

    Raises:
        ValueError: if ``dest_script`` is not one of the supported names.
            Previously this case failed with ``UnboundLocalError``.
    """
    if dest_script == "Malayalam":
        target_scheme = sanscript.MALAYALAM
    elif dest_script == "Telugu":
        target_scheme = sanscript.TELUGU
    elif dest_script == "Tamil":
        target_scheme = sanscript.TAMIL
    elif dest_script == "Bengali":
        target_scheme = sanscript.BENGALI
    else:
        raise ValueError(
            f"Unsupported destination script for Kannada transliteration: {dest_script!r}"
        )
    return transliterate(text, sanscript.KANNADA, target_scheme)
# -> Directly Usable indic_transliteration_OTHER_KANNADA api for transliteration
def indic_transliteration_OTHER_KANNADA(text, src_script):
    """Transliterate *text* from *src_script* into Kannada.

    Args:
        text: Text written in ``src_script``.
        src_script: One of "Malayalam", "Telugu", "Tamil" or "Bengali".

    Returns:
        The result of ``transliterate`` for the selected scheme pair.

    Raises:
        ValueError: if ``src_script`` is not one of the supported names.
            Previously this case failed with ``UnboundLocalError``.
    """
    if src_script == "Malayalam":
        source_scheme = sanscript.MALAYALAM
    elif src_script == "Telugu":
        source_scheme = sanscript.TELUGU
    elif src_script == "Tamil":
        source_scheme = sanscript.TAMIL
    elif src_script == "Bengali":
        source_scheme = sanscript.BENGALI
    else:
        raise ValueError(
            f"Unsupported source script for Kannada transliteration: {src_script!r}"
        )
    return transliterate(text, source_scheme, sanscript.KANNADA)
# -> Directly Usable indic_transliteration_TAMIL_OTHER api for transliteration
def indic_transliteration_TAMIL_OTHER(text, dest_script):
    """Transliterate Tamil *text* into *dest_script*.

    Args:
        text: Text written in Tamil.
        dest_script: Either "Malayalam" or "Telugu".

    Returns:
        The result of ``transliterate`` for the selected scheme pair.

    Raises:
        ValueError: if ``dest_script`` is not one of the supported names.
            Previously this case failed with ``UnboundLocalError``.
    """
    if dest_script == "Malayalam":
        target_scheme = sanscript.MALAYALAM
    elif dest_script == "Telugu":
        target_scheme = sanscript.TELUGU
    else:
        raise ValueError(
            f"Unsupported destination script for Tamil transliteration: {dest_script!r}"
        )
    return transliterate(text, sanscript.TAMIL, target_scheme)
# -> Directly Usable indic_transliteration_OTHER_TAMIL api for transliteration
def indic_transliteration_OTHER_TAMIL(text, src_script):
    """Transliterate *text* from *src_script* into Tamil.

    Args:
        text: Text written in ``src_script``.
        src_script: Either "Malayalam" or "Telugu".

    Returns:
        The result of ``transliterate`` for the selected scheme pair.

    Raises:
        ValueError: if ``src_script`` is not one of the supported names.
            Previously this case failed with ``UnboundLocalError``.
    """
    if src_script == "Malayalam":
        source_scheme = sanscript.MALAYALAM
    elif src_script == "Telugu":
        source_scheme = sanscript.TELUGU
    else:
        raise ValueError(
            f"Unsupported source script for Tamil transliteration: {src_script!r}"
        )
    return transliterate(text, source_scheme, sanscript.TAMIL)
# -> Directly Usable indic_transliteration_TELUGU_OTHER api for transliteration
def indic_transliteration_TELUGU_OTHER(text, desc_script):
    """Transliterate Telugu *text* into *desc_script*.

    Args:
        text: Text written in Telugu.
        desc_script: Destination script; only "Malayalam" is supported.

    Returns:
        The result of ``transliterate`` from Telugu to Malayalam.

    Raises:
        ValueError: if ``desc_script`` is not "Malayalam". Previously this
            case failed with ``UnboundLocalError``.
    """
    if desc_script != "Malayalam":
        raise ValueError(
            f"Unsupported destination script for Telugu transliteration: {desc_script!r}"
        )
    return transliterate(text, sanscript.TELUGU, sanscript.MALAYALAM)
# -> Directly Usable indic_transliteration_MALAYALAM_OTHER api for transliteration
def indic_transliteration_MALAYALAM_OTHER(text, desc_script):
    """Transliterate Malayalam *text* into *desc_script*.

    Args:
        text: Text written in Malayalam.
        desc_script: Destination script; only "Telugu" is supported.

    Returns:
        The result of ``transliterate`` from Malayalam to Telugu.

    Raises:
        ValueError: if ``desc_script`` is not "Telugu". Previously this
            case failed with ``UnboundLocalError``.
    """
    if desc_script != "Telugu":
        raise ValueError(
            f"Unsupported destination script for Malayalam transliteration: {desc_script!r}"
        )
    return transliterate(text, sanscript.MALAYALAM, sanscript.TELUGU)
# -> Directly Usable indic_transliteration_OTHER_GUJARATI api for transliteration
def indic_transliteration_OTHER_GUJARATI(text, src_script):
    """Transliterate *text* from *src_script* into Gujarati.

    Args:
        text: Text written in ``src_script``.
        src_script: Either "Gurmukhi" or "Oriya".

    Returns:
        The result of ``transliterate`` for the selected scheme pair.

    Raises:
        ValueError: if ``src_script`` is not one of the supported names.
            Previously this case failed with ``UnboundLocalError``.
    """
    if src_script == "Gurmukhi":
        source_scheme = sanscript.GURMUKHI
    elif src_script == "Oriya":
        source_scheme = sanscript.ORIYA
    else:
        raise ValueError(
            f"Unsupported source script for Gujarati transliteration: {src_script!r}"
        )
    return transliterate(text, source_scheme, sanscript.GUJARATI)
# -> Directly Usable indic_transliteration_OTHER_GURMUKHI api for transliteration
def indic_transliteration_OTHER_GURMUKHI(text, src_script):
    """Transliterate *text* from *src_script* into Gurmukhi.

    Args:
        text: Text written in ``src_script``.
        src_script: Either "Gujarati" or "Oriya".

    Returns:
        The result of ``transliterate`` for the selected scheme pair.

    Raises:
        ValueError: if ``src_script`` is not one of the supported names.
            Previously this case failed with ``UnboundLocalError``.
    """
    if src_script == "Gujarati":
        source_scheme = sanscript.GUJARATI
    elif src_script == "Oriya":
        source_scheme = sanscript.ORIYA
    else:
        raise ValueError(
            f"Unsupported source script for Gurmukhi transliteration: {src_script!r}"
        )
    return transliterate(text, source_scheme, sanscript.GURMUKHI)
# -> Directly Usable indic_transliteration_OTHER_ORIYA api for transliteration
def indic_transliteration_OTHER_ORIYA(text, src_script):
    """Transliterate *text* from *src_script* into Oriya.

    Args:
        text: Text written in ``src_script``.
        src_script: Either "Gujarati" or "Gurmukhi".

    Returns:
        The result of ``transliterate`` for the selected scheme pair.

    Raises:
        ValueError: if ``src_script`` is not one of the supported names.
            Previously this case failed with ``UnboundLocalError``.
    """
    if src_script == "Gujarati":
        source_scheme = sanscript.GUJARATI
    elif src_script == "Gurmukhi":
        source_scheme = sanscript.GURMUKHI
    else:
        raise ValueError(
            f"Unsupported source script for Oriya transliteration: {src_script!r}"
        )
    return transliterate(text, source_scheme, sanscript.ORIYA)