Conversion_Kitchen_Code/kitchen_counter/conversion/translation/transliteration_resources.py

536 lines
18 KiB
Python
Raw Normal View History

2024-04-27 09:33:09 +00:00
import subprocess
import sys
import os
import requests, uuid, json
from .indictrans.indictrans import Transliterator
from om_transliterator import Transliterator as om_Transliterator
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
from libindic.transliteration import getInstance
t = getInstance()
from indic_transliteration import sanscript
from indic_transliteration.sanscript import transliterate
from indicnlp.transliterate.unicode_transliterate import UnicodeIndicTransliterator
from transliterate import translit #, get_available_language_codes
from indic_transliteration.sanscript import SchemeMap, SCHEMES, transliterate
# import polyglot
# from polyglot.transliteration import Transliterator as poly
# from polyglot.text import Text
# import pinyin
# from anyascii import anyascii
from MNF.settings import BasePath
basePath = BasePath()
# -> Directly Usable azure api for transliteration
def azure_transliteration(text, source_lang, source_script, dest_script):
    """Transliterate *text* between scripts using the Azure Translator API.

    Args:
        text: The text to transliterate.
        source_lang: Language code sent as the ``language`` query parameter.
        source_script: Script of *text*, either a human-readable name
            (e.g. "Devanagari") or a script code; unknown names are sent
            to the service unchanged.
        dest_script: Target script, given the same way as *source_script*.

    Returns:
        The transliterated text from the first element of the JSON response,
        or *text* unchanged if the request or response parsing fails
        (deliberate best-effort behaviour).
    """
    # Human-readable script name -> four-letter script code sent to the API.
    # "Kannada" used to map to "knda"; it now uses the same capitalised form
    # as every other code in this table.
    script_codes = {
        "Devanagari": "Deva",
        "Arabic": "Arab",
        "Latin": "Latn",
        "Kannada": "Knda",
        "Tamil": "Taml",
        "Bengali": "Beng",
        "Telugu": "Telu",
        "Malayalam": "Mlym",
        "Cyrillic": "Cyrl",
        "Gurmukhi": "Guru",
        "Gujarati": "Gujr",
        "Oriya": "Orya",
        "Sinhala": "Sinh",
        "Hanji": "Hans",
        "Thai": "Thai",
        "Hebrew": "Hebr",
    }
    source_script = script_codes.get(source_script, source_script)
    dest_script = script_codes.get(dest_script, dest_script)

    # NOTE(security): this key is committed to source control and should be
    # rotated. The AZURE_TRANSLATOR_KEY environment variable, when set, takes
    # precedence; the literal is kept only as a backward-compatible fallback.
    subscription_key = os.environ.get(
        "AZURE_TRANSLATOR_KEY", "959354878e73458e898a69f1f5887b69"
    )
    location = "eastus"
    headers = {
        'Ocp-Apim-Subscription-Key': subscription_key,
        'Ocp-Apim-Subscription-Region': location,
        'Content-type': 'application/json',
        'X-ClientTraceId': str(uuid.uuid4())
    }
    transliterate_url = "https://api.cognitive.microsofttranslator.com/transliterate?api-version=3.0"

    print("source_script", source_script)
    print("dest_script", dest_script)
    print("source_lang", source_lang)
    print("text", text)

    params = {'language': source_lang, 'fromScript': source_script, 'toScript': dest_script}
    body = [{'text': text}]
    try:
        # A timeout keeps a stalled connection from blocking the caller
        # forever; on timeout we fall back to the input like any other error.
        reply = requests.post(
            transliterate_url, params=params, headers=headers, json=body, timeout=30
        )
        out = reply.json()[0]['text']
    except Exception as e:
        # Best effort: any network, HTTP-payload or parsing problem returns
        # the original text instead of propagating.
        print("The error was ",e)
        out = text
    return out
# -> Directly Usable Polyglot api for transliteration
# def polyglot_trans(text, source_script, dest_script):
# # from polyglot.downloader import downloader
# if source_script=="Latin":
# source_script="en"
# elif source_script=="Arabic":
# source_script="ar"
# elif source_script=="Hanji":
# source_script="zh"
# if dest_script=="Latin":
# dest_script="en"
# elif dest_script=="Arabic":
# dest_script="ar"
# elif source_script=="Hanji":
# source_script="zh"
# new_text = ""
# text_break = Text(text)
# for x in text_break.transliterate(dest_script):
# new_text = new_text + str(x)
# return new_text
# -> Directly Usable indic_trans api for transliteration
def indic_trans(text, source_script, dest_script):
    """Transliterate *text* with the bundled ``indictrans`` Transliterator.

    Args:
        text: The text to transliterate.
        source_script: Script name of *text* (e.g. "Devanagari"); names
            without a mapping are passed to ``Transliterator`` unchanged.
        dest_script: Target script name, handled the same way.

    Returns:
        Whatever ``Transliterator.transform`` returns for *text*.
    """
    # Script name -> three-letter code handed to Transliterator.
    # One table serves both directions. The old destination chain mapped
    # "Bengali" to "Ben" (source used "ben"), contained a copy-pasted
    # ``source_script == "Tamil"`` branch that could never match, and had
    # unreachable duplicate Telugu/Gujarati/Oriya branches.
    codes = {
        "Devanagari": "hin",
        "Arabic": "urd",
        "Kannada": "kan",
        "Tamil": "tam",
        "Latin": "eng",
        "Bengali": "ben",
        "Telugu": "tel",
        "Malayalam": "mal",
        "Oriya": "ori",
        "Gujarati": "guj",
        "Gurmukhi": "pan",
    }
    source_code = codes.get(source_script, source_script)
    target_code = codes.get(dest_script, dest_script)

    trn = Transliterator(source=source_code, target=target_code, build_lookup=True)
    return trn.transform(text)
# -> Directly Usable om_translator api for transliteration
def om_transliterator(text):
    """Return *text* passed through ``om_Transliterator().knda_to_latn``."""
    return om_Transliterator().knda_to_latn(text)
# -> Directly Usable libindic api for transliteration
def libindic(text, dest_script):
    """Transliterate *text* with the module-level libindic instance ``t``.

    Args:
        text: The text to transliterate.
        dest_script: Target script name (e.g. "Devanagari"); names without a
            mapping are used as-is when building the locale code.

    Returns:
        The result of ``t.transliterate(text, "<code>_IN")``.
    """
    # Script name -> two-letter language code used to build "<code>_IN".
    # "Gurmukhi" previously mapped to "gu", the same code as Gujarati, so
    # Gurmukhi requests were sent as "gu_IN"; it now maps to "pa".
    language_codes = {
        "Devanagari": "hi",
        "Latin": "en",
        "Malayalam": "ml",
        "Gujarati": "gu",
        "Oriya": "or",
        "Telugu": "te",
        "Bengali": "bn",
        "Tamil": "ta",
        "Kannada": "kn",
        "Gurmukhi": "pa",
    }
    code = language_codes.get(dest_script, dest_script) + '_IN'
    return t.transliterate(text, code)
# -> Directly Usable indic_transliteration_IAST api for transliteration
def indic_transliteration_IAST(text):
    """Transliterate *text* from the IAST scheme to Devanagari."""
    return transliterate(text, sanscript.IAST, sanscript.DEVANAGARI)
# -> Directly Usable indic_transliteration_ITRANS api for transliteration
def indic_transliteration_ITRANS(text):
    """Transliterate *text* from the ITRANS scheme to Devanagari."""
    return transliterate(text, sanscript.ITRANS, sanscript.DEVANAGARI)
# -> Directly Usable sheetal api for transliteration
def sheetal(text):
    """Run ``dev-rom-sheetal.py`` on *text* and return its decoded stdout.

    The script is executed with the current interpreter; *text* is passed as
    its single command-line argument. The output is returned exactly as
    printed (no stripping).

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (behaviour of ``subprocess.check_output``).
    """
    script_path = rf"{basePath}/conversion/translation/dev-rom-sheetal.py"
    raw_output = subprocess.check_output([sys.executable, script_path, text])
    return raw_output.decode('utf-8')
# -> Directly Usable ritwik code for transliteration
def ritwik(text):
    """Run ``dev-rom-ritwik.py`` on *text* and return its decoded stdout.

    The script is executed with the current interpreter; *text* is passed as
    its single command-line argument. The output is returned exactly as
    printed (no stripping).

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (behaviour of ``subprocess.check_output``).
    """
    script_path = rf"{basePath}/conversion/translation/dev-rom-ritwik.py"
    raw_output = subprocess.check_output([sys.executable, script_path, text])
    return raw_output.decode('utf-8')
# -> Directly Usable indic_transliteration_GURMUKHI api for transliteration
def indic_transliteration_GURMUKHI(text):
    """Transliterate *text* from the IAST scheme to Gurmukhi."""
    return transliterate(text, sanscript.IAST, sanscript.GURMUKHI)
# -> Directly Usable unicode_transliteration_GURMUKHI api for transliteration
def unicode_transliteration_GURMUKHI(text):
    """Transliterate IAST *text* to Gurmukhi in two steps.

    The text is first converted from IAST to Devanagari, then passed to
    ``UnicodeIndicTransliterator.transliterate`` with "hi" as the source
    and "pa" as the target.
    """
    devanagari_text = transliterate(text, sanscript.IAST, sanscript.DEVANAGARI)
    return UnicodeIndicTransliterator.transliterate(devanagari_text, "hi", "pa")
# -> Directly Usable transliteration_LATIN_CYRILLIC api for transliteration
def transliteration_LATIN_CYRILLIC(text):
    """Return *text* transliterated by ``translit`` with the 'bg' language code."""
    return translit(text, 'bg')
# -> Directly Usable translit_CHINESE_LATIN api for transliteration
def translit_CHINESE_LATIN(text):
    """Return *text* romanised via ``pinyin.get``.

    Calls ``pinyin.get`` with ``format="strip"`` and a single space as the
    delimiter.

    Raises:
        ImportError: If the ``pinyin`` package is not installed.
    """
    # The module-level ``import pinyin`` is commented out, so the name was
    # undefined and every call raised NameError. Import lazily so the function
    # works wherever the package is available without making it a hard
    # dependency of the whole module.
    import pinyin

    return pinyin.get(text, format="strip", delimiter=" ")
def translit_th_sin_mng_heb_to_latin(text):
    """Return *text* converted by ``anyascii``.

    Raises:
        ImportError: If the ``anyascii`` package is not installed.
    """
    # The module-level ``from anyascii import anyascii`` is commented out, so
    # the name was undefined and every call raised NameError. Import lazily so
    # the function works wherever the package is available.
    from anyascii import anyascii

    return anyascii(text)
# -> Directly Usable indic_transliteration_TELUGU api for transliteration
def indic_transliteration_TELUGU(text):
    """Transliterate *text* from the IAST scheme to Telugu."""
    return transliterate(text, sanscript.IAST, sanscript.TELUGU)
# -> Directly Usable indic_transliteration_GURMUKHI_LATIN api for transliteration
def indic_transliteration_GURMUKHI_LATIN(text):
    """Transliterate *text* from Gurmukhi to the ITRANS scheme."""
    return transliterate(text, sanscript.GURMUKHI, sanscript.ITRANS)
# -> Directly Usable unicode_transliteration_GURMUKHI_LATIN api for transliteration
def unicode_transliteration_GURMUKHI_LATIN(text):
    """Convert IAST *text* to Devanagari, then transliterate "hi" -> "pa".

    NOTE(review): despite the ``GURMUKHI_LATIN`` name, this performs the same
    IAST -> Devanagari -> "pa" conversion as ``unicode_transliteration_GURMUKHI``;
    confirm with callers whether the reverse direction was intended.
    """
    devanagari_text = transliterate(text, sanscript.IAST, sanscript.DEVANAGARI)
    return UnicodeIndicTransliterator.transliterate(devanagari_text, "hi", "pa")
# -> Directly Usable transliteration_CYRILIC_LATIN api for transliteration
def transliteration_CYRILIC_LATIN(text):
    """Return *text* transliterated by ``translit`` with 'bg' and ``reversed=True``."""
    return translit(text, 'bg', reversed=True)
# -> Replace Cyrillic letters in a string with Latin equivalents
# Cyrillic letter -> Latin replacement, keyed by code point for str.translate.
_CYRILLIC_TO_LATIN = {
    ord(cyrillic): latin
    for cyrillic, latin in (
        ("а", "a"), ("б", "b"), ("в", "v"), ("г", "g"), ("д", "d"),
        ("е", "e"), ("ё", "yo"), ("ж", "zh"), ("з", "z"), ("и", "i"),
        ("й", "j"), ("к", "k"), ("л", "l"), ("м", "m"), ("н", "n"),
        ("о", "o"), ("п", "p"), ("р", "r"), ("с", "s"), ("т", "t"),
        ("у", "u"), ("ф", "f"), ("х", "h"), ("ц", "c"), ("ч", "ch"),
        ("ш", "sh"), ("щ", "sch"), ("ъ", "j"), ("ы", "i"), ("ь", "j"),
        ("э", "e"), ("ю", "yu"), ("я", "ya"),
        ("А", "A"), ("Б", "B"), ("В", "V"), ("Г", "G"), ("Д", "D"),
        ("Е", "E"), ("Ё", "Yo"), ("Ж", "Zh"), ("З", "Z"), ("И", "I"),
        ("Й", "J"), ("К", "K"), ("Л", "L"), ("М", "M"), ("Н", "N"),
        ("О", "O"), ("П", "P"), ("Р", "R"), ("С", "S"), ("Т", "T"),
        ("У", "U"), ("Ф", "F"), ("Х", "H"), ("Ц", "C"), ("Ч", "Ch"),
        ("Ш", "Sh"), ("Щ", "Sch"), ("Ъ", "J"), ("Ы", "I"), ("Ь", "J"),
        ("Э", "E"), ("Ю", "Yu"), ("Я", "Ya"),
    )
}


def readonly(str):
    """Return *str* with each Cyrillic letter replaced by its Latin spelling.

    Characters that are not in the replacement table are left untouched.
    The parameter keeps its original name (which shadows the builtin) so
    existing callers are unaffected.
    """
    # Every replacement is pure ASCII and every key is Cyrillic, so a single
    # translate pass yields the same result as replacing letter by letter.
    return str.translate(_CYRILLIC_TO_LATIN)
# -> Code to Convert Letters to Latin Script
def ConvertToLatin(source):
    """Return *source* with every item passed through ``readonly`` and joined.

    *source* is iterated item by item (character by character for a string)
    and the converted pieces are concatenated in order.
    """
    return ''.join(readonly(letter) for letter in source)
# -> Directly Usable indic_transliteration_OTHER_DEVANAGRI api for transliteration
def indic_transliteration_OTHER_DEVANAGRI(text, src_script):
    """Transliterate *text* from another Indic script into Devanagari.

    Args:
        text: Text written in ``src_script``.
        src_script: Source script name: "Malayalam", "Gujarati", "Telugu",
            "Oriya", "Bengali", "Kannada", "Gurmukhi" or "Tamil".

    Returns:
        The result of ``transliterate`` from the source scheme to
        ``sanscript.DEVANAGARI``.

    Raises:
        ValueError: If ``src_script`` is not supported. Previously this case
            ended in an ``UnboundLocalError`` on the final ``return``.
    """
    # Script name -> name of the matching scheme constant on ``sanscript``.
    scheme_attr_by_script = {
        "Malayalam": "MALAYALAM",
        "Gujarati": "GUJARATI",
        "Telugu": "TELUGU",
        "Oriya": "ORIYA",
        "Bengali": "BENGALI",
        "Kannada": "KANNADA",
        "Gurmukhi": "GURMUKHI",
        "Tamil": "TAMIL",
    }
    scheme_attr = scheme_attr_by_script.get(src_script)
    if scheme_attr is None:
        raise ValueError(
            f"Unsupported source script {src_script!r}; "
            f"expected one of {sorted(scheme_attr_by_script)}"
        )
    source_scheme = getattr(sanscript, scheme_attr)
    return transliterate(text, source_scheme, sanscript.DEVANAGARI)
# -> Directly Usable indic_transliteration_DEVANAGRI_OTHER api for transliteration
def indic_transliteration_DEVANAGRI_OTHER(text, dest_script):
    """Transliterate Devanagari *text* into another Indic script.

    Args:
        text: Text written in Devanagari.
        dest_script: Target script name: "Malayalam", "Gujarati", "Telugu",
            "Oriya", "Bengali", "Kannada", "Gurmukhi" or "Tamil".

    Returns:
        The result of ``transliterate`` from ``sanscript.DEVANAGARI`` to the
        target scheme.

    Raises:
        ValueError: If ``dest_script`` is not supported. Previously this case
            ended in an ``UnboundLocalError`` on the final ``return``.
    """
    # Script name -> name of the matching scheme constant on ``sanscript``.
    scheme_attr_by_script = {
        "Malayalam": "MALAYALAM",
        "Gujarati": "GUJARATI",
        "Telugu": "TELUGU",
        "Oriya": "ORIYA",
        "Bengali": "BENGALI",
        "Kannada": "KANNADA",
        "Gurmukhi": "GURMUKHI",
        "Tamil": "TAMIL",
    }
    scheme_attr = scheme_attr_by_script.get(dest_script)
    if scheme_attr is None:
        raise ValueError(
            f"Unsupported destination script {dest_script!r}; "
            f"expected one of {sorted(scheme_attr_by_script)}"
        )
    target_scheme = getattr(sanscript, scheme_attr)
    return transliterate(text, sanscript.DEVANAGARI, target_scheme)
# -> Directly Usable indic_transliteration_KANNADA_OTHER api for transliteration
def indic_transliteration_KANNADA_OTHER(text, dest_script):
    """Transliterate Kannada *text* into another Indic script.

    Args:
        text: Text written in Kannada.
        dest_script: Target script name: "Malayalam", "Telugu", "Tamil" or
            "Bengali".

    Returns:
        The result of ``transliterate`` from ``sanscript.KANNADA`` to the
        target scheme.

    Raises:
        ValueError: If ``dest_script`` is not supported. Previously this case
            ended in an ``UnboundLocalError`` on the final ``return``.
    """
    # Script name -> name of the matching scheme constant on ``sanscript``.
    scheme_attr_by_script = {
        "Malayalam": "MALAYALAM",
        "Telugu": "TELUGU",
        "Tamil": "TAMIL",
        "Bengali": "BENGALI",
    }
    scheme_attr = scheme_attr_by_script.get(dest_script)
    if scheme_attr is None:
        raise ValueError(
            f"Unsupported destination script {dest_script!r}; "
            f"expected one of {sorted(scheme_attr_by_script)}"
        )
    target_scheme = getattr(sanscript, scheme_attr)
    return transliterate(text, sanscript.KANNADA, target_scheme)
# -> Directly Usable indic_transliteration_OTHER_KANNADA api for transliteration
def indic_transliteration_OTHER_KANNADA(text, src_script):
    """Transliterate *text* from another Indic script into Kannada.

    Args:
        text: Text written in ``src_script``.
        src_script: Source script name: "Malayalam", "Telugu", "Tamil" or
            "Bengali".

    Returns:
        The result of ``transliterate`` from the source scheme to
        ``sanscript.KANNADA``.

    Raises:
        ValueError: If ``src_script`` is not supported. Previously this case
            ended in an ``UnboundLocalError`` on the final ``return``.
    """
    # Script name -> name of the matching scheme constant on ``sanscript``.
    scheme_attr_by_script = {
        "Malayalam": "MALAYALAM",
        "Telugu": "TELUGU",
        "Tamil": "TAMIL",
        "Bengali": "BENGALI",
    }
    scheme_attr = scheme_attr_by_script.get(src_script)
    if scheme_attr is None:
        raise ValueError(
            f"Unsupported source script {src_script!r}; "
            f"expected one of {sorted(scheme_attr_by_script)}"
        )
    source_scheme = getattr(sanscript, scheme_attr)
    return transliterate(text, source_scheme, sanscript.KANNADA)
# -> Directly Usable indic_transliteration_TAMIL_OTHER api for transliteration
def indic_transliteration_TAMIL_OTHER(text, dest_script):
    """Transliterate Tamil *text* into Malayalam or Telugu.

    Args:
        text: Text written in Tamil.
        dest_script: Target script name: "Malayalam" or "Telugu".

    Returns:
        The result of ``transliterate`` from ``sanscript.TAMIL`` to the
        target scheme.

    Raises:
        ValueError: If ``dest_script`` is not supported. Previously this case
            ended in an ``UnboundLocalError`` on the final ``return``.
    """
    # Script name -> name of the matching scheme constant on ``sanscript``.
    scheme_attr_by_script = {
        "Malayalam": "MALAYALAM",
        "Telugu": "TELUGU",
    }
    scheme_attr = scheme_attr_by_script.get(dest_script)
    if scheme_attr is None:
        raise ValueError(
            f"Unsupported destination script {dest_script!r}; "
            f"expected one of {sorted(scheme_attr_by_script)}"
        )
    target_scheme = getattr(sanscript, scheme_attr)
    return transliterate(text, sanscript.TAMIL, target_scheme)
# -> Directly Usable indic_transliteration_OTHER_TAMIL api for transliteration
def indic_transliteration_OTHER_TAMIL(text, src_script):
    """Transliterate Malayalam or Telugu *text* into Tamil.

    Args:
        text: Text written in ``src_script``.
        src_script: Source script name: "Malayalam" or "Telugu".

    Returns:
        The result of ``transliterate`` from the source scheme to
        ``sanscript.TAMIL``.

    Raises:
        ValueError: If ``src_script`` is not supported. Previously this case
            ended in an ``UnboundLocalError`` on the final ``return``.
    """
    # Script name -> name of the matching scheme constant on ``sanscript``.
    scheme_attr_by_script = {
        "Malayalam": "MALAYALAM",
        "Telugu": "TELUGU",
    }
    scheme_attr = scheme_attr_by_script.get(src_script)
    if scheme_attr is None:
        raise ValueError(
            f"Unsupported source script {src_script!r}; "
            f"expected one of {sorted(scheme_attr_by_script)}"
        )
    source_scheme = getattr(sanscript, scheme_attr)
    return transliterate(text, source_scheme, sanscript.TAMIL)
# -> Directly Usable indic_transliteration_TELUGU_OTHER api for transliteration
def indic_transliteration_TELUGU_OTHER(text, desc_script):
    """Transliterate Telugu *text* into Malayalam.

    Args:
        text: Text written in Telugu.
        desc_script: Target script name; only "Malayalam" is supported.

    Returns:
        The result of ``transliterate`` from ``sanscript.TELUGU`` to
        ``sanscript.MALAYALAM``.

    Raises:
        ValueError: If ``desc_script`` is not "Malayalam". Previously this
            case ended in an ``UnboundLocalError`` on the final ``return``.
    """
    if desc_script != "Malayalam":
        raise ValueError(
            f"Unsupported destination script {desc_script!r}; expected 'Malayalam'"
        )
    return transliterate(text, sanscript.TELUGU, sanscript.MALAYALAM)
# -> Directly Usable indic_transliteration_MALAYALAM_OTHER api for transliteration
def indic_transliteration_MALAYALAM_OTHER(text, desc_script):
    """Transliterate Malayalam *text* into Telugu.

    Args:
        text: Text written in Malayalam.
        desc_script: Target script name; only "Telugu" is supported.

    Returns:
        The result of ``transliterate`` from ``sanscript.MALAYALAM`` to
        ``sanscript.TELUGU``.

    Raises:
        ValueError: If ``desc_script`` is not "Telugu". Previously this case
            ended in an ``UnboundLocalError`` on the final ``return``.
    """
    if desc_script != "Telugu":
        raise ValueError(
            f"Unsupported destination script {desc_script!r}; expected 'Telugu'"
        )
    return transliterate(text, sanscript.MALAYALAM, sanscript.TELUGU)
# -> Directly Usable indic_transliteration_OTHER_GUJARATI api for transliteration
def indic_transliteration_OTHER_GUJARATI(text, src_script):
    """Transliterate Gurmukhi or Oriya *text* into Gujarati.

    Args:
        text: Text written in ``src_script``.
        src_script: Source script name: "Gurmukhi" or "Oriya".

    Returns:
        The result of ``transliterate`` from the source scheme to
        ``sanscript.GUJARATI``.

    Raises:
        ValueError: If ``src_script`` is not supported. Previously this case
            ended in an ``UnboundLocalError`` on the final ``return``.
    """
    # Script name -> name of the matching scheme constant on ``sanscript``.
    scheme_attr_by_script = {
        "Gurmukhi": "GURMUKHI",
        "Oriya": "ORIYA",
    }
    scheme_attr = scheme_attr_by_script.get(src_script)
    if scheme_attr is None:
        raise ValueError(
            f"Unsupported source script {src_script!r}; "
            f"expected one of {sorted(scheme_attr_by_script)}"
        )
    source_scheme = getattr(sanscript, scheme_attr)
    return transliterate(text, source_scheme, sanscript.GUJARATI)
# -> Directly Usable indic_transliteration_OTHER_GURMUKHI api for transliteration
def indic_transliteration_OTHER_GURMUKHI(text, src_script):
    """Transliterate Gujarati or Oriya *text* into Gurmukhi.

    Args:
        text: Text written in ``src_script``.
        src_script: Source script name: "Gujarati" or "Oriya".

    Returns:
        The result of ``transliterate`` from the source scheme to
        ``sanscript.GURMUKHI``.

    Raises:
        ValueError: If ``src_script`` is not supported. Previously this case
            ended in an ``UnboundLocalError`` on the final ``return``.
    """
    # Script name -> name of the matching scheme constant on ``sanscript``.
    scheme_attr_by_script = {
        "Gujarati": "GUJARATI",
        "Oriya": "ORIYA",
    }
    scheme_attr = scheme_attr_by_script.get(src_script)
    if scheme_attr is None:
        raise ValueError(
            f"Unsupported source script {src_script!r}; "
            f"expected one of {sorted(scheme_attr_by_script)}"
        )
    source_scheme = getattr(sanscript, scheme_attr)
    return transliterate(text, source_scheme, sanscript.GURMUKHI)
# -> Directly Usable indic_transliteration_OTHER_ORIYA api for transliteration
def indic_transliteration_OTHER_ORIYA(text, src_script):
    """Transliterate Gujarati or Gurmukhi *text* into Oriya.

    Args:
        text: Text written in ``src_script``.
        src_script: Source script name: "Gujarati" or "Gurmukhi".

    Returns:
        The result of ``transliterate`` from the source scheme to
        ``sanscript.ORIYA``.

    Raises:
        ValueError: If ``src_script`` is not supported. Previously this case
            ended in an ``UnboundLocalError`` on the final ``return``.
    """
    # Script name -> name of the matching scheme constant on ``sanscript``.
    scheme_attr_by_script = {
        "Gujarati": "GUJARATI",
        "Gurmukhi": "GURMUKHI",
    }
    scheme_attr = scheme_attr_by_script.get(src_script)
    if scheme_attr is None:
        raise ValueError(
            f"Unsupported source script {src_script!r}; "
            f"expected one of {sorted(scheme_attr_by_script)}"
        )
    source_scheme = getattr(sanscript, scheme_attr)
    return transliterate(text, source_scheme, sanscript.ORIYA)