Conversion_Kitchen_Code/kitchen_counter/conversion/translation/final_transliteration_only2.py

2286 lines
106 KiB
Python
Executable File

# Module Imports
import docx
import re
from indicnlp.tokenize import sentence_tokenize
from .buck_2_unicode import buck_2_unicode
from .transString import transString
from .selection_source import (
selection_source_transliteration,
)
from .detection import language_detector, script_det
from .script_writing import (
addSlugLine,
addActionLine,
addSpeaker,
addParenthetical,
addDialogue,
dual_script,
addTransition,
dial_checker,
non_dial_checker,
addSpecialTerm
)
from .translation_resources import google, aws, azure, yandex
from .transliteration_resources import (
azure_transliteration,
indic_trans,
indic_transliteration_OTHER_GUJARATI,
indic_transliteration_OTHER_GURMUKHI,
indic_transliteration_OTHER_ORIYA,
om_transliterator,
libindic,
indic_transliteration_IAST,
indic_transliteration_ITRANS,
# polyglot_trans,
sheetal,
unicode_transliteration_GURMUKHI,
indic_transliteration_GURMUKHI,
transliteration_LATIN_CYRILLIC,
indic_transliteration_TELUGU,
unicode_transliteration_GURMUKHI_LATIN,
indic_transliteration_GURMUKHI_LATIN,
transliteration_CYRILIC_LATIN,
ConvertToLatin,
readonly,
indic_transliteration_OTHER_DEVANAGRI,
indic_transliteration_DEVANAGRI_OTHER,
indic_transliteration_KANNADA_OTHER,
indic_transliteration_OTHER_KANNADA,
indic_transliteration_TAMIL_OTHER,
indic_transliteration_OTHER_TAMIL,
indic_transliteration_TELUGU_OTHER,
indic_transliteration_MALAYALAM_OTHER,
indic_transliteration_OTHER_GUJARATI,
indic_transliteration_OTHER_GURMUKHI,
indic_transliteration_OTHER_ORIYA,
translit_CHINESE_LATIN,
translit_th_sin_mng_heb_to_latin
)
from conversion.translation.translation_function import translate_comparison2
from MNF.settings import BasePath
# Importing Basepath of System
basePath = BasePath()
# Tokens that the word-level loops in this module skip instead of transliterating.
# NOTE(review): those loops split each sentence on a single space, so a token can
# never contain a space; of these entries only "" can match there.
etc_punctuation = ["", " . . .", " . .", " . . ”"]
"""overriding dictionary class"""
class myDict(dict):
    """Dictionary subclass that exposes an explicit ``add`` method."""

    def __init__(self):
        # Initialise the underlying dict. The previous body assigned a new dict
        # to the local name ``self``, which had no effect on the instance.
        super().__init__()

    def add(self, key, value):
        """Store ``value`` under ``key``, replacing any existing entry."""
        self[key] = value
# -> Punctuation Remover code
def punct_remover(string):
    """Return ``string`` with every punctuation character replaced by a space.

    Each listed character is replaced one-for-one, so the length of the text
    is preserved and neighbouring words stay separated.

    Args:
        string: Text to clean.

    Returns:
        A copy of ``string`` in which each character from the punctuation set
        below has become a single space.
    """
    # Raw literal: the backslash is itself one of the characters to strip, and
    # a raw string avoids the invalid "\," escape of the previous literal.
    punctuations = r"""!()-[]{};:'"\,<>./?@#$%^&*_~…।"""
    # One pass over the text instead of one str.replace call per character.
    return string.translate(str.maketrans(dict.fromkeys(punctuations, " ")))
# -> Adds a space after punctuation and normalises whitespace
def space_after_punct(text):
    """Normalise the spacing around punctuation in ``text``.

    A spaced-out ellipsis ". . ." becomes " ... ", a space is inserted after
    each of the characters , ! ? ( ) … and -, and every run of two or more
    whitespace characters is collapsed into a single space.

    Args:
        text: Text to normalise.

    Returns:
        The normalised text.
    """
    text = text.replace(". . .", " ... ")
    # Raw strings keep the regex escapes out of Python's own escape handling.
    text = re.sub(r"([,!?()…-])", r"\1 ", text)
    text = re.sub(r"\s{2,}", " ", text)
    return text
# -> Re-attaches the original punctuation to the transliterated text
def final_transliterated_sentence(original, transliterated):
    """Rebuild a transliterated sentence that carries ``original``'s punctuation.

    ``original`` is normalised with ``space_after_punct`` and split on
    whitespace. A token that is itself a punctuation mark is copied through
    unchanged. Every other token consumes the next whitespace-separated token
    of ``transliterated``; if the original token ends in a punctuation
    character, that character is appended to the transliterated token.

    Args:
        original: Source text, possibly containing punctuation.
        transliterated: Transliteration of ``original``; it must provide one
            token for every non-punctuation token of ``original``.

    Returns:
        The rebuilt tokens joined by single spaces, with " ... " collapsed
        to "...".

    Raises:
        IndexError: If ``transliterated`` has fewer tokens than ``original``
            has non-punctuation tokens.
    """
    # NOTE(review): the two "" entries appear empty in this view and can never
    # match a token or a character; they may be characters lost in an earlier
    # copy of the file - confirm against the original.
    punct_list = (
        "!", '"', "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", " ", "-",
        ".", "/", ":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "_",
        "`", "{", "|", "}", "~", "", "...", "",
    )
    # Split both inputs once instead of on every loop iteration.
    original_tokens = space_after_punct(original).split()
    transliterated_tokens = transliterated.split()

    sentence = []
    j = 0  # index of the next unused transliterated token
    for token in original_tokens:
        if token in punct_list:
            # Stand-alone punctuation has no transliterated counterpart.
            sentence.append(token)
            continue
        word = transliterated_tokens[j]
        j += 1
        if token[-1] in punct_list:
            word += token[-1]
        sentence.append(word)

    transliterated_sentence = " ".join(sentence)
    # str.replace returns a new string; the result used to be discarded, so
    # the ellipsis was never collapsed back. The second discarded replace
    # (empty pattern to empty replacement) had no effect and is dropped.
    return transliterated_sentence.replace(" ... ", "...")
def google_length_checker(t, temp_sentence, t0):
    """Pad ``t`` with trailing words of ``t0`` when it is one or two words short.

    Args:
        t: Candidate output whose word count is checked.
        temp_sentence: Reference text that supplies the expected word count.
        t0: Text whose last one or two words are borrowed as padding.

    Returns:
        ``t`` unchanged when it has at least as many words as
        ``temp_sentence`` or is more than two words short; otherwise ``t``
        followed by the last one or two words of ``t0``.
    """
    shortfall = len(temp_sentence.split()) - len(t.split())
    if shortfall == 1:
        return t + " " + t0.split()[-1]
    if shortfall == 2:
        donor_words = t0.split()
        return t + " " + donor_words[-2] + " " + donor_words[-1]
    # Long enough already, or too short to patch up.
    return t
# Special Symbol(Hindi Sentence Ending) Remover
def Halant_remover(T3):
    """Drop the last character of ``T3`` when it equals the marker tested below.

    Args:
        T3: Text to inspect.

    Returns:
        ``T3`` without its final character if that character matches the
        marker, otherwise ``T3`` unchanged. Empty input is returned as is
        instead of raising ``IndexError``.
    """
    if not T3:
        # Nothing to inspect; indexing [-1] would fail on empty input.
        return T3
    # NOTE(review): the marker literal appears empty in this view, so this
    # comparison can never be true; presumably it held the trailing sign the
    # function name refers to - confirm against the original file.
    if T3[-1] == "":
        return T3[:-1]
    return T3
def whole_transliteration_func_wrapper(**kwargs):
    """Transliterate text word by word with a caller-supplied set of engines.

    Keyword Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence tokenizer.
        func_params: Iterable of ``(source, args, function)`` triples. For
            every word each ``function`` is called as
            ``function(word, *args)``; ``source`` is the name recorded for
            that engine.

    Returns:
        The selected transliteration of every word, joined by single spaces.
        Selection is delegated to ``compare_outputs_transliteration`` (not
        shown in this section of the file).
    """
    text = kwargs.get("text")
    source_lang = kwargs.get("source_lang")
    func_params = kwargs.get("func_params")
    if func_params is not None:
        # The triples are walked once per word. A one-shot iterable such as
        # zip(...) would be exhausted after the first word, leaving every
        # later word without any engine, so materialise it up front.
        func_params = list(func_params)

    sentences = sentence_tokenize.sentence_split(text, lang=source_lang)
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in func_params:
                try:
                    transliterated_word = function(temp_sentence, *args)
                    if source == "libindic":
                        transliterated_word = transliterated_word.rstrip()
                    elif source == "sheetal":
                        transliterated_word = transliterated_word.replace("\n", "")
                except Exception as e:
                    # A failing engine is skipped so the others can still
                    # compete; KeyboardInterrupt/SystemExit now propagate.
                    print(f"Error occurred for {source} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
def dial_comparison_transliteration_rom_dev_ph1(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` word by word, choosing among four engines.

    The text is split into sentences with the "en" tokenizer and each
    sentence into space-separated words. Every word is stripped of
    punctuation and sent to ``indic_trans``, ``azure_transliteration``,
    ``google`` (called with "en" and "hi") and
    ``indic_transliteration_IAST``. The results of the engines that succeed
    are passed to ``compare_outputs_transliteration`` (not shown in this
    section of the file), which picks one.

    Args:
        text: Text to transliterate.
        source_lang: Language code passed to Azure; "ne" is replaced by "hi".
        source_script: Source script name passed to the engines.
        dest_script: Destination script name passed to the engines.

    Returns:
        The selected transliteration of every word, joined by single spaces.
    """
    sentences = sentence_tokenize.sentence_split(text, lang="en")
    if source_lang == "ne":
        source_lang = "hi"
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            # (source name, positional arguments, engine), in the order tried.
            # A single-argument engine needs a one-element tuple: a bare
            # "(temp_sentence)" is just the string, and "*args" would spread
            # its characters as separate arguments so the call always failed.
            engines = [
                ("indic_trans", (temp_sentence, source_script, dest_script), indic_trans),
                (
                    "Azure",
                    (temp_sentence, source_lang, source_script, dest_script),
                    azure_transliteration,
                ),
                ("google", (temp_sentence, "en", "hi"), google),
                ("indic_trans_IAST", (temp_sentence,), indic_transliteration_IAST),
            ]
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in engines:
                try:
                    transliterated_word = function(*args)
                except Exception as e:
                    # A failing engine is skipped so the others can still
                    # compete; KeyboardInterrupt/SystemExit now propagate.
                    print(f"Error occurred for {source} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
"""not used anymore"""
# def dial_comparison_transliteration_rom_dev_ph1_sentence_wise(
# text, source_lang, source_script, dest_script
# ):
# source_lang = "hi"
# sources_name = {
# "0": "Azure",
# "1": "indic_trans",
# "2": "google",
# "3": "indic_trans_IAST",
# }
#
# sentences = sentence_tokenize.sentence_split(text, lang="en")
# priority_list = ["Azure", "indic_trans", "google", "indic_trans_IAST"]
# transliterated_text = []
#
# for sentence in sentences:
# if sentence in etc_punctuation:
# continue
# print("original_sentence", sentence)
# temp_sentence = punct_remover(sentence)
# print("sentence_without_punctuation", temp_sentence)
# t00 = azure_transliteration(
# temp_sentence, source_lang, source_script, dest_script
# )
# t11 = indic_trans(temp_sentence, source_script, dest_script)
# t = google(temp_sentence, "en", "hi")
# t22 = google_length_checker(t, temp_sentence, t00)
# t33 = indic_transliteration_IAST(temp_sentence)
# Out = []
# for i in range(len(temp_sentence.split())):
# word = temp_sentence.split()[i]
#
# T0 = t00.split()[i]
# T1 = t11.split()[i]
# T2 = t22.split()[i]
# T3 = t33.split()[i]
# T3 = Halant_remover(T3)
#
# outputs = [T0, T1, T2, T3]
# out = compare_outputs_transliteration(
# word, outputs, sources_name, priority_list
# )
# Out.append(out)
# trans_sent_wo_punct = " ".join(Out)
# print("trans_sent_wo_punct", trans_sent_wo_punct)
# transliterated_sentence = final_transliterated_sentence(
# sentence, trans_sent_wo_punct
# )
# print("trans_sent_w_punct", transliterated_sentence)
# transliterated_text.append(transliterated_sentence)
#
# return " ".join(transliterated_text)
def dial_comparison_transliteration_dev_rom_ph1_sentence_wise2(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` word by word, choosing among four engines.

    The text is split into sentences with the "hi" tokenizer and each
    sentence into space-separated words. Every word is stripped of
    punctuation and sent to ``indic_trans``, ``azure_transliteration``,
    ``libindic`` (result right-stripped) and ``sheetal`` (newlines removed).
    The results of the engines that succeed are passed to
    ``compare_outputs_transliteration`` (not shown in this section of the
    file), which picks one.

    Args:
        text: Text to transliterate.
        source_lang: Language code passed to Azure; "ne" is replaced by "hi".
        source_script: Source script name passed to the engines.
        dest_script: Destination script name passed to the engines.

    Returns:
        The selected transliteration of every word, joined by single spaces.
    """
    sentences = sentence_tokenize.sentence_split(text, lang="hi")
    if source_lang == "ne":
        source_lang = "hi"
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            # (source name, positional arguments, engine), in the order tried.
            # A single-argument engine needs a one-element tuple: a bare
            # "(temp_sentence)" is just the string, and "*args" would spread
            # its characters as separate arguments so the call always failed.
            engines = [
                ("indic_trans", (temp_sentence, source_script, dest_script), indic_trans),
                (
                    "Azure",
                    (temp_sentence, source_lang, source_script, dest_script),
                    azure_transliteration,
                ),
                ("libindic", (temp_sentence, dest_script), libindic),
                ("sheetal", (temp_sentence,), sheetal),
            ]
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in engines:
                try:
                    transliterated_word = function(*args)
                    if source == "libindic":
                        transliterated_word = transliterated_word.rstrip()
                    elif source == "sheetal":
                        transliterated_word = transliterated_word.replace("\n", "")
                except Exception as e:
                    # A failing engine is skipped so the others can still
                    # compete; KeyboardInterrupt/SystemExit now propagate.
                    print(f"Error occurred for {source} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
"""not used anymore"""
# def dial_comparison_transliteration_dev_rom_ph1_sentence_wise3(text, source_lang, source_script, dest_script):
# if source_lang == "ne":
# source_lang = "hi"
# kwargs = {
# "text": text,
# "source_lang": source_lang,
# "func_params": zip(["indic_trans", "Azure", "libindic", "sheetal"],
# [(source_script, dest_script),
# (source_lang, source_script, dest_script),
# (dest_script), ()],
# [indic_trans, azure_transliteration, libindic, sheetal])
# }
# # sentences = sentence_tokenize.sentence_split(text, lang="hi")
# # final_transliterated_whole_sentence = []
# # for sentence_ in sentences:
# # print("Full Sentence is", sentence_)
# # final_transliterated_words = []
# # for sentence in sentence_.split(" "):
# # if sentence in etc_punctuation:
# # continue
# # print("Original Word", sentence)
# # temp_sentence = punct_remover(sentence)
# # i = 0
# # priority_list = list()
# # sources_name = myDict()
# # transliterated_words = []
# # for source, args, function in zip(["indic_trans", "Azure", "libindic", "sheetal"],
# # [(temp_sentence, source_script, dest_script),
# # (temp_sentence, source_lang, source_script, dest_script),
# # (temp_sentence, dest_script), (temp_sentence)],
# # [indic_trans, azure_transliteration, libindic, sheetal]):
# #
# # try:
# # transliterated_word = function(*args)
# # if source == "libindic":
# # transliterated_word = transliterated_word.rstrip()
# # elif source == "sheetal":
# # transliterated_word = transliterated_word.replace("\n", "")
# # transliterated_words.append(transliterated_word)
# # priority_list.append(source)
# # sources_name.add(str(i), str(source))
# # i = i + 1
# # except:
# # pass
# #
# # best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list)
# # best_output = final_transliterated_sentence(
# # temp_sentence, best_output
# # )
# # final_transliterated_words.append(best_output)
# # final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
# # return " ".join(final_transliterated_whole_sentence)
# final_transliterated_sentence = whole_transliteration_func_wrapper(**kwargs)
# return final_transliterated_sentence
"""not used anymore"""
# def dial_comparison_transliteration_dev_rom_ph1_sentence_wise(
# text, source_lang, source_script, dest_script
# ):
# print("Entered Here1212")
# print("Line is", text)
# sources_name = {"0": "indic_trans", "1": "Azure",
# "2": "libindic", "3": "sheetal"}
# priority_list = ["indic_trans", "Azure", "libindic", "sheetal"]
# etc_punctuation = ["", " . . .", " . .", " . . ”"]
# sentences = sentence_tokenize.sentence_split(text, lang="hi")
# if source_lang == "ne":
# source_lang = "hi"
# transliterated_text = []
# # for sentence in sentences:
# # print("sentence is")
# # if sentence in etc_punctuation:
# # continue
# # print("original_sentence", sentence)
# # temp_sentence = punct_remover(sentence)
# # print("sentence_without_punctuation", temp_sentence)
# # t0 = indic_trans(temp_sentence, source_script, dest_script)
# # t1 = azure_transliteration(
# # temp_sentence, source_lang, source_script, dest_script
# # )
# # print("before t1111111111")
# # t2 = libindic(temp_sentence, dest_script).rstrip()
# # print("before sheetal", t2)
# # t3 = sheetal(temp_sentence).replace("\n", "")
# # print("after sheetal", t3)
# # Out = []
# #
# # # trans_counter = Counter([len(t0), len(t1), len(t2), len(t3)])
# # # print(trans_counter)
# # # trans_counter_keys = list(trans_counter.keys())
# # # # trans_counter_keys = list(trans_counter.values())
# # # outputsidx = []
# # # highest = trans_counter_keys[0]
# # # for idx, output in enumerate([t0, t1, t2, t3]):
# # # if len(output) == highest:
# # # outputsidx.append(idx)
# # # print("all outputs are -> ", t0, t1, t2, t3, t3)
# # # outputs = []
# # # priority_list2 = []
# # # sources_name2 = {}
# # # for key in sources_name.keys():
# # # if int(key) not in outputsidx:
# # # pass
# # # else:
# # # sources_name2[key] = sources_name[key]
# # # for idx, value in enumerate(priority_list):
# # # if idx not in outputsidx:
# # # pass
# # # else:
# # # priority_list2.append(value)
# # # print(outputsidx, "outputsidx")
# # # for i in range(len(temp_sentence.split())):
# # # word = temp_sentence.split()[i]
# # #
# # # if 0 in outputsidx:
# # # T0 = t0.split()[i]
# # # outputs.append(T0)
# # # if 1 in outputsidx:
# # # T1 = t1.split()[i]
# # # outputs.append(T1)
# # # if 2 in outputsidx:
# # # T2 = t2.split()[i]
# # # outputs.append(T2)
# # # if 3 in outputsidx:
# # # T3 = t3.split()[i]
# # # outputs.append(T3)
# # # # T2 = t2.split()[i]
# # # # T3 = t3.split()[i]
# # # # outputs = [T0, T1, T2, T3]
# # # print("ouputs -> ", outputs, sources_name2, priority_list2)
# # # out = compare_outputs_transliteration(
# # # word, outputs, sources_name2, priority_list2
# # # )
# # Out.append(out)
# # trans_sent_wo_punct = " ".join(Out)
# if text in etc_punctuation:
# return text
# # print("original_sentence", sentence)
# temp_sentence = punct_remover(text)
# tt = 0
# try:
# t0 = indic_trans(temp_sentence, source_script, dest_script)
# outputa = t0
# except:
# tt += 1
# try:
# if tt == 1:
# t1 = azure_transliteration(
# temp_sentence, source_lang, source_script, dest_script
# )
# outputa = t1
# except:
# tt += 1
# # print("before t1111111111")
# try:
# if tt == 2:
# t2 = libindic(temp_sentence, dest_script).rstrip()
# outputa = t2
# except:
# tt += 1
# # print("before sheetal", t2)
# try:
# if tt == 3:
# t3 = sheetal(temp_sentence).replace("\n", "")
# outputa = t3
# except:
# tt += 1
#
# if tt == 4:
# outputa = text
# # else:
# # trans_sent_wo_punct = outputa
# # # print("trans_sent_wo_punct", trans_sent_wo_punct)
# # # transliterated_sentence = final_transliterated_sentence(
# # # sentence, trans_sent_wo_punct
# # # )
# # # print("trans_sent_w_punct", transliterated_sentence)
# # # transliterated_text.append(transliterated_sentence)
# # # print("Entered Exiting Here1212")
# return outputa
"""not used anymore"""
# def dial_comparison_transliteration_dev_rom_ph1(
# text, source_lang, source_script, dest_script
# ):
# sources_name = {"0": "indic_trans", "1": "Azure",
# "2": "libindic", "3": "sheetal"}
# sentences = sentence_tokenize.sentence_split(text, lang="hi")
# priority_list = ["indic_trans", "Azure", "sheetal", "libindic"]
# transliterated_text = []
#
# for sentence in sentences:
# if (
# sentence == ""
# or sentence == " . . ."
# or sentence == " . ."
# or sentence == " . . ”"
# ):
# continue
# OUT = []
# for word in sentence.split():
# if word == ".":
# continue
# t0 = indic_trans(word, source_script, dest_script)
# t1 = azure_transliteration(
# word, source_lang, source_script, dest_script)
# t2 = libindic(word, dest_script).rstrip()
# t3 = sheetal(word).replace("\n", "")
# outputs = [t0, t1, t2, t3]
# out = compare_outputs_transliteration(
# word, outputs, sources_name, priority_list
# )
# OUT.append(out)
# transliterated_text.append(" ".join(OUT))
#
# return " ".join(transliterated_text)
def dial_comparison_transliteration_arbic_to_rom_ph1(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` word by word, choosing among three engines.

    The text is split into sentences with the "ar" tokenizer and each
    sentence into space-separated words. Every word is stripped of
    punctuation and sent to ``indic_trans``, ``azure_transliteration`` and
    ``buck_2_unicode``. The results of the engines that succeed are passed to
    ``compare_outputs_transliteration`` (not shown in this section of the
    file), which picks one.

    Args:
        text: Text to transliterate.
        source_lang: Language code passed to Azure.
        source_script: Source script name passed to the engines.
        dest_script: Destination script name passed to the engines.

    Returns:
        The selected transliteration of every word, joined by single spaces.
    """
    sentences = sentence_tokenize.sentence_split(text, lang="ar")
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            # (source name, positional arguments, engine), in the order tried.
            # A single-argument engine needs a one-element tuple: a bare
            # "(temp_sentence)" is just the string, and "*args" would spread
            # its characters as separate arguments so the call always failed.
            engines = [
                ("indic_trans", (temp_sentence, source_script, dest_script), indic_trans),
                (
                    "Azure",
                    (temp_sentence, source_lang, source_script, dest_script),
                    azure_transliteration,
                ),
                ("buck_2_unicode", (temp_sentence,), buck_2_unicode),
            ]
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in engines:
                try:
                    transliterated_word = function(*args)
                except Exception as e:
                    # A failing engine is skipped so the others can still
                    # compete; KeyboardInterrupt/SystemExit now propagate.
                    print(f"Error occurred for {source} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
def dial_comparison_transliteration_kann_to_rom_ph1(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` word by word, choosing among four engines.

    The text is split into sentences with the "kn" tokenizer and each
    sentence into space-separated words. Every word is stripped of
    punctuation and sent to ``om_transliterator``, ``indic_trans``,
    ``libindic`` (result right-stripped) and ``azure_transliteration``. The
    results of the engines that succeed are passed to
    ``compare_outputs_transliteration`` (not shown in this section of the
    file), which picks one.

    Args:
        text: Text to transliterate.
        source_lang: Language code passed to Azure.
        source_script: Source script name passed to the engines.
        dest_script: Destination script name passed to the engines.

    Returns:
        The selected transliteration of every word, joined by single spaces.
    """
    sentences = sentence_tokenize.sentence_split(text, lang="kn")
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            # (source name, positional arguments, engine), in the order tried.
            # A single-argument engine needs a one-element tuple: a bare
            # "(temp_sentence)" is just the string, and "*args" would spread
            # its characters as separate arguments so the call always failed.
            engines = [
                ("om_transliteration", (temp_sentence,), om_transliterator),
                ("indic_trans", (temp_sentence, source_script, dest_script), indic_trans),
                ("libindic", (temp_sentence, dest_script), libindic),
                (
                    "Azure",
                    (temp_sentence, source_lang, source_script, dest_script),
                    azure_transliteration,
                ),
            ]
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in engines:
                try:
                    transliterated_word = function(*args)
                    if source == "libindic":
                        transliterated_word = transliterated_word.rstrip()
                except Exception as e:
                    # A failing engine is skipped so the others can still
                    # compete; KeyboardInterrupt/SystemExit now propagate.
                    print(f"Error occurred for {source} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
def dial_comparison_transliteration_tamil_to_rom_ph1(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` word by word, choosing among three engines.

    The text is split into sentences with the "ta" tokenizer and each
    sentence into space-separated words. Every word is stripped of
    punctuation and sent to ``azure_transliteration``, ``libindic`` (result
    right-stripped) and ``indic_trans``. The results of the engines that
    succeed are passed to ``compare_outputs_transliteration`` (not shown in
    this section of the file), which picks one.

    Args:
        text: Text to transliterate.
        source_lang: Language code passed to Azure.
        source_script: Source script name passed to the engines.
        dest_script: Destination script name passed to the engines.

    Returns:
        The selected transliteration of every word, joined by single spaces.
    """
    sentences = sentence_tokenize.sentence_split(text, lang="ta")
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            # (source name, positional arguments, engine), in the order tried.
            engines = [
                (
                    "Azure",
                    (temp_sentence, source_lang, source_script, dest_script),
                    azure_transliteration,
                ),
                ("libindic", (temp_sentence, dest_script), libindic),
                ("indic_trans", (temp_sentence, source_script, dest_script), indic_trans),
            ]
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in engines:
                try:
                    transliterated_word = function(*args)
                    if source == "libindic":
                        transliterated_word = transliterated_word.rstrip()
                except Exception as e:
                    # A failing engine is skipped so the others can still
                    # compete, but the failure is reported instead of being
                    # swallowed; KeyboardInterrupt/SystemExit now propagate.
                    print(f"Error occurred for {source} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
def dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` word by word, choosing among three engines.

    The text is split into sentences with the tokenizer for ``source_lang``
    and each sentence into space-separated words. Every word is stripped of
    punctuation and sent to ``azure_transliteration``, ``indic_trans`` and
    ``libindic`` (result right-stripped). The results of the engines that
    succeed are passed to ``compare_outputs_transliteration`` (not shown in
    this section of the file), which picks one.

    Args:
        text: Text to transliterate.
        source_lang: Language code used for sentence splitting and for Azure.
        source_script: Source script name passed to the engines.
        dest_script: Destination script name passed to the engines.

    Returns:
        The selected transliteration of every word, joined by single spaces.
    """
    sentences = sentence_tokenize.sentence_split(text, lang=source_lang)
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            # (source name, positional arguments, engine), in the order tried.
            engines = [
                (
                    "Azure",
                    (temp_sentence, source_lang, source_script, dest_script),
                    azure_transliteration,
                ),
                ("indic_trans", (temp_sentence, source_script, dest_script), indic_trans),
                ("libindic", (temp_sentence, dest_script), libindic),
            ]
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in engines:
                try:
                    transliterated_word = function(*args)
                    if source == "libindic":
                        transliterated_word = transliterated_word.rstrip()
                except Exception as e:
                    # A failing engine is skipped so the others can still
                    # compete, but the failure is reported instead of being
                    # swallowed; KeyboardInterrupt/SystemExit now propagate.
                    print(f"Error occurred for {source} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
def dial_comparison_transliteration_latin_gurmukhi(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` word by word, choosing among three engines.

    The text is split into sentences with the "pa" tokenizer and each
    sentence into space-separated words. Every word is stripped of
    punctuation and sent to ``azure_transliteration``,
    ``indic_transliteration_GURMUKHI`` and
    ``unicode_transliteration_GURMUKHI``. The results of the engines that
    succeed are passed to ``compare_outputs_transliteration`` (not shown in
    this section of the file), which picks one.

    Args:
        text: Text to transliterate.
        source_lang: Ignored; the function always uses "pa".
        source_script: Source script name passed to Azure.
        dest_script: Destination script name passed to Azure.

    Returns:
        The selected transliteration of every word, joined by single spaces.
    """
    source_lang = "pa"
    sentences = sentence_tokenize.sentence_split(text, lang=source_lang)
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            # (source name, positional arguments, engine), in the order tried.
            # Every entry is a real tuple and is unpacked with "*args". The
            # previous "function(args)" call handed Azure its whole argument
            # tuple as a single parameter, so Azure could never succeed.
            engines = [
                (
                    "Azure",
                    (temp_sentence, source_lang, source_script, dest_script),
                    azure_transliteration,
                ),
                ("indic_trans", (temp_sentence,), indic_transliteration_GURMUKHI),
                ("indic_trans_IAST", (temp_sentence,), unicode_transliteration_GURMUKHI),
            ]
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in engines:
                try:
                    transliterated_word = function(*args)
                except Exception as e:
                    # A failing engine is reported and skipped so the others
                    # can still compete.
                    print(f"Error occured for {function} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
def dial_comparison_transliteration_latin_cyrillic(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` word by word, choosing among three engines.

    The text is split into sentences with the "bg" tokenizer and each
    sentence into space-separated words. Every word is stripped of
    punctuation and sent to ``azure_transliteration``,
    ``transliteration_LATIN_CYRILLIC`` and ``google`` (called with "en" and
    "bg"). The results of the engines that succeed are passed to
    ``compare_outputs_transliteration`` (not shown in this section of the
    file), which picks one.

    Args:
        text: Text to transliterate.
        source_lang: Ignored; the function always uses "bg".
        source_script: Source script name passed to Azure.
        dest_script: Destination script name passed to Azure.

    Returns:
        The selected transliteration of every word, joined by single spaces.
    """
    source_lang = "bg"
    sentences = sentence_tokenize.sentence_split(text, lang=source_lang)
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            # (source name, positional arguments, engine), in the order tried.
            # A single-argument engine needs a one-element tuple: a bare
            # "(temp_sentence)" is just the string, and "*args" would spread
            # its characters as separate arguments so the call always failed.
            engines = [
                (
                    "Azure",
                    (temp_sentence, source_lang, source_script, dest_script),
                    azure_transliteration,
                ),
                ("indic_trans", (temp_sentence,), transliteration_LATIN_CYRILLIC),
                ("google", (temp_sentence, "en", source_lang), google),
            ]
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in engines:
                try:
                    transliterated_word = function(*args)
                except Exception as e:
                    # A failing engine is skipped so the others can still
                    # compete; KeyboardInterrupt/SystemExit now propagate.
                    print(f"Error occurred for {source} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
def dial_comparison_transliteration_latin_telugu_sentence_wise(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` word by word, choosing among four engines.

    The text is split into sentences with the "te" tokenizer and each
    sentence into space-separated words. Every word is stripped of
    punctuation and sent to ``indic_transliteration_TELUGU``,
    ``azure_transliteration``, ``indic_trans`` and ``libindic`` (result
    right-stripped). The results of the engines that succeed are passed to
    ``compare_outputs_transliteration`` (not shown in this section of the
    file), which picks one.

    Args:
        text: Text to transliterate.
        source_lang: Ignored; the function always uses "te".
        source_script: Source script name passed to the engines.
        dest_script: Destination script name passed to the engines.

    Returns:
        The selected transliteration of every word, joined by single spaces.
    """
    source_lang = "te"
    sentences = sentence_tokenize.sentence_split(text, lang=source_lang)
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            # (source name, positional arguments, engine), in the order tried.
            # A single-argument engine needs a one-element tuple: a bare
            # "(temp_sentence)" is just the string, and "*args" would spread
            # its characters as separate arguments so the call always failed.
            engines = [
                ("indic_translit", (temp_sentence,), indic_transliteration_TELUGU),
                (
                    "Azure",
                    (temp_sentence, source_lang, source_script, dest_script),
                    azure_transliteration,
                ),
                ("indic_trans", (temp_sentence, source_script, dest_script), indic_trans),
                ("libindic", (temp_sentence, dest_script), libindic),
            ]
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in engines:
                try:
                    transliterated_word = function(*args)
                    if source == "libindic":
                        transliterated_word = transliterated_word.rstrip()
                except Exception as e:
                    # A failing engine is skipped so the others can still
                    # compete; KeyboardInterrupt/SystemExit now propagate.
                    print(f"Error occurred for {source} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
def dial_comparison_transliteration_gurmukhi_latin_sentence_wise(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` word by word, choosing among three engines.

    The text is split into sentences with the "pa" tokenizer and each
    sentence into space-separated words. Every word is stripped of
    punctuation and sent to ``indic_transliteration_GURMUKHI_LATIN``,
    ``azure_transliteration`` and ``unicode_transliteration_GURMUKHI_LATIN``
    (result right-stripped). The results of the engines that succeed are
    passed to ``compare_outputs_transliteration`` (not shown in this section
    of the file), which picks one.

    Args:
        text: Text to transliterate.
        source_lang: Ignored; the function always uses "pa".
        source_script: Source script name passed to Azure.
        dest_script: Destination script name passed to Azure.

    Returns:
        The selected transliteration of every word, joined by single spaces.
    """
    source_lang = "pa"
    sentences = sentence_tokenize.sentence_split(text, lang=source_lang)
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            # (source name, positional arguments, engine), in the order tried.
            # A single-argument engine needs a one-element tuple: a bare
            # "(temp_sentence)" is just the string, and "*args" would spread
            # its characters as separate arguments so the call always failed.
            engines = [
                ("indic_trans", (temp_sentence,), indic_transliteration_GURMUKHI_LATIN),
                (
                    "Azure",
                    (temp_sentence, source_lang, source_script, dest_script),
                    azure_transliteration,
                ),
                ("unicode", (temp_sentence,), unicode_transliteration_GURMUKHI_LATIN),
            ]
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in engines:
                try:
                    transliterated_word = function(*args)
                    if source == "unicode":
                        transliterated_word = transliterated_word.rstrip()
                except Exception as e:
                    # A failing engine is skipped so the others can still
                    # compete; KeyboardInterrupt/SystemExit now propagate.
                    print(f"Error occurred for {source} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
def dial_comparison_transliteration_cyrilic_latin_sentence_wise(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` word by word, choosing among three engines.

    The text is split into sentences with the "bg" tokenizer and each
    sentence into space-separated words. Every word is stripped of
    punctuation and sent to ``azure_transliteration``,
    ``transliteration_CYRILIC_LATIN`` and ``ConvertToLatin`` (result
    right-stripped). The results of the engines that succeed are passed to
    ``compare_outputs_transliteration`` (not shown in this section of the
    file), which picks one.

    Args:
        text: Text to transliterate.
        source_lang: Ignored; the function always uses "bg".
        source_script: Source script name passed to Azure.
        dest_script: Destination script name passed to Azure.

    Returns:
        The selected transliteration of every word, joined by single spaces.
    """
    source_lang = "bg"
    sentences = sentence_tokenize.sentence_split(text, lang=source_lang)
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            # (source name, positional arguments, engine), in the order tried.
            # A single-argument engine needs a one-element tuple: a bare
            # "(temp_sentence)" is just the string, and "*args" would spread
            # its characters as separate arguments so the call always failed.
            engines = [
                (
                    "Azure",
                    (temp_sentence, source_lang, source_script, dest_script),
                    azure_transliteration,
                ),
                ("indic_trans", (temp_sentence,), transliteration_CYRILIC_LATIN),
                ("unicode", (temp_sentence,), ConvertToLatin),
            ]
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in engines:
                try:
                    transliterated_word = function(*args)
                    if source == "unicode":
                        transliterated_word = transliterated_word.rstrip()
                except Exception as e:
                    # A failing engine is skipped so the others can still
                    # compete; KeyboardInterrupt/SystemExit now propagate.
                    print(f"Error occurred for {source} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
def dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` word by word, choosing among three engines.

    The language code is derived from ``dest_script`` when it is one of
    Gujarati, Oriya, Malayalam, Tamil, Bengali or Kannada; otherwise the
    given ``source_lang`` is kept. The text is split into sentences with the
    tokenizer for that code and each sentence into space-separated words.
    Every word is stripped of punctuation and sent to
    ``azure_transliteration``, ``libindic`` (result right-stripped) and
    ``indic_trans``. The results of the engines that succeed are passed to
    ``compare_outputs_transliteration`` (not shown in this section of the
    file), which picks one.

    Args:
        text: Text to transliterate.
        source_lang: Fallback language code, used only when ``dest_script``
            is not one of the scripts listed above.
        source_script: Source script name passed to the engines.
        dest_script: Destination script name passed to the engines.

    Returns:
        The selected transliteration of every word, joined by single spaces.
    """
    script_to_lang = {
        "Gujarati": "gu",
        "Oriya": "or",
        "Malayalam": "ml",
        "Tamil": "ta",
        "Bengali": "bn",
        "Kannada": "kn",
    }
    source_lang = script_to_lang.get(dest_script, source_lang)
    sentences = sentence_tokenize.sentence_split(text, lang=source_lang)
    final_transliterated_whole_sentence = []
    for sentence_ in sentences:
        print("Full Sentence is", sentence_)
        final_transliterated_words = []
        for sentence in sentence_.split(" "):
            if sentence in etc_punctuation:
                continue
            print("Original Word", sentence)
            temp_sentence = punct_remover(sentence)
            # (source name, positional arguments, engine), in the order tried.
            engines = [
                (
                    "Azure",
                    (temp_sentence, source_lang, source_script, dest_script),
                    azure_transliteration,
                ),
                ("libindic", (temp_sentence, dest_script), libindic),
                ("indic_trans", (temp_sentence, source_script, dest_script), indic_trans),
            ]
            priority_list = []
            sources_name = myDict()
            transliterated_words = []
            for source, args, function in engines:
                try:
                    transliterated_word = function(*args)
                    if source == "libindic":
                        transliterated_word = transliterated_word.rstrip()
                except Exception as e:
                    # A failing engine is skipped so the others can still
                    # compete, but the failure is reported instead of being
                    # swallowed; KeyboardInterrupt/SystemExit now propagate.
                    print(f"Error occurred for {source} which is ->", e)
                    continue
                # Keys are the running index of the successful engines.
                sources_name.add(str(len(transliterated_words)), str(source))
                transliterated_words.append(transliterated_word)
                priority_list.append(source)
            best_output = compare_outputs_transliteration(
                temp_sentence, transliterated_words, sources_name, priority_list
            )
            best_output = final_transliterated_sentence(temp_sentence, best_output)
            final_transliterated_words.append(best_output)
        final_transliterated_whole_sentence.append(" ".join(final_transliterated_words))
    return " ".join(final_transliterated_whole_sentence)
def dial_comparison_transliteration_or_ml_gu_te_devanagari(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``indic_transliteration_OTHER_DEVANAGRI``, ``libindic`` and
    ``indic_trans``; ``compare_outputs_transliteration`` then selects one of
    the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            engines = [
                ("indic_trans_IAST", indic_transliteration_OTHER_DEVANAGRI, (bare_word, source_script)),
                ("libindic", libindic, (bare_word, dest_script)),
                ("indic_trans", indic_trans, (bare_word, source_script, dest_script)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                    if label == "libindic":
                        candidate = candidate.rstrip()
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
def dial_comparison_transliteration_devanagari_or_ml_gu_te(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``indic_transliteration_DEVANAGRI_OTHER``, ``libindic`` and
    ``indic_trans``; ``compare_outputs_transliteration`` then selects one of
    the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            engines = [
                ("indic_trans_IAST", indic_transliteration_DEVANAGRI_OTHER, (bare_word, dest_script)),
                ("libindic", libindic, (bare_word, dest_script)),
                ("indic_trans", indic_trans, (bare_word, source_script, dest_script)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                    if label == "libindic":
                        candidate = candidate.rstrip()
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
def dial_comparison_transliteration_kannada_ml_ta_te_ben(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``indic_transliteration_KANNADA_OTHER``, ``libindic`` and ``indic_trans``;
    ``compare_outputs_transliteration`` then selects one of the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            engines = [
                ("indic_trans_IAST", indic_transliteration_KANNADA_OTHER, (bare_word, dest_script)),
                ("libindic", libindic, (bare_word, dest_script)),
                ("indic_trans", indic_trans, (bare_word, source_script, dest_script)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                    if label == "libindic":
                        candidate = candidate.rstrip()
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
def dial_comparison_transliteration_ml_ta_te_ben_kannada(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``indic_transliteration_OTHER_KANNADA``, ``libindic`` and ``indic_trans``;
    ``compare_outputs_transliteration`` then selects one of the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            # NOTE(review): libindic receives source_script here, while other
            # functions in this file pass dest_script -- confirm which is meant.
            engines = [
                ("indic_trans_IAST", indic_transliteration_OTHER_KANNADA, (bare_word, source_script)),
                ("libindic", libindic, (bare_word, source_script)),
                ("indic_trans", indic_trans, (bare_word, source_script, dest_script)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                    if label == "libindic":
                        candidate = candidate.rstrip()
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
def dial_comparison_transliteration_tamil_other(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``indic_transliteration_TAMIL_OTHER``, ``libindic`` and ``indic_trans``;
    ``compare_outputs_transliteration`` then selects one of the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            # NOTE(review): the first engine is reported under the label
            # "Azure" although it is indic_transliteration_TAMIL_OTHER. The
            # label is kept because it is passed on to the selector -- confirm
            # whether it should be renamed.
            # NOTE(review): libindic receives source_script here, while other
            # functions in this file pass dest_script -- confirm which is meant.
            engines = [
                ("Azure", indic_transliteration_TAMIL_OTHER, (bare_word, dest_script)),
                ("libindic", libindic, (bare_word, source_script)),
                ("indic_trans", indic_trans, (bare_word, source_script, dest_script)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                    if label == "libindic":
                        candidate = candidate.rstrip()
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
def dial_comparison_transliteration_other_tamil(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``indic_transliteration_OTHER_TAMIL``, ``libindic`` and ``indic_trans``;
    ``compare_outputs_transliteration`` then selects one of the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            # NOTE(review): the first engine is reported under the label
            # "Azure" although it is indic_transliteration_OTHER_TAMIL. The
            # label is kept because it is passed on to the selector -- confirm
            # whether it should be renamed.
            # NOTE(review): libindic receives source_script here, while other
            # functions in this file pass dest_script -- confirm which is meant.
            engines = [
                ("Azure", indic_transliteration_OTHER_TAMIL, (bare_word, source_script)),
                ("libindic", libindic, (bare_word, source_script)),
                ("indic_trans", indic_trans, (bare_word, source_script, dest_script)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                    if label == "libindic":
                        candidate = candidate.rstrip()
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
# -> Function to transliterate from telugu to malayalam
def dial_comparison_transliteration_te_to_ml(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``indic_trans``, ``libindic`` and ``indic_transliteration_TELUGU_OTHER``;
    ``compare_outputs_transliteration`` then selects one of the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            engines = [
                ("indic_trans", indic_trans, (bare_word, source_script, dest_script)),
                ("libindic", libindic, (bare_word, dest_script)),
                ("indic_trans_IAST", indic_transliteration_TELUGU_OTHER, (bare_word, dest_script)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                    if label == "libindic":
                        candidate = candidate.rstrip()
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
# -> Function to transliterate from malayalam to telugu
def dial_comparison_transliteration_ml_to_te(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``azure_transliteration``, ``libindic`` and
    ``indic_transliteration_MALAYALAM_OTHER``;
    ``compare_outputs_transliteration`` then selects one of the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter and to
            the Azure engine.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            engines = [
                ("Azure", azure_transliteration, (bare_word, source_lang, source_script, dest_script)),
                ("libindic", libindic, (bare_word, dest_script)),
                ("indic_trans_IAST", indic_transliteration_MALAYALAM_OTHER, (bare_word, dest_script)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                    if label == "libindic":
                        candidate = candidate.rstrip()
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
# -> Function to transliterate from gujarati and oriya to gurmukhi
def dial_comparison_transliteration_guj_or_to_gur(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``indic_trans``, ``libindic`` and ``indic_transliteration_OTHER_GURMUKHI``;
    ``compare_outputs_transliteration`` then selects one of the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            engines = [
                ("indic_trans", indic_trans, (bare_word, source_script, dest_script)),
                ("libindic", libindic, (bare_word, dest_script)),
                ("indic_trans_IAST", indic_transliteration_OTHER_GURMUKHI, (bare_word, source_script)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                    if label == "libindic":
                        candidate = candidate.rstrip()
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
# -> Function to transliterate from gurmukhi and oriya to gujarati
def dial_comparison_transliteration_gur_or_to_guj(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``indic_trans``, ``libindic`` and ``indic_transliteration_OTHER_GUJARATI``;
    ``compare_outputs_transliteration`` then selects one of the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            engines = [
                ("indic_trans", indic_trans, (bare_word, source_script, dest_script)),
                ("libindic", libindic, (bare_word, dest_script)),
                ("indic_trans_IAST", indic_transliteration_OTHER_GUJARATI, (bare_word, source_script)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                    if label == "libindic":
                        candidate = candidate.rstrip()
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
# -> Function to transliterate from gujarati and gurmukhi to oriya
def dial_comparison_transliteration_guj_gur_to_or(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``indic_trans``, ``libindic`` and ``indic_transliteration_OTHER_ORIYA``;
    ``compare_outputs_transliteration`` then selects one of the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            engines = [
                ("indic_trans", indic_trans, (bare_word, source_script, dest_script)),
                ("libindic", libindic, (bare_word, dest_script)),
                ("indic_trans_IAST", indic_transliteration_OTHER_ORIYA, (bare_word, source_script)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                    if label == "libindic":
                        candidate = candidate.rstrip()
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
# -> Function to transliterate from latin to arabic
def dial_comparison_transliteration_latin_arabic(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``azure_transliteration``, ``transString`` and ``google``;
    ``compare_outputs_transliteration`` then selects one of the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter and to
            the Azure engine.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            # NOTE(review): the meaning of transString's second argument (1)
            # is not visible here -- confirm against transString.
            # The google engine is always called with the fixed pair "en" -> "ar".
            engines = [
                ("Azure", azure_transliteration, (bare_word, source_lang, source_script, dest_script)),
                ("transString", transString, (bare_word, 1)),
                ("google", google, (bare_word, "en", "ar")),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
# -> Function to transliterate from chinese to latin
def dial_comparison_transliteration_chinese_latin(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``azure_transliteration`` and ``translit_CHINESE_LATIN``;
    ``compare_outputs_transliteration`` then selects one of the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter and to
            the Azure engine.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            # The Pinyin arguments must be a one-element tuple: a bare
            # parenthesised string would be unpacked character by character
            # by engine(*engine_args), so the engine would fail for every
            # word that is not exactly one character long.
            engines = [
                ("Azure", azure_transliteration, (bare_word, source_lang, source_script, dest_script)),
                ("Pinyin", translit_CHINESE_LATIN, (bare_word,)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
# -> Function to transliterate from thai, sinhala, mongolian and Hebrew to latin
def dial_comparison_transliteration_th_sin_mng_heb_latin(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` into ``dest_script`` one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    and each sentence into space-separated words. Every word is offered to
    ``azure_transliteration`` and ``translit_th_sin_mng_heb_to_latin``;
    ``compare_outputs_transliteration`` then selects one of the outputs.

    Args:
        text: Text to transliterate.
        source_lang: Language code handed to the sentence splitter and to
            the Azure engine. The code ``"iw"`` is rewritten to ``"he"``.
        source_script: Name of the script ``text`` is written in.
        dest_script: Name of the script to produce.

    Returns:
        The selected transliterations of all words, joined by single spaces.
    """
    if source_lang == "iw":
        source_lang = "he"

    transliterated_sentences = []
    for full_sentence in sentence_tokenize.sentence_split(text, lang=source_lang):
        print("Full Sentence is", full_sentence)
        transliterated_tokens = []
        for token in full_sentence.split(" "):
            # etc_punctuation contains "", so blank tokens are skipped too.
            if token in etc_punctuation:
                continue
            print("Original Word", token)
            bare_word = punct_remover(token)
            # (label, engine, positional arguments), in the order they are tried.
            # The anyascii arguments must be a one-element tuple: a bare
            # parenthesised string would be unpacked character by character
            # by engine(*engine_args), so the engine would fail for every
            # word that is not exactly one character long.
            engines = [
                ("Azure", azure_transliteration, (bare_word, source_lang, source_script, dest_script)),
                ("anyascii", translit_th_sin_mng_heb_to_latin, (bare_word,)),
            ]
            candidates = []
            priority_list = []
            sources_name = myDict()
            for label, engine, engine_args in engines:
                try:
                    candidate = engine(*engine_args)
                except Exception as exc:
                    # Best effort: a failing engine is reported and skipped.
                    # KeyboardInterrupt/SystemExit are no longer swallowed.
                    print("Transliteration source failed:", label, repr(exc))
                    continue
                # Keys are the running index of successful engines.
                sources_name.add(str(len(candidates)), label)
                candidates.append(candidate)
                priority_list.append(label)
            best_output = compare_outputs_transliteration(bare_word, candidates, sources_name, priority_list)
            transliterated_tokens.append(final_transliterated_sentence(bare_word, best_output))
        transliterated_sentences.append(" ".join(transliterated_tokens))
    return " ".join(transliterated_sentences)
def compare_outputs_transliteration(word, outputs, sources_name, priority_list):
    """Return the preferred transliteration among the engine outputs.

    The choice is delegated to ``selection_source_transliteration``, which
    returns two results; only the first element of the first result is
    returned here.

    Args:
        word: The word that was transliterated. Currently unused: it was
            only needed by a disabled call to
            ``add_dial_comparison_doc2_transliteration`` that wrote a
            comparison table into a docx document.
        outputs: Engine outputs to choose from.
        sources_name: Mapping passed through to the selector.
        priority_list: Engine labels passed through to the selector.

    Returns:
        The first element of the selector's first result.
    """
    print("Before Comparing transliteration outputs", sources_name, outputs, priority_list)
    selection = selection_source_transliteration(sources_name, outputs, priority_list)
    # The selector yields exactly two results; the second one is not used.
    top_choice, _second_choice = selection
    print(top_choice, "compare all transliterations")
    return top_choice[0]
def add_dial_comparison_doc2_transliteration(doc2, table2, word, O1ANDS1, O2ANDS2, sources_name):
    """Append one comparison row to ``table2``.

    Column 0 receives ``word``. Columns 1 and 2 receive the first element
    of ``O1ANDS1`` and ``O2ANDS2`` respectively, each followed by a run
    reading ``(Source : <second element>)``.

    Args:
        doc2: Unused.
        table2: Table object providing ``add_row()``; the new row must
            expose at least three ``cells``.
        word: Text for the first column.
        O1ANDS1: Indexable pair for the second column.
        O2ANDS2: Indexable pair for the third column.
        sources_name: Unused.
    """
    new_cells = table2.add_row().cells
    new_cells[0].text = word
    for column, ranked in ((1, O1ANDS1), (2, O2ANDS2)):
        target = new_cells[column]
        target.text = ranked[0]
        # The source label is added as a separate run after the cell text.
        target.paragraphs[0].add_run(f"(Source : {ranked[1]!s})")
# -> Housing all the Script Pair Combinations for Transliterations
def transliterate(dest_script, src_script, src_lang, text):
print("transliterate", dest_script, src_script, src_lang, text)
# if src_script == "Common" or dest_script == "Common" or src_script == "None" or dest_script == "None" or src_script == dest_script:
# return
trans_text = text
if dest_script == "Latin" and src_script == "Devanagari":
# trans_text = dial_comparison_transliteration_dev_rom_ph1(text, src_lang, src_script,dest_script)
trans_text = dial_comparison_transliteration_dev_rom_ph1_sentence_wise2(
text, src_lang, src_script, dest_script
)
elif dest_script == "Devanagari" and src_script == "Latin":
trans_text = dial_comparison_transliteration_rom_dev_ph1(
text, src_lang, src_script, dest_script
)
# trans_text=dial_comparison_transliteration_rom_dev_ph1_sentence_wise(text, src_lang, src_script,dest_script)
elif dest_script == "Latin" and src_script == "Arabic":
trans_text = dial_comparison_transliteration_arbic_to_rom_ph1(
text, src_lang, src_script, dest_script
)
elif dest_script == "Latin" and src_script == "Kannada":
trans_text = dial_comparison_transliteration_kann_to_rom_ph1(
text, src_lang, src_script, dest_script
)
elif dest_script == "Latin" and src_script == "Tamil":
trans_text = dial_comparison_transliteration_tamil_to_rom_ph1(
text, src_lang, src_script, dest_script
)
elif dest_script == "Latin" and src_script == "Bengali":
trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
text, src_lang, src_script, dest_script
)
elif dest_script == "Latin" and src_script == "Telugu":
trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
text, src_lang, src_script, dest_script
)
elif dest_script == "Latin" and src_script == "Malayalam":
trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
text, src_lang, src_script, dest_script
)
elif dest_script == "Gurmukhi" and src_script == "Latin":
trans_text = dial_comparison_transliteration_latin_gurmukhi(
text, src_lang, src_script, dest_script
)
elif dest_script == "Cyrillic" and src_script == "Latin":
trans_text = dial_comparison_transliteration_latin_cyrillic(
text, src_lang, src_script, dest_script
)
elif dest_script == "Telugu" and src_script == "Latin":
trans_text = dial_comparison_transliteration_latin_telugu_sentence_wise(
text, src_lang, src_script, dest_script
)
elif dest_script == "Latin" and src_script == "Gurmukhi":
trans_text = dial_comparison_transliteration_gurmukhi_latin_sentence_wise(
text, src_lang, src_script, dest_script
)
elif dest_script == "Latin" and src_script == "Cyrillic":
trans_text = dial_comparison_transliteration_cyrilic_latin_sentence_wise(
text, src_lang, src_script, dest_script
)
elif dest_script == "Latin" and src_script == "Gujarati":
trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
text, src_lang, src_script, dest_script
)
elif dest_script == "Latin" and src_script == "Oriya":
trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
text, src_lang, src_script, dest_script
)
elif dest_script == "Gujarati" and src_script == "Latin":
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
text, src_lang, src_script, dest_script
)
elif dest_script == "Oriya" and src_script == "Latin":
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
text, src_lang, src_script, dest_script
)
elif dest_script == "Tamil" and src_script == "Latin":
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
text, src_lang, src_script, dest_script
)
elif dest_script == "Malayalam" and src_script == "Latin":
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
text, src_lang, src_script, dest_script
)
elif dest_script == "Bengali" and src_script == "Latin":
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
text, src_lang, src_script, dest_script
)
elif dest_script == "Devanagari" and src_script == "Oriya":
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
text, src_lang, src_script, dest_script
)
elif dest_script == "Devanagari" and src_script == "Gujarati":
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
text, src_lang, src_script, dest_script
)
elif dest_script == "Devanagari" and src_script == "Malayalam":
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
text, src_lang, src_script, dest_script
)
elif dest_script == "Devanagari" and src_script == "Telugu":
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
text, src_lang, src_script, dest_script
)
elif dest_script == "Oriya" and src_script == "Devanagari":
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
text, src_lang, src_script, dest_script
)
elif dest_script == "Gujarati" and src_script == "Devanagari":
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
text, src_lang, src_script, dest_script
)
elif dest_script == "Malayalam" and src_script == "Devanagari":
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
text, src_lang, src_script, dest_script
)
elif dest_script == "Telugu" and src_script == "Devanagari":
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
text, src_lang, src_script, dest_script
)
elif dest_script == "Devanagari" and src_script == "Bengali":
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
text, src_lang, src_script, dest_script
)
elif dest_script == "Devanagari" and src_script == "Gurmukhi":
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
text, src_lang, src_script, dest_script
)
elif dest_script == "Devanagari" and src_script == "Kannada":
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
text, src_lang, src_script, dest_script
)
elif dest_script == "Bengali" and src_script == "Devanagari":
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
text, src_lang, src_script, dest_script
)
elif dest_script == "Gurmukhi" and src_script == "Devanagari":
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
text, src_lang, src_script, dest_script
)
elif dest_script == "Kannada" and src_script == "Devanagari":
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
text, src_lang, src_script, dest_script
)
elif dest_script == "Tamil" and src_script == "Kannada":
trans_text = dial_comparison_transliteration_kannada_ml_ta_te_ben(
text, src_lang, src_script, dest_script
)
elif dest_script == "Malayalam" and src_script == "Kannada":
trans_text = dial_comparison_transliteration_kannada_ml_ta_te_ben(
text, src_lang, src_script, dest_script
)
elif dest_script == "Telugu" and src_script == "Kannada":
trans_text = dial_comparison_transliteration_kannada_ml_ta_te_ben(
text, src_lang, src_script, dest_script
)
elif dest_script == "Kannada" and src_script == "Tamil":
trans_text = dial_comparison_transliteration_ml_ta_te_ben_kannada(
text, src_lang, src_script, dest_script
)
elif dest_script == "Kannada" and src_script == "Malayalam":
trans_text = dial_comparison_transliteration_ml_ta_te_ben_kannada(
text, src_lang, src_script, dest_script
)
elif dest_script == "Kannada" and src_script == "Telugu":
trans_text = dial_comparison_transliteration_ml_ta_te_ben_kannada(
text, src_lang, src_script, dest_script
)
elif dest_script == "Kannada" and src_script == "Latin":
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
text, src_lang, src_script, dest_script
)
elif dest_script == "Tamil" and src_script == "Devanagari":
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
text, src_lang, src_script, dest_script
)
elif dest_script == "Devanagari" and src_script == "Tamil":
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
text, src_lang, src_script, dest_script
)
elif dest_script == "Telugu" and src_script == "Tamil":
trans_text = dial_comparison_transliteration_tamil_other(
text, src_lang, src_script, dest_script
)
elif dest_script == "Malayalam" and src_script == "Tamil":
trans_text = dial_comparison_transliteration_tamil_other(
text, src_lang, src_script, dest_script
)
elif dest_script == "Tamil" and src_script == "Malayalam":
trans_text = dial_comparison_transliteration_other_tamil(
text, src_lang, src_script, dest_script
)
elif dest_script == "Tamil" and src_script == "Telugu":
trans_text = dial_comparison_transliteration_other_tamil(
text, src_lang, src_script, dest_script
)
elif dest_script == "Malayalam" and src_script == "Telugu":
trans_text = dial_comparison_transliteration_te_to_ml(
text, src_lang, src_script, dest_script
)
elif dest_script == "Telugu" and src_script == "Malayalam":
trans_text = dial_comparison_transliteration_ml_to_te(
text, src_lang, src_script, dest_script
)
elif dest_script == "Gurmukhi" and src_script == "Gujarati":
trans_text = dial_comparison_transliteration_guj_or_to_gur(
text, src_lang, src_script, dest_script
)
elif dest_script == "Gujarati" and src_script == "Gurmukhi":
trans_text = dial_comparison_transliteration_gur_or_to_guj(
text, src_lang, src_script, dest_script
)
elif dest_script == "Gujarati" and src_script == "Oriya":
trans_text = dial_comparison_transliteration_gur_or_to_guj(
text, src_lang, src_script, dest_script
)
elif dest_script == "Gurmukhi" and src_script == "Oriya":
trans_text = dial_comparison_transliteration_guj_or_to_gur(
text, src_lang, src_script, dest_script
)
elif dest_script == "Oriya" and src_script == "Gujarati":
trans_text = dial_comparison_transliteration_guj_gur_to_or(
text, src_lang, src_script, dest_script
)
elif dest_script == "Oriya" and src_script == "Gurmukhi":
trans_text = dial_comparison_transliteration_guj_gur_to_or(
text, src_lang, src_script, dest_script
)
elif dest_script == "Bengali" and src_script == "Kannada":
trans_text = dial_comparison_transliteration_kannada_ml_ta_te_ben(
text, src_lang, src_script, dest_script
)
elif dest_script == "Kannada" and src_script == "Bengali":
trans_text = dial_comparison_transliteration_ml_ta_te_ben_kannada(
text, src_lang, src_script, dest_script
)
elif dest_script == "Devanagari" and src_script == "Arabic":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_rom_dev_ph1(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Gurmukhi" and src_script == "Arabic":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_gurmukhi(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Gujarati" and src_script == "Arabic":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Cyrillic" and src_script == "Arabic":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_cyrillic(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Arabic" and src_script == "Latin":
trans_text = dial_comparison_transliteration_latin_arabic(
text, src_lang, src_script, dest_script
)
elif dest_script == "Cyrillic" and src_script == "Devanagari":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_dev_rom_ph1_sentence_wise2(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_cyrillic(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Kannada" and src_script == "Arabic":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Tamil" and src_script == "Arabic":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Telugu" and src_script == "Arabic":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_telugu_sentence_wise(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Malayalam" and src_script == "Arabic":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Bengali" and src_script == "Arabic":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Oriya" and src_script == "Arabic":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Cyrillic" and src_script == "Kannada":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_kann_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_cyrillic(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Gujarati" and src_script == "Kannada":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_kann_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Gurmukhi" and src_script == "Kannada":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_kann_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_gurmukhi(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Oriya" and src_script == "Kannada":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_kann_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Cyrillic" and src_script == "Tamil":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_tamil_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_cyrillic(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Tamil" and src_script == "Cyrillic":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_cyrilic_latin_sentence_wise(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Tamil" and src_script == "Bengali":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Telugu" and src_script == "Bengali":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_telugu_sentence_wise(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Malayalam" and src_script == "Bengali":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Arabic" and src_script == "Devanagari":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_dev_rom_ph1_sentence_wise2(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_arabic(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Arabic" and src_script == "Cyrillic":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_cyrilic_latin_sentence_wise(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_arabic(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Arabic" and src_script == "Gurmukhi":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_gurmukhi_latin_sentence_wise(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_arabic(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Arabic" and src_script == "Gujarati":
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_arabic(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Latin" and src_script == "Hanji":
if src_lang == "zh-CN":
src_lang = "zh-Hans"
trans_text = dial_comparison_transliteration_chinese_latin(
text, src_lang, src_script, dest_script
)
elif dest_script == "Devanagari" and src_script == "Hanji":
if src_lang == "zh-CN":
src_lang = "zh-Hans"
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_chinese_latin(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_rom_dev_ph1(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Arabic" and src_script == "Hanji":
if src_lang == "zh-CN":
src_lang = "zh-Hans"
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_chinese_latin(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_arabic(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Gurmukhi" and src_script == "Hanji":
if src_lang == "zh-CN":
src_lang = "zh-Hans"
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_chinese_latin(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_gurmukhi(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Gujarati" and src_script == "Hanji":
if src_lang == "zh-CN":
src_lang = "zh-Hans"
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_chinese_latin(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Oriya" and src_script == "Hanji":
if src_lang == "zh-CN":
src_lang = "zh-Hans"
temp_dest_script = "Latin"
temp_text = dial_comparison_transliteration_chinese_latin(
text, src_lang, src_script, temp_dest_script
)
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
temp_text, src_lang, temp_dest_script, dest_script
)
elif dest_script == "Latin" and src_script == "Thai":
trans_text = dial_comparison_transliteration_th_sin_mng_heb_latin(
text, src_lang, src_script, dest_script
)
elif dest_script == "Latin" and src_script == "Sinhala":
trans_text = dial_comparison_transliteration_th_sin_mng_heb_latin(
text, src_lang, src_script, dest_script
)
elif dest_script == "Latin" and src_script == "Hebrew":
trans_text = dial_comparison_transliteration_th_sin_mng_heb_latin(
text, src_lang, src_script, dest_script
)
elif dest_script == "Latin" and src_script == "Mongolian":
src_lang = "mn-Cyrl"
trans_text = dial_comparison_transliteration_th_sin_mng_heb_latin(
text, src_lang, src_script, dest_script
)
return trans_text
# -> Main transliteration function that coordinates all the functions
def makeTransliteration_only(**kwargs):
    """Transliterate a single line, skipping the work when it is not needed.

    Keyword arguments read (each defaults to ``None`` when absent):
        line: the value to transliterate.
        lang: source language, forwarded to ``transliterate``.
        src_script: script the line is currently written in.
        dest_script: script the line should be converted to.
        dual_dial_script: flag compared against the string ``"No"``.

    Returns:
        ``line`` itself, untouched, when ``src_script`` equals ``dest_script``
        and ``dual_dial_script`` is ``"No"``; otherwise whatever
        ``transliterate`` returns for ``str(line)``.
    """
    source_line, language, script_from, script_to, dual_flag = (
        kwargs.get(key)
        for key in ("line", "lang", "src_script", "dest_script", "dual_dial_script")
    )

    # Nothing to do: scripts already match and the dual-dialogue flag is "No".
    same_script = script_from == script_to
    if same_script and dual_flag == "No":
        return source_line

    as_text = str(source_line)
    print("transliterating", script_to, script_from, language, as_text)
    return transliterate(script_to, script_from, language, as_text)
def add_dual_dialogue(converted_df, original_df, non_dial_dest_lang, dial_dest_lang, dual_dialogue, dll=None, dls=None):
    """Write converted screenplay rows into a new docx document.

    Each row of ``converted_df`` is read as ``row[2]`` (the text) and
    ``row[3]`` (the element type). The text is appended to the document with
    the writer matching its type. Rows whose type is not one of the handled
    values are skipped.

    For ``'dialogue'`` rows with ``dual_dialogue`` truthy, a second rendering
    is derived from the original text at the same index in ``original_df``:
    it is translated to ``dll`` when its detected language differs, then
    transliterated to ``dls`` when its detected script differs, and both
    renderings are written together via ``dual_script``.

    Args:
        converted_df: indexable rows of the converted script.
        original_df: indexable rows of the original script; only
            ``original_df[idx][2]`` is read, for dual dialogue rows.
        non_dial_dest_lang: forwarded to ``addActionLine``.
        dial_dest_lang: forwarded to ``addDialogue`` / ``dual_script``.
        dual_dialogue: when truthy, dialogue rows are written as dual dialogue.
        dll: language the second dialogue rendering is translated into.
        dls: script the second dialogue rendering is transliterated into.

    Returns:
        The populated ``docx.Document``.
    """
    doc = docx.Document()
    for idx, line in enumerate(converted_df):
        kind = line[3]
        converted_text = str(line[2])
        if kind == 'transition':
            addTransition(doc, converted_text)
        elif kind == 'special_term':
            addSpecialTerm(doc, converted_text)
        elif kind == 'slugline':
            addSlugLine(doc, converted_text)
        elif kind == 'action':
            addActionLine(doc, converted_text, non_dial_dest_lang)
        elif kind == 'speaker':
            addSpeaker(doc, converted_text)
        elif kind == 'parenthetical':
            addParenthetical(doc, converted_text)
        elif kind == 'dialogue':
            if not dual_dialogue:
                addDialogue(doc, converted_text, dial_dest_lang)
                continue

            original_text = original_df[idx][2]
            current_lang = language_detector(original_text)
            if current_lang != dll:
                translated_text = translate_comparison2(original_text, current_lang, dll)
            else:
                translated_text = original_text

            current_script = script_det(translated_text)
            if current_script != dls:
                # The third positional argument of transliterate() is the
                # source *language* (see the call in makeTransliteration_only).
                # At this point the text is in language `dll`, so pass that
                # rather than the destination script `dls`.
                translated_text = transliterate(dls, current_script, dll, translated_text)
            dual_script(doc, str(translated_text), converted_text, dial_dest_lang)
    return doc