# Module Imports import docx import re from indicnlp.tokenize import sentence_tokenize from .buck_2_unicode import buck_2_unicode from .transString import transString from .selection_source import ( selection_source_transliteration, ) from .detection import language_detector, script_det from .script_writing import ( addSlugLine, addActionLine, addSpeaker, addParenthetical, addDialogue, dual_script, addTransition, dial_checker, non_dial_checker, addSpecialTerm ) from .translation_resources import google, aws, azure, yandex from .transliteration_resources import ( azure_transliteration, indic_trans, indic_transliteration_OTHER_GUJARATI, indic_transliteration_OTHER_GURMUKHI, indic_transliteration_OTHER_ORIYA, om_transliterator, libindic, indic_transliteration_IAST, indic_transliteration_ITRANS, # polyglot_trans, sheetal, unicode_transliteration_GURMUKHI, indic_transliteration_GURMUKHI, transliteration_LATIN_CYRILLIC, indic_transliteration_TELUGU, unicode_transliteration_GURMUKHI_LATIN, indic_transliteration_GURMUKHI_LATIN, transliteration_CYRILIC_LATIN, ConvertToLatin, readonly, indic_transliteration_OTHER_DEVANAGRI, indic_transliteration_DEVANAGRI_OTHER, indic_transliteration_KANNADA_OTHER, indic_transliteration_OTHER_KANNADA, indic_transliteration_TAMIL_OTHER, indic_transliteration_OTHER_TAMIL, indic_transliteration_TELUGU_OTHER, indic_transliteration_MALAYALAM_OTHER, indic_transliteration_OTHER_GUJARATI, indic_transliteration_OTHER_GURMUKHI, indic_transliteration_OTHER_ORIYA, translit_CHINESE_LATIN, translit_th_sin_mng_heb_to_latin ) from conversion.translation.translation_function import translate_comparison2 from MNF.settings import BasePath # Importing Basepath of System basePath = BasePath() etc_punctuation = ["", " . . .", " . .", " . . ”"] """overriding dictionary class""" class myDict(dict): def __init__(self): self = dict() def add(self, key, value): self[key] = value # -> Punctuation Remover code def punct_remover(string): punctuations = """!()-[]{};:'"\,<>./?@#$%^&*_~…।""" for x in string.lower(): if x in punctuations: string = string.replace(x, " ") return string # -> Space After Punctuation Remover code def space_after_punct(text): # text = text.replace('...',' ... ') text = text.replace(". . .", " ... ") text = re.sub("([,!?()…-])", r"\1 ", text) text = re.sub("\s{2,}", " ", text) return text # -> Removing Punctuation from Transliterated text code def final_transliterated_sentence(original, transliterated): original = space_after_punct(original) punct_list = [ "!", '"', "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", " ", "-", ".", "/", ":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "_", "`", "{", "|", "}", "~", "…", "...", "।", ] sentence = [] j = 0 for i in range(len(original.split())): if original.split()[i] in punct_list: sentence.append(original.split()[i]) elif original.split()[i][-1] in punct_list: temp = transliterated.split()[j] + original.split()[i][-1] sentence.append(temp) j = j + 1 elif original.split()[i][-1] not in punct_list: temp = transliterated.split()[j] sentence.append(temp) j = j + 1 transliterated_sentence = " ".join(sentence) transliterated_sentence.replace(" ... ", "...") transliterated_sentence.replace("… ", "…") return transliterated_sentence def google_length_checker(t, temp_sentence, t0): if len(t.split()) >= len(temp_sentence.split()): return t elif len(t.split()) == len(temp_sentence.split()) - 1: final_t = t + " " + t0.split()[-1] return final_t elif len(t.split()) == len(temp_sentence.split()) - 2: final_t = t + " " + t0.split()[-2] + " " + t0.split()[-1] return final_t else: return t # Special Symbol(Hindi Sentence Ending) Remover def Halant_remover(T3): if T3[-1] == "्": return T3[:-1] else: return T3 def whole_transliteration_func_wrapper(**kwargs): text = kwargs.get("text") source_lang = kwargs.get("source_lang") func_params = kwargs.get("func_params") sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in func_params: try: transliterated_word = function(temp_sentence, *args) if source == "libindic": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_rom_dev_ph1(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang="en") if source_lang == "ne": source_lang = "hi" final_transliterated_whole_sentence = [] # source_lang = "hi" # source_script = "Latin" # dest_script = "Devanagari" for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_trans", "Azure", "google", "indic_trans_IAST"], [(temp_sentence, source_script, dest_script), (temp_sentence, source_lang, source_script, dest_script), (temp_sentence, "en", "hi"), (temp_sentence)], [indic_trans, azure_transliteration, google, indic_transliteration_IAST]): try: transliterated_word = function(*args) if source == "libindic": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) """not used anymore""" # def dial_comparison_transliteration_rom_dev_ph1_sentence_wise( # text, source_lang, source_script, dest_script # ): # source_lang = "hi" # sources_name = { # "0": "Azure", # "1": "indic_trans", # "2": "google", # "3": "indic_trans_IAST", # } # # sentences = sentence_tokenize.sentence_split(text, lang="en") # priority_list = ["Azure", "indic_trans", "google", "indic_trans_IAST"] # transliterated_text = [] # # for sentence in sentences: # if sentence in etc_punctuation: # continue # print("original_sentence", sentence) # temp_sentence = punct_remover(sentence) # print("sentence_without_punctuation", temp_sentence) # t00 = azure_transliteration( # temp_sentence, source_lang, source_script, dest_script # ) # t11 = indic_trans(temp_sentence, source_script, dest_script) # t = google(temp_sentence, "en", "hi") # t22 = google_length_checker(t, temp_sentence, t00) # t33 = indic_transliteration_IAST(temp_sentence) # Out = [] # for i in range(len(temp_sentence.split())): # word = temp_sentence.split()[i] # # T0 = t00.split()[i] # T1 = t11.split()[i] # T2 = t22.split()[i] # T3 = t33.split()[i] # T3 = Halant_remover(T3) # # outputs = [T0, T1, T2, T3] # out = compare_outputs_transliteration( # word, outputs, sources_name, priority_list # ) # Out.append(out) # trans_sent_wo_punct = " ".join(Out) # print("trans_sent_wo_punct", trans_sent_wo_punct) # transliterated_sentence = final_transliterated_sentence( # sentence, trans_sent_wo_punct # ) # print("trans_sent_w_punct", transliterated_sentence) # transliterated_text.append(transliterated_sentence) # # return " ".join(transliterated_text) def dial_comparison_transliteration_dev_rom_ph1_sentence_wise2(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang="hi") if source_lang == "ne": source_lang = "hi" final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_trans", "Azure", "libindic", "sheetal"], [(temp_sentence, source_script, dest_script), (temp_sentence, source_lang, source_script, dest_script), (temp_sentence, dest_script), (temp_sentence)], [indic_trans, azure_transliteration, libindic, sheetal]): try: transliterated_word = function(*args) if source == "libindic": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) """not used anymore""" # def dial_comparison_transliteration_dev_rom_ph1_sentence_wise3(text, source_lang, source_script, dest_script): # if source_lang == "ne": # source_lang = "hi" # kwargs = { # "text": text, # "source_lang": source_lang, # "func_params": zip(["indic_trans", "Azure", "libindic", "sheetal"], # [(source_script, dest_script), # (source_lang, source_script, dest_script), # (dest_script), ()], # [indic_trans, azure_transliteration, libindic, sheetal]) # } # # sentences = sentence_tokenize.sentence_split(text, lang="hi") # # final_transliterated_whole_sentence = [] # # for sentence_ in sentences: # # print("Full Sentence is", sentence_) # # final_transliterated_words = [] # # for sentence in sentence_.split(" "): # # if sentence in etc_punctuation: # # continue # # print("Original Word", sentence) # # temp_sentence = punct_remover(sentence) # # i = 0 # # priority_list = list() # # sources_name = myDict() # # transliterated_words = [] # # for source, args, function in zip(["indic_trans", "Azure", "libindic", "sheetal"], # # [(temp_sentence, source_script, dest_script), # # (temp_sentence, source_lang, source_script, dest_script), # # (temp_sentence, dest_script), (temp_sentence)], # # [indic_trans, azure_transliteration, libindic, sheetal]): # # # # try: # # transliterated_word = function(*args) # # if source == "libindic": # # transliterated_word = transliterated_word.rstrip() # # elif source == "sheetal": # # transliterated_word = transliterated_word.replace("\n", "") # # transliterated_words.append(transliterated_word) # # priority_list.append(source) # # sources_name.add(str(i), str(source)) # # i = i + 1 # # except: # # pass # # # # best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) # # best_output = final_transliterated_sentence( # # temp_sentence, best_output # # ) # # final_transliterated_words.append(best_output) # # final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) # # return " ".join(final_transliterated_whole_sentence) # final_transliterated_sentence = whole_transliteration_func_wrapper(**kwargs) # return final_transliterated_sentence """not used anymore""" # def dial_comparison_transliteration_dev_rom_ph1_sentence_wise( # text, source_lang, source_script, dest_script # ): # print("Entered Here1212") # print("Line is", text) # sources_name = {"0": "indic_trans", "1": "Azure", # "2": "libindic", "3": "sheetal"} # priority_list = ["indic_trans", "Azure", "libindic", "sheetal"] # etc_punctuation = ["", " . . .", " . .", " . . ”"] # sentences = sentence_tokenize.sentence_split(text, lang="hi") # if source_lang == "ne": # source_lang = "hi" # transliterated_text = [] # # for sentence in sentences: # # print("sentence is") # # if sentence in etc_punctuation: # # continue # # print("original_sentence", sentence) # # temp_sentence = punct_remover(sentence) # # print("sentence_without_punctuation", temp_sentence) # # t0 = indic_trans(temp_sentence, source_script, dest_script) # # t1 = azure_transliteration( # # temp_sentence, source_lang, source_script, dest_script # # ) # # print("before t1111111111") # # t2 = libindic(temp_sentence, dest_script).rstrip() # # print("before sheetal", t2) # # t3 = sheetal(temp_sentence).replace("\n", "") # # print("after sheetal", t3) # # Out = [] # # # # # trans_counter = Counter([len(t0), len(t1), len(t2), len(t3)]) # # # print(trans_counter) # # # trans_counter_keys = list(trans_counter.keys()) # # # # trans_counter_keys = list(trans_counter.values()) # # # outputsidx = [] # # # highest = trans_counter_keys[0] # # # for idx, output in enumerate([t0, t1, t2, t3]): # # # if len(output) == highest: # # # outputsidx.append(idx) # # # print("all outputs are -> ", t0, t1, t2, t3, t3) # # # outputs = [] # # # priority_list2 = [] # # # sources_name2 = {} # # # for key in sources_name.keys(): # # # if int(key) not in outputsidx: # # # pass # # # else: # # # sources_name2[key] = sources_name[key] # # # for idx, value in enumerate(priority_list): # # # if idx not in outputsidx: # # # pass # # # else: # # # priority_list2.append(value) # # # print(outputsidx, "outputsidx") # # # for i in range(len(temp_sentence.split())): # # # word = temp_sentence.split()[i] # # # # # # if 0 in outputsidx: # # # T0 = t0.split()[i] # # # outputs.append(T0) # # # if 1 in outputsidx: # # # T1 = t1.split()[i] # # # outputs.append(T1) # # # if 2 in outputsidx: # # # T2 = t2.split()[i] # # # outputs.append(T2) # # # if 3 in outputsidx: # # # T3 = t3.split()[i] # # # outputs.append(T3) # # # # T2 = t2.split()[i] # # # # T3 = t3.split()[i] # # # # outputs = [T0, T1, T2, T3] # # # print("ouputs -> ", outputs, sources_name2, priority_list2) # # # out = compare_outputs_transliteration( # # # word, outputs, sources_name2, priority_list2 # # # ) # # Out.append(out) # # trans_sent_wo_punct = " ".join(Out) # if text in etc_punctuation: # return text # # print("original_sentence", sentence) # temp_sentence = punct_remover(text) # tt = 0 # try: # t0 = indic_trans(temp_sentence, source_script, dest_script) # outputa = t0 # except: # tt += 1 # try: # if tt == 1: # t1 = azure_transliteration( # temp_sentence, source_lang, source_script, dest_script # ) # outputa = t1 # except: # tt += 1 # # print("before t1111111111") # try: # if tt == 2: # t2 = libindic(temp_sentence, dest_script).rstrip() # outputa = t2 # except: # tt += 1 # # print("before sheetal", t2) # try: # if tt == 3: # t3 = sheetal(temp_sentence).replace("\n", "") # outputa = t3 # except: # tt += 1 # # if tt == 4: # outputa = text # # else: # # trans_sent_wo_punct = outputa # # # print("trans_sent_wo_punct", trans_sent_wo_punct) # # # transliterated_sentence = final_transliterated_sentence( # # # sentence, trans_sent_wo_punct # # # ) # # # print("trans_sent_w_punct", transliterated_sentence) # # # transliterated_text.append(transliterated_sentence) # # # print("Entered Exiting Here1212") # return outputa """not used anymore""" # def dial_comparison_transliteration_dev_rom_ph1( # text, source_lang, source_script, dest_script # ): # sources_name = {"0": "indic_trans", "1": "Azure", # "2": "libindic", "3": "sheetal"} # sentences = sentence_tokenize.sentence_split(text, lang="hi") # priority_list = ["indic_trans", "Azure", "sheetal", "libindic"] # transliterated_text = [] # # for sentence in sentences: # if ( # sentence == "" # or sentence == " . . ." # or sentence == " . ." # or sentence == " . . ”" # ): # continue # OUT = [] # for word in sentence.split(): # if word == ".": # continue # t0 = indic_trans(word, source_script, dest_script) # t1 = azure_transliteration( # word, source_lang, source_script, dest_script) # t2 = libindic(word, dest_script).rstrip() # t3 = sheetal(word).replace("\n", "") # outputs = [t0, t1, t2, t3] # out = compare_outputs_transliteration( # word, outputs, sources_name, priority_list # ) # OUT.append(out) # transliterated_text.append(" ".join(OUT)) # # return " ".join(transliterated_text) def dial_comparison_transliteration_arbic_to_rom_ph1(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang="ar") final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_trans", "Azure", "buck_2_unicode"], [(temp_sentence, source_script, dest_script), (temp_sentence, source_lang, source_script, dest_script), (temp_sentence)], [indic_trans, azure_transliteration, buck_2_unicode]): try: transliterated_word = function(*args) if source == "libindic": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_kann_to_rom_ph1(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang="kn") final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["om_transliteration", "indic_trans", "libindic", "Azure"], [(temp_sentence), (temp_sentence, source_script, dest_script), (temp_sentence, dest_script), (temp_sentence, source_lang, source_script, dest_script)], [om_transliterator, indic_trans, libindic, azure_transliteration]): try: transliterated_word = function(*args) if source == "libindic": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_tamil_to_rom_ph1(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang="ta") final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["Azure", "libindic", "indic_trans"], [(temp_sentence, source_lang, source_script, dest_script), (temp_sentence, dest_script), (temp_sentence, source_script, dest_script)], [azure_transliteration, libindic, indic_trans]): try: transliterated_word = function(*args) if source == "libindic": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["Azure", "indic_trans", "libindic"], [(temp_sentence, source_lang, source_script, dest_script), (temp_sentence, source_script, dest_script), (temp_sentence, dest_script)], [azure_transliteration, indic_trans, libindic]): try: transliterated_word = function(*args) if source == "libindic": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_latin_gurmukhi(text, source_lang, source_script, dest_script): source_lang = "pa" sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["Azure", "indic_trans", "indic_trans_IAST"], [(temp_sentence, source_lang, source_script, dest_script), (temp_sentence), (temp_sentence)], [azure_transliteration, indic_transliteration_GURMUKHI, unicode_transliteration_GURMUKHI]): try: transliterated_word = function(args) if source == "libindic": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except Exception as e: print(f"Error occured for {function} which is ->", e) best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_latin_cyrillic(text, source_lang, source_script, dest_script): source_lang = "bg" sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["Azure", "indic_trans", "google"], [(temp_sentence, source_lang, source_script, dest_script), (temp_sentence), (temp_sentence, "en", source_lang)], [azure_transliteration, transliteration_LATIN_CYRILLIC, google]): try: transliterated_word = function(*args) if source == "libindic": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_latin_telugu_sentence_wise(text, source_lang, source_script, dest_script): source_lang = "te" sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_translit", "Azure", "indic_trans", "libindic"], [(temp_sentence), (temp_sentence, source_lang, source_script, dest_script), (temp_sentence, source_script, dest_script), (temp_sentence, dest_script)], [indic_transliteration_TELUGU, azure_transliteration, indic_trans, libindic]): try: transliterated_word = function(*args) if source == "libindic": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_gurmukhi_latin_sentence_wise(text, source_lang, source_script, dest_script): source_lang = "pa" sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_trans", "Azure", "unicode"], [(temp_sentence), (temp_sentence, source_lang, source_script, dest_script), (temp_sentence)], [indic_transliteration_GURMUKHI_LATIN, azure_transliteration, unicode_transliteration_GURMUKHI_LATIN]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_cyrilic_latin_sentence_wise(text, source_lang, source_script, dest_script): source_lang = "bg" sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["Azure", "indic_trans", "unicode"], [(temp_sentence, source_lang, source_script, dest_script), (temp_sentence), (temp_sentence)], [azure_transliteration, transliteration_CYRILIC_LATIN, ConvertToLatin]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(text, source_lang, source_script, dest_script): if dest_script == "Gujarati": source_lang = "gu" if dest_script == "Oriya": source_lang = "or" if dest_script == "Malayalam": source_lang = "ml" if dest_script == "Tamil": source_lang = "ta" if dest_script == "Bengali": source_lang = "bn" if dest_script == "Kannada": source_lang = "kn" sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["Azure", "libindic", "indic_trans"], [(temp_sentence, source_lang, source_script, dest_script), (temp_sentence, dest_script), (temp_sentence, source_script, dest_script)], [azure_transliteration, libindic, indic_trans]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_or_ml_gu_te_devanagari(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_trans_IAST", "libindic", "indic_trans"], [(temp_sentence, source_script), (temp_sentence, dest_script), (temp_sentence, source_script, dest_script)], [indic_transliteration_OTHER_DEVANAGRI, libindic, indic_trans]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_devanagari_or_ml_gu_te(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_trans_IAST", "libindic", "indic_trans"], [(temp_sentence, dest_script), (temp_sentence, dest_script), (temp_sentence, source_script, dest_script)], [indic_transliteration_DEVANAGRI_OTHER, libindic, indic_trans]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_kannada_ml_ta_te_ben(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_trans_IAST", "libindic", "indic_trans"], [(temp_sentence, dest_script), (temp_sentence, dest_script), (temp_sentence, source_script, dest_script)], [indic_transliteration_KANNADA_OTHER, libindic, indic_trans]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_ml_ta_te_ben_kannada(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_trans_IAST", "libindic", "indic_trans"], [(temp_sentence, source_script), (temp_sentence, source_script), (temp_sentence, source_script, dest_script)], [indic_transliteration_OTHER_KANNADA, libindic, indic_trans]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_tamil_other(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["Azure", "libindic", "indic_trans"], [(temp_sentence, dest_script), (temp_sentence, source_script), (temp_sentence, source_script, dest_script)], [indic_transliteration_TAMIL_OTHER, libindic, indic_trans]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def dial_comparison_transliteration_other_tamil(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["Azure", "libindic", "indic_trans"], [(temp_sentence, source_script), (temp_sentence, source_script), (temp_sentence, source_script, dest_script)], [indic_transliteration_OTHER_TAMIL, libindic, indic_trans]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) # -> Function to transliterate from telugu to malayalam def dial_comparison_transliteration_te_to_ml(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_trans", "libindic", "indic_trans_IAST"], [(temp_sentence, source_script, dest_script), (temp_sentence, dest_script), (temp_sentence, dest_script)], [indic_trans, libindic, indic_transliteration_TELUGU_OTHER]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) # -> Function to transliterate from malayalam to telugu def dial_comparison_transliteration_ml_to_te(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["Azure", "libindic", "indic_trans_IAST"], [(temp_sentence, source_lang, source_script, dest_script), (temp_sentence, dest_script), (temp_sentence, dest_script)], [azure_transliteration, libindic, indic_transliteration_MALAYALAM_OTHER]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) # -> Function to transliterate from gujarati and oriya to gurmukhi def dial_comparison_transliteration_guj_or_to_gur(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_trans", "libindic", "indic_trans_IAST"], [(temp_sentence, source_script, dest_script), (temp_sentence, dest_script), (temp_sentence, source_script)], [indic_trans, libindic, indic_transliteration_OTHER_GURMUKHI]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) # -> Function to transliterate from gurmukhi and oriya to gujarati def dial_comparison_transliteration_gur_or_to_guj(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_trans", "libindic", "indic_trans_IAST"], [(temp_sentence, source_script, dest_script), (temp_sentence, dest_script), (temp_sentence, source_script)], [indic_trans, libindic, indic_transliteration_OTHER_GUJARATI]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) # -> Function to transliterate from gujarati and gurmukhi to oriya def dial_comparison_transliteration_guj_gur_to_or(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["indic_trans", "libindic", "indic_trans_IAST"], [(temp_sentence, source_script, dest_script), (temp_sentence, dest_script), (temp_sentence, source_script)], [indic_trans, libindic, indic_transliteration_OTHER_ORIYA]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) # -> Function to transliterate from latin to arabic def dial_comparison_transliteration_latin_arabic(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["Azure", "transString", "google"], [(temp_sentence, source_lang, source_script, dest_script), (temp_sentence, 1), (temp_sentence, "en", "ar")], [azure_transliteration, transString, google]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) # -> Function to transliterate from chinese to latin def dial_comparison_transliteration_chinese_latin(text, source_lang, source_script, dest_script): sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(["Azure", "Pinyin"], [(temp_sentence, source_lang, source_script, dest_script), (temp_sentence)], [azure_transliteration, translit_CHINESE_LATIN]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) # -> Function to transliterate from thai, sinhala, mongolian and Hebrew to latin def dial_comparison_transliteration_th_sin_mng_heb_latin(text, source_lang, source_script, dest_script): if source_lang == "iw": source_lang = "he" sentences = sentence_tokenize.sentence_split(text, lang=source_lang) final_transliterated_whole_sentence = [] for sentence_ in sentences: print("Full Sentence is", sentence_) final_transliterated_words = [] for sentence in sentence_.split(" "): if sentence in etc_punctuation: continue print("Original Word", sentence) temp_sentence = punct_remover(sentence) i = 0 priority_list = list() sources_name = myDict() transliterated_words = [] for source, args, function in zip(['Azure', 'anyascii'], [(temp_sentence, source_lang, source_script, dest_script), (temp_sentence)], [azure_transliteration, translit_th_sin_mng_heb_to_latin]): try: transliterated_word = function(*args) if source == "libindic" or source == "unicode": transliterated_word = transliterated_word.rstrip() elif source == "sheetal": transliterated_word = transliterated_word.replace("\n", "") transliterated_words.append(transliterated_word) priority_list.append(source) sources_name.add(str(i), str(source)) i = i + 1 except: pass best_output = compare_outputs_transliteration(temp_sentence, transliterated_words, sources_name, priority_list) best_output = final_transliterated_sentence( temp_sentence, best_output ) final_transliterated_words.append(best_output) final_transliterated_whole_sentence.append(" ".join(final_transliterated_words)) return " ".join(final_transliterated_whole_sentence) def compare_outputs_transliteration(word, outputs, sources_name, priority_list): # print(outputs) # doc2 = docx.Document() # sections = doc2.sections # for section in sections: # section.top_margin = Inches(0.2) # section.bottom_margin = Inches(0.2) # section.left_margin = Inches(0.2) # section.right_margin = Inches(0.2) # section = doc2.sections[-1] # new_height = section.page_width # section.page_width = section.page_height # section.page_height = new_height # name = 'Final table ' + doc_file # doc2.add_heading(name, 0) # doc_para = doc2.add_paragraph() # doc_para.add_run('Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex').bold = True # table2 = doc2.add_table(rows=1, cols=4) # table2.style = 'TableGrid' # hdr_Cells = table2.rows[0].cells # hdr_Cells[0].paragraphs[0].add_run("Input").bold = True # hdr_Cells[1].paragraphs[0].add_run("Output1").bold = True # hdr_Cells[2].paragraphs[0].add_run("Output2").bold = True # hdr_Cells[3].paragraphs[0].add_run("Output3").bold = True print("Before Comparing transliteration outputs", sources_name, outputs, priority_list) O1ANDS1, O2ANDS2 = selection_source_transliteration( sources_name, outputs, priority_list ) print(O1ANDS1, "compare all transliterations") # add_dial_comparison_doc2_transliteration(doc2, table2, word, O1ANDS1, O2ANDS2, sources_name) return O1ANDS1[0] def add_dial_comparison_doc2_transliteration(doc2, table2, word, O1ANDS1, O2ANDS2, sources_name): row_Cells = table2.add_row().cells row_Cells[0].text = word row_Cells[1].text = O1ANDS1[0] row_Cells[1].paragraphs[0].add_run("(Source : " + str(O1ANDS1[1]) + ")") row_Cells[2].text = O2ANDS2[0] row_Cells[2].paragraphs[0].add_run("(Source : " + str(O2ANDS2[1]) + ")") # -> Housing all the Script Pair Combinations for Transliterations def transliterate(dest_script, src_script, src_lang, text): print("transliterate", dest_script, src_script, src_lang, text) # if src_script == "Common" or dest_script == "Common" or src_script == "None" or dest_script == "None" or src_script == dest_script: # return trans_text = text if dest_script == "Latin" and src_script == "Devanagari": # trans_text = dial_comparison_transliteration_dev_rom_ph1(text, src_lang, src_script,dest_script) trans_text = dial_comparison_transliteration_dev_rom_ph1_sentence_wise2( text, src_lang, src_script, dest_script ) elif dest_script == "Devanagari" and src_script == "Latin": trans_text = dial_comparison_transliteration_rom_dev_ph1( text, src_lang, src_script, dest_script ) # trans_text=dial_comparison_transliteration_rom_dev_ph1_sentence_wise(text, src_lang, src_script,dest_script) elif dest_script == "Latin" and src_script == "Arabic": trans_text = dial_comparison_transliteration_arbic_to_rom_ph1( text, src_lang, src_script, dest_script ) elif dest_script == "Latin" and src_script == "Kannada": trans_text = dial_comparison_transliteration_kann_to_rom_ph1( text, src_lang, src_script, dest_script ) elif dest_script == "Latin" and src_script == "Tamil": trans_text = dial_comparison_transliteration_tamil_to_rom_ph1( text, src_lang, src_script, dest_script ) elif dest_script == "Latin" and src_script == "Bengali": trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1( text, src_lang, src_script, dest_script ) elif dest_script == "Latin" and src_script == "Telugu": trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1( text, src_lang, src_script, dest_script ) elif dest_script == "Latin" and src_script == "Malayalam": trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1( text, src_lang, src_script, dest_script ) elif dest_script == "Gurmukhi" and src_script == "Latin": trans_text = dial_comparison_transliteration_latin_gurmukhi( text, src_lang, src_script, dest_script ) elif dest_script == "Cyrillic" and src_script == "Latin": trans_text = dial_comparison_transliteration_latin_cyrillic( text, src_lang, src_script, dest_script ) elif dest_script == "Telugu" and src_script == "Latin": trans_text = dial_comparison_transliteration_latin_telugu_sentence_wise( text, src_lang, src_script, dest_script ) elif dest_script == "Latin" and src_script == "Gurmukhi": trans_text = dial_comparison_transliteration_gurmukhi_latin_sentence_wise( text, src_lang, src_script, dest_script ) elif dest_script == "Latin" and src_script == "Cyrillic": trans_text = dial_comparison_transliteration_cyrilic_latin_sentence_wise( text, src_lang, src_script, dest_script ) elif dest_script == "Latin" and src_script == "Gujarati": trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1( text, src_lang, src_script, dest_script ) elif dest_script == "Latin" and src_script == "Oriya": trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1( text, src_lang, src_script, dest_script ) elif dest_script == "Gujarati" and src_script == "Latin": trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( text, src_lang, src_script, dest_script ) elif dest_script == "Oriya" and src_script == "Latin": trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( text, src_lang, src_script, dest_script ) elif dest_script == "Tamil" and src_script == "Latin": trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( text, src_lang, src_script, dest_script ) elif dest_script == "Malayalam" and src_script == "Latin": trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( text, src_lang, src_script, dest_script ) elif dest_script == "Bengali" and src_script == "Latin": trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( text, src_lang, src_script, dest_script ) elif dest_script == "Devanagari" and src_script == "Oriya": trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari( text, src_lang, src_script, dest_script ) elif dest_script == "Devanagari" and src_script == "Gujarati": trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari( text, src_lang, src_script, dest_script ) elif dest_script == "Devanagari" and src_script == "Malayalam": trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari( text, src_lang, src_script, dest_script ) elif dest_script == "Devanagari" and src_script == "Telugu": trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari( text, src_lang, src_script, dest_script ) elif dest_script == "Oriya" and src_script == "Devanagari": trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te( text, src_lang, src_script, dest_script ) elif dest_script == "Gujarati" and src_script == "Devanagari": trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te( text, src_lang, src_script, dest_script ) elif dest_script == "Malayalam" and src_script == "Devanagari": trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te( text, src_lang, src_script, dest_script ) elif dest_script == "Telugu" and src_script == "Devanagari": trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te( text, src_lang, src_script, dest_script ) elif dest_script == "Devanagari" and src_script == "Bengali": trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari( text, src_lang, src_script, dest_script ) elif dest_script == "Devanagari" and src_script == "Gurmukhi": trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari( text, src_lang, src_script, dest_script ) elif dest_script == "Devanagari" and src_script == "Kannada": trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari( text, src_lang, src_script, dest_script ) elif dest_script == "Bengali" and src_script == "Devanagari": trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te( text, src_lang, src_script, dest_script ) elif dest_script == "Gurmukhi" and src_script == "Devanagari": trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te( text, src_lang, src_script, dest_script ) elif dest_script == "Kannada" and src_script == "Devanagari": trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te( text, src_lang, src_script, dest_script ) elif dest_script == "Tamil" and src_script == "Kannada": trans_text = dial_comparison_transliteration_kannada_ml_ta_te_ben( text, src_lang, src_script, dest_script ) elif dest_script == "Malayalam" and src_script == "Kannada": trans_text = dial_comparison_transliteration_kannada_ml_ta_te_ben( text, src_lang, src_script, dest_script ) elif dest_script == "Telugu" and src_script == "Kannada": trans_text = dial_comparison_transliteration_kannada_ml_ta_te_ben( text, src_lang, src_script, dest_script ) elif dest_script == "Kannada" and src_script == "Tamil": trans_text = dial_comparison_transliteration_ml_ta_te_ben_kannada( text, src_lang, src_script, dest_script ) elif dest_script == "Kannada" and src_script == "Malayalam": trans_text = dial_comparison_transliteration_ml_ta_te_ben_kannada( text, src_lang, src_script, dest_script ) elif dest_script == "Kannada" and src_script == "Telugu": trans_text = dial_comparison_transliteration_ml_ta_te_ben_kannada( text, src_lang, src_script, dest_script ) elif dest_script == "Kannada" and src_script == "Latin": trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( text, src_lang, src_script, dest_script ) elif dest_script == "Tamil" and src_script == "Devanagari": trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te( text, src_lang, src_script, dest_script ) elif dest_script == "Devanagari" and src_script == "Tamil": trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari( text, src_lang, src_script, dest_script ) elif dest_script == "Telugu" and src_script == "Tamil": trans_text = dial_comparison_transliteration_tamil_other( text, src_lang, src_script, dest_script ) elif dest_script == "Malayalam" and src_script == "Tamil": trans_text = dial_comparison_transliteration_tamil_other( text, src_lang, src_script, dest_script ) elif dest_script == "Tamil" and src_script == "Malayalam": trans_text = dial_comparison_transliteration_other_tamil( text, src_lang, src_script, dest_script ) elif dest_script == "Tamil" and src_script == "Telugu": trans_text = dial_comparison_transliteration_other_tamil( text, src_lang, src_script, dest_script ) elif dest_script == "Malayalam" and src_script == "Telugu": trans_text = dial_comparison_transliteration_te_to_ml( text, src_lang, src_script, dest_script ) elif dest_script == "Telugu" and src_script == "Malayalam": trans_text = dial_comparison_transliteration_ml_to_te( text, src_lang, src_script, dest_script ) elif dest_script == "Gurmukhi" and src_script == "Gujarati": trans_text = dial_comparison_transliteration_guj_or_to_gur( text, src_lang, src_script, dest_script ) elif dest_script == "Gujarati" and src_script == "Gurmukhi": trans_text = dial_comparison_transliteration_gur_or_to_guj( text, src_lang, src_script, dest_script ) elif dest_script == "Gujarati" and src_script == "Oriya": trans_text = dial_comparison_transliteration_gur_or_to_guj( text, src_lang, src_script, dest_script ) elif dest_script == "Gurmukhi" and src_script == "Oriya": trans_text = dial_comparison_transliteration_guj_or_to_gur( text, src_lang, src_script, dest_script ) elif dest_script == "Oriya" and src_script == "Gujarati": trans_text = dial_comparison_transliteration_guj_gur_to_or( text, src_lang, src_script, dest_script ) elif dest_script == "Oriya" and src_script == "Gurmukhi": trans_text = dial_comparison_transliteration_guj_gur_to_or( text, src_lang, src_script, dest_script ) elif dest_script == "Bengali" and src_script == "Kannada": trans_text = dial_comparison_transliteration_kannada_ml_ta_te_ben( text, src_lang, src_script, dest_script ) elif dest_script == "Kannada" and src_script == "Bengali": trans_text = dial_comparison_transliteration_ml_ta_te_ben_kannada( text, src_lang, src_script, dest_script ) elif dest_script == "Devanagari" and src_script == "Arabic": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_arbic_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_rom_dev_ph1( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Gurmukhi" and src_script == "Arabic": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_arbic_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_gurmukhi( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Gujarati" and src_script == "Arabic": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_arbic_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Cyrillic" and src_script == "Arabic": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_arbic_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_cyrillic( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Arabic" and src_script == "Latin": trans_text = dial_comparison_transliteration_latin_arabic( text, src_lang, src_script, dest_script ) elif dest_script == "Cyrillic" and src_script == "Devanagari": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_dev_rom_ph1_sentence_wise2( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_cyrillic( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Kannada" and src_script == "Arabic": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_arbic_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Tamil" and src_script == "Arabic": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_arbic_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Telugu" and src_script == "Arabic": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_arbic_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_telugu_sentence_wise( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Malayalam" and src_script == "Arabic": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_arbic_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Bengali" and src_script == "Arabic": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_arbic_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Oriya" and src_script == "Arabic": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_arbic_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Cyrillic" and src_script == "Kannada": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_kann_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_cyrillic( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Gujarati" and src_script == "Kannada": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_kann_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Gurmukhi" and src_script == "Kannada": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_kann_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_gurmukhi( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Oriya" and src_script == "Kannada": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_kann_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Cyrillic" and src_script == "Tamil": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_tamil_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_cyrillic( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Tamil" and src_script == "Cyrillic": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_cyrilic_latin_sentence_wise( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Tamil" and src_script == "Bengali": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Telugu" and src_script == "Bengali": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_telugu_sentence_wise( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Malayalam" and src_script == "Bengali": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Arabic" and src_script == "Devanagari": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_dev_rom_ph1_sentence_wise2( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_arabic( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Arabic" and src_script == "Cyrillic": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_cyrilic_latin_sentence_wise( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_arabic( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Arabic" and src_script == "Gurmukhi": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_gurmukhi_latin_sentence_wise( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_arabic( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Arabic" and src_script == "Gujarati": temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_arabic( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Latin" and src_script == "Hanji": if src_lang == "zh-CN": src_lang = "zh-Hans" trans_text = dial_comparison_transliteration_chinese_latin( text, src_lang, src_script, dest_script ) elif dest_script == "Devanagari" and src_script == "Hanji": if src_lang == "zh-CN": src_lang = "zh-Hans" temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_chinese_latin( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_rom_dev_ph1( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Arabic" and src_script == "Hanji": if src_lang == "zh-CN": src_lang = "zh-Hans" temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_chinese_latin( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_arabic( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Gurmukhi" and src_script == "Hanji": if src_lang == "zh-CN": src_lang = "zh-Hans" temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_chinese_latin( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_gurmukhi( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Gujarati" and src_script == "Hanji": if src_lang == "zh-CN": src_lang = "zh-Hans" temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_chinese_latin( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Oriya" and src_script == "Hanji": if src_lang == "zh-CN": src_lang = "zh-Hans" temp_dest_script = "Latin" temp_text = dial_comparison_transliteration_chinese_latin( text, src_lang, src_script, temp_dest_script ) trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn( temp_text, src_lang, temp_dest_script, dest_script ) elif dest_script == "Latin" and src_script == "Thai": trans_text = dial_comparison_transliteration_th_sin_mng_heb_latin( text, src_lang, src_script, dest_script ) elif dest_script == "Latin" and src_script == "Sinhala": trans_text = dial_comparison_transliteration_th_sin_mng_heb_latin( text, src_lang, src_script, dest_script ) elif dest_script == "Latin" and src_script == "Hebrew": trans_text = dial_comparison_transliteration_th_sin_mng_heb_latin( text, src_lang, src_script, dest_script ) elif dest_script == "Latin" and src_script == "Mongolian": src_lang = "mn-Cyrl" trans_text = dial_comparison_transliteration_th_sin_mng_heb_latin( text, src_lang, src_script, dest_script ) return trans_text # -> Main Transliteration Function to co-ordingate all the functions def makeTransliteration_only(**kwargs): line = kwargs.get('line') lang = kwargs.get('lang') src_script = kwargs.get('src_script') dest_script = kwargs.get('dest_script') dual_dial_script = kwargs.get('dual_dial_script') """ Checking if Transliteration is really Required or not """ if (src_script == dest_script and dual_dial_script == "No"): return line print("transliterating", dest_script, src_script, lang, str(line)) return transliterate(dest_script, src_script, lang, str(line)) def add_dual_dialogue(converted_df, original_df, non_dial_dest_lang, dial_dest_lang, dual_dialogue, dll=None, dls=None): doc = docx.Document() for idx, line in enumerate(converted_df): if line[3] == 'transition': addTransition(doc, str(line[2])) elif line[3] == 'special_term': addSpecialTerm(doc, str(line[2])) elif line[3] == 'slugline': addSlugLine(doc, str(line[2])) elif line[3] == 'action': addActionLine(doc, str(line[2]), non_dial_dest_lang) elif line[3] == 'speaker': addSpeaker(doc, str(line[2])) elif line[3] == 'parenthetical': addParenthetical(doc, str(line[2])) elif line[3] == 'dialogue': if dual_dialogue: current_lang = language_detector(original_df[idx][2]) if current_lang != dll: translated_text = translate_comparison2(original_df[idx][2], current_lang, dll) else: translated_text = original_df[idx][2] current_script = script_det(translated_text) if current_script != dls: translated_text = transliterate(dls, current_script, dls, translated_text) dual_script(doc, str(translated_text), str(line[2]), dial_dest_lang) else: addDialogue(doc, str(line[2]), dial_dest_lang) return doc