"""Transliterate the dialogues of a screenplay .docx into another script."""
import os
import sys
import docx
import re
# import textract
from tqdm import tqdm
from collections import Counter
import ntpath
from docx.shared import Inches, Cm, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
import requests
import uuid
import json
import nltk.translate.bleu_score as bleu
import nltk.translate.gleu_score as gleu
from rouge_score import rouge_scorer
import numpy as np
from indicnlp.tokenize import sentence_tokenize
import nltk
# try: nltk.data.find('tokenizers/punkt')
# except LookupError: nltk.download('punkt')
# try: nltk.data.find('wordnet')
# except LookupError: ###nltk.download('wordnet')
# print("error in finding wordnet")
# import logging
# from logger import get_module_logger
# log = get_module_logger(__name__)
# log.info('Logger working')
from MNF.settings import BasePath
basePath = BasePath()

# google
from google.cloud import translate
from google.cloud import translate_v2 as Translate

import datetime
from pytz import timezone
from .script_detector import script_cat
from .buck_2_unicode import buck_2_unicode
from .translation_metric import (
    manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score,
    rouge_diff_score, diff_score, critera4_5,
)
from .selection_source import (
    selection_source, function5, function41, function311, function221,
    function2111, function11111, selection_source_transliteration,
    two_sources_two_outputs,
)
from .script_writing import (
    addSlugLine, addActionLine, addSpeaker, addParenthetical, addDialogue,
    dual_script, addTransition, dial_checker, non_dial_checker,
)
from .script_reading import breaksen, getRefined, getSlugAndNonSlug, getSpeakers, getScenes
from .translation_resources import ibm_watson, google, aws, azure, yandex, lingvanex
from .transliteration_resources import (
    azure_transliteration, indic_trans, om_transliterator, libindic,
    indic_transliteration_IAST, indic_transliteration_ITRANS,
    sheetal, unicode_transliteration_GURMUKHI, indic_transliteration_GURMUKHI,
    transliteration_LATIN_CYRILLIC, indic_transliteration_TELUGU,
    unicode_transliteration_GURMUKHI_LATIN, indic_transliteration_GURMUKHI_LATIN,
)

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/MNF/json_keys/authentication.json"


def makeTransliteration_only(translation_and_transliteration, dial_dest_script,
                             dual_dial_script, original_file, dial_dest_lang):
    """Write a copy of a screenplay whose dialogues are transliterated.

    The script is read with the ``script_reading`` helpers. Slug lines, action
    lines, speakers, parentheticals and transitions are written back with
    their original text; every non-empty dialogue is transliterated into
    ``dial_dest_script`` by comparing the outputs of several engines.

    Args:
        translation_and_transliteration: ``"Yes"`` makes the dual-script layout
            take its first text from the dialogues collected in the first pass
            over the file instead of from the line currently being written.
        dial_dest_script: Target script name, e.g. ``"Latin"`` or
            ``"Devanagari"``.
        dual_dial_script: ``"Yes"`` writes source and transliterated dialogue
            together via ``dual_script``; anything else writes only the
            transliterated dialogue.
        original_file: Path of the .docx screenplay to read.
        dial_dest_lang: Used only to build the output file name.

    Returns:
        Path of the saved .docx file.
    """
    translate_client = Translate.Client()
    # NOTE(review): this v3 client is never used below; construction is kept
    # only because the original code performed it.
    client = translate.TranslationServiceClient()

    filename2 = original_file

    # Output document that receives the transliterated script.
    doc = docx.Document()

    x = datetime.datetime.now(timezone('UTC')).astimezone(timezone('Asia/Kolkata'))
    # NOTE(review): the stamp is day, month abbreviation, 24-hour hour and then
    # the 12-hour hour (%I); %M may have been intended - confirm before changing
    # the file naming scheme.
    doc_file = (
        basePath + "/media/scripts/translated/" + "trans_" + str(dial_dest_lang)
        + "_" + x.strftime("%d") + "_" + x.strftime("%b") + "_"
        + x.strftime("%H") + x.strftime("%I") + "_" + "trans" + '_of_'
        + ntpath.basename(filename2)
    )
    print(doc_file)

    # Landscape comparison document; one table row is added per compared word.
    doc2 = docx.Document()
    for section in doc2.sections:
        section.top_margin = Inches(0.2)
        section.bottom_margin = Inches(0.2)
        section.left_margin = Inches(0.2)
        section.right_margin = Inches(0.2)
    section = doc2.sections[-1]
    section.page_width, section.page_height = section.page_height, section.page_width

    name = 'Final table ' + doc_file
    doc2.add_heading(name, 0)
    doc_para = doc2.add_paragraph()
    doc_para.add_run(
        'Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex'
    ).bold = True
    table2 = doc2.add_table(rows=1, cols=4)
    table2.style = 'TableGrid'
    hdr_Cells = table2.rows[0].cells
    for cell, title in zip(hdr_Cells, ("Input", "Output1", "Output2", "Output3")):
        cell.paragraphs[0].add_run(title).bold = True

    refined, total_scenes = getRefined(filename2)
    sluglines, without_slug = getSlugAndNonSlug(refined)
    characters = getSpeakers(without_slug)
    scenes1, actionline, parenthetical_lis, speakers, dialogues = getScenes(
        refined, total_scenes, characters)

    # Sentences made only of ellipsis fragments are not transliterated.
    etc_punctuation = ("", " . . .", " . .", " . . ”")

    # ------------------------------------------------------------------ helpers

    def language_detector(text):
        """Return the source language Google detects for ``text``."""
        result = translate_client.translate(text, target_language='hi')
        return result["detectedSourceLanguage"]

    def script_det(text):
        """Return the script of the first non-punctuation character of ``text``."""
        punctuations = '''!()-[]{};:'"\\,<>./?@#$%^&*_~'''
        first_char = next((ch for ch in text if ch not in punctuations), "")
        return script_cat(first_char)[0]

    def punct_remover(string):
        """Replace every punctuation character in ``string`` with a space."""
        punctuations = '''!()-[]{};:'"\\,<>./?@#$%^&*_~…।'''
        return string.translate(str.maketrans({ch: " " for ch in punctuations}))

    def space_after_punct(text):
        """Put one space after , ! ? ( ) … - and collapse whitespace runs."""
        text = text.replace('. . .', ' ... ')
        text = re.sub('([,!?()…-])', r'\1 ', text)
        text = re.sub(r'\s{2,}', ' ', text)
        return text

    def final_transliterated_sentence(original, transliterated):
        """Re-attach the punctuation of ``original`` to ``transliterated`` words.

        ``transliterated`` is expected to hold one word for every token of
        ``original`` that is not itself a punctuation mark.
        """
        punct_list = ['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', ' ',
                      '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']',
                      '^', '_', '`', '{', '|', '}', '~', '…', '...', '।']
        tokens = space_after_punct(original).split()
        trans_words = transliterated.split()
        rebuilt = []
        j = 0
        for token in tokens:
            if token in punct_list:
                # Stand-alone punctuation has no transliterated counterpart.
                rebuilt.append(token)
                continue
            piece = trans_words[j]
            j += 1
            if token[-1] in punct_list:
                piece += token[-1]
            rebuilt.append(piece)
        transliterated_sentence = " ".join(rebuilt)
        # str.replace returns a new string; the original discarded both results.
        transliterated_sentence = transliterated_sentence.replace(' ... ', '...')
        transliterated_sentence = transliterated_sentence.replace('… ', '…')
        return transliterated_sentence

    def MNF_translate(text, dest_lang):
        """Translate ``text`` into ``dest_lang`` (not called by the flow below)."""
        result = translate_client.translate(text, target_language=dest_lang)
        return result['translatedText']

    def google_length_checker(t, temp_sentence, t0):
        """Pad ``t`` with the last one or two words of ``t0`` when it is short.

        Only a shortfall of exactly one or two words against ``temp_sentence``
        is padded; any other length is returned unchanged.
        """
        missing = len(temp_sentence.split()) - len(t.split())
        if missing == 1:
            return t + " " + t0.split()[-1]
        if missing == 2:
            return t + " " + t0.split()[-2] + " " + t0.split()[-1]
        return t

    def Halant_remover(T3):
        """Drop a trailing halant; safe on an empty string."""
        return T3[:-1] if T3.endswith("्") else T3

    def add_dial_comparison_doc2_transliteration(doc2, table2, word, O1ANDS1,
                                                 O2ANDS2, sources_name):
        """Append a row with the word and the two selected outputs to the table."""
        row_Cells = table2.add_row().cells
        row_Cells[0].text = word
        row_Cells[1].text = O1ANDS1[0]
        row_Cells[1].paragraphs[0].add_run('(Source : ' + str(O1ANDS1[1]) + ')')
        row_Cells[2].text = O2ANDS2[0]
        row_Cells[2].paragraphs[0].add_run('(Source : ' + str(O2ANDS2[1]) + ')')

    def compare_outputs_transliteration(word, outputs, sources_name, priority_list):
        """Select among ``outputs``, log the choice in the table, return the first pick."""
        O1ANDS1, O2ANDS2 = selection_source_transliteration(
            sources_name, outputs, priority_list)
        print(O1ANDS1)
        add_dial_comparison_doc2_transliteration(
            doc2, table2, word, O1ANDS1, O2ANDS2, sources_name)
        return O1ANDS1[0]

    def _sources(labels):
        """Build the ``{'0': label, '1': label, ...}`` mapping from ``labels``."""
        return {str(index): label for index, label in enumerate(labels)}

    def _word_wise(text, split_lang, labels, priority_list, run_engines):
        """Transliterate ``text`` by sending each word to every engine.

        ``run_engines(word)`` must return one output per entry of ``labels``,
        in the same order.
        """
        sources_name = _sources(labels)
        transliterated_text = []
        for sentence in sentence_tokenize.sentence_split(text, lang=split_lang):
            if sentence in etc_punctuation:
                continue
            print(sentence)
            chosen = []
            for word in sentence.split():
                if word == ".":
                    continue
                print(word)
                chosen.append(compare_outputs_transliteration(
                    word, run_engines(word), sources_name, priority_list))
            transliterated_text.append(" ".join(chosen))
        return " ".join(transliterated_text)

    def _sentence_wise(text, split_lang, labels, priority_list, run_engines):
        """Transliterate ``text`` by sending whole sentences to every engine.

        Punctuation is removed before the engines run, outputs are compared
        word by word, and the punctuation is re-attached afterwards.
        ``run_engines(sentence)`` must return one output per entry of
        ``labels``, in the same order. An engine output with fewer words than
        the input raises ``IndexError``, as the original code did.
        """
        sources_name = _sources(labels)
        transliterated_text = []
        for sentence in sentence_tokenize.sentence_split(text, lang=split_lang):
            if sentence in etc_punctuation:
                continue
            print("original_sentence", sentence)
            temp_sentence = punct_remover(sentence)
            print("sentence_without_punctuation", temp_sentence)
            per_engine_words = [out.split() for out in run_engines(temp_sentence)]
            chosen = []
            for i, word in enumerate(temp_sentence.split()):
                outputs = [words[i] for words in per_engine_words]
                chosen.append(compare_outputs_transliteration(
                    word, outputs, sources_name, priority_list))
            trans_sent_wo_punct = " ".join(chosen)
            print("trans_sent_wo_punct", trans_sent_wo_punct)
            transliterated_sentence = final_transliterated_sentence(
                sentence, trans_sent_wo_punct)
            print("trans_sent_w_punct", transliterated_sentence)
            transliterated_text.append(transliterated_sentence)
        return " ".join(transliterated_text)

    # ------------------------------------------------ per script-pair comparers

    def dial_comparison_transliteration_rom_dev_ph1(text, source_lang, source_script,
                                                    dest_script):
        """Latin -> Devanagari, word by word (language and scripts are forced)."""
        source_lang, source_script, dest_script = "hi", "Latin", "Devanagari"
        labels = ['Azure', 'indic_trans', 'google', 'indic_trans_IAST']

        def run_engines(word):
            return [
                azure_transliteration(word, source_lang, source_script, dest_script),
                indic_trans(word, source_script, dest_script),
                google(word, 'en', 'hi'),
                indic_transliteration_IAST(word),
            ]

        return _word_wise(text, 'en', labels, labels, run_engines)

    def dial_comparison_transliteration_rom_dev_ph1_sentence_wise(
            text, source_lang, source_script, dest_script):
        """Latin -> Devanagari, sentence by sentence (language forced to "hi")."""
        source_lang = "hi"
        labels = ['Azure', 'indic_trans', 'google', 'indic_trans_IAST']

        def run_engines(sentence):
            t00 = azure_transliteration(sentence, source_lang, source_script, dest_script)
            print(t00)
            t11 = indic_trans(sentence, source_script, dest_script)
            print(t11)
            # Google may return fewer words; pad from the Azure output.
            t22 = google_length_checker(google(sentence, 'en', 'hi'), sentence, t00)
            print("T22 transliteration", t22)
            t33 = indic_transliteration_IAST(sentence)
            print(t33)
            t33 = " ".join(Halant_remover(piece) for piece in t33.split())
            return [t00, t11, t22, t33]

        return _sentence_wise(text, 'en', labels, labels, run_engines)

    def dial_comparison_transliteration_dev_rom_ph1_sentence_wise(
            text, source_lang, source_script, dest_script):
        """Devanagari -> Latin, sentence by sentence."""
        labels = ['indic_trans', 'Azure', 'libindic', 'sheetal']
        priority_list = ['indic_trans', 'Azure', 'sheetal', 'libindic']

        def run_engines(sentence):
            t0 = indic_trans(sentence, source_script, dest_script)
            print(t0)
            t1 = azure_transliteration(sentence, source_lang, source_script, dest_script)
            print(t1)
            t2 = libindic(sentence, dest_script).rstrip()
            print(t2)
            t3 = sheetal(sentence).replace('\n', '')
            print(t3)
            return [t0, t1, t2, t3]

        return _sentence_wise(text, 'hi', labels, priority_list, run_engines)

    def dial_comparison_transliteration_dev_rom_ph1(text, source_lang, source_script,
                                                    dest_script):
        """Devanagari -> Latin, word by word."""
        labels = ['indic_trans', 'Azure', 'libindic', 'sheetal']
        priority_list = ['indic_trans', 'Azure', 'sheetal', 'libindic']

        def run_engines(word):
            return [
                indic_trans(word, source_script, dest_script),
                azure_transliteration(word, source_lang, source_script, dest_script),
                libindic(word, dest_script).rstrip(),
                sheetal(word).replace('\n', ''),
            ]

        return _word_wise(text, 'hi', labels, priority_list, run_engines)

    def dial_comparison_transliteration_arbic_to_rom_ph1(text, source_lang,
                                                         source_script, dest_script):
        """Arabic -> Latin, word by word."""
        labels = ['indic_trans', 'Azure', 'buck_2_unicode']

        def run_engines(word):
            return [
                indic_trans(word, source_script, dest_script),
                azure_transliteration(word, source_lang, source_script, dest_script),
                buck_2_unicode(word),
            ]

        return _word_wise(text, 'en', labels, labels, run_engines)

    def dial_comparison_transliteration_kann_to_rom_ph1(text, source_lang,
                                                        source_script, dest_script):
        """Kannada -> Latin, word by word."""
        labels = ['om_transliteration', 'indic_trans', 'libindic', 'Azure']

        def run_engines(word):
            return [
                om_transliterator(word),
                indic_trans(word, source_script, dest_script),
                libindic(word, dest_script),
                azure_transliteration(word, source_lang, source_script, dest_script),
            ]

        return _word_wise(text, 'en', labels, labels, run_engines)

    def dial_comparison_transliteration_tamil_to_rom_ph1(text, source_lang,
                                                         source_script, dest_script):
        """Tamil -> Latin, word by word."""
        labels = ['Azure', 'libindic', 'indic_trans']

        def run_engines(word):
            # Outputs follow the order of ``labels``; the original listed the
            # indic_trans output under the 'libindic' label and vice versa.
            return [
                azure_transliteration(word, source_lang, source_script, dest_script),
                libindic(word, dest_script),
                indic_trans(word, source_script, dest_script),
            ]

        return _word_wise(text, 'en', labels, labels, run_engines)

    def dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
            text, source_lang, source_script, dest_script):
        """Bengali / Telugu / Malayalam -> Latin, word by word."""
        labels = ['Azure', 'indic_trans', 'libindic']

        def run_engines(word):
            return [
                azure_transliteration(word, source_lang, source_script, dest_script),
                indic_trans(word, source_script, dest_script),
                libindic(word, dest_script),
            ]

        return _word_wise(text, 'en', labels, labels, run_engines)

    def dial_comparison_transliteration_latin_gurmukhi(text, source_lang,
                                                       source_script, dest_script):
        """Latin -> Gurmukhi, sentence by sentence (language forced to "pa")."""
        source_lang = "pa"
        labels = ['Azure', 'indic_trans', 'indic_trans_IAST']

        def run_engines(sentence):
            return [
                azure_transliteration(sentence, source_lang, source_script, dest_script),
                indic_transliteration_GURMUKHI(sentence),
                unicode_transliteration_GURMUKHI(sentence),
            ]

        return _sentence_wise(text, 'en', labels, labels, run_engines)

    def dial_comparison_transliteration_latin_cyrillic(text, source_lang,
                                                       source_script, dest_script):
        """Latin -> Cyrillic, sentence by sentence (language forced to "bg")."""
        source_lang = "bg"
        labels = ['Azure', 'indic_trans']

        def run_engines(sentence):
            return [
                azure_transliteration(sentence, source_lang, source_script, dest_script),
                transliteration_LATIN_CYRILLIC(sentence),
            ]

        return _sentence_wise(text, 'en', labels, labels, run_engines)

    def dial_comparison_transliteration_latin_telugu_sentence_wise(
            text, source_lang, source_script, dest_script):
        """Latin -> Telugu, sentence by sentence (language forced to "te")."""
        source_lang = "te"
        labels = ['indic_trans', 'Azure']

        def run_engines(sentence):
            t0 = indic_transliteration_TELUGU(sentence)
            print(t0)
            t1 = azure_transliteration(sentence, source_lang, source_script, dest_script)
            print(t1)
            return [t0, t1]

        return _sentence_wise(text, 'hi', labels, labels, run_engines)

    def dial_comparison_transliteration_gurmukhi_latin_sentence_wise(
            text, source_lang, source_script, dest_script):
        """Gurmukhi -> Latin, sentence by sentence (language forced to "pa")."""
        source_lang = "pa"
        labels = ['indic_trans', 'Azure', 'unicode']

        def run_engines(sentence):
            t0 = indic_transliteration_GURMUKHI_LATIN(sentence)
            print(t0)
            t1 = azure_transliteration(sentence, source_lang, source_script, dest_script)
            print(t1)
            t2 = unicode_transliteration_GURMUKHI_LATIN(sentence).rstrip()
            print(t2)
            return [t0, t1, t2]

        return _sentence_wise(text, 'hi', labels, labels, run_engines)

    # (source script, destination script) -> comparer
    transliterators = {
        ("Devanagari", "Latin"): dial_comparison_transliteration_dev_rom_ph1_sentence_wise,
        ("Latin", "Devanagari"): dial_comparison_transliteration_rom_dev_ph1,
        ("Arabic", "Latin"): dial_comparison_transliteration_arbic_to_rom_ph1,
        ("Kannada", "Latin"): dial_comparison_transliteration_kann_to_rom_ph1,
        ("Tamil", "Latin"): dial_comparison_transliteration_tamil_to_rom_ph1,
        ("Bengali", "Latin"): dial_comparison_transliteration_beng_tel_mal_to_rom_ph1,
        ("Telugu", "Latin"): dial_comparison_transliteration_beng_tel_mal_to_rom_ph1,
        ("Malayalam", "Latin"): dial_comparison_transliteration_beng_tel_mal_to_rom_ph1,
        ("Latin", "Gurmukhi"): dial_comparison_transliteration_latin_gurmukhi,
        ("Latin", "Cyrillic"): dial_comparison_transliteration_latin_cyrillic,
        ("Latin", "Telugu"): dial_comparison_transliteration_latin_telugu_sentence_wise,
        ("Gurmukhi", "Latin"): dial_comparison_transliteration_gurmukhi_latin_sentence_wise,
    }

    # ------------------------------------------ pass 1: collect source dialogues
    # Uses the same skip rules as the writing pass below (transitions and empty
    # dialogues), so original_dialogues[j] stays aligned with the j-th dialogue
    # written. The original also skipped dialogues that had a parenthetical,
    # which the writing pass does not.
    original_dialogues = []
    for scene in tqdm(scenes1[:]):
        for i, line in enumerate(scene):
            if i == 0 or isinstance(line, str):
                continue
            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            if line[speaker][2] == "":
                continue
            original_dialogues.append(line[speaker][2])

    # ------------------------------------- pass 2: detect languages and script
    # Stops after the first scene in which both an action line and a dialogue
    # were detected. The values stay None when nothing could be detected.
    non_dial_src_lang = None
    dial_src_lang = None
    dial_src_script = None
    for scene in tqdm(scenes1):
        saw_action = False
        saw_dialogue = False
        for i, line in enumerate(scene):
            if i == 0:
                continue
            if isinstance(line, str):
                saw_action = True
                non_dial_src_lang = language_detector(line)
                continue
            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            if line[speaker][2] == "":
                continue
            dial_src_lang = language_detector(line[speaker][2])
            dial_src_script = script_det(line[speaker][2])
            # Only count a dialogue once detection actually ran on it.
            saw_dialogue = True
        if saw_action and saw_dialogue:
            break
    print("non_dial_src_lang", non_dial_src_lang)
    print("dial_src_lang", dial_src_lang)
    print("dial_src_script", dial_src_script)

    # ---------------------------------------- pass 3: write the output document
    transliterate = transliterators.get((dial_src_script, dial_dest_script))
    if transliterate is None:
        print("no transliterator for", dial_src_script, "->", dial_dest_script,
              "- dialogues are written unchanged")

    j = 0
    for scene in tqdm(scenes1[:]):
        for i, line in enumerate(scene):
            if i == 0:
                addSlugLine(doc, line)
                continue
            if isinstance(line, str):
                addActionLine(doc, line, non_dial_src_lang)
                continue

            print("In dialogue")
            [speaker] = line.keys()
            if speaker == 'Transition':
                addTransition(doc, line[speaker])
                continue
            addSpeaker(doc, speaker)
            if line[speaker][0] != 'NONE':
                # Parentheticals are written as they are, never translated.
                addParenthetical(doc, line[speaker][0])

            dialogue = line[speaker][2]
            print("dialogue to be transliterated ", dialogue)
            if dialogue == "":
                continue

            if transliterate is None:
                # Unsupported script pair: keep the dialogue instead of failing
                # on an unbound variable.
                trans_text = dialogue
            else:
                trans_text = transliterate(
                    dialogue, dial_src_lang, dial_src_script, dial_dest_script)

            if dual_dial_script == "Yes":
                if translation_and_transliteration == "Yes":
                    dual_script(doc, original_dialogues[j], trans_text, dial_src_lang)
                    j = j + 1
                else:
                    dual_script(doc, dialogue, trans_text, dial_src_lang)
            else:
                addDialogue(doc, trans_text, dial_src_lang)

    doc.save(doc_file)
    # The comparison table (doc2) is built but not saved:
    # file_tr = rf"{basePath}/media/scripts/Final_Table" + str(forFinal.split('.')[0])+"_trans_to_"+str(dial_dest_lang)+'_'+"final.docx"
    # doc2.save(file_tr)
    print('done file is saved')
    return doc_file