import os
import sys
import docx
import re
# import textract
from tqdm import tqdm
from collections import Counter
import ntpath
from docx.shared import Inches, Cm, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
import requests, uuid, json
import nltk.translate.bleu_score as bleu
import nltk.translate.gleu_score as gleu
from rouge_score import rouge_scorer
import numpy as np
from indicnlp.tokenize import sentence_tokenize
import nltk
from MNF.settings import BasePath
basePath = BasePath()
from google.cloud import translate
from google.cloud import translate_v2 as Translate
from .script_detector import script_cat
from .buck_2_unicode import buck_2_unicode
from .translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
from .selection_source import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs
from .script_writing import addSlugLine, addActionLine, addSpeaker, addParenthetical, addDialogue, dual_script, addTransition, dial_checker, non_dial_checker
from .script_reading import breaksen, getRefined, getSlugAndNonSlug, getSpeakers, getScenes
from .translation_resources import ibm_watson, google, aws, azure, yandex  #lingvanex
from .transliteration_resources import azure_transliteration, indic_trans, om_transliterator, libindic, indic_transliteration_IAST, indic_transliteration_ITRANS, sheetal, unicode_transliteration_GURMUKHI, indic_transliteration_GURMUKHI, transliteration_LATIN_CYRILLIC, indic_transliteration_TELUGU, unicode_transliteration_GURMUKHI_LATIN, indic_transliteration_GURMUKHI_LATIN, transliteration_CYRILIC_LATIN, ConvertToLatin, readonly
from .detection import language_detector, script_det
# import logging
# from logger import get_module_logger
# log = get_module_logger(__name__)
# log.info('Logger working')


def makeTransliteration_translation(translation_and_transliteration, translated_file, dial_dest_script, dual_dial_script, original_file):
    """Transliterate the dialogues of a script and write the result as a .docx.

    The script in ``translated_file`` is parsed into scenes, the language and
    script of its action lines / dialogues are detected, and every dialogue is
    transliterated into ``dial_dest_script`` by the routine registered for the
    (source script, destination script) pair. Slug lines, action lines,
    speakers, parentheticals and transitions are copied unchanged.

    Args:
        translation_and_transliteration: when ``"Yes"`` (and dual dialogue is
            on), the left-hand dialogue of each dual pair is taken from
            ``original_file`` instead of ``translated_file``.
        translated_file: path of the script to transliterate. The output
            document is saved to this same path.
        dial_dest_script: name of the destination script, e.g. ``"Latin"``.
        dual_dial_script: ``"Yes"`` to write source and transliterated
            dialogue together via ``dual_script``; anything else writes only
            the transliterated dialogue via ``addDialogue``.
        original_file: path of the original script; only its dialogues are
            collected.

    Returns:
        The path the document was saved to (``translated_file``).
    """
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/MNF/json_keys/authentication.json"
    translate_client = Translate.Client()
    # The v3 client and its parent path are not used below; they are kept so
    # that client construction still happens exactly as before.
    client = translate.TranslationServiceClient()
    project_id = 'authentic-bongo-272808'
    location = "global"
    parent = f"projects/{project_id}/locations/{location}"

    filename1 = translated_file
    filename2 = original_file

    # Output document (script) and the comparison-table document.
    doc = docx.Document()
    docfile = translated_file
    print(docfile)

    doc2 = docx.Document()
    for section in doc2.sections:
        section.top_margin = Inches(0.2)
        section.bottom_margin = Inches(0.2)
        section.left_margin = Inches(0.2)
        section.right_margin = Inches(0.2)

    # Swap width and height of the last section.
    section = doc2.sections[-1]
    new_height = section.page_width
    section.page_width = section.page_height
    section.page_height = new_height

    name = 'Final table ' + docfile
    doc2.add_heading(name, 0)
    doc_para = doc2.add_paragraph()
    doc_para.add_run('Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex').bold = True
    table2 = doc2.add_table(rows=1, cols=4)
    table2.style = 'TableGrid'
    hdr_Cells = table2.rows[0].cells
    hdr_Cells[0].paragraphs[0].add_run("Input").bold = True
    hdr_Cells[1].paragraphs[0].add_run("Output1").bold = True
    hdr_Cells[2].paragraphs[0].add_run("Output2").bold = True
    hdr_Cells[3].paragraphs[0].add_run("Output3").bold = True

    # scenes for translated file
    refined, total_scenes = getRefined(filename1)
    sluglines, without_slug = getSlugAndNonSlug(refined)
    characters = getSpeakers(without_slug)
    scenes, actionline, parenthetical_lis, speakers, dialogues = getScenes(refined, total_scenes, characters)

    # scenes1 for original file
    refined, total_scenes = getRefined(filename2)
    sluglines, without_slug = getSlugAndNonSlug(refined)
    characters = getSpeakers(without_slug)
    scenes1, actionline, parenthetical_lis, speakers, dialogues = getScenes(refined, total_scenes, characters)

    # Sentence values that are skipped by every transliteration routine.
    etc_punctuation = ("", " . . .", " . .", " . . ”")

    def punct_remover(string):
        """Return ``string`` with every punctuation character replaced by a space."""
        # Raw string: the backslash is itself one of the punctuation characters.
        punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~…।'''
        for mark in punctuations:
            string = string.replace(mark, " ")
        return string

    def space_after_punct(text):
        """Insert a space after selected punctuation and collapse repeated whitespace."""
        #text = text.replace('...',' ... ')
        text = text.replace('. . .', ' ... ')
        text = re.sub('([,!?()…-])', r'\1 ', text)
        text = re.sub(r'\s{2,}', ' ', text)
        return text

    def final_transliterated_sentence(original, transliterated):
        """Re-attach the punctuation of ``original`` to the transliterated words.

        Tokens of ``original`` that are pure punctuation are copied as they
        are; every other token consumes the next transliterated word, and its
        trailing punctuation character (if any) is appended to that word.
        The token counts of both arguments are expected to line up.
        """
        original = space_after_punct(original)
        punct_list = ['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', ' ', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~', '…', '...', '।']
        original_tokens = original.split()
        transliterated_tokens = transliterated.split()
        sentence = []
        j = 0
        for token in original_tokens:
            if token in punct_list:
                sentence.append(token)
            elif token[-1] in punct_list:
                sentence.append(transliterated_tokens[j] + token[-1])
                j = j + 1
            else:
                sentence.append(transliterated_tokens[j])
                j = j + 1
        transliterated_sentence = " ".join(sentence)
        # str.replace returns a new string; the result has to be kept.
        transliterated_sentence = transliterated_sentence.replace(' ... ', '...')
        transliterated_sentence = transliterated_sentence.replace('… ', '…')
        return transliterated_sentence

    def MNF_translate(text, dest_lang):
        """Translate ``text`` to ``dest_lang`` with the Google v2 client."""
        result = translate_client.translate(text, target_language=dest_lang)
        translated_text = result['translatedText']
        return translated_text

    def google_length_checker(t, temp_sentence, t0):
        """Pad ``t`` with the last one or two words of ``t0`` when it is that much shorter than ``temp_sentence``."""
        missing = len(temp_sentence.split()) - len(t.split())
        if missing == 1:
            return t + " " + t0.split()[-1]
        if missing == 2:
            return t + " " + t0.split()[-2] + " " + t0.split()[-1]
        return t

    def Halant_remover(T3):
        """Drop a trailing halant from ``T3`` if present."""
        if T3.endswith("्"):
            return T3[:-1]
        return T3

    def transliterate_word_wise(text, split_lang, engines, sources_name, priority_list, show_sentence=True):
        """Transliterate ``text`` word by word.

        ``engines`` is a list of one-argument callables, in the same order as
        ``sources_name``; each word is passed to every engine and the winner
        is picked by ``compare_outputs_transliteration``.
        """
        transliterated_text = []
        for sentence in sentence_tokenize.sentence_split(text, lang=split_lang):
            if sentence in etc_punctuation:
                continue
            if show_sentence:
                print(sentence)
            OUT = []
            for word in sentence.split():
                if word == ".":
                    continue
                print(word)
                outputs = [engine(word) for engine in engines]
                OUT.append(compare_outputs_transliteration(word, outputs, sources_name, priority_list))
            transliterated_text.append(" ".join(OUT))
        return " ".join(transliterated_text)

    def choose_words(temp_sentence, sentence_outputs, sources_name, priority_list):
        """Pick the best candidate for each word of ``temp_sentence``.

        ``sentence_outputs`` holds one whole-sentence transliteration per
        source; the i-th token of each is offered as candidate for word i.
        """
        words = temp_sentence.split()
        if not words:
            return ""
        candidate_tokens = [output.split() for output in sentence_outputs]
        chosen = []
        for i, word in enumerate(words):
            outputs = [tokens[i] for tokens in candidate_tokens]
            chosen.append(compare_outputs_transliteration(word, outputs, sources_name, priority_list))
        return " ".join(chosen)

    def dial_comparison_transliteration_rom_dev_ph1(text, source_lang, source_script, dest_script):
        """Latin -> Devanagari, word by word (the arguments are overridden with fixed values)."""
        source_lang = "hi"
        source_script = "Latin"
        dest_script = "Devanagari"
        sources_name = {'0': 'Azure', '1': 'indic_trans', '2': 'google', '3': 'indic_trans_IAST'}
        priority_list = ['Azure', 'indic_trans', 'google', 'indic_trans_IAST']
        engines = [
            lambda word: azure_transliteration(word, source_lang, source_script, dest_script),
            lambda word: indic_trans(word, source_script, dest_script),
            lambda word: google(word, 'en', 'hi'),
            lambda word: indic_transliteration_IAST(word),
        ]
        return transliterate_word_wise(text, 'en', engines, sources_name, priority_list)

    def dial_comparison_transliteration_rom_dev_ph1_sentence_wise(text, source_lang, source_script, dest_script):
        """Latin -> Devanagari, transliterating whole sentences and choosing per word."""
        source_lang = "hi"
        sources_name = {'0': 'Azure', '1': 'indic_trans', '2': 'google', '3': 'indic_trans_IAST'}
        priority_list = ['Azure', 'indic_trans', 'google', 'indic_trans_IAST']
        transliterated_text = []
        for sentence in sentence_tokenize.sentence_split(text, lang='en'):
            if sentence in etc_punctuation:
                continue
            print("original_sentence", sentence)
            temp_sentence = punct_remover(sentence)
            print("sentence_without_punctuation", temp_sentence)
            t00 = azure_transliteration(temp_sentence, source_lang, source_script, dest_script)
            print(t00)
            t11 = indic_trans(temp_sentence, source_script, dest_script)
            print(t11)
            t = google(temp_sentence, 'en', 'hi')
            t22 = google_length_checker(t, temp_sentence, t00)
            print("T22 transliteration", t22)
            t33 = indic_transliteration_IAST(temp_sentence)
            print(t33)
            Out = []
            words = temp_sentence.split()
            if words:
                tokens0, tokens1, tokens2, tokens3 = t00.split(), t11.split(), t22.split(), t33.split()
            for i, word in enumerate(words):
                outputs = [tokens0[i], tokens1[i], tokens2[i], Halant_remover(tokens3[i])]
                Out.append(compare_outputs_transliteration(word, outputs, sources_name, priority_list))
            trans_sent_wo_punct = " ".join(Out)
            print("trans_sent_wo_punct", trans_sent_wo_punct)
            transliterated_sentence = final_transliterated_sentence(sentence, trans_sent_wo_punct)
            print("trans_sent_w_punct", transliterated_sentence)
            transliterated_text.append(transliterated_sentence)
        return " ".join(transliterated_text)

    def dial_comparison_transliteration_dev_rom_ph1(text, source_lang, source_script, dest_script):
        """Devanagari -> Latin, word by word (the 'ritwik' source is disabled)."""
        sources_name = {'0': 'indic_trans', '1': 'Azure', '2': 'libindic', '3': 'sheetal'}
        priority_list = ['indic_trans', 'Azure', 'sheetal', 'libindic']
        engines = [
            lambda word: indic_trans(word, source_script, dest_script),
            lambda word: azure_transliteration(word, source_lang, source_script, dest_script),
            lambda word: libindic(word, dest_script).rstrip(),
            lambda word: sheetal(word).replace('\n', ''),
        ]
        return transliterate_word_wise(text, 'hi', engines, sources_name, priority_list)

    def dial_comparison_transliteration_arbic_to_rom_ph1(text, source_lang, source_script, dest_script):
        """Arabic -> Latin, word by word."""
        print("hello")
        sources_name = {'0': 'indic_trans', '1': 'Azure', '2': 'buck_2_unicode'}
        priority_list = ['indic_trans', 'Azure', 'buck_2_unicode']
        engines = [
            lambda word: indic_trans(word, source_script, dest_script),
            lambda word: azure_transliteration(word, source_lang, source_script, dest_script),
            lambda word: buck_2_unicode(word),
        ]
        return transliterate_word_wise(text, 'en', engines, sources_name, priority_list)

    def dial_comparison_transliteration_kann_to_rom_ph1(text, source_lang, source_script, dest_script):
        """Kannada -> Latin, word by word."""
        print("hello")
        sources_name = {'0': 'om_transliteration', '1': 'indic_trans', '2': 'libindic', '3': 'Azure'}
        priority_list = ['om_transliteration', 'indic_trans', 'libindic', 'Azure']
        engines = [
            lambda word: om_transliterator(word),
            lambda word: indic_trans(word, source_script, dest_script),
            lambda word: libindic(word, dest_script),
            lambda word: azure_transliteration(word, source_lang, source_script, dest_script),
        ]
        return transliterate_word_wise(text, 'en', engines, sources_name, priority_list)

    def dial_comparison_transliteration_tamil_to_rom_ph1(text, source_lang, source_script, dest_script):
        """Tamil -> Latin, word by word."""
        print("hello")
        sources_name = {'0': 'Azure', '1': 'libindic', '2': 'indic_trans'}
        priority_list = ['Azure', 'libindic', 'indic_trans']
        # Engine order must follow sources_name; previously the libindic and
        # indic_trans outputs were passed in swapped positions and so were
        # attributed to the wrong source.
        engines = [
            lambda word: azure_transliteration(word, source_lang, source_script, dest_script),
            lambda word: libindic(word, dest_script),
            lambda word: indic_trans(word, source_script, dest_script),
        ]
        return transliterate_word_wise(text, 'en', engines, sources_name, priority_list)

    def dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(text, source_lang, source_script, dest_script):
        """Word-by-word transliteration to Latin using Azure, indic_trans and libindic."""
        print("hello gujarati to latin")
        sources_name = {'0': 'Azure', '1': 'indic_trans', '2': 'libindic'}
        priority_list = ['Azure', 'indic_trans', 'libindic']
        engines = [
            lambda word: azure_transliteration(word, source_lang, source_script, dest_script),
            lambda word: indic_trans(word, source_script, dest_script),
            lambda word: libindic(word, dest_script),
        ]
        return transliterate_word_wise(text, 'en', engines, sources_name, priority_list, show_sentence=False)

    def dial_comparison_transliteration_latin_gurmukhi(text, source_lang, source_script, dest_script):
        """Latin -> Gurmukhi, sentence-wise (source language forced to "pa")."""
        source_lang = "pa"
        sources_name = {'0': 'Azure', '1': 'indic_trans', '2': 'indic_trans_IAST'}
        priority_list = ['Azure', 'indic_trans', 'indic_trans_IAST']
        transliterated_text = []
        for sentence in sentence_tokenize.sentence_split(text, lang='en'):
            if sentence in etc_punctuation:
                continue
            temp_sentence = punct_remover(sentence)
            t00 = azure_transliteration(temp_sentence, source_lang, source_script, dest_script)
            t11 = indic_transliteration_GURMUKHI(temp_sentence)
            t22 = unicode_transliteration_GURMUKHI(temp_sentence)
            trans_sent_wo_punct = choose_words(temp_sentence, [t00, t11, t22], sources_name, priority_list)
            transliterated_text.append(final_transliterated_sentence(sentence, trans_sent_wo_punct))
        return " ".join(transliterated_text)

    def dial_comparison_transliteration_latin_cyrillic(text, source_lang, source_script, dest_script):
        """Latin -> Cyrillic, sentence-wise (source language forced to "bg")."""
        source_lang = "bg"
        sources_name = {'0': 'Azure', '1': 'indic_trans'}
        priority_list = ['Azure', 'indic_trans']
        transliterated_text = []
        for sentence in sentence_tokenize.sentence_split(text, lang='en'):
            if sentence in etc_punctuation:
                continue
            temp_sentence = punct_remover(sentence)
            t00 = azure_transliteration(temp_sentence, source_lang, source_script, dest_script)
            t11 = transliteration_LATIN_CYRILLIC(temp_sentence)
            trans_sent_wo_punct = choose_words(temp_sentence, [t00, t11], sources_name, priority_list)
            transliterated_text.append(final_transliterated_sentence(sentence, trans_sent_wo_punct))
        return " ".join(transliterated_text)

    def dial_comparison_transliteration_latin_telugu_sentence_wise(text, source_lang, source_script, dest_script):
        """Latin -> Telugu, sentence-wise (source language forced to "te")."""
        source_lang = "te"
        sources_name = {'0': 'indic_trans', '1': 'Azure'}
        priority_list = ['indic_trans', 'Azure']
        transliterated_text = []
        for sentence in sentence_tokenize.sentence_split(text, lang='hi'):
            if sentence in etc_punctuation:
                continue
            print("original_sentence", sentence)
            temp_sentence = punct_remover(sentence)
            print("sentence_without_punctuation", temp_sentence)
            t0 = indic_transliteration_TELUGU(temp_sentence)
            print(t0)
            t1 = azure_transliteration(temp_sentence, source_lang, source_script, dest_script)
            print(t1)
            trans_sent_wo_punct = choose_words(temp_sentence, [t0, t1], sources_name, priority_list)
            print("trans_sent_wo_punct", trans_sent_wo_punct)
            transliterated_sentence = final_transliterated_sentence(sentence, trans_sent_wo_punct)
            print("trans_sent_w_punct", transliterated_sentence)
            transliterated_text.append(transliterated_sentence)
        return " ".join(transliterated_text)

    def dial_comparison_transliteration_gurmukhi_latin_sentence_wise(text, source_lang, source_script, dest_script):
        """Gurmukhi -> Latin, sentence-wise (source language forced to "pa")."""
        source_lang = "pa"
        sources_name = {'0': 'indic_trans', '1': 'Azure', '2': 'unicode'}
        priority_list = ['indic_trans', 'Azure', 'unicode']
        transliterated_text = []
        for sentence in sentence_tokenize.sentence_split(text, lang='hi'):
            if sentence in etc_punctuation:
                continue
            print("original_sentence", sentence)
            temp_sentence = punct_remover(sentence)
            print("sentence_without_punctuation", temp_sentence)
            t0 = indic_transliteration_GURMUKHI_LATIN(temp_sentence)
            print(t0)
            t1 = azure_transliteration(temp_sentence, source_lang, source_script, dest_script)
            print(t1)
            t2 = unicode_transliteration_GURMUKHI_LATIN(temp_sentence).rstrip()
            print(t2)
            trans_sent_wo_punct = choose_words(temp_sentence, [t0, t1, t2], sources_name, priority_list)
            print("trans_sent_wo_punct", trans_sent_wo_punct)
            transliterated_sentence = final_transliterated_sentence(sentence, trans_sent_wo_punct)
            print("trans_sent_w_punct", transliterated_sentence)
            transliterated_text.append(transliterated_sentence)
        return " ".join(transliterated_text)

    def dial_comparison_transliteration_cyrilic_latin_sentence_wise(text, source_lang, source_script, dest_script):
        """Cyrillic -> Latin, sentence-wise (source language forced to "bg")."""
        print("cyrillic to latin")
        source_lang = "bg"
        # NOTE(review): index 0 is the Azure output but is labelled
        # 'indic_trans' here (and index 1 is labelled 'Azure'); left as-is
        # because the intended labels cannot be determined from this file.
        sources_name = {'0': 'indic_trans', '1': 'Azure', '2': 'unicode'}
        priority_list = ['indic_trans', 'Azure', 'unicode']
        transliterated_text = []
        for sentence in sentence_tokenize.sentence_split(text, lang='hi'):
            if sentence in etc_punctuation:
                continue
            print("original_sentence", sentence)
            temp_sentence = punct_remover(sentence)
            print("sentence_without_punctuation", temp_sentence)
            t0 = azure_transliteration(temp_sentence, source_lang, source_script, dest_script)
            print("t0", t0)
            t1 = transliteration_CYRILIC_LATIN(temp_sentence)
            print("t1", t1)
            t2 = ConvertToLatin(temp_sentence)
            print("t2", t2)
            trans_sent_wo_punct = choose_words(temp_sentence, [t0, t1, t2], sources_name, priority_list)
            print("trans_sent_wo_punct", trans_sent_wo_punct)
            transliterated_sentence = final_transliterated_sentence(sentence, trans_sent_wo_punct)
            print("trans_sent_w_punct", transliterated_sentence)
            transliterated_text.append(transliterated_sentence)
        return " ".join(transliterated_text)

    def compare_outputs_transliteration(word, outputs, sources_name, priority_list):
        """Select the best candidate for ``word``, log the top two in the table and return the winner."""
        O1ANDS1, O2ANDS2 = selection_source_transliteration(sources_name, outputs, priority_list)
        print(O1ANDS1)
        add_dial_comparison_doc2_transliteration(doc2, table2, word, O1ANDS1, O2ANDS2, sources_name)
        return O1ANDS1[0]

    def add_dial_comparison_doc2_transliteration(doc2, table2, word, O1ANDS1, O2ANDS2, sources_name):
        """Append a row (input word, first and second choice with their source) to ``table2``."""
        row_Cells = table2.add_row().cells
        row_Cells[0].text = word
        row_Cells[1].text = O1ANDS1[0]
        row_Cells[1].paragraphs[0].add_run('(Source : ' + str(O1ANDS1[1]) + ')')
        row_Cells[2].text = O2ANDS2[0]
        row_Cells[2].paragraphs[0].add_run('(Source : ' + str(O2ANDS2[1]) + ')')

    # Routines keyed by (source script, destination script).
    transliterators = {
        ("Devanagari", "Latin"): dial_comparison_transliteration_dev_rom_ph1,
        ("Latin", "Devanagari"): dial_comparison_transliteration_rom_dev_ph1,
        ("Arabic", "Latin"): dial_comparison_transliteration_arbic_to_rom_ph1,
        ("Kannada", "Latin"): dial_comparison_transliteration_kann_to_rom_ph1,
        ("Tamil", "Latin"): dial_comparison_transliteration_tamil_to_rom_ph1,
        ("Bengali", "Latin"): dial_comparison_transliteration_beng_tel_mal_to_rom_ph1,
        ("Telugu", "Latin"): dial_comparison_transliteration_beng_tel_mal_to_rom_ph1,
        ("Malayalam", "Latin"): dial_comparison_transliteration_beng_tel_mal_to_rom_ph1,
        ("Latin", "Gurmukhi"): dial_comparison_transliteration_latin_gurmukhi,
        # Was guarded by dest == "" and src == "Cyrillic", which could never
        # select a Latin -> Cyrillic conversion.
        ("Latin", "Cyrillic"): dial_comparison_transliteration_latin_cyrillic,
        ("Latin", "Telugu"): dial_comparison_transliteration_latin_telugu_sentence_wise,
        ("Gurmukhi", "Latin"): dial_comparison_transliteration_gurmukhi_latin_sentence_wise,
        ("Cyrillic", "Latin"): dial_comparison_transliteration_cyrilic_latin_sentence_wise,
        # These two branches previously referenced undefined names
        # (dest_script / src_script / src_lang) and raised NameError.
        ("Gujarati", "Latin"): dial_comparison_transliteration_beng_tel_mal_to_rom_ph1,
        ("Oriya", "Latin"): dial_comparison_transliteration_beng_tel_mal_to_rom_ph1,
    }

    # store original dialogues for dual dialogue format
    original_dialogues = []
    for scene in tqdm(scenes1[:]):
        for i, line in enumerate(scene):
            if i == 0 or isinstance(line, str):
                continue
            print("In dialogue")
            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            if line[speaker][2] == "":
                continue
            print("dialogue", line[speaker][2])
            original_dialogues.append(line[speaker][2])
    print("length of dialogues", len(original_dialogues))
    print(original_dialogues)

    # for detection
    # Pre-set so the prints below cannot fail when a script has no action
    # line or no dialogue without a parenthetical.
    non_dial_src_lang = None
    dial_src_lang = None
    dial_src_script = None
    for scene in tqdm(scenes):
        x = "False"
        y = "False"
        for i, line in enumerate(scene):
            if i == 0:
                continue
            if isinstance(line, str):
                x = "True"
                non_dial_src_lang = language_detector(line)
            else:
                y = "True"
                [speaker] = line.keys()
                if speaker == 'Transition':
                    continue
                if line[speaker][0] != 'NONE':
                    continue
                dial_src_lang = language_detector(line[speaker][2])
                dial_src_script = script_det(line[speaker][2])
            # Stop scanning this scene once both kinds of line were seen.
            if x == "True" and y == "True":
                break
    print("non_dial_src_lang", non_dial_src_lang)
    print("dial_src_lang", dial_src_lang)
    print("dial_src_script", dial_src_script)

    # main for loop
    transliterate = transliterators.get((dial_src_script, dial_dest_script))
    j = 0
    for scene in tqdm(scenes[:]):
        for i, line in enumerate(scene):
            if i == 0:
                addSlugLine(doc, line)
                continue
            if isinstance(line, str):
                addActionLine(doc, line, non_dial_src_lang)
                continue

            print("In dialogue")
            [speaker] = line.keys()
            if speaker == 'Transition':
                addTransition(doc, line[speaker])
                continue
            addSpeaker(doc, speaker)
            if line[speaker][0] != 'NONE':
                # In parenthetical part
                non_dial_translate = "no"
                if non_dial_translate == "yes":
                    # NOTE(review): non_dial_dest_lang is not defined in this
                    # function; this branch is disabled by the flag above.
                    out = MNF_translate(line[speaker][0], non_dial_dest_lang)
                    addParenthetical(doc, out)
                else:
                    addParenthetical(doc, line[speaker][0])

            dialogue = line[speaker][2]
            print("dialogue to be transliterated ", dialogue)
            if dialogue == "":
                continue

            if transliterate is None:
                # No routine for this script pair: keep the dialogue as it is
                # rather than failing or reusing the previous dialogue's text.
                print("no transliteration available for", dial_src_script, "to", dial_dest_script)
                trans_text = dialogue
            else:
                trans_text = transliterate(dialogue, dial_src_lang, dial_src_script, dial_dest_script)

            if dual_dial_script == "Yes":
                if translation_and_transliteration == "Yes":
                    dual_script(doc, original_dialogues[j], trans_text, dial_src_lang)
                    j = j + 1
                else:
                    dual_script(doc, dialogue, trans_text, dial_src_lang)
            else:
                addDialogue(doc, trans_text, dial_src_lang)

    doc.save(docfile)
    # The comparison table (doc2) is built but currently not saved.
    print('done file is saved')
    return docfile