from .translation_resources import google, aws, azure, yandex
from nltk.tokenize import regexp_tokenize
from .script_writing import default_script
from narration.vectorcode.code.functions import ScriptBreakdown
from .transliteration_resources import (
    azure_transliteration,
    om_transliterator,
    libindic,
    indic_transliteration_IAST,
    indic_transliteration_ITRANS,
    sheetal,
    ritwik,
)
from .script_reading import breaksen, getRefined, getSlugAndNonSlug, getSpeakers, getScenes
from .script_writing import (
    addSlugLine,
    addActionLine,
    addSpeaker,
    addParenthetical,
    addDialogue,
    dual_script,
    addTransition,
    dial_checker,
    non_dial_checker,
)
from .selection_source import (
    selection_source,
    function5,
    function41,
    function311,
    function221,
    function2111,
    function11111,
    selection_source_transliteration,
    two_sources_two_outputs,
)
from .translation_metric import (
    manual_diff_score,
    bleu_diff_score,
    gleu_diff_score,
    meteor_diff_score,
    rouge_diff_score,
    diff_score,
    critera4_5,
)
from .buck_2_unicode import buck_2_unicode
from .script_detector import script_cat
from google.cloud import translate_v2 as Translate
from google.cloud import translate
import os
import sys
import docx
import re
# import textract
from tqdm import tqdm
from collections import Counter
import ntpath
from docx.shared import Inches, Cm, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
import requests
import uuid
import json
import nltk.translate.bleu_score as bleu
import nltk.translate.gleu_score as gleu
from rouge_score import rouge_scorer
import numpy as np
import statistics
from statistics import mode
from indicnlp.tokenize import sentence_tokenize
import nltk

# Make sure the NLTK data used later (punkt sentence tokenizer, wordnet)
# is present.  Downloads are intentionally disabled; missing data is only
# reported, not fatal.
try:
    print("time9999")
    nltk.data.find('tokenizers/punkt')
except LookupError:
    # nltk.download('punkt')
    pass
try:
    nltk.data.find('wordnet')
except LookupError:
    # nltk.download('wordnet')
    print("error in finding wordnet6666666")

from nltk.tokenize import sent_tokenize

print("7777777")

# Fix: in the original this statement was broken across two physical lines
# ("from .all_transliteration" / "import all_transliteration"), which is a
# syntax error; it is re-joined into one import here.
from .all_transliteration import all_transliteration
from .all_transliteration import all_transliteration

print("88")
from MNF.settings import BasePath

basePath = BasePath()

# Credentials used by both the v2 (translate_client) and v3 (client)
# Google Cloud Translation APIs.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/MNF/json_keys/authentication.json"

translate_client = Translate.Client()
print("9999")
client = translate.TranslationServiceClient()
print("101010")
project_id = 'authentic-bongo-272808'
location = "global"
parent = f"projects/{project_id}/locations/{location}"
print("11111")


def action_line_english(script_path):
    """Translate the action lines of the screenplay at *script_path* into
    English and write the result into a new .docx file.

    The script is broken down into scenes; the source language and script
    of the action lines are auto-detected from the first action line found.
    Each sentence is sent to several translation backends, the candidates
    are scored against Google's output by five metrics, and the best one
    is written out.  Dialogue, speakers, parentheticals and transitions are
    copied through unchanged.

    Returns:
        The path of the saved document.
    """
    filename1 = script_path
    # Language codes the comparison pipeline supports.  (The list contains a
    # few duplicates — e.g. 'gu' and the 'he'..'tk' run — which are harmless
    # for membership tests.)
    translation_list = ['en', 'ta', 'hi', 'ar', 'ur', 'kn', 'gu', 'bg', 'bn', 'te', 'ml', 'ru', 'sr', 'uk', 'hr', 'ga', 'sq', 'mr', 'fa', 'tr', 'hu', 'it', 'ro', 'pa', 'gu', 'or', 'zh-CN', 'zh-TW', 'ne', 'fr', 'es', 'id', 'el', 'ja', 'ko', 'be', 'uz', 'sd', 'af', 'de', 'is', 'ig', 'la', 'pt', 'my', 'th', 'su', 'lo', 'am', 'si', 'az', 'kk', 'mk', 'bs', 'ps', 'mg', 'ms', 'yo', 'cs', 'da', 'nl', 'tl', 'no', 'sl', 'sv', 'vi', 'cy', 'he', 'hy', 'km', 'ka', 'mn', 'ku', 'ky', 'tk', 'he', 'hy', 'km', 'ka', 'mn', 'ku', 'ky', 'tk', 'fi', 'ht', 'haw', 'lt', 'lb', 'mt', 'pl', 'eo', 'tt', 'ug', 'ha', 'so', 'sw', 'yi', 'eu', 'ca', 'ceb', 'co', 'et', 'fy', 'gl', 'hmn', 'rw', 'lv', 'mi', 'sm', 'gd', 'st', 'sn', 'sk', 'xh', 'zu']

    # Output document holding the translated screenplay.
    doc = docx.Document()
    doc_file = BasePath() + "/conversion/translation/translated/" + "actionline" + \
        "trans" + '_of_' + ntpath.basename(filename1)
    print(doc_file)

    # Secondary landscape document holding the source-comparison table.
    doc2 = docx.Document()
    for section in doc2.sections:
        section.top_margin = Inches(0.2)
        section.bottom_margin = Inches(0.2)
        section.left_margin = Inches(0.2)
        section.right_margin = Inches(0.2)
    # Swap width/height of the last section to get landscape orientation.
    section = doc2.sections[-1]
    new_height = section.page_width
    section.page_width = section.page_height
    section.page_height = new_height

    name = 'Final table ' + doc_file
    doc2.add_heading(name, 0)
    doc_para = doc2.add_paragraph()
    doc_para.add_run(
        'Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex').bold = True
    table2 = doc2.add_table(rows=1, cols=4)
    table2.style = 'TableGrid'
    hdr_Cells = table2.rows[0].cells
    hdr_Cells[0].paragraphs[0].add_run("Input").bold = True
    hdr_Cells[1].paragraphs[0].add_run("Output1").bold = True
    hdr_Cells[2].paragraphs[0].add_run("Output2").bold = True
    hdr_Cells[3].paragraphs[0].add_run("Output3").bold = True

    # Break the input script into scenes, action lines, speakers, dialogues.
    refined, total_scenes = getRefined(filename1)
    sluglines, without_slug = getSlugAndNonSlug(refined)
    characters = getSpeakers(without_slug)
    scenes, actionline, parenthetical_lis, speakers, dialogues = getScenes(
        refined, total_scenes, characters)
    print(scenes)

    def language_detector(text):
        """Detect the source language of *text* via the Google v2 API."""
        result = translate_client.translate(text, target_language='hi')
        return result["detectedSourceLanguage"]

    class myDict(dict):
        """Plain dict with an explicit add() alias for item assignment.

        (The original no-op ``__init__`` that rebound *self* locally has
        been dropped; it had no effect.)
        """

        def add(self, key, value):
            self[key] = value

    def all_translator(sentence, source_lang, target_lang):
        """Translate *sentence* with every available backend and return the
        best candidate as chosen by compare_outputs().

        Each backend is best-effort: any failure simply drops that source.
        NOTE(review): ``ibm_watson`` and ``lingvanex`` are never imported in
        this module, so those two calls always raise NameError and are
        skipped — confirm whether they should be wired up.
        """
        trans = myDict()
        sources_name = myDict()
        # Replaces the original globals()['t%s' % i] registry with a plain
        # table of (name, thunk) pairs; indices stay identical.
        backends = (
            ("GOOGLE", lambda: google(sentence, source_lang, target_lang)),
            ("IBM_WATSON", lambda: ibm_watson(sentence, source_lang, target_lang)),
            ("AWS", lambda: aws(sentence, source_lang, target_lang)),
            ("AZURE", lambda: azure(sentence, target_lang)),
            ("LINGVANEX", lambda: lingvanex(sentence, source_lang, target_lang)),
            ("YANDEX", lambda: yandex(sentence, source_lang, target_lang)),
        )
        i = 0
        for source, call in backends:
            try:
                result = call()
            except Exception:  # was a bare except:; keep best-effort behavior
                continue
            trans.add(str(i), result)
            sources_name.add(str(i), source)
            i += 1
        # trans["0"] is the first backend that succeeded (normally Google).
        trans_text = compare_outputs(
            sentence, trans["0"], trans, sources_name, target_lang)
        return trans_text

    def recursive_dots(Sentence, source_lang, target_lang):
        """Split *Sentence* on the first ellipsis-style marker it contains,
        translate the pieces, and re-attach the marker between them.

        Mutually recursive with translation_with_spcecial_dots() for pieces
        that still contain other markers.
        NOTE(review): only the first marker type found is used for the
        split, matching the sibling function's behavior.
        """
        special_characters = ['....', '…', '. . .', '...']
        translated_text = []
        for marker in special_characters:
            if marker not in Sentence:
                continue
            Sentences = Sentence.split(marker)
            for Sentence in Sentences:
                if Sentence == "" or Sentence == " ":
                    continue
                if any(ext in Sentence for ext in special_characters):
                    trans_text = translation_with_spcecial_dots(
                        Sentence, source_lang, target_lang)
                else:
                    if Sentence != Sentences[-1]:
                        trans_text = all_translator(
                            Sentence, source_lang, target_lang) + marker
                    else:
                        trans_text = all_translator(
                            Sentence, source_lang, target_lang)
                translated_text.append(trans_text)
            return " ".join(translated_text)

    def translation_with_spcecial_dots(Sentence, source_lang, target_lang):
        """Like recursive_dots(): pick the first ellipsis-style marker in
        *Sentence*, split on it, and translate each fragment."""
        special_characters = ['....', '…', '. . .', '...']
        translated_text = []
        for ext in special_characters:
            if ext in Sentence:
                splitter = ext
                break
        # Callers only pass sentences that contain one of the markers, so
        # `splitter` is always bound at this point.
        Sentences = Sentence.split(splitter)
        for Sentence in Sentences:
            if Sentence == "" or Sentence == " ":
                continue
            if any(ext in Sentence for ext in special_characters):
                trans_text = recursive_dots(Sentence, source_lang, target_lang)
            else:
                if Sentence != Sentences[-1]:
                    trans_text = all_translator(
                        Sentence, source_lang, target_lang) + splitter
                else:
                    trans_text = all_translator(
                        Sentence, source_lang, target_lang)
            translated_text.append(trans_text)
        return " ".join(translated_text)

    def translate_comparison(text, source_lang, target_lang):
        """Tokenize *text* into sentences and translate each one, routing
        sentences that contain ellipsis markers through the dot-aware path."""
        sentences = sent_tokenize(text)
        special_characters = ['....', '…', '. . .', '...']
        translated_text = []
        for sentence in sentences:
            if any(ext in sentence for ext in special_characters):
                trans_text = translation_with_spcecial_dots(
                    sentence, source_lang, target_lang)
            else:
                trans_text = all_translator(sentence, source_lang, target_lang)
            translated_text.append(trans_text)
        return " ".join(translated_text)

    def script_det(text):
        """Return the script (writing system) of the first character of
        *text* that is not punctuation."""
        punctuations = '''!()-[]{};:'"\,<>./?@#$%^&*_~“"”'''
        no_punct = ""
        for char in text:
            if char not in punctuations:
                no_punct = char
                break
        # NOTE(review): if *text* is entirely punctuation, script_cat("")
        # is called — confirm script_cat tolerates the empty string.
        return script_cat(no_punct)[0]

    def punct_remover(string):
        """Replace punctuation marks and digits in *string* with spaces."""
        punctuations = '''!()-[]{};:'"\,<>./?@#$%^&*_~…।1234567890“”"'''
        for x in string.lower():
            if x in punctuations:
                string = string.replace(x, " ")
        return string

    def word_transliterate(sentence, dest_script):
        """Stub: transliteration into *dest_script* is not implemented;
        the sentence is returned unchanged."""
        return sentence

    def final_out(output1, output2, output3, dest_lang):
        """Return the first of the three candidates whose words are written
        in the destination language's default script, falling back to
        word_transliterate() (currently a no-op) when all three fail.

        NOTE(review): control flow reconstructed from the original nested
        loops — each fallback level is triggered by the first wrong-script
        word found at the previous level; confirm against git history.
        """
        temp_output1 = punct_remover(output1)
        temp_output2 = punct_remover(output2)
        temp_output3 = punct_remover(output3)
        expected_script = default_script[dest_lang]
        for word in temp_output1.split():
            if script_det(word) != expected_script:
                for word in temp_output2.split():
                    if script_det(word) != expected_script:
                        for word in temp_output3.split():
                            if script_det(word) != expected_script:
                                output1 = word_transliterate(
                                    output1, expected_script)
                                return output1
                        return output3
                return output2
        return output1

    def compare_outputs(sentence, t0, trans, sources_name, target_lang):
        """Choose the best translation among the candidates in *trans*.

        Five similarity metrics each nominate a candidate; when all agree
        with Google's output (*t0*) it is returned directly.  Otherwise
        selection_source() ranks the dissenting sources and final_out()
        picks among the top three by script consistency.
        """
        methods_name = {'0': 'MNF', '1': 'Gleu', '2': 'Meteor',
                        '3': 'Rougen', '4': 'Rougel'}
        google_output = t0
        output1, source1 = manual_diff_score(trans, sources_name)
        output2, source2 = gleu_diff_score(trans, sources_name)
        output3, source3 = meteor_diff_score(trans, sources_name)
        output4, source4, output5, source5 = rouge_diff_score(
            trans, sources_name)
        if google_output == output1 == output2 == output3 == output4 == output5:
            # Every metric agrees with Google's output.
            return google_output
        # Collect, per metric, the candidate only where it dissents from
        # Google (blank placeholders keep positions aligned).
        k = [sentence, google_output]
        s = []
        for output, source in ((output1, source1), (output2, source2),
                               (output3, source3), (output4, source4),
                               (output5, source5)):
            if google_output != output:
                k.append(output)
                s.append(source)
            else:
                k.append(" ")
                s.append(" ")
        s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(
            s, sources_name, trans, methods_name)
        # add_dial_comparison_doc2(doc2, table2, sentence, s1ANDm1, s2ANDm2,
        #                          s3ANDm3, sources_name, trans)
        # Map the winning source names back to their translations.  (The
        # original reused the name `k` for the dict key here, shadowing the
        # list above; distinct names are used instead.)
        for key, src in sources_name.items():
            if src == s1ANDm1[0]:
                output1 = trans[str(key)]
        if s2ANDm2[0] != "":
            for key, src in sources_name.items():
                if src == s2ANDm2[0]:
                    output2 = trans[str(key)]
        else:
            output2 = output1
        if s3ANDm3[0] != "":
            for key, src in sources_name.items():
                if src == s3ANDm3[0]:
                    output3 = trans[str(key)]
        else:
            output3 = output1
        return final_out(output1, output2, output3, target_lang)

    def add_dial_comparison_doc2(doc2, table2, sentence, s1ANDm1, s2ANDm2,
                                 s3ANDm3, sources_name, trans):
        """Append a row with the input sentence and up to three alternative
        outputs (with their source/method labels) to the comparison table.

        Currently unused: its call site in compare_outputs() is commented out.
        NOTE(review): if no entry of *sources_name* matches s1ANDm1[0],
        `output1` is unbound and this raises — preserved as-is.
        """
        row_Cells = table2.add_row().cells
        for key, src in sources_name.items():
            if src == s1ANDm1[0]:
                output1 = trans[str(key)]
        row_Cells[0].text = sentence
        row_Cells[1].text = output1
        row_Cells[1].paragraphs[0].add_run('(Source : ' + str(s1ANDm1[0]) + ')')
        row_Cells[1].paragraphs[0].add_run('(Methods : ' + str(s1ANDm1[1]) + ')')
        if s2ANDm2[0] == "":
            row_Cells[2].text = ""
        else:
            for key, src in sources_name.items():
                if src == s2ANDm2[0]:
                    output2 = trans[str(key)]
            row_Cells[2].text = output2
            row_Cells[2].paragraphs[0].add_run(
                '(Source : ' + str(s2ANDm2[0]) + ')')
            row_Cells[2].paragraphs[0].add_run(
                '(Methods : ' + str(s2ANDm2[1]) + ')')
        if s3ANDm3[0] == "":
            row_Cells[3].text = ""
        else:
            for key, src in sources_name.items():
                if src == s3ANDm3[0]:
                    output3 = trans[str(key)]
            row_Cells[3].text = output3
            row_Cells[3].paragraphs[0].add_run(
                '(Source : ' + str(s3ANDm3[0]) + ')')
            row_Cells[3].paragraphs[0].add_run(
                '(Methods : ' + str(s3ANDm3[1]) + ')')

    def actionline_translation(text, non_dial_src_lang, non_dial_dest_lang):
        """Translate an action line when both languages are supported,
        otherwise write it out untranslated."""
        if non_dial_src_lang in translation_list and non_dial_dest_lang in translation_list:
            trans_text = translate_comparison(
                text, non_dial_src_lang, non_dial_dest_lang)
            addActionLine(doc, trans_text, non_dial_dest_lang)
        else:
            addActionLine(doc, text, non_dial_dest_lang)

    # --- Pass 1: detect source language/script from the first action line.
    count = 0
    non_dial_src_lang = None  # fix: were unbound when the script contains
    non_dial_script = None    # no action line at all
    for scene in tqdm(scenes[:]):
        for i, line in enumerate(scene):
            if i == 0:
                continue  # first entry of a scene is its slugline
            if type(line) == type(""):
                if count == 0:
                    non_dial_src_lang = language_detector(line)
                    non_dial_script = script_det(line)
                    count += 1
        if count != 0:
            break
    print("non_dial_src_lang", non_dial_src_lang)
    print("non_dial_script", non_dial_script)

    # --- Pass 2: rebuild the screenplay, translating action lines.
    non_dial_dest_lang = "en"
    for scene in tqdm(scenes[:]):
        for i, line in enumerate(scene):
            if i == 0:
                addSlugLine(doc, line)
                continue
            if type(line) == type(""):
                # Plain string -> action line.
                if non_dial_src_lang == non_dial_dest_lang:
                    addActionLine(doc, line, non_dial_dest_lang)
                else:
                    if non_dial_script == default_script[non_dial_src_lang]:
                        actionline_translation(
                            line, non_dial_src_lang, non_dial_dest_lang)
                    else:
                        # NOTE(review): script_det() is applied to the
                        # language *code*, not to the line — this looks like
                        # it should be `non_dial_script`; preserved as-is,
                        # confirm before changing.
                        transliterated_text = all_transliteration(line, script_det(
                            non_dial_src_lang), default_script[non_dial_src_lang])
                        actionline_translation(
                            transliterated_text, non_dial_src_lang, non_dial_dest_lang)
            else:
                # Dict with a single key: {speaker: (parenthetical, _, dialogue)}.
                [speaker] = line.keys()
                if speaker == 'Transition':
                    addTransition(doc, line[speaker])
                    continue
                addSpeaker(doc, speaker)
                if line[speaker][0] != 'NONE':
                    addParenthetical(doc, line[speaker][0])
                if line[speaker][2] == "":
                    continue
                addDialogue(doc, line[speaker][2], non_dial_dest_lang)

    # NOTE(review): doc2 (the comparison table) is built but never saved;
    # the original `doc2.save(...)` call was left commented out.
    doc.save(doc_file)
    return doc_file