520 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Python
		
	
	
	
		
		
			
		
	
	
			520 lines
		
	
	
		
			20 KiB
		
	
	
	
		
			Python
		
	
	
	
| 
								 | 
							
								from .translation_resources import google, aws, azure, yandex
							 | 
						||
| 
								 | 
							
								from nltk.tokenize import regexp_tokenize
							 | 
						||
| 
								 | 
							
								from .script_writing import default_script
							 | 
						||
| 
								 | 
							
								from narration.vectorcode.code.functions import ScriptBreakdown
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from .transliteration_resources import azure_transliteration, om_transliterator, libindic, indic_transliteration_IAST, indic_transliteration_ITRANS, sheetal, ritwik
							 | 
						||
| 
								 | 
							
								from .script_reading import breaksen, getRefined, getSlugAndNonSlug, getSpeakers, getScenes
							 | 
						||
| 
								 | 
							
								from .script_writing import addSlugLine, addActionLine, addSpeaker, addParenthetical, addDialogue, dual_script, addTransition, dial_checker,  non_dial_checker
							 | 
						||
| 
								 | 
							
								from .selection_source import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs
							 | 
						||
| 
								 | 
							
								from .translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
							 | 
						||
| 
								 | 
							
								from .buck_2_unicode import buck_2_unicode
							 | 
						||
| 
								 | 
							
								from .script_detector import script_cat
							 | 
						||
| 
								 | 
							
								from google.cloud import translate_v2 as Translate
							 | 
						||
| 
								 | 
							
								from google.cloud import translate
							 | 
						||
| 
								 | 
							
								import os
							 | 
						||
| 
								 | 
							
								import sys
							 | 
						||
| 
								 | 
							
								import docx
							 | 
						||
| 
								 | 
							
								import re
							 | 
						||
| 
								 | 
							
								# import textract
							 | 
						||
| 
								 | 
							
								from tqdm import tqdm
							 | 
						||
| 
								 | 
							
								from collections import Counter
							 | 
						||
| 
								 | 
							
								import ntpath
							 | 
						||
| 
								 | 
							
								from docx.shared import Inches, Cm, Pt
							 | 
						||
| 
								 | 
							
								from docx.enum.text import WD_ALIGN_PARAGRAPH
							 | 
						||
| 
								 | 
							
								from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
							 | 
						||
| 
								 | 
							
								import requests
							 | 
						||
| 
								 | 
							
								import uuid
							 | 
						||
| 
								 | 
							
								import json
							 | 
						||
| 
								 | 
							
								import nltk.translate.bleu_score as bleu
							 | 
						||
| 
								 | 
							
								import nltk.translate.gleu_score as gleu
							 | 
						||
| 
								 | 
							
								from rouge_score import rouge_scorer
							 | 
						||
| 
								 | 
							
								import numpy as np
							 | 
						||
| 
								 | 
							
								import statistics
							 | 
						||
| 
								 | 
							
								from statistics import mode
							 | 
						||
| 
								 | 
							
								from indicnlp.tokenize import sentence_tokenize
							 | 
						||
| 
								 | 
							
								import nltk
							 | 
						||
| 
								 | 
							
								# Check that the NLTK resources this module relies on are installed.
								# Nothing is downloaded at import time; a missing resource is only reported.
								try:
								    print("time9999")
								    nltk.data.find('tokenizers/punkt')
								except LookupError:
								    # nltk.download('punkt')
								    pass
								try:
								    # Resource names are paths relative to an nltk_data directory, so the
								    # WordNet corpus is addressed as 'corpora/wordnet'. The bare name
								    # 'wordnet' does not match the installed location and made this check
								    # report an error even when the corpus was present.
								    nltk.data.find('corpora/wordnet')
								except LookupError:
								    ###nltk.download('wordnet')
								    print("error in finding wordnet6666666")
							 | 
						||
| 
								 | 
							
								from nltk.tokenize import sent_tokenize
							 | 
						||
| 
								 | 
							
								print("7777777")
							 | 
						||
| 
								 | 
							
								from .all_transliteration import all_transliteration
							 | 
						||
| 
								 | 
							
								print("88")
							 | 
						||
| 
								 | 
							
								from MNF.settings import BasePath
							 | 
						||
| 
								 | 
							
								# Resolve the project root once; the credentials path below is built from it.
								basePath = BasePath()

								# The credentials variable is set before either Google client is constructed.
								os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/MNF/json_keys/authentication.json"
								translate_client = Translate.Client()
								print("9999")
								client = translate.TranslationServiceClient()
								print("101010")

								# Resource path in the form projects/<project>/locations/<location>.
								project_id = 'authentic-bongo-272808'
								location = "global"
								parent = "/".join(("projects", project_id, "locations", location))
								print("11111")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def action_line_english(script_path):
							 | 
						||
| 
								 | 
							
								    filename1 = script_path
							 | 
						||
| 
								 | 
							
								    translation_list = ['en', 'ta', 'hi', 'ar', 'ur', 'kn', 'gu', 'bg', 'bn', 'te', 'ml', 'ru', 'sr', 'uk', 'hr', 'ga', 'sq', 'mr',
							 | 
						||
| 
								 | 
							
								                        'fa', 'tr', 'hu', 'it', 'ro', 'pa', 'gu', 'or', 'zh-CN', 'zh-TW', 'ne', 'fr', 'es', 'id', 'el', 'ja', 'ko', 'be', 'uz', 'sd', 'af', 'de', 'is',
							 | 
						||
| 
								 | 
							
								                        'ig', 'la', 'pt', 'my', 'th', 'su', 'lo', 'am', 'si', 'az', 'kk', 'mk', 'bs', 'ps', 'mg', 'ms', 'yo', 'cs', 'da', 'nl', 'tl', 'no', 'sl', 'sv',
							 | 
						||
| 
								 | 
							
								                        'vi', 'cy', 'he', 'hy', 'km', 'ka', 'mn', 'ku', 'ky', 'tk', 'he', 'hy', 'km', 'ka', 'mn', 'ku', 'ky', 'tk', 'fi', 'ht', 'haw', 'lt', 'lb', 'mt',
							 | 
						||
| 
								 | 
							
								                        'pl', 'eo', 'tt', 'ug', 'ha', 'so', 'sw', 'yi', 'eu', 'ca', 'ceb', 'co', 'et', 'fy', 'gl', 'hmn', 'rw', 'lv', 'mi', 'sm', 'gd', 'st', 'sn', 'sk',
							 | 
						||
| 
								 | 
							
								                        'xh', 'zu']
							 | 
						||
| 
								 | 
							
								    # create an instance of a word document
							 | 
						||
| 
								 | 
							
								    doc = docx.Document()
							 | 
						||
| 
								 | 
							
								    doc_file = BasePath()+"/conversion/translation/translated/" + "actionline" + \
							 | 
						||
| 
								 | 
							
								        "trans" + '_of_' + ntpath.basename(filename1)
							 | 
						||
| 
								 | 
							
								    print(doc_file)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    doc2 = docx.Document()
							 | 
						||
| 
								 | 
							
								    sections = doc2.sections
							 | 
						||
| 
								 | 
							
								    for section in sections:
							 | 
						||
| 
								 | 
							
								        section.top_margin = Inches(0.2)
							 | 
						||
| 
								 | 
							
								        section.bottom_margin = Inches(0.2)
							 | 
						||
| 
								 | 
							
								        section.left_margin = Inches(0.2)
							 | 
						||
| 
								 | 
							
								        section.right_margin = Inches(0.2)
							 | 
						||
| 
								 | 
							
								    section = doc2.sections[-1]
							 | 
						||
| 
								 | 
							
								    new_height = section.page_width
							 | 
						||
| 
								 | 
							
								    section.page_width = section.page_height
							 | 
						||
| 
								 | 
							
								    section.page_height = new_height
							 | 
						||
| 
								 | 
							
								    name = 'Final table '+doc_file
							 | 
						||
| 
								 | 
							
								    doc2.add_heading(name, 0)
							 | 
						||
| 
								 | 
							
								    doc_para = doc2.add_paragraph()
							 | 
						||
| 
								 | 
							
								    doc_para.add_run(
							 | 
						||
| 
								 | 
							
								        'Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex').bold = True
							 | 
						||
| 
								 | 
							
								    table2 = doc2.add_table(rows=1, cols=4)
							 | 
						||
| 
								 | 
							
								    table2.style = 'TableGrid'
							 | 
						||
| 
								 | 
							
								    hdr_Cells = table2.rows[0].cells
							 | 
						||
| 
								 | 
							
								    hdr_Cells[0].paragraphs[0].add_run("Input").bold = True
							 | 
						||
| 
								 | 
							
								    hdr_Cells[1].paragraphs[0].add_run("Output1").bold = True
							 | 
						||
| 
								 | 
							
								    hdr_Cells[2].paragraphs[0].add_run("Output2").bold = True
							 | 
						||
| 
								 | 
							
								    hdr_Cells[3].paragraphs[0].add_run("Output3").bold = True
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    # process the input script and return scenes
							 | 
						||
| 
								 | 
							
								    refined, total_scenes = getRefined(filename1)
							 | 
						||
| 
								 | 
							
								    # print(refined)
							 | 
						||
| 
								 | 
							
								    # log.debug(refined)
							 | 
						||
| 
								 | 
							
								    sluglines, without_slug = getSlugAndNonSlug(refined)
							 | 
						||
| 
								 | 
							
								    # print(sluglines)
							 | 
						||
| 
								 | 
							
								    # log.debug(sluglines)
							 | 
						||
| 
								 | 
							
								    characters = getSpeakers(without_slug)
							 | 
						||
| 
								 | 
							
								    # log.debug(characters)
							 | 
						||
| 
								 | 
							
								    scenes, actionline, parenthetical_lis, speakers, dialogues = getScenes(
							 | 
						||
| 
								 | 
							
								        refined, total_scenes, characters)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    # refined, total_scenes = ScriptBreakdown().getRefined(filename1)
							 | 
						||
| 
								 | 
							
								    # sluglines, without_slug = ScriptBreakdown().getSlugAndNonSlug(refined)
							 | 
						||
| 
								 | 
							
								    # characters = ScriptBreakdown().getSpeakers(without_slug)
							 | 
						||
| 
								 | 
							
								    # scenes, actionline, parenthetical_lis, speakers, dialogues = ScriptBreakdown().getScenes(
							 | 
						||
| 
								 | 
							
								    #     refined, total_scenes, characters)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    print(scenes)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    # to detect the language
							 | 
						||
| 
								 | 
							
								    def language_detector(text):
								        """Return the source-language code reported for ``text``.

								        The text is sent to the module-level ``translate_client`` with
								        Hindi ('hi') as the target language; only the
								        ``detectedSourceLanguage`` field of the response is returned.
								        """
								        response = translate_client.translate(text, target_language='hi')
								        return response["detectedSourceLanguage"]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    class myDict(dict):
								        """A plain ``dict`` with an ``add`` convenience method."""

								        def __init__(self, *args, **kwargs):
								            # The previous body rebound the local name ``self`` to a throwaway
								            # dict, which had no effect on the instance. Initialise through
								            # ``dict`` instead; calling with no arguments still yields an
								            # empty mapping.
								            super().__init__(*args, **kwargs)

								        def add(self, key, value):
								            """Store ``value`` under ``key``, replacing any existing entry."""
								            self[key] = value
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def all_translator(sentence, source_lang, target_lang):
								        """Translate ``sentence`` with every available engine and pick one result.

								        The engines are tried in a fixed order: Google, IBM Watson, AWS,
								        Azure, Lingvanex, Yandex. An engine that raises is skipped. Each
								        successful result is stored in ``trans`` under consecutive string
								        keys starting at "0", and the engine label is stored under the
								        same key in ``sources_name``. Both mappings, together with the
								        first successful translation, are passed to ``compare_outputs``,
								        whose return value is returned.

								        Raises:
								            KeyError: if no engine produced a translation, because there
								                is then no "0" entry to pass on.
								        """
								        # Lambdas defer both the call and the name lookup, so an engine whose
								        # function is not available in this module is skipped like any other
								        # failing engine.
								        engines = (
								            ("GOOGLE", lambda: google(sentence, source_lang, target_lang)),
								            ("IBM_WATSON", lambda: ibm_watson(sentence, source_lang, target_lang)),
								            ("AWS", lambda: aws(sentence, source_lang, target_lang)),
								            # azure is called without the source language.
								            ("AZURE", lambda: azure(sentence, target_lang)),
								            ("LINGVANEX", lambda: lingvanex(sentence, source_lang, target_lang)),
								            ("YANDEX", lambda: yandex(sentence, source_lang, target_lang)),
								        )

								        trans = myDict()
								        sources_name = myDict()
								        for label, call_engine in engines:
								            try:
								                result = call_engine()
								            except Exception:
								                # Best effort: one failing engine must not stop the others.
								                # Unlike a bare ``except``, this lets KeyboardInterrupt and
								                # SystemExit propagate.
								                continue
								            # Results stay in local mappings. They used to be round-tripped
								            # through module globals, which concurrent calls could overwrite.
								            key = str(len(trans))
								            trans.add(key, result)
								            sources_name.add(key, label)

								        trans_text = compare_outputs(
								            sentence, trans["0"], trans, sources_name, target_lang)
								        return trans_text
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def recursive_dots(Sentence, source_lang, target_lang):
								        """Translate ``Sentence`` piece by piece around its ellipsis markers.

								        The sentence is split on the first marker from
								        ``special_characters`` (checked in list order) that it contains.
								        Fragments that are empty or a single space are dropped. A fragment
								        that still contains a marker is passed to
								        ``translation_with_spcecial_dots``; any other fragment goes to
								        ``all_translator``. The marker is re-appended after every fragment
								        except the last, and the pieces are joined with single spaces.

								        Fixes relative to the previous version:
								        * the inner loop no longer rebinds ``Sentence``, which made later
								          markers be tested against the last fragment and translated that
								          fragment a second time;
								        * "last fragment" is decided by position, not by value, so a
								          fragment whose text equals the last one keeps its marker;
								        * the marker is also kept after fragments handled recursively;
								        * a sentence without any marker is translated as a whole instead
								          of yielding an empty string.
								        """
								        special_characters = ['....', '…', '. . .', '...']
								        splitter = next(
								            (mark for mark in special_characters if mark in Sentence), None)
								        if splitter is None:
								            return all_translator(Sentence, source_lang, target_lang)

								        fragments = Sentence.split(splitter)
								        last_index = len(fragments) - 1
								        translated_text = []
								        for index, fragment in enumerate(fragments):
								            if fragment == "" or fragment == " ":
								                continue
								            if any(mark in fragment for mark in special_characters):
								                # Other markers remain inside this fragment.
								                trans_text = translation_with_spcecial_dots(
								                    fragment, source_lang, target_lang)
								            else:
								                trans_text = all_translator(
								                    fragment, source_lang, target_lang)
								            if index != last_index:
								                trans_text = trans_text + splitter
								            translated_text.append(trans_text)

								        return " ".join(translated_text)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def translation_with_spcecial_dots(Sentence, source_lang, target_lang):
								        """Translate ``Sentence`` piece by piece around its ellipsis markers.

								        The sentence is split on the first marker from
								        ``special_characters`` (checked in list order) that it contains.
								        Fragments that are empty or a single space are dropped. A fragment
								        that still contains a marker is passed to ``recursive_dots``; any
								        other fragment goes to ``all_translator``. The marker is
								        re-appended after every fragment except the last, and the pieces
								        are joined with single spaces.

								        Fixes relative to the previous version:
								        * a sentence without any marker is translated as a whole; before,
								          ``splitter`` was never assigned and the split raised;
								        * "last fragment" is decided by position, not by value, so a
								          fragment whose text equals the last one keeps its marker;
								        * the marker is also kept after fragments handled recursively.
								        """
								        special_characters = ['....', '…', '. . .', '...']
								        splitter = next(
								            (ext for ext in special_characters if ext in Sentence), None)
								        if splitter is None:
								            return all_translator(Sentence, source_lang, target_lang)

								        fragments = Sentence.split(splitter)
								        last_index = len(fragments) - 1
								        translated_text = []
								        for index, fragment in enumerate(fragments):
								            if fragment == "" or fragment == " ":
								                continue
								            if any(ext in fragment for ext in special_characters):
								                # Other markers remain inside this fragment.
								                trans_text = recursive_dots(
								                    fragment, source_lang, target_lang)
								            else:
								                trans_text = all_translator(
								                    fragment, source_lang, target_lang)
								            if index != last_index:
								                trans_text = trans_text + splitter
								            translated_text.append(trans_text)

								        return " ".join(translated_text)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def translate_comparison(text, source_lang, target_lang):
								        """Translate ``text`` sentence by sentence and join the results.

								        ``text`` is split with ``sent_tokenize``. A sentence containing
								        any of the ellipsis markers is handled by
								        ``translation_with_spcecial_dots``; every other sentence goes
								        straight to ``all_translator``. The translated sentences are
								        joined with single spaces.
								        """
								        special_characters = ['....', '…', '. . .', '...']

								        def translate_one(sentence):
								            # Choose the handler by whether the sentence has a marker.
								            if any(ext in sentence for ext in special_characters):
								                handler = translation_with_spcecial_dots
								            else:
								                handler = all_translator
								            return handler(sentence, source_lang, target_lang)

								        sentences = sent_tokenize(text)
								        return " ".join([translate_one(sentence) for sentence in sentences])
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def script_det(text):
								        """Return the script of the first non-punctuation character of ``text``.

								        The first character of ``text`` that is not in ``punctuations`` is
								        passed to ``script_cat`` and element 0 of its result is returned.
								        If ``text`` has no such character, the empty string is passed to
								        ``script_cat`` instead.
								        """
								        # The backslash is written as an explicit escape. The previous literal
								        # relied on an invalid escape sequence (backslash + comma), which
								        # newer Python versions warn about; the set of characters is unchanged.
								        punctuations = '''!()-[]{};:'"\\,<>./?@#$%^&*_~“"”'''
								        first_char = next(
								            (char for char in text if char not in punctuations), "")
								        return script_cat(first_char)[0]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def punct_remover(string):
								        """Return ``string`` with punctuation and ASCII digits replaced by spaces.

								        Every character listed in ``punctuations`` is replaced by a single
								        space; all other characters are kept as they are, so the length of
								        the result equals the length of the input.
								        """
								        # The backslash is written as an explicit escape. The previous literal
								        # relied on an invalid escape sequence (backslash + comma), which
								        # newer Python versions warn about; the set of characters is unchanged.
								        punctuations = '''!()-[]{};:'"\\,<>./?@#$%^&*_~…।1234567890“”"'''
								        # One translate pass replaces the earlier pattern of a full-string
								        # replace() for every matching character.
								        to_space = str.maketrans(punctuations, " " * len(punctuations))
								        return string.translate(to_space)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def word_transliterate(sentence, dest_script):
								        """Placeholder transliteration hook: return ``sentence`` unchanged.

								        ``dest_script`` is accepted but not used.
								        """
								        return sentence
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def final_out(output1, output2, output3, dest_lang):
								        """Return the first candidate written entirely in the target script.

								        Each candidate is stripped with ``punct_remover`` and split on
								        whitespace. A candidate qualifies when ``script_det`` of every word
								        equals ``default_script[dest_lang]``; a candidate with no words
								        qualifies trivially. Candidates are tried in the order
								        ``output1``, ``output2``, ``output3`` and the original
								        (unstripped) text of the first qualifying one is returned. If none
								        qualifies, ``output1`` is passed through ``word_transliterate``
								        and returned.
								        """
								        candidates = (output1, output2, output3)
								        # All three candidates are cleaned up front, before any script check.
								        cleaned_candidates = [punct_remover(candidate) for candidate in candidates]

								        def has_foreign_word(cleaned):
								            # Stops at the first word whose script differs from the target.
								            return any(
								                script_det(token) != default_script[dest_lang]
								                for token in cleaned.split()
								            )

								        for candidate, cleaned in zip(candidates, cleaned_candidates):
								            if not has_foreign_word(cleaned):
								                return candidate

								        return word_transliterate(output1, default_script[dest_lang])
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    # take a sentence and give translated sentence by comparing outputs from different resources
							 | 
						||
| 
								 | 
							
								    def compare_outputs(sentence, t0, trans, sources_name, target_lang):
							 | 
						||
| 
								 | 
							
								        k = []
							 | 
						||
| 
								 | 
							
								        s = []
							 | 
						||
| 
								 | 
							
								        methods_name = {'0': 'MNF', '1': 'Gleu',
							 | 
						||
| 
								 | 
							
								                        '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
							 | 
						||
| 
								 | 
							
								        google_output = t0
							 | 
						||
| 
								 | 
							
								        #print("google", google_output)
							 | 
						||
| 
								 | 
							
								        output1, source1 = manual_diff_score(trans, sources_name)
							 | 
						||
| 
								 | 
							
								        #print("MNF", output1)
							 | 
						||
| 
								 | 
							
								        output2, source2 = gleu_diff_score(trans, sources_name)
							 | 
						||
| 
								 | 
							
								        #print("gleu", output2)
							 | 
						||
| 
								 | 
							
								        output3, source3 = meteor_diff_score(trans, sources_name)
							 | 
						||
| 
								 | 
							
								        #print("meteor", output3)
							 | 
						||
| 
								 | 
							
								        output4, source4, output5, source5 = rouge_diff_score(
							 | 
						||
| 
								 | 
							
								            trans, sources_name)
							 | 
						||
| 
								 | 
							
								        #print("rougen", output4)
							 | 
						||
| 
								 | 
							
								        #print("rougel", output5)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								        if google_output == output1 == output2 == output3 == output4 == output5:
							 | 
						||
| 
								 | 
							
								            #print("all output are same as google")
							 | 
						||
| 
								 | 
							
								            return google_output
							 | 
						||
| 
								 | 
							
								        else:
							 | 
						||
| 
								 | 
							
								            if google_output != output1:
							 | 
						||
| 
								 | 
							
								                k.append(output1)
							 | 
						||
| 
								 | 
							
								                s.append(source1)
							 | 
						||
| 
								 | 
							
								            else:
							 | 
						||
| 
								 | 
							
								                k.append(" ")
							 | 
						||
| 
								 | 
							
								                s.append(" ")
							 | 
						||
| 
								 | 
							
								            if google_output != output2:
							 | 
						||
| 
								 | 
							
								                k.append(output2)
							 | 
						||
| 
								 | 
							
								                s.append(source2)
							 | 
						||
| 
								 | 
							
								            else:
							 | 
						||
| 
								 | 
							
								                k.append(" ")
							 | 
						||
| 
								 | 
							
								                s.append(" ")
							 | 
						||
| 
								 | 
							
								            if google_output != output3:
							 | 
						||
| 
								 | 
							
								                k.append(output3)
							 | 
						||
| 
								 | 
							
								                s.append(source3)
							 | 
						||
| 
								 | 
							
								            else:
							 | 
						||
| 
								 | 
							
								                k.append(" ")
							 | 
						||
| 
								 | 
							
								                s.append(" ")
							 | 
						||
| 
								 | 
							
								            if google_output != output4:
							 | 
						||
| 
								 | 
							
								                k.append(output4)
							 | 
						||
| 
								 | 
							
								                s.append(source4)
							 | 
						||
| 
								 | 
							
								            else:
							 | 
						||
| 
								 | 
							
								                k.append(" ")
							 | 
						||
| 
								 | 
							
								                s.append(" ")
							 | 
						||
| 
								 | 
							
								            if google_output != output5:
							 | 
						||
| 
								 | 
							
								                k.append(output5)
							 | 
						||
| 
								 | 
							
								                s.append(source5)
							 | 
						||
| 
								 | 
							
								            else:
							 | 
						||
| 
								 | 
							
								                k.append(" ")
							 | 
						||
| 
								 | 
							
								                s.append(" ")
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            k.insert(0, sentence)
							 | 
						||
| 
								 | 
							
								            k.insert(1, google_output)
							 | 
						||
| 
								 | 
							
								            s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(
							 | 
						||
| 
								 | 
							
								                s, sources_name, trans, methods_name)
							 | 
						||
| 
								 | 
							
								            # print("s1", s1ANDm1)
							 | 
						||
| 
								 | 
							
								            # print("s2", s2ANDm2)
							 | 
						||
| 
								 | 
							
								            # print("s3", s3ANDm3)
							 | 
						||
| 
								 | 
							
								            # print(s1ANDm1[0])
							 | 
						||
| 
								 | 
							
								            # print(sources_name)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            #add_dial_comparison_doc2(doc2, table2, sentence, s1ANDm1, s2ANDm2, s3ANDm3, sources_name, trans)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            for a, b in sources_name.items():
							 | 
						||
| 
								 | 
							
								                if b == s1ANDm1[0]:
							 | 
						||
| 
								 | 
							
								                    k = a
							 | 
						||
| 
								 | 
							
								            output1 = trans[str(k)]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            if s2ANDm2[0] != "":
							 | 
						||
| 
								 | 
							
								                for c, d in sources_name.items():
							 | 
						||
| 
								 | 
							
								                    if d == s2ANDm2[0]:
							 | 
						||
| 
								 | 
							
								                        l = c
							 | 
						||
| 
								 | 
							
								                output2 = trans[str(l)]
							 | 
						||
| 
								 | 
							
								            else:
							 | 
						||
| 
								 | 
							
								                output2 = output1
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            if s3ANDm3[0] != "":
							 | 
						||
| 
								 | 
							
								                for e, f in sources_name.items():
							 | 
						||
| 
								 | 
							
								                    if f == s3ANDm3[0]:
							 | 
						||
| 
								 | 
							
								                        m = e
							 | 
						||
| 
								 | 
							
								                output3 = trans[str(m)]
							 | 
						||
| 
								 | 
							
								            else:
							 | 
						||
| 
								 | 
							
								                output3 = output1
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            # print("output1", output1)
							 | 
						||
| 
								 | 
							
								            # print("output2", output2)
							 | 
						||
| 
								 | 
							
								            # print("output3", output3)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            output = final_out(output1, output2, output3, target_lang)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            # print("output", output)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            return output
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    # Append a row holding the best three outputs to the comparison table.
							 | 
						||
| 
								 | 
							
								    def add_dial_comparison_doc2(doc2, table2, sentence, s1ANDm1, s2ANDm2, s3ANDm3, sources_name, trans):
								        """Append one comparison row for ``sentence`` to ``table2``.

								        Cell 0 receives the sentence. Cells 1-3 receive the translations
								        selected by ``s1ANDm1``, ``s2ANDm2`` and ``s3ANDm3``, each followed
								        by a ``(Source : ...)`` run and a ``(Methods : ...)`` run.

								        Args:
								            doc2: Not used by this function; kept for call compatibility.
								            table2: Object whose ``add_row().cells`` yields at least four
								                cells exposing ``text`` and ``paragraphs[0].add_run``.
								            sentence: Text written to the first cell.
								            s1ANDm1: Indexable pair; index 0 is a source name, index 1 is
								                the value shown in the "Methods" note.
								            s2ANDm2: Same shape; an empty-string source name leaves cell 2
								                blank.
								            s3ANDm3: Same shape; an empty-string source name leaves cell 3
								                blank.
								            sources_name: Mapping of key -> source name.
								            trans: Mapping of ``str(key)`` -> translated text.

								        Raises:
								            KeyError: If a requested source name is absent from
								                ``sources_name`` or its key is absent from ``trans``.
								        """

								        def translation_for(source_name):
								            # When several keys share a name the last one wins, which is
								            # what the original non-breaking scan produced.
								            matches = [key for key, name in sources_name.items()
								                       if name == source_name]
								            if not matches:
								                # Fail loudly: previously a stale key from the preceding
								                # column was reused, so the cell showed another source's
								                # translation under this source's label.
								                raise KeyError(
								                    "source %r not found in sources_name" % (source_name,))
								            return trans[str(matches[-1])]

								        picks = (s1ANDm1, s2ANDm2, s3ANDm3)

								        # Resolve every column before touching the table so that a lookup
								        # failure does not leave a half-filled row behind.
								        texts = []
								        for column, pick in enumerate(picks, start=1):
								            if column > 1 and pick[0] == "":
								                texts.append(None)  # optional column not selected
								            else:
								                texts.append(translation_for(pick[0]))

								        row_cells = table2.add_row().cells
								        row_cells[0].text = sentence
								        for column, (pick, text) in enumerate(zip(picks, texts), start=1):
								            cell = row_cells[column]
								            if text is None:
								                cell.text = ""
								                continue
								            cell.text = text
								            paragraph = cell.paragraphs[0]
								            paragraph.add_run('(Source : ' + str(pick[0]) + ')')
								            paragraph.add_run('(Methods : ' + str(pick[1]) + ')')
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def actionline_translation(text, non_dial_src_lang, non_dial_dest_lang):
								        """Write ``text`` to ``doc`` as an action line, translated when possible.

								        The text goes through ``translate_comparison`` only when both
								        language codes are members of ``translation_list``; otherwise it
								        is written unchanged. Either way the line is added with
								        ``non_dial_dest_lang``.
								        """
								        can_translate = (non_dial_src_lang in translation_list
								                         and non_dial_dest_lang in translation_list)
								        rendered = (
								            translate_comparison(text, non_dial_src_lang, non_dial_dest_lang)
								            if can_translate
								            else text
								        )
								        addActionLine(doc, rendered, non_dial_dest_lang)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    # def all_transliterator(text, source_script, dest_script):
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    #     return text
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    # Detect the language and script of the non-dialogue text from the
								    # first string entry found after index 0 of any scene (index 0 is
								    # written out as the slug line by the loop further down).
								    # Both names start as None so that a script without any such line no
								    # longer fails with NameError at the prints below.
								    non_dial_src_lang = None
								    non_dial_script = None
								    for scene in tqdm(scenes):
								        first_action = next(
								            (line for i, line in enumerate(scene)
								             if i != 0 and isinstance(line, str)),
								            None,
								        )
								        if first_action is not None:
								            non_dial_src_lang = language_detector(first_action)
								            non_dial_script = script_det(first_action)
								            break  # one sample is enough; stop scanning scenes

								    print("non_dial_src_lang", non_dial_src_lang)
								    print("non_dial_script", non_dial_script)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    non_dial_dest_lang = "en"
							 | 
						||
| 
								 | 
							
								    for scene in tqdm(scenes[:]):
							 | 
						||
| 
								 | 
							
								        for i, line in enumerate(scene):
							 | 
						||
| 
								 | 
							
								            if i == 0:
							 | 
						||
| 
								 | 
							
								                addSlugLine(doc, line)
							 | 
						||
| 
								 | 
							
								                continue
							 | 
						||
| 
								 | 
							
								            if type(line) == type(""):
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								                if non_dial_src_lang == non_dial_dest_lang:
							 | 
						||
| 
								 | 
							
								                    # print("here1")
							 | 
						||
| 
								 | 
							
								                    addActionLine(doc, line, non_dial_dest_lang)
							 | 
						||
| 
								 | 
							
								                else:
							 | 
						||
| 
								 | 
							
								                    # print("here2")
							 | 
						||
| 
								 | 
							
								                    if non_dial_script == default_script[non_dial_src_lang]:
							 | 
						||
| 
								 | 
							
								                        # print("here3")
							 | 
						||
| 
								 | 
							
								                        actionline_translation(
							 | 
						||
| 
								 | 
							
								                            line, non_dial_src_lang, non_dial_dest_lang)
							 | 
						||
| 
								 | 
							
								                    else:
							 | 
						||
| 
								 | 
							
								                        transliterated_text = all_transliteration(line, script_det(
							 | 
						||
| 
								 | 
							
								                            non_dial_src_lang), default_script[non_dial_src_lang])
							 | 
						||
| 
								 | 
							
								                        actionline_translation(
							 | 
						||
| 
								 | 
							
								                            transliterated_text, non_dial_src_lang, non_dial_dest_lang)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								            else:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								                [speaker] = line.keys()
							 | 
						||
| 
								 | 
							
								                if speaker == 'Transition':
							 | 
						||
| 
								 | 
							
								                    addTransition(doc, line[speaker])
							 | 
						||
| 
								 | 
							
								                    continue
							 | 
						||
| 
								 | 
							
								                addSpeaker(doc, speaker)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								                if line[speaker][0] != 'NONE':
							 | 
						||
| 
								 | 
							
								                    addParenthetical(doc, line[speaker][0])
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								                if line[speaker][2] == "":
							 | 
						||
| 
								 | 
							
								                    continue
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								                addDialogue(doc, line[speaker][2], non_dial_dest_lang)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    doc.save(doc_file)
							 | 
						||
| 
								 | 
							
								    return doc_file
							 | 
						||
| 
								 | 
							
								    # doc2.save("....")
							 | 
						||
| 
								 | 
							
								
							 |