899 lines
39 KiB
Python
899 lines
39 KiB
Python
|
from .all_transliteration import all_transliteration
|
||
|
from nltk.tokenize import regexp_tokenize
|
||
|
from .script_writing import default_script
|
||
|
from google.cloud import translate_v2 as Translate
|
||
|
from .translation_resources import ibm_watson, google, aws, azure, yandex, lingvanex
|
||
|
from .transliteration_resources import azure_transliteration, indic_trans, om_transliterator, libindic, indic_transliteration_IAST, indic_transliteration_ITRANS, sheetal
|
||
|
from .script_reading import breaksen, getRefined, getSlugAndNonSlug, getSpeakers, getScenes
|
||
|
from .script_writing import addSlugLine, addActionLine, addSpeaker, addParenthetical, addDialogue, dual_script, addTransition, dial_checker, non_dial_checker
|
||
|
from .selection_source import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs
|
||
|
from .translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
|
||
|
from .buck_2_unicode import buck_2_unicode
|
||
|
from .script_detector import script_cat
|
||
|
from google.cloud import translate_v2 as Translate
|
||
|
from google.cloud import translate
|
||
|
from pymongo import MongoClient
|
||
|
from docx2pdf import convert
|
||
|
import subprocess
|
||
|
import os
|
||
|
import sys
|
||
|
import docx
|
||
|
import re
|
||
|
# import textract
|
||
|
from tqdm import tqdm
|
||
|
from collections import Counter
|
||
|
import ntpath
|
||
|
from docx.shared import Inches, Cm, Pt
|
||
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||
|
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
|
||
|
import requests
|
||
|
import uuid
|
||
|
import json
|
||
|
import nltk.translate.bleu_score as bleu
|
||
|
import nltk.translate.gleu_score as gleu
|
||
|
from rouge_score import rouge_scorer
|
||
|
import numpy as np
|
||
|
import statistics
|
||
|
from statistics import mode
|
||
|
from indicnlp.tokenize import sentence_tokenize
|
||
|
import nltk
|
||
|
from conversion.translation.detection import getInputs
|
||
|
from .final_transliteration_translation import makeTransliteration_translation
|
||
|
from .final_transliteration_only import makeTransliteration_only
|
||
|
import datetime
|
||
|
from pytz import timezone
|
||
|
|
||
|
#######
|
||
|
import numpy as np
|
||
|
import statistics
|
||
|
from statistics import mode
|
||
|
from indicnlp.tokenize import sentence_tokenize
|
||
|
import nltk
|
||
|
# Probe for the NLTK resources used by this module. Nothing is downloaded
# here (the download calls were disabled by the original author); the probes
# only report what is missing.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    # Kept silent as before; nltk.download('punkt') is intentionally not run.
    pass
try:
    # Resource names carry their category: WordNet lives under "corpora/".
    # The bare name 'wordnet' is never found, so the message below used to be
    # printed on every import.
    nltk.data.find('corpora/wordnet')
except LookupError:
    print("error in finding wordnet5555555")
|
||
|
from nltk.tokenize import sent_tokenize
|
||
|
|
||
|
|
||
|
# try: nltk.data.find('tokenizers/punkt')
|
||
|
# except LookupError: nltk.download('punkt')
|
||
|
# try: nltk.data.find('wordnet')
|
||
|
# except LookupError: ###nltk.download('wordnet')
|
||
|
# print("error in finding wordnet")
|
||
|
|
||
|
from MNF.settings import BasePath
|
||
|
basePath = BasePath()
|
||
|
|
||
|
# import logging
|
||
|
# log = logging.getLogger(__name__)
|
||
|
# log.info('Logger working')
|
||
|
|
||
|
# from logger import get_module_logger
|
||
|
# log = get_module_logger(__name__)
|
||
|
# log.info('Logger working')
|
||
|
|
||
|
|
||
|
def convert_to_pdf(input_docx, out_folder):
    """Convert a document to PDF with headless LibreOffice.

    Args:
        input_docx: Path of the document handed to ``libreoffice``.
        out_folder: Directory passed to ``--outdir``.

    Blocks until the ``libreoffice`` process exits. A non-zero exit status is
    reported on stdout instead of being silently discarded.
    """
    command = ['libreoffice', '--headless', '--convert-to', 'pdf',
               '--outdir', out_folder, input_docx]
    print(['--convert-to', 'pdf', input_docx])
    completed = subprocess.run(command)
    if completed.returncode != 0:
        print("libreoffice conversion failed with exit code",
              completed.returncode)
|
||
|
|
||
|
|
||
|
def translate_function(script_path, script_id, dial_conv_script, dial_src_lang, non_dial_src_lang, dial_src_script, restrict_to_five, option3, option4, option5, option6):
|
||
|
# --- database handle -------------------------------------------------------
# NOTE(review): the connection string embeds credentials in source; it should
# come from configuration / secret storage.
MongoDBUrl = "mongodb+srv://MNF:root@cluster0.gbkxi.gcp.mongodb.net/DB?retryWrites=true&w=majority"
global client, script_title, db
client = MongoClient(MongoDBUrl)
db = client.DB

# NOTE(review): find_one returns None for an unknown script_id, in which case
# the .get() calls below raise AttributeError.
user_script_data = db["mnfapp_mnfscriptdatabase"].find_one(
    {"script_id": script_id})

# --- Google translation clients ---------------------------------------------
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/MNF/json_keys/authentication.json"
translate_client = Translate.Client()
# This rebinds the global `client` from the Mongo client to the Translation
# service client; `db` keeps pointing at the Mongo database.
client = translate.TranslationServiceClient()
project_id = 'authentic-bongo-272808'
location = "global"
parent = f"projects/{project_id}/locations/{location}"

# --- conversion parameters ---------------------------------------------------
doc = docx.Document()
filename1 = script_path
total_dial_src_lang = dial_src_lang

dial_dest_lang = user_script_data.get("dial_dest_language")
dial_dest_script = user_script_data.get("dial_dest_script")
non_dial_dest_lang = user_script_data.get("nondial_dest_language")

# The stored dual-script preference ("Yes"/"No") is honoured only when the
# requested conversion script equals the stored destination script.
if dial_conv_script == dial_dest_script:
    dual_dial_script = user_script_data.get("dual_dial_script")
else:
    dual_dial_script = 'No'

# Output name: trans_<lang>_<day>_<month>_<HHMM>_trans_of_<input name>,
# stamped in Asia/Kolkata time. The time part used to be "%H" + "%I"
# (24-hour hour followed by 12-hour hour); it is now hour + minute.
x = datetime.datetime.now(timezone('UTC')).astimezone(
    timezone('Asia/Kolkata'))
doc_file = (basePath + "/media/scripts/translated/" + "trans_"
            + str(dial_dest_lang) + "_" + x.strftime("%d_%b_%H%M")
            + "_trans_of_" + ntpath.basename(filename1))

# UI choices passed in by the caller; compared against "Yes" further down.
ui_option3_choice = option3
ui_option4_choice = option4
ui_option5_choice = option5
ui_option6_choice = option6

print(" ui_option3_choice", ui_option3_choice)
print(" ui_option4_choice", ui_option4_choice)
print(" ui_option5_choice", ui_option5_choice)
print(" ui_option6_choice", ui_option6_choice)
|
||
|
|
||
|
if (non_dial_src_lang != non_dial_dest_lang) or (total_dial_src_lang != dial_dest_lang):
|
||
|
print("non dialoge destination language", non_dial_dest_lang)
|
||
|
print("non dialoge source language", non_dial_src_lang)
|
||
|
print("total dialoge source language", total_dial_src_lang)
|
||
|
print("dialouge destination language", dial_dest_lang)
|
||
|
|
||
|
# "Yes" when source and destination language differ for the non-dialogue
# text and for the dialogues respectively, otherwise "No".
global_non_dialogue_flag = (
    "No" if non_dial_src_lang == non_dial_dest_lang else "Yes")
global_dialogue_flag = (
    "No" if total_dial_src_lang == dial_dest_lang else "Yes")
|
||
|
|
||
|
# Language codes checked with `in translation_list` before a text is sent to
# the translation services. Duplicate entries ('gu' and the repeated
# 'he' ... 'tk' run) have been removed; membership results are unchanged.
translation_list = [
    'en', 'ta', 'hi', 'ar', 'ur', 'kn', 'gu', 'bg', 'bn', 'te', 'ml', 'ru',
    'sr', 'uk', 'hr', 'ga', 'sq', 'mr', 'fa', 'tr', 'hu', 'it', 'ro', 'pa',
    'or', 'zh-CN', 'zh-TW', 'ne', 'fr', 'es', 'id', 'el', 'ja', 'ko', 'be',
    'uz', 'sd', 'af', 'de', 'is', 'ig', 'la', 'pt', 'my', 'th', 'su', 'lo',
    'am', 'si', 'az', 'kk', 'mk', 'bs', 'ps', 'mg', 'ms', 'yo', 'cs', 'da',
    'nl', 'tl', 'no', 'sl', 'sv', 'vi', 'cy', 'he', 'hy', 'km', 'ka', 'mn',
    'ku', 'ky', 'tk', 'fi', 'ht', 'haw', 'lt', 'lb', 'mt', 'pl', 'eo', 'tt',
    'ug', 'ha', 'so', 'sw', 'yi', 'eu', 'ca', 'ceb', 'co', 'et', 'fy', 'gl',
    'hmn', 'rw', 'lv', 'mi', 'sm', 'gd', 'st', 'sn', 'sk', 'xh', 'zu', 'jv',
]
|
||
|
|
||
|
# doc2 = docx.Document()
|
||
|
# sections = doc2.sections
|
||
|
# for section in sections:
|
||
|
# section.top_margin = Inches(0.2)
|
||
|
# section.bottom_margin = Inches(0.2)
|
||
|
# section.left_margin = Inches(0.2)
|
||
|
# section.right_margin = Inches(0.2)
|
||
|
# section = doc2.sections[-1]
|
||
|
# new_height = section.page_width
|
||
|
# section.page_width = section.page_height
|
||
|
# section.page_height = new_height
|
||
|
# name = 'Final table '+doc_file
|
||
|
# doc2.add_heading(name, 0)
|
||
|
# doc_para = doc2.add_paragraph()
|
||
|
# doc_para.add_run('Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex').bold = True
|
||
|
# table2 = doc2.add_table(rows=1,cols=4)
|
||
|
# table2.style = 'TableGrid'
|
||
|
# hdr_Cells = table2.rows[0].cells
|
||
|
# hdr_Cells[0].paragraphs[0].add_run("Input").bold=True
|
||
|
# hdr_Cells[1].paragraphs[0].add_run("Output1").bold=True
|
||
|
# hdr_Cells[2].paragraphs[0].add_run("Output2").bold=True
|
||
|
# hdr_Cells[3].paragraphs[0].add_run("Output3").bold=True
|
||
|
|
||
|
# process the input script and return scenes
|
||
|
refined, total_scenes = getRefined(filename1)
|
||
|
print(refined)
|
||
|
# log.debug(refined)
|
||
|
sluglines, without_slug = getSlugAndNonSlug(refined)
|
||
|
print(sluglines)
|
||
|
# log.debug(sluglines)
|
||
|
characters = getSpeakers(without_slug)
|
||
|
# print(characters)
|
||
|
# log.debug(characters)
|
||
|
scenes, actionline, parenthetical_lis, speakers, dialogues = getScenes(
|
||
|
refined, total_scenes, characters)
|
||
|
# print(scenes)
|
||
|
|
||
|
# to detect the language
|
||
|
def language_detector(text):
    """Return the source language Google detects for *text*.

    The text is sent through ``translate_client.translate`` with Hindi as
    the target; only the ``detectedSourceLanguage`` field of the response
    is used.
    """
    response = translate_client.translate(text, target_language='hi')
    return response["detectedSourceLanguage"]
|
||
|
|
||
|
class myDict(dict):
    """A ``dict`` with an ``add(key, value)`` convenience method."""

    def __init__(self, *args, **kwargs):
        # The previous body rebound the local name ``self`` to a fresh dict,
        # which had no effect. Delegate to ``dict`` instead; calling
        # ``myDict()`` without arguments still yields an empty mapping.
        super().__init__(*args, **kwargs)

    def add(self, key, value):
        """Store *value* under *key*, replacing any existing entry."""
        self[key] = value
|
||
|
|
||
|
def all_translator(sentence, source_lang, target_lang):
    """Translate *sentence* with every available service and pick one result.

    The services are tried in a fixed order (Google, IBM Watson, AWS, Azure,
    Lingvanex, Yandex). Each service that answers without raising gets the
    next consecutive string key ("0", "1", ...) in ``trans`` (its output) and
    ``sources_name`` (its name).

    Returns:
        * the single output when exactly one service answered;
        * the result of ``compare_outputs`` when several answered;
        * *sentence* unchanged when no service answered (this case used to
          end in ``KeyError: '0'``).
    """
    # Azure's wrapper is called without a source language, as before.
    providers = (
        ("GOOGLE", lambda: google(sentence, source_lang, target_lang)),
        ("IBM_WATSON", lambda: ibm_watson(sentence, source_lang, target_lang)),
        ("AWS", lambda: aws(sentence, source_lang, target_lang)),
        ("AZURE", lambda: azure(sentence, target_lang)),
        ("LINGVANEX", lambda: lingvanex(sentence, source_lang, target_lang)),
        ("YANDEX", lambda: yandex(sentence, source_lang, target_lang)),
    )

    trans = myDict()
    sources_name = myDict()
    for provider_name, call in providers:
        try:
            result = call()
        except Exception:
            # Best effort: a failing service is skipped. Unlike the former
            # bare ``except:``, KeyboardInterrupt/SystemExit now propagate.
            continue
        key = str(len(trans))
        trans.add(key, result)
        sources_name.add(key, provider_name)

    if not trans:
        print("no translation service answered; keeping the original text")
        return sentence
    if len(sources_name) == 1:
        return trans["0"]
    # NOTE: entry "0" is the first service that succeeded; compare_outputs
    # treats it as the Google output, which holds only if Google did not fail.
    return compare_outputs(
        sentence, trans["0"], trans, sources_name, target_lang)
|
||
|
|
||
|
def recursive_dots(Sentence, source_lang, target_lang):
    """Translate a text around its ellipsis marks.

    The text is split on the first mark from ``special_characters`` that it
    contains. Each non-blank fragment is translated (through
    ``translation_with_spcecial_dots`` when it still contains another mark,
    otherwise through ``all_translator``), and the mark is re-appended after
    every fragment except the last. Fragments are joined with single spaces.

    Fixes over the previous version:
      * the fragment loop no longer overwrites ``Sentence``, so later marks
        are not re-applied to the last fragment (which duplicated text);
      * only the first matching mark is used for splitting;
      * "last fragment" is decided by position, not by comparing text;
      * a text without any mark is translated as a whole instead of
        returning an empty string.
    """
    special_characters = ['....', '…', '. . .', '...']
    splitter = next(
        (mark for mark in special_characters if mark in Sentence), None)
    if splitter is None:
        return all_translator(Sentence, source_lang, target_lang)

    fragments = Sentence.split(splitter)
    last_index = len(fragments) - 1
    translated_text = []
    for index, fragment in enumerate(fragments):
        if fragment == "" or fragment == " ":
            continue
        if any(mark in fragment for mark in special_characters):
            trans_text = translation_with_spcecial_dots(
                fragment, source_lang, target_lang)
        else:
            trans_text = all_translator(fragment, source_lang, target_lang)
        if index != last_index:
            trans_text = trans_text + splitter
        translated_text.append(trans_text)

    return " ".join(translated_text)
|
||
|
|
||
|
def translation_with_spcecial_dots(Sentence, source_lang, target_lang):
    """Translate a sentence containing ellipsis marks piece by piece.

    The sentence is split on the first mark from ``special_characters`` that
    occurs in it. Every non-blank fragment is translated (via
    ``recursive_dots`` when it still contains another mark, otherwise via
    ``all_translator``); the mark is put back after each fragment except the
    last, and the pieces are joined with single spaces.

    Fixes over the previous version:
      * a sentence without any mark no longer raises ``UnboundLocalError``;
        it is translated as a whole;
      * "last fragment" is decided by position, so a fragment whose text
        equals the final fragment keeps its trailing mark;
      * the mark is also restored after fragments handled by
        ``recursive_dots``.
    """
    special_characters = ['....', '…', '. . .', '...']
    splitter = next(
        (mark for mark in special_characters if mark in Sentence), None)
    if splitter is None:
        return all_translator(Sentence, source_lang, target_lang)

    fragments = Sentence.split(splitter)
    last_index = len(fragments) - 1
    translated_text = []
    for index, fragment in enumerate(fragments):
        if fragment == "" or fragment == " ":
            continue
        if any(mark in fragment for mark in special_characters):
            trans_text = recursive_dots(fragment, source_lang, target_lang)
        else:
            trans_text = all_translator(fragment, source_lang, target_lang)
        if index != last_index:
            trans_text = trans_text + splitter
        translated_text.append(trans_text)

    return " ".join(translated_text)
|
||
|
|
||
|
def translate_comparison(text, source_lang, target_lang):
    """Translate *text* sentence by sentence and join the results.

    Sentences come from ``sent_tokenize``. A sentence containing one of the
    ellipsis marks goes through ``translation_with_spcecial_dots``; any other
    sentence goes straight to ``all_translator``. The translated sentences
    are joined with single spaces.
    """
    special_characters = ['....', '…', '. . .', '...']

    def _translate_one(sentence):
        has_dots = any(mark in sentence for mark in special_characters)
        if has_dots:
            return translation_with_spcecial_dots(
                sentence, source_lang, target_lang)
        return all_translator(sentence, source_lang, target_lang)

    pieces = []
    for sentence in sent_tokenize(text):
        pieces.append(_translate_one(sentence))
    return " ".join(pieces)
|
||
|
|
||
|
def script_det(text):
    """Return the script of the first non-punctuation character of *text*.

    The first character that is not listed in ``punctuations`` is passed to
    ``script_cat`` and element ``[0]`` of its result is returned. When every
    character is punctuation (or *text* is empty) ``script_cat`` receives an
    empty string.
    """
    # The backslash is now written as "\\": the former "\," was an invalid
    # escape sequence. The characters in the string are unchanged.
    punctuations = '''!()-[]{};:'"\\,<>./?@#$%^&*_~“"”'''
    first_char = next(
        (char for char in text if char not in punctuations), "")
    return script_cat(first_char)[0]
|
||
|
|
||
|
def word_transliterate(sentence, dest_script):
    """Return *sentence* unchanged.

    Placeholder: ``dest_script`` is accepted but not used.
    """
    unchanged = sentence
    return unchanged
|
||
|
|
||
|
def final_out(output1, output2, output3, dest_lang):
    """Pick the first candidate written entirely in the destination script.

    A candidate is rejected when any of its word tokens has a script (per
    ``script_det``) different from ``default_script[dest_lang]``. The
    candidates are checked in the order output1, output2, output3. When all
    three are rejected, ``output1`` passed through ``word_transliterate`` is
    returned.
    """
    def _has_foreign_word(text):
        # Raw string: the former "[\w']+" relied on an invalid escape.
        return any(
            script_det(word) != default_script[dest_lang]
            for word in regexp_tokenize(text, r"[\w']+"))

    for candidate in (output1, output2, output3):
        if not _has_foreign_word(candidate):
            return candidate
    return word_transliterate(output1, default_script[dest_lang])
|
||
|
|
||
|
# take a sentence and give translated sentence by comparing outputs from different resources
|
||
|
def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose one translation out of the outputs of several services.

    Args:
        sentence: The source sentence (not used in the selection itself).
        t0: Entry "0" of *trans*, treated as the Google output.
        trans: Mapping of string index -> translated text.
        sources_name: Mapping of string index -> service name.
        target_lang: Language code forwarded to ``final_out``.

    Five scoring helpers (manual, GLEU, METEOR and the two ROUGE variants)
    each propose an output and its source. If all five agree with *t0*, *t0*
    is returned. Otherwise the sources of the disagreeing proposals (a single
    space for agreeing ones) are handed to ``selection_source``, the up to
    three chosen sources are mapped back to their translations, and
    ``final_out`` picks among them.
    """
    methods_name = {'0': 'MNF', '1': 'Gleu',
                    '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
    google_output = t0

    output1, source1 = manual_diff_score(trans, sources_name)
    output2, source2 = gleu_diff_score(trans, sources_name)
    output3, source3 = meteor_diff_score(trans, sources_name)
    output4, source4, output5, source5 = rouge_diff_score(
        trans, sources_name)
    proposals = [(output1, source1), (output2, source2), (output3, source3),
                 (output4, source4), (output5, source5)]

    if all(proposed == google_output for proposed, _ in proposals):
        return google_output

    # One slot per scoring method: its source when it disagrees with Google.
    s = [source if proposed != google_output else " "
         for proposed, source in proposals]
    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(
        s, sources_name, trans, methods_name)

    def _translation_of(source, fallback):
        # Reverse lookup: service name -> its translation. The last matching
        # entry wins; *fallback* is kept when the name is not present.
        for key, name in sources_name.items():
            if name == source:
                fallback = trans[str(key)]
        return fallback

    output1 = _translation_of(s1ANDm1[0], output1)
    if s2ANDm2[0] != "":
        output2 = _translation_of(s2ANDm2[0], output2)
    else:
        output2 = output1
    if s3ANDm3[0] != "":
        output3 = _translation_of(s3ANDm3[0], output3)
    else:
        output3 = output1

    return final_out(output1, output2, output3, target_lang)
|
||
|
|
||
|
# to return the table with best 3 outputs
|
||
|
def add_dial_comparison_doc2(doc2, table2, sentence, s1ANDm1, s2ANDm2, s3ANDm3, sources_name, trans):
    """Append one comparison row to *table2*.

    Column 0 receives *sentence*. Columns 1-3 receive the translation that
    belongs to the source named in ``s1ANDm1[0]`` / ``s2ANDm2[0]`` /
    ``s3ANDm3[0]``, followed by two runs naming the source (element 0) and
    the methods (element 1). Columns 2 and 3 are left as empty text when
    their source name is the empty string.

    *doc2* is accepted for signature compatibility and is not used. A source
    name missing from *sources_name* now yields an empty translation text
    rather than an unbound-variable error.
    """
    def _translation_of(source):
        found = ""
        for key, name in sources_name.items():
            if name == source:
                found = trans[str(key)]
        return found

    def _fill(cell, choice):
        cell.text = _translation_of(choice[0])
        paragraph = cell.paragraphs[0]
        paragraph.add_run('(Source : ' + str(choice[0]) + ')')
        paragraph.add_run('(Methods : ' + str(choice[1]) + ')')

    row_Cells = table2.add_row().cells
    row_Cells[0].text = sentence
    _fill(row_Cells[1], s1ANDm1)
    for cell, choice in ((row_Cells[2], s2ANDm2), (row_Cells[3], s3ANDm3)):
        if choice[0] == "":
            cell.text = ""
        else:
            _fill(cell, choice)
|
||
|
|
||
|
def ui_option3_and_4(dial_src_lang, dial_dest_lang, dialogue, ui_option_1st_choice):
    """Write one dialogue to the output document for UI options 3 and 4.

    Args:
        dial_src_lang: Detected language of *dialogue*.
        dial_dest_lang: Destination language of the dialogues.
        dialogue: The dialogue text.
        ui_option_1st_choice: "Yes" to translate; any other value
            transliterates the dialogue to ``default_script[dial_src_lang]``.

    With "Yes", the dialogue is translated when ``dial_checker`` returns a
    truthy value and both languages are in ``translation_list``; otherwise it
    is written unchanged. The result goes through ``dual_script`` when
    ``dual_dial_script`` is "Yes", else through ``addDialogue``.

    Changes over the previous version: the dialogue is no longer dropped
    when translation is requested but one of the languages is unsupported;
    the function works on its *dialogue* argument instead of writing to the
    enclosing loop's ``line[speaker][2]``; empty dialogues return early in
    every branch.
    """
    print("dial_src_lang", dial_src_lang)
    print("dial_dest_lang", dial_dest_lang)
    if dialogue == "":
        return

    def _emit(converted):
        if dual_dial_script == "Yes":
            dual_script(doc, dialogue, converted, dial_dest_lang)
        else:
            addDialogue(doc, converted, dial_dest_lang)

    if ui_option_1st_choice != "Yes":
        print("in ui32")
        _emit(all_transliteration(
            dialogue, script_det(dialogue), default_script[dial_src_lang]))
        return

    print("in ui31")
    if dial_checker(dial_dest_lang, dial_src_lang):
        print("in ui311")
        if dial_src_lang in translation_list and dial_dest_lang in translation_list:
            _emit(translate_comparison(
                dialogue, dial_src_lang, dial_dest_lang))
            return
    else:
        print("in ui312")
    # Not translated: keep the dialogue as it is.
    _emit(dialogue)
|
||
|
|
||
|
def check_each_word1(sentence):
    """Return "Yes" if any whitespace-separated word of *sentence* is
    detected as ``non_dial_src_lang``, otherwise "No".

    Detection stops at the first matching word.
    """
    found = any(
        language_detector(word) == non_dial_src_lang
        for word in sentence.split())
    return "Yes" if found else "No"
|
||
|
|
||
|
def check_each_word2(sentence):
    """Return "Yes" if any word of *sentence* is detected as a language
    other than ``non_dial_src_lang`` and ``total_dial_src_lang``, else "No".

    Each word is sent to ``language_detector`` once; the previous version
    could issue two detection requests for the same word.
    """
    known_langs = (non_dial_src_lang, total_dial_src_lang)
    for word in sentence.split():
        if language_detector(word) not in known_langs:
            return "Yes"
    return "No"
|
||
|
|
||
|
def word_replacement(sentence, trans_text, lang, dest_lang):
    """Swap per-word outputs inside an already translated sentence.

    For every word of *sentence* detected as *lang*, two ``google`` results
    are fetched: one with an empty source language and one with *lang* as
    the source. Wherever the first result occurs in *trans_text* it is
    replaced by the second.
    # NOTE(review): the original code named the two results "translate" and
    # "transliterate"; confirm what ``google`` returns for each call.

    Returns:
        *trans_text* after all replacements.
    """
    words = [word for word in sentence.split()
             if language_detector(word) == lang]
    replacements = [
        (google(word, '', dest_lang), google(word, lang, dest_lang))
        for word in words
    ]
    for needle, substitute in replacements:
        # An empty needle would make str.replace insert the substitute
        # between every character, so it is skipped.
        if needle and needle in trans_text:
            trans_text = trans_text.replace(needle, substitute)
    return trans_text
|
||
|
|
||
|
def ui_option5_translate_comparison(text, source_lang, target_lang):
    """Translate *text* sentence by sentence, honouring UI options 5 and 6.

    For every sentence from ``sent_tokenize`` the language is detected once:

    * detected == ``non_dial_src_lang`` (option 5): translate when
      ``ui_option5_choice`` is "Yes", otherwise transliterate to the default
      script of the detected language;
    * detected is neither that nor ``total_dial_src_lang`` (option 6): same
      choice, driven by ``ui_option6_choice``;
    * detected == ``total_dial_src_lang``: translate with
      ``translate_comparison``; if ``check_each_word1`` (then
      ``check_each_word2``) says "Yes" and the matching option is not "Yes",
      post-process the translation with ``word_replacement``.

    Fixes over the previous version: the *source_lang* parameter is no
    longer overwritten by the detected language inside the loop (which
    changed the source language used for all following sentences), and the
    detection request is issued once per sentence instead of up to four
    times.

    Returns:
        The converted sentences joined with single spaces.
    """
    special_characters = ['....', '…', '. . .', '...']

    def _translate(sentence):
        if any(mark in sentence for mark in special_characters):
            return translation_with_spcecial_dots(
                sentence, source_lang, target_lang)
        return all_translator(sentence, source_lang, target_lang)

    def _transliterate(sentence, detected):
        return all_transliteration(
            sentence, script_det(sentence), default_script[detected])

    translated_text = []
    for sentence in sent_tokenize(text):
        detected = language_detector(sentence)

        if detected == non_dial_src_lang:  # option5
            if ui_option5_choice == "Yes":
                print("in ui 51A")
                piece = _translate(sentence)
            else:
                print("in ui 51B")
                piece = _transliterate(sentence, detected)

        elif detected != total_dial_src_lang:  # option6
            print("in ui 513")
            if ui_option6_choice == "Yes":
                piece = _translate(sentence)
            else:
                piece = _transliterate(sentence, detected)

        else:  # sentence is in the dialogue language
            print("5word")
            replace_words = False
            if check_each_word1(sentence) == "Yes":
                print("5word1")
                replace_words = ui_option5_choice != "Yes"
            elif check_each_word2(sentence) == "Yes":
                replace_words = ui_option6_choice != "Yes"
            piece = translate_comparison(sentence, source_lang, target_lang)
            if replace_words:
                # NOTE(review): both cases pass non_dial_src_lang, as the
                # original did; confirm this is intended for option 6.
                piece = word_replacement(
                    sentence, piece, non_dial_src_lang, target_lang)

        translated_text.append(piece)

    return " ".join(translated_text)
|
||
|
|
||
|
def ui_option5_and_6(dial_src_lang, dial_dest_lang, dialogue):
    """Write one dialogue to the output document for UI options 5 and 6.

    Nothing is written for an empty *dialogue*. The dialogue is converted
    with ``ui_option5_translate_comparison`` when ``dial_checker`` returns a
    truthy value and both languages are in ``translation_list``; otherwise it
    is written unchanged. Output goes through ``dual_script`` when
    ``dual_dial_script`` is "Yes", else through ``addDialogue``.

    Changes over the previous version: a dialogue whose languages are not
    supported is written untranslated instead of being dropped, and the
    function no longer writes to the enclosing loop's ``line[speaker][2]``.
    """
    if dialogue == "":
        return

    converted = dialogue
    if dial_checker(dial_dest_lang, dial_src_lang):
        print("in 51")
        if dial_src_lang in translation_list and dial_dest_lang in translation_list:
            converted = ui_option5_translate_comparison(
                dialogue, dial_src_lang, dial_dest_lang)
    else:
        print("in 52")

    if dual_dial_script == "Yes":
        dual_script(doc, dialogue, converted, dial_dest_lang)
    else:
        addDialogue(doc, converted, dial_dest_lang)
|
||
|
|
||
|
# Walk the scenes and write every element to the output document. Only the
# first five scenes are processed when restrict_to_five is 'yes'.
#
# The two former copies of this loop are merged. The differences were two
# debug prints (kept) and a missing 'NONE' check on the parenthetical in the
# untranslated-dialogue branch of the first copy (now applied in both modes).
#
# NOTE: `line` and `speaker` must stay bound at this level; the dialogue
# helpers defined above may read them from the enclosing scope.
selected_scenes = scenes[:5] if restrict_to_five == 'yes' else scenes
for scene in tqdm(selected_scenes):
    for i, line in enumerate(scene):
        # The first element of a scene is its slug line.
        if i == 0:
            addSlugLine(doc, line)
            continue

        # Plain strings are action lines.
        if isinstance(line, str):
            if global_non_dialogue_flag != "Yes":
                print(
                    "Adding actionline without translating:else of global non dialogue flag")
                addActionLine(doc, line, non_dial_dest_lang)
            else:
                print("in actionline")
                if non_dial_src_lang in translation_list and non_dial_dest_lang in translation_list:
                    trans_text = translate_comparison(
                        line, non_dial_src_lang, non_dial_dest_lang)
                    addActionLine(doc, trans_text, non_dial_dest_lang)
                else:
                    print("Adding actionline without translating")
                    addActionLine(doc, line, non_dial_dest_lang)
            continue

        # Everything else is a single-key mapping; index 0 of its value is
        # the parenthetical ('NONE' when absent) and index 2 the dialogue.
        [speaker] = line.keys()
        if speaker == 'Transition':
            addTransition(doc, line[speaker])
            continue
        addSpeaker(doc, speaker)

        if global_dialogue_flag != "Yes":
            # Dialogues need no translation: copy them through.
            if line[speaker][0] != 'NONE':
                addParenthetical(doc, line[speaker][0])
            addDialogue(doc, line[speaker][2], dial_dest_lang)
            continue

        print("In dialogue")
        if line[speaker][0] != 'NONE':
            par_lang = language_detector(line[speaker][0])
            if par_lang == dial_dest_lang:
                out = line[speaker][0]
            else:
                out = google(line[speaker][0], par_lang, dial_dest_lang)
            addParenthetical(doc, out)

        if line[speaker][2] == "":
            continue

        # Detection happens after the empty check so that no request is made
        # for an empty dialogue.
        dial_src_lang = language_detector(line[speaker][2])
        print("dial_src_lang", dial_src_lang)
        print("dialogue", line[speaker][2])

        if dial_src_lang == non_dial_src_lang:
            print("in case ui3")
            ui_option3_and_4(
                dial_src_lang, dial_dest_lang, line[speaker][2], ui_option3_choice)
        elif dial_src_lang != total_dial_src_lang:
            print("in case ui4")
            ui_option3_and_4(
                dial_src_lang, dial_dest_lang, line[speaker][2], ui_option4_choice)
        else:
            print("in case ui5_and_6")
            ui_option5_and_6(
                dial_src_lang, dial_dest_lang, line[speaker][2])
|
||
|
|
||
|
doc.save(doc_file)
|
||
|
|
||
|
formInput = getInputs(doc_file)
|
||
|
dial_src_script = formInput[2]
|
||
|
|
||
|
if dial_conv_script != dial_dest_script:
|
||
|
print(dial_conv_script)
|
||
|
print(dial_dest_script)
|
||
|
print(dual_dial_script)
|
||
|
dual_dial_script = user_script_data.get(
|
||
|
"dual_dial_script") # Yes,No
|
||
|
print("this dual dial script for transliteration. :", dual_dial_script)
|
||
|
print("transliteration_translation started")
|
||
|
makeTransliteration_translation(
|
||
|
"Yes", doc_file, dial_dest_script, dual_dial_script, filename1)
|
||
|
else:
|
||
|
if dial_src_script != dial_dest_script:
|
||
|
print(dial_src_script)
|
||
|
print(dial_dest_script)
|
||
|
print(total_dial_src_lang)
|
||
|
print(dial_dest_lang)
|
||
|
|
||
|
print(dual_dial_script)
|
||
|
dual_dial_script = user_script_data.get(
|
||
|
"dual_dial_script") # Yes,No
|
||
|
print("this dual dial script for transliteration. :", dual_dial_script)
|
||
|
print("transliteration started")
|
||
|
makeTransliteration_only(
|
||
|
"Yes", dial_dest_script, dual_dial_script, filename1, dial_dest_lang)
|
||
|
|
||
|
# Convert the generated document to PDF and record its path on the script's
# database entry.
if user_script_data is not None:
    convert_to_pdf(
        doc_file, rf'{basePath}/media/scripts/translated')
    # splitext removes only the final extension; split('.')[0] cut the path
    # at the first dot anywhere in it.
    saveFile = os.path.splitext(doc_file)[0] + ".pdf"
    # Stored path: the text following the first 'MNF' in the absolute path
    # (up to the next occurrence, if any).
    user_script_data["translated_script_path"] = saveFile.split('MNF')[1]

    # Collection.save() was removed in PyMongo 4; replacing the document by
    # its _id is the equivalent for a document obtained from find_one().
    db["mnfapp_mnfscriptdatabase"].replace_one(
        {"_id": user_script_data["_id"]}, user_script_data)
    print("saved done go to ")

else:
    # `log` is not defined in this module (the logger setup is commented
    # out), so the former log.info(...) call raised NameError.
    print("Entry for script id not found")
|