533 lines
19 KiB
Python
Executable File
533 lines
19 KiB
Python
Executable File
from google.cloud import translate_v2 as Translate
|
||
from google.cloud import translate
|
||
from MNF.settings import BasePath
|
||
from narration.vectorcode.code.functions import ScriptBreakdown
|
||
from .translation.script_writing import default_script
|
||
from .translation.script_detector import script_cat
|
||
from statistics import mode
|
||
from collections import Counter
|
||
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
|
||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||
from docx.shared import Inches, Cm, Pt
|
||
# import textract
|
||
from tqdm import tqdm
|
||
import sys
|
||
import re
|
||
import docx
|
||
import os
|
||
# Module-level python-docx document; its 'Normal' style is configured at the
# end of this section. (It was previously constructed twice in a row, with the
# first instance thrown away.)
doc = docx.Document()
basePath = BasePath()

# google
# The service-account key path is exported before the two translation clients
# below are constructed.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/conversion/My First Project-2573112d5326.json"
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/conversion/gifted-mountain-318504-4f001d5f08db.json"
translate_client = Translate.Client()
client = translate.TranslationServiceClient()
project_id = 'authentic-bongo-272808'
location = "global"
parent = f"projects/{project_id}/locations/{location}"

# Regexes for scene headings: an optional scene number followed by INT/EXT
# (slug_pattern) or by the I/E, E/I shorthand (pat).
slug_pattern = r'[\d]*[.]?[\s]*[IE][NX]T'
pat = r'[\d]*[\s]*[IE]/[IE][.]?'

# Lines that are screenplay transitions.
transitions = ['CUT TO:', 'FADE IN:', 'FADE OUT:', 'DISSOLVE TO:', 'JUMP CUT TO:',
               'JUMP TO:', 'CUT BACK TO:', 'INTERCUT WITH:', 'I/C WITH:', 'BACK TO:', 'INTERVAL']

# NOTE(review): 'FADE' is listed twice; kept as-is so the list contents do not
# change for any other module that may import it.
reserved_words = ['MONTAGE', 'PBS', 'FADE',
                  'FADE', 'TITLE', 'SPLIT', 'SCREEN', 'CUT']

# Default document font: 12pt Courier New on the 'Normal' style.
style = doc.styles['Normal']
font = style.font
font.name = 'Courier New'
font.size = Pt(12)
|
||
|
||
|
||
def breaksen(s):
    """Split text into pieces of a manageable number of words.

    Text with at most 256 whitespace-separated words is returned unchanged as
    a single-element list. Longer text is cut into consecutive chunks of 32
    words, each chunk re-joined with single spaces.

    Args:
        s: The text to split.

    Returns:
        A list of strings. For long input no chunk is empty: the previous
        implementation appended a trailing "" whenever the word count was an
        exact multiple of 32.
    """
    # Split once instead of re-splitting the whole text for every chunk.
    words = s.split()
    if len(words) <= 256:
        return [s]
    return [" ".join(words[start:start + 32])
            for start in range(0, len(words), 32)]
|
||
|
||
|
||
# def getRefined(filename1):
|
||
# print("get_refined_called")
|
||
# total_scenes = 0
|
||
# text = textract.process(filename1, encoding="utf8", errors='ignore')
|
||
# filename = rf"{basePath}/conversion/translation/file.txt"
|
||
# f = open(filename, 'wb')
|
||
# f.write(text)
|
||
# f.close()
|
||
# dialog_coming = False
|
||
# f = open(filename, 'r', encoding="utf8", errors='ignore')
|
||
# doc11 = f.read()
|
||
|
||
# f.close()
|
||
# f1 = open(rf"{basePath}/conversion/translation/file1.txt",
|
||
# 'w', encoding="utf8", errors='ignore')
|
||
# c = 0
|
||
# flag = False
|
||
# print("Slugline")
|
||
# for line in doc11.split("\n"):
|
||
# line = line.strip()
|
||
# print("line 427:", line)
|
||
# if (line.strip().startswith(('INT.', 'INT ')) or
|
||
# line.strip().startswith(('I/E', 'E/I')) or
|
||
# line.strip().startswith(('EXT.', 'EXT ')) or
|
||
# line.strip().startswith('EXT/INT') or
|
||
# line.strip().startswith('INT/EXT') or
|
||
# re.match(slug_pattern, line.strip()) or re.match(pat, line.strip())) and not (line.strip().startswith(('INTERCUT', 'INTERMISSION', 'INTERVAL'))):
|
||
|
||
# flag = True
|
||
# f1.write(line)
|
||
# f1.write('\n')
|
||
# continue
|
||
# else:
|
||
# print("line 96: else loop", line)
|
||
# #line = line.strip()
|
||
# if flag:
|
||
# print("line 99: if loop:", line)
|
||
# if line.strip() == '\n':
|
||
# continue
|
||
# if dialog_coming and (line == '\n' or line.strip() == ""):
|
||
# print("line empty or just have newline", line)
|
||
# continue
|
||
# if dialog_coming:
|
||
# print("line 101 probable dialog or PC: ", line)
|
||
# f1.write(line)
|
||
# f1.write('\n')
|
||
# if re.match(r"\(.*\)", line):
|
||
|
||
# continue
|
||
# else:
|
||
# print(" line 207: else of PCs", line)
|
||
# dialog_coming = False
|
||
# print(" line 457 dialog over")
|
||
# continue
|
||
# continue
|
||
# # if line.isupper() and re.fullmatch(r"([A-Z'’]*[.]*[\s]*[-]*)*([#]*[\s]*[1-9])*(\(.*\))*",line.strip()):
|
||
# if line.isupper() and (re.fullmatch(r"([A-Z'’]+[\s]*[-]*)*([#]*[\s]*[1-9])*(\(.*\))*", line.strip()) or re.fullmatch(r"(MRS?|DR|ER|PHD|ESQ|HON|JR|MS|MESSRS|MMES|MSGR|PROF|REV|RT. HON|SR|ST)\. [A-Z]+", line.strip())):
|
||
# print("line 111: May be speaker: ", line)
|
||
# f1.write(line)
|
||
# f1.write('\n')
|
||
# dialog_coming = True
|
||
# continue
|
||
|
||
# if not line == '\n':
|
||
# print(
|
||
# "470 probably action or something else so just write it", line)
|
||
# f1.write(line)
|
||
# f1.write('\n')
|
||
|
||
# f1.close()
|
||
# print("line 132 file closed")
|
||
# filename1 = rf"{basePath}/conversion/translation/file1.txt"
|
||
# # file.txt contains the data of file1.txt , no usage as of now may be change the mame of the file.txt to file1.txt
|
||
# text = textract.process(filename1, encoding="utf8", errors='ignore')
|
||
# print("line 136: ", text)
|
||
# filename = rf"{basePath}/conversion/translation/file.txt"
|
||
# _, file_extension = os.path.splitext(filename1)
|
||
# f = open(filename, 'wb')
|
||
# f.write(text)
|
||
# f.close()
|
||
|
||
# with open(filename, "r") as input:
|
||
# input_ = input.read().split('\n\n')
|
||
|
||
# refined = []
|
||
|
||
# for line in input_:
|
||
# refined.append(line.strip())
|
||
# refined = list(filter(lambda a: a != "", refined))
|
||
# #print("processing the script")
|
||
|
||
# for i in range(len(refined)):
|
||
# if not (refined[i].strip().startswith(('INT.', 'INT ')) or refined[i].strip().startswith(('EXT.', 'EXT ')) or refined[i].strip().startswith('EXT/INT') or refined[i].strip().startswith('INT/EXT') or refined[i].strip().startswith(('I/E', 'E/I')) or re.match(slug_pattern, refined[i].strip()) or re.match(pat, refined[i].strip())):
|
||
# total_scenes = total_scenes + 1
|
||
# continue
|
||
# refined = refined[i:]
|
||
# break
|
||
# # refined.append(line.strip())
|
||
|
||
# refined = list(filter(lambda a: a != "", refined))
|
||
# print("line 163:Refined", refined)
|
||
# return refined, total_scenes
|
||
|
||
|
||
# def getSlugAndNonSlug(refined):
|
||
# sluglines = []
|
||
# without_slug = []
|
||
# for para in refined:
|
||
# para = para.strip()
|
||
# if para.strip().startswith(('INT.', 'INT')) or para.strip().startswith(('EXT.', 'EXT')) or para.strip().startswith('EXT/INT') or para.strip().startswith(('I/E', 'E/I')) or para.strip().startswith('INT/EXT') or re.match(slug_pattern, para.strip()) or re.match(pat, para.strip()):
|
||
# sluglines.append(para)
|
||
# continue
|
||
# without_slug.append(para)
|
||
# return sluglines, without_slug
|
||
|
||
|
||
# def getSpeakers(without_slug):
|
||
# characters = []
|
||
# for para in without_slug:
|
||
# lis = para.split('\n')
|
||
# i = 0
|
||
# for item in lis:
|
||
# i = i+1
|
||
# i = min(i, len(lis)-2)
|
||
# if item.isupper() and not(lis[i+1].strip() == ""):
|
||
# if re.match(r"[A-Z'’]+[\s]*[-]*[A-Z'’]*([#]*[\s]*[1-9])*(\(.*\))*", item):
|
||
# tem = item.split("(")[0].strip()
|
||
# characters.append(tem.strip())
|
||
# else:
|
||
# continue
|
||
|
||
# characters = list(set(characters))
|
||
# characters = list(filter(lambda x: len(x) > 0, characters))
|
||
# characters = [character for character in characters if set(
|
||
# character.split(" ")).intersection(reserved_words) == set()]
|
||
# return characters
|
||
|
||
|
||
# def getScenes(refined, total_scenes, characters):
|
||
# # To find scenes data structure and prev and next scenes numbers
|
||
# i = 0
|
||
# scene = []
|
||
# dialogues = []
|
||
# speakers = []
|
||
# slugline_dic = {}
|
||
# prev_dial_speaker = ""
|
||
# next_dial_speaker = ""
|
||
# pc = 0
|
||
# scene_no = 0
|
||
# actionline = []
|
||
# successor_scene_no = 0
|
||
# predecessor_scene_no = 0
|
||
# parenthetical_lis = []
|
||
|
||
# scenes = []
|
||
# speaker = ""
|
||
# parenthetical = 'NONE'
|
||
# patttern = r'[\d]*[.]?[\s]*[IE][NX]T'
|
||
# for line in refined:
|
||
# if line.strip().startswith(('INT.', 'INT')) or line.strip().startswith(('EXT.', 'EXT')) or line.strip().startswith('EXT/INT') or line.strip().startswith('INT/EXT') or line.strip().startswith(('I/E', 'E/I')) or re.match(slug_pattern, line.strip()) or re.match(pat, line.strip()):
|
||
# scenes.append(scene)
|
||
# scene = []
|
||
# i = 0
|
||
# scene_no += 1
|
||
# scene.append(line)
|
||
# slugline_dic[scene_no] = line.split("\n")[0].strip('0123456789.- ')
|
||
|
||
# else:
|
||
# lis = line.split("\n")
|
||
# lis = [l.strip() for l in lis]
|
||
# print(" \n Line 222 probable dialogue list", lis)
|
||
# word = lis[0]
|
||
# if word.split('(')[0].strip() in characters:
|
||
# mydic = {}
|
||
# prev_dial_speaker = speaker
|
||
# speaker = word.split('(')[0].strip()
|
||
# print("Speaker 228", speaker)
|
||
# if len(lis) > 1 and re.match(r"\(.*\)", lis[1]):
|
||
# pc = pc+1
|
||
# parenthetical = lis[1]
|
||
# parenthetical = parenthetical.replace("\n", "")
|
||
# dia = ' '.join(lis[2:])
|
||
# dia = dia.replace("\n", "")
|
||
# # renu
|
||
# dia = dia.replace("\"", '')
|
||
|
||
# else:
|
||
# dia = ''.join(lis[1:])
|
||
# dia = dia.replace("\n", "")
|
||
# dia = dia.replace("\"", '')
|
||
# print(" length dia\n", len(dia))
|
||
# if not (len(dia) == 0 and parenthetical == "NONE"):
|
||
# print(" len dia != and Parenthetical == NONE: 384 ")
|
||
|
||
# if i-1 >= 0:
|
||
# try:
|
||
# prev = main_lis[scene_no-1][i-1]
|
||
# except:
|
||
# prev = ""
|
||
# else:
|
||
# prev = ""
|
||
# try:
|
||
# next = main_lis[scene_no-1][i+1]
|
||
# except:
|
||
# next = ""
|
||
# # prev is previous speaker and next is next speaker of the dialogue
|
||
# mydic[speaker] = [parenthetical,
|
||
# scene_no, dia, len(dia), prev, next]
|
||
# print("line 259", mydic)
|
||
# #print("mydic 260", speaker, mydic[speaker])
|
||
# prev, next = "", ""
|
||
# i = i+1
|
||
# speakers.append(speaker)
|
||
# parenthetical_lis.append(parenthetical)
|
||
# dialogues.append(mydic)
|
||
# scene.append(mydic)
|
||
# parenthetical = "NONE"
|
||
# else:
|
||
# line = line.replace("\n", " ")
|
||
# line = ' '.join(line.split())
|
||
# if line.strip() in transitions:
|
||
# scene.append({'Transition': line.strip()})
|
||
# continue
|
||
# actionline.append(line)
|
||
# scene.append(line.strip())
|
||
|
||
# scenes.append(scene)
|
||
# speakers = list(set(speakers))
|
||
# scenes = scenes[1:]
|
||
# s = []
|
||
# for scene in scenes:
|
||
# s1 = []
|
||
# for ele in scene:
|
||
# if type(ele) == type(""):
|
||
# s1.extend(ele.split("\n"))
|
||
# else:
|
||
# s1.append(ele)
|
||
# s.append(s1)
|
||
# print("dialogue: ", dialogues)
|
||
# return s, actionline, parenthetical_lis, speakers, dialogues
|
||
|
||
|
||
def language_detector(text):
    """Return the source-language code detected for ``text``.

    The text is sent through the module-level ``translate_client`` with Hindi
    ('hi') as the target language; only the ``detectedSourceLanguage`` field
    of the response is used and the translation itself is discarded.
    """
    response = translate_client.translate(text, target_language='hi')
    return response["detectedSourceLanguage"]
|
||
|
||
|
||
def script_det(text):
    """Return the script of the first non-punctuation character of ``text``.

    The first character that is not in the punctuation set below is passed to
    ``script_cat`` and the first element of its result is returned. Whitespace
    and digits are not in the set, so they are not skipped. If every character
    is punctuation (or ``text`` is empty), ``script_cat`` is called with "".
    """
    # Raw string: the set contains a literal backslash. The previous non-raw
    # literal relied on the invalid escape sequence "\," which newer Python
    # versions warn about; the characters in the set are unchanged.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    first_char = next((char for char in text if char not in punctuations), "")
    return script_cat(first_char)[0]
|
||
|
||
|
||
'''
A. Language of the highest number of full dialogues,
B. Number of dialogues in the action-line language,
C. Number of dialogues in other languages.
'''
|
||
|
||
|
||
def A_B_C(dialogue_language, non_dial_src_lang):
    """Summarise the languages detected across the dialogues.

    Args:
        dialogue_language: Detected language codes, one entry per dialogue.
        non_dial_src_lang: Language code detected for the action lines.

    Returns:
        A tuple ``(A, B, C)``:
          * A - the most frequent dialogue language (on a tie, the language
            that appears first in ``dialogue_language``),
          * B - the number of dialogues in the action-line language, or 0
            when that language is A itself or never occurs,
          * C - the number of dialogues in any other language.

    Raises:
        IndexError: If ``dialogue_language`` is empty.
    """
    print("line 316:dialogue_language", dialogue_language)
    print(non_dial_src_lang)
    dict1 = dict(Counter(dialogue_language))
    print("line 319:dict1", dict1)
    if not dict1:
        # Same exception type the old ``sources[0]`` lookup produced, but with
        # a message that says what went wrong.
        raise IndexError("A_B_C needs at least one dialogue language")

    # Stable sort: languages with equal counts keep first-seen order.
    ranked = sorted(dict1.items(), key=lambda item: item[1], reverse=True)
    sorted_values = [count for _, count in ranked]
    print("line 321:sorted_values:", sorted_values)
    sources = [lang for lang, _ in ranked]
    print("line 328: sources: ", sources)

    A = sources[0]
    B = 0 if non_dial_src_lang == A else dict1.get(non_dial_src_lang, 0)
    # Everything that is neither A nor the action-line language. The previous
    # ``sum(sorted_values[2:])`` was only right when the action-line language
    # happened to be ranked second.
    C = sum(sorted_values) - dict1[A] - B
    return A, B, C
|
||
|
||
|
||
def dial_each_word_lang1(non_dial_src_lang, dial):
    """Tell whether any word of ``dial`` is detected as the action-line language.

    Words are checked one at a time with ``language_detector`` and the scan
    stops at the first match. The answer is the string "True" or "False",
    not a bool.
    """
    found = any(
        language_detector(token) == non_dial_src_lang
        for token in dial.split()
    )
    return "True" if found else "False"
|
||
|
||
|
||
def dial_each_word_lang2(non_dial_src_lang, A, dial):
    """Tell whether ``dial`` contains a word in some third language.

    A word counts when its detected language is neither the action-line
    language nor the main dialogue language.

    Args:
        non_dial_src_lang: Language code of the action lines.
        A: Language code of the main dialogue language.
        dial: The dialogue text; it is split on whitespace.

    Returns:
        The string "True" if such a word is found, otherwise "False".
    """
    # The previous test was ``lang != non_dial_src_lang or lang != A``, which
    # is true for every word whenever the two languages differ. It also called
    # the detection service twice per word.
    known_languages = (non_dial_src_lang, A)
    for word in dial.split():
        if language_detector(word) not in known_languages:
            return "True"
    return "False"
|
||
|
||
|
||
def word_with_actionline(scenes, A, non_dial_src_lang):
    """Check for main-language dialogues that contain action-line-language words.

    Args:
        scenes: Iterable of scenes. Each scene is a sequence whose first item
            is skipped; later items are either plain strings (ignored here) or
            single-key dicts mapping a speaker (or 'Transition') to a list
            whose element 2 is the dialogue text.
        A: Language code of the main dialogue language.
        non_dial_src_lang: Language code of the action lines.

    Returns:
        "True" as soon as a dialogue detected as language A contains a word
        detected as ``non_dial_src_lang``; otherwise "False". The previous
        version returned None when nothing matched.
    """
    if A == non_dial_src_lang:
        # Both languages are the same, so there is nothing to look for.
        return "False"

    for scene in tqdm(scenes[:]):
        for i, line in enumerate(scene):
            if i == 0 or isinstance(line, str):
                continue
            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            dialogue = line[speaker][2]
            if language_detector(dialogue) != A:
                continue
            if dial_each_word_lang1(non_dial_src_lang, dialogue) == "True":
                return "True"
    return "False"
|
||
|
||
|
||
def word_with_other(scenes, A, non_dial_src_lang):
    """Check main-language dialogues with ``dial_each_word_lang2``.

    Args:
        scenes: Iterable of scenes. Each scene is a sequence whose first item
            is skipped; later items are either plain strings (ignored here) or
            single-key dicts mapping a speaker (or 'Transition') to a list
            whose element 2 is the dialogue text.
        A: Language code of the main dialogue language.
        non_dial_src_lang: Language code of the action lines.

    Returns:
        "True" as soon as ``dial_each_word_lang2`` reports "True" for a
        dialogue detected as language A; otherwise "False". The previous
        version returned None when nothing matched.
    """
    for scene in tqdm(scenes[:]):
        for i, line in enumerate(scene):
            if i == 0 or isinstance(line, str):
                continue
            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            dialogue = line[speaker][2]
            if language_detector(dialogue) != A:
                continue
            if dial_each_word_lang2(non_dial_src_lang, A, dialogue) == "True":
                return "True"
    return "False"
|
||
|
||
|
||
def getInputs(filename1):
    """Detect the languages and scripts used in a screenplay file.

    The file is broken down with ``ScriptBreakdown`` and at most the first 10
    scenes are sampled. The first plain-string line found after a scene's
    first item supplies the action-line language and script; every
    non-transition dialogue supplies one language and one script sample.

    Args:
        filename1: Path of the script file, passed to
            ``ScriptBreakdown().getRefined``.

    Returns:
        A list of eight items:
        ``[non_dial_src_lang, dial_src_lang, dial_src_script,
        non_dial_src_script, UI_option3, UI_option4, "Yes", "Yes"]``
        where ``UI_option3`` is "Yes" when some dialogues are in the
        action-line language (B > 0) and ``UI_option4`` is "Yes" when some
        dialogues are in other languages (C > 0).

    Raises:
        ValueError: If the sampled scenes contain no action line, so the
            action-line language cannot be determined (this used to surface
            as an UnboundLocalError).
    """
    refined, total_scenes = ScriptBreakdown().getRefined(filename1)
    sluglines, without_slug = ScriptBreakdown().getSlugAndNonSlug(refined)
    characters = ScriptBreakdown().getSpeakers(without_slug)
    scenes, actionline, parenthetical_lis, speakers, dialogues = ScriptBreakdown().getScenes(
        refined, total_scenes, characters)
    print("line 405:scenes: ", scenes)

    language_of_all_dialogues = []
    script_of_all_dialogues = []
    action_line_seen = False
    non_dial_src_lang = None
    non_dial_src_script = None

    # Only the first 10 scenes are sampled.
    for scene in tqdm(scenes[:10]):
        for i, line in enumerate(scene):
            if i == 0:
                continue
            if isinstance(line, str):
                # Only the first plain-string line is used for detection.
                if not action_line_seen:
                    non_dial_src_lang = language_detector(line)
                    non_dial_src_script = script_det(line)
                    action_line_seen = True
                continue

            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            dial_src_lang = language_detector(line[speaker][2])
            print("dial_src_lang:line 430:", dial_src_lang)
            language_of_all_dialogues.append(dial_src_lang)
            script_of_all_dialogues.append(script_det(line[speaker][2]))

    print(language_of_all_dialogues)
    if not action_line_seen:
        raise ValueError(
            "no action line found in the sampled scenes; "
            "cannot detect the action-line language")

    A, B, C = A_B_C(language_of_all_dialogues, non_dial_src_lang)
    dial_src_script = mode(script_of_all_dialogues)

    # Computed and logged only; it is not part of the returned list.
    one_step_process = "Yes" if dial_src_script == default_script[A] else "Can_not_say"
    print("one_step_process", one_step_process)

    if B > 0:
        print("UI option3 - yes")
        UI_option3 = "Yes"
    else:
        print("UI option3 - no")
        UI_option3 = "No"

    if C > 0:
        print("UI option4 - yes")
        UI_option4 = "Yes"
    else:
        print("UI option4 - no")
        UI_option4 = "No"

    dial_src_lang = A
    # NOTE(review): options 5 and 6 are hard-coded to "Yes". The word-level
    # checks (word_with_actionline / word_with_other) that used to follow this
    # return were unreachable and have been removed.
    return [non_dial_src_lang, dial_src_lang, dial_src_script, non_dial_src_script,
            UI_option3, UI_option4, "Yes", "Yes"]
|
||
|
||
# filename1 = sys.argv[1]
|
||
# getInputs(filename1)
|