637 lines
23 KiB
Python
637 lines
23 KiB
Python
|
# import textract
|
|||
|
from tqdm import tqdm
|
|||
|
import sys
|
|||
|
import re
|
|||
|
import docx
|
|||
|
import os
|
|||
|
doc = docx.Document()
|
|||
|
from docx.shared import Inches, Cm, Pt
|
|||
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|||
|
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
|
|||
|
from collections import Counter
|
|||
|
from statistics import mode
|
|||
|
|
|||
|
|
|||
|
from .translation.script_detector import script_cat
|
|||
|
from .translation.script_writing import default_script
|
|||
|
|
|||
|
|
|||
|
# import textract
|
|||
|
from tqdm import tqdm
|
|||
|
import sys
|
|||
|
import re
|
|||
|
import docx
|
|||
|
import os
|
|||
|
doc = docx.Document()
|
|||
|
from docx.shared import Inches, Cm, Pt
|
|||
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|||
|
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
|
|||
|
from MNF.settings import BasePath
|
|||
|
# Base path taken from the MNF settings helper; it is interpolated into the
# credential and scratch-file paths used throughout this module.
basePath = BasePath()

#google
#os.environ["GOOGLE_APPLICATION_CREDENTIALS"]=rf"{basePath}/conversion/My First Project-2573112d5326.json"
# The credentials variable is set BEFORE the google.cloud imports and client
# constructors below, so keep this statement order.
# NOTE(review): the key file name is hard-coded in source — consider moving it
# to configuration.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/conversion/gifted-mountain-318504-4f001d5f08db.json"
from google.cloud import translate
from google.cloud import translate_v2 as Translate
# v2 client: used by language_detector() in this file.
translate_client = Translate.Client()
# TranslationServiceClient plus the `parent` resource path built below; neither
# is referenced by the functions visible in this file.
client = translate.TranslationServiceClient()
# NOTE(review): this project id does not match the project named in the
# credentials file above — confirm which one is intended.
project_id = 'authentic-bongo-272808'
location = "global"
parent = f"projects/{project_id}/locations/{location}"

# Scene-heading detector: optional digits, optional dot, optional whitespace,
# then INT/EXT (the character classes also admit "ENT" and "IXT").
slug_pattern= r'[\d]*[.]?[\s]*[IE][NX]T'
# Scene-heading detector for the abbreviated forms: optional digits and
# whitespace, then I/E, E/I, I/I or E/E with an optional trailing dot.
pat = r'[\d]*[\s]*[IE]/[IE][.]?'
# Paragraphs equal to one of these strings are stored as {'Transition': ...}.
transitions = ['CUT TO:','FADE IN:','FADE OUT:','DISSOLVE TO:','JUMP CUT TO:','JUMP TO:','CUT BACK TO:','INTERCUT WITH:','I/C WITH:','BACK TO:', 'INTERVAL']
# Candidate speaker names containing any of these words are discarded by
# getSpeakers(). ('FADE' is listed twice; the duplicate has no effect.)
reserved_words = ['MONTAGE','PBS','FADE','FADE','TITLE','SPLIT', 'SCREEN','CUT']
# Default paragraph style of the module-level `doc`: Courier New, 12 pt.
style = doc.styles['Normal']
font = style.font
font.name = 'Courier New'
font.size = Pt(12)
|
|||
|
|
|||
|
def breaksen(s, max_words=256, chunk_words=32):
    """Split an over-long sentence into chunks of whole words.

    Args:
        s: The text to split.
        max_words: Texts with at most this many whitespace-separated words are
            returned unchanged as a single-element list.
        chunk_words: Number of words per chunk when the text is split.

    Returns:
        ``[s]`` when the text is short enough; otherwise a list of chunks,
        each holding up to ``chunk_words`` words joined by single spaces.
        No empty chunk is emitted when the word count is an exact multiple
        of ``chunk_words``.
    """
    # Split once instead of re-splitting the whole string for every chunk.
    words = s.split()
    if len(words) <= max_words:
        return [s]
    return [
        " ".join(words[start:start + chunk_words])
        for start in range(0, len(words), chunk_words)
    ]
|
|||
|
|
|||
|
def getRefined(filename1):
    """Extract a script file and return its paragraphs from the first slugline on.

    The text of ``filename1`` is extracted, rewritten line by line into
    ``file1.txt`` (speaker line, optional parenthetical and first dialogue
    line are written without blank lines between them), extracted again and
    split into blank-line separated paragraphs.

    Args:
        filename1: Path of the script file to extract.

    Returns:
        A tuple ``(refined, total_scenes)``. ``refined`` is the list of
        non-empty, stripped paragraphs starting at the first paragraph that
        looks like a scene heading (the whole list if none does). Despite its
        name, ``total_scenes`` is the number of paragraphs that precede that
        first heading.
    """
    # NOTE(review): `textract` is used here but its import is commented out at
    # the top of this file; unless it is imported elsewhere this function
    # raises NameError. Confirm and restore the import.
    # NOTE(review): indentation was lost in the reviewed copy of this file.
    # This body assumes every non-slug line is handled only after the first
    # slugline has been seen (i.e. inside `if flag:`) — confirm.

    def looks_like_slug(text):
        # Scene-heading test shared by the line pass and the final trim.
        return bool(
            text.startswith(('INT.', 'INT ', 'I/E', 'E/I', 'EXT.', 'EXT ',
                             'EXT/INT', 'INT/EXT'))
            or re.match(slug_pattern, text)
            or re.match(pat, text)
        )

    print("get_refined_called")
    raw_path = rf"{basePath}/conversion/translation/file.txt"
    cleaned_path = rf"{basePath}/conversion/translation/file1.txt"
    speaker_re = r"([A-Z'’]+[\s]*[-]*)*([#]*[\s]*[1-9])*(\(.*\))*"
    titled_speaker_re = r"(MRS?|DR|ER|PHD|ESQ|HON|JR|MS|MESSRS|MMES|MSGR|PROF|REV|RT. HON|SR|ST)\. [A-Z]+"

    # First extraction: dump the raw bytes, then read them back as text.
    text = textract.process(filename1, encoding="utf8", errors='ignore')
    with open(raw_path, 'wb') as raw_file:
        raw_file.write(text)
    with open(raw_path, 'r', encoding="utf8", errors='ignore') as raw_file:
        doc11 = raw_file.read()

    dialog_coming = False  # True right after a speaker line was written
    flag = False           # True once the first slugline has been seen
    print("Slugline")
    with open(cleaned_path, 'w', encoding="utf8", errors='ignore') as f1:
        for line in doc11.split("\n"):
            line = line.strip()
            print("line 427:", line)
            if looks_like_slug(line) and not line.startswith(
                    ('INTERCUT', 'INTERMISSION', 'INTERVAL')):
                flag = True
                f1.write(line + '\n')
                continue

            print("line 96: else loop", line)
            if not flag:
                # Everything before the first slugline is skipped.
                continue
            print("line 99: if loop:", line)

            if dialog_coming:
                if line == "":
                    # Blank lines between a speaker and the dialogue are
                    # dropped so the two stay in one paragraph.
                    print("line empty or just have newline", line)
                    continue
                print("line 101 probable dialog or PC: ", line)
                f1.write(line + '\n')
                if not re.match(r"\(.*\)", line):
                    # Not a parenthetical, so this was the dialogue line.
                    print(" line 207: else of PCs", line)
                    dialog_coming = False
                    print(" line 457 dialog over")
                continue

            if line.isupper() and (re.fullmatch(speaker_re, line)
                                   or re.fullmatch(titled_speaker_re, line)):
                print("line 111: May be speaker: ", line)
                f1.write(line + '\n')
                dialog_coming = True
                continue

            # Lines are already stripped, so blank lines are written as empty
            # lines here; they become the paragraph separators used below.
            print("470 probably action or something else so just write it", line)
            f1.write(line + '\n')

    print("line 132 file closed")

    # Second extraction: run the cleaned file through textract again.
    #file.txt contains the data of file1.txt , no usage as of now may be change the mame of the file.txt to file1.txt
    text = textract.process(cleaned_path, encoding="utf8", errors='ignore')
    print("line 136: ", text)
    with open(raw_path, 'wb') as raw_file:
        raw_file.write(text)
    # Read with the same explicit encoding the file was written with instead
    # of the platform default.
    with open(raw_path, 'r', encoding="utf8", errors='ignore') as raw_file:
        paragraphs = raw_file.read().split('\n\n')

    refined = [para.strip() for para in paragraphs]
    refined = [para for para in refined if para != ""]

    # Drop (and count) the paragraphs that precede the first scene heading.
    total_scenes = 0
    for index, para in enumerate(refined):
        if looks_like_slug(para):
            refined = refined[index:]
            break
        total_scenes += 1

    print("line 163:Refined", refined)
    return refined, total_scenes
|
|||
|
|
|||
|
def getSlugAndNonSlug(refined):
    """Partition paragraphs into scene headings and all other paragraphs.

    Args:
        refined: Iterable of paragraph strings.

    Returns:
        A tuple ``(sluglines, without_slug)`` of stripped paragraphs, each
        list keeping the input order.
    """
    # NOTE(review): unlike the first pass in getRefined(), this test does not
    # exclude INTERCUT / INTERMISSION / INTERVAL, so such paragraphs are
    # classified as sluglines here.
    heading_prefixes = ('INT.', 'INT', 'EXT.', 'EXT', 'EXT/INT', 'I/E', 'E/I',
                        'INT/EXT')
    sluglines, without_slug = [], []
    for raw in refined:
        para = raw.strip()
        is_heading = (
            para.startswith(heading_prefixes)
            or re.match(slug_pattern, para)
            or re.match(pat, para)
        )
        target = sluglines if is_heading else without_slug
        target.append(para)
    return sluglines, without_slug
|
|||
|
|
|||
|
def getSpeakers(without_slug):
    """Collect candidate speaker names from non-slugline paragraphs.

    A line is a candidate when it is upper-case, matches the speaker-name
    pattern and is directly followed by a non-blank line in the same
    paragraph. Text from the first ``(`` onwards is dropped from the name.
    Names containing any word from ``reserved_words`` are discarded.

    Args:
        without_slug: Iterable of paragraph strings (lines separated by
            ``"\\n"``).

    Returns:
        A sorted list of unique, non-empty speaker names.
    """
    name_pattern = r"[A-Z'’]+[\s]*[-]*[A-Z'’]*([#]*[\s]*[1-9])*(\(.*\))*"
    found = set()
    for para in without_slug:
        lines = para.split('\n')
        for index, item in enumerate(lines):
            # Look at the line immediately after this one. The previous index
            # arithmetic looked two lines ahead and made the last line check
            # itself, so every upper-case line passed.
            follower = lines[index + 1] if index + 1 < len(lines) else ""
            if not item.isupper() or follower.strip() == "":
                continue
            if re.match(name_pattern, item):
                name = item.split("(")[0].strip()
                if name:
                    found.add(name)
    return sorted(
        name for name in found
        if not set(name.split(" ")).intersection(reserved_words)
    )
|
|||
|
|
|||
|
|
|||
|
def getScenes(refined,total_scenes,characters):
|
|||
|
# To find scenes data structure and prev and next scenes numbers
|
|||
|
i=0
|
|||
|
scene=[]
|
|||
|
dialogues=[]
|
|||
|
speakers=[]
|
|||
|
slugline_dic={}
|
|||
|
prev_dial_speaker=""
|
|||
|
next_dial_speaker=""
|
|||
|
pc=0
|
|||
|
scene_no=0
|
|||
|
actionline=[]
|
|||
|
successor_scene_no=0
|
|||
|
predecessor_scene_no=0
|
|||
|
parenthetical_lis=[]
|
|||
|
|
|||
|
scenes=[]
|
|||
|
speaker=""
|
|||
|
parenthetical='NONE'
|
|||
|
patttern=r'[\d]*[.]?[\s]*[IE][NX]T'
|
|||
|
for line in refined:
|
|||
|
if line.strip().startswith(('INT.','INT')) or line.strip().startswith(('EXT.','EXT')) or line.strip().startswith('EXT/INT') or line.strip().startswith('INT/EXT') or line.strip().startswith(('I/E','E/I')) or re.match(slug_pattern,line.strip()) or re.match(pat,line.strip()):
|
|||
|
scenes.append(scene)
|
|||
|
scene=[]
|
|||
|
i=0
|
|||
|
scene_no+=1
|
|||
|
scene.append(line)
|
|||
|
slugline_dic[scene_no]=line.split("\n")[0].strip('0123456789.- ')
|
|||
|
|
|||
|
else:
|
|||
|
lis=line.split("\n")
|
|||
|
lis=[l.strip() for l in lis]
|
|||
|
print(" \n Line 222 probable dialogue list", lis)
|
|||
|
word=lis[0]
|
|||
|
if word.split('(')[0].strip() in characters:
|
|||
|
mydic={}
|
|||
|
prev_dial_speaker=speaker
|
|||
|
speaker=word.split('(')[0].strip()
|
|||
|
print("Speaker 228", speaker)
|
|||
|
if len(lis)>1 and re.match(r"\(.*\)",lis[1]):
|
|||
|
pc=pc+1
|
|||
|
parenthetical=lis[1]
|
|||
|
parenthetical=parenthetical.replace("\n","")
|
|||
|
dia=' '.join(lis[2:])
|
|||
|
dia=dia.replace("\n","")
|
|||
|
##renu
|
|||
|
dia=dia.replace("\"", '')
|
|||
|
|
|||
|
else:
|
|||
|
dia=''.join(lis[1:])
|
|||
|
dia=dia.replace("\n","")
|
|||
|
dia=dia.replace("\"", '')
|
|||
|
print(" length dia\n", len(dia))
|
|||
|
if not (len(dia)==0 and parenthetical=="NONE"):
|
|||
|
print(" len dia != and Parenthetical == NONE: 384 ")
|
|||
|
|
|||
|
if i-1 >= 0:
|
|||
|
try:
|
|||
|
prev=main_lis[scene_no-1][i-1]
|
|||
|
except:
|
|||
|
prev=""
|
|||
|
else:
|
|||
|
prev=""
|
|||
|
try:
|
|||
|
next=main_lis[scene_no-1][i+1]
|
|||
|
except:
|
|||
|
next=""
|
|||
|
#prev is previous speaker and next is next speaker of the dialogue
|
|||
|
mydic[speaker]=[parenthetical,scene_no,dia,len(dia),prev,next]
|
|||
|
print("line 259",mydic)
|
|||
|
#print("mydic 260", speaker, mydic[speaker])
|
|||
|
prev,next="",""
|
|||
|
i=i+1
|
|||
|
speakers.append(speaker)
|
|||
|
parenthetical_lis.append(parenthetical)
|
|||
|
dialogues.append(mydic)
|
|||
|
scene.append(mydic)
|
|||
|
parenthetical="NONE"
|
|||
|
else:
|
|||
|
line=line.replace("\n"," ")
|
|||
|
line=' '.join(line.split())
|
|||
|
if line.strip() in transitions:
|
|||
|
scene.append({'Transition':line.strip()})
|
|||
|
continue
|
|||
|
actionline.append(line)
|
|||
|
scene.append(line.strip())
|
|||
|
|
|||
|
|
|||
|
scenes.append(scene)
|
|||
|
speakers=list(set(speakers))
|
|||
|
scenes=scenes[1:]
|
|||
|
s = []
|
|||
|
for scene in scenes:
|
|||
|
s1=[]
|
|||
|
for ele in scene:
|
|||
|
if type(ele) == type(""):
|
|||
|
s1.extend(ele.split("\n"))
|
|||
|
else:
|
|||
|
s1.append(ele)
|
|||
|
s.append(s1)
|
|||
|
print("dialogue: ",dialogues)
|
|||
|
return s,actionline,parenthetical_lis,speakers,dialogues
|
|||
|
|
|||
|
# def getScenes(refined, total_scenes, characters):
|
|||
|
# # To find scenes data structure and prev and next scenes numbers
|
|||
|
# i = 0
|
|||
|
# scene = []
|
|||
|
# dialogues = []
|
|||
|
# speakers = []
|
|||
|
# slugline_dic = {}
|
|||
|
# prev_dial_speaker = ""
|
|||
|
# next_dial_speaker = ""
|
|||
|
# pc = 0
|
|||
|
# scene_no = 0
|
|||
|
# actionline = []
|
|||
|
# successor_scene_no = 0
|
|||
|
# predecessor_scene_no = 0
|
|||
|
# parenthetical_lis = []
|
|||
|
|
|||
|
# scenes = []
|
|||
|
# speaker = ""
|
|||
|
# parenthetical = 'NONE'
|
|||
|
# predecessor_scene_no_dict = {
|
|||
|
# 'Scene '+str(i+1): 0 for i in range(total_scenes)}
|
|||
|
# dia_count = {'Scene '+str(i+1): 0 for i in range(total_scenes)}
|
|||
|
# successor_scene_no_dict = {
|
|||
|
# 'Scene '+str(i+1): 0 for i in range(total_scenes)}
|
|||
|
# parenthetical_count_dict = {
|
|||
|
# 'Scene '+str(i+1): 0 for i in range(total_scenes)}
|
|||
|
# patttern = r'[\d]*[.]?[\s]*[IE][NX]T'
|
|||
|
# for line in refined:
|
|||
|
# if ((line.strip().startswith(('INT.', 'INT')) or line.strip().startswith(('EXT.', 'EXT')) or line.strip().startswith('EXT/INT') or line.strip().startswith('INT/EXT') or line.strip().startswith(('I/E', 'E/I')) or re.match(slug_pattern, line.strip()) or re.match(pat, line.strip())) and not (line.strip().startswith(('INTERCUT', 'INTERVAL', 'INTERMISSION')))):
|
|||
|
# # if re.match(patttern,line):
|
|||
|
# # current_scene=line.split(" ")[0]
|
|||
|
# # current_scene=current_scene[:1]
|
|||
|
# # current_scene=int(current_scene)
|
|||
|
# # successor_scene_no=min(int(current_scene)+2,total_scenes)
|
|||
|
# # predecessor_scene_no=max(int(current_scene),-1)
|
|||
|
# # else:
|
|||
|
|
|||
|
# successor_scene_no = min(scene_no+3, total_scenes)
|
|||
|
# predecessor_scene_no = max(scene_no+1, 0)
|
|||
|
# if scene_no > 0:
|
|||
|
# parenthetical_count_dict['Scene '+str(scene_no+1)] = pc
|
|||
|
# pc = 0
|
|||
|
# scenes.append(scene)
|
|||
|
# scene = []
|
|||
|
# i = 0
|
|||
|
# scene_no += 1
|
|||
|
# predecessor_scene_no_dict['Scene ' +
|
|||
|
# str(scene_no+1)] = predecessor_scene_no
|
|||
|
# successor_scene_no_dict['Scene ' +
|
|||
|
# str(scene_no+1)] = successor_scene_no
|
|||
|
# successor_scene_no_dict['Scene '+str(1)] = 2
|
|||
|
# scene.append(line)
|
|||
|
# slugline_dic[scene_no] = line.split(
|
|||
|
# "\n")[0].strip('0123456789.- ')
|
|||
|
|
|||
|
# else:
|
|||
|
# lis = line.split("\n")
|
|||
|
# lis = [l.strip() for l in lis]
|
|||
|
# print(" \n Line 363 probable dialogue list", lis)
|
|||
|
# word = lis[0]
|
|||
|
# extendedSpeaker = ""
|
|||
|
# if word.split('(')[0].strip() in characters:
|
|||
|
# mydic = {}
|
|||
|
# prev_dial_speaker = speaker
|
|||
|
# speakerline = word.split('(')
|
|||
|
# # speaker = word.split('(')[0].strip()
|
|||
|
# speaker = speakerline[0].strip()
|
|||
|
# print("Speaker 378", speaker)
|
|||
|
# extendedSpeaker = word.strip()
|
|||
|
# if len(lis) > 1 and re.match(r"\(.*\)", lis[1]):
|
|||
|
# pc = pc+1
|
|||
|
# parenthetical = lis[1]
|
|||
|
# parenthetical = parenthetical.replace("\n", "")
|
|||
|
# dia = ' '.join(lis[2:])
|
|||
|
# dia = dia.replace("\n", "")
|
|||
|
# # renu
|
|||
|
# # dia=dia.replace("\"", '')
|
|||
|
|
|||
|
# else:
|
|||
|
# dia = ''.join(lis[1:])
|
|||
|
# dia = dia.replace("\n", "")
|
|||
|
# dia = dia.replace("\"", '')
|
|||
|
# print(" length dia\n", len(dia))
|
|||
|
# if not (len(dia) == 0 and parenthetical == "NONE"):
|
|||
|
# print(" len dia != and Parenthetical == NONE: 384 ")
|
|||
|
# if i-1 >= 0:
|
|||
|
# try:
|
|||
|
# prev = main_lis[scene_no-1][i-1]
|
|||
|
# except:
|
|||
|
# prev = ""
|
|||
|
# else:
|
|||
|
# prev = ""
|
|||
|
# try:
|
|||
|
# next = main_lis[scene_no-1][i+1]
|
|||
|
# except:
|
|||
|
# next = ""
|
|||
|
# # prev is previous speaker and next is next speaker of the dialogue
|
|||
|
# mydic[speaker] = [parenthetical,
|
|||
|
# scene_no, dia, len(dia), prev, next, extendedSpeaker]
|
|||
|
# print("mydic 398", speaker, mydic[speaker])
|
|||
|
# dia_count['Scene '+str(scene_no)] += 1
|
|||
|
# # print(mydic)
|
|||
|
# prev, next = "", ""
|
|||
|
# i = i+1
|
|||
|
# speakers.append(speaker)
|
|||
|
# parenthetical_lis.append(parenthetical)
|
|||
|
# dialogues.append(mydic)
|
|||
|
# scene.append(mydic)
|
|||
|
# parenthetical = "NONE"
|
|||
|
# else:
|
|||
|
# line = line.replace("\n", " ")
|
|||
|
# line = ' '.join(line.split())
|
|||
|
# if line.strip() in transitions:
|
|||
|
# scene.append({'Transition': line.strip()})
|
|||
|
# continue
|
|||
|
# actionline.append(line)
|
|||
|
# scene.append(line.strip())
|
|||
|
|
|||
|
# scenes.append(scene)
|
|||
|
# parenthetical_count_dict['Scene '+str(scene_no)] = pc
|
|||
|
# speakers = list(set(speakers))
|
|||
|
# scenes = scenes[1:]
|
|||
|
# print("Scenes:", scenes)
|
|||
|
# # for removing '\n' from action lines
|
|||
|
# # return scenes also if '\n' required and modify practice_with_db also
|
|||
|
# # s = []
|
|||
|
# # for scene in scenes:
|
|||
|
# # s1=[]
|
|||
|
# # for ele in scene:
|
|||
|
# # if type(ele) == type(""):
|
|||
|
# # s1.extend(ele.split("\n"))
|
|||
|
# # else:
|
|||
|
# # s1.append(ele)
|
|||
|
# # s.append(s1)
|
|||
|
# return scenes,actionline,parenthetical_lis,speakers,dialogues
|
|||
|
|
|||
|
|
|||
|
def language_detector(text):
    """Return the source language reported for ``text``.

    Sends ``text`` to the module-level v2 translate client with Hindi
    (``'hi'``) as the target language and returns the
    ``"detectedSourceLanguage"`` field of the response.
    """
    response = translate_client.translate(text, target_language='hi')
    return response["detectedSourceLanguage"]
|
|||
|
|
|||
|
def script_det(text):
    """Return the script category of the first non-punctuation character.

    The first character of ``text`` that is not in the punctuation set is
    passed to ``script_cat`` and the first element of its result is returned.
    If every character is punctuation (or ``text`` is empty), ``script_cat``
    is called with an empty string.
    """
    # The backslash is escaped explicitly; the unescaped "\," form is an
    # invalid escape sequence that newer Python versions warn about. The
    # resulting string is unchanged.
    punctuations = '''!()-[]{};:'"\\,<>./?@#$%^&*_~'''
    first_char = next((char for char in text if char not in punctuations), "")
    return script_cat(first_char)[0]
|
|||
|
|
|||
|
|
|||
|
'''
|
|||
|
A. Language of Highest number of full dialogues,
|
|||
|
B. Numbers of dialogues in action line language,
|
|||
|
C. Number of dialogues in other languages
|
|||
|
'''
|
|||
|
|
|||
|
def A_B_C(dialogue_language, non_dial_src_lang):
    """Summarise the languages used across all dialogues.

    Args:
        dialogue_language: One detected language code per dialogue.
        non_dial_src_lang: Language code of the action lines.

    Returns:
        A tuple ``(A, B, C)``:
        * A: the most frequent dialogue language (ties go to the language
          seen first).
        * B: number of dialogues in ``non_dial_src_lang``; 0 when only one
          dialogue language is present at all.
        * C: number of dialogues in languages other than A and
          ``non_dial_src_lang``.

    Raises:
        IndexError: If ``dialogue_language`` is empty.
    """
    # most_common() sorts by count, descending, keeping first-seen order for
    # equal counts.
    ranked = Counter(dialogue_language).most_common()
    A, a_count = ranked[0]
    if len(ranked) == 1:
        return A, 0, 0

    counts = dict(ranked)
    B = counts.get(non_dial_src_lang, 0)
    # Subtract A's dialogues and, when the action-line language is a different
    # language, its dialogues too. This does not depend on where the
    # action-line language falls in the ranking.
    C = sum(counts.values()) - a_count
    if non_dial_src_lang != A:
        C -= B
    return A, B, C
|
|||
|
|
|||
|
|
|||
|
def dial_each_word_lang1(non_dial_src_lang, dial):
    """Tell whether any word of ``dial`` is detected as ``non_dial_src_lang``.

    Words are the whitespace-separated tokens of ``dial``; detection stops at
    the first matching word.

    Returns:
        The string ``"True"`` or ``"False"`` (not a bool).
    """
    has_match = any(
        language_detector(token) == non_dial_src_lang
        for token in dial.split()
    )
    return "True" if has_match else "False"
|
|||
|
|
|||
|
def dial_each_word_lang2(non_dial_src_lang, A, dial):
    """Tell whether any word of ``dial`` is in a third language.

    A word counts when its detected language is neither ``non_dial_src_lang``
    nor ``A``. Detection stops at the first such word.

    Returns:
        The string ``"True"`` or ``"False"`` (not a bool).
    """
    known_languages = (non_dial_src_lang, A)
    for word in dial.split():
        # One detection call per word. The word must differ from BOTH known
        # languages; the earlier `or` form was true for nearly every word.
        if language_detector(word) not in known_languages:
            return "True"
    return "False"
|
|||
|
|
|||
|
def word_with_actionline(scenes, A, non_dial_src_lang):
    """Tell whether a dialogue in language A contains action-line-language words.

    Args:
        scenes: Scene lists as produced by getScenes(); element 0 of each
            scene is skipped, strings are skipped, dicts are dialogues or
            ``{'Transition': ...}`` entries.
        A: The main dialogue language.
        non_dial_src_lang: The action-line language.

    Returns:
        ``"True"`` as soon as a dialogue detected as ``A`` has a word detected
        as ``non_dial_src_lang``; otherwise ``"False"`` (including when both
        languages are the same). Always a string, never ``None``.
    """
    if A == non_dial_src_lang:
        return "False"

    for scene in tqdm(scenes[:]):
        for i, line in enumerate(scene):
            if i == 0 or isinstance(line, str):
                continue
            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            dialogue = line[speaker][2]
            if language_detector(dialogue) != A:
                continue
            if dial_each_word_lang1(non_dial_src_lang, dialogue) == "True":
                return "True"
    # Explicit negative result instead of falling off the end with None.
    return "False"
|
|||
|
|
|||
|
|
|||
|
def word_with_other(scenes, A, non_dial_src_lang):
    """Tell whether a dialogue in language A contains third-language words.

    Args:
        scenes: Scene lists as produced by getScenes(); element 0 of each
            scene is skipped, strings are skipped, dicts are dialogues or
            ``{'Transition': ...}`` entries.
        A: The main dialogue language.
        non_dial_src_lang: The action-line language.

    Returns:
        ``"True"`` as soon as dial_each_word_lang2() reports ``"True"`` for a
        dialogue detected as ``A``; otherwise ``"False"``. Always a string,
        never ``None``.
    """
    for scene in tqdm(scenes[:]):
        for i, line in enumerate(scene):
            if i == 0 or isinstance(line, str):
                continue
            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            dialogue = line[speaker][2]
            if language_detector(dialogue) != A:
                continue
            if dial_each_word_lang2(non_dial_src_lang, A, dialogue) == "True":
                return "True"
    # Explicit negative result instead of falling off the end with None.
    return "False"
|
|||
|
|
|||
|
|
|||
|
def getInputs(filename1):
    """Run the full detection pipeline on a script file.

    Parses the script (getRefined -> getSlugAndNonSlug -> getSpeakers ->
    getScenes), detects the language and script of the first action line and
    of every dialogue, and derives the four Yes/No UI options.

    Args:
        filename1: Path of the script file.

    Returns:
        A list ``[non_dial_src_lang, dial_src_lang, dial_src_script,
        non_dial_src_script, UI_option3, UI_option4, UI_option5, UI_option6]``
        where the UI options are the strings ``"Yes"`` / ``"No"``.
    """
    # NOTE(review): the action-line language/script are only assigned when a
    # scene contains a string after its first element; otherwise the code
    # below fails on the unassigned names.
    refined, total_scenes = getRefined(filename1)
    _sluglines, without_slug = getSlugAndNonSlug(refined)
    characters = getSpeakers(without_slug)
    scenes, _actionline, _parentheticals, _speakers, _dialogues = getScenes(refined, total_scenes, characters)
    print("line 405:scenes: ", scenes)

    dialogue_languages = []
    dialogue_scripts = []
    action_line_seen = False
    for scene in tqdm(scenes[:]):
        for position, entry in enumerate(scene):
            if position == 0:
                continue
            if type(entry) is str:
                # Only the first action line decides the action-line language.
                if not action_line_seen:
                    non_dial_src_lang = language_detector(entry)
                    non_dial_src_script = script_det(entry)
                    action_line_seen = True
                continue
            [speaker] = entry.keys()
            if speaker == 'Transition':
                continue
            spoken = entry[speaker][2]
            dialogue_languages.append(language_detector(spoken))
            dialogue_scripts.append(script_det(spoken))

    A, B, C = A_B_C(dialogue_languages, non_dial_src_lang)
    dial_src_script = mode(dialogue_scripts)
    word_lang_with_actionline = word_with_actionline(scenes, A, non_dial_src_lang)
    word_lang_with_other = word_with_other(scenes, A, non_dial_src_lang)

    print("actionline_lanuge", non_dial_src_lang)
    print("A = {} B = {} C = {}".format(A, B, C))
    print("dial_language", A)
    dial_src_lang = A
    print("dial_src_script", dial_src_script)

    has_action_lang_dialogues = B > 0
    print("UI option3 - yes" if has_action_lang_dialogues else "UI option3 - no")
    UI_option3 = "Yes" if has_action_lang_dialogues else "No"

    has_other_lang_dialogues = C > 0
    print("UI option4 - yes" if has_other_lang_dialogues else "UI option4 - no")
    UI_option4 = "Yes" if has_other_lang_dialogues else "No"

    mixes_action_words = word_lang_with_actionline == "True"
    print("UI option5 - Yes" if mixes_action_words else "UI_option5 - NO")
    UI_option5 = "Yes" if mixes_action_words else "No"

    mixes_other_words = word_lang_with_other == "True"
    print("UI option6 - Yes" if mixes_other_words else "UI option6 - No")
    UI_option6 = "Yes" if mixes_other_words else "No"

    return [non_dial_src_lang, dial_src_lang, dial_src_script, non_dial_src_script,
            UI_option3, UI_option4, UI_option5, UI_option6]
|
|||
|
|
|||
|
# filename1 = sys.argv[1]
|
|||
|
# getInputs(filename1)
|
|||
|
|
|||
|
|