Conversion_Kitchen_Code/kitchen_counter/conversion/translation/detection5april.py

from google.cloud import translate_v2 as Translate
from google.cloud import translate
from MNF.settings import BasePath
from narration.vectorcode.code.functions import ScriptBreakdown
from .translation.script_writing import default_script
from .translation.script_detector import script_cat
from statistics import mode
from collections import Counter
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.shared import Inches, Cm, Pt
# import textract
from tqdm import tqdm
import sys
import re
import docx
import os
# Module-level python-docx document; it is rebound a few lines below.
doc = docx.Document()


# NOTE(review): second Document() construction. This rebinding replaces the
# instance created above, so only this one is used by the style setup below.
doc = docx.Document()
# Base path obtained from MNF.settings; interpolated into the credentials path.
basePath = BasePath()


# Google Cloud Translation setup.
# The credentials path is exported before the two clients are constructed
# (presumably the client libraries read it at construction time - confirm).
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/conversion/My First Project-2573112d5326.json"
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/conversion/gifted-mountain-318504-4f001d5f08db.json"
# v2 client; used by language_detector() in this module.
translate_client = Translate.Client()
# Second client from google.cloud.translate; not referenced by the code
# visible in this part of the file.
client = translate.TranslationServiceClient()
project_id = 'authentic-bongo-272808'
location = "global"
# Resource path assembled from the project id and location above.
parent = f"projects/{project_id}/locations/{location}"


# Slugline regex: optional digits, optional dot, optional whitespace, then one
# of I/E followed by one of N/X followed by T (so INT and EXT both match).
slug_pattern = r'[\d]*[.]?[\s]*[IE][NX]T'
# Slugline regex for the "I/E" style: optional digits, optional whitespace,
# I or E, a slash, I or E, and an optional trailing dot.
pat = r'[\d]*[\s]*[IE]/[IE][.]?'
# Transition markers. In the code visible here, these constants are referenced
# only from the commented-out parsing helpers below.
transitions = ['CUT TO:', 'FADE IN:', 'FADE OUT:', 'DISSOLVE TO:', 'JUMP CUT TO:',
               'JUMP TO:', 'CUT BACK TO:', 'INTERCUT WITH:', 'I/C WITH:', 'BACK TO:', 'INTERVAL']
# NOTE(review): 'FADE' is listed twice.
reserved_words = ['MONTAGE', 'PBS', 'FADE',
                  'FADE', 'TITLE', 'SPLIT', 'SCREEN', 'CUT']
# Set the document's 'Normal' style to 12 pt Courier New.
style = doc.styles['Normal']
font = style.font
font.name = 'Courier New'
font.size = Pt(12)


def breaksen(s):
    """Split a long sentence into chunks of at most 32 words.

    Text with 256 or fewer whitespace-separated words is returned untouched as
    a single-element list. Longer text is cut into consecutive 32-word chunks,
    each re-joined with single spaces (so runs of whitespace are normalised in
    that case only).

    Args:
        s: the text to split.

    Returns:
        A list of strings. For long input no chunk is empty; previously an
        empty trailing chunk was produced whenever the word count was an
        exact multiple of 32.
    """
    # Split once instead of re-splitting the whole text for every chunk.
    words = s.split()
    if len(words) <= 256:
        return [s]
    # Stepping by the chunk size never yields a start index past the last
    # word, which is what caused the spurious "" chunk before.
    return [" ".join(words[start:start + 32])
            for start in range(0, len(words), 32)]


# def getRefined(filename1):
#     print("get_refined_called")
#     total_scenes = 0
#     text = textract.process(filename1, encoding="utf8", errors='ignore')
#     filename = rf"{basePath}/conversion/translation/file.txt"
#     f = open(filename, 'wb')
#     f.write(text)
#     f.close()
#     dialog_coming = False
#     f = open(filename, 'r',  encoding="utf8", errors='ignore')
#     doc11 = f.read()

#     f.close()
#     f1 = open(rf"{basePath}/conversion/translation/file1.txt",
#               'w', encoding="utf8", errors='ignore')
#     c = 0
#     flag = False
#     print("Slugline")
#     for line in doc11.split("\n"):
#         line = line.strip()
#         print("line 427:", line)
#         if (line.strip().startswith(('INT.', 'INT ')) or
#             line.strip().startswith(('I/E', 'E/I')) or
#             line.strip().startswith(('EXT.', 'EXT ')) or
#             line.strip().startswith('EXT/INT') or
#             line.strip().startswith('INT/EXT') or
#                 re.match(slug_pattern, line.strip()) or re.match(pat, line.strip())) and not (line.strip().startswith(('INTERCUT', 'INTERMISSION', 'INTERVAL'))):

#             flag = True
#             f1.write(line)
#             f1.write('\n')
#             continue
#         else:
#             print("line 96: else loop", line)
#             #line = line.strip()
#             if flag:
#                 print("line 99: if loop:", line)
#                 if line.strip() == '\n':
#                     continue
#                 if dialog_coming and (line == '\n' or line.strip() == ""):
#                     print("line empty or just have newline", line)
#                     continue
#                 if dialog_coming:
#                     print("line 101 probable dialog or PC: ", line)
#                     f1.write(line)
#                     f1.write('\n')
#                     if re.match(r"\(.*\)", line):

#                         continue
#                     else:
#                         print(" line 207: else of PCs", line)
#                         dialog_coming = False
#                         print(" line 457 dialog over")
#                         continue
#                     continue
#                 # if line.isupper() and re.fullmatch(r"([A-Z'’]*[.]*[\s]*[-]*)*([#]*[\s]*[1-9])*(\(.*\))*",line.strip()):
#                 if line.isupper() and (re.fullmatch(r"([A-Z'’]+[\s]*[-]*)*([#]*[\s]*[1-9])*(\(.*\))*", line.strip()) or re.fullmatch(r"(MRS?|DR|ER|PHD|ESQ|HON|JR|MS|MESSRS|MMES|MSGR|PROF|REV|RT. HON|SR|ST)\. [A-Z]+", line.strip())):
#                     print("line 111: May be speaker: ", line)
#                     f1.write(line)
#                     f1.write('\n')
#                     dialog_coming = True
#                     continue

#                 if not line == '\n':
#                     print(
#                         "470 probably action or something else so just write it", line)
#                     f1.write(line)
#                     f1.write('\n')

#     f1.close()
#     print("line 132 file closed")
#     filename1 = rf"{basePath}/conversion/translation/file1.txt"
#     # file.txt contains the data of file1.txt; it is unused as of now. Maybe rename file.txt to file1.txt.
#     text = textract.process(filename1, encoding="utf8", errors='ignore')
#     print("line 136: ", text)
#     filename = rf"{basePath}/conversion/translation/file.txt"
#     _, file_extension = os.path.splitext(filename1)
#     f = open(filename, 'wb')
#     f.write(text)
#     f.close()

#     with open(filename, "r") as input:
#         input_ = input.read().split('\n\n')

#     refined = []

#     for line in input_:
#         refined.append(line.strip())
#     refined = list(filter(lambda a: a != "", refined))
#     #print("processing the script")

#     for i in range(len(refined)):
#         if not (refined[i].strip().startswith(('INT.', 'INT ')) or refined[i].strip().startswith(('EXT.', 'EXT ')) or refined[i].strip().startswith('EXT/INT') or refined[i].strip().startswith('INT/EXT') or refined[i].strip().startswith(('I/E', 'E/I')) or re.match(slug_pattern, refined[i].strip()) or re.match(pat, refined[i].strip())):
#             total_scenes = total_scenes + 1
#             continue
#         refined = refined[i:]
#         break
#     # refined.append(line.strip())

#     refined = list(filter(lambda a: a != "", refined))
#     print("line 163:Refined", refined)
#     return refined, total_scenes


# def getSlugAndNonSlug(refined):
#     sluglines = []
#     without_slug = []
#     for para in refined:
#         para = para.strip()
#         if para.strip().startswith(('INT.', 'INT')) or para.strip().startswith(('EXT.', 'EXT')) or para.strip().startswith('EXT/INT') or para.strip().startswith(('I/E', 'E/I')) or para.strip().startswith('INT/EXT') or re.match(slug_pattern, para.strip()) or re.match(pat, para.strip()):
#             sluglines.append(para)
#             continue
#         without_slug.append(para)
#     return sluglines, without_slug


# def getSpeakers(without_slug):
#     characters = []
#     for para in without_slug:
#         lis = para.split('\n')
#         i = 0
#         for item in lis:
#             i = i+1
#             i = min(i, len(lis)-2)
#             if item.isupper() and not(lis[i+1].strip() == ""):
#                 if re.match(r"[A-Z'’]+[\s]*[-]*[A-Z'’]*([#]*[\s]*[1-9])*(\(.*\))*", item):
#                     tem = item.split("(")[0].strip()
#                     characters.append(tem.strip())
#                 else:
#                     continue

#     characters = list(set(characters))
#     characters = list(filter(lambda x: len(x) > 0, characters))
#     characters = [character for character in characters if set(
#         character.split(" ")).intersection(reserved_words) == set()]
#     return characters


# def getScenes(refined, total_scenes, characters):
#     # To find scenes data structure and prev and next scenes numbers
#     i = 0
#     scene = []
#     dialogues = []
#     speakers = []
#     slugline_dic = {}
#     prev_dial_speaker = ""
#     next_dial_speaker = ""
#     pc = 0
#     scene_no = 0
#     actionline = []
#     successor_scene_no = 0
#     predecessor_scene_no = 0
#     parenthetical_lis = []

#     scenes = []
#     speaker = ""
#     parenthetical = 'NONE'
#     patttern = r'[\d]*[.]?[\s]*[IE][NX]T'
#     for line in refined:
#         if line.strip().startswith(('INT.', 'INT')) or line.strip().startswith(('EXT.', 'EXT')) or line.strip().startswith('EXT/INT') or line.strip().startswith('INT/EXT') or line.strip().startswith(('I/E', 'E/I')) or re.match(slug_pattern, line.strip()) or re.match(pat, line.strip()):
#             scenes.append(scene)
#             scene = []
#             i = 0
#             scene_no += 1
#             scene.append(line)
#             slugline_dic[scene_no] = line.split("\n")[0].strip('0123456789.- ')

#         else:
#             lis = line.split("\n")
#             lis = [l.strip() for l in lis]
#             print(" \n Line 222 probable dialogue list", lis)
#             word = lis[0]
#             if word.split('(')[0].strip() in characters:
#                 mydic = {}
#                 prev_dial_speaker = speaker
#                 speaker = word.split('(')[0].strip()
#                 print("Speaker 228", speaker)
#                 if len(lis) > 1 and re.match(r"\(.*\)", lis[1]):
#                     pc = pc+1
#                     parenthetical = lis[1]
#                     parenthetical = parenthetical.replace("\n", "")
#                     dia = ' '.join(lis[2:])
#                     dia = dia.replace("\n", "")
#                     # renu
#                     dia = dia.replace("\"", '')

#                 else:
#                     dia = ''.join(lis[1:])
#                     dia = dia.replace("\n", "")
#                     dia = dia.replace("\"", '')
#                     print("  length dia\n", len(dia))
#                 if not (len(dia) == 0 and parenthetical == "NONE"):
#                     print(" len dia != and Parenthetical == NONE: 384 ")

#                     if i-1 >= 0:
#                         try:
#                             prev = main_lis[scene_no-1][i-1]
#                         except:
#                             prev = ""
#                     else:
#                         prev = ""
#                     try:
#                         next = main_lis[scene_no-1][i+1]
#                     except:
#                         next = ""
#                     # prev is previous speaker and next is next speaker of the dialogue
#                     mydic[speaker] = [parenthetical,
#                                       scene_no, dia, len(dia), prev, next]
#                     print("line 259", mydic)
#                     #print("mydic  260", speaker, mydic[speaker])
#                     prev, next = "", ""
#                     i = i+1
#                     speakers.append(speaker)
#                     parenthetical_lis.append(parenthetical)
#                     dialogues.append(mydic)
#                     scene.append(mydic)
#                 parenthetical = "NONE"
#             else:
#                 line = line.replace("\n", " ")
#                 line = ' '.join(line.split())
#                 if line.strip() in transitions:
#                     scene.append({'Transition': line.strip()})
#                     continue
#                 actionline.append(line)
#                 scene.append(line.strip())

#     scenes.append(scene)
#     speakers = list(set(speakers))
#     scenes = scenes[1:]
#     s = []
#     for scene in scenes:
#         s1 = []
#         for ele in scene:
#             if type(ele) == type(""):
#                 s1.extend(ele.split("\n"))
#             else:
#                 s1.append(ele)
#         s.append(s1)
#     print("dialogue: ", dialogues)
#     return s, actionline, parenthetical_lis, speakers, dialogues


def language_detector(text):
    """Return the source language reported for *text* by the translate client.

    The text is sent to ``translate_client.translate`` with Hindi (``'hi'``)
    as the target language; only the ``"detectedSourceLanguage"`` entry of the
    response is used and the translation itself is discarded.

    Args:
        text: the text whose language should be detected.

    Returns:
        The value stored under ``"detectedSourceLanguage"`` in the response.
    """
    response = translate_client.translate(text, target_language='hi')
    return response["detectedSourceLanguage"]


def script_det(text):
    """Return the script of the first non-punctuation character of *text*.

    The first character that is not in the punctuation set below is handed to
    ``script_cat`` and the first element of its result is returned. If every
    character is punctuation (or *text* is empty), ``script_cat`` receives an
    empty string.

    Args:
        text: the text to inspect.

    Returns:
        ``script_cat(<first non-punctuation character>)[0]``.
    """
    # Raw string: the previous non-raw literal contained a backslash followed
    # by a comma, which is an invalid escape sequence (a SyntaxWarning on
    # recent Python versions). The runtime value is unchanged.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    # NOTE(review): whitespace and digits are not in the set, so a leading
    # space or number is what gets classified - confirm that is intended.
    first_char = next((ch for ch in text if ch not in punctuations), "")
    return script_cat(first_char)[0]


'''
A. Language of the highest number of full dialogues,
B. Number of dialogues in the action line language,
C. Number of dialogues in other languages
'''


def A_B_C(dialogue_language, non_dial_src_lang):
    """Summarise how the dialogue languages relate to the action-line language.

    Args:
        dialogue_language: detected language code of each sampled dialogue,
            one entry per dialogue.
        non_dial_src_lang: language code detected for the action lines.

    Returns:
        A tuple ``(A, B, C)`` where

        * ``A`` is the most frequent dialogue language (on a tie, the language
          seen first wins),
        * ``B`` is the number of dialogues in the action-line language, or 0
          when that language is ``A`` itself or does not occur in dialogue,
        * ``C`` is the number of dialogues in any remaining language, i.e.
          neither ``A`` nor the action-line language.

    Raises:
        IndexError: if ``dialogue_language`` is empty.
    """
    print("line 316:dialogue_language", dialogue_language)
    print(non_dial_src_lang)
    counts = Counter(dialogue_language)
    dict1 = dict(counts)
    print("line 319:dict1", dict1)
    # most_common() orders by count, descending, keeping first-seen order for
    # equal counts - the same ranking the hand-written sort used to build.
    ranked = counts.most_common()
    sorted_values = [count for _, count in ranked]
    print("line 321:sorted_values:", sorted_values)
    sources = [lang for lang, _ in ranked]
    print("line 328: sources: ", sources)
    if not sources:
        # Same exception type as the former bare sources[0], clearer message.
        raise IndexError(
            "dialogue_language is empty: no dialogue languages to rank")
    A = sources[0]

    if A == non_dial_src_lang:
        B = 0
    else:
        B = dict1.get(non_dial_src_lang, 0)
    # Everything that is neither A nor the action-line language. The previous
    # sum(sorted_values[2:]) dropped the second-ranked language when the
    # action-line language was A, and counted the wrong entries when the
    # action-line language was not ranked second.
    C = sum(sorted_values) - dict1[A] - B
    return A, B, C


def dial_each_word_lang1(non_dial_src_lang, dial):
    """Tell whether any word of *dial* is detected as the action-line language.

    Each whitespace-separated word is passed to ``language_detector`` in
    order; checking stops at the first match.

    Args:
        non_dial_src_lang: language code of the action lines.
        dial: dialogue text to scan.

    Returns:
        The string ``"True"`` when a matching word is found, otherwise the
        string ``"False"`` (strings, not booleans).
    """
    has_match = any(
        language_detector(token) == non_dial_src_lang
        for token in dial.split()
    )
    return "True" if has_match else "False"


def dial_each_word_lang2(non_dial_src_lang, A, dial):
    """Tell whether *dial* contains a word in a third language.

    A word counts when its detected language is neither the action-line
    language nor the main dialogue language ``A``.

    Args:
        non_dial_src_lang: language code of the action lines.
        A: language code of the main dialogue language.
        dial: dialogue text to scan.

    Returns:
        The string ``"True"`` when such a word is found, otherwise the string
        ``"False"`` (strings, not booleans).
    """
    known_languages = (non_dial_src_lang, A)
    for word in dial.split():
        # Detect once per word. The previous test joined two "!=" checks with
        # "or", which holds for every word whenever the two known languages
        # differ; a word is "other" only when it differs from both.
        if language_detector(word) not in known_languages:
            return "True"
    return "False"


def word_with_actionline(scenes, A, non_dial_src_lang):
    """Tell whether any dialogue in language ``A`` uses action-line-language words.

    Args:
        scenes: list of scenes; each scene is a list whose entries are either
            plain strings or single-key dicts keyed by speaker (or by
            ``'Transition'``).
        A: language code of the main dialogue language.
        non_dial_src_lang: language code of the action lines.

    Returns:
        ``"True"`` as soon as a dialogue detected as ``A`` contains a word
        detected as ``non_dial_src_lang``; otherwise ``"False"``. Always a
        string - previously the function fell off the end and returned
        ``None`` when nothing matched.
    """
    if A == non_dial_src_lang:
        # Both languages coincide, so there is nothing to distinguish.
        return "False"
    for scene in tqdm(scenes):
        for i, line in enumerate(scene):
            # Entry 0 is skipped (presumably the scene's slugline - confirm
            # against ScriptBreakdown.getScenes); plain strings are skipped.
            if i == 0 or isinstance(line, str):
                continue
            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            # Index 2 of the speaker entry is assumed to hold the dialogue
            # text, as in the commented-out getScenes above - verify.
            dialogue = line[speaker][2]
            if language_detector(dialogue) != A:
                continue
            if dial_each_word_lang1(non_dial_src_lang, dialogue) == "True":
                return "True"
    return "False"


def word_with_other(scenes, A, non_dial_src_lang):
    """Tell whether any dialogue in language ``A`` is flagged by ``dial_each_word_lang2``.

    Args:
        scenes: list of scenes; each scene is a list whose entries are either
            plain strings or single-key dicts keyed by speaker (or by
            ``'Transition'``).
        A: language code of the main dialogue language.
        non_dial_src_lang: language code of the action lines.

    Returns:
        ``"True"`` as soon as ``dial_each_word_lang2`` reports ``"True"`` for
        a dialogue detected as ``A``; otherwise ``"False"``. Always a string -
        previously the function fell off the end and returned ``None`` when
        nothing matched.
    """
    for scene in tqdm(scenes):
        for i, line in enumerate(scene):
            # Entry 0 is skipped (presumably the scene's slugline - confirm
            # against ScriptBreakdown.getScenes); plain strings are skipped.
            if i == 0 or isinstance(line, str):
                continue
            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            # Index 2 of the speaker entry is assumed to hold the dialogue
            # text, as in the commented-out getScenes above - verify.
            dialogue = line[speaker][2]
            if language_detector(dialogue) != A:
                continue
            if dial_each_word_lang2(non_dial_src_lang, A, dialogue) == "True":
                return "True"
    return "False"


def getInputs(filename1):
    """Detect the languages and scripts used in a screenplay file.

    The file is parsed with ``ScriptBreakdown`` and at most the first 10
    scenes are sampled. The first plain-string entry found (after each scene's
    entry 0) supplies the action-line language and script; every speaker
    entry supplies one dialogue language and script.

    Args:
        filename1: path of the script file, passed to
            ``ScriptBreakdown().getRefined``.

    Returns:
        An 8-element list:
        ``[action-line language, main dialogue language, most common dialogue
        script, action-line script, UI_option3, UI_option4, "Yes", "Yes"]``.
        ``UI_option3`` is ``"Yes"`` when some dialogue is in the action-line
        language (B > 0) and ``UI_option4`` is ``"Yes"`` when some dialogue is
        in another language (C > 0); both are ``"No"`` otherwise. The last two
        entries are always ``"Yes"``.

    Raises:
        ValueError: if no action line is found in the sampled scenes
            (previously this surfaced as an unbound-local-variable error).
    """
    refined, total_scenes = ScriptBreakdown().getRefined(filename1)
    sluglines, without_slug = ScriptBreakdown().getSlugAndNonSlug(refined)
    characters = ScriptBreakdown().getSpeakers(without_slug)
    scenes, actionline, parenthetical_lis, speakers, dialogues = ScriptBreakdown().getScenes(
        refined, total_scenes, characters)
    print("line 405:scenes: ", scenes)

    language_of_all_dialogues = []
    script_of_all_dialogues = []
    non_dial_src_lang = None
    non_dial_src_script = None
    action_line_seen = False

    # Only the first 10 scenes are sampled; every sampled line costs a call
    # to the translation service.
    for scene in tqdm(scenes[:10]):
        for i, line in enumerate(scene):
            # Entry 0 is skipped (presumably the scene's slugline - confirm
            # against ScriptBreakdown.getScenes).
            if i == 0:
                continue
            if isinstance(line, str):
                # Only the first plain-string line decides the action-line
                # language and script; later ones are ignored.
                if not action_line_seen:
                    non_dial_src_lang = language_detector(line)
                    non_dial_src_script = script_det(line)
                    action_line_seen = True
                continue

            [speaker] = line.keys()
            if speaker == 'Transition':
                continue

            # Index 2 of the speaker entry is assumed to hold the dialogue
            # text, as in the commented-out getScenes above - verify.
            dialogue = line[speaker][2]
            dial_src_lang = language_detector(dialogue)
            print("dial_src_lang:line 430:", dial_src_lang)
            language_of_all_dialogues.append(dial_src_lang)
            script_of_all_dialogues.append(script_det(dialogue))

    print(language_of_all_dialogues)
    if not action_line_seen:
        raise ValueError(
            "no action line found in the first 10 scenes; "
            "cannot detect the action-line language")

    A, B, C = A_B_C(language_of_all_dialogues, non_dial_src_lang)
    dial_src_script = mode(script_of_all_dialogues)

    # NOTE(review): one_step_process is only printed, not returned, yet the
    # default_script[A] lookup can still fail for a language missing from
    # that table - confirm whether that is intended.
    one_step_process = "Yes" if dial_src_script == default_script[A] else "Can_not_say"
    print("one_step_process", one_step_process)

    if B > 0:
        print("UI option3 - yes")
        UI_option3 = "Yes"
    else:
        print("UI option3 - no")
        UI_option3 = "No"

    if C > 0:
        print("UI option4 - yes")
        UI_option4 = "Yes"
    else:
        print("UI option4 - no")
        UI_option4 = "No"

    dial_src_lang = A
    # Options 5 and 6 are reported as "Yes" unconditionally. The word-level
    # checks (word_with_actionline / word_with_other) used to follow this
    # return as unreachable code and have been removed from this function.
    return [non_dial_src_lang, dial_src_lang, dial_src_script, non_dial_src_script,
            UI_option3, UI_option4, "Yes", "Yes"]

# filename1 = sys.argv[1]
# getInputs(filename1)