"""Detect the languages and writing scripts used in a screenplay.

The module samples the first few scenes of a parsed script and uses the
Google Cloud Translation API to work out the action-line language, the
dominant dialogue language and how much language mixing occurs, both at
the whole-dialogue level and at the word level.
"""
import os
import re
import sys
from collections import Counter
from statistics import mode

from google.cloud import translate
from google.cloud import translate_v2 as Translate
from tqdm import tqdm

from MNF.settings import BasePath
from .script_writing import default_script
from .script_detector import script_cat
from .script_reading import getRefined, getSlugAndNonSlug, getSpeakers, getScenes

basePath = BasePath()

# -> Google Translation API Credentials
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/MNF/json_keys/authentication.json"

translate_client = Translate.Client()
client = translate.TranslationServiceClient()

# Characters skipped by script_det() when looking for the first meaningful
# character. Raw string so that the backslash is taken literally.
_PUNCTUATIONS = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''

# Only this many leading scenes are analysed by getInputs().
_MAX_SCENES_SAMPLED = 5

# A word-level flag is raised once MORE than this many dialogues matched.
_MIXED_DIALOGUE_THRESHOLD = 5

# Share of dialogues above which the sentence-level UI options become "Yes".
_ACTIONLINE_LANG_RATIO = 0.15
_OTHER_LANG_RATIO = 0.20


# -> For Detecting language of any text
def language_detector(text):
    """Return the language code the Translation API detects for ``text``.

    The romanised-Hindi code ``"hi-Latn"`` is reported as plain ``"hi"``;
    every other code is returned exactly as the API reports it.
    """
    print(text, "sentence recieved")
    result = translate_client.detect_language(text)
    if result['language'] == "hi-Latn":
        return 'hi'
    return result['language']


# -> For Detecting Script of any text
def script_det(text):
    """Return the script of the first non-punctuation character of ``text``.

    The result is the first element of what ``script_cat`` returns for that
    character. If ``text`` is empty or consists only of punctuation,
    ``script_cat`` is called with an empty string.
    """
    # NOTE(review): whitespace is not in the punctuation set, so a leading
    # space is what gets classified -- confirm this is intended.
    first_char = next((ch for ch in text if ch not in _PUNCTUATIONS), "")
    return script_cat(first_char)[0]


# -> For Detecting presence of different languages in dialogues (whole sentences)
def A_B_C(dialogue_language, non_dial_src_lang):
    """Summarise how the detected dialogue languages are distributed.

    Args:
        dialogue_language: one detected language code per dialogue.
        non_dial_src_lang: language code of the action lines.

    Returns:
        A tuple ``(A, B, C)`` where

        * ``A`` is the language of the highest number of full dialogues
          (ties go to the language that appears first in the input),
        * ``B`` is the number of dialogues in the action-line language
          (0 when that language is ``A`` itself or does not occur),
        * ``C`` is the number of dialogues in any other language.

    Raises:
        IndexError: if ``dialogue_language`` is empty.
    """
    print("line 316:dialogue_language", dialogue_language)
    counts = Counter(dialogue_language)
    print("line 319:dict1", dict(counts))

    # most_common() sorts by descending count and keeps first-seen order
    # among equal counts.
    ranked = counts.most_common()
    sorted_values = [count for _, count in ranked]
    print("line 321:sorted_values:", sorted_values)
    sources = [lang for lang, _ in ranked]
    print("line 328: sources: ", sources)

    A = sources[0]
    print("Most Prominent Dialogue Language", A)

    # Dialogues in the action-line language only count towards B when that
    # language is not already the dominant one.
    B = 0 if A == non_dial_src_lang else counts.get(non_dial_src_lang, 0)
    C = sum(sorted_values[1:]) - B
    return A, B, C


# -> Detection of Different Lanugages and Scripts in Script
def dial_each_word_lang1(non_dial_src_lang, dial) -> str:
    """Tell whether any word of ``dial`` is in the action-line language.

    Returns the string ``"True"`` or ``"False"`` (not a bool). Detection
    stops at the first matching word.
    """
    found = any(
        language_detector(word) == non_dial_src_lang for word in dial.split()
    )
    return "True" if found else "False"


# -> Detection of Different Lanugages and Scripts in Script
def dial_each_word_lang2(non_dial_src_lang, A, dial) -> str:
    """Tell whether any word of ``dial`` is in a third language.

    A word counts when its detected language is neither the action-line
    language (``non_dial_src_lang``) nor the dominant dialogue language
    (``A``). Returns the string ``"True"`` or ``"False"`` (not a bool).
    """
    known_langs = (non_dial_src_lang, A)
    # Each word is sent to the detection API exactly once.
    found = any(
        language_detector(word) not in known_langs for word in dial.split()
    )
    return "True" if found else "False"


# -> Detection of words in lines with different languages
def word_with_actionline_other_lang(scenes, A, non_dial_src_lang):
    """Look for word-level language mixing inside dominant-language dialogues.

    Only dialogues whose detected language is ``A`` are inspected. Two
    independent flags are computed:

    * action-line flag: more than 5 such dialogues contain a word in the
      action-line language. This check is skipped (flag stays ``"False"``)
      when ``A`` equals ``non_dial_src_lang``.
    * other-language flag: more than 5 such dialogues contain a word that
      is in neither ``A`` nor the action-line language.

    Args:
        scenes: iterable of scenes. In each scene the first element is
            skipped, ``str`` elements are skipped, and every other element
            is a single-key mapping ``{speaker: values}`` whose
            ``values[2]`` is the text analysed (presumably the dialogue
            text -- confirm against ``getScenes``). Entries whose key is
            ``'Transition'`` are skipped.
        A: dominant dialogue language code.
        non_dial_src_lang: action-line language code.

    Returns:
        ``(actionline_flag, other_flag)``, each the string ``"True"`` or
        ``"False"``.
    """
    # When the dialogues are already in the action-line language there is
    # nothing to look for in the first check.
    check_actionline = A != non_dial_src_lang
    actionline_found = False
    other_found = False
    dials_with_actionline_langs = 0
    dials_with_other_langs = 0

    for scene in tqdm(scenes):
        for i, line in enumerate(scene):
            if i == 0 or isinstance(line, str):
                continue
            [speaker] = line.keys()
            if speaker == 'Transition':
                continue

            # Every enabled check is decided: stop spending API calls.
            if other_found and (actionline_found or not check_actionline):
                print("Found Presence of other Langs in Words")
                return str(actionline_found), str(other_found)

            dialogue = line[speaker][2]
            print(
                "Still Searching if Words of other langs are present or not...")
            if language_detector(dialogue) != A:
                continue

            if check_actionline and not actionline_found:
                if dial_each_word_lang1(non_dial_src_lang, dialogue) == "True":
                    dials_with_actionline_langs += 1
                actionline_found = (
                    dials_with_actionline_langs > _MIXED_DIALOGUE_THRESHOLD)

            if not other_found:
                if dial_each_word_lang2(non_dial_src_lang, A, dialogue) == "True":
                    dials_with_other_langs += 1
                other_found = dials_with_other_langs > _MIXED_DIALOGUE_THRESHOLD

    return str(actionline_found), str(other_found)


# -> Detection of Different Lanugages and Scripts in Script
def getInputs(filename1):
    """Detect the languages and scripts present in the script ``filename1``.

    The file is parsed with the ``script_reading`` helpers and at most the
    first 5 scenes are analysed. The first action line found determines the
    action-line language and script; every dialogue (except ``'Transition'``
    entries) contributes to the dialogue statistics.

    Returns:
        A list with, in order: action-line language, dominant dialogue
        language, most frequent dialogue script, action-line script, and
        the four ``"Yes"``/``"No"`` answers for UI options 3 to 6:

        * option 3 -- more than 15% of dialogues are in the action-line
          language,
        * option 4 -- more than 20% of dialogues are in other languages,
        * option 5 -- words in the action-line language occur inside
          dominant-language dialogues,
        * option 6 -- words in a third language occur inside
          dominant-language dialogues.

    Raises:
        ValueError: if the sampled scenes contain no action line or no
            dialogue, since the statistics cannot be computed then.
    """
    print("Detecting Languages and Scripts present in Script")
    refined, total_scenes = getRefined(filename1)
    _sluglines, without_slug = getSlugAndNonSlug(refined)
    characters = getSpeakers(without_slug)
    scenes, _actionline, _parenthetical_lis, _speakers, _dialogues = getScenes(
        refined, total_scenes, characters)
    print("line 520:scenes: ", scenes)

    scenes = scenes[:_MAX_SCENES_SAMPLED]

    language_of_all_dialogues = []
    script_of_all_dialogues = []
    actionline_seen = False
    non_dial_src_lang = None
    non_dial_src_script = None

    for scene in tqdm(scenes):
        for i, line in enumerate(scene):
            if i == 0:
                continue
            if isinstance(line, str):
                # Only the first action line is used to characterise the
                # non-dialogue text.
                if not actionline_seen:
                    non_dial_src_lang = language_detector(line)
                    non_dial_src_script = script_det(line)
                    actionline_seen = True
                    print("Non Dialogue/Actionline Language:", non_dial_src_lang)
                    print("Non Dialogue/Actionline Script:", non_dial_src_script)
                continue

            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            dialogue = line[speaker][2]
            language_of_all_dialogues.append(language_detector(dialogue))
            script_of_all_dialogues.append(script_det(dialogue))

    if not actionline_seen:
        raise ValueError(
            "No action line found in the sampled scenes; "
            "cannot detect the action-line language")
    if not language_of_all_dialogues:
        raise ValueError(
            "No dialogue found in the sampled scenes; "
            "cannot detect the dialogue language")

    # -> For Detecting presence of different languages in dialogues (whole sentences)
    A, B, C = A_B_C(language_of_all_dialogues, non_dial_src_lang)
    totaldials = len(language_of_all_dialogues)
    dial_src_script = mode(script_of_all_dialogues)
    dial_src_lang = A

    # -> For Detecting presence of different languages in dialogues (words)
    word_lang_with_actionline, word_lang_with_other = word_with_actionline_other_lang(
        scenes, A, non_dial_src_lang)

    print("A = {} B = {} C = {}".format(A, B, C))
    print("dial_language", A)
    print("dial_src_script", dial_src_script)

    if round(B / totaldials, 2) > _ACTIONLINE_LANG_RATIO:
        print("UI option3 - yes")
        UI_option3 = "Yes"
    else:
        print("UI option3 - no")
        UI_option3 = "No"

    if round(C / totaldials, 2) > _OTHER_LANG_RATIO:
        print("UI option4 - yes")
        UI_option4 = "Yes"
    else:
        print("UI option4 - no")
        UI_option4 = "No"

    if word_lang_with_actionline == "True":
        print("UI option5 - Yes")
        UI_option5 = "Yes"
    else:
        print("UI_option5 - NO")
        UI_option5 = "No"

    print("checking other lang", word_lang_with_other)
    if word_lang_with_other == "True":
        print("UI option6 - Yes")
        UI_option6 = "Yes"
    else:
        print("UI option6 - No")
        UI_option6 = "No"

    print("*******************------------Detection------------***********************")
    print(UI_option3, UI_option4, UI_option5, UI_option6, non_dial_src_script)
    return [non_dial_src_lang, dial_src_lang, dial_src_script, non_dial_src_script,
            UI_option3, UI_option4, UI_option5, UI_option6]