Conversion_Kitchen_Code/kitchen_counter/conversion/translation/detection_w.py

from google.cloud import translate_v2 as Translate
from google.cloud import translate
from MNF.settings import BasePath
from .script_writing import default_script
from .script_detector import script_cat
from statistics import mode
from collections import Counter
# import textract
from tqdm import tqdm
import sys
import re
import os
from .script_reading import getRefined, getSlugAndNonSlug, getSpeakers, getScenes 

# Base path supplied by MNF.settings.BasePath(); presumably the project root
# directory - TODO confirm against MNF.settings.
basePath = BasePath()

# -> Google Translation API Credentials
# The key-file path is exported BEFORE the clients below are constructed;
# presumably the Google client libraries read GOOGLE_APPLICATION_CREDENTIALS
# when a client is created, so this ordering must be kept - TODO confirm.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/MNF/json_keys/authentication.json"
# translate_v2 client; language_detector() uses it for language detection.
translate_client = Translate.Client()
# TranslationServiceClient instance; not referenced by any function visible
# in this part of the file.
client = translate.TranslationServiceClient()


# -> For Detecting language of any text
def language_detector(text):
    """Return the language code detected for ``text``.

    The text is echoed to stdout and then sent to the module-level
    ``translate_client``. The code ``"hi-Latn"`` (Hindi written in Latin
    script) is reported as plain ``"hi"``; every other code is returned
    exactly as the client produced it.
    """
    print(text,"sentence recieved")
    detected = translate_client.detect_language(text)['language']
    return 'hi' if detected == "hi-Latn" else detected


# -> For Detecting Script of any text
def script_det(text):
    """Return the script of the first meaningful character of ``text``.

    The first character that is neither listed punctuation nor whitespace
    is handed to ``script_cat`` and the first element of its result is
    returned. When ``text`` holds no such character, the empty string is
    passed to ``script_cat`` instead.

    Args:
        text: the line (action line or dialogue) to classify.

    Returns:
        The first element of ``script_cat``'s result for the chosen character.
    """
    # Raw literal: the character set contains a backslash followed by a comma,
    # which is an invalid escape sequence in a normal string literal
    # (SyntaxWarning on Python 3.12+). The set of characters is unchanged.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''
    # Whitespace is skipped as well, so an indented line is classified by its
    # first visible character rather than by the leading space.
    first_char = next(
        (char for char in text
         if char not in punctuations and not char.isspace()),
        "",
    )
    return script_cat(first_char)[0]


'''
A. Language with the highest number of full dialogues,
B. Number of dialogues written in the action-line language,
C. Number of dialogues written in any other language.
'''


# -> For Detecting presence of different languages in dialogues (whole sentences)
def A_B_C(dialogue_language, non_dial_src_lang):
    """Summarise which languages the full dialogues are written in.

    Args:
        dialogue_language: detected language code of every dialogue.
        non_dial_src_lang: language code of the action lines.

    Returns:
        A tuple ``(A, B, C)`` where ``A`` is the most frequent dialogue
        language (on a tie, the one seen first), ``B`` is the number of
        dialogues in the action-line language (0 when that language is ``A``
        or does not occur at all) and ``C`` is the number of dialogues in
        every remaining language.

    Raises:
        IndexError: if ``dialogue_language`` is empty.
    """
    print("line 316:dialogue_language", dialogue_language)
    tally = dict(Counter(dialogue_language))
    print("line 319:dict1", tally)
    # Stable sort: most frequent language first, ties keep first-seen order.
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    counts = [count for _, count in ranked]
    print("line 321:sorted_values:", counts)
    languages = [language for language, _ in ranked]
    print("line 328: sources: ", languages)
    A = languages[0]
    print("Most Prominent Dialogue Language", A)

    # Dialogues that are not in the dominant language.
    minority_total = sum(counts[1:])
    if non_dial_src_lang == A or non_dial_src_lang not in languages:
        B = 0
    else:
        B = counts[languages.index(non_dial_src_lang)]
    return A, B, minority_total - B


# -> Detection of action-line language words inside a dialogue
def dial_each_word_lang1(non_dial_src_lang, dial):
    """Tell whether any word of ``dial`` is in the action-line language.

    Each whitespace-separated word is passed to ``language_detector`` in
    order, stopping at the first match.

    Returns:
        The string ``"True"`` if some word is detected as
        ``non_dial_src_lang``, otherwise the string ``"False"``.
    """
    has_match = any(
        language_detector(token) == non_dial_src_lang
        for token in dial.split()
    )
    return "True" if has_match else "False"


# -> Detection of words from other languages inside a dialogue
def dial_each_word_lang2(non_dial_src_lang, A, dial):
    """Tell whether ``dial`` contains a word from a third language.

    A word counts as "other" when its detected language is neither the
    action-line language nor the main dialogue language.

    Args:
        non_dial_src_lang: language code of the action lines.
        A: language code of the most frequent dialogue language.
        dial: the dialogue text; it is split on whitespace.

    Returns:
        The string ``"True"`` if at least one word is in neither language,
        otherwise the string ``"False"``.
    """
    known_languages = (non_dial_src_lang, A)
    for word in dial.split():
        # The word must differ from BOTH languages. Joining the two
        # inequalities with "or" is true for every word whenever the two
        # languages differ, and it also detects each word twice.
        if language_detector(word) not in known_languages:
            return "True"
    return "False"


# -> Detection of words in lines with different languages
def word_with_actionline_other_lang(scenes, A, non_dial_src_lang):
    """Look for foreign words inside dialogues written in the main language.

    Only dialogues whose detected language is ``A`` are inspected. Two
    findings are tracked, and each one is confirmed once more than five
    dialogues exhibit it:

    * words in the action-line language (skipped entirely when the
      action-line language is ``A`` itself);
    * words in any other language.

    Args:
        scenes: iterable of scenes. In each scene the first item is skipped,
            string items are skipped, and every other item is a
            single-key mapping ``{speaker: values}`` whose ``values[2]`` is
            used as the dialogue text.
        A: language code of the most frequent dialogue language.
        non_dial_src_lang: language code of the action lines.

    Returns:
        A pair ``(actionline_lang_output, other_lang_output)`` of the strings
        ``"True"`` / ``"False"``.
    """
    min_hits = 5  # a finding is confirmed once MORE than this many dialogues match
    actionline_hits = 0
    other_hits = 0
    actionline_found = False
    other_found = False
    # Real boolean: a flag stored as the string "False" is truthy, which makes
    # ``not flag`` always False and silently disables the action-line check.
    skip_actionline_check = A == non_dial_src_lang

    for scene in tqdm(scenes):
        for i, line in enumerate(scene):
            if i == 0 or isinstance(line, str):
                continue
            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            if actionline_found and other_found:
                # Both findings are confirmed; no need to scan any further.
                print("Found Presence of other Langs in Words")
                return "True", "True"

            print(
                "Still Searching if Words of other langs are present or not...")
            dialogue = line[speaker][2]
            if language_detector(dialogue) != A:
                continue
            if not actionline_found and not skip_actionline_check:
                if dial_each_word_lang1(non_dial_src_lang, dialogue) == "True":
                    actionline_hits += 1
                    actionline_found = actionline_hits > min_hits
            if not other_found:
                if dial_each_word_lang2(non_dial_src_lang, A, dialogue) == "True":
                    other_hits += 1
                    other_found = other_hits > min_hits

    return ("True" if actionline_found else "False",
            "True" if other_found else "False")


# -> Detection of Different Languages and Scripts in Script
def getInputs(filename1):
    """Detect the languages and scripts used in a screenplay file.

    The file is parsed with the ``script_reading`` helpers and at most the
    first five scenes are sampled. The first action line found determines the
    action-line language and script; every dialogue (items whose speaker is
    ``'Transition'`` are ignored) contributes to the dialogue statistics.

    Args:
        filename1: path of the script file, passed on to ``getRefined``.

    Returns:
        A list ``[non_dial_src_lang, dial_src_lang, dial_src_script,
        non_dial_src_script, UI_option3, UI_option4, UI_option5,
        UI_option6]`` where the four ``UI_option*`` entries are the strings
        ``"Yes"`` or ``"No"``:

        * ``UI_option3`` - more than 15% of the dialogues are in the
          action-line language;
        * ``UI_option4`` - more than 20% of the dialogues are in other
          languages;
        * ``UI_option5`` - dialogues contain words in the action-line
          language;
        * ``UI_option6`` - dialogues contain words in other languages.

    Raises:
        ValueError: if the sampled scenes contain no action line or no
            dialogue, since the languages cannot be determined then.
    """
    print("Detecting Languages and Scripts present in Script")
    refined, total_scenes = getRefined(filename1)
    sluglines, without_slug = getSlugAndNonSlug(refined)
    characters = getSpeakers(without_slug)
    scenes, actionline, parenthetical_lis, speakers, dialogues = getScenes(
        refined, total_scenes, characters)
    print("line 520:scenes: ", scenes)

    # Only the first five scenes are sampled for detection.
    scenes = scenes[:5]

    language_of_all_dialogues = []
    script_of_all_dialogues = []
    actionline_seen = False
    non_dial_src_lang = None
    non_dial_src_script = None

    for scene in tqdm(scenes):
        for i, line in enumerate(scene):
            if i == 0:
                continue
            if isinstance(line, str):
                # Only the first action line is used for detection.
                if not actionline_seen:
                    non_dial_src_lang = language_detector(line)
                    non_dial_src_script = script_det(line)
                    actionline_seen = True
                    print("Non Dialogue/Actionline Language:", non_dial_src_lang)
                    print("Non Dialogue/Actionline Script:", non_dial_src_script)
                continue
            [speaker] = line.keys()
            if speaker == 'Transition':
                continue
            dialogue = line[speaker][2]
            language_of_all_dialogues.append(language_detector(dialogue))
            script_of_all_dialogues.append(script_det(dialogue))

    # Fail with a clear message instead of an unbound-variable error, an
    # IndexError inside A_B_C or a division by zero further down.
    if not actionline_seen:
        raise ValueError(
            "No action line found in the sampled scenes; "
            "cannot detect the action-line language")
    if not language_of_all_dialogues:
        raise ValueError(
            "No dialogue found in the sampled scenes; "
            "cannot detect the dialogue language")

    # -> For Detecting presence of different languages in dialogues (whole sentences)
    A, B, C = A_B_C(language_of_all_dialogues, non_dial_src_lang)
    totaldials = len(language_of_all_dialogues)
    dial_src_script = mode(script_of_all_dialogues)
    dial_src_lang = A
    # -> For Detecting presence of different languages in dialogues (words)
    word_lang_with_actionline, word_lang_with_other = word_with_actionline_other_lang(
        scenes, A, non_dial_src_lang)
    print("A = {} B = {} C = {}".format(A, B, C))
    print("dial_language", A)
    print("dial_src_script", dial_src_script)

    if round(B / totaldials, 2) > 0.15:
        print("UI option3 - yes")
        UI_option3 = "Yes"
    else:
        print("UI option3 - no")
        UI_option3 = "No"
    if round(C / totaldials, 2) > 0.20:
        print("UI option4 - yes")
        UI_option4 = "Yes"
    else:
        print("UI option4 - no")
        UI_option4 = "No"
    if word_lang_with_actionline == "True":
        print("UI option5 - Yes")
        UI_option5 = "Yes"
    else:
        print("UI_option5 - NO")
        UI_option5 = "No"
    print("checking other lang", word_lang_with_other)
    if word_lang_with_other == "True":
        print("UI option6 - Yes")
        UI_option6 = "Yes"
    else:
        print("UI option6 - No")
        UI_option6 = "No"

    print("*******************------------Detection------------***********************")
    print(UI_option3, UI_option4, UI_option5, UI_option6, non_dial_src_script)

    return [non_dial_src_lang, dial_src_lang, dial_src_script, non_dial_src_script, UI_option3, UI_option4, UI_option5,
            UI_option6]
first commit 2024-04-27 09:33:09 +00:00			`from google.cloud import translate_v2 as Translate`
			`from google.cloud import translate`
			`from MNF.settings import BasePath`
			`from .script_writing import default_script`
			`from .script_detector import script_cat`
			`from statistics import mode`
			`from collections import Counter`
			`# import textract`
			`from tqdm import tqdm`
			`import sys`
			`import re`
			`import os`
			`from .script_reading import getRefined, getSlugAndNonSlug, getSpeakers, getScenes`

			`basePath = BasePath()`

			`# -> Google Translation API Credentials`
			`os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = rf"{basePath}/MNF/json_keys/authentication.json"`
			`translate_client = Translate.Client()`
			`client = translate.TranslationServiceClient()`


			`# -> For Detecting language of any text`
			`def language_detector(text):`
			`print(text,"sentence recieved")`
			`result = translate_client.detect_language(text)`
			`# print(result)`
			`if result['language'] == "hi-Latn":`
			`return 'hi'`
			`else:`
			`return result['language']`


			`# -> For Detecting Script of any text`
			`def script_det(text):`
			`punctuations = '''!()-[]{};:'"\,<>./?@#$%^&*_~'''`
			`no_punct = ""`
			`for char in text:`
			`if char not in punctuations:`
			`no_punct = char`
			`break`
			`script = script_cat(no_punct)[0]`
			`return script`


			`'''`
			`A. Language of Highest number of full dialogues,`
			`B. Numbers of dialogues in action line language,`
			`C. Number of dialogues in other languages)`
			`'''`


			`# -> For Detecting presence of different languages in dialogues (whole sentences)`
			`def A_B_C(dialogue_language, non_dial_src_lang):`
			`print("line 316:dialogue_language", dialogue_language)`
			`dict1 = dict(Counter(dialogue_language))`
			`print("line 319:dict1", dict1)`
			`sorted_values = sorted(dict1.values(), reverse=True) # Sort the values`
			`print("line 321:sorted_values:", sorted_values)`
			`sorted_dict = {}`
			`for i in sorted_values:`
			`for k in dict1.keys():`
			`if dict1[k] == i:`
			`sorted_dict[k] = dict1[k]`
			`sources = list(sorted_dict.keys())`
			`print("line 328: sources: ", sources)`
			`A = sources[0]`
			`print("Most Prominent Dialogue Language", A)`
			`if len(sources) == 1:`
			`B = 0`
			`C = 0`
			`elif non_dial_src_lang not in sources:`
			`B = 0`
			`C = sum(sorted_values[1:])`
			`else:`
			`if A == non_dial_src_lang:`
			`B = 0`
			`else:`
			`B = sorted_values[sources.index(non_dial_src_lang)]`
			`C = sum(sorted_values[1:]) - B`
			`return A, B, C`


			`# -> Detection of Different Lanugages and Scripts in Script`
			`def dial_each_word_lang1(non_dial_src_lang, dial):`
			`for word in dial.split():`
			`if language_detector(word) == non_dial_src_lang:`
			`return "True"`
			`return "False"`


			`# -> Detection of Different Lanugages and Scripts in Script`
			`def dial_each_word_lang2(non_dial_src_lang, A, dial):`
			`for word in dial.split():`
			`if (language_detector(word) != non_dial_src_lang) or (language_detector(word) != A):`
			`return "True"`
			`return "False"`


			`# -> Detection of words in lines with different languages`
			`def word_with_actionline_other_lang(scenes, A, non_dial_src_lang):`
			`dials_with_actionline_langs = 0`
			`dials_with_other_langs = 0`
			`sceneno = 0`
			`actionline_lang_output = "False"`
			`other_lang_output = "False"`
			`ignore_actionline_match = "False"`
			`if A == non_dial_src_lang:`
			`ignore_actionline_match = "True"`
			`for scene in tqdm(scenes[:]):`
			`sceneno += 1`
			`for i, line in enumerate(scene):`
			`if i == 0:`
			`continue`
			`if isinstance(line, str):`
			`continue`
			`else:`
			`[speaker] = line.keys()`
			`if speaker == 'Transition':`
			`continue`
			`dial_src_lang = language_detector(line[speaker][2])`
			`if actionline_lang_output == "False" or other_lang_output == "False":`
			`print(`
			`"Still Searching if Words of other langs are present or not...")`
			`if dial_src_lang == A:`
			`if actionline_lang_output != "True" and not ignore_actionline_match:`
			`output = dial_each_word_lang1(`
			`non_dial_src_lang, line[speaker][2])`
			`if output == "True":`
			`dials_with_actionline_langs += 1`
			`if dials_with_actionline_langs > 5:`
			`actionline_lang_output = "True"`
			`if other_lang_output != "True":`
			`output = dial_each_word_lang2(`
			`non_dial_src_lang, A, line[speaker][2])`
			`if output == "True":`
			`dials_with_other_langs += 1`
			`if dials_with_other_langs > 5:`
			`other_lang_output = "True"`
			`else:`
			`print("Found Presence of other Langs in Words")`
			`return actionline_lang_output, other_lang_output`

			`return actionline_lang_output, other_lang_output`


			`# -> Detection of Different Lanugages and Scripts in Script`
			`def getInputs(filename1):`
			`print("Detecting Languages and Scripts present in Script")`
			`refined, total_scenes = getRefined(filename1)`
			`sluglines, without_slug = getSlugAndNonSlug(refined)`
			`characters = getSpeakers(without_slug)`
			`scenes, actionline, parenthetical_lis, speakers, dialogues = getScenes(`
			`refined, total_scenes, characters)`
			`print("line 520:scenes: ", scenes)`
			`language_of_all_dialogues = []`
			`script_of_all_dialogues = []`
			`count = 0`
			`length = len(scenes)`

			`if (length > 5):`
			`length = 5`
			`scenes = scenes[:length]`

			`for scene in tqdm(scenes[:length]):`
			`for i, line in enumerate(scene):`
			`if i == 0:`
			`continue`
			`if isinstance(line, str):`
			`if count == 0:`
			`non_dial_src_lang = language_detector(line)`
			`non_dial_src_script = script_det(line)`
			`count += 1`
			`print("Non Dialogue/Actionline Language:", non_dial_src_lang)`
			`print("Non Dialogue/Actionline Script:", non_dial_src_script)`
			`else:`
			`[speaker] = line.keys()`
			`if speaker == 'Transition':`
			`continue`
			`dial_src_lang = language_detector(line[speaker][2])`
			`language_of_all_dialogues.append(dial_src_lang)`
			`script_of_all_dialogues.append(script_det(line[speaker][2]))`

			`# -> For Detecting presence of different languages in dialogues (whole sentences)`
			`A, B, C = A_B_C(language_of_all_dialogues, non_dial_src_lang)`
			`totaldials = len(language_of_all_dialogues)`
			`dial_src_script = mode(script_of_all_dialogues)`
			`dial_src_lang = A`
			`one_step_process = "Yes" if dial_src_script == default_script[A] else "Can_not_say"`
			`# word_lang_with_actionline = word_with_actionline(scenes, A, non_dial_src_lang)`
			`# word_lang_with_other = word_with_other(scenes, A, non_dial_src_lang)`
			`# -> For Detecting presence of different languages in dialogues (words)`
			`word_lang_with_actionline, word_lang_with_other = word_with_actionline_other_lang(`
			`scenes, A, non_dial_src_lang)`
			`print("A = {} B = {} C = {}".format(A, B, C))`
			`print("dial_language", A)`
			`print("dial_src_script", dial_src_script)`

			`if round(B / totaldials, 2) > 0.15:`
			`print("UI option3 - yes")`
			`UI_option3 = "Yes"`
			`else:`
			`print("UI option3 - no")`
			`UI_option3 = "No"`
			`if round(C / totaldials, 2) > 0.20:`
			`print("UI option4 - yes")`
			`UI_option4 = "Yes"`
			`else:`
			`print("UI option4 - no")`
			`UI_option4 = "No"`
			`if word_lang_with_actionline == "True":`
			`print("UI option5 - Yes")`
			`UI_option5 = "Yes"`
			`else:`
			`print("UI_option5 - NO")`
			`UI_option5 = "No"`
			`print("checking other lang", word_lang_with_other)`
			`if word_lang_with_other == "True":`
			`print("UI option6 - Yes")`
			`UI_option6 = "Yes"`
			`else:`
			`print("UI option6 - No")`
			`UI_option6 = "No"`

			`print("*****************------------Detection------------*********************")`
			`print(UI_option3, UI_option4, UI_option5, UI_option6, non_dial_src_script)`

			`return [non_dial_src_lang, dial_src_lang, dial_src_script, non_dial_src_script, UI_option3, UI_option4, UI_option5,`
			`UI_option6]`