Conversion_Kitchen_Code/kitchen_counter/conversion/subtitling/asad_test.py

from translation_metric import diff_score
import docx
from docx.shared import Inches, Cm, Pt
import os
import sys
import re
from script_detector import script_cat
from translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
from selection_source1 import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs
from script_writing import default_script


def script_det(text):
    """Classify the script of ``text`` from its first non-punctuation character.

    The first character of ``text`` that is not in the punctuation set below
    is handed to ``script_cat``; element 0 of that result is returned
    (presumably the script name -- confirm against ``script_detector``).
    If ``text`` is empty or consists only of punctuation, ``script_cat`` is
    called with an empty string, exactly as before.
    """
    # Raw string: the set contains a literal backslash, and "\," is not a
    # valid escape sequence in a normal string literal.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~“"”'''
    first_letter = next(
        (char for char in text if char not in punctuations), "")
    return script_cat(first_letter)[0]


def punct_remover(string):
    """Return ``string`` with punctuation and ASCII digits replaced by spaces.

    Every occurrence of a character from the set below (ASCII punctuation,
    the ellipsis, the danda and the digits 0-9) becomes a single space.
    All other characters, including their case, are left untouched, so the
    result always has the same length as the input.
    """
    # Raw string: the set contains a literal backslash, and "\," is not a
    # valid escape sequence in a normal string literal.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~…।1234567890'''
    # One translate pass instead of one str.replace call per character.
    to_space = str.maketrans(punctuations, " " * len(punctuations))
    return string.translate(to_space)


def word_transliterate(sentence, dest_script):
    """Placeholder transliteration step: hand ``sentence`` back unchanged.

    ``dest_script`` is accepted but not used by this implementation.
    """
    return sentence


def final_out(output1, output2, output3, dest_lang):
    """Return the first candidate written entirely in the target script.

    Each candidate is cleaned with ``punct_remover`` and split on
    whitespace. A candidate is accepted when no word's ``script_det``
    value differs from ``default_script[dest_lang]``. Candidates are tried
    in the order ``output1``, ``output2``, ``output3``. When all three
    contain an off-script word, ``output1`` is passed through
    ``word_transliterate`` and that result is returned.
    """
    candidates = (output1, output2, output3)
    # All three are cleaned up front, before any script check runs.
    cleaned = [punct_remover(candidate) for candidate in candidates]

    def has_off_script_word(text):
        # Stops at the first word whose script differs from the target.
        return any(
            script_det(token) != default_script[dest_lang]
            for token in text.split()
        )

    for candidate, stripped in zip(candidates, cleaned):
        if not has_off_script_word(stripped):
            return candidate

    return word_transliterate(output1, default_script[dest_lang])

# Build the Word document and the three-column table that
# add_dial_comparison_doc2 fills in later.
basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
doc_file = basepath
doc2 = docx.Document()
sections = doc2.sections

# Same 0.2-inch margin on all four sides of every section.
for section in sections:
    for side in ("top_margin", "bottom_margin", "left_margin", "right_margin"):
        setattr(section, side, Inches(0.2))

# Swap page width and height on the last section.
section = doc2.sections[-1]
new_height = section.page_width
section.page_width = section.page_height
section.page_height = new_height

# Title and a bold caption naming the three inputs.
name = 'Final table ' + doc_file
doc2.add_heading(name, 0)
doc_para = doc2.add_paragraph()
doc_para.add_run('SRT Inputs : Google, AWS, Azure').bold = True

# One header row with a bold label per provider column.
table2 = doc2.add_table(rows=1, cols=3)
table2.style = 'TableGrid'
hdr_Cells = table2.rows[0].cells
for header_cell, provider in zip(hdr_Cells, ("Google", "AWS", "Azure")):
    header_cell.paragraphs[0].add_run(provider).bold = True

def add_dial_comparison_doc2(doc2, table2, trans):
    """Append one comparison row to ``table2`` and save ``doc2``.

    Columns 0, 1 and 2 receive ``trans["0"]``, ``trans["1"]`` and
    ``trans["2"]``. A value equal to "." is replaced by that column's
    "No SRT from ..." placeholder. The document is then written to
    "final_comparision.docx".
    """
    placeholders = (
        ("0", "No SRT from Google"),
        ("1", "No SRT from AWS"),
        ("2", "No SRT from Azure"),
    )
    new_cells = table2.add_row().cells
    for column, (key, missing_text) in enumerate(placeholders):
        transcript = trans[key]
        new_cells[column].text = missing_text if transcript == "." else transcript
    doc2.save("final_comparision.docx")


def _translation_for_source(source_name, sources_name, trans):
    """Return the text in ``trans`` that belongs to the source ``source_name``.

    ``sources_name`` maps a key to a source name and ``trans`` is indexed by
    ``str(key)``. If several keys carry the same name, the last one wins.

    Raises:
        KeyError: if no key of ``sources_name`` has that name, or ``trans``
            has no entry for the matching key.
    """
    matching_keys = [
        key for key, candidate in sources_name.items()
        if candidate == source_name
    ]
    if not matching_keys:
        # Fail with an explicit message instead of an unbound local or a
        # lookup with an unrelated value.
        raise KeyError(
            "selected source %r is not one of %r"
            % (source_name, list(sources_name.values())))
    return trans[str(matching_keys[-1])]


def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose one text from ``trans`` using several scoring methods.

    The manual, GLEU, METEOR and ROUGE helpers (ROUGE yields two results)
    each return an output and the source it came from. If all five outputs
    equal ``t0``, ``t0`` is returned as is. Otherwise the sources of the
    outputs that differ from ``t0`` are passed to ``selection_source``,
    which returns three entries. Element 0 of each entry is a source name;
    an empty name in the second or third entry means "reuse the first
    choice". The three resulting texts go to ``final_out``.

    Args:
        sentence: Accepted for interface compatibility; it does not
            influence the returned value.
        t0: Reference output the method outputs are compared against.
        trans: Mapping from source key (as ``str``) to that source's text.
        sources_name: Mapping from source key to source name.
        target_lang: Language key forwarded to ``final_out``.

    Returns:
        ``t0`` when every method output equals it, otherwise the result of
        ``final_out``.

    Raises:
        KeyError: if a source name chosen by ``selection_source`` is not
            present in ``sources_name``.
    """
    methods_name = {'0': 'MNF', '1': 'Gleu',
                    '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
    google_output = t0

    output1, source1 = manual_diff_score(trans, sources_name)
    output2, source2 = gleu_diff_score(trans, sources_name)
    output3, source3 = meteor_diff_score(trans, sources_name)
    output4, source4, output5, source5 = rouge_diff_score(trans, sources_name)

    if google_output == output1 == output2 == output3 == output4 == output5:
        return google_output

    # One slot per method, in methods_name order. A method that agrees with
    # the reference contributes a blank placeholder instead of its source.
    method_results = (
        (output1, source1),
        (output2, source2),
        (output3, source3),
        (output4, source4),
        (output5, source5),
    )
    differing_sources = [
        source if google_output != output else " "
        for output, source in method_results
    ]

    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(
        differing_sources, sources_name, trans, methods_name)

    output1 = _translation_for_source(s1ANDm1[0], sources_name, trans)

    if s2ANDm2[0] != "":
        output2 = _translation_for_source(s2ANDm2[0], sources_name, trans)
    else:
        output2 = output1

    if s3ANDm3[0] != "":
        output3 = _translation_for_source(s3ANDm3[0], sources_name, trans)
    else:
        output3 = output1

    return final_out(output1, output2, output3, target_lang)


####
# The sub-directory holding the three provider SRT files is given as the
# first command-line argument, relative to basepath.
basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
sub_path = sys.argv[1]
current = "/".join((basepath, sub_path))
google_srt, aws_srt, azure_srt = (
    current + file_name
    for file_name in (
        "/google_subtitle.srt",
        "/aws_subtitle.srt",
        "/azure_subtitle.srt",
    )
)


# Filled by main1(): "0" -> Google text, "1" -> AWS text, "2" -> Azure text.
trans = {}

# Lines dropped while flattening an SRT file: a line made only of digits,
# a line starting with an HH:MM:SS timestamp, and an empty line.
_SRT_SKIP_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in ('^[0-9]+$', '^[0-9]{2}:[0-9]{2}:[0-9]{2}', '^$')
)


def _read_srt_text(path):
    """Return the text lines of the SRT file at ``path`` joined by spaces.

    Lines matching any of ``_SRT_SKIP_PATTERNS`` are skipped. The remaining
    lines lose their trailing newline, are joined with single spaces, and
    leading whitespace is stripped from the result. If the file cannot be
    opened or decoded, "." is returned as the "no transcript" marker.
    """
    try:
        # The context manager closes the file even if reading fails.
        with open(path, "r", encoding="utf8") as srt_file:
            lines = srt_file.readlines()
    except (OSError, ValueError):
        # ValueError also covers UnicodeDecodeError for badly encoded files.
        return '.'

    kept = [
        line.rstrip('\n')
        for line in lines
        if not any(pattern.search(line) for pattern in _SRT_SKIP_PATTERNS)
    ]
    return ' '.join(kept).lstrip()


def main1():
    """Load the Google, AWS and Azure SRT texts into the module-level ``trans``.

    Keys "0", "1" and "2" receive the flattened text of ``google_srt``,
    ``aws_srt`` and ``azure_srt`` respectively, or "." when that file
    cannot be read.

    Returns:
        The same ``trans`` dict that was updated.
    """
    for key, srt_path in (("0", google_srt), ("1", aws_srt), ("2", azure_srt)):
        trans[key] = _read_srt_text(srt_path)
    return trans

# Load the three provider transcripts into the module-level `trans` dict.
main1()


# Arguments for compare_outputs: an empty sentence, a single-space t0,
# the key -> provider-name mapping and the target language key.
sentence = ""
t0 = " "
sources_name = {'0': "google", "1": 'aws', "2": 'azure'}
target_lang = "en"


# Disabled experiment: remove the first source whose text is "." from both
# `trans` and `sources_name` before comparing.
# sources_name = {'0': "google", "1": 'aws', "2": 'azure'}
# trans={"0":"Asad","1":"Lokesh","2":"."}
# print(trans)
# print(sources_name)
# desired_value = "."
# for key, value in trans.items():
#   if value == desired_value:
#     del trans[key]
#     del sources_name[key]
#     break
# print(trans)
# print(sources_name)

# Add the comparison row (this call also saves final_comparision.docx),
# then let compare_outputs choose one of the transcripts.
add_dial_comparison_doc2(doc2, table2, trans)
selected_para = compare_outputs(sentence, t0, trans, sources_name, target_lang)
# doc2.save("final_comparision.docx")
print(trans)
print(selected_para)
key_list = list(trans.keys())
val_list = list(trans.values())
# Index of the chosen text among the values of `trans` (insertion order).
# NOTE(review): list.index raises ValueError when selected_para is not one
# of those values, e.g. if compare_outputs returns t0 -- confirm this
# cannot happen.
position = val_list.index(selected_para)
print("Position", position)

def final_srt(position):
    """Return the SRT path for ``position``, skipping unavailable sources.

    Positions 0, 1 and 2 stand for the Google, AWS and Azure files. Starting
    at ``position``, the first source whose entry in ``trans`` is not "."
    wins. If none qualifies, or ``position`` is not 0, 1 or 2, the string
    "No srt found" is returned.
    """
    ordered_sources = (("0", google_srt), ("1", aws_srt), ("2", azure_srt))
    reached = False
    for index, (key, srt_path) in enumerate(ordered_sources):
        # Once the requested position is reached, every later source is a
        # fallback candidate.
        if not reached and position == index:
            reached = True
        if reached and trans[key] != '.':
            return srt_path
    return "No srt found"

# Print the SRT path chosen for `position`, or "No srt found".
print(final_srt(position))
first commit 2024-04-27 09:33:09 +00:00			`from translation_metric import diff_score`
			`import docx`
			`from docx.shared import Inches, Cm, Pt`
			`import os`
			`import sys`
			`import re`
			`from script_detector import script_cat`
			`from translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5`
			`from selection_source1 import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs`
			`from script_writing import default_script`


			`def script_det(text):`
			`punctuations = '''!()-[]{};:'"\,<>./?@#$%^&*_~“"”'''`
			`no_punct = ""`
			`for char in text:`
			`if char not in punctuations:`
			`no_punct = char`
			`break`
			`#print("alphabet", no_punct)`
			`script = script_cat(no_punct)[0]`
			`#print("script", script)`
			`return script`


			`def punct_remover(string):`
			`# punctuations = '''!()-[]{};:'"\,<>./?@#$%^&*_~…।“”'''`
			`punctuations = '''!()-[]{};:'"\,<>./?@#$%^&*_~…।1234567890'''`
			`for x in string.lower():`
			`if x in punctuations:`
			`string = string.replace(x, " ")`
			`return string`


			`def word_transliterate(sentence, dest_script):`
			`return sentence`


			`def final_out(output1, output2, output3, dest_lang):`
			`temp_output1 = punct_remover(output1)`
			`temp_output2 = punct_remover(output2)`
			`temp_output3 = punct_remover(output3)`
			`# for word in regexp_tokenize(output1, "[\w']+")`

			`for word in temp_output1.split():`
			`if script_det(word) != default_script[dest_lang]:`
			`for word in temp_output2.split():`
			`if script_det(word) != default_script[dest_lang]:`
			`for word in temp_output3.split():`
			`if script_det(word) != default_script[dest_lang]:`
			`# print("in3")`
			`output1 = word_transliterate(`
			`output1, default_script[dest_lang])`
			`return output1`
			`return output3`
			`return output2`
			`return output1`

			`basepath = "/home/user/mnf/project/MNF/conversion/subtitling"`
			`doc_file= basepath`
			`doc2 = docx.Document()`
			`sections = doc2.sections`
			`for section in sections:`
			`section.top_margin = Inches(0.2)`
			`section.bottom_margin = Inches(0.2)`
			`section.left_margin = Inches(0.2)`
			`section.right_margin = Inches(0.2)`
			`section = doc2.sections[-1]`
			`new_height = section.page_width`
			`section.page_width = section.page_height`
			`section.page_height = new_height`
			`name = 'Final table '+doc_file`
			`doc2.add_heading(name, 0)`
			`doc_para = doc2.add_paragraph()`
			`doc_para.add_run('SRT Inputs : Google, AWS, Azure').bold = True`
			`table2 = doc2.add_table(rows=1,cols=3)`
			`table2.style = 'TableGrid'`
			`hdr_Cells = table2.rows[0].cells`
			`hdr_Cells[0].paragraphs[0].add_run("Google").bold=True`
			`hdr_Cells[1].paragraphs[0].add_run("AWS").bold=True`
			`hdr_Cells[2].paragraphs[0].add_run("Azure").bold=True`
			`# hdr_Cells[3].paragraphs[0].add_run("Azure").bold=True`

			`def add_dial_comparison_doc2(doc2, table2, trans):`
			`row_Cells = table2.add_row().cells`
			`if trans["0"]==".":`
			`row_Cells[0].text= "No SRT from Google"`
			`else:`
			`row_Cells[0].text= trans["0"]`

			`if trans["1"]==".":`
			`row_Cells[1].text= "No SRT from AWS"`
			`else:`
			`row_Cells[1].text= trans["1"]`
			`if trans["2"]==".":`
			`row_Cells[2].text= "No SRT from Azure"`
			`else:`
			`row_Cells[2].text= trans["2"]`
			`doc2.save("final_comparision.docx")`



			`def compare_outputs(sentence, t0, trans, sources_name, target_lang):`
			`k = []`
			`s = []`
			`methods_name = {'0': 'MNF', '1': 'Gleu',`
			`'2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}`
			`google_output = t0`
			`#print("google", google_output)`
			`output1, source1 = manual_diff_score(trans, sources_name)`
			`#print("MNF", output1)`
			`output2, source2 = gleu_diff_score(trans, sources_name)`
			`#print("gleu", output2)`
			`output3, source3 = meteor_diff_score(trans, sources_name)`
			`#print("meteor", output3)`
			`output4, source4, output5, source5 = rouge_diff_score(trans, sources_name)`
			`#print("rougen", output4)`
			`#print("rougel", output5)`

			`if google_output == output1 == output2 == output3 == output4 == output5:`
			`#print("all output are same as google")`
			`return google_output`
			`else:`
			`if google_output != output1:`
			`k.append(output1)`
			`s.append(source1)`
			`else:`
			`k.append(" ")`
			`s.append(" ")`
			`if google_output != output2:`
			`k.append(output2)`
			`s.append(source2)`
			`else:`
			`k.append(" ")`
			`s.append(" ")`
			`if google_output != output3:`
			`k.append(output3)`
			`s.append(source3)`
			`else:`
			`k.append(" ")`
			`s.append(" ")`
			`if google_output != output4:`
			`k.append(output4)`
			`s.append(source4)`
			`else:`
			`k.append(" ")`
			`s.append(" ")`
			`if google_output != output5:`
			`k.append(output5)`
			`s.append(source5)`
			`else:`
			`k.append(" ")`
			`s.append(" ")`

			`k.insert(0, sentence)`
			`k.insert(1, google_output)`
			`s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(s, sources_name, trans, methods_name)`
			`# print("s1", s1ANDm1)`
			`# print("s2", s2ANDm2)`
			`# print("s3", s3ANDm3)`
			`# print(s1ANDm1[0])`
			`# print(sources_name)`

			`#add_dial_comparison_doc1a(doc1a, table1a , k, s, s1ANDm1[0])`
			`#add_dial_comparison_doc1b(doc1b, table1b , k, s, s1ANDm1[0])`
			`# add_dial_comparison_doc2(doc2, table2, sentence, s1ANDm1, s2ANDm2, s3ANDm3, sources_name, trans)`

			`for a, b in sources_name.items():`
			`if b == s1ANDm1[0]:`
			`k = a`
			`output1 = trans[str(k)]`

			`if s2ANDm2[0] != "":`
			`for c, d in sources_name.items():`
			`if d == s2ANDm2[0]:`
			`l = c`
			`output2 = trans[str(l)]`
			`else:`
			`output2 = output1`

			`if s3ANDm3[0] != "":`
			`for e, f in sources_name.items():`
			`if f == s3ANDm3[0]:`
			`m = e`
			`output3 = trans[str(m)]`
			`else:`
			`output3 = output1`

			`# print("output1", output1)`
			`# print("output2", output2)`
			`# print("output3", output3)`

			`output = final_out(output1, output2, output3, target_lang)`

			`# print("output", output)`

			`return output`


			`####`
			`basepath = "/home/user/mnf/project/MNF/conversion/subtitling"`
			`sub_path = sys.argv[1]`
			`current = basepath + "/" + sub_path`
			`google_srt = current + "/google_subtitle.srt"`
			`aws_srt = current + "/aws_subtitle.srt"`
			`azure_srt = current + "/azure_subtitle.srt"`




			`# trans={"0":[],"1":[],"2":[]}`
			`trans = {}`

			`def main1():`
			`# read file line by line`
			`# file = open( "google_subtitle.srt", "r",encoding="utf8")`

			`try:`
			`file = open(google_srt, "r", encoding="utf8")`
			`lines = file.readlines()`
			`file.close()`

			`text = ''`
			`for line in lines:`
			`if re.search('^[0-9]+$', line) is None and re.search('^[0-9]{2}:[0-9]{2}:[0-9]{2}', line) is None and re.search('^$', line) is None:`
			`text += ' ' + line.rstrip('\n')`
			`text = text.lstrip()`
			`trans["0"] = text`
			`except:`
			`trans["0"] = '.'`
			`try:`
			`file = open(aws_srt, "r", encoding="utf8")`
			`lines = file.readlines()`
			`file.close()`

			`text = ''`
			`for line in lines:`
			`if re.search('^[0-9]+$', line) is None and re.search('^[0-9]{2}:[0-9]{2}:[0-9]{2}', line) is None and re.search('^$', line) is None:`
			`text += ' ' + line.rstrip('\n')`
			`text = text.lstrip()`
			`trans["1"] = text`
			`except:`
			`trans["1"] = '.'`
			`try:`
			`file = open(azure_srt, "r", encoding="utf8")`
			`lines = file.readlines()`
			`file.close()`

			`text = ''`
			`for line in lines:`
			`if re.search('^[0-9]+$', line) is None and re.search('^[0-9]{2}:[0-9]{2}:[0-9]{2}', line) is None and re.search('^$', line) is None:`
			`text += ' ' + line.rstrip('\n')`
			`text = text.lstrip()`
			`trans["2"] = text`
			`except:`
			`trans["2"] = '.'`
			`return trans`

			`main1()`




			`sentence = ""`
			`t0 = " "`
			`sources_name = {'0': "google", "1": 'aws', "2": 'azure'}`
			`target_lang = "en"`


			`# sources_name = {'0': "google", "1": 'aws', "2": 'azure'}`
			`# trans={"0":"Asad","1":"Lokesh","2":"."}`
			`# print(trans)`
			`# print(sources_name)`
			`# desired_value = "."`
			`# for key, value in trans.items():`
			`# if value == desired_value:`
			`# del trans[key]`
			`# del sources_name[key]`
			`# break`
			`# print(trans)`
			`# print(sources_name)`

			`add_dial_comparison_doc2(doc2, table2, trans)`
			`selected_para = compare_outputs(sentence, t0, trans, sources_name, target_lang)`
			`# doc2.save("final_comparision.docx")`
			`print(trans)`
			`print(selected_para)`
			`key_list = list(trans.keys())`
			`val_list = list(trans.values())`
			`position = val_list.index(selected_para)`
			`print("Position", position)`

			`def final_srt(position):`
			`if position == 0:`
			`if trans["0"] != '.':`
			`return google_srt`
			`else:`
			`position += 1`
			`if position == 1:`
			`if trans["1"] != '.':`
			`return aws_srt`
			`else:`
			`position += 1`
			`if position == 2:`
			`if trans["2"] != '.':`
			`return azure_srt`
			`return "No srt found"`

			`# # print(final_srt(position))`
			`print(final_srt(position))`