# Conversion_Kitchen_Code/kitchen_counter/conversion/subtitling/asad_test.py
#
# Repository viewer metadata: 313 lines, 9.6 KiB, Python.
# Last change: 2024-04-27 09:33:09 +00:00
from translation_metric import diff_score
import docx
from docx.shared import Inches, Cm, Pt
import os
import sys
import re
from script_detector import script_cat
from translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
from selection_source1 import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs
from script_writing import default_script
# Characters skipped when looking for the first "real" character of a token.
# Raw string: the original literal contained the invalid escape ``\,`` which
# newer Python versions warn about; the character set itself is unchanged.
_SCRIPT_DET_PUNCTUATION = r'''!()-[]{};:'"\,<>./?@#$%^&*_~“"'''


def script_det(text):
    """Return the script of the first non-punctuation character of ``text``.

    The first character of ``text`` that is not listed in
    ``_SCRIPT_DET_PUNCTUATION`` is passed to ``script_cat`` and the first
    element of its result is returned.

    If ``text`` is empty or consists only of punctuation, the empty string is
    passed to ``script_cat`` (same as before).
    NOTE(review): how ``script_cat`` handles "" is not visible here - confirm.
    """
    first_char = next(
        (char for char in text if char not in _SCRIPT_DET_PUNCTUATION),
        "",
    )
    return script_cat(first_char)[0]
# Translation table mapping every stripped character (ASCII punctuation,
# the ellipsis, the danda and the ASCII digits) to a single space.
# Raw string: the original literal contained the invalid escape ``\,``;
# the backslash and the comma are both still part of the set.
_PUNCT_REMOVER_TABLE = str.maketrans(
    dict.fromkeys(r'''!()-[]{};:'"\,<>./?@#$%^&*_~…।1234567890''', " ")
)


def punct_remover(string):
    """Return ``string`` with punctuation and digits replaced by spaces.

    Each listed character is replaced by exactly one space, so the result has
    the same length as the input and the case of letters is preserved.
    Curly quotes are not in the set and are left untouched.
    """
    # One pass over the string instead of one full ``replace`` per match.
    return string.translate(_PUNCT_REMOVER_TABLE)
def word_transliterate(sentence, dest_script):
    """Return ``sentence`` unchanged.

    This is currently a pass-through: ``dest_script`` is accepted but not
    used, and no transliteration is performed.
    """
    unchanged = sentence
    return unchanged
def final_out(output1, output2, output3, dest_lang):
    """Pick the first candidate written entirely in the target script.

    Each candidate is stripped of punctuation/digits with ``punct_remover``
    and split on whitespace; a candidate qualifies when ``script_det`` of
    every word equals ``default_script[dest_lang]``.

    Returns:
        ``output1`` if it qualifies, otherwise ``output2`` if it qualifies,
        otherwise ``output3`` if it qualifies. When none qualifies, the result
        of ``word_transliterate(output1, default_script[dest_lang])``.

    A candidate with no words at all (empty or punctuation only) qualifies
    trivially, so ``default_script[dest_lang]`` is only looked up once a word
    actually has to be checked.
    """
    # Clean all three up front, in order, exactly as before.
    candidates = (
        (output1, punct_remover(output1)),
        (output2, punct_remover(output2)),
        (output3, punct_remover(output3)),
    )

    for original, cleaned in candidates:
        # ``any`` stops at the first word in a foreign script.
        has_foreign_word = any(
            script_det(word) != default_script[dest_lang]
            for word in cleaned.split()
        )
        if not has_foreign_word:
            return original

    # Every candidate contains at least one word outside the target script.
    return word_transliterate(output1, default_script[dest_lang])
# Build the comparison document: narrow margins, swapped page dimensions,
# a heading, and a three-column table with one header cell per SRT provider.
basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
doc_file = basepath
doc2 = docx.Document()

sections = doc2.sections
for section in sections:
    section.top_margin = Inches(0.2)
    section.bottom_margin = Inches(0.2)
    section.left_margin = Inches(0.2)
    section.right_margin = Inches(0.2)

# Swap width and height of the last section so the page is wider than tall.
section = doc2.sections[-1]
new_height = section.page_width
section.page_width = section.page_height
section.page_height = new_height

name = 'Final table ' + doc_file
doc2.add_heading(name, 0)
doc_para = doc2.add_paragraph()
doc_para.add_run('SRT Inputs : Google, AWS, Azure').bold = True

table2 = doc2.add_table(rows=1, cols=3)
# Use the style *name*; 'TableGrid' is the style id, and python-docx only
# resolves ids through a deprecated fallback that emits a UserWarning.
table2.style = 'Table Grid'
hdr_Cells = table2.rows[0].cells
for header_cell, provider in zip(hdr_Cells, ("Google", "AWS", "Azure")):
    header_cell.paragraphs[0].add_run(provider).bold = True
def add_dial_comparison_doc2(doc2, table2, trans):
    """Append one row with the three provider transcripts and save the document.

    Args:
        doc2: document object; only its ``save`` method is used.
        table2: table object; a row is appended with ``add_row()`` and the
            first three cells of that row are filled.
        trans: mapping with keys ``"0"``, ``"1"`` and ``"2"`` holding the
            Google, AWS and Azure text. The value ``"."`` marks a missing
            transcript and is rendered as ``"No SRT from <provider>"``.

    The document is written to ``final_comparision.docx`` in the current
    working directory on every call.
    """
    row_cells = table2.add_row().cells
    providers = (("0", "Google"), ("1", "AWS"), ("2", "Azure"))
    for column, (key, provider) in enumerate(providers):
        text = trans[key]
        row_cells[column].text = (
            f"No SRT from {provider}" if text == "." else text
        )
    doc2.save("final_comparision.docx")
def _translation_for_source(trans, sources_name, source_name):
    """Return the text in ``trans`` belonging to the source called ``source_name``.

    ``sources_name`` maps a key to a source name; the key of the last entry
    whose name equals ``source_name`` is used to index ``trans``.

    Raises:
        KeyError: if no entry of ``sources_name`` carries ``source_name``.
    """
    matching_keys = [
        key for key, candidate in sources_name.items()
        if candidate == source_name
    ]
    if not matching_keys:
        raise KeyError(
            f"selected source {source_name!r} is not present in sources_name"
        )
    return trans[str(matching_keys[-1])]


def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose one text from ``trans`` based on five similarity metrics.

    Args:
        sentence: kept for interface compatibility; not used for the result.
        t0: baseline text that every metric result is compared against.
        trans: mapping of source key to text.
        sources_name: mapping of source key to source name.
        target_lang: key into ``default_script`` forwarded to ``final_out``.

    Returns:
        ``t0`` when all five metric outputs equal it. Otherwise the value
        chosen by ``final_out`` among the texts of the up-to-three sources
        returned by ``selection_source``.

    Raises:
        KeyError: if a source picked by ``selection_source`` is not a value
            of ``sources_name``. Previously this surfaced as an unrelated
            ``KeyError`` or ``UnboundLocalError``.
    """
    methods_name = {'0': 'MNF', '1': 'Gleu',
                    '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
    google_output = t0

    output1, source1 = manual_diff_score(trans, sources_name)
    output2, source2 = gleu_diff_score(trans, sources_name)
    output3, source3 = meteor_diff_score(trans, sources_name)
    output4, source4, output5, source5 = rouge_diff_score(trans, sources_name)
    scored = (
        (output1, source1),
        (output2, source2),
        (output3, source3),
        (output4, source4),
        (output5, source5),
    )

    if all(metric_output == google_output for metric_output, _ in scored):
        return google_output

    # One slot per metric: the winning source name when that metric disagrees
    # with the baseline, a single blank otherwise.
    s = [
        metric_source if metric_output != google_output else " "
        for metric_output, metric_source in scored
    ]
    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(
        s, sources_name, trans, methods_name)

    # Only element [0] of each selection is used here; an empty string in the
    # second or third selection means "reuse the first choice".
    first_choice = _translation_for_source(trans, sources_name, s1ANDm1[0])
    if s2ANDm2[0] != "":
        second_choice = _translation_for_source(
            trans, sources_name, s2ANDm2[0])
    else:
        second_choice = first_choice
    if s3ANDm3[0] != "":
        third_choice = _translation_for_source(
            trans, sources_name, s3ANDm3[0])
    else:
        third_choice = first_choice

    return final_out(first_choice, second_choice, third_choice, target_lang)
# Working directory for this run (first command-line argument, relative to
# ``basepath``) and the three provider SRT files expected inside it.
basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
sub_path = sys.argv[1]
current = f"{basepath}/{sub_path}"
google_srt = f"{current}/google_subtitle.srt"
aws_srt = f"{current}/aws_subtitle.srt"
azure_srt = f"{current}/azure_subtitle.srt"
# Filled by main1(): "0" -> Google, "1" -> AWS, "2" -> Azure.
trans = dict()
# Lines that carry no spoken text: a bare cue number, a line starting with an
# HH:MM:SS timestamp, or an empty line.
_SRT_NON_TEXT_LINE = re.compile(r'^(?:[0-9]+$|[0-9]{2}:[0-9]{2}:[0-9]{2}|$)')


def _read_srt_text(path):
    """Return the text lines of the SRT file at ``path`` joined by spaces."""
    # utf-8-sig also reads plain UTF-8 but drops a leading BOM, which would
    # otherwise stop the first cue number from being recognised and skipped.
    with open(path, "r", encoding="utf-8-sig") as srt_file:
        text_lines = [
            line.rstrip('\n')
            for line in srt_file
            if _SRT_NON_TEXT_LINE.search(line) is None
        ]
    return ' '.join(text_lines).lstrip()


def main1():
    """Load the Google, AWS and Azure transcripts into ``trans``.

    For each provider the SRT file is reduced to its text lines, joined with
    single spaces and stored under key ``"0"`` (Google), ``"1"`` (AWS) or
    ``"2"`` (Azure). If the file cannot be opened or decoded, the placeholder
    ``'.'`` is stored instead.

    Returns:
        The module-level ``trans`` dict, updated in place.
    """
    providers = (("0", google_srt), ("1", aws_srt), ("2", azure_srt))
    for key, srt_path in providers:
        try:
            trans[key] = _read_srt_text(srt_path)
        except (OSError, UnicodeError):
            # Missing or unreadable file: mark this provider as unavailable.
            trans[key] = '.'
    return trans
# Load the three provider transcripts into the module-level ``trans`` dict.
main1()
# Inputs for compare_outputs: ``t0`` is the baseline every metric output is
# compared against; ``sentence`` is passed through as an empty string.
sentence = ""
t0 = " "
sources_name = {'0': "google", "1": 'aws', "2": 'azure'}
target_lang = "en"
# NOTE: an earlier experiment (now disabled) removed the first provider whose
# transcript is "." from both ``trans`` and ``sources_name`` before comparing.
# Write the transcripts as a table row and save final_comparision.docx.
add_dial_comparison_doc2(doc2, table2, trans)
selected_para = compare_outputs(sentence, t0, trans, sources_name, target_lang)
# doc2.save("final_comparision.docx")
print(trans)
print(selected_para)
key_list = list(trans.keys())
val_list = list(trans.values())
# Index of the first transcript equal to the selected text; list.index raises
# ValueError when the selected text is not one of the values in ``trans``.
position = val_list.index(selected_para)
print("Position", position)
def final_srt(position):
    """Return the path of the first available SRT file at or after ``position``.

    Positions 0, 1 and 2 stand for Google, AWS and Azure. Starting at
    ``position``, the first provider whose entry in ``trans`` is not the
    placeholder ``'.'`` wins and its SRT path is returned. Any other position,
    or no available provider from there on, yields ``"No srt found"``.
    """
    not_found = "No srt found"
    if position not in (0, 1, 2):
        return not_found

    candidates = (("0", google_srt), ("1", aws_srt), ("2", azure_srt))
    for key, srt_path in candidates[int(position):]:
        if trans[key] != '.':
            return srt_path
    return not_found
# Print the SRT path chosen for ``position`` (or "No srt found").
print(final_srt(position))