from translation_metric import diff_score
import docx
from docx.shared import Inches, Cm, Pt
import os
import sys
import re
from script_detector import script_cat
from translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
from selection_source1 import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs
from script_writing import default_script

# Characters that script_det skips while looking for the first "real"
# character of a word. The backslash is escaped explicitly; the runtime value
# is the same as the previous literal, without the invalid "\," escape.
_SCRIPT_DET_PUNCTUATION = '''!()-[]{};:'"\\,<>./?@#$%^&*_~“"”'''

# Characters (punctuation and ASCII digits) that punct_remover blanks out.
_PUNCT_REMOVER_CHARS = '''!()-[]{};:'"\\,<>./?@#$%^&*_~…।1234567890'''
# Built once so punct_remover is a single pass over its input.
_PUNCT_REMOVER_TABLE = str.maketrans(dict.fromkeys(_PUNCT_REMOVER_CHARS, " "))


def script_det(text):
    """Return ``script_cat(ch)[0]`` for the first non-punctuation character.

    ``ch`` is the first character of *text* that is not in
    ``_SCRIPT_DET_PUNCTUATION``. When *text* is empty or consists only of
    such characters, ``script_cat`` is called with the empty string.

    NOTE(review): presumably the result is a script name comparable to the
    values of ``default_script`` -- confirm against ``script_detector``.
    """
    first_char = next(
        (char for char in text if char not in _SCRIPT_DET_PUNCTUATION),
        "",
    )
    return script_cat(first_char)[0]


def punct_remover(string):
    """Return *string* with every punctuation mark and digit replaced by a space.

    The characters affected are those listed in ``_PUNCT_REMOVER_CHARS``.
    The length of the string is preserved (one space per replaced character).
    """
    return string.translate(_PUNCT_REMOVER_TABLE)


def word_transliterate(sentence, dest_script):
    """Return *sentence* unchanged; *dest_script* is currently ignored."""
    return sentence


def _contains_foreign_script(cleaned_text, dest_lang):
    """Tell whether any word of *cleaned_text* is not in the default script.

    The expected script is ``default_script[dest_lang]``; it is only looked
    up when *cleaned_text* contains at least one word.
    """
    return any(
        script_det(word) != default_script[dest_lang]
        for word in cleaned_text.split()
    )


def final_out(output1, output2, output3, dest_lang):
    """Pick the first candidate written entirely in the destination script.

    Candidates are checked in order (*output1*, *output2*, *output3*) after
    punctuation and digits are stripped. The first one whose words all match
    ``default_script[dest_lang]`` is returned as given. If none qualifies,
    *output1* is passed through ``word_transliterate`` and returned.
    """
    cleaned1 = punct_remover(output1)
    cleaned2 = punct_remover(output2)
    cleaned3 = punct_remover(output3)

    if not _contains_foreign_script(cleaned1, dest_lang):
        return output1
    if not _contains_foreign_script(cleaned2, dest_lang):
        return output2
    if not _contains_foreign_script(cleaned3, dest_lang):
        return output3
    return word_transliterate(output1, default_script[dest_lang])


basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
doc_file = basepath

# Comparison document: every section gets a 0.2 inch margin on all sides.
doc2 = docx.Document()
sections = doc2.sections
for section in sections:
    section.top_margin = Inches(0.2)
    section.bottom_margin = Inches(0.2)
    section.left_margin = Inches(0.2)
    section.right_margin = Inches(0.2)
# Swap the page width and height of the last section.
section = doc2.sections[-1]
new_height = section.page_width
section.page_width = section.page_height
section.page_height = new_height

name = 'Final table ' + doc_file
doc2.add_heading(name, 0)
doc_para = doc2.add_paragraph()
doc_para.add_run('SRT Inputs : Google, AWS, Azure').bold = True

# One column per subtitle provider, with a bold header row.
table2 = doc2.add_table(rows=1, cols=3)
table2.style = 'TableGrid'
hdr_Cells = table2.rows[0].cells
hdr_Cells[0].paragraphs[0].add_run("Google").bold = True
hdr_Cells[1].paragraphs[0].add_run("AWS").bold = True
hdr_Cells[2].paragraphs[0].add_run("Azure").bold = True


def add_dial_comparison_doc2(doc2, table2, trans):
    """Append one row with the three provider texts and save the document.

    Args:
        doc2: Document object; its ``save`` method is called with
            ``"final_comparision.docx"``.
        table2: Table that receives the new row.
        trans: Mapping with keys ``"0"``, ``"1"``, ``"2"`` (Google, AWS,
            Azure). A value of ``"."`` marks a missing subtitle and is
            rendered as ``"No SRT from <provider>"``.
    """
    row_cells = table2.add_row().cells
    for column, provider in enumerate(("Google", "AWS", "Azure")):
        text = trans[str(column)]
        row_cells[column].text = (
            "No SRT from " + provider if text == "." else text
        )
    doc2.save("final_comparision.docx")


def _translation_for_source(source_name, sources_name, trans):
    """Return the entry of *trans* whose source is called *source_name*.

    When several keys of *sources_name* map to *source_name*, the last one
    wins. Raises ``KeyError`` when no key maps to it.
    """
    matching_keys = [
        key for key, candidate in sources_name.items()
        if candidate == source_name
    ]
    if not matching_keys:
        raise KeyError(
            "no source named %r in %r" % (source_name, sources_name)
        )
    return trans[str(matching_keys[-1])]


def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose one text among the provider outputs in *trans*.

    Each scoring function (manual, GLEU, METEOR, ROUGE) is asked for its
    preferred output and source. If all of them equal *t0*, *t0* is returned.
    Otherwise ``selection_source`` ranks up to three sources, their texts are
    looked up in *trans*, and ``final_out`` makes the final choice.

    Args:
        sentence: Accepted for interface compatibility; not used here.
        t0: Reference (Google) output the scorers' picks are compared with.
        trans: Mapping from source key (``"0"``, ``"1"``, ...) to text.
        sources_name: Mapping from source key to source name.
        target_lang: Language code forwarded to ``final_out``.

    Returns:
        The selected text.

    Raises:
        KeyError: If a source name returned by ``selection_source`` is not
            a value of *sources_name*.
    """
    methods_name = {'0': 'MNF', '1': 'Gleu', '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
    google_output = t0

    output1, source1 = manual_diff_score(trans, sources_name)
    output2, source2 = gleu_diff_score(trans, sources_name)
    output3, source3 = meteor_diff_score(trans, sources_name)
    output4, source4, output5, source5 = rouge_diff_score(trans, sources_name)
    scored = (
        (output1, source1),
        (output2, source2),
        (output3, source3),
        (output4, source4),
        (output5, source5),
    )

    if all(google_output == candidate for candidate, _ in scored):
        return google_output

    # A scorer that agrees with the reference contributes a blank source.
    s = [
        source if google_output != candidate else " "
        for candidate, source in scored
    ]
    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(s, sources_name, trans, methods_name)

    output1 = _translation_for_source(s1ANDm1[0], sources_name, trans)
    # An empty second or third choice falls back to the first choice.
    if s2ANDm2[0] != "":
        output2 = _translation_for_source(s2ANDm2[0], sources_name, trans)
    else:
        output2 = output1
    if s3ANDm3[0] != "":
        output3 = _translation_for_source(s3ANDm3[0], sources_name, trans)
    else:
        output3 = output1

    return final_out(output1, output2, output3, target_lang)


####
basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
sub_path = sys.argv[1]
current = basepath + "/" + sub_path
google_srt = current + "/google_subtitle.srt"
aws_srt = current + "/aws_subtitle.srt"
azure_srt = current + "/azure_subtitle.srt"
trans = {}

# SRT lines that carry no dialogue: cue numbers, timing lines, blank lines.
_SRT_INDEX_RE = re.compile('^[0-9]+$')
_SRT_TIMING_RE = re.compile('^[0-9]{2}:[0-9]{2}:[0-9]{2}')
_SRT_BLANK_RE = re.compile('^$')


def _read_srt_text(path):
    """Return the dialogue of the SRT file at *path* as one string.

    Cue numbers, timing lines and blank lines are dropped; the remaining
    lines are joined with single spaces and leading whitespace is removed.
    """
    with open(path, "r", encoding="utf8") as srt_file:
        lines = srt_file.readlines()
    dialogue = [
        line.rstrip('\n')
        for line in lines
        if _SRT_INDEX_RE.search(line) is None
        and _SRT_TIMING_RE.search(line) is None
        and _SRT_BLANK_RE.search(line) is None
    ]
    return ' '.join(dialogue).lstrip()


def main1():
    """Load the Google, AWS and Azure subtitles into the global ``trans``.

    Keys ``"0"``, ``"1"`` and ``"2"`` receive the dialogue text of the
    respective file, or ``'.'`` when the file cannot be read or decoded.

    Returns:
        The global ``trans`` dict.
    """
    srt_paths = (("0", google_srt), ("1", aws_srt), ("2", azure_srt))
    for key, path in srt_paths:
        try:
            trans[key] = _read_srt_text(path)
        except (OSError, UnicodeError):
            # Missing or unreadable subtitle: mark it with the '.' sentinel.
            trans[key] = '.'
    return trans


main1()
sentence = ""
t0 = " "
sources_name = {'0': "google", "1": 'aws', "2": 'azure'}
target_lang = "en"

add_dial_comparison_doc2(doc2, table2, trans)
selected_para = compare_outputs(sentence, t0, trans, sources_name, target_lang)
print(trans)
print(selected_para)
key_list = list(trans.keys())
val_list = list(trans.values())
position = val_list.index(selected_para)
print("Position", position)


def final_srt(position):
    """Return the SRT path for *position*, skipping missing subtitles.

    Positions 0, 1 and 2 stand for Google, AWS and Azure. If the subtitle at
    *position* is marked missing (``'.'`` in the global ``trans``), the next
    provider is tried. ``"No srt found"`` is returned when nothing usable
    remains or *position* is not 0, 1 or 2.
    """
    for index, path in enumerate((google_srt, aws_srt, azure_srt)):
        if position == index:
            if trans[str(index)] != '.':
                return path
            position += 1
    return "No srt found"


print(final_srt(position))