"""Pick the best machine translation of a sentence from several services.

``all_translator`` sends a sentence to every configured translation
service, ``compare_outputs`` scores the results with the metrics imported
from ``.translation_metric`` and ``final_out`` returns the first candidate
that is written entirely in the target language's default script.

Importing this module has side effects: it sets the Google credentials
environment variable, creates two Google Translate clients and builds an
in-memory python-docx report document (``doc2`` / ``table2``).
"""
import json
import logging
import os
import string
import sys
import uuid

from google.cloud import translate_v2 as Translate
from google.cloud import translate
import docx
from docx.shared import Inches, Cm, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
import requests
from tqdm import tqdm

from .translation_resources import (
    ibm_watson,
    google,
    aws,
    azure,
    lingvanex,
    yandex,
)
from .script_detector import script_cat
from .script_writing import default_script
from .translation_metric import (
    manual_diff_score,
    bleu_diff_score,
    gleu_diff_score,
    meteor_diff_score,
    rouge_diff_score,
    diff_score,
    critera4_5,
)
from .selection_source import (
    selection_source,
    function5,
    function41,
    function311,
    function221,
    function2111,
    function11111,
    selection_source_transliteration,
    two_sources_two_outputs,
)

logger = logging.getLogger(__name__)

# google
# NOTE(review): the credentials path is hard-coded and overrides whatever the
# process environment already contains -- confirm this is intended before
# deploying to another machine.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/home/mnfidea/project/MNF/conversion/subtitling/gifted-mountain-318504-0a5f94cda0c8.json"
translate_client = Translate.Client()
client = translate.TranslationServiceClient()
project_id = "excellent-hue-272808"
location = "global"
parent = f"projects/{project_id}/locations/{location}"

# Report document: a four-column table (input + three outputs). Nothing in
# this module saves it; call ``doc2.save(path)`` to persist it.
doc_file = "translated_abc"
doc2 = docx.Document()
sections = doc2.sections
for section in sections:
    section.top_margin = Inches(0.2)
    section.bottom_margin = Inches(0.2)
    section.left_margin = Inches(0.2)
    section.right_margin = Inches(0.2)

# Swap width and height of the last section's page.
section = doc2.sections[-1]
new_height = section.page_width
section.page_width = section.page_height
section.page_height = new_height

name = 'Final table '+doc_file
doc2.add_heading(name, 0)
doc_para = doc2.add_paragraph()
doc_para.add_run(
    'Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex').bold = True
table2 = doc2.add_table(rows=1, cols=4)
table2.style = 'TableGrid'
hdr_Cells = table2.rows[0].cells
for _column, _title in enumerate(("Input", "Output1", "Output2", "Output3")):
    hdr_Cells[_column].paragraphs[0].add_run(_title).bold = True

# Raw strings: the original literals contained the invalid escape ``\,``;
# as raw strings they hold exactly the same characters (backslash included)
# without the invalid-escape warning.
_SCRIPT_DET_PUNCTUATION = r'''!()-[]{};:'"\,<>./?@#$%^&*_~“"”'''
_PUNCTUATION_AND_DIGITS = r'''!()-[]{};:'"\,<>./?@#$%^&*_~…।1234567890'''
_PUNCTUATION_ONLY = r'''!()-[]{};:'"\,<>./?@#$%^&*_~…।'''

# Translation tables built once instead of per call.
_PUNCT_TO_SPACE = str.maketrans(dict.fromkeys(_PUNCTUATION_AND_DIGITS, " "))
_PUNCT_DELETE = str.maketrans("", "", _PUNCTUATION_ONLY)

_ASCII_PUNCTUATION = frozenset(string.punctuation)

# (source label, adapter) in the order the services are queried. The lambdas
# look the service functions up at call time and hide the fact that
# ``azure`` takes no source language.
_TRANSLATORS = (
    ("GOOGLE", lambda text, src, dst: google(text, src, dst)),
    ("IBM_WATSON", lambda text, src, dst: ibm_watson(text, src, dst)),
    ("AWS", lambda text, src, dst: aws(text, src, dst)),
    ("AZURE", lambda text, src, dst: azure(text, dst)),
    ("LINGVANEX", lambda text, src, dst: lingvanex(text, src, dst)),
    ("YANDEX", lambda text, src, dst: yandex(text, src, dst)),
)


def script_det(text):
    """Return the script of the first non-punctuation character of *text*.

    The character is passed to ``script_cat`` and the first element of its
    result is returned. When *text* contains no such character an empty
    string is passed to ``script_cat``.
    """
    first_char = next(
        (char for char in text if char not in _SCRIPT_DET_PUNCTUATION), "")
    return script_cat(first_char)[0]


def language_detector(text):
    """Return the source language Google Translate detects for *text*.

    The text is translated to Hindi only because the call needs a target
    language; just the ``detectedSourceLanguage`` field of the response is
    used.
    """
    result = translate_client.translate(text, target_language='hi')
    return result["detectedSourceLanguage"]


def punct_remover(string):
    """Return *string* with every punctuation mark and digit replaced by a space."""
    # One pass instead of one ``replace`` call per punctuation character.
    return string.translate(_PUNCT_TO_SPACE)


def word_transliterate(sentence, dest_script):
    """Placeholder for transliteration: currently returns *sentence* unchanged."""
    return sentence


def _has_foreign_script(text, dest_lang):
    """Return True if any word of *text* is not in *dest_lang*'s default script."""
    return any(
        script_det(word) != default_script[dest_lang]
        for word in punct_remover(text).split()
    )


def final_out(output1, output2, output3, dest_lang):
    """Return the first candidate written entirely in the target script.

    Candidates are tried in order (*output1*, *output2*, *output3*). A
    candidate is accepted when every word of it, after punctuation and
    digits are stripped, is detected as ``default_script[dest_lang]``. If
    none qualifies, *output1* is passed through ``word_transliterate``.
    """
    for candidate in (output1, output2, output3):
        if not _has_foreign_script(candidate, dest_lang):
            return candidate
    return word_transliterate(output1, default_script[dest_lang])


def _translation_for_source(source, sources_name, trans):
    """Return the translation in *trans* whose source label equals *source*.

    Raises:
        KeyError: if *source* is not one of the labels in *sources_name*.
    """
    matched_key = None
    for key, label in sources_name.items():
        if label == source:
            matched_key = key
    if matched_key is None:
        raise KeyError(f"no translation recorded for source {source!r}")
    return trans[str(matched_key)]


def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose the final translation among the collected service outputs.

    Args:
        sentence: the original sentence (kept for interface compatibility).
        t0: the baseline translation (``trans["0"]`` when called from
            ``all_translator``).
        trans: mapping of index string -> translated text.
        sources_name: mapping of index string -> service label.
        target_lang: target language code, a key of ``default_script``.

    Returns:
        *t0* when all five metric winners equal it; otherwise the result of
        ``final_out`` applied to the up-to-three sources picked by
        ``selection_source``.

    Raises:
        KeyError: if a source picked by ``selection_source`` is not present
            in *sources_name*.
    """
    methods_name = {'0': 'MNF', '1': 'Gleu',
                    '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
    google_output = t0

    output1, source1 = manual_diff_score(trans, sources_name)
    output2, source2 = gleu_diff_score(trans, sources_name)
    output3, source3 = meteor_diff_score(trans, sources_name)
    output4, source4, output5, source5 = rouge_diff_score(trans, sources_name)
    scored = (
        (output1, source1),
        (output2, source2),
        (output3, source3),
        (output4, source4),
        (output5, source5),
    )

    if all(output == google_output for output, _ in scored):
        print("All outputs are same as google")
        return google_output

    # A metric that agrees with the baseline contributes a blank entry.
    s = [source if output != google_output else " "
         for output, source in scored]
    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(
        s, sources_name, trans, methods_name)

    best = _translation_for_source(s1ANDm1[0], sources_name, trans)
    # An empty source name means "no second/third choice": reuse the best.
    second = (_translation_for_source(s2ANDm2[0], sources_name, trans)
              if s2ANDm2[0] != "" else best)
    third = (_translation_for_source(s3ANDm3[0], sources_name, trans)
             if s3ANDm3[0] != "" else best)

    return final_out(best, second, third, target_lang)


def _fill_choice_cell(cell, text, choice):
    """Write *text* into *cell* followed by the choice's source and methods."""
    cell.text = text
    cell.paragraphs[0].add_run(f"(Source : {choice[0]!s})")
    cell.paragraphs[0].add_run(f"(Methods : {choice[1]!s})")


# to return the table with best 3 outputs
def add_dial_comparison_doc2(doc2, table2, sentence, s1ANDm1, s2ANDm2, s3ANDm3, sources_name, trans):
    """Append a row with the sentence and its best three translations.

    Each ``sNANDmN`` is indexed as ``[0]`` source label and ``[1]`` methods.
    An empty source label for the second or third choice leaves that cell
    empty. No call site exists in this module.

    Raises:
        KeyError: if a non-empty source label is not in *sources_name*; the
            lookups run before the row is added, so no partial row is left.
    """
    choices = (s1ANDm1, s2ANDm2, s3ANDm3)
    texts = []
    for position, choice in enumerate(choices):
        # Only the second and third choices may be absent.
        if position > 0 and choice[0] == "":
            texts.append(None)
        else:
            texts.append(
                _translation_for_source(choice[0], sources_name, trans))

    row_Cells = table2.add_row().cells
    row_Cells[0].text = sentence
    for column, (choice, text) in enumerate(zip(choices, texts), start=1):
        if text is None:
            row_Cells[column].text = ""
        else:
            _fill_choice_cell(row_Cells[column], text, choice)


def add_dial_comparison_doc22(doc2, table2, sentence, s1ANDm1, sources_name, trans):
    """Append a row with the sentence and its single best translation.

    No call site exists in this module.

    Raises:
        KeyError: if ``s1ANDm1[0]`` is not a label in *sources_name*.
    """
    output1 = _translation_for_source(s1ANDm1[0], sources_name, trans)
    row_Cells = table2.add_row().cells
    row_Cells[0].text = sentence
    row_Cells[1].text = output1


class myDict(dict):
    """``dict`` with an ``add(key, value)`` convenience method."""

    def __init__(self, *args, **kwargs):
        # The original rebound ``self`` to a new dict, which had no effect;
        # delegate to ``dict`` instead (``myDict()`` is still empty).
        super().__init__(*args, **kwargs)

    def add(self, key, value):
        """Store *value* under *key*."""
        self[key] = value


def all_translator(sentence, source_lang, target_lang):
    """Translate *sentence* with every service and return the chosen output.

    A sentence that is exactly one ASCII punctuation character is returned
    unchanged. Otherwise each service in ``_TRANSLATORS`` is tried in order;
    a service that raises is logged and skipped. Successful outputs are
    numbered "0", "1", ... in call order, and output "0" (Google when it
    succeeds, else the first service that did) is the baseline handed to
    ``compare_outputs``.

    Example:
        all_translator("I am Lokesh.", "en", "hi")

    Raises:
        KeyError: if no service produced a translation.
    """
    if sentence in _ASCII_PUNCTUATION:
        return sentence

    trans = myDict()
    sources_name = myDict()
    for source_label, translate_fn in _TRANSLATORS:
        try:
            translated = translate_fn(sentence, source_lang, target_lang)
        except Exception:
            # Best effort: one failing service must not abort the others,
            # but the failure is recorded instead of silently dropped.
            logger.warning("%s translation failed; skipping it",
                           source_label, exc_info=True)
            continue
        key = str(len(trans))
        trans.add(key, translated)
        sources_name.add(key, source_label)

    if not trans:
        raise KeyError("no translation service returned a result")

    return compare_outputs(
        sentence, trans["0"], trans, sources_name, target_lang)


def punct_remover_w_o_digits(string):
    """Return *string* with punctuation marks deleted; digits are kept."""
    return string.translate(_PUNCT_DELETE)