# Python source file (executable): 359 lines, 12 KiB.
| from google.cloud import translate_v2 as Translate
 | |
| from google.cloud import translate
 | |
| import docx
 | |
| import sys
 | |
| from .translation_resources import ibm_watson, google, aws, azure, lingvanex, yandex
 | |
| from .script_detector import script_cat
 | |
| from .script_writing import default_script
 | |
| from .translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
 | |
| from .selection_source import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs
 | |
| from tqdm import tqdm
 | |
| import os
 | |
| from docx.shared import Inches, Cm, Pt
 | |
| from docx.enum.text import WD_ALIGN_PARAGRAPH
 | |
| from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
 | |
| import requests
 | |
| import uuid
 | |
| import json
 | |
| import string
 | |
| 
 | |
# Google Cloud Translation configuration.
# The credentials variable is set before the two clients below are created,
# presumably so that both pick it up -- TODO confirm against the client docs.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
    "/home/mnfidea/project/MNF/conversion/subtitling/"
    "gifted-mountain-318504-0a5f94cda0c8.json"
)
translate_client = Translate.Client()
client = translate.TranslationServiceClient()

# Resource path built from the project and location identifiers.
project_id = "excellent-hue-272808"
location = "global"
parent = "projects/{}/locations/{}".format(project_id, location)
 | |
| 
 | |
| 
 | |
# Module-level report document: a heading, a bold note naming the
# translation engines, and a four-column table with a bold header row.
doc_file = "translated_abc"
doc2 = docx.Document()
sections = doc2.sections
for section in sections:
    # Same 0.2 inch margin on every side, applied top, bottom, left, right.
    for _side in ("top", "bottom", "left", "right"):
        setattr(section, _side + "_margin", Inches(0.2))

# Swap the page width and height of the last section.
section = doc2.sections[-1]
new_height = section.page_width
section.page_width = section.page_height
section.page_height = new_height

name = 'Final table ' + doc_file
doc2.add_heading(name, 0)
doc_para = doc2.add_paragraph()
_resources_run = doc_para.add_run(
    'Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex')
_resources_run.bold = True

table2 = doc2.add_table(rows=1, cols=4)
table2.style = 'TableGrid'
hdr_Cells = table2.rows[0].cells
for _column, _title in enumerate(("Input", "Output1", "Output2", "Output3")):
    hdr_Cells[_column].paragraphs[0].add_run(_title).bold = True
 | |
| 
 | |
| # doc_file = "translated_abc"
 | |
| # doc2 = docx.Document()
 | |
| # sections = doc2.sections
 | |
| # for section in sections:
 | |
| #     section.top_margin = Inches(0.2)
 | |
| #     section.bottom_margin = Inches(0.2)
 | |
| #     section.left_margin = Inches(0.2)
 | |
| #     section.right_margin = Inches(0.2)
 | |
| # section = doc2.sections[-1]
 | |
| # new_height = section.page_width
 | |
| # section.page_width = section.page_height
 | |
| # section.page_height = new_height
 | |
| # name = filename
 | |
| # doc2.add_heading(name, 0)
 | |
| # doc_para = doc2.add_paragraph()
 | |
| # #doc_para.add_run('Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex').bold = True
 | |
| # table2 = doc2.add_table(rows=1,cols=4)
 | |
| # table2.style = 'TableGrid'
 | |
| # hdr_Cells = table2.rows[0].cells
 | |
| # hdr_Cells[0].paragraphs[0].add_run("Original").bold=True
 | |
| # hdr_Cells[1].paragraphs[0].add_run("Translated").bold=True
 | |
| 
 | |
| 
 | |
def script_det(text):
    """Return the script of the first non-punctuation character of *text*.

    The first character of *text* that is not in the punctuation set below is
    handed to ``script_cat`` and element ``[0]`` of its result is returned.
    When *text* is empty or consists only of punctuation, ``script_cat`` is
    called with the empty string, exactly as before.
    """
    # Raw literal: the backslash is a real member of the set. In a non-raw
    # literal "\," is an invalid escape sequence and triggers a warning.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~“"”'''
    first_char = next(
        (char for char in text if char not in punctuations), "")
    return script_cat(first_char)[0]
 | |
| 
 | |
| 
 | |
def language_detector(text):
    """Return the source language reported for *text* by the Google client.

    The text is passed to ``translate_client.translate`` with target language
    code ``'hi'``; only the ``detectedSourceLanguage`` entry of the response
    is used, the translated text itself is discarded.
    """
    response = translate_client.translate(text, target_language='hi')
    return response["detectedSourceLanguage"]
 | |
| 
 | |
| 
 | |
def punct_remover(string):
    """Return *string* with punctuation marks and ASCII digits blanked out.

    Every character listed in the set below (ASCII punctuation, the ellipsis,
    the danda and the digits 0-9) is replaced by a single space, so the
    length of the text is unchanged. All other characters are kept as they
    are.
    """
    # Raw literal: the backslash is a real member of the set. In a non-raw
    # literal "\," is an invalid escape sequence and triggers a warning.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~…।1234567890'''
    # One translate pass instead of one full-string replace per character.
    blank_out = str.maketrans(dict.fromkeys(punctuations, " "))
    return string.translate(blank_out)
 | |
| 
 | |
| 
 | |
def word_transliterate(sentence, dest_script):
    """Placeholder transliteration step.

    Currently returns *sentence* unchanged; *dest_script* is accepted but not
    used.
    """
    unchanged = sentence
    return unchanged
 | |
| 
 | |
| 
 | |
def final_out(output1, output2, output3, dest_lang):
    """Return the first candidate whose words are all in the target script.

    Each candidate is stripped with ``punct_remover`` and split on
    whitespace. The candidates are checked in order (output1, output2,
    output3); the first one in which no word's ``script_det`` result differs
    from ``default_script[dest_lang]`` is returned untouched. When all three
    contain such a word, ``word_transliterate`` is applied to *output1* and
    its result is returned.
    """
    candidates = (output1, output2, output3)
    # All three are stripped up front, before any word is inspected.
    stripped = [punct_remover(candidate) for candidate in candidates]

    def has_off_script_word(cleaned_text):
        # The default script is looked up per word, so a text without any
        # words never touches default_script.
        return any(
            script_det(token) != default_script[dest_lang]
            for token in cleaned_text.split()
        )

    for candidate, cleaned_text in zip(candidates, stripped):
        if not has_off_script_word(cleaned_text):
            return candidate

    # print("in3")
    return word_transliterate(output1, default_script[dest_lang])
 | |
| 
 | |
| 
 | |
def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose the final translation among the per-engine candidates.

    The candidates in *trans* are scored with ``manual_diff_score``,
    ``gleu_diff_score``, ``meteor_diff_score`` and ``rouge_diff_score``. When
    every metric picks the same text as *t0*, *t0* is returned directly.
    Otherwise the sources of the picks that differ from *t0* are handed to
    ``selection_source``; the translations of the (up to) three sources it
    returns are passed to ``final_out``, whose result is returned.

    Args:
        sentence: The input sentence. Not used in the computation; kept so
            existing callers keep working.
        t0: Reference translation (the caller passes ``trans["0"]``).
        trans: Mapping of slot key to translated text.
        sources_name: Mapping of slot key to engine label.
        target_lang: Target language code forwarded to ``final_out``.

    Raises:
        KeyError: If ``selection_source`` returns a label that is not a value
            of *sources_name*, or the matching slot is missing from *trans*.
            Previously an unmatched label surfaced as an unrelated
            ``KeyError`` or an ``UnboundLocalError``.
    """
    methods_name = {'0': 'MNF', '1': 'Gleu',
                    '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
    google_output = t0

    def translation_from(source_label):
        # Map an engine label back to its slot; the last matching slot wins,
        # as it did in the original lookup loops.
        matching_keys = [key for key, label in sources_name.items()
                         if label == source_label]
        if not matching_keys:
            raise KeyError(
                "no translation source named {!r}".format(source_label))
        return trans[str(matching_keys[-1])]

    output1, source1 = manual_diff_score(trans, sources_name)
    output2, source2 = gleu_diff_score(trans, sources_name)
    output3, source3 = meteor_diff_score(trans, sources_name)
    output4, source4, output5, source5 = rouge_diff_score(trans, sources_name)

    if google_output == output1 == output2 == output3 == output4 == output5:
        print("All outputs are same as google")
        return google_output

    # One entry per metric, in methods_name order: the picked source when the
    # pick differs from the reference, otherwise a single-space placeholder.
    picks = (
        (output1, source1),
        (output2, source2),
        (output3, source3),
        (output4, source4),
        (output5, source5),
    )
    s = [source if google_output != picked else " "
         for picked, source in picks]

    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(
        s, sources_name, trans, methods_name)

    best = translation_from(s1ANDm1[0])
    # An empty label means "no second/third choice": reuse the best output.
    second = translation_from(s2ANDm2[0]) if s2ANDm2[0] != "" else best
    third = translation_from(s3ANDm3[0]) if s3ANDm3[0] != "" else best

    return final_out(best, second, third, target_lang)
 | |
| 
 | |
| # to return the table with best 3 outputs
 | |
| 
 | |
| 
 | |
def add_dial_comparison_doc2(doc2, table2, sentence, s1ANDm1, s2ANDm2, s3ANDm3, sources_name, trans):
    """Append a row with the input sentence and up to three ranked outputs.

    Column 0 receives *sentence*. Columns 1-3 receive the translation of the
    source named in ``s1ANDm1[0]``, ``s2ANDm2[0]`` and ``s3ANDm3[0]``, each
    followed by a "(Source : ...)" and a "(Methods : ...)" run built from
    elements ``[0]`` and ``[1]`` of the pair. An empty label in the second or
    third pair leaves that column as an empty string.

    Args:
        doc2: Unused; kept for interface compatibility.
        table2: Table object providing ``add_row().cells``.
        sentence: Text for the first column.
        s1ANDm1, s2ANDm2, s3ANDm3: Indexable (source label, methods) pairs.
        sources_name: Mapping of slot key to engine label.
        trans: Mapping of slot key to translated text.

    Raises:
        KeyError: If a label is not a value of *sources_name*. Previously an
            unmatched first label raised ``UnboundLocalError`` and an
            unmatched second or third label silently reused the previous
            column's translation under the wrong source name.
    """
    def translation_from(source_label):
        # The last matching slot wins, as in the original lookup loops.
        matching_keys = [key for key, label in sources_name.items()
                         if label == source_label]
        if not matching_keys:
            raise KeyError(
                "no translation source named {!r}".format(source_label))
        return trans[str(matching_keys[-1])]

    # Resolve every column before touching the table, so that a bad label
    # cannot leave a half-filled row behind.
    columns = []
    for rank, source_and_methods in enumerate((s1ANDm1, s2ANDm2, s3ANDm3)):
        source_label = source_and_methods[0]
        if rank > 0 and source_label == "":
            columns.append(None)
        else:
            columns.append((translation_from(source_label),
                            source_label, source_and_methods[1]))

    row_cells = table2.add_row().cells
    row_cells[0].text = sentence
    for index, column in enumerate(columns, start=1):
        cell = row_cells[index]
        if column is None:
            cell.text = ""
            continue
        translated, source_label, methods = column
        cell.text = translated
        paragraph = cell.paragraphs[0]
        paragraph.add_run('(Source : ' + str(source_label) + ')')
        paragraph.add_run('(Methods : ' + str(methods) + ')')
 | |
| 
 | |
| 
 | |
def add_dial_comparison_doc22(doc2, table2, sentence, s1ANDm1, sources_name, trans):
    """Append a row holding the input sentence and the best translation.

    Column 0 receives *sentence*; column 1 receives the translation of the
    source named in ``s1ANDm1[0]``.

    Args:
        doc2: Unused; kept for interface compatibility.
        table2: Table object providing ``add_row().cells``.
        sentence: Text for the first column.
        s1ANDm1: Indexable whose element ``[0]`` is the source label.
        sources_name: Mapping of slot key to engine label.
        trans: Mapping of slot key to translated text.

    Raises:
        KeyError: If the label is not a value of *sources_name* (previously
            an ``UnboundLocalError``). The table is not modified in that
            case.
    """
    best_source = s1ANDm1[0]
    # The last matching slot wins, as in the original lookup loop.
    matching_keys = [key for key, label in sources_name.items()
                     if label == best_source]
    if not matching_keys:
        raise KeyError(
            "no translation source named {!r}".format(best_source))
    output1 = trans[str(matching_keys[-1])]

    row_cells = table2.add_row().cells
    row_cells[0].text = sentence
    row_cells[1].text = output1
 | |
| 
 | |
| 
 | |
class myDict(dict):
    """A ``dict`` with an ``add(key, value)`` convenience method.

    Calling ``myDict()`` with no arguments yields an empty mapping, as
    before. The constructor additionally accepts the same arguments as
    ``dict``.
    """

    def __init__(self, *args, **kwargs):
        # The previous body rebound the local name ``self`` to a new dict,
        # which had no effect on the instance; initialise the base class
        # properly instead.
        super().__init__(*args, **kwargs)

    def add(self, key, value):
        """Store *value* under *key*, replacing any existing entry."""
        self[key] = value
 | |
| 
 | |
| 
 | |
def all_translator(sentence, source_lang, target_lang):
    """Translate *sentence* with every available engine and pick one output.

    A sentence that is exactly one ASCII punctuation character is returned
    unchanged. Otherwise the engines are called in the order Google, IBM
    Watson, AWS, Azure, Lingvanex, Yandex. Each engine that succeeds is
    stored under the next free slot key ("0", "1", ...) together with its
    label, and the collected candidates are handed to ``compare_outputs``.

    Args:
        sentence: Text to translate.
        source_lang: Source language code (not passed to the Azure helper).
        target_lang: Target language code.

    Returns:
        The translation selected by ``compare_outputs``, or *sentence* itself
        for a single punctuation character.

    Raises:
        KeyError: If every engine failed, because slot "0" does not exist.
    """
    if sentence in list(string.punctuation):
        return sentence

    engines = (
        ("GOOGLE", google, (sentence, source_lang, target_lang)),
        ("IBM_WATSON", ibm_watson, (sentence, source_lang, target_lang)),
        ("AWS", aws, (sentence, source_lang, target_lang)),
        ("AZURE", azure, (sentence, target_lang)),
        ("LINGVANEX", lingvanex, (sentence, source_lang, target_lang)),
        ("YANDEX", yandex, (sentence, source_lang, target_lang)),
    )

    trans = myDict()
    sources_name = myDict()
    for label, translate_with, call_args in engines:
        try:
            translated = translate_with(*call_args)
        except Exception:
            # Best effort: an engine that fails is skipped. Unlike the former
            # bare ``except``, KeyboardInterrupt and SystemExit propagate.
            continue
        # Results stay in local mappings; they are no longer written into
        # module globals as t0, t1, ...
        slot = str(len(trans))
        trans.add(slot, translated)
        sources_name.add(slot, label)

    # NOTE: slot "0" is the first engine that succeeded, which is Google only
    # when the Google call did not fail.
    trans_text = compare_outputs(
        sentence, trans["0"], trans, sources_name, target_lang)
    return trans_text
 | |
| 
 | |
| 
 | |
def punct_remover_w_o_digits(string):
    """Return *string* with punctuation marks deleted; digits are kept.

    Every character listed in the set below (ASCII punctuation, the ellipsis
    and the danda) is removed outright, so the result may be shorter than the
    input. All other characters, including digits, are kept as they are.
    """
    # Raw literal: the backslash is a real member of the set. In a non-raw
    # literal "\," is an invalid escape sequence and triggers a warning.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~…।'''
    # One translate pass instead of one full-string replace per character.
    return string.translate(str.maketrans("", "", punctuations))
 | |
| 
 | |
| # Sentence = "I am Lokesh."
 | |
| # source_lang = "en"
 | |
| # target_lang = "hi"
 | |
| # print(all_translator(Sentence, source_lang, target_lang))
 | |
| # doc2.save("testing.docx")
 |