Conversion_Kitchen_Code/kitchen_counter/conversion/ppt_translation/optimisation1.py

401 lines
12 KiB
Python
Raw Normal View History

2024-04-27 09:33:09 +00:00
from google.cloud import translate_v2 as Translate
from google.cloud import translate
import docx
import sys
from .translation_resources import ibm_watson, google, aws, azure, lingvanex, yandex
from .script_detector import script_cat
from .script_writing import default_script
from .translation_metric import (
manual_diff_score,
bleu_diff_score,
gleu_diff_score,
meteor_diff_score,
rouge_diff_score,
diff_score,
critera4_5,
)
from .selection_source import (
selection_source,
function5,
function41,
function311,
function221,
function2111,
function11111,
selection_source_transliteration,
two_sources_two_outputs,
)
from tqdm import tqdm
import os
from docx.shared import Inches, Cm, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
import requests
import uuid
import json
import string
from MNF.settings import BasePath
basepath = BasePath()
# import logging
# from logger import get_module_logger
# log = get_module_logger(__name__)
# google
# Location of the Google service-account key. It is exported before the
# clients below are created (presumably they read it on construction).
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
    f"{basepath}/MNF/json_keys/authentication.json"
)

# Google Translate clients plus the project/location path built from them.
translate_client = Translate.Client()
client = translate.TranslationServiceClient()
project_id = "excellent-hue-272808"
location = "global"
parent = f"projects/{project_id}/locations/{location}"

# Module-level comparison document: narrow margins on every section.
doc_file = "translated_abc"
doc2 = docx.Document()
sections = doc2.sections
for section in sections:
    for _margin_attr in (
        "top_margin",
        "bottom_margin",
        "left_margin",
        "right_margin",
    ):
        setattr(section, _margin_attr, Inches(0.2))

# Swap width and height of the last section.
section = doc2.sections[-1]
new_height = section.page_width
section.page_width = section.page_height
section.page_height = new_height

# Heading and the bold line listing the translation resources.
name = f"Final table {doc_file}"
doc2.add_heading(name, 0)
doc_para = doc2.add_paragraph()
doc_para.add_run(
    "Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex"
).bold = True

# Four-column results table with a bold header row.
table2 = doc2.add_table(rows=1, cols=4)
table2.style = "TableGrid"
hdr_Cells = table2.rows[0].cells
for _column, _title in enumerate(("Input", "Output1", "Output2", "Output3")):
    hdr_Cells[_column].paragraphs[0].add_run(_title).bold = True
# doc_file = "translated_abc"
# doc2 = docx.Document()
# sections = doc2.sections
# for section in sections:
# section.top_margin = Inches(0.2)
# section.bottom_margin = Inches(0.2)
# section.left_margin = Inches(0.2)
# section.right_margin = Inches(0.2)
# section = doc2.sections[-1]
# new_height = section.page_width
# section.page_width = section.page_height
# section.page_height = new_height
# name = filename
# doc2.add_heading(name, 0)
# doc_para = doc2.add_paragraph()
# #doc_para.add_run('Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex').bold = True
# table2 = doc2.add_table(rows=1,cols=4)
# table2.style = 'TableGrid'
# hdr_Cells = table2.rows[0].cells
# hdr_Cells[0].paragraphs[0].add_run("Original").bold=True
# hdr_Cells[1].paragraphs[0].add_run("Translated").bold=True
def script_det(text):
    """Classify the first non-punctuation character of ``text``.

    Args:
        text: String to inspect.

    Returns:
        Element 0 of ``script_cat(char)``, where ``char`` is the first
        character of ``text`` that is not in the punctuation set. When
        ``text`` is empty or contains only punctuation, ``script_cat`` is
        called with an empty string.
    """
    # Plain literal with the double quote and backslash escaped explicitly.
    # The previous triple-quoted form ended in four quote characters, which
    # closes the string early and leaves a stray quote behind.
    punctuations = "!()-[]{};:'\"\\,<>./?@#$%^&*_~“"
    first_char = next((char for char in text if char not in punctuations), "")
    return script_cat(first_char)[0]
def language_detector(text):
    """Return the source language reported for ``text``.

    ``text`` is sent to the module-level ``translate_client`` with ``"hi"``
    as the target language; only the ``detectedSourceLanguage`` entry of the
    response is returned.
    """
    response = translate_client.translate(text, target_language="hi")
    return response["detectedSourceLanguage"]
def punct_remover(string):
    """Return ``string`` with punctuation marks and ASCII digits blanked out.

    Every listed character is replaced by a single space, so the result has
    the same length as the input and the case of letters is left untouched.

    Args:
        string: Text to clean.

    Returns:
        A new string with each punctuation character or digit replaced by
        ``" "``.
    """
    # The double quote and backslash are escaped explicitly; the earlier
    # literal depended on the invalid escape sequence "\,".
    punctuations = "!()-[]{};:'\"\\,<>./?@#$%^&*_~…।1234567890"
    # One translation pass instead of one full-string replace per character.
    return string.translate(str.maketrans(dict.fromkeys(punctuations, " ")))
def word_transliterate(sentence, dest_script):
    """Return ``sentence`` unchanged.

    Placeholder: ``dest_script`` is accepted but not used, and no
    transliteration is performed.
    """
    return sentence
def final_out(output1, output2, output3, dest_lang):
    """Pick the first candidate whose words are all in the target script.

    Each candidate is stripped of punctuation with ``punct_remover`` and
    split on whitespace; a word is "off-script" when ``script_det(word)``
    differs from ``default_script[dest_lang]``.

    Returns:
        ``output1`` if it has no off-script word, otherwise ``output2`` under
        the same test, otherwise ``output3``. When all three contain an
        off-script word, ``word_transliterate(output1, ...)`` is returned.
    """

    def has_off_script_word(tokens):
        # The target script is looked up per word, so an unknown dest_lang
        # only raises when there is at least one word to check.
        return any(
            script_det(token) != default_script[dest_lang] for token in tokens
        )

    # All three candidates are cleaned up front, before any script check.
    tokens1 = punct_remover(output1).split()
    tokens2 = punct_remover(output2).split()
    tokens3 = punct_remover(output3).split()

    if not has_off_script_word(tokens1):
        return output1
    if not has_off_script_word(tokens2):
        return output2
    if not has_off_script_word(tokens3):
        return output3
    return word_transliterate(output1, default_script[dest_lang])
def _translation_for_source(source_name, sources_name, trans, default):
    """Return the translation produced by ``source_name``, else ``default``."""
    matched_key = None
    for key, name in sources_name.items():
        if name == source_name:
            matched_key = key  # no break: the last matching key wins
    if matched_key is None:
        return default
    return trans[str(matched_key)]


def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose the final translation among the outputs of several engines.

    The scoring helpers (``manual_diff_score``, ``gleu_diff_score``,
    ``meteor_diff_score``, ``rouge_diff_score``) each return a preferred
    output and the source it came from. If all of them agree with ``t0`` that
    text is returned directly. Otherwise ``selection_source`` ranks up to
    three sources and ``final_out`` picks among their translations.

    Args:
        sentence: The original sentence (not used in the selection; kept for
            interface compatibility).
        t0: Reference translation; compared against every metric output.
        trans: Mapping of string index -> translated text.
        sources_name: Mapping of string index -> source name.
        target_lang: Target language code, forwarded to ``final_out``.

    Returns:
        The selected translation.
    """
    methods_name = {
        "0": "MNF",
        "1": "Gleu",
        "2": "Meteor",
        "3": "Rougen",
        "4": "Rougel",
    }
    google_output = t0
    output1, source1 = manual_diff_score(trans, sources_name)
    output2, source2 = gleu_diff_score(trans, sources_name)
    output3, source3 = meteor_diff_score(trans, sources_name)
    output4, source4, output5, source5 = rouge_diff_score(trans, sources_name)

    scored = (
        (output1, source1),
        (output2, source2),
        (output3, source3),
        (output4, source4),
        (output5, source5),
    )
    if all(candidate == google_output for candidate, _ in scored):
        print("All outputs are same as google")
        return google_output

    # One entry per metric, in methods_name order: the winning source when the
    # metric disagrees with the reference, or a single space when it agrees.
    s = [
        source if candidate != google_output else " "
        for candidate, source in scored
    ]
    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(
        s, sources_name, trans, methods_name
    )

    # If a selected source name is missing from sources_name, keep the
    # corresponding metric output instead of failing on an unbound variable.
    output1 = _translation_for_source(s1ANDm1[0], sources_name, trans, output1)
    if s2ANDm2[0] != "":
        output2 = _translation_for_source(
            s2ANDm2[0], sources_name, trans, output2
        )
    else:
        output2 = output1
    if s3ANDm3[0] != "":
        output3 = _translation_for_source(
            s3ANDm3[0], sources_name, trans, output3
        )
    else:
        output3 = output1

    return final_out(output1, output2, output3, target_lang)
def _fill_choice_cell(cell, choice, sources_name, trans):
    """Write one selected translation and its source/method tags into ``cell``."""
    source_name = choice[0]
    matching_keys = [
        key for key, name in sources_name.items() if name == source_name
    ]
    if not matching_keys:
        raise KeyError(f"source {source_name!r} not found in sources_name")
    # Last matching key wins, as with a loop that never breaks.
    cell.text = trans[str(matching_keys[-1])]
    paragraph = cell.paragraphs[0]
    paragraph.add_run("(Source : " + str(source_name) + ")")
    paragraph.add_run("(Methods : " + str(choice[1]) + ")")


# Adds one table row holding the input sentence and the best three outputs.
def add_dial_comparison_doc2(
    doc2, table2, sentence, s1ANDm1, s2ANDm2, s3ANDm3, sources_name, trans
):
    """Append a comparison row to ``table2``.

    Column 0 receives ``sentence``; columns 1-3 receive the translations of
    the first, second and third selections, each followed by "(Source : ...)"
    and "(Methods : ...)" runs. A selection whose source name is ``""``
    leaves its column as an empty string.

    Args:
        doc2: Unused; kept for interface compatibility.
        table2: Table object providing ``add_row().cells``.
        sentence: Original input text.
        s1ANDm1, s2ANDm2, s3ANDm3: Indexable selections where item 0 is the
            source name and item 1 the methods description.
        sources_name: Mapping of index -> source name.
        trans: Mapping of string index -> translated text.

    Raises:
        KeyError: If a non-empty selected source name is not present in
            ``sources_name``.
    """
    row_Cells = table2.add_row().cells
    _fill_choice_cell(row_Cells[1], s1ANDm1, sources_name, trans)
    row_Cells[0].text = sentence
    for cell, choice in ((row_Cells[2], s2ANDm2), (row_Cells[3], s3ANDm3)):
        if choice[0] == "":
            cell.text = ""
        else:
            _fill_choice_cell(cell, choice, sources_name, trans)
def add_dial_comparison_doc22(doc2, table2, sentence, s1ANDm1, sources_name, trans):
    """Append a two-cell row: the input sentence and its selected translation.

    Args:
        doc2: Unused; kept for interface compatibility.
        table2: Table object providing ``add_row().cells``.
        sentence: Original input text, written to column 0.
        s1ANDm1: Indexable selection whose item 0 is the chosen source name.
        sources_name: Mapping of index -> source name.
        trans: Mapping of string index -> translated text.

    Raises:
        KeyError: If the chosen source name is not present in
            ``sources_name``.
    """
    row_Cells = table2.add_row().cells
    chosen_source = s1ANDm1[0]
    matching_keys = [
        key for key, name in sources_name.items() if name == chosen_source
    ]
    if not matching_keys:
        # Previously this surfaced as an unbound local variable.
        raise KeyError(f"source {chosen_source!r} not found in sources_name")
    output1 = trans[str(matching_keys[-1])]
    row_Cells[0].text = sentence
    row_Cells[1].text = output1
class myDict(dict):
    """A ``dict`` with an ``add(key, value)`` convenience method."""

    def __init__(self, *args, **kwargs):
        """Initialise like a regular ``dict``.

        ``myDict()`` still creates an empty mapping; the usual ``dict``
        constructor arguments are accepted as well.
        """
        # Rebinding ``self`` (as was done before) has no effect on the
        # instance; delegate to dict's own initialiser instead.
        super().__init__(*args, **kwargs)

    def add(self, key, value):
        """Store ``value`` under ``key``, replacing any existing entry."""
        self[key] = value
def all_translator(sentence, source_lang, target_lang):
    """Translate ``sentence`` with every available engine and pick one result.

    Each provider is tried in a fixed order; a provider that raises is
    skipped. Successful results are numbered "0", "1", ... in order of
    success and recorded together with the provider name.

    Args:
        sentence: Text to translate.
        source_lang: Source language code (not passed to ``azure``).
        target_lang: Target language code.

    Returns:
        ``sentence`` itself when it is exactly one ASCII punctuation
        character; the only translation when a single provider succeeded;
        otherwise the result of ``compare_outputs``.

    Raises:
        KeyError: If no provider returned a translation.
    """
    import logging

    # List membership, not a substring test: only a sentence that is exactly
    # one punctuation character is passed through untouched.
    if sentence in list(string.punctuation):
        return sentence

    # Calls are wrapped in lambdas so that each one runs inside the try below.
    providers = (
        ("GOOGLE", lambda: google(sentence, source_lang, target_lang)),
        ("IBM_WATSON", lambda: ibm_watson(sentence, source_lang, target_lang)),
        ("AWS", lambda: aws(sentence, source_lang, target_lang)),
        ("AZURE", lambda: azure(sentence, target_lang)),
        ("LINGVANEX", lambda: lingvanex(sentence, source_lang, target_lang)),
        ("YANDEX", lambda: yandex(sentence, source_lang, target_lang)),
    )

    trans = myDict()
    sources_name = myDict()
    for provider_name, call_provider in providers:
        try:
            translated = call_provider()
        except Exception:
            # Best effort: a failing engine is skipped, but the failure is
            # recorded and KeyboardInterrupt/SystemExit are no longer eaten.
            logging.getLogger(__name__).warning(
                "%s translation failed; skipping it",
                provider_name,
                exc_info=True,
            )
            continue
        # Keys stay contiguous: the index only advances on success.
        key = str(len(trans))
        trans.add(key, translated)
        sources_name.add(key, provider_name)

    print(sources_name, " : OUTPUT FROM ALL SOURCES!")
    if not trans:
        raise KeyError(
            "no translation provider returned a result for the sentence"
        )
    if len(trans) == 1:
        return trans["0"]
    # trans["0"] is the first provider that succeeded; it is handed to
    # compare_outputs as the reference translation.
    return compare_outputs(sentence, trans["0"], trans, sources_name, target_lang)
def punct_remover_w_o_digits(string):
    """Return ``string`` with punctuation marks deleted; digits are kept.

    Unlike a blanking replacement, the listed characters are removed
    outright, so the result may be shorter than the input.

    Args:
        string: Text to clean.

    Returns:
        A new string without any of the listed punctuation characters.
    """
    # The double quote and backslash are escaped explicitly; the earlier
    # literal depended on the invalid escape sequence "\,".
    punctuations = "!()-[]{};:'\"\\,<>./?@#$%^&*_~…।"
    # Third argument of maketrans lists the characters to delete.
    return string.translate(str.maketrans("", "", punctuations))
# Sentence = "I am Lokesh."
# source_lang = "en"
# target_lang = "hi"
# print(all_translator(Sentence, source_lang, target_lang))
# doc2.save("testing.docx")