# Conversion_Kitchen_Code/kitchen_counter/conversion/subtitling/optimisation1.py

from google.cloud import translate_v2 as Translate
from google.cloud import translate
import docx
import sys
from .translation_resources import ibm_watson, google, aws, azure, lingvanex, yandex
from .script_detector import script_cat
from .script_writing import default_script
from .translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
from .selection_source import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs
from tqdm import tqdm
import os
from docx.shared import Inches, Cm, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
import requests
import uuid
import json
import string

# Google Cloud Translation setup (runs at import time).
# NOTE(review): the service-account key path is hard-coded to one user's home
# directory; presumably the client constructors below fail on any machine where
# that file is missing — confirm whether this should come from configuration.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/home/mnfidea/project/MNF/conversion/subtitling/gifted-mountain-318504-0a5f94cda0c8.json"
# Client from `google.cloud.translate_v2`; language_detector() below calls it.
translate_client = Translate.Client()
# Client from `google.cloud.translate`, plus the resource path built for it.
# Neither `client` nor `parent` is referenced elsewhere in the visible part of
# this file.
# NOTE(review): the key file name mentions "gifted-mountain-318504" while
# project_id is "excellent-hue-272808" — verify that the two are meant to differ.
client = translate.TranslationServiceClient()
project_id = "excellent-hue-272808"
location = "global"
parent = f"projects/{project_id}/locations/{location}"


# Module-level report document, built once at import time: narrow margins, the
# last section's page dimensions swapped, a title, a bold line naming the
# translation providers, and a four-column table holding only its header row.
doc_file = "translated_abc"
doc2 = docx.Document()

# Every section gets the same 0.2 inch margin on all four sides.
sections = doc2.sections
for section in sections:
    section.top_margin = section.bottom_margin = Inches(0.2)
    section.left_margin = section.right_margin = Inches(0.2)

# Exchange width and height of the last section.
section = doc2.sections[-1]
new_height = section.page_width
section.page_width = section.page_height
section.page_height = new_height

name = f'Final table {doc_file}'
doc2.add_heading(name, 0)

doc_para = doc2.add_paragraph()
doc_para.add_run(
    'Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex'
).bold = True

# One header row; data rows are appended by the add_dial_comparison_* helpers.
table2 = doc2.add_table(rows=1, cols=4)
table2.style = 'TableGrid'
hdr_Cells = table2.rows[0].cells
for _index, _title in enumerate(("Input", "Output1", "Output2", "Output3")):
    hdr_Cells[_index].paragraphs[0].add_run(_title).bold = True
del _index, _title  # keep the module namespace the same as before

# doc_file = "translated_abc"
# doc2 = docx.Document()
# sections = doc2.sections
# for section in sections:
#     section.top_margin = Inches(0.2)
#     section.bottom_margin = Inches(0.2)
#     section.left_margin = Inches(0.2)
#     section.right_margin = Inches(0.2)
# section = doc2.sections[-1]
# new_height = section.page_width
# section.page_width = section.page_height
# section.page_height = new_height
# name = filename
# doc2.add_heading(name, 0)
# doc_para = doc2.add_paragraph()
# #doc_para.add_run('Translation resources used : Google, IBM watson, AWS, Azure, Lingvanex, Yandex').bold = True
# table2 = doc2.add_table(rows=1,cols=4)
# table2.style = 'TableGrid'
# hdr_Cells = table2.rows[0].cells
# hdr_Cells[0].paragraphs[0].add_run("Original").bold=True
# hdr_Cells[1].paragraphs[0].add_run("Translated").bold=True


def script_det(text):
    """Return the script of the first non-punctuation character of ``text``.

    The first character of ``text`` that is not in the punctuation set below is
    handed to ``script_cat`` and element ``[0]`` of its result is returned.
    Whitespace and digits are not in the set, so they count as ordinary
    characters. When ``text`` is empty or made up only of listed punctuation,
    ``script_cat`` receives an empty string.

    Args:
        text: String to inspect.

    Returns:
        ``script_cat(first_char)[0]`` — presumably a script name comparable with
        the values of ``default_script`` (that is how ``final_out`` uses it).
    """
    # Raw literal: the set contains a real backslash, and "\," inside a normal
    # literal is an invalid escape sequence that newer Pythons warn about.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~“"”'''
    first_char = next((char for char in text if char not in punctuations), "")
    return script_cat(first_char)[0]


def language_detector(text):
    """Detect the language of ``text`` through the Google Translate v2 client.

    ``text`` is sent to ``translate_client.translate`` with 'hi' as the target
    language; only the ``detectedSourceLanguage`` field of the response is
    returned and the translation itself is discarded.
    """
    response = translate_client.translate(text, target_language='hi')
    return response["detectedSourceLanguage"]


# Characters that punct_remover blanks out: ASCII punctuation, the ellipsis, the
# Devanagari danda and the ASCII digits.  Raw literal because the set contains a
# real backslash ("\," is an invalid escape sequence in a normal literal).
_PUNCT_AND_DIGITS = r'''!()-[]{};:'"\,<>./?@#$%^&*_~…।1234567890'''
_PUNCT_AND_DIGITS_TO_SPACE = str.maketrans(dict.fromkeys(_PUNCT_AND_DIGITS, " "))


def punct_remover(string):
    """Replace every punctuation mark and digit in ``string`` with one space.

    Each listed character becomes exactly one space, so the result has the same
    length as the input and runs of punctuation turn into runs of spaces.

    Args:
        string: Text to clean.

    Returns:
        A new string with the listed characters blanked out.
    """
    # One translate pass instead of a separate full-string replace per hit.
    return string.translate(_PUNCT_AND_DIGITS_TO_SPACE)


def word_transliterate(sentence, dest_script):
    """Placeholder transliteration hook: hand ``sentence`` back untouched.

    ``dest_script`` is accepted but ignored; no transliteration is performed.
    """
    unchanged = sentence
    return unchanged


def final_out(output1, output2, output3, dest_lang):
    """Pick the first candidate written entirely in the target language's script.

    Each candidate is cleaned with ``punct_remover`` and split on whitespace; a
    candidate qualifies when ``script_det`` of every word equals
    ``default_script[dest_lang]``. Candidates are tried in the order
    ``output1``, ``output2``, ``output3`` and the first qualifying one is
    returned unmodified. If none qualifies, ``output1`` is passed through
    ``word_transliterate`` (currently a pass-through) and returned.
    """
    cleaned1, cleaned2, cleaned3 = (
        punct_remover(candidate) for candidate in (output1, output2, output3)
    )

    def has_off_script_word(cleaned_text):
        # Stops at the first word whose script differs from the expected one.
        return any(
            script_det(token) != default_script[dest_lang]
            for token in cleaned_text.split()
        )

    if not has_off_script_word(cleaned1):
        return output1
    if not has_off_script_word(cleaned2):
        return output2
    if not has_off_script_word(cleaned3):
        return output3
    return word_transliterate(output1, default_script[dest_lang])


def _translation_for_source(source, sources_name, trans):
    """Return the ``trans`` entry whose key maps to ``source`` in ``sources_name``."""
    matched_key, found = None, False
    for key, provider in sources_name.items():
        if provider == source:
            # No break: the last matching key wins, as in the original scan.
            matched_key, found = key, True
    if not found:
        raise KeyError(f"no translation available for source {source!r}")
    return trans[str(matched_key)]


def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose one translation from the collected provider outputs.

    The five scoring helpers (manual, GLEU, METEOR and the two ROUGE results)
    each nominate an output and the source it came from. If every nomination
    equals ``t0`` that text is returned directly. Otherwise the sources of the
    nominations that differ from ``t0`` are passed to ``selection_source``,
    the translations of the up-to-three sources it returns are looked up, and
    ``final_out`` picks among them.

    Args:
        sentence: The original sentence. No longer used here (it only fed the
            report tables whose calls are disabled); kept so callers need not
            change.
        t0: Reference translation, the entry under key "0" of ``trans``.
        trans: Mapping of string index to translated text.
        sources_name: Mapping of the same string index to the provider name.
        target_lang: Target language code, forwarded to ``final_out``.

    Returns:
        The selected translation.

    Raises:
        KeyError: If ``selection_source`` names a source that is absent from
            ``sources_name``.
    """
    # Position in the list handed to selection_source -> scoring method name.
    methods_name = {'0': 'MNF', '1': 'Gleu',
                    '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
    google_output = t0
    output1, source1 = manual_diff_score(trans, sources_name)
    output2, source2 = gleu_diff_score(trans, sources_name)
    output3, source3 = meteor_diff_score(trans, sources_name)
    output4, source4, output5, source5 = rouge_diff_score(trans, sources_name)

    if google_output == output1 == output2 == output3 == output4 == output5:
        print("All outputs are same as google")
        return google_output

    # A method that agrees with the reference contributes a blank placeholder,
    # so the list always has one slot per method, in methods_name order.
    nominations = (
        (output1, source1),
        (output2, source2),
        (output3, source3),
        (output4, source4),
        (output5, source5),
    )
    s = [
        source if google_output != output else " "
        for output, source in nominations
    ]

    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(
        s, sources_name, trans, methods_name)

    best = _translation_for_source(s1ANDm1[0], sources_name, trans)
    # An empty source name means "no such pick": fall back to the best one.
    if s2ANDm2[0] != "":
        second = _translation_for_source(s2ANDm2[0], sources_name, trans)
    else:
        second = best
    if s3ANDm3[0] != "":
        third = _translation_for_source(s3ANDm3[0], sources_name, trans)
    else:
        third = best

    return final_out(best, second, third, target_lang)

# to return the table with best 3 outputs


def add_dial_comparison_doc2(doc2, table2, sentence, s1ANDm1, s2ANDm2, s3ANDm3, sources_name, trans):
    """Append one row to ``table2``: the sentence and up to three translations.

    Column 0 receives ``sentence``. Columns 1-3 receive the translation of the
    source named by ``s1ANDm1[0]``, ``s2ANDm2[0]`` and ``s3ANDm3[0]``, each
    followed by "(Source : ...)" and "(Methods : ...)" runs built from the
    pair's two elements. For columns 2 and 3 an empty source name leaves the
    cell text empty. ``doc2`` is accepted but not used.
    """
    cells = table2.add_row().cells
    ranked = ((1, s1ANDm1), (2, s2ANDm2), (3, s3ANDm3))

    for column, choice in ranked:
        # Only the second and third picks may be absent.
        if column != 1 and choice[0] == "":
            cells[column].text = ""
            continue

        # NOTE(review): chosen_key deliberately survives across columns, as the
        # single variable did before — when a later source has no match, the
        # previous column's key is reused. Confirm whether that is intended.
        for key, provider in sources_name.items():
            if provider == choice[0]:
                chosen_key = key
        translation = trans[str(chosen_key)]

        if column == 1:
            cells[0].text = sentence
        cells[column].text = translation
        paragraph = cells[column].paragraphs[0]
        paragraph.add_run(f'(Source : {choice[0]!s})')
        paragraph.add_run(f'(Methods : {choice[1]!s})')


def add_dial_comparison_doc22(doc2, table2, sentence, s1ANDm1, sources_name, trans):
    """Append one row to ``table2`` holding the sentence and its best translation.

    The translation is the ``trans`` entry whose key maps to ``s1ANDm1[0]`` in
    ``sources_name`` (the last such key if several match). ``doc2`` is accepted
    but not used.
    """
    cells = table2.add_row().cells

    wanted_source = s1ANDm1[0]
    for key, provider in sources_name.items():
        if provider == wanted_source:
            chosen_key = key
    translation = trans[str(chosen_key)]

    cells[0].text, cells[1].text = sentence, translation


class myDict(dict):
    """``dict`` subclass that adds an ``add(key, value)`` convenience method.

    Construction is inherited from ``dict``: ``myDict()`` yields an empty
    mapping, and the usual ``dict`` constructor arguments are accepted as well.
    """

    # No __init__ override: the previous one only rebound the local name
    # ``self`` (a no-op) and prevented passing initial contents.

    def add(self, key, value):
        """Store ``value`` under ``key``, overwriting any existing entry."""
        self[key] = value


def all_translator(sentence, source_lang, target_lang):
    """Translate ``sentence`` with every provider and return the selected result.

    Providers are tried in a fixed order (Google, IBM Watson, AWS, Azure,
    Lingvanex, Yandex). Each successful result is stored under the next string
    index ("0", "1", ...) together with the provider name; a provider that
    raises is skipped, so key "0" belongs to the first provider that succeeded.
    The collected results are then handed to ``compare_outputs``.

    Args:
        sentence: Text to translate. A single ASCII punctuation character is
            returned unchanged without contacting any provider.
        source_lang: Source language code (not passed to the Azure helper).
        target_lang: Target language code.

    Returns:
        The translation chosen by ``compare_outputs``.

    Raises:
        KeyError: If every provider failed, so there is no entry "0".
    """
    import logging  # local import keeps this block self-contained

    if sentence in list(string.punctuation):
        return sentence

    # Deferred calls so that each provider runs inside its own try block.
    providers = (
        ("GOOGLE", lambda: google(sentence, source_lang, target_lang)),
        ("IBM_WATSON", lambda: ibm_watson(sentence, source_lang, target_lang)),
        ("AWS", lambda: aws(sentence, source_lang, target_lang)),
        ("AZURE", lambda: azure(sentence, target_lang)),
        ("LINGVANEX", lambda: lingvanex(sentence, source_lang, target_lang)),
        ("YANDEX", lambda: yandex(sentence, source_lang, target_lang)),
    )

    trans = myDict()
    sources_name = myDict()
    for provider_name, run_provider in providers:
        try:
            translated = run_provider()
        except Exception:
            # Best effort: one failing provider must not stop the others, but
            # the failure is recorded instead of being silently dropped.
            logging.getLogger(__name__).warning(
                "%s translation failed; skipping provider",
                provider_name, exc_info=True)
            continue
        # Results live in locals only; nothing is written to module globals.
        index = str(len(trans))
        trans.add(index, translated)
        sources_name.add(index, provider_name)

    return compare_outputs(
        sentence, trans["0"], trans, sources_name, target_lang)


# Deletion table for punct_remover_w_o_digits: ASCII punctuation, the ellipsis
# and the Devanagari danda.  Raw literal because the set contains a real
# backslash ("\," is an invalid escape sequence in a normal literal).
_PUNCT_TO_DELETE = str.maketrans("", "", r'''!()-[]{};:'"\,<>./?@#$%^&*_~…।''')


def punct_remover_w_o_digits(string):
    """Delete every punctuation mark from ``string``, leaving digits in place.

    Unlike ``punct_remover``, the listed characters are removed outright (not
    replaced by spaces) and digits are left alone.

    Args:
        string: Text to clean.

    Returns:
        A new string without the listed punctuation characters.
    """
    # One translate pass instead of a separate full-string replace per hit.
    return string.translate(_PUNCT_TO_DELETE)

# Sentence = "I am Lokesh."
# source_lang = "en"
# target_lang = "hi"
# print(all_translator(Sentence, source_lang, target_lang))
# doc2.save("testing.docx")