# Conversion_Kitchen_Code/kitchen_counter/conversion/translation/translation_function.py
#
# File listing metadata: 543 lines, 19 KiB, Python, executable file.

# Module imports
import subprocess
from nltk.tokenize import sent_tokenize, regexp_tokenize
# internal imports
from conversion.translation.script_writing import dial_checker, dual_script, addDialogue, default_script
from conversion.translation.translation_variables import is_in_translation_list, special_characters, code_2_language
from conversion.translation.all_transliteration import all_transliteration
from conversion.translation.script_writing import default_script
from conversion.translation.translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
from conversion.translation.selection_source import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs
from conversion.translation.translation_resources import google, aws, azure, yandex
from .detection import language_detector, script_det
# -> Placeholder transliteration hook
def word_transliterate(sentence, dest_script):
    """Return ``sentence`` exactly as received.

    ``dest_script`` is accepted for interface compatibility but is not used;
    no transliteration is performed here.
    """
    return sentence
# # -> to be used when option 5 or 6 is yes from frontend then this function is used for translation
# def ui_option5_and_6(doc, dial_src_lang, dial_dest_lang, dialogue, dual_dial_script):
# if dialogue == "":
# return
# dial_translate = dial_checker(dial_dest_lang, dial_src_lang)
# if dial_translate:
# print("in 51")
# if is_in_translation_list(dial_src_lang) and is_in_translation_list(dial_dest_lang):
# trans_text = ui_option5_translate_comparison(
# dialogue, dial_src_lang, dial_dest_lang)
#
# if dual_dial_script == "Yes":
# dual_script(doc, dialogue,
# trans_text, dial_dest_lang)
# else:
# addDialogue(doc, trans_text, dial_dest_lang)
# else:
# print("in 52")
# if dual_dial_script == "Yes":
# dual_script(doc, dialogue,
# dialogue, dial_dest_lang)
# else:
# addDialogue(doc, dialogue, dial_dest_lang)
# -> If option 3 or 4 is "Yes" on the frontend, this function handles the dialogue text
def ui_option3_and_4(dial_src_lang, dial_dest_lang, dialogue, ui_option_1st_choice, dual_dial_script):
    """Translate or transliterate one dialogue according to the UI choices.

    Args:
        dial_src_lang: Source language of the dialogue.
        dial_dest_lang: Requested destination language of the dialogue.
        dialogue: The dialogue text.
        ui_option_1st_choice: ``"Yes"`` selects the translation path; any other
            value selects the transliteration path.
        dual_dial_script: ``"Yes"`` requests dual-script output. The
            ``dual_script`` writer calls are disabled in this function, so in
            that case nothing is produced and ``None`` is returned.

    Returns:
        The translated / transliterated / untouched dialogue, or ``None`` when
        ``dual_dial_script == "Yes"`` or when an empty dialogue reaches the
        translation branch.
    """
    print("dial_src_lang", dial_src_lang)
    print("dial_dest_lang", dial_dest_lang)
    print("ui option 1st choice", ui_option_1st_choice)
    wants_dual_script = dual_dial_script == "Yes"

    if ui_option_1st_choice != "Yes":
        # Transliteration path: convert the dialogue from its detected script
        # to the default script of the source language.
        print("in ui32")
        detected_script = script_det(dialogue)
        print(dialogue, detected_script, dial_src_lang)
        output = all_transliteration(
            dialogue, detected_script, default_script[dial_src_lang])
        return None if wants_dual_script else output

    print("in ui31")
    if not dial_checker(dial_dest_lang, dial_src_lang):
        # dial_checker reported that no translation is needed.
        print("in ui312")
        return None if wants_dual_script else dialogue

    print("in ui311")
    if dialogue == "":
        return None
    print(dial_src_lang, dial_dest_lang, "fjdjfskd")
    if is_in_translation_list(dial_src_lang) and is_in_translation_list(dial_dest_lang):
        print("case 1")
        trans_text = translate_comparison(
            dialogue, dial_src_lang, dial_dest_lang)
        print("the translated text is", trans_text)
    else:
        # One of the languages is not supported: pass the text through.
        trans_text = dialogue
        print("no creation of ")

    if wants_dual_script:
        print("case 1")
        return None
    # Fall back to the original dialogue when the translation came back empty.
    return dialogue if trans_text == "" else trans_text
# -> Converting Docx to PDF using LibreOffice
def convert_to_pdf(input_docx, out_folder):
    """Run headless LibreOffice to convert ``input_docx`` to PDF in ``out_folder``.

    Blocks until the LibreOffice process exits. The exit status is not
    inspected and nothing is returned.
    """
    command = ['libreoffice', '--headless', '--convert-to', 'pdf', '--outdir',
               out_folder, input_docx]
    process = subprocess.Popen(command)
    print(['--convert-to', 'pdf', input_docx])
    process.communicate()
    # (A follow-up ``chmod 777`` on the generated PDF is currently disabled.)
def final_out(output1, output2, output3, dest_lang):
    """Return the first candidate written entirely in the target script.

    A candidate qualifies when every word in it (runs of word characters and
    apostrophes) is detected by ``script_det`` as ``default_script[dest_lang]``.
    Candidates are tried in the order ``output1``, ``output2``, ``output3``.
    If none qualifies, ``output1`` is passed through ``word_transliterate``
    and that result is returned.

    Args:
        output1: Preferred translation candidate.
        output2: Second candidate.
        output3: Third candidate.
        dest_lang: Key into ``default_script`` naming the expected script.

    Returns:
        The selected (or transliterated) text.
    """
    def has_foreign_word(text):
        # Raw string: "\w" in a normal string literal is an invalid escape.
        return any(
            script_det(token) != default_script[dest_lang]
            for token in regexp_tokenize(text, r"[\w']+")
        )

    for candidate in (output1, output2, output3):
        if not has_foreign_word(candidate):
            return candidate
    return word_transliterate(output1, default_script[dest_lang])
def manual_diff_score(trans, sources_name):
    """Pick the translation whose average ``diff_score`` to the others is lowest.

    NOTE(review): a function with this name is also imported at the top of the
    file from ``translation_metric``; this definition replaces it in this
    module. Confirm that is intended.

    Args:
        trans: Mapping ``"0"``, ``"1"``, ... -> translated text.
        sources_name: Mapping with the same keys -> name of the source that
            produced the text.

    Returns:
        ``(text, source_name)`` of the chosen translation. On ties the lowest
        index wins.

    Raises:
        ValueError: If ``sources_name`` is empty.
    """
    n = len(sources_name)
    if n == 0:
        raise ValueError("manual_diff_score() needs at least one translation")
    if n == 1:
        # Nothing to compare against; averaging over n - 1 == 0 peers would
        # divide by zero.
        return trans["0"], sources_name["0"]

    global_diff = []
    for i in range(n):
        local_diff = 0
        for j in range(n):
            if i == j:
                continue
            # NOTE(review): this parses as `trans[i] and (trans[j] == " ")`,
            # i.e. the pair is skipped when trans[i] is truthy and trans[j] is
            # a single space. Kept as-is; confirm that is the intended test.
            if trans[str(i)] and trans[str(j)] == " ":
                continue
            local_diff += diff_score(trans[str(i)], trans[str(j)])
        global_diff.append(local_diff / (n - 1))

    chosen = str(global_diff.index(min(global_diff)))
    return trans[chosen], sources_name[chosen]
# -> Compare outputs from all sources (Google, AWS, Azure, ...) and decide the text to be returned
def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose one translation of a sentence from several sources.

    Each metric (manual diff, GLEU, METEOR, ROUGE-N, ROUGE-L) nominates one
    translation. If every nomination equals ``t0`` that text is returned.
    Otherwise the nominating sources that disagree with ``t0`` are handed to
    ``selection_source``, the up-to-three sources it returns are mapped back
    to their translations, and ``final_out`` picks among them.

    Args:
        sentence: The original sentence (not used in the decision).
        t0: The reference translation (the caller passes ``trans["0"]``).
        trans: Mapping ``"0"``, ``"1"``, ... -> translated text.
        sources_name: Mapping with the same keys -> source name.
        target_lang: Destination language, forwarded to ``final_out``.

    Returns:
        The selected translation.

    Raises:
        KeyError: If ``selection_source`` names a source that is not present
            in ``sources_name``.
    """
    methods_name = {'0': 'MNF', '1': 'Gleu',
                    '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
    google_output = t0
    output1, source1 = manual_diff_score(trans, sources_name)
    output2, source2 = gleu_diff_score(trans, sources_name)
    print("TRans 2-> ", trans)
    print(type(trans))
    output3, source3 = meteor_diff_score(trans, sources_name)
    output4, source4, output5, source5 = rouge_diff_score(
        trans, sources_name)

    nominations = (
        (output1, source1),
        (output2, source2),
        (output3, source3),
        (output4, source4),
        (output5, source5),
    )
    if all(text == google_output for text, _ in nominations):
        print("all output is same as google")
        return google_output

    # One slot per metric: the nominating source, or " " when that metric
    # agreed with the reference translation.
    s = [source if text != google_output else " "
         for text, source in nominations]
    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(
        s, sources_name, trans, methods_name)

    def translation_from(source):
        # When a name appears more than once the last key wins, matching the
        # original scan over sources_name.items().
        keys = [key for key, name in sources_name.items() if name == source]
        if not keys:
            raise KeyError(f"unknown translation source: {source!r}")
        return trans[str(keys[-1])]

    output1 = translation_from(s1ANDm1[0])
    # An empty source name means "no second/third pick": reuse the first one.
    output2 = translation_from(s2ANDm2[0]) if s2ANDm2[0] != "" else output1
    output3 = translation_from(s3ANDm3[0]) if s3ANDm3[0] != "" else output1
    return final_out(output1, output2, output3, target_lang)
# -> Defining own way of declaring Dictionary
class myDict(dict):
    """A ``dict`` that starts empty and offers an ``add(key, value)`` helper."""

    def __init__(self):
        # Initialise the underlying dict properly; rebinding ``self`` to a new
        # dict (as was done before) has no effect on the instance.
        super().__init__()

    def add(self, key, value):
        """Store ``value`` under ``key``, replacing any existing entry."""
        self[key] = value
# -> Main Translation function to be called without any special dots in Sentence
def all_translator(sentence, source_lang, target_lang, makeExcel=False):
    """Translate ``sentence`` with every available provider and pick one result.

    Google, AWS and Azure are tried in that order. A provider that raises is
    skipped, and the remaining results are numbered ``"0"``, ``"1"``, ... in
    the order they succeeded. With a single result it is returned directly;
    otherwise ``compare_outputs`` chooses among them.

    NOTE(review): ``trans["0"]`` is handed to ``compare_outputs`` as the
    reference translation; if Google failed, that entry comes from the next
    provider that succeeded.

    Args:
        sentence: Text to translate.
        source_lang: Source language code.
        target_lang: Destination language code.
        makeExcel: When true, also return all provider outputs joined by
            ``"<sentence> | "``.

    Returns:
        The chosen translation, or ``(translation, joined_outputs)`` when
        ``makeExcel`` is true.

    Raises:
        KeyError: If no provider produced a translation.
    """
    providers = (
        ("GOOGLE", lambda: google(sentence, source_lang, target_lang)),
        ("AWS", lambda: aws(sentence, source_lang, target_lang)),
        # The Azure wrapper takes no source language.
        ("AZURE", lambda: azure(sentence, target_lang)),
    )

    trans = myDict()
    sources_name = myDict()
    for name, translate in providers:
        try:
            result = translate()
        except Exception as exc:
            # Best effort: one failing provider must not abort the others.
            print("Translation source failed:", name, exc)
            continue
        key = str(len(trans))
        trans.add(key, result)
        sources_name.add(key, name)

    if not trans:
        raise KeyError("0: no translation source returned a result")

    if len(sources_name) == 1:
        trans_text = trans["0"]
    else:
        print("Trans -> ", trans)
        print(type(trans))
        trans_text = compare_outputs(
            sentence, trans["0"], trans, sources_name, target_lang)

    if makeExcel:
        print("Translated texts are", trans)
        return trans_text, f"{sentence} | ".join(trans.values())
    return trans_text
# -> Main Translation function to be called with any special dots in Sentence (TRY TO USE THIS FUNCTION FOR TRANSLATION)
def translate_comparison(text, source_lang, target_lang, makeExcel=False):
    """Translate ``text`` sentence by sentence and join the results with spaces.

    Sentences containing an entry of ``special_characters`` are delegated to
    ``translation_with_spcecial_dots``; all others go to ``all_translator``.

    Args:
        text: Text to translate.
        source_lang: Source language code.
        target_lang: Destination language code.
        makeExcel: When true, also collect the per-sentence provider outputs
            that ``all_translator`` returns.

    Returns:
        The translated text, or ``(translated_text, str(provider_outputs))``
        when ``makeExcel`` is true, where ``provider_outputs`` is the list of
        per-sentence detail strings.
        NOTE(review): the second element previously referenced an undefined
        name; confirm this is the shape consumers expect.
    """
    print(text, " : Text at 58%")
    translated_text = []
    provider_outputs = []
    for sentence in sent_tokenize(text):
        splitter = next(
            (ext for ext in special_characters if ext in sentence), None)
        if splitter is not None:
            print("Isme gaya")
            # The helper needs the splitter plus the detected language and
            # script of the sentence, not just the three translation args.
            # NOTE(review): script_data / subsentence_choices are left at
            # their defaults; confirm the helper accepts that.
            translated_text.append(translation_with_spcecial_dots(
                sentence, source_lang, target_lang, splitter,
                language_detector(sentence), script_det(sentence)))
        elif makeExcel:
            # With makeExcel the translator returns (text, joined outputs).
            trans_text, details = all_translator(
                sentence, source_lang, target_lang, makeExcel)
            translated_text.append(trans_text)
            provider_outputs.append(details)
        else:
            translated_text.append(all_translator(
                sentence, source_lang, target_lang, makeExcel))

    joined = " ".join(translated_text)
    if makeExcel:
        return joined, str(provider_outputs)
    return joined
# -> Handling all special dots in sentence
# -> Main function for handling sentences to remove recursive dots
#
# For each entry of ``special_characters`` that occurs in ``Sentence``, the text
# is split on that entry and every non-blank piece is translated; the results
# are returned joined by single spaces. When no entry occurs in ``Sentence``
# the result is an empty string.
#
# NOTE(review): the inner loop reuses the name ``Sentence``, so after the first
# split the outer loop tests the remaining special characters against the last
# piece rather than the original argument - confirm whether that is intended.
# NOTE(review): ``translation_with_spcecial_dots`` is called with three
# arguments here, while its definition in this file requires six positional
# parameters.
def recursive_dots(Sentence, source_lang, target_lang):
    translated_text = []
    for i in special_characters:
        # Only split on special characters that are actually present.
        if i not in Sentence:
            continue
        Sentences = Sentence.split(i)
        for Sentence in Sentences:
            # Skip the empty / single-space fragments produced by the split.
            if Sentence == "" or Sentence == " ":
                continue
            if any(ext in Sentence for ext in special_characters):
                # The piece still contains another special character.
                trans_text = translation_with_spcecial_dots(
                    Sentence, source_lang, target_lang)
            else:
                # Re-attach the splitter unless the piece equals the last one
                # (the comparison is by value, not by position).
                if Sentence != Sentences[-1]:
                    trans_text = all_translator(
                        Sentence, source_lang, target_lang) + i
                else:
                    trans_text = all_translator(
                        Sentence, source_lang, target_lang)
            translated_text.append(trans_text)
    return " ".join(translated_text)
def _special_dots_choice_index(line_language, line_script, script_data):
    """Return the ``subsentence_choices`` index for a language/script pair."""
    categories = (
        ('dial_dest_lang', 'dial_dest_script', 0),
        ('dial_dest_lang', 'non_dial_dest_script', 2),
        ('non_dial_src_lang', 'dial_dest_script', 4),
        ('non_dial_src_lang', 'non_dial_dest_script', 6),
        ('non_dial_dest_lang', 'dial_dest_script', 8),
        ('non_dial_dest_lang', 'non_dial_dest_script', 10),
    )
    # First match wins, in the same order as the original if/elif chain.
    for lang_key, script_key, index in categories:
        if line_language == script_data[lang_key] \
                and line_script == script_data[script_key]:
            return index
    return 12


def translation_with_spcecial_dots(text, source_lang, target_lang, splitter, line_language, line_script, script_data=None, subsentence_choices=None):
    """Translate ``text`` piece by piece around a special character.

    ``text`` is split on ``splitter``; blank pieces (``""`` or ``" "``) are
    dropped. A piece that still contains another entry of
    ``special_characters`` is handled recursively. The last piece is always
    translated. Every other piece is translated when the
    ``subsentence_choices`` flag selected by ``line_language`` /
    ``line_script`` is truthy, is kept untranslated otherwise, and then gets
    ``splitter`` appended.

    NOTE(review): translating the last piece regardless of the choice flags
    mirrors the previous code; confirm that this is intended.

    Args:
        text: Sentence containing ``splitter``.
        source_lang: Source language code.
        target_lang: Destination language code.
        splitter: The special character sequence to split on.
        line_language: Detected language of ``text``.
        line_script: Detected script of ``text``.
        script_data: Mapping with the keys ``dial_dest_lang``,
            ``dial_dest_script``, ``non_dial_dest_script``,
            ``non_dial_src_lang`` and ``non_dial_dest_lang``. When ``None``,
            every piece is translated.
        subsentence_choices: Sequence of flags indexed 0, 2, ..., 12. When
            ``None``, every piece is translated.

    Returns:
        The processed pieces joined by single spaces.
    """
    sentences = text.split(splitter)
    last_index = len(sentences) - 1
    translate_everything = script_data is None or subsentence_choices is None

    translated_text = []
    # Track position rather than comparing text with the last piece, so a
    # piece that merely equals the last one still gets its splitter back.
    for index, sentence in enumerate(sentences):
        if sentence == "" or sentence == " ":
            continue
        nested_splitter = next(
            (ext for ext in special_characters if ext in sentence), None)
        if nested_splitter:
            # Pass the choice context down; without it the nested call cannot
            # evaluate the choice flags.
            trans_text = translation_with_spcecial_dots(
                sentence, source_lang, target_lang, nested_splitter,
                language_detector(sentence), script_det(sentence),
                script_data, subsentence_choices)
        elif index == last_index:
            trans_text = all_translator(sentence, source_lang, target_lang)
        else:
            if translate_everything or subsentence_choices[
                    _special_dots_choice_index(
                        line_language, line_script, script_data)]:
                trans_text = all_translator(
                    sentence, source_lang, target_lang)
            else:
                # Choice switched off: keep the piece as written instead of
                # reusing a previous piece's translation.
                trans_text = sentence
            trans_text = trans_text + splitter
        translated_text.append(trans_text)
    return " ".join(translated_text)
def _sentence_choice_index(line_language, line_script, script_data):
    """Return the ``subsentence_choices`` index for a language/script pair."""
    categories = (
        ('dial_dest_lang', 'dial_dest_script', 0),
        ('dial_dest_lang', 'non_dial_dest_script', 2),
        ('non_dial_src_lang', 'dial_dest_script', 4),
        ('non_dial_src_lang', 'non_dial_dest_script', 6),
        ('non_dial_dest_lang', 'dial_dest_script', 8),
        ('non_dial_dest_lang', 'non_dial_dest_script', 10),
    )
    # First match wins, in the same order as the original if/elif chain.
    for lang_key, script_key, index in categories:
        if line_language == script_data[lang_key] \
                and line_script == script_data[script_key]:
            return index
    return 12


def translate_comparison2(text, source_lang, target_lang, script_data=None, subsentence_choices=None, is_dialogue=False):
    """Translate ``text``, optionally sentence by sentence with per-category choices.

    When ``is_dialogue`` is false the whole text goes straight to
    ``all_translator``. Otherwise the text is split into sentences (with
    ``sent_tokenize`` when the source language is in the supported list, else
    on ``"."``). Each sentence is then either delegated to
    ``translation_with_spcecial_dots`` (if it contains a special character),
    translated (if the ``subsentence_choices`` flag for its detected
    language/script is truthy), or kept untranslated.

    Args:
        text: Text to translate.
        source_lang: Source language code (key into ``code_2_language``).
        target_lang: Destination language code.
        script_data: Mapping with the keys ``dial_dest_lang``,
            ``dial_dest_script``, ``non_dial_dest_script``,
            ``non_dial_src_lang`` and ``non_dial_dest_lang``. When ``None``,
            every sentence is translated.
        subsentence_choices: Sequence of flags indexed 0, 2, ..., 12. When
            ``None``, every sentence is translated.
        is_dialogue: Selects the sentence-by-sentence path.

    Returns:
        The translated text. If sentence splitting fails, the error is printed
        and ``text`` is returned unchanged.
    """
    if not is_dialogue:
        return all_translator(text, source_lang, target_lang)

    tokenizers_valid_langs = ("malayalam", "french", "italian", "german", "spanish", "swedish", "finnish", "danish", "english", "slovene", "norwegian", "dutch", "portuguese", "czech", "russian", "polish", "turkish", "estonian", "greek")
    try:
        language_name = code_2_language[source_lang].lower()
        if language_name in tokenizers_valid_langs:
            sentences = sent_tokenize(text, language=language_name)
        else:
            sentences = text.split(".")
    except Exception as e:
        # Deliberate best effort: hand the text back untranslated.
        print("Error was : ", e)
        return text

    translate_everything = script_data is None or subsentence_choices is None

    # List of translated sentences
    translated_text = []
    # Translating each sentence one by one
    for sentence in sentences:
        line_language = language_detector(sentence)
        line_script = script_det(sentence)
        splitter = next(
            (ext for ext in special_characters if ext in sentence), None)
        if splitter:
            trans_text = translation_with_spcecial_dots(
                sentence, source_lang, target_lang, splitter, line_language, line_script, script_data, subsentence_choices)
        elif translate_everything or subsentence_choices[
                _sentence_choice_index(line_language, line_script, script_data)]:
            trans_text = all_translator(sentence, source_lang, target_lang)
        else:
            # Choice switched off: keep the sentence as written instead of
            # reusing the previous sentence's translation.
            trans_text = sentence
        translated_text.append(trans_text)
    return " ".join(translated_text)