# Module imports
import subprocess

from nltk.tokenize import sent_tokenize, regexp_tokenize

# internal imports
from conversion.translation.script_writing import (
    dial_checker,
    dual_script,
    addDialogue,
    default_script,
)
from conversion.translation.translation_variables import (
    is_in_translation_list,
    special_characters,
    code_2_language,
)
from conversion.translation.all_transliteration import all_transliteration
from conversion.translation.script_writing import default_script
# NOTE: ``manual_diff_score`` imported here is shadowed by the local definition
# of the same name further down in this module.
from conversion.translation.translation_metric import (
    manual_diff_score,
    bleu_diff_score,
    gleu_diff_score,
    meteor_diff_score,
    rouge_diff_score,
    diff_score,
    critera4_5,
)
from conversion.translation.selection_source import (
    selection_source,
    function5,
    function41,
    function311,
    function221,
    function2111,
    function11111,
    selection_source_transliteration,
    two_sources_two_outputs,
)
from conversion.translation.translation_resources import google, aws, azure, yandex
from .detection import language_detector, script_det

# Pattern used to split a translation into word tokens (raw string so that
# ``\w`` is not treated as an invalid string escape).
_WORD_PATTERN = r"[\w']+"

# Language names for which ``sent_tokenize`` is called with an explicit
# ``language`` argument in ``translate_comparison2``.
_SENT_TOKENIZER_LANGS = (
    "malayalam", "french", "italian", "german", "spanish", "swedish",
    "finnish", "danish", "english", "slovene", "norwegian", "dutch",
    "portuguese", "czech", "russian", "polish", "turkish", "estonian",
    "greek",
)

# (language key in script_data, script key in script_data, index into
# subsentence_choices).  Checked in order; index 12 is the catch-all.
_SUBSENTENCE_RULES = (
    ("dial_dest_lang", "dial_dest_script", 0),
    ("dial_dest_lang", "non_dial_dest_script", 2),
    ("non_dial_src_lang", "dial_dest_script", 4),
    ("non_dial_src_lang", "non_dial_dest_script", 6),
    ("non_dial_dest_lang", "dial_dest_script", 8),
    ("non_dial_dest_lang", "non_dial_dest_script", 10),
)
_SUBSENTENCE_DEFAULT_INDEX = 12


# -> Random Function
def word_transliterate(sentence, dest_script):
    """Return *sentence* unchanged (placeholder; *dest_script* is ignored)."""
    return sentence


# # -> to be used when option 5 or 6 is yes from frontend then this function is used for translation
# def ui_option5_and_6(doc, dial_src_lang, dial_dest_lang, dialogue, dual_dial_script):
#     if dialogue == "":
#         return
#     dial_translate = dial_checker(dial_dest_lang, dial_src_lang)
#     if dial_translate:
#         print("in 51")
#         if is_in_translation_list(dial_src_lang) and is_in_translation_list(dial_dest_lang):
#             trans_text = ui_option5_translate_comparison(
#                 dialogue, dial_src_lang, dial_dest_lang)
#
#             if dual_dial_script == "Yes":
#                 dual_script(doc, dialogue,
#                             trans_text, dial_dest_lang)
#             else:
#                 addDialogue(doc, trans_text, dial_dest_lang)
#     else:
#         print("in 52")
#         if dual_dial_script == "Yes":
#             dual_script(doc, dialogue,
#                         dialogue, dial_dest_lang)
#         else:
#             addDialogue(doc, dialogue, dial_dest_lang)


def _first_special_character(text):
    """Return the first entry of ``special_characters`` found in *text*, else None."""
    return next((ext for ext in special_characters if ext in text), None)


def _should_translate(line_language, line_script, script_data, subsentence_choices):
    """Decide whether a sub-sentence is sent to the translators.

    The (language, script) pair is matched against the entries of
    *script_data* in the order given by ``_SUBSENTENCE_RULES``; the first
    match selects the flag to read from *subsentence_choices*.  When nothing
    matches, flag 12 is used.  If either *script_data* or
    *subsentence_choices* is missing there is nothing to filter on, so the
    text is always translated.
    """
    if script_data is None or subsentence_choices is None:
        return True
    for lang_key, script_key, choice_index in _SUBSENTENCE_RULES:
        if line_language == script_data[lang_key] \
                and line_script == script_data[script_key]:
            return bool(subsentence_choices[choice_index])
    return bool(subsentence_choices[_SUBSENTENCE_DEFAULT_INDEX])


# -> If option 3 or 4 is yes from frontend then this function is used for translation
def ui_option3_and_4(dial_src_lang, dial_dest_lang, dialogue, ui_option_1st_choice, dual_dial_script):
    """Translate or transliterate one dialogue according to the UI choices.

    * ``ui_option_1st_choice == "Yes"``: the dialogue is translated with
      ``translate_comparison`` when ``dial_checker`` allows it and both
      languages pass ``is_in_translation_list``; otherwise the dialogue
      itself is used.
    * any other value: the dialogue is transliterated with
      ``all_transliteration`` into ``default_script[dial_src_lang]``.

    Returns the resulting text, or ``None`` when ``dual_dial_script`` is
    ``"Yes"`` (the dual-script writer is currently disabled) or when an
    empty dialogue reaches the translation branch.
    """
    print("dial_src_lang", dial_src_lang)
    print("dial_dest_lang", dial_dest_lang)
    print("ui option 1st choice", ui_option_1st_choice)
    wants_dual_script = dual_dial_script == "Yes"

    if ui_option_1st_choice != "Yes":
        print("in ui32")
        detected_script = script_det(dialogue)
        print(dialogue, detected_script, dial_src_lang)
        output = all_transliteration(
            dialogue, detected_script, default_script[dial_src_lang])
        if wants_dual_script:
            # transliteration
            # dual_script(doc, dialogue, output, dial_dest_lang)
            return None
        return output
        # addDialogue(doc, output, dial_dest_lang)

    print("in ui31")
    if not dial_checker(dial_dest_lang, dial_src_lang):
        print("in ui312")
        if wants_dual_script:
            # dual_script(doc, dialogue,
            #             dialogue, dial_dest_lang)
            return None
        return dialogue

    print("in ui311")
    if dialogue == "":
        return None
    print(dial_src_lang, dial_dest_lang, "fjdjfskd")
    if is_in_translation_list(dial_src_lang) and is_in_translation_list(dial_dest_lang):
        print("case 1")
        trans_text = translate_comparison(
            dialogue, dial_src_lang, dial_dest_lang)
        print("the translated text is", trans_text)
    else:
        trans_text = dialogue
        print("no creation of ")

    if wants_dual_script:
        print("case 1")
        # dual_script(doc, dialogue,
        #             trans_text, dial_dest_lang)
        return None
    # An empty translation falls back to the untranslated dialogue.
    if trans_text == "":
        return dialogue
    return trans_text


# -> Converting Docx to PDF using Libra-Office
def convert_to_pdf(input_docx, out_folder):
    """Run headless LibreOffice to convert *input_docx* to PDF in *out_folder*.

    Blocks until the ``libreoffice`` process exits; its exit status is not
    checked.
    """
    command = ['libreoffice', '--headless', '--convert-to',
               'pdf', '--outdir', out_folder, input_docx]
    p = subprocess.Popen(command)
    print(['--convert-to', 'pdf', input_docx])
    p.communicate()
    # saveFile = input_docx.split('.')[0] + ".pdf"
    # change_chmod = subprocess.Popen(['sudo', 'chmod', '777', saveFile])
    # change_chmod.communicate()


def _has_off_script_word(text, script):
    """Return True if any word token of *text* is not detected as *script*."""
    return any(script_det(word) != script
               for word in regexp_tokenize(text, _WORD_PATTERN))


def final_out(output1, output2, output3, dest_lang):
    """Return the first candidate whose words are all in the target script.

    Candidates are tried in the order *output1*, *output2*, *output3*
    against ``default_script[dest_lang]``.  If all three contain a word in
    another script, *output1* is passed through ``word_transliterate`` and
    returned.
    """
    target_script = default_script[dest_lang]
    if not _has_off_script_word(output1, target_script):
        return output1
    if not _has_off_script_word(output2, target_script):
        return output2
    if not _has_off_script_word(output3, target_script):
        return output3
    # print("in3")
    return word_transliterate(output1, target_script)


def manual_diff_score(trans, sources_name):
    """Pick the translation with the smallest mean ``diff_score`` to the others.

    *trans* and *sources_name* are indexed by the string keys ``"0"`` ..
    ``str(n - 1)`` where ``n = len(sources_name)``.

    Returns ``(translation, source_name)`` of the chosen entry.  With a
    single source that source is returned directly (previously this divided
    by zero).

    Raises:
        ValueError: if *sources_name* is empty.
    """
    n = len(sources_name)
    if n == 0:
        raise ValueError("manual_diff_score() needs at least one translation")
    if n == 1:
        return trans["0"], sources_name["0"]

    global_diff = []
    for i in range(n):
        local_diff = 0
        for j in range(n):
            if i == j:
                continue
            # NOTE(review): as written this skips the pair when trans[i] is
            # truthy and trans[j] is a single space; it may have been meant
            # as "both are blank" -- confirm before changing.
            if trans[str(i)] and trans[str(j)] == " ":
                continue
            local_diff += diff_score(trans[str(i)], trans[str(j)])
        global_diff.append(local_diff / (n - 1))

    chosen = str(global_diff.index(min(global_diff)))
    return trans[chosen], sources_name[chosen]


def _translation_by_source(source, sources_name, trans, fallback):
    """Return the translation produced by the backend called *source*.

    If several keys map to *source* the last one wins; if none does,
    *fallback* is returned instead of failing on an unbound key.
    """
    matched_key = None
    for key, name in sources_name.items():
        if name == source:
            matched_key = key
    if matched_key is None:
        return fallback
    return trans[str(matched_key)]


# -> Comapre Outputs from all sources like google,ibm,aws,etc and decides the text to be returned as output
def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose one translation out of the outputs of several backends.

    Each metric (manual diff, GLEU, METEOR, ROUGE-n, ROUGE-l) nominates a
    translation.  If every nomination equals *t0*, *t0* is returned.
    Otherwise the source names of the nominations that differ from *t0*
    (a single space stands in for the ones that agree) are handed to
    ``selection_source``; the up-to-three sources it returns are mapped back
    to their translations and ``final_out`` picks among them.

    *sentence* is accepted for interface compatibility and is not used.
    """
    methods_name = {'0': 'MNF', '1': 'Gleu',
                    '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
    google_output = t0
    #print("google", google_output)
    output1, source1 = manual_diff_score(trans, sources_name)
    #print("MNF", output1)
    output2, source2 = gleu_diff_score(trans, sources_name)
    #print("gleu", output2)
    print("TRans 2-> ",trans)
    print(type(trans))
    output3, source3 = meteor_diff_score(trans, sources_name)
    #print("meteor", output3)
    output4, source4, output5, source5 = rouge_diff_score(
        trans, sources_name)
    #print("rougen", output4)
    #print("rougel", output5)

    if google_output == output1 == output2 == output3 == output4 == output5:
        print("all output is same as google")
        return google_output

    nominations = ((output1, source1), (output2, source2), (output3, source3),
                   (output4, source4), (output5, source5))
    s = [source if google_output != output else " "
         for output, source in nominations]

    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(
        s, sources_name, trans, methods_name)

    output1 = _translation_by_source(
        s1ANDm1[0], sources_name, trans, google_output)
    # An empty second/third choice means "same as the first choice".
    if s2ANDm2[0] != "":
        output2 = _translation_by_source(
            s2ANDm2[0], sources_name, trans, output1)
    else:
        output2 = output1
    if s3ANDm3[0] != "":
        output3 = _translation_by_source(
            s3ANDm3[0], sources_name, trans, output1)
    else:
        output3 = output1

    return final_out(output1, output2, output3, target_lang)


# -> Defining own way of declaring Dictionary
class myDict(dict):
    """``dict`` with an ``add(key, value)`` convenience method.

    The former ``__init__`` only rebound the local name ``self`` and had no
    effect, so construction is left to ``dict`` itself.
    """

    def add(self, key, value):
        """Store *value* under *key*."""
        self[key] = value


# -> Main Translation function to be called without any special dots in Sentence
def all_translator(sentence, source_lang, target_lang, makeExcel=False):
    """Translate *sentence* with every enabled backend and pick one result.

    Backends are tried in order; one that raises is reported and skipped.
    Successful results are stored under consecutive string keys starting at
    ``"0"``, so ``"0"`` is the first backend that succeeded (not necessarily
    Google).  With one result it is returned as is; with several,
    ``compare_outputs`` chooses.  If no backend succeeds, *sentence* is
    returned untranslated.

    Returns:
        The chosen text, or ``(chosen_text, joined_outputs)`` when
        *makeExcel* is true, where ``joined_outputs`` is every backend
        output joined with ``"<sentence> | "``.
    """
    backends = (
        ("GOOGLE", lambda: google(sentence, source_lang, target_lang)),
        # ("IBM_WATSON", lambda: ibm_watson(sentence, source_lang, target_lang)),
        ("AWS", lambda: aws(sentence, source_lang, target_lang)),
        ("AZURE", lambda: azure(sentence, target_lang)),
        # ("LINGVANEX", lambda: lingvanex(sentence, source_lang, target_lang)),
        # ("YANDEX", lambda: yandex(sentence, source_lang, target_lang)),
    )

    trans = myDict()
    sources_name = myDict()
    for name, translate in backends:
        try:
            result = translate()
        except Exception as exc:
            # Best effort: one failing backend must not block the others.
            print("Translation source failed:", name, exc)
            continue
        key = str(len(trans))
        trans.add(key, result)
        sources_name.add(key, name)

    if not trans:
        # Every backend failed; hand back the input instead of KeyError('0').
        print("No translation source succeeded, returning input text")
        trans_text = sentence
    elif len(sources_name) == 1:
        trans_text = trans["0"]
    else:
        print("Trans -> ", trans)
        print(type(trans))
        trans_text = compare_outputs(
            sentence, trans["0"], trans, sources_name, target_lang)
    # print("final trasnlated text 101", trans_text)

    if makeExcel:
        print("Translated texts are",trans)
        return trans_text, str(rf"{sentence} | ".join(list(trans.values())))
    return trans_text


# -> Main Translation function to be called with any special dots in Sentence (TRY TO USE THIS FUNCTION FOR TRANSLATION)
def translate_comparison(text, source_lang, target_lang, makeExcel=False):
    """Translate *text* sentence by sentence.

    The text is split with ``sent_tokenize``.  A sentence containing an
    entry of ``special_characters`` is handled by
    ``translation_with_spcecial_dots``; any other sentence goes to
    ``all_translator``.

    Returns:
        The translated sentences joined by single spaces.  When *makeExcel*
        is true, a tuple of that string and ``str()`` of the list of
        per-sentence backend outputs reported by ``all_translator``
        (sentences with special characters contribute no entry).
    """
    print(text, " : Text at 58%")
    translated_text = []
    per_source_outputs = []
    for sentence in sent_tokenize(text):
        if splitter := _first_special_character(sentence):
            print("Isme gaya")
            # No script_data is available here, so the language/script
            # arguments are not consulted and every fragment is translated.
            trans_text = translation_with_spcecial_dots(
                sentence, source_lang, target_lang, splitter, None, None)
        elif makeExcel:
            trans_text, outputs = all_translator(
                sentence, source_lang, target_lang, makeExcel)
            per_source_outputs.append(outputs)
        else:
            trans_text = all_translator(
                sentence, source_lang, target_lang, makeExcel)
        translated_text.append(trans_text)

    if makeExcel:
        return " ".join(translated_text), str(per_source_outputs)
    return " ".join(translated_text)


# -> Handling all special dots in sentence
# -> Main function for handling sentences to remove recursive dots
def recursive_dots(Sentence, source_lang, target_lang):
    """Translate a sentence piecewise around its first special character.

    *Sentence* is split on the first entry of ``special_characters`` it
    contains.  Empty and single-space fragments are dropped.  A fragment
    that itself contains a special character is delegated to
    ``translation_with_spcecial_dots``; any other fragment is translated
    with ``all_translator`` and, unless it is the final fragment, gets the
    splitter appended again.

    Returns the pieces joined by single spaces, or ``""`` when *Sentence*
    contains no special character.
    """
    splitter = _first_special_character(Sentence)
    if not splitter:
        return ""

    fragments = Sentence.split(splitter)
    last_index = len(fragments) - 1
    translated_text = []
    for index, fragment in enumerate(fragments):
        if fragment == "" or fragment == " ":
            continue
        if nested_splitter := _first_special_character(fragment):
            trans_text = translation_with_spcecial_dots(
                fragment, source_lang, target_lang, nested_splitter, None, None)
        else:
            trans_text = all_translator(fragment, source_lang, target_lang)
            # Compare by position: two fragments may have identical text.
            if index != last_index:
                trans_text = trans_text + splitter
        translated_text.append(trans_text)
    return " ".join(translated_text)


def translation_with_spcecial_dots(text, source_lang, target_lang, splitter, line_language, line_script, script_data=None, subsentence_choices=None):
    """Translate *text* fragment by fragment around *splitter*.

    Empty and single-space fragments are dropped.  A fragment containing
    another special character is handled recursively, using the language
    and script detected for that fragment and the same *script_data* /
    *subsentence_choices*.  Any other fragment is translated when it is the
    final fragment or when ``_should_translate`` approves for
    (*line_language*, *line_script*); otherwise it is kept unchanged.
    Non-final plain fragments get *splitter* appended again.

    Returns the pieces joined by single spaces.
    """
    sentences = text.split(splitter)
    last_index = len(sentences) - 1
    translated_text = []
    for index, sentence in enumerate(sentences):
        if sentence == "" or sentence == " ":
            continue
        if splitter2 := _first_special_character(sentence):
            line_language2 = language_detector(sentence)
            line_script2 = script_det(sentence)
            trans_text = translation_with_spcecial_dots(
                sentence, source_lang, target_lang, splitter2,
                line_language2, line_script2,
                script_data, subsentence_choices)
        else:
            is_last = index == last_index
            # NOTE(review): the final fragment is translated regardless of
            # subsentence_choices, as the original appears to do -- confirm
            # that this is intended.
            if is_last or _should_translate(
                    line_language, line_script, script_data, subsentence_choices):
                trans_text = all_translator(
                    sentence, source_lang, target_lang)
            else:
                # Flag is off: keep the fragment untranslated instead of
                # reusing a stale or unbound value.
                trans_text = sentence
            if not is_last:
                trans_text = trans_text + splitter
        translated_text.append(trans_text)
    return " ".join(translated_text)


def translate_comparison2(text, source_lang, target_lang, script_data=None, subsentence_choices=None, is_dialogue=False):
    """Translate *text*, applying per-sub-sentence choices for dialogues.

    When *is_dialogue* is false the whole text is passed to
    ``all_translator``.  Otherwise the text is split into sentences
    (``sent_tokenize`` for the languages in ``_SENT_TOKENIZER_LANGS``,
    ``str.split(".")`` for the rest) and each sentence is either handled by
    ``translation_with_spcecial_dots`` (if it contains a special
    character), translated with ``all_translator`` (if ``_should_translate``
    approves), or kept unchanged.

    Returns the pieces joined by single spaces.  If sentence splitting
    fails for any reason the error is printed and *text* is returned
    untouched.
    """
    if not is_dialogue:
        return all_translator(text, source_lang, target_lang)

    try:
        language_name = code_2_language[source_lang].lower()
        if language_name in _SENT_TOKENIZER_LANGS:
            sentences = sent_tokenize(text, language=language_name)
        else:
            sentences = text.split(".")
    except Exception as e:
        print("Error was : ", e)
        return text

    # List of translated sentences
    translated_text = []
    # Translating each sentence one by one
    for sentence in sentences:
        line_language = language_detector(sentence)
        line_script = script_det(sentence)
        if splitter := _first_special_character(sentence):
            trans_text = translation_with_spcecial_dots(
                sentence, source_lang, target_lang, splitter,
                line_language, line_script, script_data, subsentence_choices)
        elif _should_translate(
                line_language, line_script, script_data, subsentence_choices):
            trans_text = all_translator(
                sentence, source_lang, target_lang)
        else:
            # Flag is off: keep the sentence untranslated instead of
            # reusing a stale or unbound value.
            trans_text = sentence
        translated_text.append(trans_text)
    return " ".join(translated_text)