"""Pick one translation out of several engine outputs.

Several scoring functions (imported from ``translation_metric``) each
nominate a candidate translation, ``selection_source`` ranks the nominated
sources, and ``final_out`` returns the best-ranked candidate whose words are
all written in the default script of the target language.

The statements at the bottom of the file run a small sample through
``compare_outputs`` and print which engine was selected.
"""

import os
import sys
import docx
import re
# import textract
from tqdm import tqdm
from collections import Counter
import ntpath
from docx.shared import Inches, Cm, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
import requests
import uuid
import json
import nltk.translate.bleu_score as bleu
import nltk.translate.gleu_score as gleu
from rouge_score import rouge_scorer
import numpy as np
import statistics
from statistics import mode
from indicnlp.tokenize import sentence_tokenize
import nltk

# Probe for the NLTK resources. Downloading is deliberately disabled, so a
# missing resource is only reported (or ignored), never fetched.
try:
    print("time555555")
    nltk.data.find('tokenizers/punkt')
except LookupError:
    # nltk.download('punkt')
    pass
try:
    nltk.data.find('wordnet')
except LookupError:
    # nltk.download('wordnet')
    print("error in finding wordnet3333333")

from nltk.tokenize import sent_tokenize
from translation_metric import (
    manual_diff_score,
    bleu_diff_score,
    gleu_diff_score,
    meteor_diff_score,
    rouge_diff_score,
    diff_score,
    critera4_5,
)
from script_detector import script_cat
# from buck_2_unicode import buck_2_unicode
from selection_source import (
    selection_source,
    function5,
    function41,
    function311,
    function221,
    function2111,
    function11111,
    selection_source_transliteration,
    two_sources_two_outputs,
)
from script_writing import (
    addSlugLine,
    addActionLine,
    addSpeaker,
    addParenthetical,
    addDialogue,
    dual_script,
    addTransition,
    dial_checker,
    non_dial_checker,
)
# from script_reading import breaksen, getRefined, getSlugAndNonSlug, getSpeakers, getScenes
from translation_resources import ibm_watson, google, aws, azure, lingvanex, yandex
# from transliteration_resources import azure_transliteration, indic_trans, om_transliterator, libindic, indic_transliteration_IAST, indic_transliteration_ITRANS, sheetal, ritwik
from script_writing import default_script
from nltk.tokenize import regexp_tokenize


# Characters skipped by script_det() when looking for the first "real"
# character of a word. Raw string: the literal contains a backslash.
_SCRIPT_DET_PUNCTUATION = r'''!()-[]{};:'"\,<>./?@#$%^&*_~“"”'''

# Characters (punctuation and ASCII digits) that punct_remover() blanks out.
_REMOVABLE_CHARS = r'''!()-[]{};:'"\,<>./?@#$%^&*_~…।1234567890'''
_REMOVABLE_TO_SPACE = str.maketrans({char: " " for char in _REMOVABLE_CHARS})


def script_det(text):
    """Return the script of the first non-punctuation character of *text*.

    The character is classified by ``script_cat`` and the first element of
    its result is returned.

    NOTE(review): when *text* is empty or consists only of punctuation,
    ``script_cat`` is called with an empty string; how it reacts is not
    visible from this file.
    """
    first_char = next(
        (char for char in text if char not in _SCRIPT_DET_PUNCTUATION),
        "",
    )
    return script_cat(first_char)[0]


def punct_remover(string):
    """Return *string* with punctuation marks and ASCII digits replaced by spaces."""
    # One translate() pass instead of one str.replace() per offending character.
    return string.translate(_REMOVABLE_TO_SPACE)


def word_transliterate(sentence, dest_script):
    """Placeholder for transliteration: returns *sentence* unchanged.

    *dest_script* is accepted but not used yet.
    """
    return sentence


def _all_words_in_script(text, dest_lang):
    """Return True when every word of *text* is in the default script of *dest_lang*.

    Punctuation and digits are blanked out first. A text without any words
    counts as matching. ``default_script`` is only consulted once a word has
    actually been found.
    """
    return all(
        script_det(word) == default_script[dest_lang]
        for word in punct_remover(text).split()
    )


def final_out(output1, output2, output3, dest_lang):
    """Return the first candidate written entirely in the target script.

    The candidates are tried in order (*output1*, *output2*, *output3*). If
    none of them qualifies, *output1* is passed through
    ``word_transliterate`` and returned.
    """
    for candidate in (output1, output2, output3):
        if _all_words_in_script(candidate, dest_lang):
            return candidate
    return word_transliterate(output1, default_script[dest_lang])


def _translation_for_source(source_name, sources_name, trans):
    """Return the entry of *trans* that belongs to the engine *source_name*.

    *sources_name* maps keys to engine names; the key found there is used
    (as a string) to index *trans*. If several keys carry the same name the
    last one wins.

    Raises:
        KeyError: if *source_name* is not a value of *sources_name*, or the
            matching key is missing from *trans*.
    """
    matching_keys = [key for key, name in sources_name.items() if name == source_name]
    if not matching_keys:
        raise KeyError(f"unknown translation source: {source_name!r}")
    return trans[str(matching_keys[-1])]


def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose the final translation among the candidates in *trans*.

    Args:
        sentence: The source sentence. Currently unused; it only fed the
            comparison-document output, which is disabled.
        t0: The reference ("google") output the scored candidates are
            compared against.
        trans: Mapping from source key (string) to that engine's translation.
        sources_name: Mapping from source key to engine name.
        target_lang: Language code used to look up the expected script in
            ``default_script``.

    Returns:
        *t0* when every scoring method nominates exactly *t0*; otherwise the
        result of ``final_out`` applied to the translations of the three
        sources ranked by ``selection_source``.

    Raises:
        KeyError: if a source chosen by ``selection_source`` cannot be mapped
            back to an entry of *trans*.
    """
    methods_name = {'0': 'MNF', '1': 'Gleu', '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
    google_output = t0

    # Each scoring function yields its preferred output and the source that
    # produced it; the rouge scorer yields two such pairs.
    output1, source1 = manual_diff_score(trans, sources_name)
    output2, source2 = gleu_diff_score(trans, sources_name)
    output3, source3 = meteor_diff_score(trans, sources_name)
    output4, source4, output5, source5 = rouge_diff_score(trans, sources_name)

    if google_output == output1 == output2 == output3 == output4 == output5:
        return google_output

    # Per method: the nominated source, or " " when the method simply agreed
    # with the reference output.
    nominations = (
        (output1, source1),
        (output2, source2),
        (output3, source3),
        (output4, source4),
        (output5, source5),
    )
    s = [
        source if output != google_output else " "
        for output, source in nominations
    ]

    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(s, sources_name, trans, methods_name)

    output1 = _translation_for_source(s1ANDm1[0], sources_name, trans)

    # An empty source name means there is no second/third choice; fall back
    # to the first choice in that case.
    if s2ANDm2[0] != "":
        output2 = _translation_for_source(s2ANDm2[0], sources_name, trans)
    else:
        output2 = output1

    if s3ANDm3[0] != "":
        output3 = _translation_for_source(s3ANDm3[0], sources_name, trans)
    else:
        output3 = output1

    return final_out(output1, output2, output3, target_lang)


# ---------------------------------------------------------------------------
# Sample run: feed three engine outputs through compare_outputs() and print
# which engine was selected.
# ---------------------------------------------------------------------------
# sentence="I am asad"
sentence = ""
trans = {
    "0": "Welcome to this tutorial on navigating a course in my testicle. Courses are in-depth training which contain multiple topics and knowledge.Check questions using a visual theme.In this video,we discuss how to access a course in my testicle.",
    "1": "welcome to this tutorial on navigating a course module in minetickle courses are in depth training modules which contain multiple topics and knowledge check questions using a visual theme in this video we discuss how to access a course module in mine tickle",
    "2": "Welcome to this tutorial on navigating a course module in Mindtickle courses are in depth training modules, which contain multiple topics and knowledge check questions.Using a visual theme.In this video, we discuss how to access a course module in Mindtickle.",
}
# t0 = trans["0"]
# t0 = "Welcome to this tutorial on navigating"
t0 = " "
sources_name = {'0': "google", "1": 'azure', "2": 'aws'}
target_lang = "en"

selected_para = compare_outputs(sentence, t0, trans, sources_name, target_lang)

key_list = list(trans.keys())
val_list = list(trans.values())
position = val_list.index(selected_para)
# list.index() returns an int, so compare the *key* at that position with the
# string keys of `trans` (comparing the int itself with '0'/'1' never matches).
selected_key = key_list[position]
if selected_key == '0':
    print("Google : \n", selected_para)
elif selected_key == '1':
    print("Azure :\n", selected_para)
else:
    print("AWS :\n", selected_para)