216 lines
7.8 KiB
Python
Executable File
216 lines
7.8 KiB
Python
Executable File
import os
|
|
import sys
|
|
import docx
|
|
import re
|
|
# import textract
|
|
from tqdm import tqdm
|
|
from collections import Counter
|
|
import ntpath
|
|
from docx.shared import Inches, Cm, Pt
|
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
|
|
import requests, uuid, json
|
|
import nltk.translate.bleu_score as bleu
|
|
import nltk.translate.gleu_score as gleu
|
|
from rouge_score import rouge_scorer
|
|
import numpy as np
|
|
import statistics
|
|
from statistics import mode
|
|
from indicnlp.tokenize import sentence_tokenize
|
|
import nltk
|
|
# Probe for the NLTK resources this script relies on. Nothing is downloaded
# here: the download calls were disabled, so a missing resource is only
# ignored (punkt) or reported on stdout (wordnet).
try:
    print("time555555")
    nltk.data.find('tokenizers/punkt')
except LookupError:
    # Best effort: the punkt tokenizer data is absent and the automatic
    # nltk.download('punkt') fallback is intentionally switched off.
    pass

try:
    nltk.data.find('wordnet')
except LookupError:
    # The automatic nltk.download('wordnet') fallback is switched off too;
    # only report the miss.
    print("error in finding wordnet3333333")
|
|
from nltk.tokenize import sent_tokenize
|
|
from rouge_score import rouge_scorer
|
|
from translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
|
|
|
|
from script_detector import script_cat
|
|
# from buck_2_unicode import buck_2_unicode
|
|
from translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
|
|
from selection_source import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs
|
|
from script_writing import addSlugLine, addActionLine, addSpeaker, addParenthetical, addDialogue, dual_script, addTransition, dial_checker, non_dial_checker
|
|
# from script_reading import breaksen, getRefined, getSlugAndNonSlug, getSpeakers, getScenes
|
|
from translation_resources import ibm_watson, google, aws, azure, lingvanex, yandex
|
|
# from transliteration_resources import azure_transliteration, indic_trans, om_transliterator, libindic, indic_transliteration_IAST, indic_transliteration_ITRANS, sheetal, ritwik
|
|
|
|
from script_writing import default_script
|
|
from nltk.tokenize import regexp_tokenize
|
|
|
|
def script_det(text):
    """Return the script category of the first non-punctuation character.

    The characters of ``text`` are scanned left to right and the first one
    that is not in the punctuation set is handed to ``script_cat``; element
    ``[0]`` of its result is returned.

    Args:
        text: The string (typically a single word) to classify.

    Returns:
        ``script_cat(first_char)[0]`` for the first non-punctuation character.

    Note:
        When ``text`` is empty or consists only of punctuation, ``script_cat``
        is called with an empty string, exactly as before.
        NOTE(review): confirm that ``script_cat("")`` is well defined.
    """
    # Raw string: the set contains a literal backslash. The previous non-raw
    # literal spelled it as the invalid escape "\," which newer Python
    # versions warn about; the resulting characters are unchanged.
    punctuations = r'''!()-[]{};:'"\,<>./?@#$%^&*_~“"”'''
    first_char = next((char for char in text if char not in punctuations), "")
    return script_cat(first_char)[0]
|
|
|
|
# Characters blanked out by punct_remover: ASCII punctuation (including a
# literal backslash), the ellipsis, the danda and the ASCII digits.
# Raw string so that the backslash is not written as the invalid escape "\,".
_PUNCT_AND_DIGITS = r'''!()-[]{};:'"\,<>./?@#$%^&*_~…।1234567890'''
_PUNCT_TO_SPACE = str.maketrans(dict.fromkeys(_PUNCT_AND_DIGITS, " "))


def punct_remover(string):
    """Replace every punctuation mark and ASCII digit in ``string`` with a space.

    Each matching character becomes exactly one space, so the result has the
    same length as the input and runs of punctuation become runs of spaces.
    Letter case and all other characters (including curly quotes) are left
    untouched.

    Args:
        string: The text to clean.

    Returns:
        A new string with the listed characters replaced by ``" "``.
    """
    # One pass over the text instead of one full-string ``replace`` per
    # punctuation character encountered.
    return string.translate(_PUNCT_TO_SPACE)
|
|
|
|
def word_transliterate(sentence, dest_script):
    """Placeholder transliteration hook: hand ``sentence`` back unchanged.

    Args:
        sentence: The text that would be transliterated.
        dest_script: The requested target script; currently ignored.

    Returns:
        ``sentence`` itself, unmodified.
    """
    unchanged = sentence
    return unchanged
|
|
|
|
|
|
def final_out(output1, output2, output3, dest_lang):
    """Pick the first candidate whose words are all in the target script.

    The candidates are tried in the order ``output1``, ``output2``,
    ``output3``. A candidate is accepted when, after ``punct_remover``, every
    whitespace-separated word is classified by ``script_det`` as
    ``default_script[dest_lang]`` (a candidate with no words is accepted).
    If all three contain a word in another script, ``output1`` is passed
    through ``word_transliterate`` and that result is returned.

    Args:
        output1: Preferred candidate text.
        output2: Second-choice candidate text.
        output3: Third-choice candidate text.
        dest_lang: Key into ``default_script`` naming the target language.

    Returns:
        The chosen candidate (original text, punctuation intact).
    """
    # All three candidates are cleaned up front, before any script check.
    cleaned_first = punct_remover(output1)
    cleaned_second = punct_remover(output2)
    cleaned_third = punct_remover(output3)

    candidates = (
        (cleaned_first, output1),
        (cleaned_second, output2),
        (cleaned_third, output3),
    )
    for cleaned, original in candidates:
        # ``all`` stops at the first off-script word.
        if all(script_det(token) == default_script[dest_lang]
               for token in cleaned.split()):
            return original

    # Every candidate contains at least one off-script word.
    return word_transliterate(output1, default_script[dest_lang])
|
|
|
|
def _translation_for_source(source_label, sources_name, trans):
    """Return the entry of ``trans`` that belongs to the engine ``source_label``.

    ``sources_name`` maps a key to an engine name; the key of the matching
    engine is used (as a string) to index ``trans``. If several keys carry the
    same name, the last one wins.

    Raises:
        KeyError: If no engine in ``sources_name`` is called ``source_label``,
            or the matching key is missing from ``trans``.
    """
    matching_keys = [key for key, label in sources_name.items()
                     if label == source_label]
    if not matching_keys:
        raise KeyError(
            "source %r is not one of the known sources %r"
            % (source_label, list(sources_name.values()))
        )
    return trans[str(matching_keys[-1])]


def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose one translation out of ``trans`` using several similarity metrics.

    Each metric helper (``manual_diff_score``, ``gleu_diff_score``,
    ``meteor_diff_score`` and ``rouge_diff_score``, which yields two results)
    returns a preferred output together with the source it came from. When
    all five preferred outputs equal ``t0``, ``t0`` is returned directly.
    Otherwise the sources of the disagreeing metrics are passed to
    ``selection_source`` to obtain up to three ranked sources, the matching
    translations are looked up in ``trans`` and ``final_out`` makes the final
    choice for ``target_lang``.

    Args:
        sentence: The source sentence. Currently unused: it was only collected
            for comparison-document writers that are disabled. Kept so that
            existing callers keep working.
        t0: The reference (Google) output the metric results are compared with.
        trans: Mapping from string key to candidate translation.
        sources_name: Mapping from the same keys to engine names.
        target_lang: Language code forwarded to ``final_out``.

    Returns:
        ``t0`` when every metric agrees with it, otherwise the result of
        ``final_out`` for the selected candidates.

    Raises:
        KeyError: If a source chosen by ``selection_source`` is not present in
            ``sources_name`` / ``trans``.
    """
    methods_name = {'0': 'MNF', '1': 'Gleu', '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
    google_output = t0

    output1, source1 = manual_diff_score(trans, sources_name)
    output2, source2 = gleu_diff_score(trans, sources_name)
    output3, source3 = meteor_diff_score(trans, sources_name)
    output4, source4, output5, source5 = rouge_diff_score(trans, sources_name)

    # Order matters: the position corresponds to the keys of ``methods_name``.
    metric_picks = [
        (output1, source1),
        (output2, source2),
        (output3, source3),
        (output4, source4),
        (output5, source5),
    ]

    if all(picked == google_output for picked, _ in metric_picks):
        return google_output

    # A metric that agrees with the reference contributes the placeholder " "
    # instead of its source name.
    s = [source if picked != google_output else " "
         for picked, source in metric_picks]

    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(s, sources_name, trans, methods_name)

    output1 = _translation_for_source(s1ANDm1[0], sources_name, trans)

    # An empty source name means "no second/third choice": fall back to the
    # first choice.
    if s2ANDm2[0] != "":
        output2 = _translation_for_source(s2ANDm2[0], sources_name, trans)
    else:
        output2 = output1

    if s3ANDm3[0] != "":
        output3 = _translation_for_source(s3ANDm3[0], sources_name, trans)
    else:
        output3 = output1

    return final_out(output1, output2, output3, target_lang)
|
|
|
|
|
|
|
|
#compare_outputs(sentence, t0, trans, sources_name, target_lang):
|
|
|
|
from translation_metric import diff_score
|
|
# sentence="I am asad"
|
|
# Demo run: feed three transcriptions of the same passage to compare_outputs
# and report which engine's text was selected.
sentence = ""

trans = {
    "0": "Welcome to this tutorial on navigating a course in my testicle. Courses are in-depth training which contain multiple topics and knowledge.Check questions using a visual theme.In this video,we discuss how to access a course in my testicle.",
    "1": "welcome to this tutorial on navigating a course module in minetickle courses are in depth training modules which contain multiple topics and knowledge check questions using a visual theme in this video we discuss how to access a course module in mine tickle",
    "2": "Welcome to this tutorial on navigating a course module in Mindtickle courses are in depth training modules, which contain multiple topics and knowledge check questions.Using a visual theme.In this video, we discuss how to access a course module in Mindtickle.",
}

# A blank reference: compare_outputs only short-circuits when every metric
# result equals this value.
t0 = " "
sources_name = {'0': "google", "1": 'azure', "2": 'aws'}
target_lang = "en"

selected_para = compare_outputs(sentence, t0, trans, sources_name, target_lang)

key_list = list(trans.keys())
val_list = list(trans.values())

# list.index returns an int, so compare against integers. The previous
# comparison against the strings '0' / '1' was never true and every result
# was reported as AWS.
position = val_list.index(selected_para)
if position == 0:
    print("Google : \n", selected_para)
elif position == 1:
    print("Azure :\n", selected_para)
else:
    print("AWS :\n", selected_para)
|