Conversion_Kitchen_Code/kitchen_counter/conversion/translation/translation_metric.py

266 lines
7.5 KiB
Python
Executable File

# from rouge_score import rouge_scorer
from rouge_score import rouge_scorer
import nltk
from collections import Counter
from nltk.translate import bleu_score as bleu
from nltk.translate import gleu_score as gleu
from nltk.tokenize import word_tokenize
def critera4_5(trans1, trans2, n):
    """Count the words of ``trans2`` that cannot be aligned, in order, with ``trans1``.

    Both texts are split on whitespace.  Every word of ``trans2`` from index
    ``n`` onwards is looked up in ``trans1``, starting just past the position
    of the previous successful match (initially ``n + 1``).  A word that is
    found contributes 0 to the result, a word that is not found contributes 1.

    When no words of ``trans1`` are left to search, the current word inherits
    the verdict of the previous word (0 for the very first word).  This
    carry-over is kept on purpose: it is what makes two identical texts
    score 0 when ``n`` points at their last word.

    Args:
        trans1: Text whose words are searched.
        trans2: Text whose words are looked up.
        n: Index in ``trans2`` at which the comparison starts.

    Returns:
        The number of words of ``trans2`` counted as unmatched.
    """
    # Split once instead of on every comparison.
    words1 = trans1.split()
    words2 = trans2.split()

    search_from = n + 1  # first index of words1 still eligible for a match
    penalty = 0          # verdict of the most recent lookup (carried over)
    unmatched = 0
    for i in range(n, len(words2)):
        for j in range(search_from, len(words1)):
            if words1[j] == words2[i]:
                penalty = 0
                # Later words may only match after this position.
                search_from = j + 1
                break
            penalty = 1
        unmatched += penalty
    return unmatched
def diff_score(trans1, trans2):
    """Return a heuristic difference count between two translations.

    One point is added for each criterion on which the texts disagree:

    1. different number of whitespace-separated words;
    2. different first word;
    3. any position-wise mismatch within the length of the shorter text;
    4. the out-of-order / missing word count reported by ``critera4_5``
       (this criterion can add more than one point);
    5. different last word or different last character;
    6. different number of commas.

    Args:
        trans1: First translation text.
        trans2: Second translation text.

    Returns:
        The accumulated difference count.  ``0`` is returned when either text
        is empty or contains only whitespace.  ``None`` is returned when an
        unexpected error occurred; the error is appended to the failure log.
    """
    try:
        if trans1 == "" or trans2 == "":
            return 0
        # Split once; every criterion below works on these word lists.
        words1 = trans1.split()
        words2 = trans2.split()
        # Whitespace-only text has no words: treat it like the empty string
        # instead of failing on the word indexing below.
        if not words1 or not words2:
            return 0

        d = 0
        if len(words1) != len(words2):
            d += 1
        if words1[0] != words2[0]:
            d += 1

        # Find the first position at which the two texts diverge.  When they
        # never diverge, ``i`` ends on the last index of the shorter text.
        mismatch = False
        for i in range(min(len(words1), len(words2))):
            if words1[i] != words2[i]:
                mismatch = True
                break
        if mismatch:
            d += 1
        n = i

        # Accumulate rather than overwrite, so the criteria above still count.
        d += critera4_5(trans1, trans2, n)

        if words1[-1] != words2[-1] or trans1[-1] != trans2[-1]:
            d += 1
        if trans1.count(",") != trans2.count(","):
            d += 1
        return d
    except Exception as e:
        # Best-effort diagnostics; each record is
        # "Exception was ->[<error>-<trans1>-<trans2>".
        with open("/home/user/mnf/project/MNF/log/conversion_translation_fail.log", "a") as f:
            f.write("Exception was ->" + "[" + str(e) + "-" + str(trans1) + "-" + str(trans2) + "\n")
        return None
def manual_diff_score(trans, sources_name):
    """Pick the translation that differs least, on average, from the others.

    The pairwise distance is ``diff_score``.  Pairs in which either text is
    empty or whitespace-only are skipped, i.e. they contribute a distance
    of 0.

    Args:
        trans: Mapping from the string indices ``"0"`` .. ``"n-1"`` to the
            candidate translation texts.
        sources_name: Mapping with the same keys giving the value returned
            alongside the chosen translation (presumably the name of the
            source that produced it).

    Returns:
        A ``(translation, source)`` tuple for the candidate with the smallest
        mean distance; ties go to the lowest index.

    Raises:
        ValueError: If ``sources_name`` is empty.
    """
    n = len(sources_name)
    if n == 0:
        raise ValueError("manual_diff_score needs at least one translation")
    if n == 1:
        # A lone candidate has no peers to be compared with.
        return trans["0"], sources_name["0"]

    global_diff = []
    for i in range(n):
        local_diff = 0
        for j in range(n):
            if i == j:
                continue
            first, second = trans[str(i)], trans[str(j)]
            # Blank text cannot be scored; skip the pair.
            if not first.strip() or not second.strip():
                continue
            local_diff += diff_score(first, second)
        global_diff.append(local_diff / (n - 1))

    chosen = str(global_diff.index(min(global_diff)))
    return trans[chosen], sources_name[chosen]
def bleu_diff_score(trans, sources_name):
    """Pick the translation with the smallest mean BLEU distance to the others.

    The distance from candidate ``i`` to candidate ``j`` is
    ``1 - sentence_bleu([words_j], words_i)``, where the words come from a
    plain whitespace split.

    Args:
        trans: Mapping from the string indices ``"0"`` .. ``"n-1"`` to the
            candidate translation texts.
        sources_name: Mapping with the same keys giving the value returned
            alongside the chosen translation (presumably the source name).

    Returns:
        A ``(translation, source)`` tuple for the candidate with the smallest
        mean distance; ties go to the lowest index.

    Raises:
        ValueError: If ``sources_name`` is empty.
    """
    n = len(sources_name)
    if n == 0:
        raise ValueError("bleu_diff_score needs at least one translation")
    if n == 1:
        # A lone candidate has no peers to be compared with.
        return trans["0"], sources_name["0"]

    # Tokenise every candidate once instead of once per pair.
    words = [trans[str(i)].split() for i in range(n)]

    global_diff = []
    for i in range(n):
        local_diff = 0
        for j in range(n):
            if i != j:
                local_diff += 1 - bleu.sentence_bleu([words[j]], words[i])
        global_diff.append(local_diff / (n - 1))

    chosen = str(global_diff.index(min(global_diff)))
    return trans[chosen], sources_name[chosen]
def gleu_diff_score(trans, sources_name):
    """Pick the translation with the smallest mean GLEU distance to the others.

    The distance from candidate ``i`` to candidate ``j`` is
    ``1 - sentence_gleu([words_j], words_i)``, where the words come from a
    plain whitespace split.

    Args:
        trans: Mapping from the string indices ``"0"`` .. ``"n-1"`` to the
            candidate translation texts.
        sources_name: Mapping with the same keys giving the value returned
            alongside the chosen translation (presumably the source name).

    Returns:
        A ``(translation, source)`` tuple for the candidate with the smallest
        mean distance; ties go to the lowest index.

    Raises:
        ValueError: If ``sources_name`` is empty.
    """
    n = len(sources_name)
    if n == 0:
        raise ValueError("gleu_diff_score needs at least one translation")
    if n == 1:
        # A lone candidate has no peers to be compared with.
        return trans["0"], sources_name["0"]

    # Tokenise every candidate once instead of once per pair.
    words = [trans[str(i)].split() for i in range(n)]

    global_diff = []
    for i in range(n):
        local_diff = 0
        for j in range(n):
            if i != j:
                local_diff += 1 - gleu.sentence_gleu([words[j]], words[i])
        global_diff.append(local_diff / (n - 1))

    chosen = str(global_diff.index(min(global_diff)))
    return trans[chosen], sources_name[chosen]
def meteor_diff_score(trans, sources_name):
    """Pick the translation with the smallest mean METEOR distance to the others.

    The distance from candidate ``i`` to candidate ``j`` is
    ``1 - meteor_score([tokens_j], tokens_i)``, where the tokens come from
    ``word_tokenize``.

    Args:
        trans: Mapping from the string indices ``"0"`` .. ``"n-1"`` to the
            candidate translation texts.
        sources_name: Mapping with the same keys giving the value returned
            alongside the chosen translation (presumably the source name).

    Returns:
        A ``(translation, source)`` tuple for the candidate with the smallest
        mean distance; ties go to the lowest index.

    Raises:
        ValueError: If ``sources_name`` is empty.
    """
    n = len(sources_name)
    if n == 0:
        raise ValueError("meteor_diff_score needs at least one translation")
    if n == 1:
        # A lone candidate has no peers to be compared with.
        return trans["0"], sources_name["0"]

    # Tokenise every candidate once instead of once per pair.
    tokens = [word_tokenize(trans[str(i)]) for i in range(n)]

    global_diff = []
    for i in range(n):
        local_diff = 0
        for j in range(n):
            if i != j:
                local_diff += 1 - nltk.translate.meteor_score.meteor_score([tokens[j]], tokens[i])
        global_diff.append(local_diff / (n - 1))

    # Values are distances, so the consensus candidate is the minimum, as in
    # the other *_diff_score selectors of this module.
    chosen = str(global_diff.index(min(global_diff)))
    return trans[chosen], sources_name[chosen]
# Module-level ROUGE scorer shared by rouge_diff_score; it is configured for
# the 'rouge2' and 'rougeL' metrics with stemming enabled.
scorer = rouge_scorer.RougeScorer(['rouge2', 'rougeL'], use_stemmer=True)
def rouge_diff_score(trans, sources_name):
    """Pick the consensus translation under ROUGE-2 and under ROUGE-L.

    For every ordered pair of candidates the module-level ``scorer`` is called
    as ``scorer.score(trans[j], trans[i])``.  The distance for each metric is
    ``1 - score[metric][2]`` (index 2 is the F-measure field of rouge_score's
    score tuple).  The two metrics are ranked independently.

    Args:
        trans: Mapping from the string indices ``"0"`` .. ``"n-1"`` to the
            candidate translation texts.
        sources_name: Mapping with the same keys giving the value returned
            alongside each chosen translation (presumably the source name).

    Returns:
        A 4-tuple ``(translation_rouge2, source_rouge2, translation_rougeL,
        source_rougeL)`` holding the candidate with the smallest mean distance
        for each metric; ties go to the lowest index.

    Raises:
        ValueError: If ``sources_name`` is empty.
    """
    n = len(sources_name)
    if n == 0:
        raise ValueError("rouge_diff_score needs at least one translation")
    if n == 1:
        # A lone candidate has no peers to be compared with.
        return trans["0"], sources_name["0"], trans["0"], sources_name["0"]

    global_diff1 = []  # mean ROUGE-2 distances
    global_diff2 = []  # mean ROUGE-L distances
    for i in range(n):
        local_diff1 = 0
        local_diff2 = 0
        for j in range(n):
            if i != j:
                scores = scorer.score(trans[str(j)], trans[str(i)])
                local_diff1 += 1 - scores['rouge2'][2]
                local_diff2 += 1 - scores['rougeL'][2]
        global_diff1.append(local_diff1 / (n - 1))
        global_diff2.append(local_diff2 / (n - 1))

    chosen1 = str(global_diff1.index(min(global_diff1)))
    chosen2 = str(global_diff2.index(min(global_diff2)))
    return trans[chosen1], sources_name[chosen1], trans[chosen2], sources_name[chosen2]
def _second_choice_by_priority(priority_list, O, taken, default):
    """Return ``(output, source)`` for the first priority source not in ``taken``.

    ``default`` is returned when every entry of ``priority_list`` is already
    contained in ``taken``.
    """
    for position, candidate in enumerate(priority_list):
        if candidate not in taken:
            # NOTE(review): assumes O is ordered like priority_list - confirm.
            return O[position], candidate
    return default


def selection_source_transliteration(sources_name, O, priority_list):
    """Select a first and a second (output, source) pair from the candidates.

    The outputs in ``O`` are grouped by value and ranked by frequency through
    ``two_sources_two_outputs``.

    * If one output is strictly more frequent than every other, it is the
      first choice.  The second choice is the runner-up when the runner-up is
      itself strictly more frequent than the remaining outputs (or when there
      are exactly two distinct outputs); otherwise it is taken from the
      priority list.
    * If the top frequency is tied, the first choice is whichever of the two
      leading groups contains ``priority_list[0]``; if neither does, it is
      ``O[0]`` paired with ``priority_list[0]``.  The second choice is taken
      from the priority list.

    "Taken from the priority list" means the first entry of ``priority_list``
    that is not contained in the first choice's source(s), paired with the
    element of ``O`` at that entry's position.  If no such entry exists, the
    runner-up group is used.

    Args:
        sources_name: Mapping from string indices (``"0"``, ``"1"``, ...) to
            source names, aligned with ``O``.
        O: Sequence of outputs, one per source.
        priority_list: Source names in decreasing order of preference.

    Returns:
        ``((output1, source1), (output2, source2))``.  A ``source`` is a list
        of names when it comes from a frequency group and a single name when
        it comes from the priority list.
    """
    counts = sorted(Counter(O).values(), reverse=True)
    (o1, s1), (o2, s2) = two_sources_two_outputs(sources_name, O)

    if all(counts[0] > other for other in counts[1:]):
        # A unique most frequent output exists.
        output1, source1 = o1, s1
        runner_up_is_unique = len(counts) >= 2 and all(
            counts[1] > other for other in counts[2:]
        )
        if runner_up_is_unique:
            return (output1, source1), (o2, s2)
    elif priority_list[0] in s1:
        output1, source1 = o1, s1
    elif priority_list[0] in s2:
        output1, source1 = o2, s2
    else:
        output1, source1 = O[0], priority_list[0]

    output2, source2 = _second_choice_by_priority(priority_list, O, source1, (o2, s2))
    return (output1, source1), (output2, source2)
def two_sources_two_outputs(sources_name, O):
    """Return the two most frequent outputs together with their sources.

    Outputs are ranked by how often they occur in ``O``; outputs with equal
    frequency keep their order of first appearance.

    Args:
        sources_name: Mapping from string indices (``"0"``, ``"1"``, ...) to
            source names, aligned with ``O``.
        O: Sequence of outputs, one per source.

    Returns:
        ``((out1, sources1), (out2, sources2))`` where ``sourcesK`` is the
        list of source names that produced ``outK``, in index order.  When
        ``O`` holds a single distinct output the second pair is ``("", "")``.

    Raises:
        IndexError: If ``O`` is empty.
    """
    # Group the positions of every distinct output in a single pass.
    positions = {}
    for index, output in enumerate(O):
        positions.setdefault(output, []).append(index)

    # sorted() is stable even with reverse=True, so equally frequent outputs
    # stay in order of first appearance.
    ranked = sorted(positions, key=lambda output: len(positions[output]), reverse=True)

    out1 = ranked[0]
    source1 = [sources_name[str(i)] for i in positions[out1]]
    if len(ranked) == 1:
        return (out1, source1), ("", "")

    out2 = ranked[1]
    source2 = [sources_name[str(i)] for i in positions[out2]]
    return (out1, source1), (out2, source2)