469 lines
14 KiB
Python
Executable File
469 lines
14 KiB
Python
Executable File
from collections import Counter
|
|
from .translation_metric import diff_score
|
|
import numpy as np
|
|
|
|
def function11111(sources_name, trans, s, methods_name):
    """Pick the best three of five distinct results (vote pattern 1-1-1-1-1).

    ``selection_source`` calls this when every value in ``s`` occurs exactly
    once, so vote counts cannot rank the candidates.  Each of the first five
    distinct values is scored with ``diff_score`` against ``trans["0"]`` and
    the three candidates with the smallest scores are returned, smallest
    first.

    Args:
        sources_name: Mapping whose values contain the candidates found in
            ``s``.  The position of a candidate among
            ``sources_name.values()``, converted to ``str``, is the key used
            to look up its entry in ``trans``.
        trans: Mapping keyed by stringified positions; ``trans["0"]`` is the
            reference every candidate is compared against.
        s: Sequence of results, one per method.
        methods_name: Mapping from the stringified position in ``s`` to the
            name of the method that produced that entry.

    Returns:
        Three ``(source, methods)`` tuples, where ``methods`` is the list of
        method names whose entry in ``s`` equals ``source``.

    Raises:
        IndexError: If ``s`` holds fewer than five distinct values.
        ValueError: If a candidate is missing from ``sources_name.values()``.
    """
    # most_common() sorts by count (descending) and keeps first-seen order
    # for equal counts, which is the order the old nested loops produced.
    ranked = [value for value, _ in Counter(s).most_common()]
    candidates = [ranked[0], ranked[1], ranked[2], ranked[3], ranked[4]]

    # Map every distinct value to the positions at which it occurs in ``s``.
    positions = {}
    for index, value in enumerate(s):
        positions.setdefault(value, []).append(index)

    # Resolve all trans keys before scoring so a missing candidate fails
    # before any diff_score call is made.
    names = list(sources_name.values())
    trans_keys = [str(names.index(candidate)) for candidate in candidates]
    reference = trans["0"]
    scores = [diff_score(trans[key], reference) for key in trans_keys]

    # argsort is ascending: the smallest diff_score comes first.
    # NOTE(review): presumably a smaller score means "closer to the
    # reference" - confirm against translation_metric.diff_score.
    order = np.array(scores).argsort()

    def methods_for(source):
        return [methods_name[str(index)] for index in positions[source]]

    source1 = candidates[int(order[0])]
    source2 = candidates[int(order[1])]
    source3 = candidates[int(order[2])]
    return (
        (source1, methods_for(source1)),
        (source2, methods_for(source2)),
        (source3, methods_for(source3)),
    )
|
|
|
|
def function2111(sources_name, trans, s, methods_name):
    """Pick three results for the vote pattern 2-1-1-1.

    ``selection_source`` calls this when one value of ``s`` occurs twice and
    three others occur once.  The most frequent value is returned first.  The
    next three distinct values are scored with ``diff_score`` against
    ``trans["0"]`` and the two with the smallest scores fill the second and
    third slot, smallest first.

    Args:
        sources_name: Mapping whose values contain the candidates found in
            ``s``.  The position of a candidate among
            ``sources_name.values()``, converted to ``str``, is the key used
            to look up its entry in ``trans``.
        trans: Mapping keyed by stringified positions; ``trans["0"]`` is the
            reference the tied candidates are compared against.
        s: Sequence of results, one per method.
        methods_name: Mapping from the stringified position in ``s`` to the
            name of the method that produced that entry.

    Returns:
        Three ``(source, methods)`` tuples, where ``methods`` is the list of
        method names whose entry in ``s`` equals ``source``.

    Raises:
        IndexError: If ``s`` holds fewer than four distinct values.
        ValueError: If a tied candidate is missing from
            ``sources_name.values()``.
    """
    # most_common() sorts by count (descending) and keeps first-seen order
    # for equal counts, which is the order the old nested loops produced.
    ranked = [value for value, _ in Counter(s).most_common()]
    leader = ranked[0]
    tied = [ranked[1], ranked[2], ranked[3]]

    # Map every distinct value to the positions at which it occurs in ``s``.
    positions = {}
    for index, value in enumerate(s):
        positions.setdefault(value, []).append(index)

    # Only the tied candidates are scored; the leader wins on votes alone.
    names = list(sources_name.values())
    trans_keys = [str(names.index(candidate)) for candidate in tied]
    reference = trans["0"]
    scores = [diff_score(trans[key], reference) for key in trans_keys]

    # argsort is ascending: the smallest diff_score comes first.
    order = np.array(scores).argsort()

    def methods_for(source):
        return [methods_name[str(index)] for index in positions[source]]

    source1 = leader
    source2 = tied[int(order[0])]
    source3 = tied[int(order[1])]
    return (
        (source1, methods_for(source1)),
        (source2, methods_for(source2)),
        (source3, methods_for(source3)),
    )
|
|
|
|
def function221(sources_name, trans, s, methods_name):
    """Pick three results for the vote pattern 2-2-1.

    ``selection_source`` calls this when two values of ``s`` occur twice and
    a third occurs once.  The two most frequent values are scored with
    ``diff_score`` against ``trans["0"]`` and returned smallest score first;
    the third distinct value always takes the last slot.

    Args:
        sources_name: Mapping whose values contain the candidates found in
            ``s``.  The position of a candidate among
            ``sources_name.values()``, converted to ``str``, is the key used
            to look up its entry in ``trans``.
        trans: Mapping keyed by stringified positions; ``trans["0"]`` is the
            reference the two leading candidates are compared against.
        s: Sequence of results, one per method.
        methods_name: Mapping from the stringified position in ``s`` to the
            name of the method that produced that entry.

    Returns:
        Three ``(source, methods)`` tuples, where ``methods`` is the list of
        method names whose entry in ``s`` equals ``source``.

    Raises:
        IndexError: If ``s`` holds fewer than three distinct values.
        ValueError: If one of the two leading candidates is missing from
            ``sources_name.values()``.
    """
    # most_common() sorts by count (descending) and keeps first-seen order
    # for equal counts, which is the order the old nested loops produced.
    ranked = [value for value, _ in Counter(s).most_common()]
    leaders = [ranked[0], ranked[1]]
    trailing = ranked[2]

    # Map every distinct value to the positions at which it occurs in ``s``.
    positions = {}
    for index, value in enumerate(s):
        positions.setdefault(value, []).append(index)

    # Only the two leaders need a score; the trailing value is never
    # compared, so it is not looked up in sources_name.
    names = list(sources_name.values())
    trans_keys = [str(names.index(candidate)) for candidate in leaders]
    reference = trans["0"]
    scores = [diff_score(trans[key], reference) for key in trans_keys]

    # argsort is ascending: the smallest diff_score comes first.
    order = np.array(scores).argsort()

    def methods_for(source):
        return [methods_name[str(index)] for index in positions[source]]

    source1 = leaders[int(order[0])]
    source2 = leaders[int(order[1])]
    source3 = trailing
    return (
        (source1, methods_for(source1)),
        (source2, methods_for(source2)),
        (source3, methods_for(source3)),
    )
|
|
|
|
def function311(sources_name, trans, s, methods_name):
    """Pick three results for the vote pattern 3-1-1.

    ``selection_source`` calls this when one value of ``s`` occurs three
    times and two others occur once.  The most frequent value is returned
    first.  The two remaining values are scored with ``diff_score`` against
    ``trans["0"]`` and returned smallest score first.

    Args:
        sources_name: Mapping whose values contain the candidates found in
            ``s``.  The position of a candidate among
            ``sources_name.values()``, converted to ``str``, is the key used
            to look up its entry in ``trans``.
        trans: Mapping keyed by stringified positions; ``trans["0"]`` is the
            reference the tied candidates are compared against.
        s: Sequence of results, one per method.
        methods_name: Mapping from the stringified position in ``s`` to the
            name of the method that produced that entry.

    Returns:
        Three ``(source, methods)`` tuples, where ``methods`` is the list of
        method names whose entry in ``s`` equals ``source``.

    Raises:
        IndexError: If ``s`` holds fewer than three distinct values.
        ValueError: If a tied candidate is missing from
            ``sources_name.values()``.
    """
    # most_common() sorts by count (descending) and keeps first-seen order
    # for equal counts, which is the order the old nested loops produced.
    ranked = [value for value, _ in Counter(s).most_common()]
    leader = ranked[0]
    tied = [ranked[1], ranked[2]]

    # Map every distinct value to the positions at which it occurs in ``s``.
    positions = {}
    for index, value in enumerate(s):
        positions.setdefault(value, []).append(index)

    # Only the tied candidates are scored; the leader wins on votes alone.
    names = list(sources_name.values())
    trans_keys = [str(names.index(candidate)) for candidate in tied]
    reference = trans["0"]
    scores = [diff_score(trans[key], reference) for key in trans_keys]

    # argsort is ascending: the smallest diff_score comes first.
    order = np.array(scores).argsort()

    def methods_for(source):
        return [methods_name[str(index)] for index in positions[source]]

    source1 = leader
    source2 = tied[int(order[0])]
    source3 = tied[int(order[1])]
    return (
        (source1, methods_for(source1)),
        (source2, methods_for(source2)),
        (source3, methods_for(source3)),
    )
|
|
|
|
|
|
def function41(sources_name, trans, s, methods_name):
    """Return the two distinct results of a 4-1 or 3-2 vote.

    ``selection_source`` calls this when ``s`` holds exactly two distinct
    values.  They are returned most frequent first (first-seen order on a
    tie); the third slot is filled with empty strings.

    Args:
        sources_name: Unused; kept so all handlers share one signature.
        trans: Unused; kept so all handlers share one signature.
        s: Sequence of results, one per method.
        methods_name: Mapping from the stringified position in ``s`` to the
            name of the method that produced that entry.

    Returns:
        ``(source1, methods1), (source2, methods2), ("", "")`` where each
        ``methods`` list names the methods whose entry in ``s`` equals the
        paired source.

    Raises:
        IndexError: If ``s`` holds fewer than two distinct values.
    """
    # most_common() sorts by count (descending) and keeps first-seen order
    # for equal counts, which is the order the old nested loops produced.
    ranked = [value for value, _ in Counter(s).most_common()]
    source1 = ranked[0]
    source2 = ranked[1]

    # Map every distinct value to the positions at which it occurs in ``s``.
    positions = {}
    for index, value in enumerate(s):
        positions.setdefault(value, []).append(index)

    source1_methods = [methods_name[str(index)] for index in positions[source1]]
    source2_methods = [methods_name[str(index)] for index in positions[source2]]
    return (source1, source1_methods), (source2, source2_methods), ("", "")
|
|
|
|
def function5(sources_name, trans, s, methods_name):
    """Handle the unanimous case where ``s`` holds a single distinct value.

    The first distinct value of ``s`` is paired with every method name in
    ``methods_name``; the second and third slots are empty strings.
    ``sources_name`` and ``trans`` are not used.

    Returns:
        ``(source, all_method_names), ("", ""), ("", "")``.
    """
    distinct_values = [*Counter(s)]
    unanimous_source = distinct_values[0]
    every_method = [*methods_name.values()]
    return (unanimous_source, every_method), ("", ""), ("", "")
|
|
|
|
def selection_source(S, sources_name, trans, methods_name):
    """Dispatch to the handler that matches the vote pattern of ``S``.

    The entries of ``S`` are normalised, their occurrence counts are sorted
    in descending order, and the resulting pattern selects one of the
    ``function*`` handlers.  The handler receives the normalised list.

    Returns:
        The handler's three ``(source, methods)`` tuples, or ``None`` when
        the pattern is not one of the supported five-vote patterns.
    """
    # Every space character becomes the literal "GOOGLE".
    # NOTE(review): presumably a blank entry (" ") stands for the Google
    # source - confirm against the caller.
    s = [entry.replace(" ", "GOOGLE") for entry in S]
    pattern = tuple(sorted(Counter(s).values(), reverse=True))

    handlers = {
        (1, 1, 1, 1, 1): function11111,
        (2, 1, 1, 1): function2111,
        (2, 2, 1): function221,
        (3, 1, 1): function311,
        (4, 1): function41,
        (3, 2): function41,
        (5,): function5,
    }
    handler = handlers.get(pattern)
    if handler is None:
        return None
    return handler(sources_name, trans, s, methods_name)
|
|
|
|
def selection_source_transliteration(sources_name, O, priority_list):
    """Choose a primary and a secondary output from the candidates in ``O``.

    The outputs are grouped by value with ``two_sources_two_outputs``, which
    yields the most frequent output ``o1`` (with its sources ``s1``) and the
    runner-up ``o2`` / ``s2``.

    * If one output has strictly more votes than every other, it becomes the
      primary result.  If the runner-up is likewise strictly ahead of the
      rest (or there are only two distinct outputs), it becomes the
      secondary result.
    * If the top vote count is tied, the primary result is whichever of
      ``o1`` / ``o2`` was produced by ``priority_list[0]``; failing that it
      is ``O[0]`` attributed to ``priority_list[0]``.
    * In every remaining case the secondary result comes from the first
      entry of ``priority_list`` that is not already part of the primary
      source; when there is none, the runner-up ``(o2, s2)`` is used.

    Args:
        sources_name: Mapping from the stringified position in ``O`` to the
            name of the source that produced that output.
        O: Sequence of candidate outputs.
            NOTE(review): indexed with positions taken from
            ``priority_list``, so it is presumably ordered like
            ``priority_list`` - confirm against the caller.
        priority_list: Source names, highest priority first.

    Returns:
        ``(output1, source1), (output2, source2)``.  ``source1`` /
        ``source2`` is a list of source names when it comes from the vote
        grouping and a single ``priority_list`` entry when it comes from the
        priority fallback.

    Raises:
        IndexError: If ``O`` is empty, or if the top vote is tied and
            ``priority_list`` is empty.
    """
    vote_counts = sorted(Counter(O).values(), reverse=True)
    (o1, s1), (o2, s2) = two_sources_two_outputs(sources_name, O)

    top_is_clear = all(vote_counts[0] > count for count in vote_counts[1:])

    if top_is_clear:
        output1, source1 = o1, s1
        # A runner-up only exists with at least two distinct outputs; it is
        # "clear" when it strictly beats everything ranked below it.
        runner_up_is_clear = len(vote_counts) >= 2 and all(
            vote_counts[1] > count for count in vote_counts[2:]
        )
        if runner_up_is_clear:
            return (output1, source1), (o2, s2)
    elif priority_list[0] in s1:
        output1, source1 = o1, s1
    elif priority_list[0] in s2:
        output1, source1 = o2, s2
    else:
        output1, source1 = O[0], priority_list[0]

    # Priority fallback for the secondary result.  A private sentinel is
    # used so that no real source name can be mistaken for "nothing found".
    # NOTE(review): when source1 is a single name (str) rather than a list,
    # ``not in`` is a substring test - confirm that is acceptable.
    nothing_found = object()
    fallback_source = next(
        (name for name in priority_list if name not in source1),
        nothing_found,
    )
    if fallback_source is nothing_found:
        return (output1, source1), (o2, s2)

    output2 = O[priority_list.index(fallback_source)]
    return (output1, source1), (output2, fallback_source)
|
|
|
|
def two_sources_two_outputs(sources_name, O):
    """Return the two most frequent outputs in ``O`` with their sources.

    Outputs are ranked by how often they occur in ``O`` (descending); equal
    counts keep the order in which the outputs first appear.

    Args:
        sources_name: Mapping from the stringified position in ``O`` to the
            name of the source that produced that output.
        O: Sequence of candidate outputs.

    Returns:
        ``(out1, sources1), (out2, sources2)`` where each ``sources`` list
        names the sources whose output equals the paired value.  When ``O``
        holds a single distinct value the second pair is ``("", "")``.

    Raises:
        IndexError: If ``O`` is empty.
    """
    # most_common() sorts by count (descending) and keeps first-seen order
    # for equal counts, which is the order the old nested loops produced.
    ranked = [value for value, _ in Counter(O).most_common()]

    # Map every distinct output to the positions at which it occurs in ``O``.
    positions = {}
    for index, value in enumerate(O):
        positions.setdefault(value, []).append(index)

    out1 = ranked[0]
    source1 = [sources_name[str(index)] for index in positions[out1]]
    if len(ranked) == 1:
        return (out1, source1), ("", "")

    out2 = ranked[1]
    source2 = [sources_name[str(index)] for index in positions[out2]]
    return (out1, source1), (out2, source2)