Conversion_Kitchen_Code/kitchen_counter/conversion/translation/selection_source.py

393 lines
12 KiB
Python
Raw Permalink Normal View History

2024-04-27 09:33:09 +00:00
from collections import Counter
from .translation_metric import diff_score
import numpy as np
# -> Each function below handles one distribution of agreement counts among the translation outputs;
# exactly one of them runs per call, chosen by selection_source from those counts.
def function11111(sources_name, trans, s, methods_name):
    """Pick three of five distinct sources, ordered by ascending diff_score.

    Used when every entry of ``s`` occurs exactly once (count pattern
    ``[1, 1, 1, 1, 1]``).

    Args:
        sources_name: Mapping whose values are searched for each source; the
            position found there (as a string) is the key used into ``trans``.
        trans: Mapping from stringified position to a translation;
            ``trans["0"]`` is the reference every candidate is scored against.
        s: Sequence of source labels; index ``i`` corresponds to
            ``methods_name[str(i)]``.
        methods_name: Mapping from stringified index to a method name.

    Returns:
        Three ``(source, methods)`` tuples for the three lowest-scoring
        sources, best first.
    """
    tally = Counter(s)
    # Stable sort: descending count, ties keep first-seen order.
    ranked = sorted(tally, key=tally.get, reverse=True)
    candidates = [ranked[slot] for slot in range(5)]

    # For every distinct label, the indices at which it appears in ``s``.
    positions = {
        label: [idx for idx, item in enumerate(s) if item == label]
        for label in tally
    }

    known_sources = list(sources_name.values())
    trans_keys = [str(known_sources.index(label)) for label in candidates]
    scores = [diff_score(trans[key], trans["0"]) for key in trans_keys]

    # argsort is ascending, so the smallest diff_score comes first.
    order = np.array(scores).argsort()
    chosen = [candidates[order[rank]] for rank in range(3)]

    return tuple(
        (label, [methods_name[str(idx)] for idx in positions[label]])
        for label in chosen
    )
def function2111(sources_name, trans, s, methods_name):
    """Select sources when one label occurs twice and three occur once.

    The most frequent label is returned first. The remaining three are
    scored with ``diff_score`` against ``trans["0"]`` and the two with the
    lowest scores follow, best first.

    Args:
        sources_name: Mapping whose values are searched for each source; the
            position found there (as a string) is the key used into ``trans``.
        trans: Mapping from stringified position to a translation.
        s: Sequence of source labels; index ``i`` corresponds to
            ``methods_name[str(i)]``.
        methods_name: Mapping from stringified index to a method name.

    Returns:
        Three ``(source, methods)`` tuples.
    """
    tally = Counter(s)
    # Stable sort: descending count, ties keep first-seen order.
    ranked = sorted(tally, key=tally.get, reverse=True)
    leader = ranked[0]
    contenders = [ranked[slot] for slot in range(1, 4)]

    # For every distinct label, the indices at which it appears in ``s``.
    positions = {
        label: [idx for idx, item in enumerate(s) if item == label]
        for label in tally
    }

    known_sources = list(sources_name.values())
    trans_keys = [str(known_sources.index(label)) for label in contenders]
    scores = [diff_score(trans[key], trans["0"]) for key in trans_keys]

    # Ascending order: lowest diff_score first.
    order = np.array(scores).argsort()
    chosen = [leader, contenders[order[0]], contenders[order[1]]]

    return tuple(
        (label, [methods_name[str(idx)] for idx in positions[label]])
        for label in chosen
    )
def function221(sources_name, trans, s, methods_name):
    """Select sources when two labels occur twice and one occurs once.

    The two most frequent labels are scored with ``diff_score`` against
    ``trans["0"]`` and returned lowest score first; the remaining label is
    always returned third.

    The third label is never scored, so it is no longer looked up in
    ``sources_name`` (the original did so and discarded the result, which
    could only raise a needless ``ValueError``).

    Args:
        sources_name: Mapping whose values are searched for each scored
            source; the position found there (as a string) is the key used
            into ``trans``.
        trans: Mapping from stringified position to a translation.
        s: Sequence of source labels; index ``i`` corresponds to
            ``methods_name[str(i)]``.
        methods_name: Mapping from stringified index to a method name.

    Returns:
        Three ``(source, methods)`` tuples.

    Raises:
        IndexError: If ``s`` holds fewer than three distinct labels.
        ValueError: If a scored label is not among ``sources_name`` values.
    """
    tally = Counter(s)
    # Stable sort: descending count, ties keep first-seen order.
    ranked = sorted(tally, key=tally.get, reverse=True)
    tied = [ranked[0], ranked[1]]
    remaining = ranked[2]

    # For every distinct label, the indices at which it appears in ``s``.
    positions = {
        label: [idx for idx, item in enumerate(s) if item == label]
        for label in tally
    }

    known_sources = list(sources_name.values())
    trans_keys = [str(known_sources.index(label)) for label in tied]
    scores = [diff_score(trans[key], trans["0"]) for key in trans_keys]

    # Ascending order: lowest diff_score first.
    order = np.array(scores).argsort()
    chosen = [tied[order[0]], tied[order[1]], remaining]

    return tuple(
        (label, [methods_name[str(idx)] for idx in positions[label]])
        for label in chosen
    )
def function311(sources_name, trans, s, methods_name):
    """Select sources when one label occurs three times and two occur once.

    The most frequent label is returned first. The other two are scored with
    ``diff_score`` against ``trans["0"]`` and follow in ascending score order.

    Args:
        sources_name: Mapping whose values are searched for each scored
            source; the position found there (as a string) is the key used
            into ``trans``.
        trans: Mapping from stringified position to a translation.
        s: Sequence of source labels; index ``i`` corresponds to
            ``methods_name[str(i)]``.
        methods_name: Mapping from stringified index to a method name.

    Returns:
        Three ``(source, methods)`` tuples.
    """
    tally = Counter(s)
    # Stable sort: descending count, ties keep first-seen order.
    ranked = sorted(tally, key=tally.get, reverse=True)
    leader = ranked[0]
    contenders = [ranked[1], ranked[2]]

    # For every distinct label, the indices at which it appears in ``s``.
    positions = {
        label: [idx for idx, item in enumerate(s) if item == label]
        for label in tally
    }

    known_sources = list(sources_name.values())
    trans_keys = [str(known_sources.index(label)) for label in contenders]
    scores = [diff_score(trans[key], trans["0"]) for key in trans_keys]

    # Ascending order: lowest diff_score first.
    order = np.array(scores).argsort()
    chosen = [leader, contenders[order[0]], contenders[order[1]]]

    return tuple(
        (label, [methods_name[str(idx)] for idx in positions[label]])
        for label in chosen
    )
def function41(sources_name, trans, s, methods_name):
    """Select sources when only two distinct labels occur in ``s``.

    The more frequent label comes first and the other second; the third slot
    is left empty. No scoring is performed, so ``sources_name`` and ``trans``
    are accepted only for signature parity with the sibling selectors.

    Args:
        sources_name: Unused.
        trans: Unused.
        s: Sequence of source labels; index ``i`` corresponds to
            ``methods_name[str(i)]``.
        methods_name: Mapping from stringified index to a method name.

    Returns:
        ``(source1, methods1), (source2, methods2), ("", "")``.
    """
    tally = Counter(s)
    # most_common() orders by descending count, ties in first-seen order.
    ranked = [label for label, _ in tally.most_common()]
    majority, minority = ranked[0], ranked[1]

    def methods_for(label):
        # Methods whose index in ``s`` carries this label, in index order.
        return [
            methods_name[str(idx)]
            for idx, item in enumerate(s)
            if item == label
        ]

    majority_methods = methods_for(majority)
    minority_methods = methods_for(minority)
    return (majority, majority_methods), (minority, minority_methods), ("", "")
def function5(sources_name, trans, s, methods_name):
    """Select the single source when every entry of ``s`` is the same label.

    Args:
        sources_name: Unused.
        trans: Unused.
        s: Sequence of source labels; its first distinct label is returned.
        methods_name: Mapping whose values (all of them) are reported as the
            methods for that source.

    Returns:
        ``(source, all_methods), ("", ""), ("", "")``.
    """
    distinct_labels = list(Counter(s))
    unanimous = distinct_labels[0]
    every_method = list(methods_name.values())
    return (unanimous, every_method), ("", ""), ("", "")
# -> Used for selecting best output for translation
def selection_source(S, sources_name, trans, methods_name):
    """Dispatch to the selector matching how often each label occurs in ``S``.

    Every space in each entry of ``S`` is first replaced by ``"GOOGLE"``
    (NOTE(review): presumably meant to map a blank entry to a default source;
    confirm against the caller). The occurrence counts of the resulting
    labels, sorted descending, choose the handler.

    Args:
        S: Iterable of source label strings.
        sources_name: Passed through to the handler.
        trans: Passed through to the handler.
        methods_name: Passed through to the handler.

    Returns:
        The handler's three ``(source, methods)`` tuples, or ``None`` when
        the count pattern matches none of the handled cases.
    """
    s = [label.replace(" ", "GOOGLE") for label in S]
    seq = sorted(Counter(s).values(), reverse=True)

    if seq == [1, 1, 1, 1, 1]:
        handler = function11111
    elif seq == [2, 1, 1, 1]:
        handler = function2111
    elif seq == [2, 2, 1]:
        handler = function221
    elif seq == [3, 1, 1]:
        handler = function311
    elif seq in ([4, 1], [3, 2]):
        handler = function41
    elif seq == [5]:
        handler = function5
    else:
        # Unhandled pattern: fall through exactly like the original.
        return None

    first, second, third = handler(sources_name, trans, s, methods_name)
    return first, second, third
# -> Used for selecting best output for transliteration
def selection_source_transliteration(sources_name, O, priority_list):
    """Choose a first and a second transliteration output with their sources.

    The outputs in ``O`` are grouped by value and ranked by frequency via
    ``two_sources_two_outputs``.

    * If the most frequent output is strictly more frequent than every other
      one, it is the first choice. The second choice is the runner-up when
      it, too, strictly beats all outputs ranked below it (or is the only
      other output); otherwise the priority fallback described below is used.
    * If the top frequency is tied, the first choice is whichever of the two
      top-ranked groups contains ``priority_list[0]``; failing that it is
      ``O[0]`` attributed to ``priority_list[0]``. The second choice always
      comes from the priority fallback.

    Priority fallback: the first entry of ``priority_list`` not contained in
    the first choice's source(s), paired with the element of ``O`` at that
    entry's position in ``priority_list`` (assumes ``O`` is ordered like
    ``priority_list`` -- TODO confirm with the caller). If every priority
    entry is already covered, the runner-up group is used.

    Fixes relative to the previous implementation:
    * the runner-up dominance test accumulated into the wrong list, so it
      never succeeded for three or more distinct outputs;
    * an empty ``priority_list`` raised ``UnboundLocalError`` in the
      "unique winner" branch;
    * the duplicated fallback code is shared through one helper.

    Args:
        sources_name: Mapping from stringified index in ``O`` to source name.
        O: Sequence of outputs, one per source.
        priority_list: Source names in descending priority.

    Returns:
        ``(output1, source1), (output2, source2)``.
    """
    seq = list(Counter(O).values())
    print(seq)
    seq.sort(reverse = True)
    print(seq)

    # Positions whose count is strictly below the top count; the top output
    # is a unique winner only if that holds for every other count.
    check = [i for i in range(len(seq) - 1) if seq[0] > seq[i + 1]]
    top_is_unique = len(check) == len(seq) - 1
    print("check", check)

    if top_is_unique:
        print("here1")
        (o1, s1), (o2, s2) = two_sources_two_outputs(sources_name, O)
        print((o1, s1), (o2, s2))
        output1, source1 = o1, s1
        print(seq)
        # seq is sorted descending, so the runner-up dominates the rest when
        # it strictly beats every count after it (vacuously true for two
        # distinct outputs, false when there is no runner-up at all).
        runner_up_is_unique = len(seq) >= 2 and all(
            seq[1] > count for count in seq[2:]
        )
        if runner_up_is_unique:
            output2, source2 = o2, s2
        else:
            output2, source2 = _second_choice_by_priority(
                priority_list, O, source1, o2, s2
            )
    else:
        (o1, s1), (o2, s2) = two_sources_two_outputs(sources_name, O)
        if priority_list[0] in s1:
            output1, source1 = o1, s1
        elif priority_list[0] in s2:
            output1, source1 = o2, s2
        else:
            # Neither top-ranked group holds the top-priority source.
            output1, source1 = O[0], priority_list[0]
        output2, source2 = _second_choice_by_priority(
            priority_list, O, source1, o2, s2
        )
    return (output1, source1), (output2, source2)


def _second_choice_by_priority(priority_list, O, source1, o2, s2):
    """Return the highest-priority (output, source) not in ``source1``, else ``(o2, s2)``."""
    for position, candidate in enumerate(priority_list):
        # NOTE(review): when source1 is a plain string this is a substring
        # test, exactly as in the original code.
        if candidate not in source1:
            return O[position], candidate
    return o2, s2
def two_sources_two_outputs(sources_name, O):
    """Return the two most frequent outputs in ``O`` with their source names.

    Outputs are ranked by descending frequency; ties keep first-seen order.
    Intermediate values are printed for debugging.

    Args:
        sources_name: Mapping from stringified index in ``O`` to source name.
        O: Sequence of outputs.

    Returns:
        ``(out1, sources1), (out2, sources2)`` where each ``sources`` is the
        list of source names that produced that output. When ``O`` holds a
        single distinct output the second pair is ``("", "")``.
    """
    print("sources name is", sources_name, O)
    tally = Counter(O)
    print("dict1", tally)
    counts_desc = sorted(tally.values(), reverse=True)
    print("sorted_value", counts_desc)

    # Stable sort keeps first-seen order among equally frequent outputs.
    by_frequency = dict(
        sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    )
    print("sorted_Dict", by_frequency)
    ranked_outputs = list(by_frequency)
    print(ranked_outputs)

    # For every distinct output, the indices at which it appears in ``O``.
    positions = {
        value: [idx for idx, item in enumerate(O) if item == value]
        for value in tally
    }
    print("rm", list(positions.values()))
    print("fs", list(positions))

    best = ranked_outputs[0]
    best_sources = [sources_name[str(idx)] for idx in positions[best]]
    if len(ranked_outputs) == 1:
        return (best, best_sources), ("", "")

    runner_up = ranked_outputs[1]
    runner_up_sources = [sources_name[str(idx)] for idx in positions[runner_up]]
    return (best, best_sources), (runner_up, runner_up_sources)