# Conversion_Kitchen_Code/kitchen_counter/conversion/subtitling/selection_source.py
#
# 497 lines
# 16 KiB
# Python
# Executable File

from collections import Counter
from .translation_metric import diff_score
import numpy as np
def function11111(sources_name, trans, s, methods_name):
    """Rank five distinct sources by diff score and return the best three.

    ``selection_source`` dispatches here when the occurrence counts of the
    entries of ``s`` are ``[1, 1, 1, 1, 1]``.

    Args:
        sources_name: Mapping whose values are the source identifiers found
            in ``s``. The position of an identifier among the values, as a
            string, is the key used to fetch its text from ``trans``.
        trans: Mapping from stringified index to a text. ``trans["0"]`` is
            the reference every candidate text is compared against.
        s: Sequence of source identifiers; position ``i`` is paired with
            ``methods_name[str(i)]``.
        methods_name: Mapping from stringified position to a method name.

    Returns:
        Three ``(source, methods)`` tuples, ordered by ascending
        ``diff_score`` against ``trans["0"]``. ``methods`` lists the method
        names at the positions of ``s`` holding that source.

    Raises:
        IndexError: If ``s`` has fewer than five distinct entries.
        ValueError: If a candidate is not among ``sources_name`` values.
    """
    # NOTE(review): diff_score is imported from .translation_metric; this
    # code only relies on "smaller score sorts first" -- confirm its meaning.
    tally = Counter(s)
    # Stable descending sort: equal counts keep first-appearance order.
    ranked = sorted(tally, key=tally.get, reverse=True)
    candidates = [ranked[slot] for slot in range(5)]

    # Distinct source -> positions of ``s`` where it occurs.
    positions = {}
    for pos, entry in enumerate(s):
        positions.setdefault(entry, []).append(pos)

    known_sources = list(sources_name.values())
    trans_keys = [str(known_sources.index(src)) for src in candidates]
    scores = [diff_score(trans[key], trans["0"]) for key in trans_keys]
    order = np.array(scores).argsort()

    picks = []
    for rank in range(3):
        chosen = candidates[order[rank]]
        chosen_methods = [methods_name[str(pos)] for pos in positions[chosen]]
        picks.append((chosen, chosen_methods))
    return picks[0], picks[1], picks[2]
def function2111(sources_name, trans, s, methods_name):
    """Return the majority source plus the two best-scoring single sources.

    ``selection_source`` dispatches here when the occurrence counts of the
    entries of ``s`` are ``[2, 1, 1, 1]``.

    Args:
        sources_name: Mapping whose values are the source identifiers found
            in ``s``. The position of an identifier among the values, as a
            string, is the key used to fetch its text from ``trans``.
        trans: Mapping from stringified index to a text. ``trans["0"]`` is
            the reference the single-occurrence sources are scored against.
        s: Sequence of source identifiers; position ``i`` is paired with
            ``methods_name[str(i)]``.
        methods_name: Mapping from stringified position to a method name.

    Returns:
        Three ``(source, methods)`` tuples: the most frequent source first,
        then the two remaining candidates with the smallest ``diff_score``
        in ascending order.

    Raises:
        IndexError: If ``s`` has fewer than four distinct entries.
        ValueError: If a scored candidate is not among ``sources_name``
            values.
    """
    frequency = Counter(s)
    # Stable descending sort: equal counts keep first-appearance order.
    by_count = sorted(frequency, key=frequency.get, reverse=True)
    majority = by_count[0]
    contenders = [by_count[slot] for slot in range(1, 4)]

    # Distinct source -> positions of ``s`` where it occurs.
    where = {}
    for idx, entry in enumerate(s):
        where.setdefault(entry, []).append(idx)

    def methods_of(source):
        return [methods_name[str(idx)] for idx in where[source]]

    known_sources = list(sources_name.values())
    lookup_keys = [str(known_sources.index(src)) for src in contenders]
    # Only the contenders are scored; the majority source is taken as-is.
    scores = [diff_score(trans[key], trans["0"]) for key in lookup_keys]
    ranking = np.array(scores).argsort()

    first = (majority, methods_of(majority))
    second_source = contenders[ranking[0]]
    second = (second_source, methods_of(second_source))
    third_source = contenders[ranking[1]]
    third = (third_source, methods_of(third_source))
    return first, second, third
def function221(sources_name, trans, s, methods_name):
    """Order the two doubly-chosen sources by diff score, then the single one.

    ``selection_source`` dispatches here when the occurrence counts of the
    entries of ``s`` are ``[2, 2, 1]``.

    Args:
        sources_name: Mapping whose values are the source identifiers found
            in ``s``. The position of an identifier among the values, as a
            string, is the key used to fetch its text from ``trans``.
        trans: Mapping from stringified index to a text. ``trans["0"]`` is
            the reference the two tied sources are scored against.
        s: Sequence of source identifiers; position ``i`` is paired with
            ``methods_name[str(i)]``.
        methods_name: Mapping from stringified position to a method name.

    Returns:
        Three ``(source, methods)`` tuples: the two most frequent sources in
        ascending ``diff_score`` order, followed by the third-ranked source.

    Raises:
        IndexError: If ``s`` has fewer than three distinct entries.
        ValueError: If one of the two scored sources is not among
            ``sources_name`` values.
    """
    tally = Counter(s)
    # Stable descending sort: equal counts keep first-appearance order.
    ranked = sorted(tally, key=tally.get, reverse=True)
    tied_pair = [ranked[0], ranked[1]]
    singleton = ranked[2]

    # Distinct source -> positions of ``s`` where it occurs.
    positions = {}
    for pos, entry in enumerate(s):
        positions.setdefault(entry, []).append(pos)

    def methods_of(source):
        return [methods_name[str(pos)] for pos in positions[source]]

    # Only the tied pair is scored. The third source is never compared, so
    # it is not looked up in ``sources_name`` (the previous code did a dead
    # lookup that could raise ValueError for no benefit).
    known_sources = list(sources_name.values())
    scores = [
        diff_score(trans[str(known_sources.index(src))], trans["0"])
        for src in tied_pair
    ]
    order = np.array(scores).argsort()

    results = [(tied_pair[slot], methods_of(tied_pair[slot])) for slot in order]
    results.append((singleton, methods_of(singleton)))
    return tuple(results)
def function311(sources_name, trans, s, methods_name):
    """Return the majority source followed by the two others by diff score.

    ``selection_source`` dispatches here when the occurrence counts of the
    entries of ``s`` are ``[3, 1, 1]``.

    Args:
        sources_name: Mapping whose values are the source identifiers found
            in ``s``. The position of an identifier among the values, as a
            string, is the key used to fetch its text from ``trans``.
        trans: Mapping from stringified index to a text. ``trans["0"]`` is
            the reference the two single-occurrence sources are scored
            against.
        s: Sequence of source identifiers; position ``i`` is paired with
            ``methods_name[str(i)]``.
        methods_name: Mapping from stringified position to a method name.

    Returns:
        Three ``(source, methods)`` tuples: the most frequent source first,
        then the other two in ascending ``diff_score`` order.

    Raises:
        IndexError: If ``s`` has fewer than three distinct entries.
        ValueError: If a scored source is not among ``sources_name`` values.
    """
    frequency = Counter(s)
    # Stable descending sort: equal counts keep first-appearance order.
    by_count = sorted(frequency, key=frequency.get, reverse=True)
    majority = by_count[0]
    minority = [by_count[1], by_count[2]]

    # Distinct source -> positions of ``s`` where it occurs.
    where = {}
    for idx, entry in enumerate(s):
        where.setdefault(entry, []).append(idx)

    known_sources = list(sources_name.values())
    lookup_keys = [str(known_sources.index(src)) for src in minority]
    scores = [diff_score(trans[key], trans["0"]) for key in lookup_keys]
    ranking = np.array(scores).argsort()

    # The majority source always comes first; it is never scored.
    majority_methods = [methods_name[str(idx)] for idx in where[majority]]
    closer = minority[ranking[0]]
    closer_methods = [methods_name[str(idx)] for idx in where[closer]]
    farther = minority[ranking[1]]
    farther_methods = [methods_name[str(idx)] for idx in where[farther]]
    return (majority, majority_methods), (closer, closer_methods), (farther, farther_methods)
def function41(sources_name, trans, s, methods_name):
    """Return the two distinct sources of ``s``, most frequent first.

    ``selection_source`` dispatches here when the occurrence counts of the
    entries of ``s`` are ``[4, 1]`` or ``[3, 2]``. No scoring is done, so
    ``sources_name`` and ``trans`` are accepted only to keep the signature
    parallel with the sibling handlers.

    Args:
        sources_name: Unused.
        trans: Unused.
        s: Sequence of source identifiers; position ``i`` is paired with
            ``methods_name[str(i)]``.
        methods_name: Mapping from stringified position to a method name.

    Returns:
        ``(source, methods)`` for the most frequent source, the same for
        the second one, and ``("", "")`` as an empty third slot.

    Raises:
        IndexError: If ``s`` has fewer than two distinct entries.
    """
    tally = Counter(s)
    # Stable descending sort: equal counts keep first-appearance order.
    ranked = sorted(tally, key=tally.get, reverse=True)
    leader, runner_up = ranked[0], ranked[1]

    # Distinct source -> positions of ``s`` where it occurs.
    positions = {}
    for pos, entry in enumerate(s):
        positions.setdefault(entry, []).append(pos)

    leader_methods = [methods_name[str(pos)] for pos in positions[leader]]
    runner_up_methods = [methods_name[str(pos)] for pos in positions[runner_up]]
    return (leader, leader_methods), (runner_up, runner_up_methods), ("", "")
def function5(sources_name, trans, s, methods_name):
    """Return the first distinct source of ``s`` together with every method.

    ``selection_source`` dispatches here when all entries of ``s`` are the
    same (count pattern ``[5]``). ``sources_name`` and ``trans`` are not
    read.

    Args:
        sources_name: Unused.
        trans: Unused.
        s: Sequence of source identifiers.
        methods_name: Mapping whose values are the method names.

    Returns:
        ``(source, all_method_names)`` followed by two ``("", "")``
        placeholders for the empty second and third slots.

    Raises:
        IndexError: If ``s`` is empty.
    """
    distinct = list(Counter(s))
    empty_slot = ("", "")
    return (distinct[0], list(methods_name.values())), empty_slot, empty_slot
def selection_source(S, sources_name, trans, methods_name):
    """Dispatch to the handler matching how often each source occurs in ``S``.

    Every space character in each entry of ``S`` is first replaced by the
    literal ``"GOOGLE"``; the handlers receive the rewritten list.
    NOTE(review): presumably a blank entry stands for the Google source --
    confirm with the caller.

    The occurrence counts of the rewritten entries, sorted descending,
    select the handler:

        [1, 1, 1, 1, 1] -> function11111
        [2, 1, 1, 1]    -> function2111
        [2, 2, 1]       -> function221
        [3, 1, 1]       -> function311
        [4, 1], [3, 2]  -> function41
        [5]             -> function5

    Args:
        S: Iterable of strings.
        sources_name: Passed through to the handler.
        trans: Passed through to the handler.
        methods_name: Passed through to the handler.

    Returns:
        The handler's three ``(source, methods)`` tuples, or ``None`` when
        the count pattern matches none of the cases above.
    """
    s = [entry.replace(" ", "GOOGLE") for entry in S]
    pattern = sorted(Counter(s).values(), reverse=True)

    # The handler is chosen lazily so an unsupported pattern returns None
    # without touching any handler.
    if pattern == [1, 1, 1, 1, 1]:
        handler = function11111
    elif pattern == [2, 1, 1, 1]:
        handler = function2111
    elif pattern == [2, 2, 1]:
        handler = function221
    elif pattern == [3, 1, 1]:
        handler = function311
    elif pattern in ([4, 1], [3, 2]):
        handler = function41
    elif pattern == [5]:
        handler = function5
    else:
        return None

    first, second, third = handler(sources_name, trans, s, methods_name)
    return first, second, third
def _second_choice_by_priority(O, priority_list, source1, default):
    """Pick the second (output, source) pair using the priority order.

    Returns the output of the first entry of ``priority_list`` that is not
    contained in ``source1``, or ``default`` when every entry is contained.
    """
    for candidate in priority_list:
        # ``source1`` is either a list of source names or a single name; in
        # the latter case ``in`` is a substring test, as before.
        if candidate not in source1:
            # NOTE(review): assumes ``O`` is ordered like ``priority_list``
            # so that the same index addresses the source's output.
            return O[priority_list.index(candidate)], candidate
    return default


def selection_source_transliteration(sources_name, O, priority_list):
    """Choose the two preferred (output, source) pairs among ``O``.

    The outputs in ``O`` are counted. If one output is strictly more
    frequent than every other, it is the first choice; otherwise the first
    choice is whichever of the two most frequent outputs was produced by
    ``priority_list[0]``, falling back to ``O[0]`` / ``priority_list[0]``.
    The second choice is the second most frequent output when it is
    unambiguous (exactly two distinct outputs, or its count strictly
    exceeds all lower counts); otherwise it is the output of the first
    source in ``priority_list`` not already part of the first choice.

    Changes from the previous revision:
      * the "clear runner-up" test appended to the wrong list and so never
        succeeded for three or more distinct outputs; it now works;
      * a leftover debug ``print`` was removed;
      * the priority fallback no longer uses the string ``"test"`` as a
        sentinel and no longer requires ``priority_list`` to have at least
        two entries.

    Args:
        sources_name: Mapping from stringified position in ``O`` to the
            name of the source that produced that output.
        O: Sequence of outputs, one per source.
        priority_list: Source names in order of preference.

    Returns:
        ``((output1, source1), (output2, source2))``. ``source1`` and
        ``source2`` are a list of source names when taken from the frequency
        ranking and a single name when taken from ``priority_list``.

    Raises:
        IndexError: If ``O`` is empty, or if ``priority_list`` is empty
            when the most frequent output is tied.
    """
    counts = sorted(Counter(O).values(), reverse=True)
    (o1, s1), (o2, s2) = two_sources_two_outputs(sources_name, O)

    leader_is_clear = all(counts[0] > other for other in counts[1:])
    if leader_is_clear:
        output1, source1 = o1, s1
        # With a single distinct output there is no runner-up at all, so the
        # priority fallback decides (it yields ("", "") when nothing fits).
        runner_up_is_clear = len(counts) >= 2 and all(
            counts[1] > other for other in counts[2:]
        )
        if runner_up_is_clear:
            return (output1, source1), (o2, s2)
    else:
        preferred = priority_list[0]
        if preferred in s1:
            output1, source1 = o1, s1
        elif preferred in s2:
            output1, source1 = o2, s2
        else:
            output1, source1 = O[0], preferred

    output2, source2 = _second_choice_by_priority(
        O, priority_list, source1, (o2, s2)
    )
    return (output1, source1), (output2, source2)
def two_sources_two_outputs(sources_name, O):
    """Return the two most frequent outputs with the sources producing them.

    Args:
        sources_name: Mapping from stringified position in ``O`` to a
            source name.
        O: Sequence of outputs.

    Returns:
        ``((out1, sources1), (out2, sources2))`` where ``out1`` / ``out2``
        are the most and second most frequent outputs (ties keep
        first-appearance order) and ``sources1`` / ``sources2`` list the
        source names at the positions holding them. When ``O`` has a single
        distinct output the second pair is ``("", "")``.

    Raises:
        IndexError: If ``O`` is empty.
    """
    tally = Counter(O)
    # Stable descending sort: equal counts keep first-appearance order.
    ranked = sorted(tally, key=tally.get, reverse=True)

    # Distinct output -> positions of ``O`` where it occurs.
    positions = {}
    for pos, output in enumerate(O):
        positions.setdefault(output, []).append(pos)

    def producers(output):
        return [sources_name[str(pos)] for pos in positions[output]]

    best = ranked[0]
    best_sources = producers(best)
    if len(ranked) == 1:
        return (best, best_sources), ("", "")

    second = ranked[1]
    return (best, best_sources), (second, producers(second))