from translation_metric import diff_score
import os
import sys
import re
from translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
from script_detector import script_cat
from translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
from selection_source import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs
from script_writing import default_script


def script_det(text):
    """Return the script of the first non-punctuation character of ``text``.

    The first character of ``text`` that is not in the punctuation set below
    is handed to ``script_cat`` and element ``[0]`` of its result is
    returned.  When ``text`` is empty or contains only punctuation, the empty
    string is handed to ``script_cat`` instead.
    """
    # The backslash is escaped explicitly; a bare "\," is an invalid escape
    # sequence that newer Python versions warn about.  The resulting set of
    # characters is unchanged.
    punctuations = '''!()-[]{};:'"\\,<>./?@#$%^&*_~“"”'''
    first_letter = next((ch for ch in text if ch not in punctuations), "")
    return script_cat(first_letter)[0]


def punct_remover(string):
    """Return ``string`` with punctuation and ASCII digits turned into spaces.

    Every occurrence of a character from the set below is replaced by a
    single space; all other characters (including letter case) are left
    untouched, so the result has the same length as the input.

    Args:
        string: Text to clean.

    Returns:
        The cleaned text.
    """
    # The backslash is escaped explicitly; a bare "\," is an invalid escape
    # sequence.  The set of characters is the same as before.
    punctuations = '''!()-[]{};:'"\\,<>./?@#$%^&*_~…।1234567890'''
    # One translate pass instead of one str.replace call per matching char.
    to_space = str.maketrans(dict.fromkeys(punctuations, " "))
    return string.translate(to_space)


def word_transliterate(sentence, dest_script):
    """Return ``sentence`` unchanged.

    Placeholder: no transliteration is performed and ``dest_script`` is
    currently ignored.
    """
    return sentence


def final_out(output1, output2, output3, dest_lang):
    """Return the first candidate written entirely in the target script.

    Each candidate is cleaned with ``punct_remover`` and split on whitespace;
    it qualifies when ``script_det`` of every resulting word equals
    ``default_script[dest_lang]`` (a candidate with no words qualifies
    trivially).  Candidates are tried in the order ``output1``, ``output2``,
    ``output3`` and the original, uncleaned text of the first qualifying one
    is returned.  When none qualifies, ``output1`` is passed through
    ``word_transliterate`` and that result is returned.
    """
    candidates = (output1, output2, output3)
    # All three candidates are cleaned before any script check is made.
    cleaned = [punct_remover(candidate) for candidate in candidates]

    for candidate, text in zip(candidates, cleaned):
        if all(script_det(token) == default_script[dest_lang]
               for token in text.split()):
            return candidate

    return word_transliterate(output1, default_script[dest_lang])


def compare_outputs(sentence, t0, trans, sources_name, target_lang):
    """Choose one text from ``trans`` based on several scoring helpers.

    ``manual_diff_score``, ``gleu_diff_score``, ``meteor_diff_score`` and
    ``rouge_diff_score`` each propose an output together with the source it
    came from.  If every proposal equals ``t0``, ``t0`` is returned as is.
    Otherwise the sources of the proposals that differ from ``t0`` (a single
    space stands in for those that do not) are passed to
    ``selection_source``.  The first element of each of the three values it
    returns is treated as a source name, mapped back to its text in
    ``trans``, and ``final_out`` picks among those texts.

    Args:
        sentence: Currently unused; kept so existing callers keep working.
        t0: Reference output that the proposals are compared against.
        trans: Mapping from source key (as a string) to that source's text.
        sources_name: Mapping from source key to source name.
        target_lang: Language code forwarded to ``final_out``.

    Returns:
        ``t0`` when all proposals agree with it, otherwise the result of
        ``final_out``.

    Raises:
        KeyError: If ``selection_source`` names a source that is not a value
            of ``sources_name``, or the matching key is missing from
            ``trans``.
    """
    methods_name = {'0': 'MNF', '1': 'Gleu',
                    '2': 'Meteor', '3': 'Rougen', '4': 'Rougel'}
    google_output = t0

    output1, source1 = manual_diff_score(trans, sources_name)
    output2, source2 = gleu_diff_score(trans, sources_name)
    output3, source3 = meteor_diff_score(trans, sources_name)
    output4, source4, output5, source5 = rouge_diff_score(trans, sources_name)

    if google_output == output1 == output2 == output3 == output4 == output5:
        return google_output

    proposals = (
        (output1, source1),
        (output2, source2),
        (output3, source3),
        (output4, source4),
        (output5, source5),
    )
    # Keep one slot per method; a proposal identical to the reference
    # contributes a blank placeholder instead of its source.
    s = [
        source if google_output != output else " "
        for output, source in proposals
    ]

    s1ANDm1, s2ANDm2, s3ANDm3 = selection_source(
        s, sources_name, trans, methods_name)

    first_choice = trans[str(_key_for_source(sources_name, s1ANDm1[0]))]

    # An empty name means there is no second/third pick: reuse the first.
    if s2ANDm2[0] != "":
        second_choice = trans[str(_key_for_source(sources_name, s2ANDm2[0]))]
    else:
        second_choice = first_choice

    if s3ANDm3[0] != "":
        third_choice = trans[str(_key_for_source(sources_name, s3ANDm3[0]))]
    else:
        third_choice = first_choice

    return final_out(first_choice, second_choice, third_choice, target_lang)


def _key_for_source(sources_name, wanted):
    """Return the key of ``sources_name`` whose value equals ``wanted``.

    If several keys map to ``wanted`` the last one in iteration order wins.

    Raises:
        KeyError: If no value of ``sources_name`` equals ``wanted``.
    """
    matches = [key for key, name in sources_name.items() if name == wanted]
    if not matches:
        raise KeyError(f"no source named {wanted!r} in sources_name")
    return matches[-1]


####
def _resolve_srt(directory, filename):
    """Return the path of ``filename`` in ``directory``, or of a placeholder.

    When ``directory/filename`` is not an existing file, a file called
    ``filename`` containing the text ``Not Found`` is written to the current
    working directory and that relative path is returned instead, so the
    later reading steps always have something to open.
    """
    candidate = directory + "/" + filename
    if os.path.isfile(candidate):
        return candidate
    with open(filename, "w", encoding="utf8") as f:
        f.write('Not Found')
    return filename


# Directory holding the three subtitle files; the sub-directory name is the
# first command-line argument.
basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
sub_path = sys.argv[1]
current = basepath + "/" + sub_path

google_srt = _resolve_srt(current, "google_subtitle.srt")
aws_srt = _resolve_srt(current, "aws_subtitle.srt")
azure_srt = _resolve_srt(current, "azure_subtitle.srt")


def main1():
    """Flatten the SRT file at ``google_srt`` into ``subtitle1.txt``.

    Lines made up only of digits, lines starting with an ``NN:NN:NN``
    timestamp and empty lines are dropped.  The remaining lines are joined
    with single spaces, leading whitespace is stripped, and the result is
    written to ``subtitle1.txt`` in the current working directory.
    """
    skip_patterns = ('^[0-9]+$', '^[0-9]{2}:[0-9]{2}:[0-9]{2}', '^$')

    # The context manager closes the file even if reading fails.
    with open(google_srt, "r", encoding="utf8") as srt_file:
        kept = [
            line.rstrip('\n')
            for line in srt_file
            if not any(re.search(pattern, line) for pattern in skip_patterns)
        ]

    text = ' '.join(kept).lstrip()

    with open("subtitle1.txt", "w", encoding="utf8") as f:
        f.write(text)


def main2():
    """Flatten the SRT file at ``aws_srt`` into ``subtitle2.txt``.

    Lines made up only of digits, lines starting with an ``NN:NN:NN``
    timestamp and empty lines are dropped.  The remaining lines are joined
    with single spaces, leading whitespace is stripped, and the result is
    written to ``subtitle2.txt`` in the current working directory.
    """
    skip_patterns = ('^[0-9]+$', '^[0-9]{2}:[0-9]{2}:[0-9]{2}', '^$')

    # The context manager closes the file even if reading fails.
    with open(aws_srt, "r", encoding="utf8") as srt_file:
        kept = [
            line.rstrip('\n')
            for line in srt_file
            if not any(re.search(pattern, line) for pattern in skip_patterns)
        ]

    text = ' '.join(kept).lstrip()

    with open("subtitle2.txt", "w", encoding="utf8") as f:
        f.write(text)


def main3():
    """Flatten the SRT file at ``azure_srt`` into ``subtitle3.txt``.

    Lines made up only of digits, lines starting with an ``NN:NN:NN``
    timestamp and empty lines are dropped.  The remaining lines are joined
    with single spaces, leading whitespace is stripped, and the result is
    written to ``subtitle3.txt`` in the current working directory.
    """
    skip_patterns = ('^[0-9]+$', '^[0-9]{2}:[0-9]{2}:[0-9]{2}', '^$')

    # The context manager closes the file even if reading fails.
    with open(azure_srt, "r", encoding="utf8") as srt_file:
        kept = [
            line.rstrip('\n')
            for line in srt_file
            if not any(re.search(pattern, line) for pattern in skip_patterns)
        ]

    text = ' '.join(kept).lstrip()

    with open("subtitle3.txt", "w", encoding="utf8") as f:
        f.write(text)


# Flatten each provider's SRT file into subtitleN.txt.
main1()
main2()
main3()

# Collect the three flattened transcripts into one file, one per line.
filenames = ["subtitle1.txt", "subtitle2.txt", "subtitle3.txt"]
with open("output_file.txt", "w", encoding="utf8") as outfile:
    for filename in filenames:
        with open(filename, encoding="utf8") as infile:
            outfile.write(infile.read())
        outfile.write("\n")

with open('output_file.txt', encoding="utf8") as f:
    finalList = [line.strip('\n') for line in f]

# Map "0"/"1"/"2" straight to the transcript on that line.  Going through a
# text-keyed dict first would silently drop an index whenever two providers
# produced identical text.
trans = {str(i): finalList[i] for i in range(3)}


sentence = ""
t0 = " "
sources_name = {'0': "google", "1": 'aws', "2": 'azure'}
target_lang = "en"

selected_para = compare_outputs(sentence, t0, trans, sources_name, target_lang)
key_list = list(trans.keys())
val_list = list(trans.values())

# list.index returns an int, so look up the matching string key before
# comparing against the '0'/'1' source keys.
position = val_list.index(selected_para)
selected_key = key_list[position]
if selected_key == '0':
    print("Google:\n", google_srt)
elif selected_key == '1':
    print("AWS:\n", aws_srt)
else:
    print("Azure:\n", azure_srt)