2023 lines
79 KiB
Python
2023 lines
79 KiB
Python
|
# Module Imports
|
||
|
from importlib import import_module
|
||
|
import os
|
||
|
import sys
|
||
|
import docx
|
||
|
import re
|
||
|
# import textract
|
||
|
from tqdm import tqdm
|
||
|
from collections import Counter
|
||
|
import ntpath
|
||
|
from docx.shared import Inches, Cm, Pt
|
||
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||
|
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
|
||
|
import requests
|
||
|
import uuid
|
||
|
import json
|
||
|
import nltk.translate.bleu_score as bleu
|
||
|
import nltk.translate.gleu_score as gleu
|
||
|
from rouge_score import rouge_scorer
|
||
|
import numpy as np
|
||
|
from indicnlp.tokenize import sentence_tokenize
|
||
|
import nltk
|
||
|
import unidecode
|
||
|
import datetime
|
||
|
from pytz import timezone
|
||
|
|
||
|
# Helper Files Imports
|
||
|
from .detection import language_detector, script_det
|
||
|
from .buck_2_unicode import buck_2_unicode
|
||
|
from .transString import transString
|
||
|
from .translation_metric import (
|
||
|
manual_diff_score,
|
||
|
bleu_diff_score,
|
||
|
gleu_diff_score,
|
||
|
meteor_diff_score,
|
||
|
rouge_diff_score,
|
||
|
diff_score,
|
||
|
critera4_5,
|
||
|
)
|
||
|
from .selection_source import (
|
||
|
selection_source,
|
||
|
function5,
|
||
|
function41,
|
||
|
function311,
|
||
|
function221,
|
||
|
function2111,
|
||
|
function11111,
|
||
|
selection_source_transliteration,
|
||
|
two_sources_two_outputs,
|
||
|
)
|
||
|
from .script_writing import (
|
||
|
addSlugLine,
|
||
|
addActionLine,
|
||
|
addSpeaker,
|
||
|
addParenthetical,
|
||
|
addDialogue,
|
||
|
dual_script,
|
||
|
addTransition,
|
||
|
dial_checker,
|
||
|
non_dial_checker,
|
||
|
)
|
||
|
from .script_reading import (
|
||
|
breaksen,
|
||
|
getRefined,
|
||
|
getSlugAndNonSlug,
|
||
|
getSpeakers,
|
||
|
getScenes,
|
||
|
)
|
||
|
from .translation_resources import google, aws, azure, yandex
|
||
|
from .transliteration_resources import (
|
||
|
azure_transliteration,
|
||
|
indic_trans,
|
||
|
indic_transliteration_OTHER_GUJARATI,
|
||
|
indic_transliteration_OTHER_GURMUKHI,
|
||
|
indic_transliteration_OTHER_ORIYA,
|
||
|
om_transliterator,
|
||
|
libindic,
|
||
|
indic_transliteration_IAST,
|
||
|
indic_transliteration_ITRANS,
|
||
|
# polyglot_trans,
|
||
|
sheetal,
|
||
|
unicode_transliteration_GURMUKHI,
|
||
|
indic_transliteration_GURMUKHI,
|
||
|
transliteration_LATIN_CYRILLIC,
|
||
|
indic_transliteration_TELUGU,
|
||
|
unicode_transliteration_GURMUKHI_LATIN,
|
||
|
indic_transliteration_GURMUKHI_LATIN,
|
||
|
transliteration_CYRILIC_LATIN,
|
||
|
ConvertToLatin,
|
||
|
readonly,
|
||
|
indic_transliteration_OTHER_DEVANAGRI,
|
||
|
indic_transliteration_DEVANAGRI_OTHER,
|
||
|
indic_transliteration_KANNADA_OTHER,
|
||
|
indic_transliteration_OTHER_KANNADA,
|
||
|
indic_transliteration_TAMIL_OTHER,
|
||
|
indic_transliteration_OTHER_TAMIL,
|
||
|
indic_transliteration_TELUGU_OTHER,
|
||
|
indic_transliteration_MALAYALAM_OTHER,
|
||
|
indic_transliteration_OTHER_GUJARATI,
|
||
|
indic_transliteration_OTHER_GURMUKHI,
|
||
|
indic_transliteration_OTHER_ORIYA,
|
||
|
translit_CHINESE_LATIN,
|
||
|
translit_th_sin_mng_heb_to_latin
|
||
|
) # , translit_THAI_LATIN
|
||
|
from MNF.settings import BasePath
|
||
|
|
||
|
|
||
|
# Importing Basepath of System
|
||
|
# NOTE: evaluated at import time -- importing this module calls
# MNF.settings.BasePath() and keeps the result in the module-level `basePath`.
basePath = BasePath()
|
||
|
|
||
|
|
||
|
# -> Punctuation Remover code
|
||
|
# Translation table mapping every character treated as punctuation to a space.
# The backslash is written as an explicit "\\" (the original "\," relied on an
# invalid escape sequence, which newer Python versions warn about).
_PUNCT_TO_SPACE = str.maketrans(
    dict.fromkeys("""!()-[]{};:'"\\,<>./?@#$%^&*_~…।""", " ")
)


def punct_remover(string):
    """Return ``string`` with every punctuation character replaced by a space.

    The punctuation set is ASCII punctuation plus the ellipsis character and
    the danda. Each punctuation character becomes exactly one space, so the
    result has the same length as the input and runs of punctuation become
    runs of spaces.

    Args:
        string: Text to clean.

    Returns:
        The cleaned text; all other characters are left untouched.
    """
    # One pass over the text instead of one str.replace() call per character.
    return string.translate(_PUNCT_TO_SPACE)
|
||
|
|
||
|
|
||
|
# -> Insert a space after punctuation marks and collapse repeated whitespace
|
||
|
def space_after_punct(text):
    """Normalise spacing around punctuation so punctuation can be tokenised.

    Steps, in order:
      1. A spaced ellipsis ``". . ."`` becomes ``" ... "``.
      2. A space is inserted after each of ``, ! ? ( ) … -``.
      3. Any run of two or more whitespace characters collapses to one space.

    Args:
        text: Sentence to normalise.

    Returns:
        The normalised sentence.
    """
    text = text.replace(". . .", " ... ")
    # Raw strings: the original non-raw "\s{2,}" relied on an invalid escape.
    text = re.sub(r"([,!?()…-])", r"\1 ", text)
    return re.sub(r"\s{2,}", " ", text)
|
||
|
|
||
|
|
||
|
# -> Re-attach the original sentence's punctuation to the transliterated text
|
||
|
def final_transliterated_sentence(original, transliterated):
    """Re-attach the punctuation of ``original`` to ``transliterated``.

    ``original`` is first normalised with ``space_after_punct`` and split on
    whitespace. Its tokens are then walked in order:

    * a token that is itself a punctuation mark is copied to the output;
    * any other token consumes the next whitespace-separated token of
      ``transliterated``; if the original token ends with a punctuation
      character, that character is appended to the transliterated token.

    Args:
        original: Source sentence, including punctuation.
        transliterated: Transliteration of the punctuation-free sentence.

    Returns:
        The transliterated sentence with punctuation restored. The padding
        spaces around ``...`` and after ``…`` are removed again.
    """
    punct_list = {
        "!", '"', "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", " ", "-",
        ".", "/", ":", ";", "<", "=", ">", "?", "@", "[", "\\", "]", "^", "_",
        "`", "{", "|", "}", "~", "…", "...", "।",
    }
    # Split once up front instead of re-splitting on every loop iteration.
    original_tokens = space_after_punct(original).split()
    transliterated_tokens = transliterated.split()

    sentence = []
    j = 0  # index of the next unused transliterated token
    for token in original_tokens:
        if token in punct_list:
            sentence.append(token)
            continue
        if j >= len(transliterated_tokens):
            # The transliteration has fewer tokens than the original; keep the
            # original token rather than raising IndexError mid-sentence.
            sentence.append(token)
            continue
        word = transliterated_tokens[j]
        j += 1
        if token[-1] in punct_list:
            word += token[-1]
        sentence.append(word)

    transliterated_sentence = " ".join(sentence)
    # str.replace returns a new string; the original discarded these results,
    # so the ellipsis spacing was never actually restored.
    transliterated_sentence = transliterated_sentence.replace(" ... ", "...")
    return transliterated_sentence.replace("… ", "…")
|
||
|
|
||
|
|
||
|
def google_length_checker(t, temp_sentence, t0):
    """Pad ``t`` with trailing tokens of ``t0`` when it is slightly too short.

    Token counts are taken by splitting on whitespace.

    Args:
        t: Candidate output that may be missing its last one or two tokens.
        temp_sentence: Reference sentence whose token count ``t`` should match.
        t0: Fallback output whose last token(s) are borrowed for padding.

    Returns:
        ``t`` followed by the last token of ``t0`` when one token is missing,
        ``t`` followed by the last two tokens of ``t0`` when two are missing,
        and ``t`` unchanged in every other case.
    """
    shortfall = len(temp_sentence.split()) - len(t.split())
    if shortfall == 1:
        return t + " " + t0.split()[-1]
    if shortfall == 2:
        fallback_tokens = t0.split()
        return t + " " + fallback_tokens[-2] + " " + fallback_tokens[-1]
    # Long enough already, or too short to patch up: hand back as-is.
    return t
|
||
|
|
||
|
|
||
|
# Trailing halant (virama, "्") remover for Devanagari tokens
|
||
|
def Halant_remover(T3):
    """Strip a single trailing halant (virama, "्") from ``T3``.

    Args:
        T3: A token, possibly ending with the halant sign.

    Returns:
        ``T3`` without its final character when that character is the halant,
        otherwise ``T3`` unchanged. An empty string is returned as-is.
    """
    # endswith() is safe on "", where the original T3[-1] raised IndexError.
    if T3.endswith("्"):
        return T3[:-1]
    return T3
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_rom_dev_ph1(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` word by word with Latin -> Devanagari settings.

    The ``source_lang``, ``source_script`` and ``dest_script`` arguments are
    overwritten with "hi", "Latin" and "Devanagari", so the caller's values
    have no effect. Each word is sent to four engines and the candidates are
    handed to ``compare_outputs_transliteration``, whose return value is kept.

    Returns:
        The chosen words joined by single spaces.
    """
    source_lang, source_script, dest_script = "hi", "Latin", "Devanagari"
    priority_list = ["Azure", "indic_trans", "google", "indic_trans_IAST"]
    sources_name = {
        "0": "Azure",
        "1": "indic_trans",
        "2": "google",
        "3": "indic_trans_IAST",
    }
    # Sentence fragments made only of spaced dots / a stray quote are dropped.
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        chosen_words = []
        for word in sentence.split():
            if word == ".":
                continue
            # Candidate order must match the indices in sources_name.
            candidates = [
                azure_transliteration(
                    word, source_lang, source_script, dest_script
                ),
                indic_trans(word, source_script, dest_script),
                google(word, "en", "hi"),
                indic_transliteration_IAST(word),
            ]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        transliterated_text.append(" ".join(chosen_words))
    print("running perfectly")
    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_rom_dev_ph1_sentence_wise(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` one sentence at a time, choosing word by word.

    ``source_lang`` is overwritten with "hi". For every sentence the
    punctuation is blanked out, the whole sentence is sent to four engines,
    and the i-th token of each engine output is handed to
    ``compare_outputs_transliteration``. The Google output is first padded via
    ``google_length_checker`` and the IAST token is passed through
    ``Halant_remover``. Punctuation is restored afterwards with
    ``final_transliterated_sentence``.

    Returns:
        The rebuilt sentences joined by single spaces.
    """
    source_lang = "hi"
    priority_list = ["Azure", "indic_trans", "google", "indic_trans_IAST"]
    sources_name = {
        "0": "Azure",
        "1": "indic_trans",
        "2": "google",
        "3": "indic_trans_IAST",
    }
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        print("original_sentence", sentence)
        temp_sentence = punct_remover(sentence)
        print("sentence_without_punctuation", temp_sentence)

        azure_out = azure_transliteration(
            temp_sentence, source_lang, source_script, dest_script
        )
        indic_out = indic_trans(temp_sentence, source_script, dest_script)
        google_raw = google(temp_sentence, "en", "hi")
        google_out = google_length_checker(google_raw, temp_sentence, azure_out)
        iast_out = indic_transliteration_IAST(temp_sentence)
        engine_outputs = (azure_out, indic_out, google_out, iast_out)

        chosen_words = []
        for idx, word in enumerate(temp_sentence.split()):
            # i-th token of every engine output, in sources_name order.
            candidates = [out.split()[idx] for out in engine_outputs]
            candidates[-1] = Halant_remover(candidates[-1])
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        trans_sent_wo_punct = " ".join(chosen_words)
        print("trans_sent_wo_punct", trans_sent_wo_punct)
        transliterated_sentence = final_transliterated_sentence(
            sentence, trans_sent_wo_punct
        )
        print("trans_sent_w_punct", transliterated_sentence)
        transliterated_text.append(transliterated_sentence)

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_dev_rom_ph1_sentence_wise(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using four engines.

    A ``source_lang`` of "ne" is replaced by "hi" before the Azure call. Each
    sentence has its punctuation blanked, is sent to indic_trans, Azure,
    libindic and sheetal, and the i-th token of every output is handed to
    ``compare_outputs_transliteration``. Punctuation is restored afterwards
    with ``final_transliterated_sentence``. Progress is printed to stdout.

    Returns:
        The rebuilt sentences joined by single spaces.
    """
    print("Entered Here1212")
    sources_name = {
        "0": "indic_trans",
        "1": "Azure",
        "2": "libindic",
        "3": "sheetal",
    }
    priority_list = ["indic_trans", "Azure", "sheetal", "libindic"]
    skipped_sentences = ("", " . . .", " . .", " . . ”")
    sentences = sentence_tokenize.sentence_split(text, lang="hi")
    source_lang = "hi" if source_lang == "ne" else source_lang

    transliterated_text = []
    for sentence in sentences:
        if sentence in skipped_sentences:
            continue
        print("original_sentence", sentence)
        temp_sentence = punct_remover(sentence)
        print("sentence_without_punctuation", temp_sentence)

        indic_out = indic_trans(temp_sentence, source_script, dest_script)
        azure_out = azure_transliteration(
            temp_sentence, source_lang, source_script, dest_script
        )
        print("before t1111111111")
        libindic_out = libindic(temp_sentence, dest_script).rstrip()
        print("before sheetal", libindic_out)
        sheetal_out = sheetal(temp_sentence).replace("\n", "")
        print("after sheetal", sheetal_out)
        engine_outputs = (indic_out, azure_out, libindic_out, sheetal_out)

        chosen_words = []
        for idx, word in enumerate(temp_sentence.split()):
            # i-th token of every engine output, in sources_name order.
            candidates = [out.split()[idx] for out in engine_outputs]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        trans_sent_wo_punct = " ".join(chosen_words)

        print("trans_sent_wo_punct", trans_sent_wo_punct)
        transliterated_sentence = final_transliterated_sentence(
            sentence, trans_sent_wo_punct
        )
        print("trans_sent_w_punct", transliterated_sentence)
        transliterated_text.append(transliterated_sentence)
    print("Entered Exiting Here1212")
    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_dev_rom_ph1(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` word by word using four engines.

    Sentences come from ``sentence_split(text, lang="hi")``. Every word is
    sent to indic_trans, Azure, libindic and sheetal, and the candidates are
    handed to ``compare_outputs_transliteration``, whose return value is kept.

    Returns:
        The chosen words joined by single spaces.
    """
    sources_name = {
        "0": "indic_trans",
        "1": "Azure",
        "2": "libindic",
        "3": "sheetal",
    }
    priority_list = ["indic_trans", "Azure", "sheetal", "libindic"]
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="hi"):
        if sentence in skipped_sentences:
            continue
        chosen_words = []
        for word in sentence.split():
            if word == ".":
                continue
            # Candidate order must match the indices in sources_name.
            candidates = [
                indic_trans(word, source_script, dest_script),
                azure_transliteration(
                    word, source_lang, source_script, dest_script
                ),
                libindic(word, dest_script).rstrip(),
                sheetal(word).replace("\n", ""),
            ]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        transliterated_text.append(" ".join(chosen_words))

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_arbic_to_rom_ph1(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` word by word using three engines.

    Every word is sent to indic_trans, Azure and ``buck_2_unicode``; the
    candidates are handed to ``compare_outputs_transliteration``, whose return
    value is kept.

    Returns:
        The chosen words joined by single spaces.
    """
    priority_list = ["indic_trans", "Azure", "buck_2_unicode"]
    sources_name = {"0": "indic_trans", "1": "Azure", "2": "buck_2_unicode"}
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        chosen_words = []
        for word in sentence.split():
            if word == ".":
                continue
            # Candidate order must match the indices in sources_name.
            candidates = [
                indic_trans(word, source_script, dest_script),
                azure_transliteration(
                    word, source_lang, source_script, dest_script
                ),
                buck_2_unicode(word),
            ]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        transliterated_text.append(" ".join(chosen_words))

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_kann_to_rom_ph1(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` word by word using four engines.

    Every word is sent to ``om_transliterator``, indic_trans, libindic and
    Azure; the candidates are handed to ``compare_outputs_transliteration``,
    whose return value is kept.

    Returns:
        The chosen words joined by single spaces.
    """
    priority_list = ["om_transliteration", "indic_trans", "libindic", "Azure"]
    sources_name = {
        "0": "om_transliteration",
        "1": "indic_trans",
        "2": "libindic",
        "3": "Azure",
    }
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        chosen_words = []
        for word in sentence.split():
            if word == ".":
                continue
            # Candidate order must match the indices in sources_name.
            candidates = [
                om_transliterator(word),
                indic_trans(word, source_script, dest_script),
                libindic(word, dest_script),
                azure_transliteration(
                    word, source_lang, source_script, dest_script
                ),
            ]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        transliterated_text.append(" ".join(chosen_words))

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_tamil_to_rom_ph1(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` word by word using Azure, libindic, indic_trans.

    Every word is sent to the three engines and the candidates are handed to
    ``compare_outputs_transliteration`` together with ``sources_name`` (index
    -> engine label) and ``priority_list``; its return value is kept.

    The candidate list is ordered Azure, libindic, indic_trans so that each
    position agrees with its label in ``sources_name``. The original code
    passed the indic_trans output at index 1 and the libindic output at
    index 2, i.e. under each other's labels.

    Returns:
        The chosen words joined by single spaces.
    """
    sources_name = {
        "0": "Azure",
        "1": "libindic",
        "2": "indic_trans",
    }
    priority_list = ["Azure", "libindic", "indic_trans"]
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        chosen_words = []
        for word in sentence.split():
            if word == ".":
                continue
            azure_out = azure_transliteration(
                word, source_lang, source_script, dest_script
            )
            libindic_out = libindic(word, dest_script)
            indic_trans_out = indic_trans(word, source_script, dest_script)
            # Positions line up with sources_name: 0=Azure, 1=libindic,
            # 2=indic_trans.
            outputs = [azure_out, libindic_out, indic_trans_out]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, outputs, sources_name, priority_list
                )
            )
        transliterated_text.append(" ".join(chosen_words))

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` word by word using Azure, indic_trans, libindic.

    Every word is sent to the three engines and the candidates are handed to
    ``compare_outputs_transliteration``, whose return value is kept.

    Returns:
        The chosen words joined by single spaces.
    """
    priority_list = ["Azure", "indic_trans", "libindic"]
    sources_name = {"0": "Azure", "1": "indic_trans", "2": "libindic"}
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        chosen_words = []
        for word in sentence.split():
            if word == ".":
                continue
            # Candidate order must match the indices in sources_name.
            candidates = [
                azure_transliteration(
                    word, source_lang, source_script, dest_script
                ),
                indic_trans(word, source_script, dest_script),
                libindic(word, dest_script),
            ]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        transliterated_text.append(" ".join(chosen_words))
    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_latin_gurmukhi(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using three engines.

    ``source_lang`` is overwritten with "pa". Each sentence has its
    punctuation blanked and is sent to Azure,
    ``indic_transliteration_GURMUKHI`` and ``unicode_transliteration_GURMUKHI``;
    the i-th token of every output is handed to
    ``compare_outputs_transliteration``. Punctuation is restored afterwards
    with ``final_transliterated_sentence``.

    Returns:
        The rebuilt sentences joined by single spaces.
    """
    source_lang = "pa"
    priority_list = ["Azure", "indic_trans", "indic_trans_IAST"]
    sources_name = {"0": "Azure", "1": "indic_trans", "2": "indic_trans_IAST"}
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        temp_sentence = punct_remover(sentence)

        # Evaluated left to right, same call order as before.
        engine_outputs = (
            azure_transliteration(
                temp_sentence, source_lang, source_script, dest_script
            ),
            indic_transliteration_GURMUKHI(temp_sentence),
            unicode_transliteration_GURMUKHI(temp_sentence),
        )
        chosen_words = []
        for idx, word in enumerate(temp_sentence.split()):
            candidates = [out.split()[idx] for out in engine_outputs]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        transliterated_text.append(
            final_transliterated_sentence(sentence, " ".join(chosen_words))
        )

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_latin_cyrillic(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using two engines.

    ``source_lang`` is overwritten with "bg". Each sentence has its
    punctuation blanked and is sent to Azure and
    ``transliteration_LATIN_CYRILLIC``; the i-th token of both outputs is
    handed to ``compare_outputs_transliteration``. Punctuation is restored
    afterwards with ``final_transliterated_sentence``.

    Returns:
        The rebuilt sentences joined by single spaces.
    """
    source_lang = "bg"
    priority_list = ["Azure", "indic_trans"]
    sources_name = {"0": "Azure", "1": "indic_trans"}
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        temp_sentence = punct_remover(sentence)

        # Evaluated left to right, same call order as before.
        engine_outputs = (
            azure_transliteration(
                temp_sentence, source_lang, source_script, dest_script
            ),
            transliteration_LATIN_CYRILLIC(temp_sentence),
        )
        chosen_words = []
        for idx, word in enumerate(temp_sentence.split()):
            candidates = [out.split()[idx] for out in engine_outputs]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        transliterated_text.append(
            final_transliterated_sentence(sentence, " ".join(chosen_words))
        )

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_latin_telugu_sentence_wise(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using four engines.

    ``source_lang`` is overwritten with "te". Each sentence has its
    punctuation blanked and is sent to ``indic_transliteration_TELUGU``,
    Azure, indic_trans and libindic; the i-th token of every output is handed
    to ``compare_outputs_transliteration``. Punctuation is restored afterwards
    with ``final_transliterated_sentence``. Progress is printed to stdout.

    Returns:
        The rebuilt sentences joined by single spaces.
    """
    source_lang = "te"
    priority_list = ["indic_translit", "Azure", "indic_trans", "libindic"]
    sources_name = {
        "0": "indic_translit",
        "1": "Azure",
        "2": "indic_trans",
        "3": "libindic",
    }
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="hi"):
        if sentence in skipped_sentences:
            continue
        print("original_sentence", sentence)
        temp_sentence = punct_remover(sentence)
        print("sentence_without_punctuation", temp_sentence)

        # Evaluated left to right, same call order as before.
        engine_outputs = (
            indic_transliteration_TELUGU(temp_sentence),
            azure_transliteration(
                temp_sentence, source_lang, source_script, dest_script
            ),
            indic_trans(temp_sentence, source_script, dest_script),
            libindic(temp_sentence, dest_script),
        )
        chosen_words = []
        for idx, word in enumerate(temp_sentence.split()):
            candidates = [out.split()[idx] for out in engine_outputs]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        trans_sent_wo_punct = " ".join(chosen_words)
        print("trans_sent_wo_punct", trans_sent_wo_punct)
        transliterated_sentence = final_transliterated_sentence(
            sentence, trans_sent_wo_punct
        )
        print("trans_sent_w_punct", transliterated_sentence)
        transliterated_text.append(transliterated_sentence)

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_gurmukhi_latin_sentence_wise(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using three engines.

    ``source_lang`` is overwritten with "pa". Each sentence has its
    punctuation blanked and is sent to
    ``indic_transliteration_GURMUKHI_LATIN``, Azure and
    ``unicode_transliteration_GURMUKHI_LATIN``; the i-th token of every output
    is handed to ``compare_outputs_transliteration``. Punctuation is restored
    afterwards with ``final_transliterated_sentence``. Progress is printed to
    stdout.

    Returns:
        The rebuilt sentences joined by single spaces.
    """
    source_lang = "pa"
    priority_list = ["indic_trans", "Azure", "unicode"]
    sources_name = {"0": "indic_trans", "1": "Azure", "2": "unicode"}
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="hi"):
        if sentence in skipped_sentences:
            continue
        print("original_sentence", sentence)
        temp_sentence = punct_remover(sentence)
        print("sentence_without_punctuation", temp_sentence)

        # Evaluated left to right, same call order as before.
        engine_outputs = (
            indic_transliteration_GURMUKHI_LATIN(temp_sentence),
            azure_transliteration(
                temp_sentence, source_lang, source_script, dest_script
            ),
            unicode_transliteration_GURMUKHI_LATIN(temp_sentence).rstrip(),
        )
        chosen_words = []
        for idx, word in enumerate(temp_sentence.split()):
            candidates = [out.split()[idx] for out in engine_outputs]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        trans_sent_wo_punct = " ".join(chosen_words)
        print("trans_sent_wo_punct", trans_sent_wo_punct)
        transliterated_sentence = final_transliterated_sentence(
            sentence, trans_sent_wo_punct
        )
        print("trans_sent_w_punct", transliterated_sentence)
        transliterated_text.append(transliterated_sentence)

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_cyrilic_latin_sentence_wise(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using three engines.

    ``source_lang`` is overwritten with "bg". Each sentence has its
    punctuation blanked and is sent to Azure,
    ``transliteration_CYRILIC_LATIN`` and ``ConvertToLatin``; the i-th token
    of every output is handed to ``compare_outputs_transliteration`` together
    with ``sources_name`` (index -> engine label) and ``priority_list``.
    Punctuation is restored afterwards with ``final_transliterated_sentence``.
    Progress is printed to stdout.

    The candidate list is ordered library transliteration, Azure,
    ConvertToLatin so that each position agrees with its label in
    ``sources_name`` ("indic_trans", "Azure", "unicode"). The original code
    passed the Azure output at index 0 and the library output at index 1,
    i.e. under each other's labels.

    Returns:
        The rebuilt sentences joined by single spaces.
    """
    source_lang = "bg"
    sources_name = {"0": "indic_trans", "1": "Azure", "2": "unicode"}
    priority_list = ["indic_trans", "Azure", "unicode"]
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="hi"):
        if sentence in skipped_sentences:
            continue
        print("original_sentence", sentence)
        temp_sentence = punct_remover(sentence)
        print("sentence_without_punctuation", temp_sentence)

        # Engine call order is unchanged; only the list order below differs.
        azure_out = azure_transliteration(
            temp_sentence, source_lang, source_script, dest_script
        )
        library_out = transliteration_CYRILIC_LATIN(temp_sentence)
        convert_out = ConvertToLatin(temp_sentence)
        # Positions line up with sources_name: 0=indic_trans (library),
        # 1=Azure, 2=unicode (ConvertToLatin).
        engine_outputs = (library_out, azure_out, convert_out)

        chosen_words = []
        for idx, word in enumerate(temp_sentence.split()):
            outputs = [out.split()[idx] for out in engine_outputs]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, outputs, sources_name, priority_list
                )
            )
        trans_sent_wo_punct = " ".join(chosen_words)
        print("trans_sent_wo_punct", trans_sent_wo_punct)
        transliterated_sentence = final_transliterated_sentence(
            sentence, trans_sent_wo_punct
        )
        print("trans_sent_w_punct", transliterated_sentence)
        transliterated_text.append(transliterated_sentence)

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using three engines.

    ``source_lang`` is replaced by the language code matching ``dest_script``
    (Gujarati, Oriya, Malayalam, Tamil, Bengali or Kannada); for any other
    ``dest_script`` the caller's value is kept. Each sentence has its
    punctuation blanked and is sent to Azure, libindic and indic_trans; the
    i-th token of every output is handed to
    ``compare_outputs_transliteration``. Punctuation is restored afterwards
    with ``final_transliterated_sentence``.

    Returns:
        The rebuilt sentences joined by single spaces.
    """
    script_to_lang = (
        ("Gujarati", "gu"),
        ("Oriya", "or"),
        ("Malayalam", "ml"),
        ("Tamil", "ta"),
        ("Bengali", "bn"),
        ("Kannada", "kn"),
    )
    for script_name, lang_code in script_to_lang:
        if dest_script == script_name:
            source_lang = lang_code

    # NOTE(review): the labels below do not name libindic although its output
    # is passed at index 1 -- confirm against compare_outputs_transliteration.
    priority_list = ["Azure", "indic_trans", "indic_trans_IAST"]
    sources_name = {"0": "Azure", "1": "indic_trans", "2": "indic_trans_IAST"}
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        temp_sentence = punct_remover(sentence)
        # Evaluated left to right, same call order as before.
        engine_outputs = (
            azure_transliteration(
                temp_sentence, source_lang, source_script, dest_script
            ),
            libindic(temp_sentence, dest_script),
            indic_trans(temp_sentence, source_script, dest_script),
        )
        chosen_words = []
        for idx, word in enumerate(temp_sentence.split()):
            candidates = [out.split()[idx] for out in engine_outputs]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        transliterated_text.append(
            final_transliterated_sentence(sentence, " ".join(chosen_words))
        )

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_or_ml_gu_te_devanagari(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using three engines.

    Each sentence has its punctuation blanked and is sent to
    ``indic_transliteration_OTHER_DEVANAGRI``, libindic and indic_trans; the
    i-th token of every output is handed to
    ``compare_outputs_transliteration``. Punctuation is restored afterwards
    with ``final_transliterated_sentence``. ``source_lang`` is not used.

    Returns:
        The rebuilt sentences joined by single spaces.
    """
    # NOTE(review): labels are generic and do not name the engines actually
    # called below -- confirm against compare_outputs_transliteration.
    priority_list = ["Azure", "indic_trans", "indic_trans_IAST"]
    sources_name = {"0": "Azure", "1": "indic_trans", "2": "indic_trans_IAST"}
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        temp_sentence = punct_remover(sentence)
        # Evaluated left to right, same call order as before.
        engine_outputs = (
            indic_transliteration_OTHER_DEVANAGRI(temp_sentence, source_script),
            libindic(temp_sentence, source_script),
            indic_trans(temp_sentence, source_script, dest_script),
        )
        chosen_words = []
        for idx, word in enumerate(temp_sentence.split()):
            candidates = [out.split()[idx] for out in engine_outputs]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        transliterated_text.append(
            final_transliterated_sentence(sentence, " ".join(chosen_words))
        )

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_devanagari_or_ml_gu_te(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using three engines.

    Each sentence has its punctuation blanked and is sent to
    ``indic_transliteration_DEVANAGRI_OTHER``, libindic and indic_trans; the
    i-th token of every output is handed to
    ``compare_outputs_transliteration``. Punctuation is restored afterwards
    with ``final_transliterated_sentence``. ``source_lang`` is not used.

    Returns:
        The rebuilt sentences joined by single spaces.
    """
    # NOTE(review): labels are generic and do not name the engines actually
    # called below -- confirm against compare_outputs_transliteration.
    priority_list = ["Azure", "indic_trans", "indic_trans_IAST"]
    sources_name = {"0": "Azure", "1": "indic_trans", "2": "indic_trans_IAST"}
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        temp_sentence = punct_remover(sentence)
        # Evaluated left to right, same call order as before.
        engine_outputs = (
            indic_transliteration_DEVANAGRI_OTHER(temp_sentence, dest_script),
            libindic(temp_sentence, source_script),
            indic_trans(temp_sentence, source_script, dest_script),
        )
        chosen_words = []
        for idx, word in enumerate(temp_sentence.split()):
            candidates = [out.split()[idx] for out in engine_outputs]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        transliterated_text.append(
            final_transliterated_sentence(sentence, " ".join(chosen_words))
        )

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_kannada_ml_ta_te_ben(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using three engines.

    Each sentence has its punctuation blanked and is sent to
    ``indic_transliteration_KANNADA_OTHER``, libindic and indic_trans; the
    i-th token of every output is handed to
    ``compare_outputs_transliteration``. Punctuation is restored afterwards
    with ``final_transliterated_sentence``. ``source_lang`` is not used.

    Returns:
        The rebuilt sentences joined by single spaces.
    """
    # NOTE(review): labels are generic and do not name the engines actually
    # called below -- confirm against compare_outputs_transliteration.
    priority_list = ["Azure", "indic_trans", "indic_trans_IAST"]
    sources_name = {"0": "Azure", "1": "indic_trans", "2": "indic_trans_IAST"}
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        temp_sentence = punct_remover(sentence)
        # Evaluated left to right, same call order as before.
        engine_outputs = (
            indic_transliteration_KANNADA_OTHER(temp_sentence, dest_script),
            libindic(temp_sentence, source_script),
            indic_trans(temp_sentence, source_script, dest_script),
        )
        chosen_words = []
        for idx, word in enumerate(temp_sentence.split()):
            candidates = [out.split()[idx] for out in engine_outputs]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        transliterated_text.append(
            final_transliterated_sentence(sentence, " ".join(chosen_words))
        )

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_ml_ta_te_ben_kannada(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using three engines.

    Each sentence has its punctuation blanked and is sent to
    ``indic_transliteration_OTHER_KANNADA``, libindic and indic_trans; the
    i-th token of every output is handed to
    ``compare_outputs_transliteration``. Punctuation is restored afterwards
    with ``final_transliterated_sentence``. ``source_lang`` is not used.

    Returns:
        The rebuilt sentences joined by single spaces.
    """
    # NOTE(review): labels are generic and do not name the engines actually
    # called below -- confirm against compare_outputs_transliteration.
    priority_list = ["Azure", "indic_trans", "indic_trans_IAST"]
    sources_name = {"0": "Azure", "1": "indic_trans", "2": "indic_trans_IAST"}
    skipped_sentences = ("", " . . .", " . .", " . . ”")

    transliterated_text = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skipped_sentences:
            continue
        temp_sentence = punct_remover(sentence)
        # Evaluated left to right, same call order as before.
        engine_outputs = (
            indic_transliteration_OTHER_KANNADA(temp_sentence, source_script),
            libindic(temp_sentence, source_script),
            indic_trans(temp_sentence, source_script, dest_script),
        )
        chosen_words = []
        for idx, word in enumerate(temp_sentence.split()):
            candidates = [out.split()[idx] for out in engine_outputs]
            chosen_words.append(
                compare_outputs_transliteration(
                    word, candidates, sources_name, priority_list
                )
            )
        transliterated_text.append(
            final_transliterated_sentence(sentence, " ".join(chosen_words))
        )

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_tamil_other(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using three candidate helpers.

    Each sentence produced by ``sentence_tokenize.sentence_split`` (``lang="en"``)
    is passed through ``punct_remover`` and transliterated by
    ``indic_transliteration_TAMIL_OTHER``, ``libindic`` and ``indic_trans``.
    The word chosen for each position is whatever
    ``compare_outputs_transliteration`` returns for the three candidates.
    The chosen words and the original sentence are then given to
    ``final_transliterated_sentence``.

    Args:
        text: Text to transliterate.
        source_lang: Not used by this function.
        source_script: Script name of ``text``.
        dest_script: Target script name.

    Returns:
        The per-sentence results joined with single spaces.

    Raises:
        IndexError: If a helper yields fewer words than the input sentence.
    """
    # NOTE(review): labels do not obviously correspond to the helpers called
    # below; left unchanged.
    label_by_slot = {"0": "Azure", "1": "indic_trans", "2": "indic_trans_IAST"}
    preferred_order = ["Azure", "indic_trans", "indic_trans_IAST"]
    skip_these = ["", " . . .", " . .", " . . ”"]

    results = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skip_these:
            continue
        stripped = punct_remover(sentence)
        variants = (
            indic_transliteration_TAMIL_OTHER(stripped, dest_script),
            libindic(stripped, source_script),
            indic_trans(stripped, source_script, dest_script),
        )
        picked = [
            compare_outputs_transliteration(
                token,
                [variant.split()[position] for variant in variants],
                label_by_slot,
                preferred_order,
            )
            for position, token in enumerate(stripped.split())
        ]
        rebuilt = final_transliterated_sentence(sentence, " ".join(picked))
        results.append(rebuilt)

    return " ".join(results)
|
||
|
|
||
|
|
||
|
def dial_comparison_transliteration_other_tamil(
    text, source_lang, source_script, dest_script
):
    """Transliterate ``text`` sentence by sentence using three candidate helpers.

    For every sentence kept after tokenisation, the punctuation-free form
    (``punct_remover``) is transliterated by
    ``indic_transliteration_OTHER_TAMIL``, ``libindic`` and ``indic_trans``.
    ``compare_outputs_transliteration`` picks one candidate per word position,
    and ``final_transliterated_sentence`` receives the original sentence plus
    the space-joined picks.

    Args:
        text: Text to transliterate.
        source_lang: Not used by this function.
        source_script: Script name of ``text``.
        dest_script: Target script name.

    Returns:
        The per-sentence results joined with single spaces.

    Raises:
        IndexError: If a helper yields fewer words than the input sentence.
    """
    # NOTE(review): labels do not obviously correspond to the helpers called
    # below; left unchanged.
    slot_names = {"0": "Azure", "1": "indic_trans", "2": "indic_trans_IAST"}
    slot_priority = ["Azure", "indic_trans", "indic_trans_IAST"]
    empty_fragments = ["", " . . .", " . .", " . . ”"]

    output_sentences = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in empty_fragments:
            continue
        plain = punct_remover(sentence)
        renderings = (
            indic_transliteration_OTHER_TAMIL(plain, source_script),
            libindic(plain, source_script),
            indic_trans(plain, source_script, dest_script),
        )
        winners = [
            compare_outputs_transliteration(
                source_word,
                [rendering.split()[n] for rendering in renderings],
                slot_names,
                slot_priority,
            )
            for n, source_word in enumerate(plain.split())
        ]
        output_sentences.append(
            final_transliterated_sentence(sentence, " ".join(winners))
        )

    return " ".join(output_sentences)
|
||
|
|
||
|
|
||
|
# -> Function to transliterate from telugu to malayalam
|
||
|
def dial_comparison_transliteration_te_to_ml(
    text, source_lang, source_script, dest_script
):
    """Transliterate Telugu-script ``text`` to Malayalam script, word by word.

    Each sentence from ``sentence_tokenize.sentence_split`` (``lang="en"``)
    is passed through ``punct_remover`` and transliterated by ``indic_trans``,
    ``libindic`` and ``indic_transliteration_TELUGU_OTHER``. For each word
    position the three candidates go to ``compare_outputs_transliteration``;
    the returned words are joined and handed, with the original sentence, to
    ``final_transliterated_sentence``.

    Args:
        text: Text to transliterate.
        source_lang: Not used by this function.
        source_script: Script name of ``text``.
        dest_script: Target script name.

    Returns:
        The per-sentence results joined with single spaces.

    Raises:
        IndexError: If a helper yields fewer words than the input sentence.
    """
    engine_names = {"0": "indic_trans", "1": "libindic", "2": "indic_trans_IAST"}
    engine_priority = ["indic_trans", "libindic", "indic_trans_IAST"]
    wordless = ["", " . . .", " . .", " . . ”"]

    collected = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in wordless:
            continue
        cleaned = punct_remover(sentence)
        engine_outputs = (
            indic_trans(cleaned, source_script, dest_script),
            libindic(cleaned, dest_script),
            indic_transliteration_TELUGU_OTHER(cleaned, dest_script),
        )
        selected = [
            compare_outputs_transliteration(
                original_word,
                [output.split()[k] for output in engine_outputs],
                engine_names,
                engine_priority,
            )
            for k, original_word in enumerate(cleaned.split())
        ]
        collected.append(
            final_transliterated_sentence(sentence, " ".join(selected))
        )

    return " ".join(collected)
|
||
|
|
||
|
|
||
|
# -> Function to transliterate from malayalam to telugu
|
||
|
def dial_comparison_transliteration_ml_to_te(
    text, source_lang, source_script, dest_script
):
    """Transliterate Malayalam-script ``text`` to Telugu script, word by word.

    Each sentence from ``sentence_tokenize.sentence_split`` (``lang="en"``)
    is passed through ``punct_remover`` and transliterated by
    ``azure_transliteration``, ``libindic`` and
    ``indic_transliteration_MALAYALAM_OTHER``. For each word position the
    three candidates go to ``compare_outputs_transliteration``; the returned
    words are joined with spaces and handed, with the original sentence, to
    ``final_transliterated_sentence``.

    Args:
        text: Text to transliterate.
        source_lang: Language code forwarded to ``azure_transliteration``.
        source_script: Script name of ``text``.
        dest_script: Target script name.

    Returns:
        The per-sentence results joined with single spaces.

    Raises:
        IndexError: If a helper yields fewer words than the input sentence.
    """
    # NOTE(review): slot "0" is still labelled "indic_trans" although its
    # candidate now comes from azure_transliteration (the earlier indic_trans
    # call for this slot was disabled). Labels are left untouched because
    # the selection helper receives them alongside priority_list.
    sources_name = {"0": "indic_trans",
                    "1": "libindic", "2": "indic_trans_IAST"}
    etc_punctuation = ["", " . . .", " . .", " . . ”"]
    priority_list = ["indic_trans", "libindic", "indic_trans_IAST"]
    transliterated_text = []

    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in etc_punctuation:
            continue
        temp_sentence = punct_remover(sentence)
        t00 = azure_transliteration(
            temp_sentence, source_lang, source_script, dest_script
        )
        t11 = libindic(temp_sentence, dest_script)
        t22 = indic_transliteration_MALAYALAM_OTHER(temp_sentence, dest_script)

        selected_words = []
        for i, word in enumerate(temp_sentence.split()):
            # Candidates are aligned with the input purely by word position.
            outputs = [t00.split()[i], t11.split()[i], t22.split()[i]]
            selected_words.append(
                compare_outputs_transliteration(
                    word, outputs, sources_name, priority_list
                )
            )

        # Join the words chosen for this sentence. The previous code joined
        # the *characters* of the last Azure candidate word, which discarded
        # the selection above and broke on sentences without any words.
        trans_sent_wo_punct = " ".join(selected_words)
        transliterated_text.append(
            final_transliterated_sentence(sentence, trans_sent_wo_punct)
        )

    return " ".join(transliterated_text)
|
||
|
|
||
|
|
||
|
# -> Function to transliterate from gujarati and oriya to gurmukhi
|
||
|
def dial_comparison_transliteration_guj_or_to_gur(
    text, source_lang, source_script, dest_script
):
    """Transliterate Gujarati- or Oriya-script ``text`` to Gurmukhi, word by word.

    Each sentence from ``sentence_tokenize.sentence_split`` (``lang="en"``)
    is passed through ``punct_remover`` and transliterated by ``indic_trans``,
    ``libindic`` and ``indic_transliteration_OTHER_GURMUKHI``.
    ``compare_outputs_transliteration`` returns the word to keep for each
    position, and ``final_transliterated_sentence`` receives the original
    sentence plus the space-joined kept words.

    Args:
        text: Text to transliterate.
        source_lang: Not used by this function.
        source_script: Script name of ``text``.
        dest_script: Target script name.

    Returns:
        The per-sentence results joined with single spaces.

    Raises:
        IndexError: If a helper yields fewer words than the input sentence.
    """
    tool_labels = {"0": "indic_trans", "1": "libindic", "2": "indic_trans_IAST"}
    tool_ranking = ["indic_trans", "libindic", "indic_trans_IAST"]
    to_skip = ["", " . . .", " . .", " . . ”"]

    finished = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in to_skip:
            continue
        unpunctuated = punct_remover(sentence)
        attempts = (
            indic_trans(unpunctuated, source_script, dest_script),
            libindic(unpunctuated, dest_script),
            indic_transliteration_OTHER_GURMUKHI(unpunctuated, source_script),
        )
        kept = [
            compare_outputs_transliteration(
                src_word,
                [attempt.split()[pos] for attempt in attempts],
                tool_labels,
                tool_ranking,
            )
            for pos, src_word in enumerate(unpunctuated.split())
        ]
        finished.append(final_transliterated_sentence(sentence, " ".join(kept)))

    return " ".join(finished)
|
||
|
|
||
|
|
||
|
# -> Function to transliterate from gurmukhi and oriya to gujarati
|
||
|
def dial_comparison_transliteration_gur_or_to_guj(
    text, source_lang, source_script, dest_script
):
    """Transliterate Gurmukhi- or Oriya-script ``text`` to Gujarati, word by word.

    Each sentence from ``sentence_tokenize.sentence_split`` (``lang="en"``)
    is passed through ``punct_remover`` and transliterated by ``indic_trans``,
    ``libindic`` and ``indic_transliteration_OTHER_GUJARATI``.
    ``compare_outputs_transliteration`` returns the word to keep for each
    position, and ``final_transliterated_sentence`` receives the original
    sentence plus the space-joined kept words.

    Args:
        text: Text to transliterate.
        source_lang: Not used by this function.
        source_script: Script name of ``text``.
        dest_script: Target script name.

    Returns:
        The per-sentence results joined with single spaces.

    Raises:
        IndexError: If a helper yields fewer words than the input sentence.
    """
    provider_names = {"0": "indic_trans", "1": "libindic", "2": "indic_trans_IAST"}
    provider_order = ["indic_trans", "libindic", "indic_trans_IAST"]
    blank_like = ["", " . . .", " . .", " . . ”"]

    done = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in blank_like:
            continue
        core = punct_remover(sentence)
        provider_results = (
            indic_trans(core, source_script, dest_script),
            libindic(core, dest_script),
            indic_transliteration_OTHER_GUJARATI(core, source_script),
        )
        best_words = [
            compare_outputs_transliteration(
                input_word,
                [result.split()[at] for result in provider_results],
                provider_names,
                provider_order,
            )
            for at, input_word in enumerate(core.split())
        ]
        done.append(final_transliterated_sentence(sentence, " ".join(best_words)))

    return " ".join(done)
|
||
|
|
||
|
|
||
|
# -> Function to transliterate from gujarati and gurmukhi to oriya
|
||
|
def dial_comparison_transliteration_guj_gur_to_or(
    text, source_lang, source_script, dest_script
):
    """Transliterate Gujarati- or Gurmukhi-script ``text`` to Oriya, word by word.

    Each sentence from ``sentence_tokenize.sentence_split`` (``lang="en"``)
    is passed through ``punct_remover`` and transliterated by ``indic_trans``,
    ``libindic`` and ``indic_transliteration_OTHER_ORIYA``.
    ``compare_outputs_transliteration`` returns the word to keep for each
    position, and ``final_transliterated_sentence`` receives the original
    sentence plus the space-joined kept words.

    Args:
        text: Text to transliterate.
        source_lang: Not used by this function.
        source_script: Script name of ``text``.
        dest_script: Target script name.

    Returns:
        The per-sentence results joined with single spaces.

    Raises:
        IndexError: If a helper yields fewer words than the input sentence.
    """
    names_by_index = {"0": "indic_trans", "1": "libindic", "2": "indic_trans_IAST"}
    order_of_trust = ["indic_trans", "libindic", "indic_trans_IAST"]
    nothing_to_do = ["", " . . .", " . .", " . . ”"]

    assembled = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in nothing_to_do:
            continue
        words_only = punct_remover(sentence)
        alternatives = (
            indic_trans(words_only, source_script, dest_script),
            libindic(words_only, dest_script),
            indic_transliteration_OTHER_ORIYA(words_only, source_script),
        )
        decided = [
            compare_outputs_transliteration(
                given_word,
                [alternative.split()[j] for alternative in alternatives],
                names_by_index,
                order_of_trust,
            )
            for j, given_word in enumerate(words_only.split())
        ]
        assembled.append(
            final_transliterated_sentence(sentence, " ".join(decided))
        )

    return " ".join(assembled)
|
||
|
|
||
|
|
||
|
# -> Function to transliterate from latin to arabic
|
||
|
def dial_comparison_transliteration_latin_arabic(
    text, source_lang, source_script, dest_script
):
    """Transliterate Latin-script ``text`` to Arabic script one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    (``lang="en"``). Every whitespace-separated word except a lone ``"."`` is
    sent to ``azure_transliteration`` and ``transString``; the word returned
    by ``compare_outputs_transliteration`` for those two candidates is kept.

    Args:
        text: Text to transliterate.
        source_lang: Ignored; the Azure call always receives ``"ar"``.
        source_script: Script name of ``text``.
        dest_script: Target script name.

    Returns:
        The kept words of each sentence joined by spaces, and the sentences
        themselves joined by spaces.
    """
    backend_labels = {"0": "Azure", "1": "transString"}
    ranking = ["Azure", "transString"]
    ignorable = ("", " . . .", " . .", " . . ”")
    # The caller's source_lang is overridden for the Azure request.
    azure_lang = "ar"

    rendered = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in ignorable:
            continue
        picks = [
            compare_outputs_transliteration(
                word,
                [
                    azure_transliteration(
                        word, azure_lang, source_script, dest_script
                    ),
                    transString(word, 1),
                ],
                backend_labels,
                ranking,
            )
            for word in sentence.split()
            if word != "."
        ]
        rendered.append(" ".join(picks))

    return " ".join(rendered)
|
||
|
|
||
|
|
||
|
# -> Function to transliterate from chinese to latin
|
||
|
def dial_comparison_transliteration_chinese_latin(
    text, source_lang, source_script, dest_script
):
    """Transliterate Chinese ``text`` to Latin script one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    (``lang="en"``). Every whitespace-separated word except a lone ``"."`` is
    sent to ``azure_transliteration`` and ``translit_CHINESE_LATIN``; the word
    returned by ``compare_outputs_transliteration`` for those two candidates
    is kept. The joined result is finally passed through
    ``unidecode.unidecode``.

    Args:
        text: Text to transliterate.
        source_lang: Language code forwarded to ``azure_transliteration``.
        source_script: Script name of ``text``.
        dest_script: Target script name.

    Returns:
        The ``unidecode`` rendering of all kept words joined by spaces.
    """
    label_by_slot = {"0": "Azure", "1": "pinyin"}
    preferred_order = ["Azure", "pinyin"]
    skip_these = ("", " . . .", " . .", " . . ”")

    per_sentence = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in skip_these:
            continue
        kept_words = [
            compare_outputs_transliteration(
                token,
                [
                    azure_transliteration(
                        token, source_lang, source_script, dest_script
                    ),
                    translit_CHINESE_LATIN(token),
                ],
                label_by_slot,
                preferred_order,
            )
            for token in sentence.split()
            if token != "."
        ]
        per_sentence.append(" ".join(kept_words))

    joined = " ".join(per_sentence)
    return unidecode.unidecode(joined)
|
||
|
|
||
|
|
||
|
# -> Function to transliterate from thai, sinhala, mongolian and Hebrew to latin
|
||
|
def dial_comparison_transliteration_th_sin_mng_heb_latin(text, source_lang, source_script, dest_script):
    """Transliterate ``text`` to Latin script one word at a time.

    The text is split into sentences with ``sentence_tokenize.sentence_split``
    (``lang="en"``). Every whitespace-separated word except a lone ``"."`` is
    sent to ``azure_transliteration`` and ``translit_th_sin_mng_heb_to_latin``;
    the word returned by ``compare_outputs_transliteration`` for those two
    candidates is kept.

    Args:
        text: Text to transliterate.
        source_lang: Language code for the Azure call; ``"iw"`` is replaced
            by ``"he"`` before it is used.
        source_script: Script name of ``text``.
        dest_script: Target script name.

    Returns:
        The kept words of each sentence joined by spaces, and the sentences
        themselves joined by spaces.
    """
    slot_names = {"0": "Azure", "1": "anyascii"}
    slot_priority = ["Azure", "anyascii"]
    empty_fragments = ("", " . . .", " . .", " . . ”")
    azure_lang = "he" if source_lang == "iw" else source_lang

    sentence_outputs = []
    for sentence in sentence_tokenize.sentence_split(text, lang="en"):
        if sentence in empty_fragments:
            continue
        winners = []
        for source_word in sentence.split():
            if source_word != ".":
                pair = [
                    azure_transliteration(
                        source_word, azure_lang, source_script, dest_script
                    ),
                    translit_th_sin_mng_heb_to_latin(source_word),
                ]
                winners.append(
                    compare_outputs_transliteration(
                        source_word, pair, slot_names, slot_priority
                    )
                )
        sentence_outputs.append(" ".join(winners))

    return " ".join(sentence_outputs)
|
||
|
|
||
|
|
||
|
def compare_outputs_transliteration(word, outputs, sources_name, priority_list):
    """Return the preferred transliteration among ``outputs``.

    The decision is delegated to ``selection_source_transliteration``, which
    must return exactly two values. The first value is printed for debugging
    and its element ``0`` is returned.

    Args:
        word: The source word. It is not used here; it was only needed by a
            docx comparison-table call that is currently disabled.
        outputs: Candidate transliterations, one per source.
        sources_name: Mapping from candidate index (as a string) to a label.
        priority_list: Source labels in order of preference.

    Returns:
        Element ``0`` of the first value returned by
        ``selection_source_transliteration``.
    """
    # The second value was used only for the disabled docx comparison table.
    first_choice, _second_choice = selection_source_transliteration(
        sources_name, outputs, priority_list
    )
    print(first_choice, "compare all transliterations")
    return first_choice[0]
|
||
|
|
||
|
|
||
|
def add_dial_comparison_doc2_transliteration(
    doc2, table2, word, O1ANDS1, O2ANDS2, sources_name
):
    """Append one comparison row to ``table2``.

    Column 0 receives ``word``. Columns 1 and 2 receive element ``0`` of
    ``O1ANDS1`` and ``O2ANDS2`` respectively, each followed by a run reading
    ``(Source : <element 1>)``.

    Args:
        doc2: Not used by this function.
        table2: Table object providing ``add_row().cells``.
        word: Text for the first column.
        O1ANDS1: Indexable pair (output text, source label) for column 1.
        O2ANDS2: Indexable pair (output text, source label) for column 2.
        sources_name: Not used by this function.
    """
    cells = table2.add_row().cells
    cells[0].text = word
    for column, ranked in ((1, O1ANDS1), (2, O2ANDS2)):
        cells[column].text = ranked[0]
        cells[column].paragraphs[0].add_run(
            "(Source : {})".format(str(ranked[1]))
        )
|
||
|
|
||
|
|
||
|
# -> Housing all the Script Pair Combinations for Transliterations
|
||
|
def transliterate(dest_script, src_script, src_lang, text):
|
||
|
print("transliterate",dest_script, src_script, src_lang, text)
|
||
|
# if src_script == "Common" or dest_script == "Common" or src_script == "None" or dest_script == "None" or src_script == dest_script:
|
||
|
# return
|
||
|
trans_text = text
|
||
|
if dest_script == "Latin" and src_script == "Devanagari":
|
||
|
# trans_text = dial_comparison_transliteration_dev_rom_ph1(text, src_lang, src_script,dest_script)
|
||
|
trans_text = dial_comparison_transliteration_dev_rom_ph1_sentence_wise(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Devanagari" and src_script == "Latin":
|
||
|
trans_text = dial_comparison_transliteration_rom_dev_ph1(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
# trans_text=dial_comparison_transliteration_rom_dev_ph1_sentence_wise(text, src_lang, src_script,dest_script)
|
||
|
elif dest_script == "Latin" and src_script == "Arabic":
|
||
|
trans_text = dial_comparison_transliteration_arbic_to_rom_ph1(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Kannada":
|
||
|
trans_text = dial_comparison_transliteration_kann_to_rom_ph1(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Tamil":
|
||
|
trans_text = dial_comparison_transliteration_tamil_to_rom_ph1(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Bengali":
|
||
|
trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Telugu":
|
||
|
trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Malayalam":
|
||
|
trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gurmukhi" and src_script == "Latin":
|
||
|
trans_text = dial_comparison_transliteration_latin_gurmukhi(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Cyrillic" and src_script == "Latin":
|
||
|
trans_text = dial_comparison_transliteration_latin_cyrillic(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Telugu" and src_script == "Latin":
|
||
|
trans_text = dial_comparison_transliteration_latin_telugu_sentence_wise(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Gurmukhi":
|
||
|
trans_text = dial_comparison_transliteration_gurmukhi_latin_sentence_wise(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Cyrillic":
|
||
|
trans_text = dial_comparison_transliteration_cyrilic_latin_sentence_wise(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Gujarati":
|
||
|
trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Oriya":
|
||
|
trans_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gujarati" and src_script == "Latin":
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Oriya" and src_script == "Latin":
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Tamil" and src_script == "Latin":
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Malayalam" and src_script == "Latin":
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Bengali" and src_script == "Latin":
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Devanagari" and src_script == "Oriya":
|
||
|
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Devanagari" and src_script == "Gujarati":
|
||
|
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Devanagari" and src_script == "Malayalam":
|
||
|
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Devanagari" and src_script == "Telugu":
|
||
|
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Oriya" and src_script == "Devanagari":
|
||
|
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gujarati" and src_script == "Devanagari":
|
||
|
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Malayalam" and src_script == "Devanagari":
|
||
|
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Telugu" and src_script == "Devanagari":
|
||
|
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Devanagari" and src_script == "Bengali":
|
||
|
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Devanagari" and src_script == "Gurmukhi":
|
||
|
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Devanagari" and src_script == "Kannada":
|
||
|
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Bengali" and src_script == "Devanagari":
|
||
|
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gurmukhi" and src_script == "Devanagari":
|
||
|
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Kannada" and src_script == "Devanagari":
|
||
|
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Tamil" and src_script == "Kannada":
|
||
|
trans_text = dial_comparison_transliteration_kannada_ml_ta_te_ben(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Malayalam" and src_script == "Kannada":
|
||
|
trans_text = dial_comparison_transliteration_kannada_ml_ta_te_ben(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Telugu" and src_script == "Kannada":
|
||
|
trans_text = dial_comparison_transliteration_kannada_ml_ta_te_ben(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Kannada" and src_script == "Tamil":
|
||
|
trans_text = dial_comparison_transliteration_ml_ta_te_ben_kannada(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Kannada" and src_script == "Malayalam":
|
||
|
trans_text = dial_comparison_transliteration_ml_ta_te_ben_kannada(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Kannada" and src_script == "Telugu":
|
||
|
trans_text = dial_comparison_transliteration_ml_ta_te_ben_kannada(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Kannada" and src_script == "Latin":
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Tamil" and src_script == "Devanagari":
|
||
|
trans_text = dial_comparison_transliteration_devanagari_or_ml_gu_te(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Devanagari" and src_script == "Tamil":
|
||
|
trans_text = dial_comparison_transliteration_or_ml_gu_te_devanagari(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Telugu" and src_script == "Tamil":
|
||
|
trans_text = dial_comparison_transliteration_tamil_other(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Malayalam" and src_script == "Tamil":
|
||
|
trans_text = dial_comparison_transliteration_tamil_other(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Tamil" and src_script == "Malayalam":
|
||
|
trans_text = dial_comparison_transliteration_other_tamil(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Tamil" and src_script == "Telugu":
|
||
|
trans_text = dial_comparison_transliteration_other_tamil(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Malayalam" and src_script == "Telugu":
|
||
|
trans_text = dial_comparison_transliteration_te_to_ml(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Telugu" and src_script == "Malayalam":
|
||
|
trans_text = dial_comparison_transliteration_ml_to_te(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gurmukhi" and src_script == "Gujarati":
|
||
|
trans_text = dial_comparison_transliteration_guj_or_to_gur(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gujarati" and src_script == "Gurmukhi":
|
||
|
trans_text = dial_comparison_transliteration_gur_or_to_guj(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gujarati" and src_script == "Oriya":
|
||
|
trans_text = dial_comparison_transliteration_gur_or_to_guj(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gurmukhi" and src_script == "Oriya":
|
||
|
trans_text = dial_comparison_transliteration_guj_or_to_gur(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Oriya" and src_script == "Gujarati":
|
||
|
trans_text = dial_comparison_transliteration_guj_gur_to_or(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Oriya" and src_script == "Gurmukhi":
|
||
|
trans_text = dial_comparison_transliteration_guj_gur_to_or(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Bengali" and src_script == "Kannada":
|
||
|
trans_text = dial_comparison_transliteration_kannada_ml_ta_te_ben(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Kannada" and src_script == "Bengali":
|
||
|
trans_text = dial_comparison_transliteration_ml_ta_te_ben_kannada(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Devanagari" and src_script == "Arabic":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_rom_dev_ph1(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gurmukhi" and src_script == "Arabic":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_gurmukhi(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gujarati" and src_script == "Arabic":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Cyrillic" and src_script == "Arabic":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_cyrillic(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Arabic" and src_script == "Latin":
|
||
|
trans_text = dial_comparison_transliteration_latin_arabic(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Cyrillic" and src_script == "Devanagari":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_dev_rom_ph1_sentence_wise(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_cyrillic(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Kannada" and src_script == "Arabic":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Tamil" and src_script == "Arabic":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Telugu" and src_script == "Arabic":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_telugu_sentence_wise(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Malayalam" and src_script == "Arabic":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Bengali" and src_script == "Arabic":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Oriya" and src_script == "Arabic":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_arbic_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Cyrillic" and src_script == "Kannada":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_kann_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_cyrillic(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gujarati" and src_script == "Kannada":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_kann_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gurmukhi" and src_script == "Kannada":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_kann_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_gurmukhi(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Oriya" and src_script == "Kannada":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_kann_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Cyrillic" and src_script == "Tamil":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_tamil_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_cyrillic(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Tamil" and src_script == "Cyrillic":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_cyrilic_latin_sentence_wise(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Tamil" and src_script == "Bengali":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Telugu" and src_script == "Bengali":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_telugu_sentence_wise(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Malayalam" and src_script == "Bengali":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Arabic" and src_script == "Devanagari":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_dev_rom_ph1_sentence_wise(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_arabic(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Arabic" and src_script == "Cyrillic":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_cyrilic_latin_sentence_wise(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_arabic(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Arabic" and src_script == "Gurmukhi":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_gurmukhi_latin_sentence_wise(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_arabic(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Arabic" and src_script == "Gujarati":
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_beng_tel_mal_to_rom_ph1(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_arabic(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Hanji":
|
||
|
if src_lang == "zh-CN":
|
||
|
src_lang = "zh-Hans"
|
||
|
trans_text = dial_comparison_transliteration_chinese_latin(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Devanagari" and src_script == "Hanji":
|
||
|
if src_lang == "zh-CN":
|
||
|
src_lang = "zh-Hans"
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_chinese_latin(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_rom_dev_ph1(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Arabic" and src_script == "Hanji":
|
||
|
if src_lang == "zh-CN":
|
||
|
src_lang = "zh-Hans"
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_chinese_latin(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_arabic(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gurmukhi" and src_script == "Hanji":
|
||
|
if src_lang == "zh-CN":
|
||
|
src_lang = "zh-Hans"
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_chinese_latin(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_gurmukhi(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Gujarati" and src_script == "Hanji":
|
||
|
if src_lang == "zh-CN":
|
||
|
src_lang = "zh-Hans"
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_chinese_latin(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Oriya" and src_script == "Hanji":
|
||
|
if src_lang == "zh-CN":
|
||
|
src_lang = "zh-Hans"
|
||
|
temp_dest_script = "Latin"
|
||
|
temp_text = dial_comparison_transliteration_chinese_latin(
|
||
|
text, src_lang, src_script, temp_dest_script
|
||
|
)
|
||
|
trans_text = dial_comparison_transliteration_latin_to_gu_or_ml_ta_bn(
|
||
|
temp_text, src_lang, temp_dest_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Thai":
|
||
|
trans_text = dial_comparison_transliteration_th_sin_mng_heb_latin(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Sinhala":
|
||
|
trans_text = dial_comparison_transliteration_th_sin_mng_heb_latin(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Hebrew":
|
||
|
trans_text = dial_comparison_transliteration_th_sin_mng_heb_latin(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
elif dest_script == "Latin" and src_script == "Mongolian":
|
||
|
src_lang = "mn-Cyrl"
|
||
|
trans_text = dial_comparison_transliteration_th_sin_mng_heb_latin(
|
||
|
text, src_lang, src_script, dest_script
|
||
|
)
|
||
|
return trans_text
|
||
|
|
||
|
|
||
|
# -> Main transliteration function that coordinates all the helper functions
|
||
|
def makeTransliteration_only(**kwargs):
    """Write a transliterated copy of a script to a .docx file.

    The script at ``original_file`` is parsed into scenes; each scene is then
    re-emitted into a fresh Word document, transliterating action lines
    and/or dialogues through ``transliterate`` when the matching flag is set.

    Keyword Args:
        original_file: Path of the script to read.
        scenes_original: Optional scenes paired line-by-line with the parsed
            ones; their dialogue text is used as the first column of
            dual-script output. When falsy, the parsed scenes are paired
            with themselves.
        is_dialogue_transliteration_required: Transliterate dialogues when
            truthy.
        is_action_line_transliteration_required: Transliterate action lines
            when truthy.
        dual_dial_script: ``"Yes"`` writes each dialogue through
            ``dual_script`` (original text plus converted text); anything
            else writes it through ``addDialogue``.
        dial_dest_script: Destination script for dialogues.
        dial_dest_lang: Used only to build the output file name.
        action_line_dest_script / action_line_src_script /
            action_line_src_lang: Passed to ``transliterate`` for action
            lines.
        restrict_to_five: ``"yes"`` limits processing to the first 5 scenes.
        ignore_because_sample_script: When ``True`` the output is saved to
            ``original_file`` itself instead of a generated path.

    Returns:
        ``(original_file, scenes_original)`` untouched when neither kind of
        transliteration nor dual-script output is requested; otherwise
        ``(doc_file, scenes_current)`` where ``doc_file`` is the saved path
        and ``scenes_current`` the scenes parsed from ``original_file``.
    """
    # Setting the variables required for transliteration
    dial_dest_script = kwargs.get("dial_dest_script")
    dual_dial_script = kwargs.get("dual_dial_script")
    original_file = kwargs.get("original_file")
    dial_dest_lang = kwargs.get("dial_dest_lang")
    is_dialogue_transliteration_required = kwargs.get(
        "is_dialogue_transliteration_required"
    )
    is_action_line_transliteration_required = kwargs.get(
        "is_action_line_transliteration_required"
    )
    action_line_dest_script = kwargs.get("action_line_dest_script")
    action_line_src_lang = kwargs.get("action_line_src_lang")
    action_line_src_script = kwargs.get("action_line_src_script")
    scenes_original = kwargs.get("scenes_original")
    restrict_to_five = kwargs.get("restrict_to_five")

    # -> Checking if transliteration is really required or not.
    # The explicit `== False` comparisons are deliberate: a missing flag
    # (None) must NOT take this early exit, which `not flag` would do.
    if (
        is_dialogue_transliteration_required == False  # noqa: E712
        and is_action_line_transliteration_required == False  # noqa: E712
        and dual_dial_script == "No"
    ):
        return original_file, scenes_original

    # Create an instance of a word document
    doc = docx.Document()

    if kwargs.get("ignore_because_sample_script") == True:  # noqa: E712
        doc_file = original_file
    else:
        now = datetime.datetime.now(timezone("UTC")).astimezone(
            timezone("Asia/Kolkata")
        )
        # NOTE(review): the stamp is "%H" immediately followed by "%I"
        # (24-hour hour, then 12-hour hour). "%H%M" may have been intended;
        # kept as-is so generated file names do not change.
        doc_file = (
            basePath
            + "/media/scripts/translated/"
            + "trans_"
            + str(dial_dest_lang)
            + "_"
            + now.strftime("%d_%b_%H%I")
            + "_trans_of_"
            + ntpath.basename(original_file)
        )

    # -> Getting all the scenes from the script file with updated action
    #    lines from whichever previously concluded steps
    refined, total_scenes = getRefined(original_file)
    sluglines, without_slug = getSlugAndNonSlug(refined)
    characters = getSpeakers(without_slug)
    scenes1, actionline, parenthetical_lis, speakers, dialogues = getScenes(
        refined, total_scenes, characters
    )

    # -> Restricting number of scenes to five if user only wants a sample
    if restrict_to_five == "yes":
        scenes1 = scenes1[:5]

    # -> Sample the action-line language and the dialogue language/script
    #    once up front, to avoid running detection on every line below.
    # Every action line past the slug line assigns non_dial_src_lang, so it
    # is always set before the first addActionLine call further down.
    non_dial_src_lang = None
    dial_src_lang = None
    dial_src_script = None
    dial_source_known = False
    for scene in tqdm(scenes1):
        found_action_line = False
        found_dialogue = False
        for i, line in enumerate(scene):
            if i == 0:
                # First entry of a scene is its slug line.
                continue
            if isinstance(line, str):
                found_action_line = True
                non_dial_src_lang = language_detector(line)
                continue
            [speaker] = line.keys()
            if speaker == "Transition":
                continue
            # Only parenthetical-free, non-empty dialogues are sampled.
            if line[speaker][0] != "NONE":
                continue
            if line[speaker][2] == "":
                continue
            found_dialogue = True
            dial_source_known = True
            dial_src_lang = language_detector(line[speaker][2])
            dial_src_script = script_det(line[speaker][2])

        # NOTE(review): placed at scene level (stop scanning once one scene
        # supplied both samples); the original indentation was not
        # recoverable from the source view — confirm.
        if found_action_line and found_dialogue:
            break

    scenes_current = scenes1
    paired_scenes = zip(
        scenes1, scenes_original if scenes_original else scenes1
    )

    # -> Transliterating the text begins here
    for scene, scene_original in tqdm(paired_scenes):
        for i, (line, line_original) in enumerate(zip(scene, scene_original)):
            if i == 0:
                addSlugLine(doc, line)
                continue

            if isinstance(line, str):
                print("transliterating action lines ", action_line_dest_script,
                      action_line_src_script,
                      action_line_src_lang,
                      line)
                if is_action_line_transliteration_required:
                    trans_text = transliterate(
                        action_line_dest_script,
                        action_line_src_script,
                        action_line_src_lang,
                        line,
                    )
                else:
                    trans_text = line
                addActionLine(doc, trans_text, non_dial_src_lang)
                continue

            print("In dialogue")
            [speaker] = line.keys()
            if speaker == "Transition":
                # If transitions should be translated along with action
                # lines, translate the text before calling addTransition.
                addTransition(doc, line[speaker])
                continue

            addSpeaker(doc, speaker)
            if line[speaker][0] != "NONE":  # In parenthetical part
                addParenthetical(doc, line[speaker][0])

            print("dialogue to be transliterated ", line[speaker][2])
            if line[speaker][2] == "":
                continue
            trans_text = line[speaker][2]

            # The sampling pass skips dialogues that carry a parenthetical,
            # so it can finish without ever seeing a usable dialogue. Detect
            # from the current line in that case instead of failing on
            # unassigned variables.
            if not dial_source_known:
                dial_src_lang = language_detector(trans_text)
                dial_src_script = script_det(trans_text)
                dial_source_known = True

            if (
                is_dialogue_transliteration_required
                and dial_dest_script != dial_src_script
            ):
                trans_text = transliterate(
                    dial_dest_script, dial_src_script, dial_src_lang, trans_text
                )

            if dual_dial_script == "Yes":
                # presumably line_original has the same speaker key as line;
                # verify against the caller that supplies scenes_original.
                dual_script(
                    doc, line_original[speaker][2], trans_text, dial_src_lang
                )
            else:
                addDialogue(doc, trans_text, dial_src_lang)

    # Saving the doc file
    doc.save(doc_file)
    print("done file is saved")
    return doc_file, scenes_current
|