Conversion_Kitchen_Code/kitchen_counter/conversion/translation/fourstepconversion.py

563 lines
28 KiB
Python
Raw Permalink Normal View History

2024-04-27 09:33:09 +00:00
import fitz
from datetime import datetime
import PyPDF2
import os
import subprocess
from subprocess import Popen
# from narration.vectorcode.code.functions import ScriptBreakdown
import copy
from .detection import language_detector
from conversion.translation.script_reading import breaksen, getRefined, getSlugAndNonSlug, getSpeakers, getScenes
from conversion.translation.script_writing import addSlugLine, addActionLine, addSpeaker, addParenthetical, addDialogue, dual_script, addTransition, dial_checker, non_dial_checker
from conversion.translation.translation_function import translate_comparison, ui_option3_and_4, convert_to_pdf
from conversion.translation.translation_variables import get_doc_file, get_pdf_save_dir, is_in_translation_list, get_language_script_code
from conversion.translation.final_transliteration_only2 import makeTransliteration_only, add_dual_dialogue
from utils.utilities import csv_to_docx
from conversion.translation.translation_resources import google
from MNF.settings import BasePath
# from page_script.models import MNFScriptDatabase_2, Conversions, Conversion_link
from mnfapp.models import MNFScriptDatabase, ScriptTranslations
from tqdm import tqdm
import pandas as pd
import docx
basePath = BasePath()
from centralisedFileSystem.models import File, Script, ScreenPlay
from django.core.files.base import ContentFile
def _transliterate_line(text, lang, src_script, dest_script):
    """Transliterate one script line through ``makeTransliteration_only``.

    The text is always passed as ``str(text)`` and dual-dialogue output is
    always disabled ("No"); dual dialogue is handled separately in step four
    of ``translate_function``.
    """
    return makeTransliteration_only(
        dest_script=dest_script,
        dual_dial_script="No",
        line=str(text),
        lang=lang,
        src_script=src_script,
    )


def _first_five_scenes(dataframe):
    """Return the leading rows of *dataframe* covering at most five scenes.

    A new scene is counted at every row whose element type (index 3) is
    "slugline"; everything from the sixth slugline onwards is dropped.
    """
    scene_count = 0
    for idx, row in enumerate(dataframe):
        if row[3] == "slugline":
            scene_count += 1
            if scene_count > 5:
                return dataframe[:idx]
    return dataframe[:]


def translate_function(request, audited_id, dataframe, script_path, script_id, translation_id, dial_src_lang,
                       action_line_src_lang, dial_src_script, restrict_to_five, option3, option4, option5, option6,
                       centralised="no", sample_id="0000", sample_title="", existing_script=None):
    """Run the four-step conversion of an audited script and save it as a PDF.

    Each row of ``dataframe`` is indexed as ``row[2]`` (content) and ``row[3]``
    (script element such as "slugline", "action", "dialogue", "parenthetical").
    A deep copy of the rows is converted line by line:

    1. transliterate source text into the default script of its language
       (only when the text is written in another script and will be translated),
    2. translate action lines, dialogues and parentheticals,
    3. transliterate the result into the requested destination script,
    4. build the output document via ``add_dual_dialogue``.

    Args:
        request: Django request; ``request.user`` and ``request.session`` are
            read when the converted script is registered as a new screenplay.
        audited_id: Unused by the current implementation.
        dataframe: Sequence of rows matching the columns
            ``['dataframe', 'para_no', 'content', 'script_element']``.
            It is not modified.
        script_path: Path of the source script, forwarded to ``get_doc_file``.
        script_id: Id used to load the script's database entry.
        translation_id: Id of the ``ScriptTranslations`` row holding the
            destination languages/scripts (non-centralised flow).
        dial_src_lang: Source language of the dialogues.
        action_line_src_lang: Source language of the action lines.
        dial_src_script: Script the dialogues are currently written in.
        restrict_to_five: ``"yes"`` to convert only the first five scenes.
        option3: Forwarded to ``ui_option3_and_4`` when the detected dialogue
            language equals ``action_line_src_lang``.
        option4: Forwarded to ``ui_option3_and_4`` when the detected dialogue
            language differs from both ``action_line_src_lang`` and the
            dialogue destination language.
        option5: Unused by the current implementation.
        option6: Unused by the current implementation.
        centralised: ``"yes"`` selects the legacy centralised-database flow.
        sample_id: Forwarded to ``get_doc_file``.
        sample_title: Forwarded to ``get_doc_file``; a title longer than two
            characters marks a sample-script conversion, which is saved under
            the sample translations directory and not registered as a
            screenplay.
        existing_script: When truthy, the session's ``iteration_no`` is
            appended to the name of the registered screenplay.

    Returns:
        The path of the generated PDF, or ``None`` when the required database
        entry could not be found.
    """
    # Imported here because the module defines no logger of its own.
    import logging

    logger = logging.getLogger(__name__)

    # -> Extracting information from DB
    obj = None
    translation_instance = None
    if centralised == "yes":
        # NOTE(review): Conversion_link and MNFScriptDatabase_2 are only
        # imported in a commented-out line at the top of this file, so this
        # branch raises NameError unless they are made available again.
        central_script = MNFScriptDatabase_2.objects.get(script_id=script_id)
        conversions = list(Conversion_link.objects.get(script_name=central_script).conversions.all())
        if not conversions:
            logger.info("Entry for script id in Centralised Database not found")
            return None
        obj = conversions[-1]  # the last linked conversion is the one to run
        action_line_dest_lang = obj.nondial_dest_language
        dial_dest_lang = obj.dial_dest_language
        dual_dial_script = obj.dual_dial_script
        dial_dest_script = obj.dial_dest_script
        action_line_src_script = obj.nondial_src_script
        action_line_dest_script = obj.nondial_dest_script
        data = [str(central_script.user_id), str(central_script.script_name), str(script_id), str(dial_src_lang)]
    else:
        try:
            translation_instance = ScriptTranslations.objects.get(translation_id=str(translation_id))
        except ScriptTranslations.DoesNotExist:
            # Nothing below can work without the destination settings.
            logger.info("Entry for script id not found")
            return None
        dial_dest_lang = translation_instance.dial_dest_language
        dial_dest_script = translation_instance.dial_dest_script
        action_line_dest_lang = translation_instance.nondial_dest_language
        action_line_dest_script = translation_instance.nondial_dest_script
        dual_dial_script = translation_instance.dual_dial_script
        user_script_data = MNFScriptDatabase.objects.get(script_id=str(script_id))
        action_line_src_script = user_script_data.nondial_src_script
        data = [
            user_script_data.dial_src_language,
            user_script_data.nondial_src_language,
            action_line_dest_lang,
            user_script_data.dial_src_script,
            translation_instance.dial_dest_script,
            user_script_data.nondial_src_script,
            translation_instance.nondial_dest_script,
        ]

    # -> Resolving the output document name
    doc_file = get_doc_file(dial_dest_lang, script_path, sample_id, sample_title, centralised, data)
    print("Doc Name: ", doc_file)
    print("get_language_script_code(dial_src_lang)-> ",
          get_language_script_code(dial_src_lang))
    print("dial_src_script->", dial_src_script)

    # -> Deciding which steps are required.
    # Translation is needed whenever source and destination language differ.
    # Step-one transliteration is needed when, in addition, the text is not
    # written in the default script of its own language (it is brought back to
    # that default script so the translation can work on it).
    is_dialogue_translation_required = dial_src_lang != dial_dest_lang
    is_action_line_translation_required = action_line_src_lang != action_line_dest_lang
    skip_translation_dialogue = not is_dialogue_translation_required
    skip_translation_action_line = not is_action_line_translation_required
    is_dialogue_transliteration_required = (
        is_dialogue_translation_required
        and dial_src_script != get_language_script_code(dial_src_lang)
    )
    is_action_line_transliteration_required = (
        is_action_line_translation_required
        and action_line_src_script != get_language_script_code(action_line_src_lang)
    )
    print("Actionlines Data ->", action_line_src_lang, action_line_dest_lang, action_line_src_script,
          action_line_dest_script)
    print("Dialogues Data ->", dial_src_lang, dial_dest_lang, dial_src_script, dial_dest_script)

    # -> Working copy of the rows (optionally limited to five scenes)
    if restrict_to_five == "yes":
        converted_df = copy.deepcopy(_first_five_scenes(dataframe))
    else:
        converted_df = copy.deepcopy(dataframe)

    # -> Saving the audited (unconverted) file next to the output document
    audited_df = pd.DataFrame(dataframe,
                              columns=['dataframe', 'para_no', 'content', 'script_element'])
    audited_file = csv_to_docx(audited_df)
    # NOTE(review): split('.') cuts at the first dot anywhere in the path.
    audited_file.save(f"{(doc_file.split('.'))[0]}_audited.docx")

    # Default scripts of the destination languages; step three is needed when
    # the requested destination script differs from them.
    action_line_src_script2 = get_language_script_code(action_line_dest_lang)
    dial_src_script2 = get_language_script_code(dial_dest_lang)
    dial_needs_dest_script = dial_src_script2 != dial_dest_script
    action_needs_dest_script = action_line_src_script2 != action_line_dest_script

    for idx, line in enumerate(converted_df):
        element = line[3]

        # ---- Step One: bring the source text to its language's default script
        if is_dialogue_transliteration_required or is_action_line_transliteration_required:
            print("Step One Started")
            if element == "action" and is_action_line_transliteration_required:
                print("is_action_line_transliteration_required")
                print("action_line_src_lang:", action_line_src_lang)
                print("lang:", action_line_src_lang)
                print("action_line_src_script", action_line_src_script)
                converted_df[idx][2] = _transliterate_line(
                    line[2], action_line_src_lang, action_line_src_script,
                    get_language_script_code(action_line_src_lang))
            elif element == "dialogue" and is_dialogue_transliteration_required:
                print("is_dialogue_transliteration_required")
                print("dial_src_lang:", dial_src_lang)
                print("lang:", dial_src_lang)
                print("dial_src_script", dial_src_script)
                converted_df[idx][2] = _transliterate_line(
                    line[2], dial_src_lang, dial_src_script,
                    get_language_script_code(dial_src_lang))

        # ---- Step Two: translation
        if is_dialogue_translation_required or is_action_line_translation_required:
            print("Step Two Started")
            if element == "action" and is_action_line_translation_required:
                if is_in_translation_list(action_line_src_lang) and is_in_translation_list(action_line_dest_lang):
                    converted_df[idx][2] = translate_comparison(
                        str(line[2]), action_line_src_lang, action_line_dest_lang)
                else:
                    # Unsupported language pair: keep the text untranslated.
                    converted_df[idx][2] = str(line[2])
            elif element == "dialogue" and is_dialogue_translation_required:
                dial_lang = language_detector(str(line[2]))
                if dial_lang == action_line_src_lang:
                    converted_df[idx][2] = ui_option3_and_4(
                        dial_lang, dial_dest_lang, str(line[2]), option3, "No")
                elif dial_lang != dial_dest_lang:
                    print("in case ui4")
                    converted_df[idx][2] = ui_option3_and_4(
                        dial_lang, dial_dest_lang, str(line[2]), option4, "No")
                # A dialogue already in the destination language is left as is.
            elif element == "parenthetical" and is_dialogue_translation_required:
                par_lang = language_detector(str(line[2]))
                if dial_dest_lang == par_lang:
                    converted_df[idx][2] = str(line[2])
                else:
                    converted_df[idx][2] = translate_comparison(str(line[2]), par_lang, dial_dest_lang)

        # ---- Step Three: transliterate into the requested destination script
        if dial_needs_dest_script or action_needs_dest_script:
            print("Step Three Started")
            if element == "action" and action_needs_dest_script:
                converted_df[idx][2] = _transliterate_line(
                    line[2], action_line_dest_lang, action_line_src_script2, action_line_dest_script)
            elif element == "dialogue" and dial_needs_dest_script:
                converted_df[idx][2] = _transliterate_line(
                    line[2], dial_dest_lang, dial_src_script2, dial_dest_script)
        # Fallback when translation was skipped: transliterate straight from
        # the source script. The two branches below are exactly the cases the
        # original "skip_a or skip_d and (...)" guard could ever act on.
        # NOTE(review): the action-line skip flag gates the dialogue
        # transliteration and vice versa; kept as found - confirm it is intended.
        elif skip_translation_action_line and dial_src_script != dial_dest_script:
            if element == "dialogue":
                converted_df[idx][2] = _transliterate_line(
                    line[2], dial_dest_lang, dial_src_script, dial_dest_script)
        elif skip_translation_dialogue and action_line_src_script != action_line_dest_script:
            if element == "action":
                converted_df[idx][2] = _transliterate_line(
                    line[2], action_line_dest_lang, action_line_src_script, action_line_dest_script)

    print("Original Dataframe is->", dataframe)
    print("Converted Dataframe is->", converted_df)
    print("Dual dialogue option", dual_dial_script)

    # ---- Step Four: build the output document (with dual dialogue if asked)
    converted_file = add_dual_dialogue(converted_df, dataframe, action_line_dest_lang, dial_dest_lang,
                                       dual_dial_script)
    # The earlier file-based implementation of the four steps, previously kept
    # here as commented-out code, was removed; see the file's history.

    # -> Saving for the centralised (script page) flow
    if centralised == "yes":
        # NOTE(review): converted_file is never written to doc_file in this
        # branch - confirm whether convert_to_pdf is expected to find it.
        doc_file = doc_file[1:]
        folders_path = "".join("/" + str(part) for part in doc_file.split("/")[:-1])
        convert_to_pdf(str(doc_file), str(folders_path))
        pathspecific = "".join("." + str(part) for part in doc_file.split('.')[:-1])
        saveFile = pathspecific + ".pdf"
        obj.translated_script_path = saveFile.split('MNF')[1]
        obj.save()
        print("saved done go to ")
        return saveFile

    # -> Saving for the regular / sample-script flow
    is_sample_script = len(sample_title) > 2
    if is_sample_script:
        print(sample_title)
        print(doc_file)
        converted_file.save(doc_file)
        print("convert to pdf called", "and it is inside sample script translations")
        convert_to_pdf(doc_file, rf'{basePath}/media/scripts/sample_script_translations/{sample_title}/')
    else:
        converted_file.save(doc_file)
        convert_to_pdf(doc_file, get_pdf_save_dir())
    saveFile = doc_file.split('.')[0] + ".pdf"
    translation_instance.translated_script_path = saveFile.split('MNF')[1]
    translation_instance.save()
    if is_sample_script:
        return saveFile

    # -> Registering the converted docx as a new screenplay so that it can be
    #    edited in the script pad
    docx_name = (doc_file.split("/"))[-1]
    audit_name = str(docx_name.split(".")[0])
    if existing_script:
        audit_name += f"_{request.session.get('iteration_no')}"
    from utils import filesystem
    with open(doc_file, 'rb') as docx_handle:
        docx_content = ContentFile(docx_handle.read(), docx_name)
    result = filesystem.new_screenplay(
        request.user,
        request.user.username,
        audit_name,
        docx_content,
        "script-original",
        "en",
    )
    audit_id = result.get("script", {}).get("id")
    print("----> Script id from conv-aud is here=", audit_id)
    translation_instance.converted_audit_id = audit_id
    translation_instance.save()
    return saveFile