# Conversion_Kitchen_Code/kitchen_counter/scriptAudit/mnf_script_audit.py
# (file-listing metadata from the source host: 2812 lines, 120 KiB, Python, executable)
import os
import re
import sys
from pathlib import Path
# import time
import datetime
# from utils import filesystem,utilities
import pandas as pd
from django.core.files.base import ContentFile
import time
# from users.models import UserCredentialsForBlockchain
# from utils.scripts_functions import countPages
# import page_script.models as ps_models
# from mnfapp.views import update_juggernaut
from centralisedFileSystem.models import File, Script, ScreenPlay
# from utils import filesystem
# from conversion.translation.detection import script_det, language_detector
# from conversion.translation.translation_variables import get_language_script_code, language_code
# from page_script.views import _file_path
# from page_script import views as page_script
from scriptAudit import sa_functions as sf
from scriptAudit import sa_functions_english as sf_eng
from scriptAudit.models import ScriptAuditModel, States
# from mnfapp.views import update_juggernaut
from io import BytesIO
import datetime
import pytz
import subprocess
#from django_q.tasks import async_task
# from django_q.brokers import Broker
class NeutralAudit:
def __init__(
    self,
    script_id: str = None,
    log: bool = False,
) -> None:
    """
    To Audit a Script already uploded.
    _________________________________________________________________
    Parameters :
        script_id : str -> Id of the script to be Audited
                    default = None
        log : bool -> when True, redirect stdout to a per-script
                    "_log.txt" file and keep intermediate files;
                    default = False
    _________________________________________________________________
    Return :
        None
    _________________________________________________________________
    Side effects :
        - creates media/audit_folder/<script_id>/ if missing
        - rebinds sys.stdout (restored by __del__)
        - appends to time_taken.txt in the audit folder
    """
    self.start_time_count = time.time()
    print("<<<<<<<<<<<<<<<<<<<<<<<<<")
    self.matrices_path = str(Path(__file__).resolve().parent) + "/matrices/"
    self.total_time_file = str(Path(__file__).resolve().parent)
    print(script_id, "SCRIPT-ID IS HERE|| AYYA")
    self.script_id = script_id
    audit_root_dir = (
        str(Path(__file__).resolve().parent.parent) + "/media/audit_folder/"
    )
    self.script_name = str(self.script_id)
    output_dir = os.path.join(audit_root_dir, self.script_name)
    # Locate the originally uploaded script file for this script id.
    file_to_audit = File.objects.get(
        script=script_id,
        type="script-original",
    )
    self.input_script = file_to_audit.file.path
    if not os.path.exists(output_dir):
        try:
            os.mkdir(output_dir)
        except Exception as exp:
            # Fallback for permission issues with the process umask.
            # NOTE(review): chmod 777 is overly permissive — confirm the
            # deployment really needs world-writable audit folders.
            print(repr(exp))
            subprocess.run(["mkdir", output_dir])
            subprocess.run(["chmod", "777", output_dir])
    self.base_file_path = str(output_dir) + "/"
    self.csv_removed_space_between_words = (
        self.base_file_path + "space_between_words_removed.csv"
    )
    self.audit_report_csv = self.base_file_path + "audit_spreadsheet.csv"
    # BUGFIX: a handle to os.devnull used to be opened here and then
    # immediately overwritten on both branches below, leaking the file
    # descriptor on every instantiation. Open only the stream we keep.
    if log:
        log_file = self.base_file_path + "_log.txt"
        sys.stdout = open(log_file, "w", encoding="utf-8")
        self.gen_int_files = True
    else:
        self.gen_int_files = False
        sys.stdout = sys.__stdout__
    self.audit_model_obj = ScriptAuditModel.objects.get(
        script = Script.objects.get(
            id = self.script_id,
        )
    )
    time_file = self.base_file_path + "time_taken.txt"
    start_time = datetime.datetime.now()
    print(start_time)
    with open(time_file, "a") as file007:
        # BUGFIX: "started" was written twice (copy-paste duplication).
        file007.write("started\n\n")
def __del__(self) -> None:
sys.stdout = sys.__stdout__
def update_audit_df(self, df, audit_df):
print("inside update audit df")
print(df.dtypes)
print(audit_df.dtypes)
lines_not_removed = audit_df.loc[audit_df["line_removed"] != "Yes"].index.to_list()
audit_df.sort_index(inplace=True)
# audit_df.reset_index().to_csv(audit_report_csv,index =False)
audit_df["audited_line_no"] = ""
audited_line_no = 1
for line in lines_not_removed:
new_data = ""
try:
new_data = df.loc[df["line_no"] == line, "data"].values[0]
except:
pass
# print(new_data)
try:
audit_df["Identification_Status"][line] = df.loc[
df["line_no"] == line, "Identification_Status"
].values[0]
except:
pass
audit_df["scene_number"][line] = df.loc[
df["line_no"] == line, "scene_number"
].values[0]
audit_df["data_corrected"][line] = new_data
audit_df["line_removed"][line] = "No"
audit_df["audited_line_no"][line] = audited_line_no
audited_line_no += 1
# print(audit_df.loc[audit_df['line_no'] == line, 'data_corrected'])
audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
return audit_df
def update_audit_df_intro(self, df, audit_df):
print("update_audit_df_intro")
audit_df.reset_index(inplace=True, drop=True)
new_data = ""
for line in audit_df.index:
try:
print("line",line)
if audit_df["introduction"][line] == "Yes":
try:
new_data = df.loc[df["line_no"] == line, "data"].values[0]
except Exception as e:
print("Exception 174:",e)
pass
audit_df["data_corrected"][line] = new_data
except Exception as e:
print(e)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("\n\n update_audit_df_intro : complete \n")
return audit_df
def update_audit_df_appendix(self, df, audit_df):
new_data = ""
print(audit_df.index)
for line in audit_df.index:
if audit_df["appendix"][line] == "Yes":
try:
new_data = df.loc[df["line_no"] == line, "data"].values[0]
except:
pass
audit_df["data_corrected"][line] = new_data
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("\n\n update_audit_df_appendix : complete \n")
return audit_df
def update_is_identified(self, df):
print("Updating is Identified")
df["Identification_Status"].fillna("", inplace=True)
for index in df.index:
print(index,df["Identification_Status"][index])
try:
if df["Identification_Status"][index]:
line_pos = df["Identification_Status"][index].split(";")
pos_count = len(line_pos)
else:
pos_count = 0
except:
pos_count = 0
print(pos_count)
if pos_count == 1:
df["isIdentified"][index] = "Yes"
else:
df["isIdentified"][index] = "No"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("\n\n Inside update_is_identified : complete \n")
return df
def before_audit(self, lang: str = None):
    """
    Pre-audit pipeline: normalise the uploaded script before auditing.

    Steps, in order (each step appends a progress line to time_taken.txt):
      1. convert the uploaded pdf/docx to plain text, then to a CSV of lines
      2. best-effort page count to estimate audit duration (stored on the
         ScriptAuditModel as a formatted IST timestamp)
      3. pre-assign weights, build the audit tracking frame (audit_df)
      4. trim introduction and appendix, strip page numbers
      5. remove extra blank lines, merge broken lines, collapse spaced-out
         words
      6. sync audit_df with the cleaned frame and mark pre_audit_run=True

    Parameters :
        lang : str -> language of the provided script, default None.
               NOTE(review): all three lang branches call the English trim
               helpers; only the progress-log text differs — confirm
               whether non-English trimming was intended here.

    Return :
        (df, audit_df) -> cleaned lines frame and audit tracking frame.
    """
    # Intermediate artefact paths inside this script's audit folder.
    output_converted_txt = self.base_file_path + "temp.txt"
    output_converted_docx = self.base_file_path + "temp.docx"
    csv_for_pre_processing = self.base_file_path + "for_pre_processing.csv"
    csv_for_processing = self.base_file_path + "for_processing.csv"
    csv_prepped_for_audit = self.base_file_path + "prepped_for_audit.csv"
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("Inside before audit : ( 0-16 ) \n\n")
    ## convert pdf/docx to text
    sf.conv_to_txt(
        self.input_script,
        output_converted_docx,
        output_converted_txt
    )
    # Best-effort duration estimate; any failure here is non-fatal and the
    # audit proceeds without an expected_duration.
    try:
        output_docx_from_orginal_text = self.base_file_path + "original_text1.docx"
        pdf_file_path = self.base_file_path + "original_text1.pdf"
        print("b4 txt to docx")
        sf.convert_txt_to_docx(output_converted_txt,output_docx_from_orginal_text)
        print("IN THE BEGINING OF AUDIT PDF PAGES")
        print("b4 page count of pdf")
        # total_page_bf = sf.countPages(output_docx_from_orginal_text,pdf_file_path,self.base_file_path)
        sf.countPages(output_docx_from_orginal_text,pdf_file_path,self.base_file_path)
        print("temp txt converted to docx")
        # Default to 1 page if the PDF page count below fails.
        self.total_page_bf = str(1)
        try:
            print("int try pdf bf")
            self.total_page_bf = sf.PdfCounter(pdf_file_path)
            print("taotal_page_bf", str(self.total_page_bf))
        except Exception as exp:
            print(repr(exp))
            print("page bf didnt work")
            pass
        # self.audit_model_obj.number_of_pages = int(total_page_bf)
        # Estimate: 26 s per page plus a 120 s base, reported in minutes.
        time_per_page = 26
        base_time = 120
        no_of_pages = int(self.total_page_bf)
        formula_of_counting_pages = (time_per_page * no_of_pages) + base_time
        print("time required for auditing is :",formula_of_counting_pages)
        extimated_time = round(formula_of_counting_pages / 60, 1)
        print("extimated_time:",extimated_time)
        print("Exstimated time is updated")
        # Expected completion time, expressed in Asia/Kolkata local time.
        kolkata_time = datetime.datetime.now(pytz.timezone('Asia/Kolkata'))
        print(kolkata_time)
        thirty_mins_later = kolkata_time + datetime.timedelta(minutes=extimated_time)
        formatted_time = thirty_mins_later.strftime("%B %d, %Y %I:%M %p")
        self.audit_model_obj.expected_duration = formatted_time
        print(formatted_time)
    except:
        pass
    # self.total_line_before_audit = sf.count_the_line(output_converted_txt)
    # print("total_line_before_audit :",total_line_before_audit)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.conv_to_text 1 : before audit\n")
    ## convert to df
    sf.conv_to_csv(output_converted_txt, csv_for_pre_processing)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.conv_to_csv 2 : before audit\n")
    df = pd.read_csv(csv_for_pre_processing, encoding="utf8")
    ## direct to df.. not working as expected
    # df = pd.DataFrame()
    # df = sf.conv_to_df(output_converted_txt)
    # df.to_csv(csv_for_pre_processing,index=False)
    print("before assign weights:")
    print(df.dtypes)
    df['preassigned_weights'] = ''
    df = sf.pre_assign_wts(df)
    print(df.dtypes)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.pre_assign_wts 3 : before audit\n")
    df = df.sort_index().reset_index(drop=True)
    df.to_csv(csv_for_processing, index =False)
    df["data"].fillna("", inplace=True)
    ## make df to track audit
    audit_df = pd.DataFrame()
    # Snapshot the frame fed into create_audit_df, for debugging.
    df_1st = pd.DataFrame(df)
    df_1st.to_csv(self.base_file_path + "very_first_df_feed_to_create_audit_df.csv", index = False)
    audit_df = sf.create_audit_df(df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.create_audit_df 4 : before audit\n")
    audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
    print(df.dtypes)
    print(audit_df.dtypes)
    audit_df.reset_index().to_csv(self.base_file_path + "very_first_audit_df_feed_to_create_audit_df.csv", index = False)
    print("LANGUAGE IS",lang)
    ## trim intro
    # All branches currently use the English trimmer; only the log text
    # differs between them.
    if lang:
        if lang.upper() == "ENGLISH":
            sf_eng.trim_intro_english(df, audit_df)
            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_intro_english (5) : before audit\n")
            df = self.update_is_identified(df)
        else:
            sf_eng.trim_intro_english(df, audit_df)
            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_intro_english (6) : before audit\n")
            df = self.update_is_identified(df)
    else:
        # sf.trim_intro(df,audit_df)
        sf_eng.trim_intro_english(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf_eng.trim_intro_english (7) : before audit\n")
        df = self.update_is_identified(df)
    # Drop the lines the trimmer flagged as removed and renumber.
    lines_not_removed = audit_df.loc[
        audit_df["line_removed"] != "Yes"
    ].index.to_list()
    print(lines_not_removed)
    df = df.loc[df["line_no"].isin(lines_not_removed), :]
    df = df.sort_index().reset_index(drop=True)
    # df = df.reset_index()
    audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
    print("Trimming Appendix")
    ## trim appendix
    if lang:
        if lang.upper() == "ENGLISH":
            sf_eng.trim_appendix_english(df, audit_df)
            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_appendix_english 8: before audit\n")
            df = self.update_is_identified(df)
        else:
            sf_eng.trim_appendix_english(df, audit_df)
            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_appendix_english 9: before audit\n")
            df = self.update_is_identified(df)
    else:
        sf_eng.trim_appendix_english(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf_eng.trim_appendix_english 10 : before audit\n")
        df = self.update_is_identified(df)
    ## remove page numbers
    sf.remove_page_numbers(df, audit_df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.remove_page_numbers 11 : before audit\n")
    lines_not_removed = audit_df.loc[
        audit_df["line_removed"] != "Yes"
    ].index.to_list()
    print(lines_not_removed)
    df = df.loc[df["line_no"].isin(lines_not_removed), :]
    df = df.sort_index().reset_index(drop=True)
    # df = df.reset_index()
    audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
    ## prepare for audit
    df = sf.prep_for_audit(df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.prep_for_audit 12 : before audit\n")
    # sf.prep_for_audit(df)
    df.to_csv(csv_prepped_for_audit, index=False)
    ## remove extra blank lines
    sf.remove_extra_blank_lines(df, audit_df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.remove_extra_blank_lines 13 : before audit\n")
    lines_not_removed = audit_df.loc[
        audit_df["line_removed"] != "Yes"
    ].index.to_list()
    print(lines_not_removed)
    df = df.loc[df["line_no"].isin(lines_not_removed), :]
    df = df.sort_index().reset_index(drop=True)
    ###
    sf.remove_blank_line_after_parenthetical(df, audit_df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.remove_blank_line_after_parenthetical 14 : before audit\n")
    lines_not_removed = audit_df.loc[
        audit_df["line_removed"] != "Yes"
    ].index.to_list()
    print(lines_not_removed)
    df = df.loc[df["line_no"].isin(lines_not_removed), :]
    df = df.sort_index().reset_index(drop=True)
    ##
    sf.merge_broken_lines(df, audit_df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.merge_broken_lines 15 : before audit\n")
    lines_not_removed = audit_df.loc[
        audit_df["line_removed"] != "Yes"
    ].index.to_list()
    df = df.loc[df["line_no"].isin(lines_not_removed), :]
    df = df.sort_index().reset_index(drop=True)
    ###df.to_csv(csv_after_merge, index = False)
    ##
    sf.remove_space_between_words(df, audit_df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.remove_space_between_words 16 : before audit\n")
    df.to_csv(self.csv_removed_space_between_words, index=False)
    print("updating audit df")
    # Debug snapshots of both frames before the final sync.
    df1 = pd.DataFrame(df)
    df1.to_csv(self.base_file_path + "first_df.csv", index = False)
    audit_df1 = pd.DataFrame(audit_df)
    audit_df1.to_csv(self.base_file_path + "first_audit_df.csv", index = False)
    audit_df = self.update_audit_df(df, audit_df)
    # audit_model_obj = ScriptAuditModel.objects.get(
    #             script = Script.objects.get(
    #                 id = self.script_id,
    #             )
    #         )
    # Mark the pre-audit stage complete; re-fetch the model first, falling
    # back to the instance cached in __init__ if the fetch fails.
    try:
        audit_model_obj = ScriptAuditModel.objects.get(
            script = Script.objects.get(
                id = self.script_id,
            )
        )
        audit_model_obj.pre_audit_run = True
        audit_model_obj.save()
        print("TRY")
    except Exception as exp:
        print(repr(exp))
        print("EXCEPT")
        self.audit_model_obj.pre_audit_run = True
        self.audit_model_obj.save()
    print("PRE AUDIT DONE")
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("before audit complete : PRE AUDIT DONE\n\n")
    return df, audit_df
def audit(self, lang: str = None) -> None:
"""
Run Audit on NeutralAudit object.
_________________________________________________________________
Parameters :
lang : str -> language of the provided script.
default = None (language nuteral rules)
_________________________________________________________________
Return :
None
_________________________________________________________________
"""
# ---------------------------changes to save _audited.csv in media/scriptpage/script/folder
# csv_parawise_status = self.audited_script_path
# ---------------------------changes to save _audited.csv in media/scriptpage/script/folder
print("<<<<<<<<<<<<<<<<<<<<<<<<<")
print("<<<<<<<<<<<<<<<<<<<<<<<<<",self.base_file_path)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("INSIDE AUDIT (1-87): audit\n\n")
csv_after_first_strict_conditions = (
self.base_file_path
+ "after_first_strict_conditions.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_first_strict_conditions.csv 1 : audit\n")
csv_after_gen_and_sort_weights = (
self.base_file_path
+ "after_gen_and_sort_weights.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_gen_and_sort_weights.csv 2 : audit\n")
csv_after_examined_speaker_pos = (
self.base_file_path
+ "after_examined_speaker_pos.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_examined_speaker_pos.csv 3 : audit\n")
csv_after_examined_speaker_next_lines= (
self.base_file_path
+ "after_examined_speaker_next_lines.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_examined_speaker_next_lines.csv 4 : audit\n")
csv_after_pnnbl_ineligible= (
self.base_file_path
+ "after_pnnbl_ineligible1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_pnnbl_ineligible1 (5) : audit\n")
csv_after_examine_same_content_lines= (
self.base_file_path
+ "after_examine_same_content_lines.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_examine_same_content_lines (6) : audit\n")
csv_after_examined_action_pos_part1 = (
self.base_file_path
+ "_after_examined_action_pos_part1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_action_pos_part1.csv (7) : audit\n")
csv_after_pnnbl_inelgible_after_action_pos_part1=(
self.base_file_path
+ "_after_pnnbl_inelgible_after_action_pos_part1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_action_pos_part1.csv (8) : audit\n")
csv_after_examined_action_pos_part2 = (
self.base_file_path
+ "_after_examined_action_pos_part2.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_action_pos_part2.csv (9) : audit\n")
csv_after_pnnbl_inelgible_after_action_pos_part2 = (
self.base_file_path
+ "_after_pnnbl_inelgible_after_action_pos_part2.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_action_pos_part2.csv (10) : audit\n")
csv_after_examined_same_indent_bunch = (
self.base_file_path
+ "_after_examined_same_indent_bunch.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_same_indent_bunch.csv (11) : audit\n")
csv_after_pnnbl_inelgible_after_same_indent = (
self.base_file_path
+ "_after_pnnbl_inelgible_after_same_indent.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_same_indent.csv (12) : audit\n")
csv_after_examined_relative_indent_bunch = (
self.base_file_path
+ "_after_examined_relative_indent_bunch.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_relative_indent_bunch.csv (13) : audit\n")
csv_after_examined_speaker_next_lines_after_relative_indent = (
self.base_file_path
+ "_after_examined_speaker_next_lines_after_relative_indent.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_next_lines_after_relative_indent.csv (14) : audit\n")
csv_after_pnnbl_inelgible_after_relative_indent = (
self.base_file_path
+ "after_pnnbl_inelgible_after_relative_indent_bunch.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_pnnbl_inelgible_after_relative_indent_bunch.csv (15) : audit\n")
csv_examined_speaker_using_indent = (
self.base_file_path
+ "after_examined_speaker_using_indent.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_examined_speaker_using_indent.csv (16) : audit\n")
csv_after_examined_speaker_next_lines_after_pos_sp_indent = (
self.base_file_path
+ "_after_examined_speaker_next_lines_after_pos_sp_indent.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_next_lines_after_pos_sp_indent.csv (17) : audit\n")
csv_after_pnnbl_inelgible_after_pos_sp_indent = (
self.base_file_path
+ "_after_pnnbl_inelgible_after_pos_sp_indent.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_pos_sp_indent.csv (18) : audit\n")
csv_examined_speaker_extension = (
self.base_file_path
+ "_after_examined_speaker_extension.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_extension.csv (19) : audit\n")
csv_after_examined_speaker_next_lines_after_speaker_extension = (
self.base_file_path
+ "_after_examined_speaker_next_lines_after_speaker_extension.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_next_lines_after_speaker_extension.csv(20) : audit\n")
csv_after_pnnbl_inelgible_after_speaker_extension = (
self.base_file_path
+ "_after_pnnbl_inelgible_after_speaker_extension.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_speaker_extension.csv (21) : audit\n")
csv_after_examined_action_using_top2 = (
self.base_file_path
+ "_after_examined_action_using_top2.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_action_using_top2.csv (22) : audit\n")
csv_after_pnnbl_inelgible_after_action_using_top_pnnbl = (
self.base_file_path
+ "_after_pnnbl_inelgible_after_action_using_top_pnnbl.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_action_using_top_pnnbl.csv (23) : audit\n")
csv_after_refined_action = (
self.base_file_path
+ "_after_refined_action.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_refined_action.csv (24) : audit\n")
csv_after_pnnbl_inelgible_after_refined_action = (
self.base_file_path
+ "_after_pnnbl_inelgible_after_refined_action.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_refined_action.csv (25) : audit\n")
csv_after_eligibility_using_identified_pnnbl = (
self.base_file_path
+ "_after_eligibility_using_identified_pnnbl.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl.csv (26) : audit\n")
csv_after_top_identification_part1 = (
self.base_file_path
+ "_after_top_identification_part1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part1.csv (27) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two = (
self.base_file_path
+ "after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two.csv (28) : audit\n")
csv_after_examined_speaker_pos_after_top1 = (
self.base_file_path + "_after_examined_speaker_pos_after_top1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_pos_after_top1.csv (29) : audit\n")
csv_after_examined_speaker_next_lines_after_top1 = (
self.base_file_path + "after_examined_speaker_next_lines_after_top1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_examined_speaker_next_lines_after_top1.csv (30) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part1 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part1.csv (31) : audit\n")
csv_after_examine_speaker_mix_part1 = (
self.base_file_path + "_after_examine_speaker_mix_part1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examine_speaker_mix_part1.csv (32) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1.csv (33) : audit\n")
csv_after_examine_speaker_mix_part2 = (
self.base_file_path + "_after_examine_speaker_mix_part2.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examine_speaker_mix_part2.csv (34) : audit\n")
csv_after_examined_speaker_pos_after_mix = (
self.base_file_path + "_after_examined_speaker_pos_after_mix.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_pos_after_mix.csv(35) : audit\n")
csv_after_examined_speaker_next_lines_after_mix = (
self.base_file_path + "_after_examined_speaker_next_lines_after_mix.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_next_lines_after_mix.csv (36) : audit\n")
csv_after_pnnbl_ineligible_after_mix = (
self.base_file_path + "_after_pnnbl_ineligible_after_mix.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_ineligible_after_mix.csv (37) : audit\n")
csv_after_top_identification_part2 = (
self.base_file_path + "_after_top_identification_part2.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part2.csv (38) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part2 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part2.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part2.csv (39) : audit\n")
csv_after_top_identification_part2_again = (
self.base_file_path + "_after_top_identification_part2_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part2_again.csv (40) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part2_again = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part2_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part2_again.csv (41) : audit\n")
csv_after_top_identification_part2_again_again = (
self.base_file_path + "_after_top_identification_part2_again_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part2_again_again.csv(42) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part2_again_again = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part2_again_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part2_again_again.csv (43) : audit\n")
csv_after_slug_identification = (
self.base_file_path + "_after_slug_identification.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_slug_identification.csv (44) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_slug_identification = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_slug_identification.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_slug_identification.csv (45) : audit\n")
csv_after_top_identification_part1_again = (
self.base_file_path + "_after_top_identification_part1_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part1_again.csv (46) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part1_again = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part1_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part1_again.csv (47) : audit\n")
csv_after_top_identification_part3 = (
self.base_file_path + "_after_top_identification_part3.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part3.csv (48) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part3 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part3.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part3.csv (49) : audit\n")
csv_after_top_identification_part4 = (
self.base_file_path + "_after_top_identification_part4.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part4.csv (50) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part4 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part4.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part4.csv (51) : audit\n")
csv_after_top_identification_part5 = (
self.base_file_path + "_after_top_identification_part5.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part5.csv (52) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part5 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part5.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part5.csv (53) : audit\n")
csv_after_top_identification_part6 = (
self.base_file_path + "_after_top_identification_part6.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part6.csv (54) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part6 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part6.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part6.csv (55) : audit\n")
csv_after_top_identification_part7 = (
self.base_file_path + "_after_top_identification_part7.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part7.csv (56) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part7 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part7.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part7.csv (57) : audit\n")
csv_after_top_identification_part8 = (
self.base_file_path + "_after_top_identification_part8.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part8.csv (58) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part8 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part8.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part8.csv (59) : audit\n")
csv_after_examine_among_two = (
self.base_file_path + "_after_examine_among_two.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examine_among_two.csv (60) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_examine_among_two = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_examine_among_two.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_eligibility_using_identified_pnnbl_after_examine_among_two.csv (61) : audit\n")
csv_after_examine_speaker_next_line_after_among_two = (
self.base_file_path + "_after_examine_speaker_next_line_after_among_two.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examine_speaker_next_line_after_among_two.csv (62) : audit\n")
csv_after_top2_wt_diff = (
self.base_file_path + "_after_top2_wt_diff.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top2_wt_diff.csv (63) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top2_wt_diff.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top2_wt_diff.csv (64) : audit\n")
csv_after_top2_wt_diff_again = (
self.base_file_path + "_after_top2_wt_diff_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top2_wt_diff_again.csv (65) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again.csv(66) : audit\n")
csv_after_top_identification_part1_diluted = (
self.base_file_path + "_after_top_identification_part1_diluted.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part1_diluted.csv (67) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part1_diluted = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part1_diluted.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part1_diluted.csv (68) : audit\n")
#1.2
csv_after_examine_dial_between_action = (
self.base_file_path + "_after_examine_dial_between_action.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examine_dial_between_action.csv (69) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action.csv (70) : audit\n")
csv_after_examine_among_two_again = (
self.base_file_path + "_after_examine_among_two_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examine_among_two_again.csv (71) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_examine_among_two_again = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_examine_among_two_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_examine_among_two_again.csv (72) : audit\n")
csv_after_identify_remaining_as_top = (
self.base_file_path + "after_identifying_remaining_as_top.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_identifying_remaining_as_top.csv (73) : audit\n")
csv_after_prep_for_audit_after_identification = (
self.base_file_path + "after_prep_for_audit_after_identification.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_prep_for_audit_after_identification.csv (74) : audit\n")
csv_after_audit1 = self.base_file_path + "after_audit1.csv"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_audit1.csv (75) : audit\n")
csv_after_wrapping = self.base_file_path + "after_wrapping.csv"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_wrapping.csv (76) : audit\n")
csv_after_prep_for_audit_after_wrapping = (
self.base_file_path + "after_prep_for_audit_after_wrapping.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_prep_for_audit_after_wrapping.csv (77) : audit\n")
csv_after_audit2 = self.base_file_path + "after_audit2.csv"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_audit2.csv (78) : audit\n")
output_linewise_docx = self.base_file_path + "audited_linewise.docx"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("audited_linewise.docx (79) : audit\n")
output_linewise_txt = self.base_file_path + "audited_linewise.txt"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("audited_linewise.txt (80) : audit\n")
audit_report_tabular_docx = self.base_file_path + "audit_report_tabular.docx"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("audit_report_tabular.docx (81) : audit\n")
csv_strict_conditions = self.matrices_path + "strict_conditions_230623.csv"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("strict_conditions_230623.csv : audit\n")
csv_pos_weights = self.matrices_path + "PS_Weights_250623_2.csv"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("PS_Weights_250623_2.csv (83) : audit\n")
csv_pnbl_nnbl = self.matrices_path + "pnbl_nnbl.csv"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("pnbl_nnbl.csv (84) : audit\n")
pnbl_eligibility_matrix = (
self.matrices_path + "pnbl_eligibility_matrix_250623.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("pnbl_eligibility_matrix_250623.csv (85) : audit\n")
nnbl_eligibility_matrix = (
self.matrices_path + "nnbl_eligibility_matrix_250623.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("nnbl_eligibility_matrix_250623.csv (86) : audit\n")
output_template = self.matrices_path + "ScriptTemplate5.docx"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("ScriptTemplate5.docx (87) : audit\n")
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AUDIT : audit\n\n")
df, audit_df = self.before_audit(lang)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER ASSIGNING LOCATIONS AUDIT : audit\n\n")
#######################################
sf.test_strict_conditions(df, csv_strict_conditions)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.test_strict_conditions 1 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_first_strict_conditions, index = False)
## gen weights for possibilties ## add preassigned weights
df = sf.gen_pos_weights(df, csv_pos_weights)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.gen_pos_weights 2 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
## language specific weights update
if lang:
if lang.upper() == "ENGLISH":
df = sf_eng.update_pos_wts_english(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf_eng.update_pos_wts_english 3 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = sf.sort_pos_decr_wts(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.sort_pos_decr_wts 4 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
#if self.gen_int_files:
df.to_csv(csv_after_gen_and_sort_weights, index = False)
##
sf.prep_for_pos_elimination(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.prep_for_pos_elimination 5 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df_bex1 = pd.DataFrame(df)
df_bex1.to_csv(self.base_file_path + "df_update_audit_df_b_exam_speaker_1.csv", index = False)
audit_df_bex1 = pd.DataFrame(audit_df)
audit_df_bex1.to_csv(self.base_file_path + "audit_df_update_audit_df_b_exam_speaker_1.csv", index = False)
## examine speaker possibilties
df = sf.examine_speaker_pos(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_pos 6 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df_ex1 = pd.DataFrame(df)
df_ex1.to_csv(self.base_file_path + "df_update_audit_df_exam_speaker_1.csv", index = False)
audit_df_ex1 = pd.DataFrame(audit_df)
audit_df_ex1.to_csv(self.base_file_path + "audit_df_update_audit_df_exam_speaker_1.csv", index = True)
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_pos, index = False)
print("printing info based on audit_df")
# df_b1 = pd.DataFrame(df)
# df_b1.to_csv(self.base_file_path + "df_update_audit_df_b1.csv", index = False)
print(audit_df.head(10),audit_df.dtypes)
try:
audit_df = audit_df.sort_values('audited_line_no')
except:
audit_df['audited_line_no'] = pd.to_numeric(audit_df['audited_line_no'], errors='coerce')
audit_df = audit_df.sort_values('audited_line_no')
audit_df_try1 = pd.DataFrame(audit_df)
audit_df_try1.to_csv(self.base_file_path + "audit_df_update_audit_df_try1.csv", index = True)
print(audit_df.head())
try:
audit_df = pd.merge(audit_df, df[['line_no']], on=audit_df.index, how='left')
print(audit_df.head())
# Set 'line_no' as index
audit_df.set_index('line_no', inplace=True)
print(audit_df.head())
audit_df_try2 = pd.DataFrame(audit_df)
audit_df_try2.to_csv(self.base_file_path + "audit_df_update_audit_df_try2.csv", index = True)
except Exception as e:
print(e, audit_df.head())
pass
# try:
# audit_df.reset_index(drop=True, inplace=True)
# audit_df.set_index('line_no',inplace=True)
# except Exception as e:
# print(e)
print(audit_df.head())
print(audit_df.dtypes)
audit_df_b1 = pd.DataFrame(audit_df)
audit_df_b1.to_csv(self.base_file_path + "audit_df_update_audit_df_b1.csv", index = True)
audit_df = self.update_audit_df(df, audit_df)
df_1 = pd.DataFrame(df)
df_1.to_csv(self.base_file_path + "df_update_audit_df_1.csv", index = True)
audit_df_1 = pd.DataFrame(audit_df)
audit_df_1.to_csv(self.base_file_path + "audit_df_update_audit_df_1.csv", index = True)
###
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 7 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_next_lines, index = False)
audit_df = self.update_audit_df(df, audit_df)
audit_df_u7 = pd.DataFrame(audit_df)
audit_df_u7.to_csv(self.base_file_path + "audit_df_update_audit_df_7.csv", index = True)
## do while pnnbl ineligible
sf.prep_pnnbl_wts(csv_pnbl_nnbl, self.matrices_path)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.prep_pnnbl_wts 8 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 9 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_ineligible, index = False)
## examine same content
df = sf.examine_same_content_lines(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_same_content_lines 10 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_same_content_lines, index = False)
### examine speaker next again
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 11 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
###df.to_csv(csv_after_examined_speaker_next_lines_after_same_content, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
df = self.update_is_identified(df)
###df.to_csv(csv_after_pnnbl_ineligible_after_same_content, index = False)
################
df = sf.examine_action_possibilities_part1(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_action_possibilities_part1 12 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_action_pos_part1, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 13 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_action_pos_part1, index = False)
################
df = sf.examine_action_possibilities_part2(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_action_possibilities_part2 14 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_action_pos_part2, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 15 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_action_pos_part2, index = False)
################
df = sf.examine_same_indent_bunch(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_same_indent_bunch 16 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_same_indent_bunch, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 17 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_same_indent, index = False)
#####################
##for reorganisation
# df = pd.read_csv('Script_Shatranj_pnnbl_ineligible_same_indent_bunch_new_col_2.csv')
# csv_for_pos_elimination = os.path.join(self.output_dir,os.path.splitext(self.script_name)[0])+'_for_pos_elimination.csv'
#########################
df = sf.examine_relative_indent(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_relative_indent 18 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_relative_indent_bunch, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 19 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_next_lines_after_relative_indent, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl,matrices_path)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 20 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
df.to_csv(csv_after_pnnbl_inelgible_after_relative_indent, index=False)
#######################################
df = sf.examine_pos_sp_indent(
df,
self.csv_removed_space_between_words,
csv_after_pnnbl_inelgible_after_relative_indent,
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_pos_sp_indent 21 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv( csv_examined_speaker_using_indent,index =False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 22 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_next_lines_after_pos_sp_indent, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 23 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_pos_sp_indent, index = False)
#################################
df = sf.examine_speaker_extension(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_extension 24 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv( csv_examined_speaker_extension,index =False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 25 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_next_lines_after_speaker_extension, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
print("pnnbl after speaker extension")
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 26 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_speaker_extension, index = False)
## checking
# audit_df.reset_index().to_csv(audit_report_csv,index =False)
#################################################
df = sf.examine_action_using_top2_part1(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_action_using_top2_part1 27 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_action_using_top2, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_action_using_top_pnnbl, index = False)
# #########################################
df = sf.refine_action_possibilties(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.refine_action_possibilties 28 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_refined_action, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible(df) 29 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_refined_action, index = False)
##############################
sf.prep_pnnbl_eligible_csv(pnbl_eligibility_matrix, nnbl_eligibility_matrix)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.prep_pnnbl_eligible_csv 30 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
#############################
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 31 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl, index = False)
#################################
df = sf.start_top_identifications_part1(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part1 32 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part1, index = False)
audit_df = self.update_audit_df(df, audit_df)
## examine speaker possibilties again after top1
df = sf.examine_speaker_pos(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_pos 33 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_pos_after_top1, index = False)
audit_df = self.update_audit_df(df, audit_df)
###
df_34 = pd.DataFrame(df)
df_34.to_csv(self.base_file_path + "df_export_before_34.csv", index = True)
au_df_34 = pd.DataFrame(audit_df)
au_df_34.to_csv(self.base_file_path + "audit_df_before_after_34.csv", index = True)
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 34 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_next_lines_after_top1, index=False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 35 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part1, index = False)
#################################
copy_df_examine_speaker_mix_part1 = pd.DataFrame(df)
copy_df_examine_speaker_mix_part1.to_csv(self.base_file_path + "copy_df_examine_speaker_mix_part1.csv", index = True)
###########
copy_audit_df_examine_speaker_mix_part1 = pd.DataFrame(audit_df)
copy_audit_df_examine_speaker_mix_part1.to_csv(self.base_file_path + "copy_audit_df_examine_speaker_mix_part1.csv", index = True)
##########
df = sf.examine_speaker_mix_part1(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_mix_part1 36 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_speaker_mix_part1, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 37 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1, index = False)
#################################
df_38 = pd.DataFrame(df)
df_38.to_csv(self.base_file_path + "df_export_after_38.csv", index = True)
au_df_38 = pd.DataFrame(audit_df)
au_df_38.to_csv(self.base_file_path + "audit_df_export_after_38.csv", index = True)
df = sf.examine_speaker_mix_part2(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_mix_part2 38 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_speaker_mix_part2, index = False)
audit_df = self.update_audit_df(df, audit_df)
## examine speaker possibilties again after mix
df = sf.examine_speaker_pos(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_pos 39 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_pos_after_mix, index = False)
audit_df = self.update_audit_df(df, audit_df)
###
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTERsf.examine_speaker_next_lines 40 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_next_lines_after_mix, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl,matrices_path)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 41 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_ineligible_after_mix, index = False)
# df = sf.do_while_examine_using_identified_pnnbl(df)
# df = update_is_identified(df)
# df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part2, index = False)
################################
df = sf.start_top_identifications_part2(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part2 42 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part2, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 43 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part2, index = False)
#################################
df = sf.start_top_identifications_part2(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part2 44 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part2_again, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 45 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part2_again, index = False)
#################################
df = sf.start_top_identifications_part2(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part2 46 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part2_again_again, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 47 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part2_again_again, index = False)
#################################
df = sf.start_slug_identification(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_slug_identification(df) 48 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_slug_identification, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl(df) 49 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_slug_identification, index = False)
#################################
df = sf.start_top_identifications_part1(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part1(df) 50 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part1_again, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 51 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part1_again, index = False)
#################################
df = sf.start_top_identifications_part3(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part3 52 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part3, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 53 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part3, index = False)
#################################
df = sf.start_top_identifications_part4(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part4 54 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part4, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 55 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part4, index = False)
#################################
df = sf.start_top_identifications_part5(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part5(df) 56 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part5, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 57 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part5, index = False)
#################################
df = sf.start_top_identifications_part6(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part6 58 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part6, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 59 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part6, index = False)
#################################
df = sf.start_top_identifications_part7(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part7 60 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part7, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 61 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part7, index = False)
#################################
df = sf.start_top_identifications_part8(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part8 62 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part8, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 63 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part8, index = False)
#################################
df = sf.examine_among_two(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_among_two 64 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_among_two, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 65 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_among_two, index = False)
#################################
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 66: AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_speaker_next_line_after_among_two, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 67 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two, index = False)
#################################
df = sf.examine_action_using_top2_wt_diff(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_action_using_top2_wt_diff 68 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top2_wt_diff, index = False)
audit_df = self.update_audit_df(df, audit_df)
try:
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 69 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff, index = False)
except:
pass
#################################
try:
df = sf.examine_action_using_top2_wt_diff(df)
if self.gen_int_files:
df.to_csv(csv_after_top2_wt_diff_again, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 70 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again, index = False)
except:
pass
#################################
try:
df = sf.start_top_identifications_part1_diluted(df)
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part1_diluted, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 71 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part1_diluted, index = False)
except:
pass
###################################
####################################
##1.1
df = sf.decrease_wt_dial_between_action(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.decrease_wt_dial_between_action 72 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_dial_between_action, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 73 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
#if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action, index = False)
####################################
#################################
df = sf.examine_among_two(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_among_two 74 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_among_two_again, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 75 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
#if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_among_two_again, index = False)
####################################
#################################
df = sf.identify_top_as_final(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.identify_top_as_final 76 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df_76 = pd.DataFrame(df)
df_76.to_csv(self.base_file_path + "df_identify_top_as_final_76.csv", index = False)
au_df_76 = pd.DataFrame(audit_df)
au_df_76.to_csv(self.base_file_path + "audit_df_identify_top_as_final_76.csv", index = False)
df = self.update_is_identified(df)
df.to_csv(csv_after_identify_remaining_as_top, index=False)
audit_df = self.update_audit_df(df, audit_df)
#####################################
## prepare for audit
df = sf.prep_for_audit(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.prep_for_audit 77 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df.to_csv(csv_after_prep_for_audit_after_identification, index=False)
#####################################
df, audit_df = sf.run_audit_on_identified(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.run_audit_on_identified 78 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df.to_csv(csv_after_audit1, index=False)
audit_df = self.update_audit_df(df, audit_df)
#############################################
### run language specific audit on identified
if lang:
if lang.upper() == "ENGLISH":
df = sf_eng.run_audit_on_identified_english(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf_eng.run_audit_on_identified_english 79 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
audit_df = self.update_audit_df(df, audit_df)
#####################################
### merge the beginning/middle/end lines
# df.to_csv(self.base_file_path + "df_before_merge_line_para.csv", index = Flase)
para_df = sf.merge_line_to_para(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.merge_line_to_para 80 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
try:
para_df.to_csv(self.base_file_path+ "after_merge_line_para.csv", index = False)
print("para_df is writtern")
except:
pass
print("\n\n Function 80 is completed")
try:
script_language, dialogue_language = sf.language_detector_for_csv(para_df)
print("script_language",script_language)
print("dialogue_language",dialogue_language)
unique_script_languages = ', '.join(set(lang[0] for lang in script_language))
unique_dialogue_languages = ', '.join(set(lang[0] for lang in dialogue_language))
except:
unique_script_languages = ""
unique_dialogue_languages = ""
#commented as some unwanted change of . to comma
#para_df = sf.change_dot_to_comma_inslug(para_df)
print("unique_script_languages:",unique_script_languages)
print("unique_dialogue_languages:",unique_dialogue_languages)
# para_df.to_csv(csv_parawise_status, index=False)
##
print("\n\n dot to comma changes in slug")
audited_file_name = self.script_name + ".csv"
req_file = ContentFile(
(para_df.to_csv(index=False, path_or_buf=None)).encode("utf-8"),
audited_file_name,
)
File.objects.create(
script=Script.objects.get(id=self.script_id),
type="script-csv",
file=req_file,
)
print("\n\n exporting df and audit_df agter function 80")
df_df = pd.DataFrame(df)
df_df.to_csv(self.base_file_path + "df_export_after_80.csv", index = False)
audit_df_df = pd.DataFrame(audit_df)
audit_df_df.reset_index().to_csv(self.base_file_path + "audit_df_export_after_80.csv", index = False)
print("\nwrapping identified lines if required\n")
df = sf.wrap_text(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.wrap_text 81 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df_81 = pd.DataFrame(df)
df_81.to_csv(self.base_file_path + "df_export_after_81.csv", index = False)
au_df_81 = pd.DataFrame(audit_df)
au_df_81.reset_index().to_csv(self.base_file_path + "audit_df_export_after_81.csv", index = False)
df.to_csv(csv_after_wrapping, index=False)
#audit_df['line_no'] = audit_df['line_no'].astype(float)
audit_df = self.update_audit_df(df, audit_df)
#####################################
## prepare for audit again
only_df = pd.DataFrame(df)
only_df.to_csv(self.base_file_path + "df_before_82.csv", index = False)
df = sf.prep_for_audit(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.prep_for_audit 82 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df.to_csv(csv_after_prep_for_audit_after_wrapping, index=False)
#####################################
sf.run_audit_on_identified(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.run_audit_on_identified 83 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df.to_csv(csv_after_audit2, index=False)
audit_df = self.update_audit_df(df, audit_df)
#####################################################
### run language specific audit on identified
if lang:
if lang.upper() == "ENGLISH":
df = sf_eng.run_audit_on_identified_english(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf_eng.run_audit_on_identified_english 84 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
audit_df = self.update_audit_df(df, audit_df)
####################################
sf.sa_output_to_docx(df, output_linewise_docx, output_template)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.sa_output_to_docx 85 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
sf.sa_output_to_txt(output_linewise_docx, output_linewise_txt)
# print("line no: 2017",str(output_linewise_txt))
output_docx_after_audit = self.base_file_path + "audited_text.docx"
pdf_audit_file_path = self.base_file_path + "audited_text.pdf"
sf.convert_txt_to_docx(output_linewise_txt,output_docx_after_audit)
print("converted to docx")
try:
# total_page_af = sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path)
sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path)
try:
total_page_af = sf.PdfCounter(pdf_audit_file_path)
print("total pages af = ", total_page_af)
print("hehehehehe")
except Exception as exp:
print(repr(exp))
print("try except total pages didnt work")
except Exception as exp:
print("total_page_af : ", exp)
print("the count of pageline start here")
line_count_after_audit = sf.count_the_line(str(output_linewise_txt))
count_before_txt = self.base_file_path + "temp.txt"
line_count_before_audit = sf.count_the_line(str(count_before_txt))
print("you are here")
output_docx_from_orginal_text = self.base_file_path + "original_text.docx"
pdf_file_path = self.base_file_path + "original_text.pdf"
print("b4 txt to docx")
sf.convert_txt_to_docx(count_before_txt,output_docx_from_orginal_text)
print("b4 page count of pdf")
print("hehe")
"""13-2-24"""
# try:
# total_page_bf = sf.countPages(output_docx_from_orginal_text,pdf_file_path,self.base_file_path)
# print(total_page_bf)
# except Exception as exp:
# print(" total page bf",total_page_bf )
print("temp txt converted to docx")
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.sa_output_to_txt 86 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
########################################
# sf.print_audit_report_docx(audit_df,audit_report_docx)
# headers = True
try:
print("In the total number of pages count")
file_model_objects = File.objects.filter(script=self.script_id)
audit_file_object = file_model_objects.get(type="script-csv")
read_df = pd.read_csv(audit_file_object.file)
print("csv fetched")
docx = sf.csv_to_docx(read_df)
audited_docx_path = self.base_file_path + "csv_to_docx_audited.docx"
# temp_file_stream = BytesIO()
print("docx saved")
docx.save(audited_docx_path)
# temp_file_stream.seek(0)
docx_file = ContentFile(
open(audited_docx_path, 'rb').read(),
"from_audited_csv_to_document.docx",
)
# docx_file = ContentFile(
# audited_docx_path.getvalue(),
# "from_audited_csv_to_document.docx",
# )
File.objects.create(
script=Script.objects.get(id=self.script_id),
type="script-docx",
file=docx_file,
)
print("script-docx object created")
converted_audit_pdf_file_path = self.base_file_path + "csv_to_docx_audited.pdf"
Final_pdf_page_count = sf.countPages(audited_docx_path,converted_audit_pdf_file_path,self.base_file_path)
print("total number of pdf pages")
print(int(Final_pdf_page_count))
pass
except Exception as e:
print("yje exception is")
print(e)
audit_df = self.update_audit_df_intro(df, audit_df)
audit_df = self.update_audit_df_appendix(df, audit_df)
audit_report_name = self.script_name + "_report.docx"
print("audit_df_tabular 1908\n\n",audit_df,"\n\n" )
copy_df = pd.DataFrame(audit_df)
copy_df.reset_index().to_csv(self.base_file_path + "audit_report_export.csv", index = False)
print("before print_report_tabular_docx")
script_ob = Script.objects.get(id=self.script_id)
screen_play_name = script_ob.screenplay.name
author_name = script_ob.screenplay.author
print(screen_play_name)
print(author_name)
print(line_count_before_audit)
print(line_count_after_audit)
# audit_report_buffer = sf.print_audit_report_tabular_docx(audit_df,line_count_before_audit,line_count_after_audit) #commented on 13-09-23
para_filetered_audut_df = sf.assign_para_no(audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER para_filetered_audut_df\n")
scriptname = str(screen_play_name) # to fetched by audit data
author = str(author_name) # to fetched by audit data
try:
pre_audit_pagenumber = int(self.total_page_bf)
except:
pre_audit_pagenumber = 1
try:
print("total_page_af = ", total_page_af )
postauditpagenumber = int(total_page_af)
except:
print("total_page_af 1")
postauditpagenumber = 1
try:
preaudit_line_no = int(line_count_before_audit)
except:
preaudit_line_no = 1
try:
postaudit_line_no = int(line_count_after_audit)
except:
postaudit_line_no = 1
try:
print("unique_script_languages",unique_script_languages)
script_language = str(unique_script_languages) # to be fetched by conversin function
except:
script_language = "---"
try:
print("unique_dialogue_languages",unique_dialogue_languages)
dialogue_language = str(unique_dialogue_languages) # to be fetched by conversin function
except:
dialogue_language = "---"
print("scriptname",scriptname)
print("author",author)
print("pre_audit_pagenumber",pre_audit_pagenumber)
print("postauditpagenumber",postauditpagenumber)
print("preaudit_line_no",preaudit_line_no)
print("postaudit_line_no",postaudit_line_no)
'''
additiona model information
'''
#self.audit_model_obj.number_of_pages = int(postauditpagenumber)
# time_per_page = 30
# base time = 120
# no_of_pages = 10
# formula of counting pages = (time_per_page + base time) * no_of_pages
try:
self.audit_model_obj.screenplay_language = script_language
self.audit_model_obj.dialogue_language = dialogue_language
self.audit_model_obj.number_of_pages = int(postauditpagenumber)
print("script language, dialogue language, post audit pagenumber is update to the audit models")
except:
print("page number and language insertion failed")
pass
audit_report_buffer = sf.print_audit_report_tabular_docx(para_filetered_audut_df,scriptname,author,pre_audit_pagenumber,postauditpagenumber,preaudit_line_no,postaudit_line_no,script_language,dialogue_language)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.print_audit_report_tabular_docx 87 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
# audit_report_path = self.base_file_path + "audit_report_doc.docx"
# report_data = Document(audit_report_buffer)
# report_data.save(audit_report_path)
req_file = ContentFile(audit_report_buffer.read(), audit_report_name)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.print_audit_report_tabular_docx 87 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n"+str(req_file))
print("req_file")
try:
script = Script.objects.get(id=self.script_id)
script.no_of_pages = int(Final_pdf_page_count)
script.save()
# user_id = script.screenplay.user.id
# Now, 'user_id' contains the user.id associated with the given script_id
except Exception as e:
print(e)
# Handle the case where the script with the given ID doesn't exist
# user_id = None
print("No_of_pages not insertd")
# try:
# update_juggernaut(user_id=user_id,service_name='audit',audit_pages = int(postauditpagenumber))
# except:
# print("the update_juggernaut didnt work")
# req_file = File.objects.get(script=self.script_id)
# req_file.type= "audit-report"
# req_file.file = file
# req_file.save()
File.objects.create(
script=Script.objects.get(id=self.script_id),
type="audit-report",
file=req_file,
)
try:
end_time_count = time.time()
total_duration = end_time_count - self.start_time_count
hours, remainder = divmod(total_duration, 3600)
minutes, seconds = divmod(remainder, 60)
text_time = f"Program ran for {str(hours)} hours, {str(minutes)} minutes, and {str(seconds)} seconds. for script_id= {str(self.script_id)} which has pdf pages of {pre_audit_pagenumber}."
print(str(text_time))
t_time_file = self.total_time_file + "/tail_errors.txt"
with open(t_time_file, "a") as file008:
file008.write(str(text_time) + "\n")
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("87 function complete \n")
except:
pass
return True
def script_meta(self):
    """Placeholder for extracting script metadata; intentionally a no-op."""
    return None
def audit_fdx(self) -> bool:
    """
    Audit an FDX (Final Draft) script.

    FDX files arrive pre-structured, so instead of the heuristic audit
    pipeline the file is converted directly into an audited DataFrame and
    the usual artefacts are produced: a CSV and a DOCX persisted through
    the File model, a PDF rendered only to count pages, plus language and
    page-count metadata saved on ``self.audit_model_obj``.

    Returns:
        bool -> always True; failures in individual steps are printed and
        swallowed so the run stays best-effort.
    """
    # fdx to audited csv
    para_df = pd.DataFrame()
    para_df = sf.fdx_to_audited_df(self.input_script)
    # save audited csv to file system
    audited_file_name = self.script_name + ".csv"
    req_file = ContentFile(
        (para_df.to_csv(index=False, path_or_buf=None)).encode("utf-8"),
        audited_file_name,
    )
    File.objects.create(
        script=Script.objects.get(id=self.script_id),
        type="script-csv",
        file=req_file,
    )
    print("csv created")
    # Flag the audit record as originating from an FDX upload.
    try:
        self.audit_model_obj.isfdx = True
        self.audit_model_obj.save()
        print("isfdx True saved")
    except Exception as exp:
        print(repr(exp))
    # Detect screenplay/dialogue languages from a filtered copy of the rows.
    language_check_df = sf.check_and_copy_rows(para_df)
    try:
        script_language, dialogue_language = sf.language_detector_for_csv(language_check_df)
        print("script_language",script_language)
        print("dialogue_language",dialogue_language)
        # Each detector entry is indexed with [0] for the language name;
        # deduplicate and join the names into one display string.
        unique_script_languages = ', '.join(set(lang[0] for lang in script_language))
        unique_dialogue_languages = ', '.join(set(lang[0] for lang in dialogue_language))
        print("langauage detection worked")
    except Exception as exp:
        print(repr(exp))
        # Detection failed: fall back to empty language strings.
        unique_script_languages = ""
        unique_dialogue_languages = ""
        print("Langauuge detectedion csv didnt work")
    try:
        self.audit_model_obj.screenplay_language = unique_script_languages
        self.audit_model_obj.dialogue_language = unique_dialogue_languages
        self.audit_model_obj.save()
        print("audit lang saved")
    except Exception as exp:
        print(repr(exp))
        # NOTE(review): these fallback values are only set in memory and
        # never save()d — presumably deliberate since save() just failed.
        self.audit_model_obj.screenplay_language = "ENGLISH"
        self.audit_model_obj.dialogue_language = "ENGLISH"
        print("audot lang didnt save")
    # print("In the total number of pages count")
    # file_model_objects = File.objects.filter(script=self.script_id)
    # audit_file_object = file_model_objects.get(type="script-csv")
    # read_df = pd.read_csv(audit_file_object.file)
    # print("csv fetched")
    # Render the audited rows to DOCX, persist it, then convert that DOCX
    # to PDF purely to obtain the final page count for the audit model.
    try:
        print(para_df)
        docx = sf.csv_to_docx(para_df)
        audited_docx_path = self.base_file_path + "csv_to_docx_audited.docx"
        # temp_file_stream = BytesIO()
        print("docx saved")
        docx.save(audited_docx_path)
        # temp_file_stream.seek(0)
        docx_file = ContentFile(
            open(audited_docx_path, 'rb').read(),
            "from_audited_csv_to_document.docx",
        )
        File.objects.create(
            script=Script.objects.get(id=self.script_id),
            type="script-docx",
            file=docx_file,
        )
        print("script-docx object created")
        # output_docx_after_audit = self.base_file_path + "audited_text.docx"
        pdf_audit_file_path = self.base_file_path + "csv_to_docx_audited.pdf"
        print("converted to docx")
        try:
            # total_page_af = sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path)
            sf.countPages(audited_docx_path,pdf_audit_file_path,self.base_file_path)
            print("fdx : docx to pdf was create at", str(pdf_audit_file_path) )
            try:
                total_page_af = sf.PdfCounter(pdf_audit_file_path)
                print("total pages af = ", total_page_af)
                print("hehehehehe")
                self.audit_model_obj.number_of_pages = int(total_page_af)
                self.audit_model_obj.save()
            except Exception as exp:
                print(repr(exp))
                print("try except total pages didnt work")
        except Exception as exp:
            print("fdx docx to pdf conversion didnt work")
            print("total_page_af : ", exp)
    except Exception as exp:
        print("csv to docs didnt work")
        print(repr(exp))
    return True
def quick_audit(self, lang: str = None):
df, audit_df = self.before_audit(lang)
## get the indents count
count_green = 0
count_amber = 0
total_count = len(df)
all_indents = df["ssc"].value_counts()
print(all_indents)
all_indents = df["ssc"].value_counts().sort_index().reset_index()
# print(all_indents)
for index in all_indents.index:
# print(all_indents['index'][index])
if str(all_indents["index"][index]) in ("15", "25", "30", "35"):
count_green += all_indents["ssc"][index]
elif str(all_indents["index"][index]) in (
"0",
"14",
"16",
"24",
"26",
"29",
"31",
"34",
"36",
):
count_amber += all_indents["ssc"][index]
elif all_indents["index"][index] > 62:
count_amber += all_indents["ssc"][index]
print(all_indents["index"].tolist())
print(count_green, count_amber, total_count)
percent_good = ((count_green + count_amber) / total_count) * 100
if percent_good > 80:
print("most lines are within prescribed indents", percent_good)
quick_audit_flag = "pass"
else:
print("most lines are not within prescribed indents", percent_good)
quick_audit_flag = "fail"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("\nafter quick audit : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
print(quick_audit_flag)
def get_character_list(self, lang: str = None):
if not self.audit_model_obj.pre_audit_run:
df, _ = self.before_audit(lang)
else:
df_path = os.path.join(self.base_file_path, "space_between_words_removed.csv")
df = pd.read_csv(df_path)
df_indents = df[["line_no", "data", "ssc", "parenthetical"]]
df_indents.fillna("", inplace=True)
for index in df_indents.index:
data = df_indents["data"][index]
if df_indents["parenthetical"][index] == "PartMidEnd":
par_pos = re.search("\(", data).start()
df_indents["data"][index] = data[0:par_pos].strip()
df_indents["parenthetical"][index] = "Absent"
elif data.strip():
df_indents["data"][index] = data.strip()
df_indents = df_indents.loc[df_indents["parenthetical"] == "Absent", :]
df_indents["ssc"].value_counts().sort_index()
df_indents["ssc"].value_counts().sort_index().reset_index()
all_indents = df_indents["ssc"].value_counts().sort_index().reset_index()
if 35 in all_indents["index"].tolist():
if df_indents["ssc"].value_counts().sort_index()[35] > 3:
sp_indent = 35
else:
ps_sp_indents = df_indents.loc[
(df_indents["ssc"] >= 32) & (df_indents["ssc"] <= 40), :
]
if not ps_sp_indents.empty:
sp_indent = (
ps_sp_indents["ssc"]
.value_counts()
.sort_values(ascending=False)
.reset_index()["index"][0]
)
else:
sp_indent = 35
# sp_indent = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]['index']
else:
ps_sp_indents = df_indents.loc[
(df_indents["ssc"] >= 32) & (df_indents["ssc"] <= 40), :
]
if not ps_sp_indents.empty:
sp_indent = (
ps_sp_indents["ssc"]
.value_counts()
.sort_values(ascending=False)
.reset_index()["index"][0]
)
else:
sp_indent = -1
# sp_indent = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]['index']
# third_indents = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]
try:
character_list = df_indents.loc[
df_indents["ssc"] == sp_indent, "data"
].unique()
except:
character_list = []
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("\nafter get_character_list : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
return character_list
def get_character_subset(self, character_list) -> dict:
    """
    Build a per-character subset script for each name in character_list.

    Reads the line-wise audited CSV ("after_audit1.csv"), and for every
    character collects each line mentioning the character plus its
    surrounding context (back to the start of the dialogue block /
    scene, forward to the next blank line). The subset is written both
    as a DOCX (via the sf pipeline) and as a plain-text file under
    <base_file_path>/character_subset/.

    Parameters:
        character_list -> iterable of character names (typically the
                          output of get_character_list)
    Returns:
        dict mapping character name -> path of its subset DOCX
    """
    # if character_list is None:
    #     character_list = self.get_character_list()
    audited_linewise_csv = os.path.join(self.base_file_path, "after_audit1.csv") # replaced by after_audit1.csv
    foldername = "character_subset"
    output_dir = os.path.join(self.base_file_path, foldername)
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    df_after_audit = pd.read_csv(audited_linewise_csv)
    df_after_audit.fillna("", inplace=True)
    df_after_audit["line_no"] = df_after_audit["line_no"].astype(int)
    # Scene-heading rows (ps1/ps2) delimit scenes; their frame indexes
    # are the scene boundaries used below.
    scenes = df_after_audit.loc[
        (df_after_audit["Identification_Status"] == "ps1")
        | (df_after_audit["Identification_Status"] == "ps2"),
        ["line_no", "data", "Identification_Status"],
    ]
    scene_indexes = scenes.index
    last_index = df_after_audit.index[-1]
    character_scripts_dict = dict()
    for character in character_list:
        try:
            print("processing character subset for", character)
        except:
            pass
        # Output paths: "<script>_<CHARACTER>.txt/.docx" in the subset dir.
        output_subset_script_txt = os.path.join(
            output_dir,
            (self.script_name.rsplit(".", 1)[0] + "_" + str(character) + ".txt"),
        )
        output_subset_script_docx = os.path.join(
            output_dir,
            (self.script_name.rsplit(".", 1)[0] + "_" + str(character) + ".docx"),
        )
        # i/j walk consecutive scene boundaries: scene i spans
        # [scene_indexes[i], scene_indexes[j]) (last scene runs to EOF).
        i, j = 0, 1
        character_in_scenes = []
        character_lines = []
        while j <= len(scene_indexes):
            scene_no = i + 1
            start = scene_indexes[i]
            if j < len(scene_indexes):
                end = scene_indexes[j]
            else:
                end = last_index + 1
            for index in range(start, end):
                data = df_after_audit["data"][index]
                # NOTE(review): the character name is used as a regex
                # pattern here — names containing regex metacharacters
                # (e.g. ".") would need re.escape; confirm upstream names
                # are plain words.
                if re.search(character.upper(), data.strip()):
                    character_lines.append(start)
                    # print(scene_no,index,data)
                    character_in_scenes.append(scene_no)
                    character_lines.append(index)
                    # Walk backwards to capture the lines leading into
                    # this mention (stop at scene start, ps4/ps1, or a
                    # ps6 preceded by a blank line).
                    rev_index = index - 1
                    rev_index_is = df_after_audit["Identification_Status"][
                        rev_index
                    ]
                    character_lines.append(rev_index)
                    # pvs_data = df_after_audit['data'][rev_index-1]
                    # print(rev_index,pvs_data)
                    try:
                        rev_index_before_is = df_after_audit[
                            "Identification_Status"
                        ][rev_index - 1]
                    except:
                        rev_index_before_is = ""
                    # while rev_index != start and rev_index_is != 'ps4' and rev_index_is != 'ps1' and rev_index_is != 'ps7' :
                    while (
                        rev_index != start
                        and rev_index_is != "ps4"
                        and rev_index_is != "ps1"
                        and not (
                            rev_index_is == "ps6" and rev_index_before_is == "blank"
                        )
                    ):
                        rev_index = rev_index - 1
                        pvs_data = df_after_audit["data"][rev_index]
                        # print(rev_index,pvs_data)
                        character_lines.append(rev_index)
                        rev_index_is = df_after_audit["Identification_Status"][
                            rev_index
                        ]
                    # Walk forwards to capture the lines following the
                    # mention until a blank line.
                    fwd_index = index
                    fwd_index_is = df_after_audit["Identification_Status"][
                        fwd_index
                    ]
                    # NOTE(review): `fwd_index != "ps15"` compares the
                    # integer index against a status string (always
                    # True) — presumably `fwd_index_is` was intended;
                    # confirm before changing, the loop currently stops
                    # only on "blank".
                    while fwd_index_is != "blank" and fwd_index != "ps15":
                        fwd_index = fwd_index + 1
                        character_lines.append(fwd_index)
                        fwd_index_is = df_after_audit["Identification_Status"][
                            fwd_index
                        ]
            i += 1
            j += 1
        # Deduplicate and restore document order.
        character_in_scenes = list(set(character_in_scenes))
        character_lines = list(set(character_lines))
        print(character_lines)
        character_lines.sort()
        print(character_lines)
        # Re-audit the extracted subset so its formatting metadata is
        # consistent, then render it to DOCX.
        character_df = df_after_audit[df_after_audit.index.isin(character_lines)]
        character_df.reset_index(drop=True, inplace=True)
        character_df = sf.prep_for_audit(character_df)
        # test_path = os.path.join(output_dir,os.path.splitext(input_filename)[0])+ '_' + str(character) + '_test1.csv'
        # character_df.to_csv(test_path,index= False)
        character_df = sf.run_audit_on_identified(character_df)
        # test_path = os.path.join(output_dir,os.path.splitext(input_filename)[0])+ '_' + str(character) + '_test2.csv'
        # character_df.to_csv(test_path,index= False)
        ch_para_df = sf.merge_line_to_para(character_df)
        # ch_para_df.to_csv(csv_parawise_status, index = False)
        sf.sa_wrapped_output_to_docx(ch_para_df, output_subset_script_docx)
        character_scripts_dict[character] = output_subset_script_docx
        # sf.conv_docx_to_txt(output_subset_script_docx,output_subset_script_txt)
        # Plain-text rendering: blank line before each scene heading
        # (ps1) and after headings/transitions (ps1/ps3).
        with open(output_subset_script_txt, "w", encoding="utf-8") as fout:
            for index in character_lines:
                print(df_after_audit["Identification_Status"][index])
                try:
                    if str(df_after_audit["Identification_Status"][index]) == "ps1":
                        fout.writelines("\n")
                except:
                    pass
                data = df_after_audit["data"][index]
                try:
                    print(data)
                except:
                    pass
                fout.writelines(str(data))
                fout.writelines("\n")
                try:
                    if (
                        df_after_audit["Identification_Status"][index] == "ps1"
                        or df_after_audit["Identification_Status"][index] == "ps3"
                    ):
                        fout.writelines("\n")
                except:
                    pass
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("\nafter get_character_subset : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
    return character_scripts_dict
def audit_in_background(self):
    """
    Run the full audit synchronously and record the outcome on the
    audit model (States.SUCCESS / States.FAILURE). Despite its name it
    currently runs in the foreground (the os.fork path is disabled).
    """
    # # commenting os.fork to make code run in foreground
    # if os.fork() != 0:
    #     return
    print("Running in background")
    end_time = datetime.datetime.now()
    try:
        # FDX scripts take the direct conversion path; everything else
        # goes through the heuristic audit pipeline.
        if self.input_script.rsplit(".", 1)[-1] == 'fdx':
            self.audit_fdx()
        else:
            self.audit()
        self.audit_model_obj.status = States.SUCCESS
        self.audit_model_obj.save()
        print("Audit Success!!!!!!!!!!!!!!!!!!!!!!!")
        end_time = datetime.datetime.now()
        with open(self.base_file_path + "time_taken.txt", "a") as log_file:
            log_file.write("\n\n****AUDITING IS SUCCESSFUL****\n")
        print(end_time)
    except Exception as error:
        # Any failure is persisted on the model for later inspection.
        self.audit_model_obj.status = States.FAILURE
        self.audit_model_obj.results = error
        self.audit_model_obj.save()
        print(end_time)
# def _audit(self):
# try:
# extension = self.input_script.rsplit(".", 1)[-1]
# if extension == 'fdx':
# self.audit_fdx()
# else:
# self.audit()
# self.audit_model_obj.status = States.SUCCESS
# self.audit_model_obj.save()
# print("Audit Success!!!!!!!!!!!!!!!!!!!!!!!")
# with open(self.base_file_path + "time_taken.txt", "a") as file007:
# file007.write("\n\n****AUDITING IS SUCCESSFUL****\n")
# except Exception as exp:
# self.audit_model_obj.status = States.FAILURE
# self.audit_model_obj.results = exp
# self.audit_model_obj.save()
# def audit_in_background(self):
# print("Running in background")
# async_task(self._audit)
# def audit_in_background(self):
# print("Running in background")
# async_task(self._audit)
# print("Task enqueued successfully")
# def _audit(self):
# try:
# extension = self.input_script.rsplit(".", 1)[-1]
# if extension == 'fdx':
# self.audit_fdx()
# else:
# self.audit()
# self.audit_model_obj.status = States.SUCCESS
# self.audit_model_obj.save()
# print("Audit Success!!!!!!!!!!!!!!!!!!!!!!!")
# with open(self.base_file_path + "time_taken.txt", "a") as file007:
# file007.write("\n\n****AUDITING IS SUCCESSFUL****\n")
# except Exception as exp:
# self.audit_model_obj.status = States.FAILURE
# self.audit_model_obj.results = exp
# self.audit_model_obj.save()
# print("Audit Failed:", exp)
if __name__ == "__main__":
    # NeutralAudit(script_id, log): build an auditor for script "123"
    # with logging enabled.
    naudit = NeutralAudit("123", True)
    ## audit run
    # naudit.__call__()
    ## character subset check
    # naudit.quick_audit()
    # Bug fix: get_character_subset() requires a character_list argument
    # (the original call passed none and raised TypeError); derive the
    # list from the script first and hand it over.
    character_list = naudit.get_character_list()
    try:
        print("characters of script are", character_list)
    except Exception:
        pass
    naudit.get_character_subset(character_list)