2734 lines
117 KiB
Python
Executable File
2734 lines
117 KiB
Python
Executable File
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
import datetime
|
|
import pandas as pd
|
|
from django.core.files.base import ContentFile
|
|
import time
|
|
from centralisedFileSystem.models import File, Script, ScreenPlay
|
|
from scriptAudit import sa_functions as sf
|
|
from scriptAudit import sa_functions_english as sf_eng
|
|
from scriptAudit.models import ScriptAuditModel, States
|
|
from io import BytesIO
|
|
import datetime
|
|
import pytz
|
|
import subprocess
|
|
|
|
# from django_q.tasks import async_task
|
|
# from django_q.brokers import Broker
|
|
|
|
class NeutralAudit:
|
|
    def __init__(
        self,
        script_id: str = None,
        log: bool = True,
    ) -> None:
        """
        To Audit a Script already uploded.

        Resolves all working paths under media/audit_folder/<script_id>/,
        loads the original uploaded script file and the audit-status row
        from the database, and optionally redirects stdout to a per-script
        log file.

        _________________________________________________________________

        Parameters :

        script_id : str -> Id of the script to be Audited
                    default = None

        log : bool -> save logs in log.txt
                    default = True

        _________________________________________________________________

        Return :
            None
        """
        # Wall-clock start of the run, used for coarse timing.
        self.start_time_count = time.time()
        print("<<<<<<<<<<<<<<<<<<<<<<<<<")
        # Static resource folder shipped next to this module.
        self.matrices_path = str(Path(__file__).resolve().parent) + "/matrices/"

        self.total_time_file = str(Path(__file__).resolve().parent)

        print(script_id,"SCRIPT-ID IS HERE|| AYYA")
        self.script_id = script_id
        # Root folder under which each script gets its own artifact directory.
        audit_root_dir = (
            str(Path(__file__).resolve().parent.parent) + "/media/audit_folder/"
        )

        self.script_name = str(self.script_id)
        # self.total_line_before_audit = 1
        output_dir = os.path.join(audit_root_dir, self.script_name)
        # NOTE(review): t_time_file is built but unused — its writer below is commented out.
        t_time_file = self.total_time_file + "/tail_errors.txt"
        # with open(t_time_file, "a") as file008:
        #     file008.write(str(self.start_time_count))

        # Original uploaded script (the "script-original" file record).
        file_to_audit = File.objects.get(
            script=script_id,
            type="script-original",
        )
        self.input_script = file_to_audit.file.path

        if not os.path.exists(output_dir):
            try:
                os.mkdir(output_dir)
            except Exception as exp:
                print(repr(exp))
                # Fallback when os.mkdir fails (e.g. permissions): shell out.
                # NOTE(review): chmod 777 makes the folder world-writable.
                subprocess.run(["mkdir", output_dir])
                subprocess.run(["chmod", "777", output_dir])
        ##print(output_dir)
        # All intermediate artifacts are written under this prefix.
        self.base_file_path = str(output_dir) + "/"

        self.csv_removed_space_between_words = (
            self.base_file_path + "space_between_words_removed.csv"
        )

        self.audit_report_csv = self.base_file_path + "audit_spreadsheet.csv"

        # Silence all prints by default; with log=True route them to a
        # per-script log file instead.  NOTE(review): this rebinds the
        # process-wide sys.stdout and the devnull handle opened here is
        # never closed when the next branch replaces it.
        sys.stdout = open(os.devnull, "w")
        if log:
            log_file = self.base_file_path + "_log.txt"
            sys.stdout = open(log_file, "w", encoding="utf-8")
            self.gen_int_files = True
        else:
            self.gen_int_files = False
            sys.stdout = sys.__stdout__

        # Audit-status row tracking this script's progress in the DB.
        self.audit_model_obj = ScriptAuditModel.objects.get(
            script = Script.objects.get(
                id = self.script_id,
            )
        )

        time_file = self.base_file_path + "time_taken.txt"
        start_time = datetime.datetime.now()
        print(start_time)
        with open(time_file, "a") as file007:
            file007.write("started\n\n")
            # NOTE(review): duplicate write — "started" is recorded twice;
            # the second line was possibly meant to record start_time.
            file007.write("started\n\n")
def __del__(self) -> None:
|
|
sys.stdout = sys.__stdout__
|
|
|
|
def update_audit_df(self, df, audit_df):
|
|
print("inside update audit df")
|
|
print(df.dtypes)
|
|
print(audit_df.dtypes)
|
|
lines_not_removed = audit_df.loc[audit_df["line_removed"] != "Yes"].index.to_list()
|
|
audit_df.sort_index(inplace=True)
|
|
|
|
# audit_df.reset_index().to_csv(audit_report_csv,index =False)
|
|
|
|
audit_df["audited_line_no"] = ""
|
|
audited_line_no = 1
|
|
|
|
for line in lines_not_removed:
|
|
new_data = ""
|
|
try:
|
|
new_data = df.loc[df["line_no"] == line, "data"].values[0]
|
|
except:
|
|
pass
|
|
# print(new_data)
|
|
try:
|
|
audit_df["Identification_Status"][line] = df.loc[
|
|
df["line_no"] == line, "Identification_Status"
|
|
].values[0]
|
|
|
|
except:
|
|
pass
|
|
|
|
audit_df["scene_number"][line] = df.loc[
|
|
df["line_no"] == line, "scene_number"
|
|
].values[0]
|
|
|
|
audit_df["data_corrected"][line] = new_data
|
|
audit_df["line_removed"][line] = "No"
|
|
audit_df["audited_line_no"][line] = audited_line_no
|
|
|
|
audited_line_no += 1
|
|
# print(audit_df.loc[audit_df['line_no'] == line, 'data_corrected'])
|
|
|
|
audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
|
|
|
|
return audit_df
|
|
|
|
def update_audit_df_intro(self, df, audit_df):
|
|
print("update_audit_df_intro")
|
|
audit_df.reset_index(inplace=True, drop=True)
|
|
new_data = ""
|
|
for line in audit_df.index:
|
|
try:
|
|
print("line",line)
|
|
if audit_df["introduction"][line] == "Yes":
|
|
try:
|
|
new_data = df.loc[df["line_no"] == line, "data"].values[0]
|
|
except Exception as e:
|
|
print("Exception 174:",e)
|
|
pass
|
|
|
|
audit_df["data_corrected"][line] = new_data
|
|
except Exception as e:
|
|
print(e)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("\n\n update_audit_df_intro : complete \n")
|
|
return audit_df
|
|
|
|
def update_audit_df_appendix(self, df, audit_df):
|
|
new_data = ""
|
|
print(audit_df.index)
|
|
for line in audit_df.index:
|
|
if audit_df["appendix"][line] == "Yes":
|
|
try:
|
|
new_data = df.loc[df["line_no"] == line, "data"].values[0]
|
|
except:
|
|
pass
|
|
|
|
audit_df["data_corrected"][line] = new_data
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("\n\n update_audit_df_appendix : complete \n")
|
|
|
|
return audit_df
|
|
|
|
def update_is_identified(self, df):
|
|
print("Updating is Identified")
|
|
df["Identification_Status"].fillna("", inplace=True)
|
|
for index in df.index:
|
|
print(index,df["Identification_Status"][index])
|
|
try:
|
|
if df["Identification_Status"][index]:
|
|
line_pos = df["Identification_Status"][index].split(";")
|
|
pos_count = len(line_pos)
|
|
else:
|
|
pos_count = 0
|
|
except:
|
|
pos_count = 0
|
|
|
|
print(pos_count)
|
|
if pos_count == 1:
|
|
df["isIdentified"][index] = "Yes"
|
|
|
|
else:
|
|
df["isIdentified"][index] = "No"
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("\n\n Inside update_is_identified : complete \n")
|
|
return df
|
|
|
|
    def before_audit(self, lang: str = None):
        """
        Pre-audit pipeline: convert the uploaded script to text, load it
        into a DataFrame, trim introduction/appendix, strip page numbers
        and extra blank lines, merge broken lines, and sync everything into
        the audit tracking DataFrame.  Progress breadcrumbs are appended to
        time_taken.txt after every stage.

        Parameters :
            lang : str -> language of the script.  NOTE(review): all
                   branches currently call the same English helpers
                   (sf_eng.*); the split presumably anticipates
                   per-language rules — confirm before relying on it.

        Return :
            (df, audit_df) -> cleaned script DataFrame and audit DataFrame.
        """

        # Working file paths for each intermediate stage.
        output_converted_txt = self.base_file_path + "temp.txt"
        output_converted_docx = self.base_file_path + "temp.docx"
        csv_for_pre_processing = self.base_file_path + "for_pre_processing.csv"

        csv_for_processing = self.base_file_path + "for_processing.csv"
        csv_prepped_for_audit = self.base_file_path + "prepped_for_audit.csv"

        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("Inside before audit : ( 0-16 ) \n\n")

        ## convert pdf/docx to text
        sf.conv_to_txt(
            self.input_script,
            output_converted_docx,
            output_converted_txt
        )

        # Best-effort page count -> estimated completion time stored on the
        # audit model.  Any failure here is non-fatal (outer except below).
        try:
            output_docx_from_orginal_text = self.base_file_path + "original_text1.docx"
            pdf_file_path = self.base_file_path + "original_text1.pdf"
            print("b4 txt to docx")

            sf.convert_txt_to_docx(output_converted_txt,output_docx_from_orginal_text)
            print("IN THE BEGINING OF AUDIT PDF PAGES")
            print("b4 page count of pdf")
            # total_page_bf = sf.countPages(output_docx_from_orginal_text,pdf_file_path,self.base_file_path)
            sf.countPages(output_docx_from_orginal_text,pdf_file_path,self.base_file_path)

            print("temp txt converted to docx")
            # Fallback page count if PdfCounter fails below.
            self.total_page_bf = str(1)
            try:
                print("int try pdf bf")
                self.total_page_bf = sf.PdfCounter(pdf_file_path)
                print("taotal_page_bf", str(self.total_page_bf))
            except Exception as exp:
                print(repr(exp))
                print("page bf didnt work")
                pass
            # self.audit_model_obj.number_of_pages = int(total_page_bf)
            # Rough ETA: ~26 s per page plus a 2-minute fixed overhead.
            time_per_page = 26
            base_time = 120
            no_of_pages = int(self.total_page_bf)
            formula_of_counting_pages = (time_per_page * no_of_pages) + base_time
            print("time required for auditing is :",formula_of_counting_pages)
            extimated_time = round(formula_of_counting_pages / 60, 1)
            print("extimated_time:",extimated_time)
            print("Exstimated time is updated")
            kolkata_time = datetime.datetime.now(pytz.timezone('Asia/Kolkata'))
            print(kolkata_time)
            # Expected finish timestamp (IST), stored as a display string.
            thirty_mins_later = kolkata_time + datetime.timedelta(minutes=extimated_time)
            formatted_time = thirty_mins_later.strftime("%B %d, %Y %I:%M %p")
            self.audit_model_obj.expected_duration = formatted_time
            print(formatted_time)
        except:
            # ETA computation is cosmetic; never let it abort the audit.
            pass
        # self.total_line_before_audit = sf.count_the_line(output_converted_txt)
        # print("total_line_before_audit :",total_line_before_audit)

        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.conv_to_text 1 : before audit\n")

        ## convert to df
        sf.conv_to_csv(output_converted_txt, csv_for_pre_processing)

        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.conv_to_csv 2 : before audit\n")

        df = pd.read_csv(csv_for_pre_processing, encoding="utf8")
        ## direct to df.. not working as expected
        # df = pd.DataFrame()
        # df = sf.conv_to_df(output_converted_txt)
        # df.to_csv(csv_for_pre_processing,index=False)
        print("before assign weights:")
        print(df.dtypes)
        df['preassigned_weights'] = ''
        df = sf.pre_assign_wts(df)
        print(df.dtypes)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.pre_assign_wts 3 : before audit\n")

        df = df.sort_index().reset_index(drop=True)
        df.to_csv(csv_for_processing, index =False)

        # NOTE(review): fillna(inplace=True) on a column slice is deprecated
        # and a no-op under pandas copy-on-write.
        df["data"].fillna("", inplace=True)

        ## make df to track audit

        audit_df = pd.DataFrame()
        # Snapshot of the frame fed to create_audit_df, for debugging.
        df_1st = pd.DataFrame(df)
        df_1st.to_csv(self.base_file_path + "very_first_df_feed_to_create_audit_df.csv", index = False)
        audit_df = sf.create_audit_df(df)

        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.create_audit_df 4 : before audit\n")

        audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
        print(df.dtypes)
        print(audit_df.dtypes)
        audit_df.reset_index().to_csv(self.base_file_path + "very_first_audit_df_feed_to_create_audit_df.csv", index = False)

        print("LANGUAGE IS",lang)
        ## trim intro
        # NOTE(review): the three branches below are identical apart from
        # the breadcrumb text — every path runs the English intro trimmer.
        if lang:
            if lang.upper() == "ENGLISH":
                sf_eng.trim_intro_english(df, audit_df)

                with open(self.base_file_path + "time_taken.txt", "a") as file007:
                    file007.write("after sf_eng.trim_intro_english (5) : before audit\n")

                df = self.update_is_identified(df)
            else:
                sf_eng.trim_intro_english(df, audit_df)

                with open(self.base_file_path + "time_taken.txt", "a") as file007:
                    file007.write("after sf_eng.trim_intro_english (6) : before audit\n")

                df = self.update_is_identified(df)

        else:
            # sf.trim_intro(df,audit_df)
            sf_eng.trim_intro_english(df, audit_df)

            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_intro_english (7) : before audit\n")

            df = self.update_is_identified(df)

        # Drop rows the intro trim flagged as removed and renumber.
        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        print(lines_not_removed)
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)
        # df = df.reset_index()

        audit_df.reset_index().to_csv(self.audit_report_csv, index=False)

        print("Trimming Appendix")
        ## trim appendix
        # Same three-way split as the intro trim above; all paths run the
        # English appendix trimmer.
        if lang:
            if lang.upper() == "ENGLISH":
                sf_eng.trim_appendix_english(df, audit_df)

                with open(self.base_file_path + "time_taken.txt", "a") as file007:
                    file007.write("after sf_eng.trim_appendix_english 8: before audit\n")

                df = self.update_is_identified(df)
            else:
                sf_eng.trim_appendix_english(df, audit_df)

                with open(self.base_file_path + "time_taken.txt", "a") as file007:
                    file007.write("after sf_eng.trim_appendix_english 9: before audit\n")

                df = self.update_is_identified(df)
        else:
            sf_eng.trim_appendix_english(df, audit_df)

            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_appendix_english 10 : before audit\n")

            df = self.update_is_identified(df)

        ## remove page numbers
        sf.remove_page_numbers(df, audit_df)

        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.remove_page_numbers 11 : before audit\n")

        # Re-filter df to the audit rows still kept after page-number removal.
        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        print(lines_not_removed)
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)
        # df = df.reset_index()

        audit_df.reset_index().to_csv(self.audit_report_csv, index=False)

        ## prepare for audit
        df = sf.prep_for_audit(df)

        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.prep_for_audit 12 : before audit\n")

        # sf.prep_for_audit(df)
        df.to_csv(csv_prepped_for_audit, index=False)

        ## remove extra blank lines
        sf.remove_extra_blank_lines(df, audit_df)

        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.remove_extra_blank_lines 13 : before audit\n")

        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        print(lines_not_removed)
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)

        ###
        sf.remove_blank_line_after_parenthetical(df, audit_df)

        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.remove_blank_line_after_parenthetical 14 : before audit\n")

        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        print(lines_not_removed)
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)

        ##
        sf.merge_broken_lines(df, audit_df)

        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.merge_broken_lines 15 : before audit\n")

        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()

        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)

        ###df.to_csv(csv_after_merge, index = False)

        ##
        sf.remove_space_between_words(df, audit_df)

        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.remove_space_between_words 16 : before audit\n")

        df.to_csv(self.csv_removed_space_between_words, index=False)

        print("updating audit df")
        # Snapshot both frames just before the final sync, for debugging.
        df1 = pd.DataFrame(df)
        df1.to_csv(self.base_file_path + "first_df.csv", index = False)
        audit_df1 = pd.DataFrame(audit_df)
        audit_df1.to_csv(self.base_file_path + "first_audit_df.csv", index = False)

        audit_df = self.update_audit_df(df, audit_df)

        # audit_model_obj = ScriptAuditModel.objects.get(
        #     script = Script.objects.get(
        #         id = self.script_id,
        #     )
        # )
        # Mark the pre-audit stage complete; fall back to the model object
        # cached on self if the fresh DB lookup fails.
        try:
            audit_model_obj = ScriptAuditModel.objects.get(
                script = Script.objects.get(
                    id = self.script_id,
                )
            )
            audit_model_obj.pre_audit_run = True
            audit_model_obj.save()
            print("TRY")
        except Exception as exp:
            print(repr(exp))
            print("EXCEPT")
            self.audit_model_obj.pre_audit_run = True
            self.audit_model_obj.save()
        print("PRE AUDIT DONE")

        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("before audit complete : PRE AUDIT DONE\n\n")

        return df, audit_df
def audit(self, lang: str = None) -> None:
|
|
"""
|
|
Run Audit on NeutralAudit object.
|
|
|
|
_________________________________________________________________
|
|
|
|
Parameters :
|
|
|
|
lang : str -> language of the provided script.
|
|
default = None (language nuteral rules)
|
|
|
|
_________________________________________________________________
|
|
|
|
Return :
|
|
|
|
None
|
|
|
|
_________________________________________________________________
|
|
"""
|
|
|
|
# ---------------------------changes to save _audited.csv in media/scriptpage/script/folder
|
|
# csv_parawise_status = self.audited_script_path
|
|
# ---------------------------changes to save _audited.csv in media/scriptpage/script/folder
|
|
|
|
print("<<<<<<<<<<<<<<<<<<<<<<<<<")
|
|
print("<<<<<<<<<<<<<<<<<<<<<<<<<",self.base_file_path)
|
|
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("INSIDE AUDIT (1-87): audit\n\n")
|
|
|
|
csv_after_first_strict_conditions = (
|
|
self.base_file_path
|
|
+ "after_first_strict_conditions.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_first_strict_conditions.csv 1 : audit\n")
|
|
|
|
csv_after_gen_and_sort_weights = (
|
|
self.base_file_path
|
|
+ "after_gen_and_sort_weights.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_gen_and_sort_weights.csv 2 : audit\n")
|
|
|
|
csv_after_examined_speaker_pos = (
|
|
self.base_file_path
|
|
+ "after_examined_speaker_pos.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_examined_speaker_pos.csv 3 : audit\n")
|
|
|
|
csv_after_examined_speaker_next_lines= (
|
|
self.base_file_path
|
|
+ "after_examined_speaker_next_lines.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_examined_speaker_next_lines.csv 4 : audit\n")
|
|
|
|
csv_after_pnnbl_ineligible= (
|
|
self.base_file_path
|
|
+ "after_pnnbl_ineligible1.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_pnnbl_ineligible1 (5) : audit\n")
|
|
|
|
csv_after_examine_same_content_lines= (
|
|
self.base_file_path
|
|
+ "after_examine_same_content_lines.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_examine_same_content_lines (6) : audit\n")
|
|
|
|
csv_after_examined_action_pos_part1 = (
|
|
self.base_file_path
|
|
+ "_after_examined_action_pos_part1.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examined_action_pos_part1.csv (7) : audit\n")
|
|
|
|
csv_after_pnnbl_inelgible_after_action_pos_part1=(
|
|
self.base_file_path
|
|
+ "_after_pnnbl_inelgible_after_action_pos_part1.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_pnnbl_inelgible_after_action_pos_part1.csv (8) : audit\n")
|
|
|
|
csv_after_examined_action_pos_part2 = (
|
|
self.base_file_path
|
|
+ "_after_examined_action_pos_part2.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examined_action_pos_part2.csv (9) : audit\n")
|
|
|
|
|
|
csv_after_pnnbl_inelgible_after_action_pos_part2 = (
|
|
self.base_file_path
|
|
+ "_after_pnnbl_inelgible_after_action_pos_part2.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_pnnbl_inelgible_after_action_pos_part2.csv (10) : audit\n")
|
|
|
|
csv_after_examined_same_indent_bunch = (
|
|
self.base_file_path
|
|
+ "_after_examined_same_indent_bunch.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examined_same_indent_bunch.csv (11) : audit\n")
|
|
|
|
|
|
csv_after_pnnbl_inelgible_after_same_indent = (
|
|
self.base_file_path
|
|
+ "_after_pnnbl_inelgible_after_same_indent.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_pnnbl_inelgible_after_same_indent.csv (12) : audit\n")
|
|
|
|
csv_after_examined_relative_indent_bunch = (
|
|
self.base_file_path
|
|
+ "_after_examined_relative_indent_bunch.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examined_relative_indent_bunch.csv (13) : audit\n")
|
|
|
|
csv_after_examined_speaker_next_lines_after_relative_indent = (
|
|
self.base_file_path
|
|
+ "_after_examined_speaker_next_lines_after_relative_indent.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examined_speaker_next_lines_after_relative_indent.csv (14) : audit\n")
|
|
|
|
|
|
|
|
|
|
csv_after_pnnbl_inelgible_after_relative_indent = (
|
|
self.base_file_path
|
|
+ "after_pnnbl_inelgible_after_relative_indent_bunch.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_pnnbl_inelgible_after_relative_indent_bunch.csv (15) : audit\n")
|
|
|
|
|
|
csv_examined_speaker_using_indent = (
|
|
self.base_file_path
|
|
+ "after_examined_speaker_using_indent.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_examined_speaker_using_indent.csv (16) : audit\n")
|
|
|
|
|
|
csv_after_examined_speaker_next_lines_after_pos_sp_indent = (
|
|
self.base_file_path
|
|
+ "_after_examined_speaker_next_lines_after_pos_sp_indent.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examined_speaker_next_lines_after_pos_sp_indent.csv (17) : audit\n")
|
|
|
|
csv_after_pnnbl_inelgible_after_pos_sp_indent = (
|
|
self.base_file_path
|
|
+ "_after_pnnbl_inelgible_after_pos_sp_indent.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_pnnbl_inelgible_after_pos_sp_indent.csv (18) : audit\n")
|
|
|
|
|
|
csv_examined_speaker_extension = (
|
|
self.base_file_path
|
|
+ "_after_examined_speaker_extension.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examined_speaker_extension.csv (19) : audit\n")
|
|
|
|
csv_after_examined_speaker_next_lines_after_speaker_extension = (
|
|
self.base_file_path
|
|
+ "_after_examined_speaker_next_lines_after_speaker_extension.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examined_speaker_next_lines_after_speaker_extension.csv(20) : audit\n")
|
|
|
|
|
|
csv_after_pnnbl_inelgible_after_speaker_extension = (
|
|
self.base_file_path
|
|
+ "_after_pnnbl_inelgible_after_speaker_extension.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_pnnbl_inelgible_after_speaker_extension.csv (21) : audit\n")
|
|
|
|
csv_after_examined_action_using_top2 = (
|
|
self.base_file_path
|
|
+ "_after_examined_action_using_top2.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examined_action_using_top2.csv (22) : audit\n")
|
|
|
|
csv_after_pnnbl_inelgible_after_action_using_top_pnnbl = (
|
|
self.base_file_path
|
|
+ "_after_pnnbl_inelgible_after_action_using_top_pnnbl.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_pnnbl_inelgible_after_action_using_top_pnnbl.csv (23) : audit\n")
|
|
|
|
csv_after_refined_action = (
|
|
self.base_file_path
|
|
+ "_after_refined_action.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_refined_action.csv (24) : audit\n")
|
|
|
|
|
|
csv_after_pnnbl_inelgible_after_refined_action = (
|
|
self.base_file_path
|
|
+ "_after_pnnbl_inelgible_after_refined_action.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_pnnbl_inelgible_after_refined_action.csv (25) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl = (
|
|
self.base_file_path
|
|
+ "_after_eligibility_using_identified_pnnbl.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl.csv (26) : audit\n")
|
|
|
|
csv_after_top_identification_part1 = (
|
|
self.base_file_path
|
|
+ "_after_top_identification_part1.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top_identification_part1.csv (27) : audit\n")
|
|
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two = (
|
|
self.base_file_path
|
|
+ "after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two.csv (28) : audit\n")
|
|
|
|
csv_after_examined_speaker_pos_after_top1 = (
|
|
self.base_file_path + "_after_examined_speaker_pos_after_top1.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examined_speaker_pos_after_top1.csv (29) : audit\n")
|
|
|
|
|
|
csv_after_examined_speaker_next_lines_after_top1 = (
|
|
self.base_file_path + "after_examined_speaker_next_lines_after_top1.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_examined_speaker_next_lines_after_top1.csv (30) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top_part1 = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part1.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part1.csv (31) : audit\n")
|
|
|
|
|
|
csv_after_examine_speaker_mix_part1 = (
|
|
self.base_file_path + "_after_examine_speaker_mix_part1.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examine_speaker_mix_part1.csv (32) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1 = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1.csv (33) : audit\n")
|
|
|
|
csv_after_examine_speaker_mix_part2 = (
|
|
self.base_file_path + "_after_examine_speaker_mix_part2.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examine_speaker_mix_part2.csv (34) : audit\n")
|
|
|
|
|
|
csv_after_examined_speaker_pos_after_mix = (
|
|
self.base_file_path + "_after_examined_speaker_pos_after_mix.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examined_speaker_pos_after_mix.csv(35) : audit\n")
|
|
|
|
csv_after_examined_speaker_next_lines_after_mix = (
|
|
self.base_file_path + "_after_examined_speaker_next_lines_after_mix.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examined_speaker_next_lines_after_mix.csv (36) : audit\n")
|
|
|
|
|
|
csv_after_pnnbl_ineligible_after_mix = (
|
|
self.base_file_path + "_after_pnnbl_ineligible_after_mix.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_pnnbl_ineligible_after_mix.csv (37) : audit\n")
|
|
|
|
|
|
csv_after_top_identification_part2 = (
|
|
self.base_file_path + "_after_top_identification_part2.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top_identification_part2.csv (38) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top_part2 = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part2.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part2.csv (39) : audit\n")
|
|
|
|
csv_after_top_identification_part2_again = (
|
|
self.base_file_path + "_after_top_identification_part2_again.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top_identification_part2_again.csv (40) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top_part2_again = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part2_again.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part2_again.csv (41) : audit\n")
|
|
|
|
csv_after_top_identification_part2_again_again = (
|
|
self.base_file_path + "_after_top_identification_part2_again_again.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top_identification_part2_again_again.csv(42) : audit\n")
|
|
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top_part2_again_again = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part2_again_again.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part2_again_again.csv (43) : audit\n")
|
|
|
|
csv_after_slug_identification = (
|
|
self.base_file_path + "_after_slug_identification.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_slug_identification.csv (44) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_slug_identification = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_slug_identification.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_slug_identification.csv (45) : audit\n")
|
|
|
|
csv_after_top_identification_part1_again = (
|
|
self.base_file_path + "_after_top_identification_part1_again.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top_identification_part1_again.csv (46) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top_part1_again = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part1_again.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part1_again.csv (47) : audit\n")
|
|
|
|
csv_after_top_identification_part3 = (
|
|
self.base_file_path + "_after_top_identification_part3.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top_identification_part3.csv (48) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top_part3 = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part3.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part3.csv (49) : audit\n")
|
|
|
|
csv_after_top_identification_part4 = (
|
|
self.base_file_path + "_after_top_identification_part4.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top_identification_part4.csv (50) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top_part4 = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part4.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part4.csv (51) : audit\n")
|
|
|
|
csv_after_top_identification_part5 = (
|
|
self.base_file_path + "_after_top_identification_part5.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top_identification_part5.csv (52) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top_part5 = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part5.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part5.csv (53) : audit\n")
|
|
|
|
csv_after_top_identification_part6 = (
|
|
self.base_file_path + "_after_top_identification_part6.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top_identification_part6.csv (54) : audit\n")
|
|
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top_part6 = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part6.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part6.csv (55) : audit\n")
|
|
|
|
csv_after_top_identification_part7 = (
|
|
self.base_file_path + "_after_top_identification_part7.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top_identification_part7.csv (56) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top_part7 = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part7.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part7.csv (57) : audit\n")
|
|
|
|
csv_after_top_identification_part8 = (
|
|
self.base_file_path + "_after_top_identification_part8.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top_identification_part8.csv (58) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top_part8 = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part8.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part8.csv (59) : audit\n")
|
|
|
|
csv_after_examine_among_two = (
|
|
self.base_file_path + "_after_examine_among_two.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examine_among_two.csv (60) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_examine_among_two = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_examine_among_two.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_eligibility_using_identified_pnnbl_after_examine_among_two.csv (61) : audit\n")
|
|
|
|
csv_after_examine_speaker_next_line_after_among_two = (
|
|
self.base_file_path + "_after_examine_speaker_next_line_after_among_two.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examine_speaker_next_line_after_among_two.csv (62) : audit\n")
|
|
|
|
csv_after_top2_wt_diff = (
|
|
self.base_file_path + "_after_top2_wt_diff.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top2_wt_diff.csv (63) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top2_wt_diff.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top2_wt_diff.csv (64) : audit\n")
|
|
|
|
|
|
|
|
csv_after_top2_wt_diff_again = (
|
|
self.base_file_path + "_after_top2_wt_diff_again.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top2_wt_diff_again.csv (65) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again.csv(66) : audit\n")
|
|
|
|
|
|
csv_after_top_identification_part1_diluted = (
|
|
self.base_file_path + "_after_top_identification_part1_diluted.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_top_identification_part1_diluted.csv (67) : audit\n")
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_top_part1_diluted = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part1_diluted.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part1_diluted.csv (68) : audit\n")
|
|
|
|
|
|
#1.2
|
|
csv_after_examine_dial_between_action = (
|
|
self.base_file_path + "_after_examine_dial_between_action.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examine_dial_between_action.csv (69) : audit\n")
|
|
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action.csv (70) : audit\n")
|
|
|
|
|
|
|
|
|
|
csv_after_examine_among_two_again = (
|
|
self.base_file_path + "_after_examine_among_two_again.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_examine_among_two_again.csv (71) : audit\n")
|
|
|
|
|
|
csv_after_eligibility_using_identified_pnnbl_after_examine_among_two_again = (
|
|
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_examine_among_two_again.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("_after_eligibility_using_identified_pnnbl_after_examine_among_two_again.csv (72) : audit\n")
|
|
|
|
|
|
|
|
|
|
csv_after_identify_remaining_as_top = (
|
|
self.base_file_path + "after_identifying_remaining_as_top.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_identifying_remaining_as_top.csv (73) : audit\n")
|
|
|
|
|
|
csv_after_prep_for_audit_after_identification = (
|
|
self.base_file_path + "after_prep_for_audit_after_identification.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_prep_for_audit_after_identification.csv (74) : audit\n")
|
|
|
|
|
|
csv_after_audit1 = self.base_file_path + "after_audit1.csv"
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_audit1.csv (75) : audit\n")
|
|
|
|
csv_after_wrapping = self.base_file_path + "after_wrapping.csv"
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_wrapping.csv (76) : audit\n")
|
|
|
|
csv_after_prep_for_audit_after_wrapping = (
|
|
self.base_file_path + "after_prep_for_audit_after_wrapping.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_prep_for_audit_after_wrapping.csv (77) : audit\n")
|
|
|
|
|
|
csv_after_audit2 = self.base_file_path + "after_audit2.csv"
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("after_audit2.csv (78) : audit\n")
|
|
|
|
|
|
output_linewise_docx = self.base_file_path + "audited_linewise.docx"
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("audited_linewise.docx (79) : audit\n")
|
|
|
|
output_linewise_txt = self.base_file_path + "audited_linewise.txt"
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("audited_linewise.txt (80) : audit\n")
|
|
|
|
audit_report_tabular_docx = self.base_file_path + "audit_report_tabular.docx"
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("audit_report_tabular.docx (81) : audit\n")
|
|
|
|
|
|
csv_strict_conditions = self.matrices_path + "strict_conditions_230623.csv"
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("strict_conditions_230623.csv : audit\n")
|
|
|
|
csv_pos_weights = self.matrices_path + "PS_Weights_250623_2.csv"
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("PS_Weights_250623_2.csv (83) : audit\n")
|
|
|
|
csv_pnbl_nnbl = self.matrices_path + "pnbl_nnbl.csv"
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("pnbl_nnbl.csv (84) : audit\n")
|
|
|
|
pnbl_eligibility_matrix = (
|
|
self.matrices_path + "pnbl_eligibility_matrix_250623.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("pnbl_eligibility_matrix_250623.csv (85) : audit\n")
|
|
|
|
nnbl_eligibility_matrix = (
|
|
self.matrices_path + "nnbl_eligibility_matrix_250623.csv"
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("nnbl_eligibility_matrix_250623.csv (86) : audit\n")
|
|
|
|
output_template = self.matrices_path + "ScriptTemplate5.docx"
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("ScriptTemplate5.docx (87) : audit\n")
|
|
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AUDIT : audit\n\n")
|
|
|
|
df, audit_df = self.before_audit(lang)
|
|
|
|
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER ASSIGNING LOCATIONS AUDIT : audit\n\n")
|
|
|
|
#######################################
|
|
|
|
|
|
|
|
sf.test_strict_conditions(df, csv_strict_conditions)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.test_strict_conditions 1 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_first_strict_conditions, index = False)
|
|
|
|
## gen weights for possibilties ## add preassigned weights
|
|
df = sf.gen_pos_weights(df, csv_pos_weights)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.gen_pos_weights 2 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
|
|
|
|
|
|
## language specific weights update
|
|
if lang:
|
|
if lang.upper() == "ENGLISH":
|
|
df = sf_eng.update_pos_wts_english(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf_eng.update_pos_wts_english 3 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
|
|
df = sf.sort_pos_decr_wts(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.sort_pos_decr_wts 4 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
#if self.gen_int_files:
|
|
df.to_csv(csv_after_gen_and_sort_weights, index = False)
|
|
##
|
|
sf.prep_for_pos_elimination(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.prep_for_pos_elimination 5 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df_bex1 = pd.DataFrame(df)
|
|
df_bex1.to_csv(self.base_file_path + "df_update_audit_df_b_exam_speaker_1.csv", index = False)
|
|
audit_df_bex1 = pd.DataFrame(audit_df)
|
|
audit_df_bex1.to_csv(self.base_file_path + "audit_df_update_audit_df_b_exam_speaker_1.csv", index = False)
|
|
|
|
## examine speaker possibilties
|
|
df = sf.examine_speaker_pos(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_pos 6 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
df_ex1 = pd.DataFrame(df)
|
|
df_ex1.to_csv(self.base_file_path + "df_update_audit_df_exam_speaker_1.csv", index = False)
|
|
audit_df_ex1 = pd.DataFrame(audit_df)
|
|
audit_df_ex1.to_csv(self.base_file_path + "audit_df_update_audit_df_exam_speaker_1.csv", index = True)
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_speaker_pos, index = False)
|
|
print("printing info based on audit_df")
|
|
# df_b1 = pd.DataFrame(df)
|
|
# df_b1.to_csv(self.base_file_path + "df_update_audit_df_b1.csv", index = False)
|
|
print(audit_df.head(10),audit_df.dtypes)
|
|
try:
|
|
audit_df = audit_df.sort_values('audited_line_no')
|
|
except:
|
|
audit_df['audited_line_no'] = pd.to_numeric(audit_df['audited_line_no'], errors='coerce')
|
|
audit_df = audit_df.sort_values('audited_line_no')
|
|
audit_df_try1 = pd.DataFrame(audit_df)
|
|
audit_df_try1.to_csv(self.base_file_path + "audit_df_update_audit_df_try1.csv", index = True)
|
|
print(audit_df.head())
|
|
try:
|
|
audit_df = pd.merge(audit_df, df[['line_no']], on=audit_df.index, how='left')
|
|
print(audit_df.head())
|
|
# Set 'line_no' as index
|
|
audit_df.set_index('line_no', inplace=True)
|
|
print(audit_df.head())
|
|
audit_df_try2 = pd.DataFrame(audit_df)
|
|
audit_df_try2.to_csv(self.base_file_path + "audit_df_update_audit_df_try2.csv", index = True)
|
|
except Exception as e:
|
|
print(e, audit_df.head())
|
|
pass
|
|
# try:
|
|
# audit_df.reset_index(drop=True, inplace=True)
|
|
# audit_df.set_index('line_no',inplace=True)
|
|
# except Exception as e:
|
|
# print(e)
|
|
print(audit_df.head())
|
|
print(audit_df.dtypes)
|
|
|
|
audit_df_b1 = pd.DataFrame(audit_df)
|
|
audit_df_b1.to_csv(self.base_file_path + "audit_df_update_audit_df_b1.csv", index = True)
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df_1 = pd.DataFrame(df)
|
|
df_1.to_csv(self.base_file_path + "df_update_audit_df_1.csv", index = True)
|
|
audit_df_1 = pd.DataFrame(audit_df)
|
|
audit_df_1.to_csv(self.base_file_path + "audit_df_update_audit_df_1.csv", index = True)
|
|
###
|
|
df = sf.examine_speaker_next_lines(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_next_lines 7 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_speaker_next_lines, index = False)
|
|
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
audit_df_u7 = pd.DataFrame(audit_df)
|
|
audit_df_u7.to_csv(self.base_file_path + "audit_df_update_audit_df_7.csv", index = True)
|
|
## do while pnnbl ineligible
|
|
sf.prep_pnnbl_wts(csv_pnbl_nnbl, self.matrices_path)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.prep_pnnbl_wts 8 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = sf.do_while_pnnbl_ineligible(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_pnnbl_ineligible 9 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_pnnbl_ineligible, index = False)
|
|
|
|
|
|
## examine same content
|
|
df = sf.examine_same_content_lines(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_same_content_lines 10 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examine_same_content_lines, index = False)
|
|
|
|
|
|
### examine speaker next again
|
|
df = sf.examine_speaker_next_lines(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_next_lines 11 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
###df.to_csv(csv_after_examined_speaker_next_lines_after_same_content, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
## do while pnnbl ineligible
|
|
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
|
|
df = sf.do_while_pnnbl_ineligible(df)
|
|
df = self.update_is_identified(df)
|
|
###df.to_csv(csv_after_pnnbl_ineligible_after_same_content, index = False)
|
|
|
|
################
|
|
df = sf.examine_action_possibilities_part1(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_action_possibilities_part1 12 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_action_pos_part1, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
## do while pnnbl ineligible
|
|
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
|
|
df = sf.do_while_pnnbl_ineligible(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_pnnbl_ineligible 13 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_pnnbl_inelgible_after_action_pos_part1, index = False)
|
|
|
|
################
|
|
df = sf.examine_action_possibilities_part2(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_action_possibilities_part2 14 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_action_pos_part2, index = False)
|
|
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
## do while pnnbl ineligible
|
|
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
|
|
df = sf.do_while_pnnbl_ineligible(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_pnnbl_ineligible 15 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_pnnbl_inelgible_after_action_pos_part2, index = False)
|
|
|
|
################
|
|
df = sf.examine_same_indent_bunch(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_same_indent_bunch 16 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_same_indent_bunch, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
## do while pnnbl ineligible
|
|
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
|
|
df = sf.do_while_pnnbl_ineligible(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_pnnbl_ineligible 17 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_pnnbl_inelgible_after_same_indent, index = False)
|
|
|
|
#####################
|
|
##for reorganisation
|
|
|
|
# df = pd.read_csv('Script_Shatranj_pnnbl_ineligible_same_indent_bunch_new_col_2.csv')
|
|
# csv_for_pos_elimination = os.path.join(self.output_dir,os.path.splitext(self.script_name)[0])+'_for_pos_elimination.csv'
|
|
|
|
#########################
|
|
|
|
df = sf.examine_relative_indent(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_relative_indent 18 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_relative_indent_bunch, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.examine_speaker_next_lines(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_next_lines 19 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_speaker_next_lines_after_relative_indent, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
## do while pnnbl ineligible
|
|
# sf.prep_pnnbl_wts(csv_pnbl_nnbl,matrices_path)
|
|
df = sf.do_while_pnnbl_ineligible(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_pnnbl_ineligible 20 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
df.to_csv(csv_after_pnnbl_inelgible_after_relative_indent, index=False)
|
|
|
|
#######################################
|
|
df = sf.examine_pos_sp_indent(
|
|
df,
|
|
self.csv_removed_space_between_words,
|
|
csv_after_pnnbl_inelgible_after_relative_indent,
|
|
)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_pos_sp_indent 21 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv( csv_examined_speaker_using_indent,index =False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.examine_speaker_next_lines(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_next_lines 22 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_speaker_next_lines_after_pos_sp_indent, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
## do while pnnbl ineligible
|
|
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
|
|
df = sf.do_while_pnnbl_ineligible(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_pnnbl_ineligible 23 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_pnnbl_inelgible_after_pos_sp_indent, index = False)
|
|
|
|
#################################
|
|
df = sf.examine_speaker_extension(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_extension 24 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv( csv_examined_speaker_extension,index =False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.examine_speaker_next_lines(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_next_lines 25 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_speaker_next_lines_after_speaker_extension, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
## do while pnnbl ineligible
|
|
print("pnnbl after speaker extension")
|
|
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
|
|
df = sf.do_while_pnnbl_ineligible(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_pnnbl_ineligible 26 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_pnnbl_inelgible_after_speaker_extension, index = False)
|
|
|
|
## checking
|
|
# audit_df.reset_index().to_csv(audit_report_csv,index =False)
|
|
|
|
#################################################
|
|
df = sf.examine_action_using_top2_part1(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_action_using_top2_part1 27 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_action_using_top2, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
## do while pnnbl ineligible
|
|
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
|
|
df = sf.do_while_pnnbl_ineligible(df)
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_pnnbl_inelgible_after_action_using_top_pnnbl, index = False)
|
|
|
|
# #########################################
|
|
df = sf.refine_action_possibilties(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.refine_action_possibilties 28 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_refined_action, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
## do while pnnbl ineligible
|
|
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
|
|
df = sf.do_while_pnnbl_ineligible(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_pnnbl_ineligible(df) 29 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_pnnbl_inelgible_after_refined_action, index = False)
|
|
|
|
##############################
|
|
sf.prep_pnnbl_eligible_csv(pnbl_eligibility_matrix, nnbl_eligibility_matrix)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.prep_pnnbl_eligible_csv 30 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
#############################
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 31 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl, index = False)
|
|
|
|
#################################
|
|
|
|
df = sf.start_top_identifications_part1(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.start_top_identifications_part1 32 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top_identification_part1, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
## examine speaker possibilties again after top1
|
|
df = sf.examine_speaker_pos(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_pos 33 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_speaker_pos_after_top1, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
###
|
|
df_34 = pd.DataFrame(df)
|
|
df_34.to_csv(self.base_file_path + "df_export_before_34.csv", index = True)
|
|
|
|
au_df_34 = pd.DataFrame(audit_df)
|
|
au_df_34.to_csv(self.base_file_path + "audit_df_before_after_34.csv", index = True)
|
|
|
|
df = sf.examine_speaker_next_lines(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_next_lines 34 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_speaker_next_lines_after_top1, index=False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 35 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part1, index = False)
|
|
|
|
#################################
|
|
copy_df_examine_speaker_mix_part1 = pd.DataFrame(df)
|
|
copy_df_examine_speaker_mix_part1.to_csv(self.base_file_path + "copy_df_examine_speaker_mix_part1.csv", index = True)
|
|
###########
|
|
copy_audit_df_examine_speaker_mix_part1 = pd.DataFrame(audit_df)
|
|
copy_audit_df_examine_speaker_mix_part1.to_csv(self.base_file_path + "copy_audit_df_examine_speaker_mix_part1.csv", index = True)
|
|
##########
|
|
df = sf.examine_speaker_mix_part1(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_mix_part1 36 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examine_speaker_mix_part1, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 37 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1, index = False)
|
|
|
|
#################################
|
|
df_38 = pd.DataFrame(df)
|
|
df_38.to_csv(self.base_file_path + "df_export_after_38.csv", index = True)
|
|
|
|
au_df_38 = pd.DataFrame(audit_df)
|
|
au_df_38.to_csv(self.base_file_path + "audit_df_export_after_38.csv", index = True)
|
|
|
|
df = sf.examine_speaker_mix_part2(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_mix_part2 38 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examine_speaker_mix_part2, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
## examine speaker possibilties again after mix
|
|
df = sf.examine_speaker_pos(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_pos 39 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_speaker_pos_after_mix, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
###
|
|
df = sf.examine_speaker_next_lines(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTERsf.examine_speaker_next_lines 40 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examined_speaker_next_lines_after_mix, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
## do while pnnbl ineligible
|
|
# sf.prep_pnnbl_wts(csv_pnbl_nnbl,matrices_path)
|
|
df = sf.do_while_pnnbl_ineligible(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_pnnbl_ineligible 41 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_pnnbl_ineligible_after_mix, index = False)
|
|
|
|
# df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
# df = update_is_identified(df)
|
|
# df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part2, index = False)
|
|
|
|
################################
|
|
|
|
df = sf.start_top_identifications_part2(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.start_top_identifications_part2 42 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top_identification_part2, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 43 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part2, index = False)
|
|
|
|
#################################
|
|
|
|
df = sf.start_top_identifications_part2(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.start_top_identifications_part2 44 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top_identification_part2_again, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 45 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part2_again, index = False)
|
|
|
|
#################################
|
|
|
|
df = sf.start_top_identifications_part2(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.start_top_identifications_part2 46 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top_identification_part2_again_again, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 47 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part2_again_again, index = False)
|
|
|
|
#################################
|
|
|
|
df = sf.start_slug_identification(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.start_slug_identification(df) 48 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_slug_identification, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl(df) 49 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_slug_identification, index = False)
|
|
|
|
#################################
|
|
|
|
df = sf.start_top_identifications_part1(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.start_top_identifications_part1(df) 50 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top_identification_part1_again, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 51 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part1_again, index = False)
|
|
|
|
#################################
|
|
|
|
df = sf.start_top_identifications_part3(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.start_top_identifications_part3 52 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top_identification_part3, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 53 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part3, index = False)
|
|
|
|
#################################
|
|
|
|
df = sf.start_top_identifications_part4(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.start_top_identifications_part4 54 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top_identification_part4, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 55 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part4, index = False)
|
|
#################################
|
|
df = sf.start_top_identifications_part5(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.start_top_identifications_part5(df) 56 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top_identification_part5, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 57 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part5, index = False)
|
|
|
|
#################################
|
|
|
|
df = sf.start_top_identifications_part6(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.start_top_identifications_part6 58 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top_identification_part6, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 59 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part6, index = False)
|
|
|
|
#################################
|
|
|
|
df = sf.start_top_identifications_part7(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.start_top_identifications_part7 60 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top_identification_part7, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 61 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part7, index = False)
|
|
|
|
#################################
|
|
|
|
df = sf.start_top_identifications_part8(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.start_top_identifications_part8 62 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top_identification_part8, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 63 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part8, index = False)
|
|
|
|
#################################
|
|
|
|
df = sf.examine_among_two(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_among_two 64 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examine_among_two, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 65 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_among_two, index = False)
|
|
|
|
#################################
|
|
|
|
df = sf.examine_speaker_next_lines(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_speaker_next_lines 66: AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examine_speaker_next_line_after_among_two, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 67 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two, index = False)
|
|
#################################
|
|
|
|
df = sf.examine_action_using_top2_wt_diff(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_action_using_top2_wt_diff 68 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top2_wt_diff, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
try:
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 69 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff, index = False)
|
|
except:
|
|
pass
|
|
|
|
#################################
|
|
try:
|
|
df = sf.examine_action_using_top2_wt_diff(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top2_wt_diff_again, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 70 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again, index = False)
|
|
except:
|
|
pass
|
|
#################################
|
|
try:
|
|
|
|
df = sf.start_top_identifications_part1_diluted(df)
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_top_identification_part1_diluted, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 71 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part1_diluted, index = False)
|
|
except:
|
|
pass
|
|
|
|
###################################
|
|
####################################
|
|
##1.1
|
|
df = sf.decrease_wt_dial_between_action(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.decrease_wt_dial_between_action 72 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examine_dial_between_action, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 73 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
#if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action, index = False)
|
|
|
|
####################################
|
|
#################################
|
|
|
|
df = sf.examine_among_two(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.examine_among_two 74 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
if self.gen_int_files:
|
|
df.to_csv(csv_after_examine_among_two_again, index = False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
df = sf.do_while_examine_using_identified_pnnbl(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 75 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df = self.update_is_identified(df)
|
|
#if self.gen_int_files:
|
|
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_among_two_again, index = False)
|
|
|
|
####################################
|
|
#################################
|
|
|
|
df = sf.identify_top_as_final(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.identify_top_as_final 76 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
df_76 = pd.DataFrame(df)
|
|
df_76.to_csv(self.base_file_path + "df_identify_top_as_final_76.csv", index = False)
|
|
|
|
au_df_76 = pd.DataFrame(audit_df)
|
|
au_df_76.to_csv(self.base_file_path + "audit_df_identify_top_as_final_76.csv", index = False)
|
|
|
|
df = self.update_is_identified(df)
|
|
df.to_csv(csv_after_identify_remaining_as_top, index=False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
#####################################
|
|
## prepare for audit
|
|
|
|
df = sf.prep_for_audit(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.prep_for_audit 77 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df.to_csv(csv_after_prep_for_audit_after_identification, index=False)
|
|
#####################################
|
|
df, audit_df = sf.run_audit_on_identified(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.run_audit_on_identified 78 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df.to_csv(csv_after_audit1, index=False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
#############################################
|
|
### run language specific audit on identified
|
|
|
|
if lang:
|
|
if lang.upper() == "ENGLISH":
|
|
df = sf_eng.run_audit_on_identified_english(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf_eng.run_audit_on_identified_english 79 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
#####################################
|
|
### merge the beginning/middle/end lines
|
|
# df.to_csv(self.base_file_path + "df_before_merge_line_para.csv", index = Flase)
|
|
para_df = sf.merge_line_to_para(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.merge_line_to_para 80 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
try:
|
|
para_df.to_csv(self.base_file_path+ "after_merge_line_para.csv", index = False)
|
|
print("para_df is writtern")
|
|
except:
|
|
pass
|
|
print("\n\n Function 80 is completed")
|
|
try:
|
|
script_language, dialogue_language = sf.language_detector_for_csv(para_df)
|
|
print("script_language",script_language)
|
|
print("dialogue_language",dialogue_language)
|
|
|
|
unique_script_languages = ', '.join(set(lang[0] for lang in script_language))
|
|
unique_dialogue_languages = ', '.join(set(lang[0] for lang in dialogue_language))
|
|
except:
|
|
unique_script_languages = ""
|
|
unique_dialogue_languages = ""
|
|
|
|
#commented as some unwanted change of . to comma
|
|
#para_df = sf.change_dot_to_comma_inslug(para_df)
|
|
print("unique_script_languages:",unique_script_languages)
|
|
print("unique_dialogue_languages:",unique_dialogue_languages)
|
|
# para_df.to_csv(csv_parawise_status, index=False)
|
|
##
|
|
print("\n\n dot to comma changes in slug")
|
|
audited_file_name = self.script_name + ".csv"
|
|
|
|
req_file = ContentFile(
|
|
(para_df.to_csv(index=False, path_or_buf=None)).encode("utf-8"),
|
|
audited_file_name,
|
|
)
|
|
|
|
File.objects.create(
|
|
script=Script.objects.get(id=self.script_id),
|
|
type="script-csv",
|
|
file=req_file,
|
|
)
|
|
|
|
print("\n\n exporting df and audit_df agter function 80")
|
|
df_df = pd.DataFrame(df)
|
|
df_df.to_csv(self.base_file_path + "df_export_after_80.csv", index = False)
|
|
|
|
audit_df_df = pd.DataFrame(audit_df)
|
|
audit_df_df.reset_index().to_csv(self.base_file_path + "audit_df_export_after_80.csv", index = False)
|
|
|
|
print("\nwrapping identified lines if required\n")
|
|
df = sf.wrap_text(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.wrap_text 81 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df_81 = pd.DataFrame(df)
|
|
df_81.to_csv(self.base_file_path + "df_export_after_81.csv", index = False)
|
|
|
|
au_df_81 = pd.DataFrame(audit_df)
|
|
au_df_81.reset_index().to_csv(self.base_file_path + "audit_df_export_after_81.csv", index = False)
|
|
|
|
df.to_csv(csv_after_wrapping, index=False)
|
|
#audit_df['line_no'] = audit_df['line_no'].astype(float)
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
#####################################
|
|
## prepare for audit again
|
|
only_df = pd.DataFrame(df)
|
|
only_df.to_csv(self.base_file_path + "df_before_82.csv", index = False)
|
|
|
|
df = sf.prep_for_audit(df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.prep_for_audit 82 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
df.to_csv(csv_after_prep_for_audit_after_wrapping, index=False)
|
|
|
|
#####################################
|
|
sf.run_audit_on_identified(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.run_audit_on_identified 83 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
|
|
df.to_csv(csv_after_audit2, index=False)
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
#####################################################
|
|
### run language specific audit on identified
|
|
|
|
if lang:
|
|
if lang.upper() == "ENGLISH":
|
|
df = sf_eng.run_audit_on_identified_english(df, audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf_eng.run_audit_on_identified_english 84 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
audit_df = self.update_audit_df(df, audit_df)
|
|
|
|
####################################
|
|
|
|
sf.sa_output_to_docx(df, output_linewise_docx, output_template)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.sa_output_to_docx 85 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
|
|
sf.sa_output_to_txt(output_linewise_docx, output_linewise_txt)
|
|
# print("line no: 2017",str(output_linewise_txt))
|
|
|
|
output_docx_after_audit = self.base_file_path + "audited_text.docx"
|
|
pdf_audit_file_path = self.base_file_path + "audited_text.pdf"
|
|
sf.convert_txt_to_docx(output_linewise_txt,output_docx_after_audit)
|
|
print("converted to docx")
|
|
try:
|
|
# total_page_af = sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path)
|
|
sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path)
|
|
try:
|
|
total_page_af = sf.PdfCounter(pdf_audit_file_path)
|
|
print("total pages af = ", total_page_af)
|
|
print("hehehehehe")
|
|
except Exception as exp:
|
|
print(repr(exp))
|
|
print("try except total pages didnt work")
|
|
except Exception as exp:
|
|
print("total_page_af : ", exp)
|
|
|
|
|
|
print("the count of pageline start here")
|
|
line_count_after_audit = sf.count_the_line(str(output_linewise_txt))
|
|
|
|
count_before_txt = self.base_file_path + "temp.txt"
|
|
line_count_before_audit = sf.count_the_line(str(count_before_txt))
|
|
|
|
print("you are here")
|
|
|
|
output_docx_from_orginal_text = self.base_file_path + "original_text.docx"
|
|
pdf_file_path = self.base_file_path + "original_text.pdf"
|
|
print("b4 txt to docx")
|
|
|
|
sf.convert_txt_to_docx(count_before_txt,output_docx_from_orginal_text)
|
|
|
|
print("b4 page count of pdf")
|
|
print("hehe")
|
|
"""13-2-24"""
|
|
# try:
|
|
# total_page_bf = sf.countPages(output_docx_from_orginal_text,pdf_file_path,self.base_file_path)
|
|
# print(total_page_bf)
|
|
# except Exception as exp:
|
|
# print(" total page bf",total_page_bf )
|
|
print("temp txt converted to docx")
|
|
|
|
|
|
|
|
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.sa_output_to_txt 86 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
|
|
########################################
|
|
# sf.print_audit_report_docx(audit_df,audit_report_docx)
|
|
# headers = True
|
|
try:
|
|
print("In the total number of pages count")
|
|
file_model_objects = File.objects.filter(script=self.script_id)
|
|
audit_file_object = file_model_objects.get(type="script-csv")
|
|
read_df = pd.read_csv(audit_file_object.file)
|
|
print("csv fetched")
|
|
docx = sf.csv_to_docx(read_df)
|
|
audited_docx_path = self.base_file_path + "csv_to_docx_audited.docx"
|
|
# temp_file_stream = BytesIO()
|
|
print("docx saved")
|
|
docx.save(audited_docx_path)
|
|
# temp_file_stream.seek(0)
|
|
|
|
docx_file = ContentFile(
|
|
open(audited_docx_path, 'rb').read(),
|
|
"from_audited_csv_to_document.docx",
|
|
)
|
|
# docx_file = ContentFile(
|
|
# audited_docx_path.getvalue(),
|
|
# "from_audited_csv_to_document.docx",
|
|
# )
|
|
|
|
File.objects.create(
|
|
script=Script.objects.get(id=self.script_id),
|
|
type="script-docx",
|
|
file=docx_file,
|
|
)
|
|
print("script-docx object created")
|
|
converted_audit_pdf_file_path = self.base_file_path + "csv_to_docx_audited.pdf"
|
|
|
|
Final_pdf_page_count = sf.countPages(audited_docx_path,converted_audit_pdf_file_path,self.base_file_path)
|
|
print("total number of pdf pages")
|
|
print(int(Final_pdf_page_count))
|
|
pass
|
|
except Exception as e:
|
|
print("yje exception is")
|
|
print(e)
|
|
|
|
audit_df = self.update_audit_df_intro(df, audit_df)
|
|
audit_df = self.update_audit_df_appendix(df, audit_df)
|
|
audit_report_name = self.script_name + "_report.docx"
|
|
print("audit_df_tabular 1908\n\n",audit_df,"\n\n" )
|
|
copy_df = pd.DataFrame(audit_df)
|
|
copy_df.reset_index().to_csv(self.base_file_path + "audit_report_export.csv", index = False)
|
|
print("before print_report_tabular_docx")
|
|
|
|
|
|
script_ob = Script.objects.get(id=self.script_id)
|
|
screen_play_name = script_ob.screenplay.name
|
|
author_name = script_ob.screenplay.author
|
|
print(screen_play_name)
|
|
print(author_name)
|
|
|
|
print(line_count_before_audit)
|
|
print(line_count_after_audit)
|
|
# audit_report_buffer = sf.print_audit_report_tabular_docx(audit_df,line_count_before_audit,line_count_after_audit) #commented on 13-09-23
|
|
|
|
para_filetered_audut_df = sf.assign_para_no(audit_df)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER para_filetered_audut_df\n")
|
|
|
|
scriptname = str(screen_play_name) # to fetched by audit data
|
|
author = str(author_name) # to fetched by audit data
|
|
try:
|
|
pre_audit_pagenumber = int(self.total_page_bf)
|
|
except:
|
|
pre_audit_pagenumber = 1
|
|
try:
|
|
print("total_page_af = ", total_page_af )
|
|
postauditpagenumber = int(total_page_af)
|
|
except:
|
|
print("total_page_af 1")
|
|
postauditpagenumber = 1
|
|
try:
|
|
preaudit_line_no = int(line_count_before_audit)
|
|
except:
|
|
preaudit_line_no = 1
|
|
try:
|
|
postaudit_line_no = int(line_count_after_audit)
|
|
except:
|
|
postaudit_line_no = 1
|
|
try:
|
|
print("unique_script_languages",unique_script_languages)
|
|
script_language = str(unique_script_languages) # to be fetched by conversin function
|
|
except:
|
|
script_language = "---"
|
|
try:
|
|
print("unique_dialogue_languages",unique_dialogue_languages)
|
|
dialogue_language = str(unique_dialogue_languages) # to be fetched by conversin function
|
|
except:
|
|
dialogue_language = "---"
|
|
print("scriptname",scriptname)
|
|
print("author",author)
|
|
print("pre_audit_pagenumber",pre_audit_pagenumber)
|
|
print("postauditpagenumber",postauditpagenumber)
|
|
print("preaudit_line_no",preaudit_line_no)
|
|
print("postaudit_line_no",postaudit_line_no)
|
|
'''
|
|
additiona model information
|
|
'''
|
|
#self.audit_model_obj.number_of_pages = int(postauditpagenumber)
|
|
# time_per_page = 30
|
|
# base time = 120
|
|
# no_of_pages = 10
|
|
# formula of counting pages = (time_per_page + base time) * no_of_pages
|
|
try:
|
|
self.audit_model_obj.screenplay_language = script_language
|
|
self.audit_model_obj.dialogue_language = dialogue_language
|
|
self.audit_model_obj.number_of_pages = int(postauditpagenumber)
|
|
|
|
print("script language, dialogue language, post audit pagenumber is update to the audit models")
|
|
except:
|
|
print("page number and language insertion failed")
|
|
pass
|
|
audit_report_buffer = sf.print_audit_report_tabular_docx(para_filetered_audut_df,scriptname,author,pre_audit_pagenumber,postauditpagenumber,preaudit_line_no,postaudit_line_no,script_language,dialogue_language)
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.print_audit_report_tabular_docx 87 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
# audit_report_path = self.base_file_path + "audit_report_doc.docx"
|
|
# report_data = Document(audit_report_buffer)
|
|
# report_data.save(audit_report_path)
|
|
|
|
req_file = ContentFile(audit_report_buffer.read(), audit_report_name)
|
|
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("AFTER sf.print_audit_report_tabular_docx 87 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n"+str(req_file))
|
|
|
|
print("req_file")
|
|
|
|
try:
|
|
script = Script.objects.get(id=self.script_id)
|
|
script.no_of_pages = int(Final_pdf_page_count)
|
|
script.save()
|
|
# user_id = script.screenplay.user.id
|
|
# Now, 'user_id' contains the user.id associated with the given script_id
|
|
except Exception as e:
|
|
print(e)
|
|
# Handle the case where the script with the given ID doesn't exist
|
|
# user_id = None
|
|
print("No_of_pages not insertd")
|
|
# try:
|
|
# update_juggernaut(user_id=user_id,service_name='audit',audit_pages = int(postauditpagenumber))
|
|
# except:
|
|
# print("the update_juggernaut didnt work")
|
|
# req_file = File.objects.get(script=self.script_id)
|
|
# req_file.type= "audit-report"
|
|
# req_file.file = file
|
|
# req_file.save()
|
|
|
|
File.objects.create(
|
|
script=Script.objects.get(id=self.script_id),
|
|
type="audit-report",
|
|
file=req_file,
|
|
)
|
|
|
|
try:
|
|
end_time_count = time.time()
|
|
total_duration = end_time_count - self.start_time_count
|
|
|
|
hours, remainder = divmod(total_duration, 3600)
|
|
minutes, seconds = divmod(remainder, 60)
|
|
text_time = f"Program ran for {str(hours)} hours, {str(minutes)} minutes, and {str(seconds)} seconds. for script_id= {str(self.script_id)} which has pdf pages of {pre_audit_pagenumber}."
|
|
print(str(text_time))
|
|
t_time_file = self.total_time_file + "/tail_errors.txt"
|
|
with open(t_time_file, "a") as file008:
|
|
file008.write(str(text_time) + "\n")
|
|
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("87 function complete \n")
|
|
except:
|
|
pass
|
|
|
|
return True
|
|
|
|
def script_meta(self):
|
|
pass
|
|
|
|
def audit_fdx(self):
|
|
|
|
# fdx to audited csv
|
|
para_df = pd.DataFrame()
|
|
para_df = sf.fdx_to_audited_df(self.input_script)
|
|
# save audited csv to file system
|
|
audited_file_name = self.script_name + ".csv"
|
|
|
|
req_file = ContentFile(
|
|
(para_df.to_csv(index=False, path_or_buf=None)).encode("utf-8"),
|
|
audited_file_name,
|
|
)
|
|
|
|
File.objects.create(
|
|
script=Script.objects.get(id=self.script_id),
|
|
type="script-csv",
|
|
file=req_file,
|
|
)
|
|
print("csv created")
|
|
try:
|
|
self.audit_model_obj.isfdx = True
|
|
self.audit_model_obj.save()
|
|
print("isfdx True saved")
|
|
except Exception as exp:
|
|
print(repr(exp))
|
|
language_check_df = sf.check_and_copy_rows(para_df)
|
|
try:
|
|
script_language, dialogue_language = sf.language_detector_for_csv(language_check_df)
|
|
print("script_language",script_language)
|
|
print("dialogue_language",dialogue_language)
|
|
|
|
unique_script_languages = ', '.join(set(lang[0] for lang in script_language))
|
|
unique_dialogue_languages = ', '.join(set(lang[0] for lang in dialogue_language))
|
|
print("langauage detection worked")
|
|
except Exception as exp:
|
|
print(repr(exp))
|
|
unique_script_languages = ""
|
|
unique_dialogue_languages = ""
|
|
print("Langauuge detectedion csv didnt work")
|
|
try:
|
|
self.audit_model_obj.screenplay_language = unique_script_languages
|
|
self.audit_model_obj.dialogue_language = unique_dialogue_languages
|
|
self.audit_model_obj.save()
|
|
print("audit lang saved")
|
|
except Exception as exp:
|
|
print(repr(exp))
|
|
self.audit_model_obj.screenplay_language = "ENGLISH"
|
|
self.audit_model_obj.dialogue_language = "ENGLISH"
|
|
print("audot lang didnt save")
|
|
# print("In the total number of pages count")
|
|
# file_model_objects = File.objects.filter(script=self.script_id)
|
|
# audit_file_object = file_model_objects.get(type="script-csv")
|
|
# read_df = pd.read_csv(audit_file_object.file)
|
|
# print("csv fetched")
|
|
try:
|
|
print(para_df)
|
|
docx = sf.csv_to_docx(para_df)
|
|
audited_docx_path = self.base_file_path + "csv_to_docx_audited.docx"
|
|
# temp_file_stream = BytesIO()
|
|
print("docx saved")
|
|
docx.save(audited_docx_path)
|
|
# temp_file_stream.seek(0)
|
|
|
|
docx_file = ContentFile(
|
|
open(audited_docx_path, 'rb').read(),
|
|
"from_audited_csv_to_document.docx",
|
|
)
|
|
|
|
|
|
File.objects.create(
|
|
script=Script.objects.get(id=self.script_id),
|
|
type="script-docx",
|
|
file=docx_file,
|
|
)
|
|
print("script-docx object created")
|
|
# output_docx_after_audit = self.base_file_path + "audited_text.docx"
|
|
pdf_audit_file_path = self.base_file_path + "csv_to_docx_audited.pdf"
|
|
|
|
print("converted to docx")
|
|
try:
|
|
# total_page_af = sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path)
|
|
sf.countPages(audited_docx_path,pdf_audit_file_path,self.base_file_path)
|
|
print("fdx : docx to pdf was create at", str(pdf_audit_file_path) )
|
|
try:
|
|
total_page_af = sf.PdfCounter(pdf_audit_file_path)
|
|
print("total pages af = ", total_page_af)
|
|
print("hehehehehe")
|
|
self.audit_model_obj.number_of_pages = int(total_page_af)
|
|
self.audit_model_obj.save()
|
|
except Exception as exp:
|
|
print(repr(exp))
|
|
print("try except total pages didnt work")
|
|
except Exception as exp:
|
|
print("fdx docx to pdf conversion didnt work")
|
|
print("total_page_af : ", exp)
|
|
except Exception as exp:
|
|
print("csv to docs didnt work")
|
|
print(repr(exp))
|
|
return True
|
|
|
|
def quick_audit(self, lang: str = None):
|
|
|
|
df, audit_df = self.before_audit(lang)
|
|
## get the indents count
|
|
count_green = 0
|
|
count_amber = 0
|
|
total_count = len(df)
|
|
|
|
all_indents = df["ssc"].value_counts()
|
|
print(all_indents)
|
|
all_indents = df["ssc"].value_counts().sort_index().reset_index()
|
|
# print(all_indents)
|
|
for index in all_indents.index:
|
|
# print(all_indents['index'][index])
|
|
if str(all_indents["index"][index]) in ("15", "25", "30", "35"):
|
|
count_green += all_indents["ssc"][index]
|
|
elif str(all_indents["index"][index]) in (
|
|
"0",
|
|
"14",
|
|
"16",
|
|
"24",
|
|
"26",
|
|
"29",
|
|
"31",
|
|
"34",
|
|
"36",
|
|
):
|
|
count_amber += all_indents["ssc"][index]
|
|
elif all_indents["index"][index] > 62:
|
|
count_amber += all_indents["ssc"][index]
|
|
|
|
print(all_indents["index"].tolist())
|
|
print(count_green, count_amber, total_count)
|
|
percent_good = ((count_green + count_amber) / total_count) * 100
|
|
if percent_good > 80:
|
|
print("most lines are within prescribed indents", percent_good)
|
|
quick_audit_flag = "pass"
|
|
else:
|
|
print("most lines are not within prescribed indents", percent_good)
|
|
quick_audit_flag = "fail"
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("\nafter quick audit : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
print(quick_audit_flag)
|
|
|
|
def get_character_list(self, lang: str = None):
|
|
|
|
if not self.audit_model_obj.pre_audit_run:
|
|
df, _ = self.before_audit(lang)
|
|
|
|
else:
|
|
df_path = os.path.join(self.base_file_path, "space_between_words_removed.csv")
|
|
df = pd.read_csv(df_path)
|
|
|
|
|
|
df_indents = df[["line_no", "data", "ssc", "parenthetical"]]
|
|
df_indents.fillna("", inplace=True)
|
|
for index in df_indents.index:
|
|
data = df_indents["data"][index]
|
|
|
|
if df_indents["parenthetical"][index] == "PartMidEnd":
|
|
|
|
par_pos = re.search("\(", data).start()
|
|
df_indents["data"][index] = data[0:par_pos].strip()
|
|
df_indents["parenthetical"][index] = "Absent"
|
|
elif data.strip():
|
|
df_indents["data"][index] = data.strip()
|
|
|
|
df_indents = df_indents.loc[df_indents["parenthetical"] == "Absent", :]
|
|
df_indents["ssc"].value_counts().sort_index()
|
|
df_indents["ssc"].value_counts().sort_index().reset_index()
|
|
all_indents = df_indents["ssc"].value_counts().sort_index().reset_index()
|
|
if 35 in all_indents["index"].tolist():
|
|
if df_indents["ssc"].value_counts().sort_index()[35] > 3:
|
|
sp_indent = 35
|
|
else:
|
|
ps_sp_indents = df_indents.loc[
|
|
(df_indents["ssc"] >= 32) & (df_indents["ssc"] <= 40), :
|
|
]
|
|
if not ps_sp_indents.empty:
|
|
sp_indent = (
|
|
ps_sp_indents["ssc"]
|
|
.value_counts()
|
|
.sort_values(ascending=False)
|
|
.reset_index()["index"][0]
|
|
)
|
|
else:
|
|
sp_indent = 35
|
|
|
|
# sp_indent = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]['index']
|
|
else:
|
|
ps_sp_indents = df_indents.loc[
|
|
(df_indents["ssc"] >= 32) & (df_indents["ssc"] <= 40), :
|
|
]
|
|
if not ps_sp_indents.empty:
|
|
sp_indent = (
|
|
ps_sp_indents["ssc"]
|
|
.value_counts()
|
|
.sort_values(ascending=False)
|
|
.reset_index()["index"][0]
|
|
)
|
|
else:
|
|
sp_indent = -1
|
|
# sp_indent = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]['index']
|
|
# third_indents = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]
|
|
|
|
try:
|
|
character_list = df_indents.loc[
|
|
df_indents["ssc"] == sp_indent, "data"
|
|
].unique()
|
|
except:
|
|
character_list = []
|
|
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("\nafter get_character_list : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
return character_list
|
|
|
|
def get_character_subset(self, character_list):
|
|
|
|
# if character_list is None:
|
|
# character_list = self.get_character_list()
|
|
|
|
audited_linewise_csv = os.path.join(self.base_file_path, "after_audit1.csv") # replaced by after_audit1.csv
|
|
foldername = "character_subset"
|
|
output_dir = os.path.join(self.base_file_path, foldername)
|
|
|
|
if not os.path.exists(output_dir):
|
|
os.mkdir(output_dir)
|
|
|
|
df_after_audit = pd.read_csv(audited_linewise_csv)
|
|
df_after_audit.fillna("", inplace=True)
|
|
df_after_audit["line_no"] = df_after_audit["line_no"].astype(int)
|
|
scenes = df_after_audit.loc[
|
|
(df_after_audit["Identification_Status"] == "ps1")
|
|
| (df_after_audit["Identification_Status"] == "ps2"),
|
|
["line_no", "data", "Identification_Status"],
|
|
]
|
|
scene_indexes = scenes.index
|
|
last_index = df_after_audit.index[-1]
|
|
character_scripts_dict = dict()
|
|
|
|
for character in character_list:
|
|
|
|
try:
|
|
print("processing character subset for", character)
|
|
except:
|
|
pass
|
|
|
|
output_subset_script_txt = os.path.join(
|
|
output_dir,
|
|
(self.script_name.rsplit(".", 1)[0] + "_" + str(character) + ".txt"),
|
|
)
|
|
|
|
output_subset_script_docx = os.path.join(
|
|
output_dir,
|
|
(self.script_name.rsplit(".", 1)[0] + "_" + str(character) + ".docx"),
|
|
)
|
|
|
|
i, j = 0, 1
|
|
|
|
character_in_scenes = []
|
|
character_lines = []
|
|
while j <= len(scene_indexes):
|
|
scene_no = i + 1
|
|
start = scene_indexes[i]
|
|
if j < len(scene_indexes):
|
|
end = scene_indexes[j]
|
|
else:
|
|
end = last_index + 1
|
|
for index in range(start, end):
|
|
data = df_after_audit["data"][index]
|
|
if re.search(character.upper(), data.strip()):
|
|
character_lines.append(start)
|
|
# print(scene_no,index,data)
|
|
character_in_scenes.append(scene_no)
|
|
character_lines.append(index)
|
|
rev_index = index - 1
|
|
rev_index_is = df_after_audit["Identification_Status"][
|
|
rev_index
|
|
]
|
|
character_lines.append(rev_index)
|
|
# pvs_data = df_after_audit['data'][rev_index-1]
|
|
# print(rev_index,pvs_data)
|
|
try:
|
|
rev_index_before_is = df_after_audit[
|
|
"Identification_Status"
|
|
][rev_index - 1]
|
|
except:
|
|
rev_index_before_is = ""
|
|
# while rev_index != start and rev_index_is != 'ps4' and rev_index_is != 'ps1' and rev_index_is != 'ps7' :
|
|
while (
|
|
rev_index != start
|
|
and rev_index_is != "ps4"
|
|
and rev_index_is != "ps1"
|
|
and not (
|
|
rev_index_is == "ps6" and rev_index_before_is == "blank"
|
|
)
|
|
):
|
|
rev_index = rev_index - 1
|
|
pvs_data = df_after_audit["data"][rev_index]
|
|
# print(rev_index,pvs_data)
|
|
character_lines.append(rev_index)
|
|
rev_index_is = df_after_audit["Identification_Status"][
|
|
rev_index
|
|
]
|
|
fwd_index = index
|
|
fwd_index_is = df_after_audit["Identification_Status"][
|
|
fwd_index
|
|
]
|
|
while fwd_index_is != "blank" and fwd_index != "ps15":
|
|
|
|
fwd_index = fwd_index + 1
|
|
character_lines.append(fwd_index)
|
|
fwd_index_is = df_after_audit["Identification_Status"][
|
|
fwd_index
|
|
]
|
|
|
|
i += 1
|
|
j += 1
|
|
|
|
character_in_scenes = list(set(character_in_scenes))
|
|
character_lines = list(set(character_lines))
|
|
print(character_lines)
|
|
character_lines.sort()
|
|
print(character_lines)
|
|
|
|
character_df = df_after_audit[df_after_audit.index.isin(character_lines)]
|
|
character_df.reset_index(drop=True, inplace=True)
|
|
|
|
character_df = sf.prep_for_audit(character_df)
|
|
|
|
# test_path = os.path.join(output_dir,os.path.splitext(input_filename)[0])+ '_' + str(character) + '_test1.csv'
|
|
# character_df.to_csv(test_path,index= False)
|
|
|
|
character_df = sf.run_audit_on_identified(character_df)
|
|
|
|
# test_path = os.path.join(output_dir,os.path.splitext(input_filename)[0])+ '_' + str(character) + '_test2.csv'
|
|
# character_df.to_csv(test_path,index= False)
|
|
|
|
ch_para_df = sf.merge_line_to_para(character_df)
|
|
# ch_para_df.to_csv(csv_parawise_status, index = False)
|
|
|
|
sf.sa_wrapped_output_to_docx(ch_para_df, output_subset_script_docx)
|
|
|
|
character_scripts_dict[character] = output_subset_script_docx
|
|
|
|
# sf.conv_docx_to_txt(output_subset_script_docx,output_subset_script_txt)
|
|
|
|
with open(output_subset_script_txt, "w", encoding="utf-8") as fout:
|
|
|
|
for index in character_lines:
|
|
print(df_after_audit["Identification_Status"][index])
|
|
try:
|
|
if str(df_after_audit["Identification_Status"][index]) == "ps1":
|
|
fout.writelines("\n")
|
|
except:
|
|
pass
|
|
|
|
data = df_after_audit["data"][index]
|
|
try:
|
|
print(data)
|
|
except:
|
|
pass
|
|
fout.writelines(str(data))
|
|
fout.writelines("\n")
|
|
|
|
try:
|
|
if (
|
|
df_after_audit["Identification_Status"][index] == "ps1"
|
|
or df_after_audit["Identification_Status"][index] == "ps3"
|
|
):
|
|
fout.writelines("\n")
|
|
except:
|
|
pass
|
|
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("\nafter get_character_subset : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
|
|
|
|
return character_scripts_dict
|
|
|
|
def audit_in_background(self):
|
|
|
|
# if os.fork() != 0:
|
|
# return
|
|
|
|
print("Running in background")
|
|
end_time = datetime.datetime.now()
|
|
try:
|
|
extension = self.input_script.rsplit(".", 1)[-1]
|
|
if extension == 'fdx':
|
|
self.audit_fdx()
|
|
else:
|
|
self.audit()
|
|
self.audit_model_obj.status = States.SUCCESS
|
|
self.audit_model_obj.save()
|
|
print("Audit Success!!!!!!!!!!!!!!!!!!!!!!!")
|
|
end_time = datetime.datetime.now()
|
|
with open(self.base_file_path + "time_taken.txt", "a") as file007:
|
|
file007.write("\n\n****AUDITING IS SUCCESSFUL****\n")
|
|
print(end_time)
|
|
|
|
|
|
except Exception as exp:
|
|
self.audit_model_obj.status = States.FAILURE
|
|
self.audit_model_obj.results = exp
|
|
self.audit_model_obj.error_msg = "FAILED"
|
|
self.audit_model_obj.save()
|
|
print(end_time)
|
|
|
|
if __name__ == "__main__":
    # Smoke-test entry point: audit script "123" with logging enabled and
    # build the per-character subset documents. get_character_subset()
    # requires the character list — the original call passed no argument
    # and always raised a TypeError.
    naudit = NeutralAudit("123", True)
    characters = naudit.get_character_list()
    naudit.get_character_subset(characters)
|