Conversion_Kitchen_Code/kitchen_counter/scriptAudit/mnf_script_audit.py

2768 lines
119 KiB
Python
Raw Normal View History

2024-04-27 09:33:09 +00:00
import os
import re
import sys
from pathlib import Path
import datetime
import pandas as pd
from django.core.files.base import ContentFile
import time
from centralisedFileSystem.models import File, Script, ScreenPlay
from scriptAudit import sa_functions as sf
from scriptAudit import sa_functions_english as sf_eng
from scriptAudit.models import ScriptAuditModel, States
from io import BytesIO
import datetime
import pytz
import subprocess
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.options.mode.copy_on_write = False
pd.options.mode.chained_assignment = None
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
2024-04-27 09:33:09 +00:00
2024-04-30 04:59:37 +00:00
2024-04-27 09:33:09 +00:00
class NeutralAudit:
def __init__(
    self,
    script_id: str = None,
    log: bool = False,
) -> None:
    """
    Prepare an audit run for a script that has already been uploaded.

    Resolves the original script file from the central file system,
    creates the per-script output directory under ``media/audit_folder/``,
    computes the paths of the report CSVs, and optionally redirects
    stdout to a per-script log file.
    _________________________________________________________________
    Parameters :
        script_id : str -> Id of the script to be Audited
                            default = None
        log : bool -> save logs in log.txt
                            default = False
    _________________________________________________________________
    Return :
        None
    """
    self.start_time_count = time.time()
    print("<<<<<<<<<<<<<<<<<<<<<<<<<")
    self.matrices_path = str(Path(__file__).resolve().parent) + "/matrices/"
    self.total_time_file = str(Path(__file__).resolve().parent)
    print(script_id,"SCRIPT-ID IS HERE|| AYYA")
    self.script_id = script_id
    audit_root_dir = (str(Path(__file__).resolve().parent.parent) + "/media/audit_folder/")
    self.script_name = str(self.script_id)
    output_dir = os.path.join(audit_root_dir, self.script_name)
    # Original uploaded script as registered in the central file system.
    file_to_audit = File.objects.get(
        script=script_id,
        type="script-original",
    )
    self.input_script = file_to_audit.file.path
    if not os.path.exists(output_dir):
        try:
            os.makedirs(output_dir, exist_ok=True)
        except Exception as exp:
            # Fall back to shell tools when makedirs fails (e.g. permissions).
            # NOTE(review): chmod 777 makes the folder world-writable — confirm
            # that this is really required by the deployment.
            print(repr(exp))
            subprocess.run(["mkdir", output_dir])
            subprocess.run(["chmod", "777", output_dir])
    self.base_file_path = str(output_dir) + "/"
    self.csv_removed_space_between_words = (self.base_file_path + "space_between_words_removed.csv")
    self.audit_report_csv = self.base_file_path + "audit_spreadsheet.csv"
    # Route stdout: to a log file when log=True, otherwise back to the real
    # stdout.  fix: the temporary devnull handle is now closed instead of
    # being leaked when sys.stdout is immediately reassigned below.
    devnull = open(os.devnull, "w")
    sys.stdout = devnull
    if log:
        log_file = self.base_file_path + "_log.txt"
        sys.stdout = open(log_file, "w", encoding="utf-8")
        # gen_int_files: keep intermediate CSV artefacts when logging is on.
        self.gen_int_files = True
    else:
        self.gen_int_files = False
        sys.stdout = sys.__stdout__
    devnull.close()
    self.audit_model_obj = ScriptAuditModel.objects.get(script = Script.objects.get(id = self.script_id,))
    time_file = self.base_file_path + "time_taken.txt"
    start_time = datetime.datetime.now()
    print(start_time)
    with open(time_file, "a") as file007:
        # fix: the original wrote "started\n\n" twice in a row; once is enough.
        file007.write("started\n\n")
def __del__(self) -> None:
    """
    Destructor: hand stdout back to the interpreter's original stream.

    ``__init__`` may have pointed ``sys.stdout`` at a log file or at
    ``os.devnull``; restoring ``sys.__stdout__`` here makes sure the
    process prints normally again once this object is garbage-collected.
    NOTE(review): the redirected stream itself is not closed here — the
    underlying file handle appears to be left to the GC; confirm.
    """
    sys.stdout = sys.__stdout__
def update_audit_df(self, df, audit_df):
print("inside update audit df")
print(df.dtypes)
print(audit_df.dtypes)
lines_not_removed = audit_df.loc[audit_df["line_removed"] != "Yes"].index.to_list()
audit_df.sort_index(inplace=True)
# audit_df.reset_index().to_csv(audit_report_csv,index =False)
audit_df["audited_line_no"] = ""
audited_line_no = 1
for line in lines_not_removed:
new_data = ""
try:
new_data = df.loc[df["line_no"] == line, "data"].values[0]
except:
pass
# print(new_data)
try:
audit_df["Identification_Status"][line] = df.loc[
df["line_no"] == line, "Identification_Status"
].values[0]
except:
pass
audit_df["scene_number"][line] = df.loc[
df["line_no"] == line, "scene_number"
].values[0]
audit_df["data_corrected"][line] = new_data
audit_df["line_removed"][line] = "No"
audit_df["audited_line_no"][line] = audited_line_no
audited_line_no += 1
# print(audit_df.loc[audit_df['line_no'] == line, 'data_corrected'])
audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
return audit_df
def update_audit_df_intro(self, df, audit_df):
print("update_audit_df_intro")
audit_df.reset_index(inplace=True, drop=True)
new_data = ""
for line in audit_df.index:
try:
print("line",line)
if audit_df["introduction"][line] == "Yes":
try:
new_data = df.loc[df["line_no"] == line, "data"].values[0]
except Exception as e:
print("Exception 174:",e)
pass
audit_df["data_corrected"][line] = new_data
except Exception as e:
print(e)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("\n\n update_audit_df_intro : complete \n")
return audit_df
def update_audit_df_appendix(self, df, audit_df):
new_data = ""
print(audit_df.index)
for line in audit_df.index:
if audit_df["appendix"][line] == "Yes":
try:
new_data = df.loc[df["line_no"] == line, "data"].values[0]
except:
pass
audit_df["data_corrected"][line] = new_data
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("\n\n update_audit_df_appendix : complete \n")
return audit_df
def update_is_identified(self, df):
print("Updating is Identified")
df["Identification_Status"].fillna("", inplace=True)
for index in df.index:
print(index,df["Identification_Status"][index])
try:
if df["Identification_Status"][index]:
line_pos = df["Identification_Status"][index].split(";")
pos_count = len(line_pos)
else:
pos_count = 0
except:
pos_count = 0
print(pos_count)
if pos_count == 1:
df["isIdentified"][index] = "Yes"
else:
df["isIdentified"][index] = "No"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("\n\n Inside update_is_identified : complete \n")
return df
def before_audit(self, lang: str = None):
    """
    Pre-audit pipeline: normalise the uploaded script into DataFrames.

    Steps (each one appends a progress line to time_taken.txt):
    convert the source file to plain text, estimate audit duration from
    the PDF page count, load the text into a DataFrame, pre-assign line
    weights, build the audit-tracking DataFrame, trim the introduction
    and appendix, strip page numbers and extra blank lines, merge broken
    lines, collapse spaces between words, and sync everything back into
    the audit sheet.  Numerous intermediate CSVs are written under
    ``self.base_file_path`` for debugging.

    Parameters
    ----------
    lang : str, optional
        Language of the script.  Every branch currently applies the
        English rules (``sf_eng``); the value only changes which
        progress message is logged.  default = None.

    Returns
    -------
    tuple
        ``(df, audit_df)`` — the cleaned script lines and the audit
        tracking sheet.
    """
    # Paths of the intermediate artefacts produced by this stage.
    output_converted_txt = self.base_file_path + "temp.txt"
    output_converted_docx = self.base_file_path + "temp.docx"
    csv_for_pre_processing = self.base_file_path + "for_pre_processing.csv"
    csv_for_processing = self.base_file_path + "for_processing.csv"
    csv_prepped_for_audit = self.base_file_path + "prepped_for_audit.csv"
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("Inside before audit : ( 0-16 ) \n\n")
    ## convert pdf/docx to text
    sf.conv_to_txt(
        self.input_script,
        output_converted_docx,
        output_converted_txt
    )
    # Best-effort: estimate how long the audit will take from the page
    # count and store the formatted ETA on the audit model.  Any failure
    # here is deliberately ignored — the estimate is informational only.
    try:
        output_docx_from_orginal_text = self.base_file_path + "original_text1.docx"
        pdf_file_path = self.base_file_path + "original_text1.pdf"
        print("b4 txt to docx")
        sf.convert_txt_to_docx(output_converted_txt,output_docx_from_orginal_text)
        print("IN THE BEGINING OF AUDIT PDF PAGES")
        print("b4 page count of pdf")
        # total_page_bf = sf.countPages(output_docx_from_orginal_text,pdf_file_path,self.base_file_path)
        sf.countPages(output_docx_from_orginal_text,pdf_file_path,self.base_file_path)
        print("temp txt converted to docx")
        # Fall back to a single page if the PDF page count fails below.
        self.total_page_bf = str(1)
        try:
            print("int try pdf bf")
            self.total_page_bf = sf.PdfCounter(pdf_file_path)
            print("taotal_page_bf", str(self.total_page_bf))
        except Exception as exp:
            print(repr(exp))
            print("page bf didnt work")
            pass
        # self.audit_model_obj.number_of_pages = int(total_page_bf)
        # ETA formula: 26 seconds per page plus a 120-second base cost.
        time_per_page = 26
        base_time = 120
        no_of_pages = int(self.total_page_bf)
        formula_of_counting_pages = (time_per_page * no_of_pages) + base_time
        print("time required for auditing is :",formula_of_counting_pages)
        extimated_time = round(formula_of_counting_pages / 60, 1)
        print("extimated_time:",extimated_time)
        print("Exstimated time is updated")
        kolkata_time = datetime.datetime.now(pytz.timezone('Asia/Kolkata'))
        print(kolkata_time)
        thirty_mins_later = kolkata_time + datetime.timedelta(minutes=extimated_time)
        formatted_time = thirty_mins_later.strftime("%B %d, %Y %I:%M %p")
        # NOTE(review): expected_duration is assigned but not saved here;
        # presumably persisted by a later .save() — confirm.
        self.audit_model_obj.expected_duration = formatted_time
        print(formatted_time)
    except:
        pass
    # self.total_line_before_audit = sf.count_the_line(output_converted_txt)
    # print("total_line_before_audit :",total_line_before_audit)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.conv_to_text 1 : before audit\n")
    ## convert to df
    sf.conv_to_csv(output_converted_txt, csv_for_pre_processing)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.conv_to_csv 2 : before audit\n")
    df = pd.read_csv(csv_for_pre_processing, encoding="utf8")
    ## direct to df.. not working as expected
    # df = pd.DataFrame()
    # df = sf.conv_to_df(output_converted_txt)
    # df.to_csv(csv_for_pre_processing,index=False)
    print("before assign weights:")
    print(df.dtypes)
    df['preassigned_weights'] = ''
    df = sf.pre_assign_wts(df)
    print(df.dtypes)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.pre_assign_wts 3 : before audit\n")
    df = df.sort_index().reset_index(drop=True)
    df.to_csv(csv_for_processing, index =False)
    df["data"].fillna("", inplace=True)
    ## make df to track audit
    audit_df = pd.DataFrame()
    df_1st = pd.DataFrame(df)
    df_1st.to_csv(self.base_file_path + "very_first_df_feed_to_create_audit_df.csv", index = False)
    audit_df = sf.create_audit_df(df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.create_audit_df 4 : before audit\n")
    audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
    print(df.dtypes)
    print(audit_df.dtypes)
    audit_df.reset_index().to_csv(self.base_file_path + "very_first_audit_df_feed_to_create_audit_df.csv", index = False)
    print("LANGUAGE IS",lang)
    ## trim intro
    # NOTE(review): all three branches call the same English routine; only
    # the logged message differs.  Presumably other languages will get
    # their own trim rules later.
    if lang:
        if lang.upper() == "ENGLISH":
            sf_eng.trim_intro_english(df, audit_df)
            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_intro_english (5) : before audit\n")
            df = self.update_is_identified(df)
        else:
            sf_eng.trim_intro_english(df, audit_df)
            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_intro_english (6) : before audit\n")
            df = self.update_is_identified(df)
    else:
        # sf.trim_intro(df,audit_df)
        sf_eng.trim_intro_english(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf_eng.trim_intro_english (7) : before audit\n")
        df = self.update_is_identified(df)
    # Drop the rows the trim step marked as removed.
    lines_not_removed = audit_df.loc[
        audit_df["line_removed"] != "Yes"
    ].index.to_list()
    print(lines_not_removed)
    df = df.loc[df["line_no"].isin(lines_not_removed), :]
    df = df.sort_index().reset_index(drop=True)
    # df = df.reset_index()
    audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
    print("Trimming Appendix")
    ## trim appendix
    if lang:
        if lang.upper() == "ENGLISH":
            sf_eng.trim_appendix_english(df, audit_df)
            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_appendix_english 8: before audit\n")
            df = self.update_is_identified(df)
        else:
            sf_eng.trim_appendix_english(df, audit_df)
            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_appendix_english 9: before audit\n")
            df = self.update_is_identified(df)
    else:
        sf_eng.trim_appendix_english(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf_eng.trim_appendix_english 10 : before audit\n")
        df = self.update_is_identified(df)
    ## remove page numbers
    sf.remove_page_numbers(df, audit_df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.remove_page_numbers 11 : before audit\n")
    lines_not_removed = audit_df.loc[
        audit_df["line_removed"] != "Yes"
    ].index.to_list()
    print(lines_not_removed)
    df = df.loc[df["line_no"].isin(lines_not_removed), :]
    df = df.sort_index().reset_index(drop=True)
    # df = df.reset_index()
    audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
    ## prepare for audit
    df = sf.prep_for_audit(df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.prep_for_audit 12 : before audit\n")
    # sf.prep_for_audit(df)
    df.to_csv(csv_prepped_for_audit, index=False)
    ## remove extra blank lines
    sf.remove_extra_blank_lines(df, audit_df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.remove_extra_blank_lines 13 : before audit\n")
    lines_not_removed = audit_df.loc[
        audit_df["line_removed"] != "Yes"
    ].index.to_list()
    print(lines_not_removed)
    df = df.loc[df["line_no"].isin(lines_not_removed), :]
    df = df.sort_index().reset_index(drop=True)
    ###
    sf.remove_blank_line_after_parenthetical(df, audit_df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.remove_blank_line_after_parenthetical 14 : before audit\n")
    lines_not_removed = audit_df.loc[
        audit_df["line_removed"] != "Yes"
    ].index.to_list()
    print(lines_not_removed)
    df = df.loc[df["line_no"].isin(lines_not_removed), :]
    df = df.sort_index().reset_index(drop=True)
    ##
    sf.merge_broken_lines(df, audit_df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.merge_broken_lines 15 : before audit\n")
    lines_not_removed = audit_df.loc[
        audit_df["line_removed"] != "Yes"
    ].index.to_list()
    df = df.loc[df["line_no"].isin(lines_not_removed), :]
    df = df.sort_index().reset_index(drop=True)
    ###df.to_csv(csv_after_merge, index = False)
    ##
    sf.remove_space_between_words(df, audit_df)
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("after sf.remove_space_between_words 16 : before audit\n")
    df.to_csv(self.csv_removed_space_between_words, index=False)
    print("updating audit df")
    # Snapshot both frames before the final sync, for debugging.
    df1 = pd.DataFrame(df)
    df1.to_csv(self.base_file_path + "first_df.csv", index = False)
    audit_df1 = pd.DataFrame(audit_df)
    audit_df1.to_csv(self.base_file_path + "first_audit_df.csv", index = False)
    audit_df = self.update_audit_df(df, audit_df)
    # audit_model_obj = ScriptAuditModel.objects.get(
    #     script = Script.objects.get(
    #         id = self.script_id,
    #     )
    # )
    # Persist the pre-audit flag; fall back to the cached model instance
    # when a fresh lookup fails.
    try:
        audit_model_obj = ScriptAuditModel.objects.get(
            script = Script.objects.get(
                id = self.script_id,
            )
        )
        audit_model_obj.pre_audit_run = True
        audit_model_obj.save()
        print("TRY")
    except Exception as exp:
        print(repr(exp))
        print("EXCEPT")
        self.audit_model_obj.pre_audit_run = True
        self.audit_model_obj.save()
    print("PRE AUDIT DONE")
    with open(self.base_file_path + "time_taken.txt", "a") as file007:
        file007.write("before audit complete : PRE AUDIT DONE\n\n")
    return df, audit_df
def audit(self, lang: str = None) -> None:
"""
Run Audit on NeutralAudit object.
_________________________________________________________________
Parameters :
lang : str -> language of the provided script.
default = None (language nuteral rules)
_________________________________________________________________
Return :
None
_________________________________________________________________
"""
# ---------------------------changes to save _audited.csv in media/scriptpage/script/folder
# csv_parawise_status = self.audited_script_path
# ---------------------------changes to save _audited.csv in media/scriptpage/script/folder
print("<<<<<<<<<<<<<<<<<<<<<<<<<")
print("<<<<<<<<<<<<<<<<<<<<<<<<<",self.base_file_path)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("INSIDE AUDIT (1-87): audit\n\n")
csv_after_first_strict_conditions = (
self.base_file_path
+ "after_first_strict_conditions.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_first_strict_conditions.csv 1 : audit\n")
csv_after_gen_and_sort_weights = (
self.base_file_path
+ "after_gen_and_sort_weights.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_gen_and_sort_weights.csv 2 : audit\n")
csv_after_examined_speaker_pos = (
self.base_file_path
+ "after_examined_speaker_pos.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_examined_speaker_pos.csv 3 : audit\n")
csv_after_examined_speaker_next_lines= (
self.base_file_path
+ "after_examined_speaker_next_lines.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_examined_speaker_next_lines.csv 4 : audit\n")
csv_after_pnnbl_ineligible= (
self.base_file_path
+ "after_pnnbl_ineligible1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_pnnbl_ineligible1 (5) : audit\n")
csv_after_examine_same_content_lines= (
self.base_file_path
+ "after_examine_same_content_lines.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_examine_same_content_lines (6) : audit\n")
csv_after_examined_action_pos_part1 = (
self.base_file_path
+ "_after_examined_action_pos_part1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_action_pos_part1.csv (7) : audit\n")
csv_after_pnnbl_inelgible_after_action_pos_part1=(
self.base_file_path
+ "_after_pnnbl_inelgible_after_action_pos_part1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_action_pos_part1.csv (8) : audit\n")
csv_after_examined_action_pos_part2 = (
self.base_file_path
+ "_after_examined_action_pos_part2.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_action_pos_part2.csv (9) : audit\n")
csv_after_pnnbl_inelgible_after_action_pos_part2 = (
self.base_file_path
+ "_after_pnnbl_inelgible_after_action_pos_part2.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_action_pos_part2.csv (10) : audit\n")
csv_after_examined_same_indent_bunch = (
self.base_file_path
+ "_after_examined_same_indent_bunch.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_same_indent_bunch.csv (11) : audit\n")
csv_after_pnnbl_inelgible_after_same_indent = (
self.base_file_path
+ "_after_pnnbl_inelgible_after_same_indent.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_same_indent.csv (12) : audit\n")
csv_after_examined_relative_indent_bunch = (
self.base_file_path
+ "_after_examined_relative_indent_bunch.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_relative_indent_bunch.csv (13) : audit\n")
csv_after_examined_speaker_next_lines_after_relative_indent = (
self.base_file_path
+ "_after_examined_speaker_next_lines_after_relative_indent.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_next_lines_after_relative_indent.csv (14) : audit\n")
csv_after_pnnbl_inelgible_after_relative_indent = (
self.base_file_path
+ "after_pnnbl_inelgible_after_relative_indent_bunch.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_pnnbl_inelgible_after_relative_indent_bunch.csv (15) : audit\n")
csv_examined_speaker_using_indent = (
self.base_file_path
+ "after_examined_speaker_using_indent.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_examined_speaker_using_indent.csv (16) : audit\n")
csv_after_examined_speaker_next_lines_after_pos_sp_indent = (
self.base_file_path
+ "_after_examined_speaker_next_lines_after_pos_sp_indent.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_next_lines_after_pos_sp_indent.csv (17) : audit\n")
csv_after_pnnbl_inelgible_after_pos_sp_indent = (
self.base_file_path
+ "_after_pnnbl_inelgible_after_pos_sp_indent.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_pos_sp_indent.csv (18) : audit\n")
csv_examined_speaker_extension = (
self.base_file_path
+ "_after_examined_speaker_extension.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_extension.csv (19) : audit\n")
csv_after_examined_speaker_next_lines_after_speaker_extension = (
self.base_file_path
+ "_after_examined_speaker_next_lines_after_speaker_extension.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_next_lines_after_speaker_extension.csv(20) : audit\n")
csv_after_pnnbl_inelgible_after_speaker_extension = (
self.base_file_path
+ "_after_pnnbl_inelgible_after_speaker_extension.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_speaker_extension.csv (21) : audit\n")
csv_after_examined_action_using_top2 = (
self.base_file_path
+ "_after_examined_action_using_top2.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_action_using_top2.csv (22) : audit\n")
csv_after_pnnbl_inelgible_after_action_using_top_pnnbl = (
self.base_file_path
+ "_after_pnnbl_inelgible_after_action_using_top_pnnbl.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_action_using_top_pnnbl.csv (23) : audit\n")
csv_after_refined_action = (
self.base_file_path
+ "_after_refined_action.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_refined_action.csv (24) : audit\n")
csv_after_pnnbl_inelgible_after_refined_action = (
self.base_file_path
+ "_after_pnnbl_inelgible_after_refined_action.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_inelgible_after_refined_action.csv (25) : audit\n")
csv_after_eligibility_using_identified_pnnbl = (
self.base_file_path
+ "_after_eligibility_using_identified_pnnbl.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl.csv (26) : audit\n")
csv_after_top_identification_part1 = (
self.base_file_path
+ "_after_top_identification_part1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part1.csv (27) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two = (
self.base_file_path
+ "after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two.csv (28) : audit\n")
csv_after_examined_speaker_pos_after_top1 = (
self.base_file_path + "_after_examined_speaker_pos_after_top1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_pos_after_top1.csv (29) : audit\n")
csv_after_examined_speaker_next_lines_after_top1 = (
self.base_file_path + "after_examined_speaker_next_lines_after_top1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_examined_speaker_next_lines_after_top1.csv (30) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part1 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part1.csv (31) : audit\n")
csv_after_examine_speaker_mix_part1 = (
self.base_file_path + "_after_examine_speaker_mix_part1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examine_speaker_mix_part1.csv (32) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1.csv (33) : audit\n")
csv_after_examine_speaker_mix_part2 = (
self.base_file_path + "_after_examine_speaker_mix_part2.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examine_speaker_mix_part2.csv (34) : audit\n")
csv_after_examined_speaker_pos_after_mix = (
self.base_file_path + "_after_examined_speaker_pos_after_mix.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_pos_after_mix.csv(35) : audit\n")
csv_after_examined_speaker_next_lines_after_mix = (
self.base_file_path + "_after_examined_speaker_next_lines_after_mix.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examined_speaker_next_lines_after_mix.csv (36) : audit\n")
csv_after_pnnbl_ineligible_after_mix = (
self.base_file_path + "_after_pnnbl_ineligible_after_mix.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_pnnbl_ineligible_after_mix.csv (37) : audit\n")
csv_after_top_identification_part2 = (
self.base_file_path + "_after_top_identification_part2.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part2.csv (38) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part2 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part2.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part2.csv (39) : audit\n")
csv_after_top_identification_part2_again = (
self.base_file_path + "_after_top_identification_part2_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part2_again.csv (40) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part2_again = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part2_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part2_again.csv (41) : audit\n")
csv_after_top_identification_part2_again_again = (
self.base_file_path + "_after_top_identification_part2_again_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part2_again_again.csv(42) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part2_again_again = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part2_again_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part2_again_again.csv (43) : audit\n")
csv_after_slug_identification = (
self.base_file_path + "_after_slug_identification.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_slug_identification.csv (44) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_slug_identification = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_slug_identification.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_slug_identification.csv (45) : audit\n")
csv_after_top_identification_part1_again = (
self.base_file_path + "_after_top_identification_part1_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part1_again.csv (46) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part1_again = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part1_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part1_again.csv (47) : audit\n")
csv_after_top_identification_part3 = (
self.base_file_path + "_after_top_identification_part3.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part3.csv (48) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part3 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part3.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part3.csv (49) : audit\n")
csv_after_top_identification_part4 = (
self.base_file_path + "_after_top_identification_part4.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part4.csv (50) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part4 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part4.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part4.csv (51) : audit\n")
csv_after_top_identification_part5 = (
self.base_file_path + "_after_top_identification_part5.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part5.csv (52) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part5 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part5.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part5.csv (53) : audit\n")
csv_after_top_identification_part6 = (
self.base_file_path + "_after_top_identification_part6.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part6.csv (54) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part6 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part6.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part6.csv (55) : audit\n")
csv_after_top_identification_part7 = (
self.base_file_path + "_after_top_identification_part7.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part7.csv (56) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part7 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part7.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part7.csv (57) : audit\n")
csv_after_top_identification_part8 = (
self.base_file_path + "_after_top_identification_part8.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part8.csv (58) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part8 = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part8.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part8.csv (59) : audit\n")
csv_after_examine_among_two = (
self.base_file_path + "_after_examine_among_two.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examine_among_two.csv (60) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_examine_among_two = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_examine_among_two.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
# --- Intermediate-file path setup ---------------------------------------
# Each csv_* local below names a debug/intermediate CSV rooted at
# self.base_file_path.  After every assignment the code re-opens
# "<base>time_taken.txt" in append mode and writes a numbered marker line;
# each `with` closes the handle immediately, so the log survives a crash
# at any point in this sequence.
# NOTE(review): the trailing "(NN)" numbers in the log lines are manual
# step counters — confirm they stay in sync when steps are added/removed.
file007.write("after_eligibility_using_identified_pnnbl_after_examine_among_two.csv (61) : audit\n")
csv_after_examine_speaker_next_line_after_among_two = (
self.base_file_path + "_after_examine_speaker_next_line_after_among_two.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examine_speaker_next_line_after_among_two.csv (62) : audit\n")
csv_after_top2_wt_diff = (
self.base_file_path + "_after_top2_wt_diff.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top2_wt_diff.csv (63) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top2_wt_diff.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top2_wt_diff.csv (64) : audit\n")
csv_after_top2_wt_diff_again = (
self.base_file_path + "_after_top2_wt_diff_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top2_wt_diff_again.csv (65) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again.csv(66) : audit\n")
csv_after_top_identification_part1_diluted = (
self.base_file_path + "_after_top_identification_part1_diluted.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_top_identification_part1_diluted.csv (67) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_top_part1_diluted = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part1_diluted.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_top_part1_diluted.csv (68) : audit\n")
#1.2
csv_after_examine_dial_between_action = (
self.base_file_path + "_after_examine_dial_between_action.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examine_dial_between_action.csv (69) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action.csv (70) : audit\n")
csv_after_examine_among_two_again = (
self.base_file_path + "_after_examine_among_two_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_examine_among_two_again.csv (71) : audit\n")
csv_after_eligibility_using_identified_pnnbl_after_examine_among_two_again = (
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_examine_among_two_again.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("_after_eligibility_using_identified_pnnbl_after_examine_among_two_again.csv (72) : audit\n")
csv_after_identify_remaining_as_top = (
self.base_file_path + "after_identifying_remaining_as_top.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_identifying_remaining_as_top.csv (73) : audit\n")
csv_after_prep_for_audit_after_identification = (
self.base_file_path + "after_prep_for_audit_after_identification.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_prep_for_audit_after_identification.csv (74) : audit\n")
csv_after_audit1 = self.base_file_path + "after_audit1.csv"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_audit1.csv (75) : audit\n")
csv_after_wrapping = self.base_file_path + "after_wrapping.csv"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_wrapping.csv (76) : audit\n")
csv_after_prep_for_audit_after_wrapping = (
self.base_file_path + "after_prep_for_audit_after_wrapping.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_prep_for_audit_after_wrapping.csv (77) : audit\n")
csv_after_audit2 = self.base_file_path + "after_audit2.csv"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("after_audit2.csv (78) : audit\n")
# Final deliverable paths (docx/txt outputs rather than debug CSVs).
output_linewise_docx = self.base_file_path + "audited_linewise.docx"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("audited_linewise.docx (79) : audit\n")
output_linewise_txt = self.base_file_path + "audited_linewise.txt"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("audited_linewise.txt (80) : audit\n")
audit_report_tabular_docx = self.base_file_path + "audit_report_tabular.docx"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("audit_report_tabular.docx (81) : audit\n")
# Read-only matrix/config inputs shipped under self.matrices_path.
csv_strict_conditions = self.matrices_path + "strict_conditions_230623.csv"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("strict_conditions_230623.csv : audit\n")
csv_pos_weights = self.matrices_path + "PS_Weights_250623_2.csv"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("PS_Weights_250623_2.csv (83) : audit\n")
csv_pnbl_nnbl = self.matrices_path + "pnbl_nnbl.csv"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("pnbl_nnbl.csv (84) : audit\n")
pnbl_eligibility_matrix = (
self.matrices_path + "pnbl_eligibility_matrix_250623.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("pnbl_eligibility_matrix_250623.csv (85) : audit\n")
nnbl_eligibility_matrix = (
self.matrices_path + "nnbl_eligibility_matrix_250623.csv"
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("nnbl_eligibility_matrix_250623.csv (86) : audit\n")
output_template = self.matrices_path + "ScriptTemplate5.docx"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("ScriptTemplate5.docx (87) : audit\n")
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AUDIT : audit\n\n")
# --- Pre-audit preparation and first speaker pass ------------------------
# before_audit(lang) yields the working line DataFrame `df` and the audit
# report DataFrame `audit_df`; everything below mutates/refines `df`'s
# element-type possibilities and periodically syncs `audit_df`.
df, audit_df = self.before_audit(lang)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER ASSIGNING LOCATIONS AUDIT : audit\n\n")
#######################################
# Apply hard classification rules from the strict-conditions matrix.
# NOTE(review): sf.* helpers are project-local; behavior inferred from
# names — confirm against scriptAudit.sa_functions.
sf.test_strict_conditions(df, csv_strict_conditions)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.test_strict_conditions 1 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_first_strict_conditions, index = False)
## gen weights for possibilties ## add preassigned weights
df = sf.gen_pos_weights(df, csv_pos_weights)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.gen_pos_weights 2 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
## language specific weights update
if lang:
if lang.upper() == "ENGLISH":
df = sf_eng.update_pos_wts_english(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf_eng.update_pos_wts_english 3 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = sf.sort_pos_decr_wts(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.sort_pos_decr_wts 4 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
#if self.gen_int_files:
# NOTE(review): the gen_int_files guard above is commented out, so this
# intermediate CSV is written unconditionally — confirm that is intended.
df.to_csv(csv_after_gen_and_sort_weights, index = False)
##
## remove some columns
# Drop the 28 per-rank "largest" helper columns produced by the weight
# sorting step; they are no longer needed downstream.
df.drop(['first_largest', 'second_largest','third_largest','fourth_largest','fifth_largest','sixth_largest','seventh_largest','eight_largest','ninth_largest','tenth_largest','eleventh_largest','twelth_largest','thirteenth_largest','fourteenth_largest','fifteenth_largest','sixteenth_largest','seventeenth_largest','eighteenth_largest','ninteenth_largest','tewenty_largest','tone_largest','ttwo_largest','tthree_largest','tfour_largest','tfive_largest','tsix_largest','tseven_largest','teight_largest'], axis=1, inplace=True)
# NOTE(review): the next line looks like VCS-blame residue fused into this
# paste rather than real source — confirm against the actual file.
2024-04-27 09:33:09 +00:00
sf.prep_for_pos_elimination(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.prep_for_pos_elimination 5 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
# Debug snapshots of df/audit_df before the speaker examination.
df_bex1 = pd.DataFrame(df)
df_bex1.to_csv(self.base_file_path + "df_update_audit_df_b_exam_speaker_1.csv", index = False)
audit_df_bex1 = pd.DataFrame(audit_df)
audit_df_bex1.to_csv(self.base_file_path + "audit_df_update_audit_df_b_exam_speaker_1.csv", index = False)
## examine speaker possibilties
df = sf.examine_speaker_pos(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_pos 6 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df_ex1 = pd.DataFrame(df)
df_ex1.to_csv(self.base_file_path + "df_update_audit_df_exam_speaker_1.csv", index = False)
audit_df_ex1 = pd.DataFrame(audit_df)
audit_df_ex1.to_csv(self.base_file_path + "audit_df_update_audit_df_exam_speaker_1.csv", index = True)
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_pos, index = False)
print("printing info based on audit_df")
# df_b1 = pd.DataFrame(df)
# df_b1.to_csv(self.base_file_path + "df_update_audit_df_b1.csv", index = False)
print(audit_df.head(10),audit_df.dtypes)
# Sort by audited_line_no; on failure (non-numeric column) coerce to
# numeric first (invalid entries become NaN) and retry.
try:
audit_df = audit_df.sort_values('audited_line_no')
except:
audit_df['audited_line_no'] = pd.to_numeric(audit_df['audited_line_no'], errors='coerce')
audit_df = audit_df.sort_values('audited_line_no')
audit_df_try1 = pd.DataFrame(audit_df)
audit_df_try1.to_csv(self.base_file_path + "audit_df_update_audit_df_try1.csv", index = True)
print(audit_df.head())
# Realign audit_df onto df's line_no as index; merge keyed on audit_df's
# positional index. Failure is tolerated (state is printed and kept).
try:
audit_df = pd.merge(audit_df, df[['line_no']], on=audit_df.index, how='left')
print(audit_df.head())
# Set 'line_no' as index
audit_df.set_index('line_no', inplace=True)
print(audit_df.head())
audit_df_try2 = pd.DataFrame(audit_df)
audit_df_try2.to_csv(self.base_file_path + "audit_df_update_audit_df_try2.csv", index = True)
except Exception as e:
print(e, audit_df.head())
pass
# try:
# audit_df.reset_index(drop=True, inplace=True)
# audit_df.set_index('line_no',inplace=True)
# except Exception as e:
# print(e)
print(audit_df.head())
print(audit_df.dtypes)
audit_df_b1 = pd.DataFrame(audit_df)
audit_df_b1.to_csv(self.base_file_path + "audit_df_update_audit_df_b1.csv", index = True)
audit_df = self.update_audit_df(df, audit_df)
df_1 = pd.DataFrame(df)
df_1.to_csv(self.base_file_path + "df_update_audit_df_1.csv", index = True)
audit_df_1 = pd.DataFrame(audit_df)
audit_df_1.to_csv(self.base_file_path + "audit_df_update_audit_df_1.csv", index = True)
###
# --- Iterative elimination passes ---------------------------------------
# Recurring motif from here on: run an sf.examine_* pass over df, log a
# timestamped marker, optionally dump an intermediate CSV (gen_int_files),
# sync audit_df, then run the pnnbl-ineligible elimination and refresh the
# is_identified flags.
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 7 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_next_lines, index = False)
audit_df = self.update_audit_df(df, audit_df)
audit_df_u7 = pd.DataFrame(audit_df)
audit_df_u7.to_csv(self.base_file_path + "audit_df_update_audit_df_7.csv", index = True)
## do while pnnbl ineligible
# prep_pnnbl_wts is called once here; later passes reuse its output (the
# repeated calls below are commented out).
sf.prep_pnnbl_wts(csv_pnbl_nnbl, self.matrices_path)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.prep_pnnbl_wts 8 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 9 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_ineligible, index = False)
## examine same content
df = sf.examine_same_content_lines(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_same_content_lines 10 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_same_content_lines, index = False)
### examine speaker next again
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 11 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
###df.to_csv(csv_after_examined_speaker_next_lines_after_same_content, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
df = self.update_is_identified(df)
###df.to_csv(csv_after_pnnbl_ineligible_after_same_content, index = False)
################
# Action-possibility examination, part 1 then part 2, each followed by a
# pnnbl-ineligible elimination pass.
df = sf.examine_action_possibilities_part1(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_action_possibilities_part1 12 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_action_pos_part1, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 13 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_action_pos_part1, index = False)
################
df = sf.examine_action_possibilities_part2(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_action_possibilities_part2 14 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_action_pos_part2, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 15 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_action_pos_part2, index = False)
################
# Indentation-based passes: same-indent bunches, then relative indent.
df = sf.examine_same_indent_bunch(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_same_indent_bunch 16 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_same_indent_bunch, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 17 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_same_indent, index = False)
#####################
##for reorganisation
# df = pd.read_csv('Script_Shatranj_pnnbl_ineligible_same_indent_bunch_new_col_2.csv')
# csv_for_pos_elimination = os.path.join(self.output_dir,os.path.splitext(self.script_name)[0])+'_for_pos_elimination.csv'
#########################
df = sf.examine_relative_indent(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_relative_indent 18 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_relative_indent_bunch, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 19 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_next_lines_after_relative_indent, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl,matrices_path)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 20 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
# Written unconditionally: the CSV below is also a required *input* to
# sf.examine_pos_sp_indent in the next pass.
df.to_csv(csv_after_pnnbl_inelgible_after_relative_indent, index=False)
#######################################
# Speaker-by-indent pass: consumes the space-stripped text CSV plus the
# CSV dumped at the end of the previous pass.
df = sf.examine_pos_sp_indent(
df,
self.csv_removed_space_between_words,
csv_after_pnnbl_inelgible_after_relative_indent,
)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_pos_sp_indent 21 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv( csv_examined_speaker_using_indent,index =False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 22 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_next_lines_after_pos_sp_indent, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 23 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_pos_sp_indent, index = False)
#################################
# Speaker-extension pass (e.g. continuation lines of a speaker block).
df = sf.examine_speaker_extension(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_extension 24 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv( csv_examined_speaker_extension,index =False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 25 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_next_lines_after_speaker_extension, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
print("pnnbl after speaker extension")
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 26 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_speaker_extension, index = False)
## checking
# audit_df.reset_index().to_csv(audit_report_csv,index =False)
#################################################
# Action identification refinement using the top-2 weighted possibilities.
df = sf.examine_action_using_top2_part1(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_action_using_top2_part1 27 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_action_using_top2, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_action_using_top_pnnbl, index = False)
# #########################################
df = sf.refine_action_possibilties(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.refine_action_possibilties 28 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_refined_action, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible(df) 29 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_inelgible_after_refined_action, index = False)
##############################
# --- Top-identification pipeline ----------------------------------------
# Switches from the "ineligible" elimination to the "eligible" direction:
# prep the pnbl/nnbl eligibility matrices once, then repeat the motif
#   sf.start_top_identifications_partN / examine pass
#   -> update_is_identified -> optional CSV dump -> update_audit_df
#   -> sf.do_while_examine_using_identified_pnnbl -> update_is_identified
# for parts 1..8, slug identification, and the among-two disambiguation.
sf.prep_pnnbl_eligible_csv(pnbl_eligibility_matrix, nnbl_eligibility_matrix)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.prep_pnnbl_eligible_csv 30 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
#############################
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 31 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl, index = False)
#################################
df = sf.start_top_identifications_part1(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part1 32 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part1, index = False)
audit_df = self.update_audit_df(df, audit_df)
## examine speaker possibilties again after top1
df = sf.examine_speaker_pos(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_pos 33 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_pos_after_top1, index = False)
audit_df = self.update_audit_df(df, audit_df)
###
# Debug snapshots around step 34.
df_34 = pd.DataFrame(df)
df_34.to_csv(self.base_file_path + "df_export_before_34.csv", index = True)
au_df_34 = pd.DataFrame(audit_df)
au_df_34.to_csv(self.base_file_path + "audit_df_before_after_34.csv", index = True)
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 34 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_next_lines_after_top1, index=False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 35 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part1, index = False)
#################################
# Debug snapshots before the speaker-mix passes.
copy_df_examine_speaker_mix_part1 = pd.DataFrame(df)
copy_df_examine_speaker_mix_part1.to_csv(self.base_file_path + "copy_df_examine_speaker_mix_part1.csv", index = True)
###########
copy_audit_df_examine_speaker_mix_part1 = pd.DataFrame(audit_df)
copy_audit_df_examine_speaker_mix_part1.to_csv(self.base_file_path + "copy_audit_df_examine_speaker_mix_part1.csv", index = True)
##########
df = sf.examine_speaker_mix_part1(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_mix_part1 36 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_speaker_mix_part1, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 37 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1, index = False)
#################################
df_38 = pd.DataFrame(df)
df_38.to_csv(self.base_file_path + "df_export_after_38.csv", index = True)
au_df_38 = pd.DataFrame(audit_df)
au_df_38.to_csv(self.base_file_path + "audit_df_export_after_38.csv", index = True)
df = sf.examine_speaker_mix_part2(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_mix_part2 38 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_speaker_mix_part2, index = False)
audit_df = self.update_audit_df(df, audit_df)
## examine speaker possibilties again after mix
df = sf.examine_speaker_pos(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_pos 39 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_pos_after_mix, index = False)
audit_df = self.update_audit_df(df, audit_df)
###
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTERsf.examine_speaker_next_lines 40 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
if self.gen_int_files:
df.to_csv(csv_after_examined_speaker_next_lines_after_mix, index = False)
audit_df = self.update_audit_df(df, audit_df)
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl,matrices_path)
df = sf.do_while_pnnbl_ineligible(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_pnnbl_ineligible 41 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_pnnbl_ineligible_after_mix, index = False)
# df = sf.do_while_examine_using_identified_pnnbl(df)
# df = update_is_identified(df)
# df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part2, index = False)
################################
# part2 is run three times in a row (steps 42/44/46); each run can
# identify more lines, enabling the next.
df = sf.start_top_identifications_part2(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part2 42 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part2, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 43 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part2, index = False)
#################################
df = sf.start_top_identifications_part2(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part2 44 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part2_again, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 45 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part2_again, index = False)
#################################
df = sf.start_top_identifications_part2(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part2 46 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part2_again_again, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 47 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part2_again_again, index = False)
#################################
# Slug (scene-heading) identification.
df = sf.start_slug_identification(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_slug_identification(df) 48 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_slug_identification, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl(df) 49 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_slug_identification, index = False)
#################################
df = sf.start_top_identifications_part1(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part1(df) 50 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part1_again, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 51 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part1_again, index = False)
#################################
df = sf.start_top_identifications_part3(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part3 52 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part3, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 53 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part3, index = False)
#################################
df = sf.start_top_identifications_part4(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part4 54 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part4, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 55 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part4, index = False)
#################################
df = sf.start_top_identifications_part5(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part5(df) 56 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part5, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 57 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part5, index = False)
#################################
df = sf.start_top_identifications_part6(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part6 58 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part6, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 59 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part6, index = False)
#################################
df = sf.start_top_identifications_part7(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part7 60 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part7, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 61 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part7, index = False)
#################################
df = sf.start_top_identifications_part8(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.start_top_identifications_part8 62 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part8, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 63 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part8, index = False)
#################################
# Disambiguate lines whose possibilities were narrowed to exactly two.
df = sf.examine_among_two(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_among_two 64 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_among_two, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 65 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_among_two, index = False)
#################################
df = sf.examine_speaker_next_lines(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_speaker_next_lines 66: AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_speaker_next_line_after_among_two, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 67 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two, index = False)
#################################
df = sf.examine_action_using_top2_wt_diff(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_action_using_top2_wt_diff 68 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top2_wt_diff, index = False)
audit_df = self.update_audit_df(df, audit_df)
try:
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 69 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff, index = False)
except:
pass
#################################
try:
df = sf.examine_action_using_top2_wt_diff(df)
if self.gen_int_files:
df.to_csv(csv_after_top2_wt_diff_again, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 70 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again, index = False)
except:
pass
#################################
try:
df = sf.start_top_identifications_part1_diluted(df)
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_top_identification_part1_diluted, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 71 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part1_diluted, index = False)
except:
pass
###################################
####################################
##1.1
df = sf.decrease_wt_dial_between_action(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.decrease_wt_dial_between_action 72 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_dial_between_action, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 73 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
#if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action, index = False)
####################################
#################################
df = sf.examine_among_two(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.examine_among_two 74 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
if self.gen_int_files:
df.to_csv(csv_after_examine_among_two_again, index = False)
audit_df = self.update_audit_df(df, audit_df)
df = sf.do_while_examine_using_identified_pnnbl(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 75 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df = self.update_is_identified(df)
#if self.gen_int_files:
df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_among_two_again, index = False)
####################################
#################################
df = sf.identify_top_as_final(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.identify_top_as_final 76 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df_76 = pd.DataFrame(df)
df_76.to_csv(self.base_file_path + "df_identify_top_as_final_76.csv", index = False)
au_df_76 = pd.DataFrame(audit_df)
au_df_76.to_csv(self.base_file_path + "audit_df_identify_top_as_final_76.csv", index = False)
df = self.update_is_identified(df)
df.to_csv(csv_after_identify_remaining_as_top, index=False)
audit_df = self.update_audit_df(df, audit_df)
#####################################
## prepare for audit
df = sf.prep_for_audit(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.prep_for_audit 77 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df.to_csv(csv_after_prep_for_audit_after_identification, index=False)
#####################################
df, audit_df = sf.run_audit_on_identified(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.run_audit_on_identified 78 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df.to_csv(csv_after_audit1, index=False)
audit_df = self.update_audit_df(df, audit_df)
#############################################
### run language specific audit on identified
if lang:
if lang.upper() == "ENGLISH":
df = sf_eng.run_audit_on_identified_english(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf_eng.run_audit_on_identified_english 79 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
audit_df = self.update_audit_df(df, audit_df)
#####################################
### merge the beginning/middle/end lines
# df.to_csv(self.base_file_path + "df_before_merge_line_para.csv", index = Flase)
para_df = sf.merge_line_to_para(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.merge_line_to_para 80 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
try:
para_df.to_csv(self.base_file_path+ "after_merge_line_para.csv", index = False)
print("para_df is writtern")
except:
pass
print("\n\n Function 80 is completed")
try:
script_language, dialogue_language = sf.language_detector_for_csv(para_df)
print("script_language",script_language)
print("dialogue_language",dialogue_language)
unique_script_languages = ', '.join(set(lang[0] for lang in script_language))
unique_dialogue_languages = ', '.join(set(lang[0] for lang in dialogue_language))
except:
unique_script_languages = ""
unique_dialogue_languages = ""
#commented as some unwanted change of . to comma
#para_df = sf.change_dot_to_comma_inslug(para_df)
print("unique_script_languages:",unique_script_languages)
print("unique_dialogue_languages:",unique_dialogue_languages)
# para_df.to_csv(csv_parawise_status, index=False)
##
print("\n\n dot to comma changes in slug")
audited_file_name = self.script_name + ".csv"
req_file = ContentFile(
(para_df.to_csv(index=False, path_or_buf=None)).encode("utf-8"),
audited_file_name,
)
File.objects.create(
script=Script.objects.get(id=self.script_id),
type="script-csv",
file=req_file,
)
print("\n\n exporting df and audit_df agter function 80")
df_df = pd.DataFrame(df)
df_df.to_csv(self.base_file_path + "df_export_after_80.csv", index = False)
audit_df_df = pd.DataFrame(audit_df)
audit_df_df.reset_index().to_csv(self.base_file_path + "audit_df_export_after_80.csv", index = False)
print("\nwrapping identified lines if required\n")
df = sf.wrap_text(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.wrap_text 81 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df_81 = pd.DataFrame(df)
df_81.to_csv(self.base_file_path + "df_export_after_81.csv", index = False)
au_df_81 = pd.DataFrame(audit_df)
au_df_81.reset_index().to_csv(self.base_file_path + "audit_df_export_after_81.csv", index = False)
df.to_csv(csv_after_wrapping, index=False)
#audit_df['line_no'] = audit_df['line_no'].astype(float)
audit_df = self.update_audit_df(df, audit_df)
#####################################
## prepare for audit again
only_df = pd.DataFrame(df)
only_df.to_csv(self.base_file_path + "df_before_82.csv", index = False)
df = sf.prep_for_audit(df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.prep_for_audit 82 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df.to_csv(csv_after_prep_for_audit_after_wrapping, index=False)
#####################################
sf.run_audit_on_identified(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.run_audit_on_identified 83 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
df.to_csv(csv_after_audit2, index=False)
audit_df = self.update_audit_df(df, audit_df)
#####################################################
### run language specific audit on identified
if lang:
if lang.upper() == "ENGLISH":
df = sf_eng.run_audit_on_identified_english(df, audit_df)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf_eng.run_audit_on_identified_english 84 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
audit_df = self.update_audit_df(df, audit_df)
####################################
sf.sa_output_to_docx(df, output_linewise_docx, output_template)
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.sa_output_to_docx 85 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
sf.sa_output_to_txt(output_linewise_docx, output_linewise_txt)
# print("line no: 2017",str(output_linewise_txt))
output_docx_after_audit = self.base_file_path + "audited_text.docx"
pdf_audit_file_path = self.base_file_path + "audited_text.pdf"
sf.convert_txt_to_docx(output_linewise_txt,output_docx_after_audit)
print("converted to docx")
try:
# total_page_af = sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path)
sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path)
try:
total_page_af = sf.PdfCounter(pdf_audit_file_path)
print("total pages af = ", total_page_af)
print("hehehehehe")
except Exception as exp:
print(repr(exp))
print("try except total pages didnt work")
except Exception as exp:
print("total_page_af : ", exp)
print("the count of pageline start here")
line_count_after_audit = sf.count_the_line(str(output_linewise_txt))
count_before_txt = self.base_file_path + "temp.txt"
line_count_before_audit = sf.count_the_line(str(count_before_txt))
print("you are here")
output_docx_from_orginal_text = self.base_file_path + "original_text.docx"
pdf_file_path = self.base_file_path + "original_text.pdf"
print("b4 txt to docx")
sf.convert_txt_to_docx(count_before_txt,output_docx_from_orginal_text)
print("b4 page count of pdf")
print("hehe")
"""13-2-24"""
# try:
# total_page_bf = sf.countPages(output_docx_from_orginal_text,pdf_file_path,self.base_file_path)
# print(total_page_bf)
# except Exception as exp:
# print(" total page bf",total_page_bf )
print("temp txt converted to docx")
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.sa_output_to_txt 86 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
########################################
# sf.print_audit_report_docx(audit_df,audit_report_docx)
# headers = True
try:
print("In the total number of pages count")
file_model_objects = File.objects.filter(script=self.script_id)
audit_file_object = file_model_objects.get(type="script-csv")
read_df = pd.read_csv(audit_file_object.file)
print("csv fetched")
docx = sf.csv_to_docx(read_df)
audited_docx_path = self.base_file_path + "csv_to_docx_audited.docx"
# temp_file_stream = BytesIO()
print("docx saved")
docx.save(audited_docx_path)
# temp_file_stream.seek(0)
docx_file = ContentFile(
open(audited_docx_path, 'rb').read(),
"from_audited_csv_to_document.docx",
)
# docx_file = ContentFile(
# audited_docx_path.getvalue(),
# "from_audited_csv_to_document.docx",
# )
File.objects.create(
script=Script.objects.get(id=self.script_id),
type="script-docx",
file=docx_file,
)
print("script-docx object created")
converted_audit_pdf_file_path = self.base_file_path + "csv_to_docx_audited.pdf"
Final_pdf_page_count = sf.countPages(audited_docx_path,converted_audit_pdf_file_path,self.base_file_path)
print("total number of pdf pages")
print(int(Final_pdf_page_count))
pass
except Exception as e:
print("yje exception is")
print(e)
audit_df = self.update_audit_df_intro(df, audit_df)
audit_df = self.update_audit_df_appendix(df, audit_df)
audit_report_name = self.script_name + "_report.docx"
print("audit_df_tabular 1908\n\n",audit_df,"\n\n" )
copy_df = pd.DataFrame(audit_df)
copy_df.reset_index().to_csv(self.base_file_path + "audit_report_export.csv", index = False)
print("before print_report_tabular_docx")
script_ob = Script.objects.get(id=self.script_id)
screen_play_name = script_ob.screenplay.name
author_name = script_ob.screenplay.author
print(screen_play_name)
print(author_name)
print(line_count_before_audit)
print(line_count_after_audit)
# audit_report_buffer = sf.print_audit_report_tabular_docx(audit_df,line_count_before_audit,line_count_after_audit) #commented on 13-09-23
para_filetered_audut_df = sf.assign_para_no(audit_df)
print("after para assign")
2024-04-27 09:33:09 +00:00
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER para_filetered_audut_df\n")
scriptname = str(screen_play_name) # to fetched by audit data
author = str(author_name) # to fetched by audit data
try:
pre_audit_pagenumber = int(self.total_page_bf)
except:
pre_audit_pagenumber = 1
try:
print("total_page_af = ", total_page_af )
postauditpagenumber = int(total_page_af)
except:
print("total_page_af 1")
postauditpagenumber = 1
try:
preaudit_line_no = int(line_count_before_audit)
except:
preaudit_line_no = 1
try:
postaudit_line_no = int(line_count_after_audit)
except:
postaudit_line_no = 1
try:
print("unique_script_languages",unique_script_languages)
script_language = str(unique_script_languages) # to be fetched by conversin function
except:
script_language = "---"
try:
print("unique_dialogue_languages",unique_dialogue_languages)
dialogue_language = str(unique_dialogue_languages) # to be fetched by conversin function
except:
dialogue_language = "---"
print("scriptname",scriptname)
print("author",author)
print("pre_audit_pagenumber",pre_audit_pagenumber)
print("postauditpagenumber",postauditpagenumber)
print("preaudit_line_no",preaudit_line_no)
print("postaudit_line_no",postaudit_line_no)
'''
additiona model information
'''
#self.audit_model_obj.number_of_pages = int(postauditpagenumber)
# time_per_page = 30
# base time = 120
# no_of_pages = 10
# formula of counting pages = (time_per_page + base time) * no_of_pages
try:
self.audit_model_obj.screenplay_language = script_language
self.audit_model_obj.dialogue_language = dialogue_language
self.audit_model_obj.number_of_pages = int(postauditpagenumber)
print("script language, dialogue language, post audit pagenumber is update to the audit models")
except:
print("page number and language insertion failed")
pass
auditdf_before_table_creation = self.base_file_path + "auditbefore_table.csv"
para_filetered_audut_df.to_csv(auditdf_before_table_creation)
print("B4 audit report buffer")
2024-04-27 09:33:09 +00:00
audit_report_buffer = sf.print_audit_report_tabular_docx(para_filetered_audut_df,scriptname,author,pre_audit_pagenumber,postauditpagenumber,preaudit_line_no,postaudit_line_no,script_language,dialogue_language)
print("after audit buffer")
2024-04-27 09:33:09 +00:00
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.print_audit_report_tabular_docx 87 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
print("going into models :- ")
try:
req_file = ContentFile(audit_report_buffer.read(), audit_report_name)
except Exception as exp:
print(repr(exp))
print("repoo")
2024-04-27 09:33:09 +00:00
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("AFTER sf.print_audit_report_tabular_docx 87 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n"+str(req_file))
print("req_file")
try:
script = Script.objects.get(id=self.script_id)
script.no_of_pages = int(Final_pdf_page_count)
script.save()
# user_id = script.screenplay.user.id
# Now, 'user_id' contains the user.id associated with the given script_id
except Exception as e:
print(e)
# Handle the case where the script with the given ID doesn't exist
# user_id = None
print("No_of_pages not insertd")
# try:
# update_juggernaut(user_id=user_id,service_name='audit',audit_pages = int(postauditpagenumber))
# except:
# print("the update_juggernaut didnt work")
# req_file = File.objects.get(script=self.script_id)
# req_file.type= "audit-report"
# req_file.file = file
# req_file.save()
File.objects.create(
script=Script.objects.get(id=self.script_id),
type="audit-report",
file=req_file,
)
try:
end_time_count = time.time()
total_duration = end_time_count - self.start_time_count
hours, remainder = divmod(total_duration, 3600)
minutes, seconds = divmod(remainder, 60)
text_time = f"Program ran for {str(hours)} hours, {str(minutes)} minutes, and {str(seconds)} seconds. for script_id= {str(self.script_id)} which has pdf pages of {pre_audit_pagenumber}."
print(str(text_time))
t_time_file = self.total_time_file + "/tail_errors.txt"
with open(t_time_file, "a") as file008:
file008.write(str(text_time) + "\n")
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("87 function complete \n")
except:
pass
return True
def script_meta(self):
pass
def audit_ai_gen_script(self,lang: str = None):
df, _ = self.before_audit(lang)
para_df = pd.DataFrame()
df = sf_eng.ai_gen_script_to_audited_df(df)
para_df = sf.merge_line_to_para(df)
try:
para_df.to_csv(self.base_file_path+ "after_merge_line_para.csv", index = False)
print("para_df is written")
except:
pass
audited_file_name = self.script_name + ".csv"
req_file = ContentFile(
(para_df.to_csv(index=False, path_or_buf=None)).encode("utf-8"),
audited_file_name,
)
## for local - uncomment
print("\n\n the code is here\n\n")
File.objects.create(
script=Script.objects.get(id=self.script_id),
type="script-csv",
file=req_file,
)
print("\n\n @@@@#$$$$$$$$ csv saved from s2s\n\n")
2024-04-27 09:33:09 +00:00
def audit_fdx(self):
# fdx to audited csv
para_df = pd.DataFrame()
para_df = sf.fdx_to_audited_df(self.input_script)
# save audited csv to file system
audited_file_name = self.script_name + ".csv"
req_file = ContentFile(
(para_df.to_csv(index=False, path_or_buf=None)).encode("utf-8"),
audited_file_name,
)
File.objects.create(
script=Script.objects.get(id=self.script_id),
type="script-csv",
file=req_file,
)
2024-04-30 04:59:37 +00:00
print("csv created")
try:
self.audit_model_obj.isfdx = True
self.audit_model_obj.save()
print("isfdx True saved")
except Exception as exp:
print(repr(exp))
language_check_df = sf.check_and_copy_rows(para_df)
try:
script_language, dialogue_language = sf.language_detector_for_csv(language_check_df)
print("script_language",script_language)
print("dialogue_language",dialogue_language)
unique_script_languages = ', '.join(set(lang[0] for lang in script_language))
unique_dialogue_languages = ', '.join(set(lang[0] for lang in dialogue_language))
print("langauage detection worked")
except Exception as exp:
print(repr(exp))
unique_script_languages = ""
unique_dialogue_languages = ""
print("Langauuge detectedion csv didnt work")
try:
self.audit_model_obj.screenplay_language = unique_script_languages
self.audit_model_obj.dialogue_language = unique_dialogue_languages
self.audit_model_obj.save()
print("audit lang saved")
except Exception as exp:
print(repr(exp))
self.audit_model_obj.screenplay_language = "ENGLISH"
self.audit_model_obj.dialogue_language = "ENGLISH"
print("audot lang didnt save")
# print("In the total number of pages count")
# file_model_objects = File.objects.filter(script=self.script_id)
# audit_file_object = file_model_objects.get(type="script-csv")
# read_df = pd.read_csv(audit_file_object.file)
# print("csv fetched")
try:
print(para_df)
docx = sf.csv_to_docx(para_df)
audited_docx_path = self.base_file_path + "csv_to_docx_audited.docx"
# temp_file_stream = BytesIO()
print("docx saved")
docx.save(audited_docx_path)
# temp_file_stream.seek(0)
2024-04-27 09:33:09 +00:00
2024-04-30 04:59:37 +00:00
docx_file = ContentFile(
open(audited_docx_path, 'rb').read(),
"from_audited_csv_to_document.docx",
)
File.objects.create(
script=Script.objects.get(id=self.script_id),
type="script-docx",
file=docx_file,
)
print("script-docx object created")
# output_docx_after_audit = self.base_file_path + "audited_text.docx"
pdf_audit_file_path = self.base_file_path + "csv_to_docx_audited.pdf"
print("converted to docx")
try:
# total_page_af = sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path)
sf.countPages(audited_docx_path,pdf_audit_file_path,self.base_file_path)
print("fdx : docx to pdf was create at", str(pdf_audit_file_path) )
try:
total_page_af = sf.PdfCounter(pdf_audit_file_path)
print("total pages af = ", total_page_af)
print("hehehehehe")
self.audit_model_obj.number_of_pages = int(total_page_af)
self.audit_model_obj.save()
except Exception as exp:
print(repr(exp))
print("try except total pages didnt work")
except Exception as exp:
print("fdx docx to pdf conversion didnt work")
print("total_page_af : ", exp)
except Exception as exp:
print("csv to docs didnt work")
print(repr(exp))
2024-04-27 09:33:09 +00:00
return True
def quick_audit(self, lang: str = None):
df, audit_df = self.before_audit(lang)
## get the indents count
count_green = 0
count_amber = 0
total_count = len(df)
all_indents = df["ssc"].value_counts()
print(all_indents)
all_indents = df["ssc"].value_counts().sort_index().reset_index()
# print(all_indents)
for index in all_indents.index:
# print(all_indents['index'][index])
if str(all_indents["index"][index]) in ("15", "25", "30", "35"):
count_green += all_indents["ssc"][index]
elif str(all_indents["index"][index]) in (
"0",
"14",
"16",
"24",
"26",
"29",
"31",
"34",
"36",
):
count_amber += all_indents["ssc"][index]
elif all_indents["index"][index] > 62:
count_amber += all_indents["ssc"][index]
print(all_indents["index"].tolist())
print(count_green, count_amber, total_count)
percent_good = ((count_green + count_amber) / total_count) * 100
if percent_good > 80:
print("most lines are within prescribed indents", percent_good)
quick_audit_flag = "pass"
else:
print("most lines are not within prescribed indents", percent_good)
quick_audit_flag = "fail"
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("\nafter quick audit : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
# print(quick_audit_flag)
2024-04-27 09:33:09 +00:00
def get_character_list(self, lang: str = None):
if not self.audit_model_obj.pre_audit_run:
df, _ = self.before_audit(lang)
else:
df_path = os.path.join(self.base_file_path, "space_between_words_removed.csv")
df = pd.read_csv(df_path)
df_indents = df[["line_no", "data", "ssc", "parenthetical"]]
df_indents.fillna("", inplace=True)
for index in df_indents.index:
data = df_indents["data"][index]
if df_indents["parenthetical"][index] == "PartMidEnd":
par_pos = re.search("\(", data).start()
df_indents["data"][index] = data[0:par_pos].strip()
df_indents["parenthetical"][index] = "Absent"
elif data.strip():
df_indents["data"][index] = data.strip()
df_indents = df_indents.loc[df_indents["parenthetical"] == "Absent", :]
df_indents["ssc"].value_counts().sort_index()
df_indents["ssc"].value_counts().sort_index().reset_index()
all_indents = df_indents["ssc"].value_counts().sort_index().reset_index()
if 35 in all_indents["index"].tolist():
if df_indents["ssc"].value_counts().sort_index()[35] > 3:
sp_indent = 35
else:
ps_sp_indents = df_indents.loc[
(df_indents["ssc"] >= 32) & (df_indents["ssc"] <= 40), :
]
if not ps_sp_indents.empty:
sp_indent = (
ps_sp_indents["ssc"]
.value_counts()
.sort_values(ascending=False)
.reset_index()["index"][0]
)
else:
sp_indent = 35
# sp_indent = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]['index']
else:
ps_sp_indents = df_indents.loc[
(df_indents["ssc"] >= 32) & (df_indents["ssc"] <= 40), :
]
if not ps_sp_indents.empty:
sp_indent = (
ps_sp_indents["ssc"]
.value_counts()
.sort_values(ascending=False)
.reset_index()["index"][0]
)
else:
sp_indent = -1
# sp_indent = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]['index']
# third_indents = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]
try:
character_list = df_indents.loc[
df_indents["ssc"] == sp_indent, "data"
].unique()
except:
character_list = []
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("\nafter get_character_list : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
return character_list
    def get_character_subset(self, character_list):
        """Build per-character subset scripts (.docx and .txt).

        For each character, scans every scene of the audited line-wise CSV
        ("after_audit1.csv"); when the character's (uppercased) name appears
        in a line, the surrounding context is collected by walking backwards
        to the start of the speaker block and forwards to the next blank
        line.  The collected lines are re-audited, merged to paragraphs and
        written to <script>_<character>.docx / .txt under character_subset/.

        Parameters:
            character_list: iterable of character name strings
                            (e.g. from get_character_list()).

        Returns:
            dict mapping each character name to its subset .docx path.
        """
        # if character_list is None:
        #     character_list = self.get_character_list()
        audited_linewise_csv = os.path.join(self.base_file_path, "after_audit1.csv") # replaced by after_audit1.csv
        foldername = "character_subset"
        output_dir = os.path.join(self.base_file_path, foldername)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)

        df_after_audit = pd.read_csv(audited_linewise_csv)
        df_after_audit.fillna("", inplace=True)
        df_after_audit["line_no"] = df_after_audit["line_no"].astype(int)
        # Scene headings carry Identification_Status "ps1" or "ps2"; their
        # row indexes delimit the scenes scanned below.
        scenes = df_after_audit.loc[
            (df_after_audit["Identification_Status"] == "ps1")
            | (df_after_audit["Identification_Status"] == "ps2"),
            ["line_no", "data", "Identification_Status"],
        ]
        scene_indexes = scenes.index
        last_index = df_after_audit.index[-1]
        character_scripts_dict = dict()
        for character in character_list:
            # Printing may raise on exotic encodings; ignore and continue.
            try:
                print("processing character subset for", character)
            except:
                pass
            output_subset_script_txt = os.path.join(
                output_dir,
                (self.script_name.rsplit(".", 1)[0] + "_" + str(character) + ".txt"),
            )
            output_subset_script_docx = os.path.join(
                output_dir,
                (self.script_name.rsplit(".", 1)[0] + "_" + str(character) + ".docx"),
            )
            # i/j are a sliding pair of scene indexes: scene i spans
            # [scene_indexes[i], scene_indexes[j]) — the last scene runs to
            # the end of the dataframe.
            i, j = 0, 1
            character_in_scenes = []
            character_lines = []
            while j <= len(scene_indexes):
                scene_no = i + 1
                start = scene_indexes[i]
                if j < len(scene_indexes):
                    end = scene_indexes[j]
                else:
                    end = last_index + 1
                for index in range(start, end):
                    data = df_after_audit["data"][index]
                    # NOTE(review): character.upper() is used as a regex
                    # pattern — names containing metacharacters (e.g. ".",
                    # "(") would need re.escape; confirm upstream names.
                    if re.search(character.upper(), data.strip()):
                        # Always keep the scene-heading line itself.
                        character_lines.append(start)
                        # print(scene_no,index,data)
                        character_in_scenes.append(scene_no)
                        character_lines.append(index)
                        rev_index = index - 1
                        rev_index_is = df_after_audit["Identification_Status"][
                            rev_index
                        ]
                        character_lines.append(rev_index)
                        # pvs_data = df_after_audit['data'][rev_index-1]
                        # print(rev_index,pvs_data)
                        try:
                            rev_index_before_is = df_after_audit[
                                "Identification_Status"
                            ][rev_index - 1]
                        except:
                            rev_index_before_is = ""
                        # while rev_index != start and rev_index_is != 'ps4' and rev_index_is != 'ps1' and rev_index_is != 'ps7' :
                        # Walk backwards collecting context until the scene
                        # start, a "ps4"/"ps1" line, or a "ps6" line that
                        # directly follows a blank line.
                        # NOTE(review): rev_index_before_is is read once
                        # before the loop and never refreshed inside it —
                        # confirm this is intentional.
                        while (
                            rev_index != start
                            and rev_index_is != "ps4"
                            and rev_index_is != "ps1"
                            and not (
                                rev_index_is == "ps6" and rev_index_before_is == "blank"
                            )
                        ):
                            rev_index = rev_index - 1
                            pvs_data = df_after_audit["data"][rev_index]
                            # print(rev_index,pvs_data)
                            character_lines.append(rev_index)
                            rev_index_is = df_after_audit["Identification_Status"][
                                rev_index
                            ]
                        # Walk forwards until the next blank line.
                        fwd_index = index
                        fwd_index_is = df_after_audit["Identification_Status"][
                            fwd_index
                        ]
                        # NOTE(review): `fwd_index != "ps15"` compares an int
                        # row index to a string, so it is always True —
                        # probably `fwd_index_is` was meant; confirm.
                        while fwd_index_is != "blank" and fwd_index != "ps15":
                            fwd_index = fwd_index + 1
                            character_lines.append(fwd_index)
                            fwd_index_is = df_after_audit["Identification_Status"][
                                fwd_index
                            ]
                i += 1
                j += 1
            # De-duplicate and restore document order.
            character_in_scenes = list(set(character_in_scenes))
            character_lines = list(set(character_lines))
            print(character_lines)
            character_lines.sort()
            print(character_lines)
            character_df = df_after_audit[df_after_audit.index.isin(character_lines)]
            character_df.reset_index(drop=True, inplace=True)
            character_df = sf.prep_for_audit(character_df)
            # test_path = os.path.join(output_dir,os.path.splitext(input_filename)[0])+ '_' + str(character) + '_test1.csv'
            # character_df.to_csv(test_path,index= False)
            # NOTE(review): elsewhere run_audit_on_identified(df, audit_df)
            # returns a (df, audit_df) tuple — confirm the single-argument
            # form returns just the dataframe.
            character_df = sf.run_audit_on_identified(character_df)
            # test_path = os.path.join(output_dir,os.path.splitext(input_filename)[0])+ '_' + str(character) + '_test2.csv'
            # character_df.to_csv(test_path,index= False)
            ch_para_df = sf.merge_line_to_para(character_df)
            # ch_para_df.to_csv(csv_parawise_status, index = False)
            sf.sa_wrapped_output_to_docx(ch_para_df, output_subset_script_docx)
            character_scripts_dict[character] = output_subset_script_docx
            # sf.conv_docx_to_txt(output_subset_script_docx,output_subset_script_txt)
            # Plain-text rendering: blank line before each scene heading and
            # after each "ps1"/"ps3" line.
            with open(output_subset_script_txt, "w", encoding="utf-8") as fout:
                for index in character_lines:
                    print(df_after_audit["Identification_Status"][index])
                    try:
                        if str(df_after_audit["Identification_Status"][index]) == "ps1":
                            fout.writelines("\n")
                    except:
                        pass
                    data = df_after_audit["data"][index]
                    try:
                        # print(data)
                        pass

                    except:
                        pass
                    fout.writelines(str(data))
                    fout.writelines("\n")
                    try:
                        if (
                            df_after_audit["Identification_Status"][index] == "ps1"
                            or df_after_audit["Identification_Status"][index] == "ps3"
                        ):
                            fout.writelines("\n")
                    except:
                        pass
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("\nafter get_character_subset : AFTER ASSIGNING LOCATIONS AUDIT :audit\n")
        return character_scripts_dict
def audit_in_background(self):
# # commenting os.fork to make code run in foreground
2024-05-03 05:53:28 +00:00
# if os.fork() != 0:
# return
2024-04-27 09:33:09 +00:00
print("Running in background")
end_time = datetime.datetime.now()
try:
extension = self.input_script.rsplit(".", 1)[-1]
if extension == 'fdx':
self.audit_fdx()
else:
self.audit()
self.audit_model_obj.status = States.SUCCESS
self.audit_model_obj.save()
print("Audit Success!!!!!!!!!!!!!!!!!!!!!!!")
# end_time = datetime.datetime.now()
2024-04-27 09:33:09 +00:00
with open(self.base_file_path + "time_taken.txt", "a") as file007:
file007.write("\n\n****AUDITING IS SUCCESSFUL****\n")
# print(end_time)
2024-04-27 09:33:09 +00:00
except Exception as exp:
self.audit_model_obj.status = States.FAILURE
self.audit_model_obj.results = exp
self.audit_model_obj.error_msg = "FAILED"
2024-04-27 09:33:09 +00:00
self.audit_model_obj.save()
# print(end_time)
2024-04-27 09:33:09 +00:00
if __name__ == "__main__":
    # Smoke-test entry point.  The original called get_character_subset()
    # with no arguments, but that method has a required positional
    # parameter (character_list) and would raise TypeError — derive the
    # list first and pass it through.
    naudit = NeutralAudit("123", log=True)
    characters = naudit.get_character_list()
    naudit.get_character_subset(characters)