# NOTE(review): several imports (re, ContentFile, BytesIO, ScreenPlay, States)
# are not used in the portion of the file reviewed here; they may be used
# further down (e.g. in audit()) -- confirm before removing.
import os
import re
import sys
from pathlib import Path
import datetime
import pandas as pd
from django.core.files.base import ContentFile
import time
from centralisedFileSystem.models import File, Script, ScreenPlay
from scriptAudit import sa_functions as sf
from scriptAudit import sa_functions_english as sf_eng
from scriptAudit.models import ScriptAuditModel, States
from io import BytesIO
import datetime  # NOTE(review): duplicate -- datetime is already imported above
import pytz
import subprocess
# from django_q.tasks import async_task
# from django_q.brokers import Broker


class NeutralAudit:
    """Drives the audit of an already-uploaded script.

    Creates a per-script scratch directory under media/audit_folder/, writes
    many intermediate CSVs there, and logs progress to time_taken.txt.

    NOTE(review): the constructor redirects sys.stdout process-wide (to a log
    file or os.devnull); __del__ restores it.  Any other code printing in the
    same process during an audit is affected -- confirm this is intended.
    """

    def __init__(
        self,
        script_id: str = None,
        log: bool = True,
    ) -> None:
        """
        To Audit a Script already uploaded.
        _________________________________________________________________
        Parameters :
            script_id : str -> id of the script to be audited.
                        default = None
            log : bool -> when True, redirect stdout to <dir>/_log.txt and
                  keep intermediate files; when False, stdout is restored
                  and intermediate files are not flagged for generation.
                  default = True
        _________________________________________________________________
        Raises :
            File.DoesNotExist / ScriptAuditModel.DoesNotExist if the script
            or its "script-original" file record is missing.
        Return : None
        """
        self.start_time_count = time.time()
        print("<<<<<<<<<<<<<<<<<<<<<<<<<")
        # Static resources live next to this module; matrices/ holds the
        # weight/eligibility CSVs consumed by audit().
        self.matrices_path = str(Path(__file__).resolve().parent) + "/matrices/"
        self.total_time_file = str(Path(__file__).resolve().parent)
        print(script_id, "SCRIPT-ID IS HERE|| AYYA")
        self.script_id = script_id
        audit_root_dir = (
            str(Path(__file__).resolve().parent.parent) + "/media/audit_folder/"
        )
        self.script_name = str(self.script_id)
        # self.total_line_before_audit = 1
        # Per-script scratch directory: media/audit_folder/<script_id>/
        output_dir = os.path.join(audit_root_dir, self.script_name)
        t_time_file = self.total_time_file + "/tail_errors.txt"
        # with open(t_time_file, "a") as file008:
        #     file008.write(str(self.start_time_count))
        # Locate the originally uploaded script file for this script id.
        file_to_audit = File.objects.get(
            script=script_id,
            type="script-original",
        )
        self.input_script = file_to_audit.file.path
        if not os.path.exists(output_dir):
            try:
                os.mkdir(output_dir)
            except Exception as exp:
                # Best-effort fallback: shell out if os.mkdir fails, then
                # chmod 777 so other processes can write into the directory.
                print(repr(exp))
                subprocess.run(["mkdir", output_dir])
                subprocess.run(["chmod", "777", output_dir])
        ##print(output_dir)
        self.base_file_path = str(output_dir) + "/"
        self.csv_removed_space_between_words = (
            self.base_file_path + "space_between_words_removed.csv"
        )
        self.audit_report_csv = self.base_file_path + "audit_spreadsheet.csv"
        # NOTE(review): process-wide stdout redirection starts here; the
        # devnull handle opened on the next line is leaked when log=True
        # (immediately replaced without being closed).
        sys.stdout = open(os.devnull, "w")
        if log:
            log_file = self.base_file_path + "_log.txt"
            sys.stdout = open(log_file, "w", encoding="utf-8")
            self.gen_int_files = True
        else:
            self.gen_int_files = False
            sys.stdout = sys.__stdout__
        # Cached audit-status row; before_audit() re-fetches its own copy and
        # falls back to this one on failure.
        self.audit_model_obj = ScriptAuditModel.objects.get(
            script = Script.objects.get(
                id = self.script_id,
            )
        )
        time_file = self.base_file_path + "time_taken.txt"
        start_time = datetime.datetime.now()
        print(start_time)
        with open(time_file, "a") as file007:
            # NOTE(review): "started" is intentionally(?) written twice --
            # confirm whether one write was meant for a different marker.
            file007.write("started\n\n")
            file007.write("started\n\n")

    def __del__(self) -> None:
        # Restore the process-wide stdout hijacked by __init__.
        sys.stdout = sys.__stdout__

    def update_audit_df(self, df: pd.DataFrame, audit_df: pd.DataFrame) -> pd.DataFrame:
        """Copy corrected data/status from `df` back into the audit tracker.

        For every line not marked removed, pulls data / Identification_Status /
        scene_number from `df` (keyed by line_no), marks the line kept, and
        assigns a sequential audited_line_no.  Writes the refreshed tracker to
        self.audit_report_csv and returns it (also mutated in place).

        NOTE(review): the `audit_df["col"][line] = ...` chained-indexing
        writes rely on pandas returning a view (SettingWithCopyWarning
        territory) -- .at[line, "col"] would be the safe equivalent.
        """
        print("inside update audit df")
        print(df.dtypes)
        print(audit_df.dtypes)
        lines_not_removed = audit_df.loc[audit_df["line_removed"] != "Yes"].index.to_list()
        audit_df.sort_index(inplace=True)
        # audit_df.reset_index().to_csv(audit_report_csv,index =False)
        audit_df["audited_line_no"] = ""
        audited_line_no = 1
        for line in lines_not_removed:
            new_data = ""
            try:
                new_data = df.loc[df["line_no"] == line, "data"].values[0]
            except:
                # Line missing from df -> keep the empty-string default.
                pass
            # print(new_data)
            try:
                audit_df["Identification_Status"][line] = df.loc[
                    df["line_no"] == line, "Identification_Status"
                ].values[0]
            except:
                pass
            audit_df["scene_number"][line] = df.loc[
                df["line_no"] == line, "scene_number"
            ].values[0]
            audit_df["data_corrected"][line] = new_data
            audit_df["line_removed"][line] = "No"
            audit_df["audited_line_no"][line] = audited_line_no
            audited_line_no += 1
            # print(audit_df.loc[audit_df['line_no'] == line, 'data_corrected'])
        audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
        return audit_df

    def update_audit_df_intro(self, df: pd.DataFrame, audit_df: pd.DataFrame) -> pd.DataFrame:
        """Refresh data_corrected for lines flagged introduction == "Yes".

        Mutates audit_df in place (index is reset) and returns it.
        """
        print("update_audit_df_intro")
        audit_df.reset_index(inplace=True, drop=True)
        new_data = ""
        for line in audit_df.index:
            try:
                print("line", line)
                if audit_df["introduction"][line] == "Yes":
                    try:
                        new_data = df.loc[df["line_no"]
== line, "data"].values[0]
                    except Exception as e:
                        print("Exception 174:", e)
                        pass
                    # NOTE(review): if the lookup above failed, this writes
                    # the stale `new_data` from a previous iteration (or "").
                    audit_df["data_corrected"][line] = new_data
            except Exception as e:
                print(e)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("\n\n update_audit_df_intro : complete \n")
        return audit_df

    def update_audit_df_appendix(self, df: pd.DataFrame, audit_df: pd.DataFrame) -> pd.DataFrame:
        """Refresh data_corrected for lines flagged appendix == "Yes".

        Mirrors update_audit_df_intro; mutates audit_df in place and
        returns it.  Progress is appended to time_taken.txt.
        """
        new_data = ""
        print(audit_df.index)
        for line in audit_df.index:
            if audit_df["appendix"][line] == "Yes":
                try:
                    new_data = df.loc[df["line_no"] == line, "data"].values[0]
                except:
                    # Missing line_no -> keep previous/empty new_data.
                    pass
                audit_df["data_corrected"][line] = new_data
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("\n\n update_audit_df_appendix : complete \n")
        return audit_df

    def update_is_identified(self, df: pd.DataFrame) -> pd.DataFrame:
        """Set isIdentified to "Yes" only when Identification_Status holds
        exactly one ';'-separated value; "No" otherwise (including blanks
        and lookup errors).  Mutates df in place and returns it.
        """
        print("Updating is Identified")
        df["Identification_Status"].fillna("", inplace=True)
        for index in df.index:
            print(index, df["Identification_Status"][index])
            try:
                if df["Identification_Status"][index]:
                    line_pos = df["Identification_Status"][index].split(";")
                    pos_count = len(line_pos)
                else:
                    pos_count = 0
            except:
                # Non-string / missing status counts as unidentified.
                pos_count = 0
            print(pos_count)
            if pos_count == 1:
                df["isIdentified"][index] = "Yes"
            else:
                df["isIdentified"][index] = "No"
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("\n\n Inside update_is_identified : complete \n")
        return df

    def before_audit(self, lang: str = None):
        """Pre-audit pipeline.

        Converts the uploaded script to text/CSV, estimates audit duration
        from the PDF page count, trims intro/appendix, strips page numbers
        and extra blank lines, merges broken lines, and builds the line
        DataFrame plus its audit-tracking DataFrame.  Returns (df, audit_df).

        Parameters:
            lang : str -> script language; note every branch currently
                   routes to the English trimming rules.
        """
        output_converted_txt = self.base_file_path + "temp.txt"
        output_converted_docx = self.base_file_path + "temp.docx"
        csv_for_pre_processing = self.base_file_path + "for_pre_processing.csv"
        csv_for_processing = self.base_file_path + "for_processing.csv"
        csv_prepped_for_audit = self.base_file_path + "prepped_for_audit.csv"
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("Inside before audit : ( 0-16 ) \n\n")
        ## convert pdf/docx to text
        sf.conv_to_txt(
            self.input_script, output_converted_docx, output_converted_txt
        )
        try:
            output_docx_from_orginal_text = self.base_file_path + "original_text1.docx"
            pdf_file_path = self.base_file_path + "original_text1.pdf"
            print("b4 txt to docx")
            sf.convert_txt_to_docx(output_converted_txt, output_docx_from_orginal_text)
            print("IN THE BEGINING OF AUDIT PDF PAGES")
            print("b4 page count of pdf")
            # total_page_bf = sf.countPages(output_docx_from_orginal_text,pdf_file_path,self.base_file_path)
            sf.countPages(output_docx_from_orginal_text, pdf_file_path, self.base_file_path)
            print("temp txt converted to docx")
            # Default to 1 page if the PDF count below fails.
            # NOTE(review): total_page_bf is kept as a string and int()-ed
            # later -- presumably PdfCounter also returns a string; confirm.
            self.total_page_bf = str(1)
            try:
                print("int try pdf bf")
                self.total_page_bf = sf.PdfCounter(pdf_file_path)
                print("taotal_page_bf", str(self.total_page_bf))
            except Exception as exp:
                print(repr(exp))
                print("page bf didnt work")
                pass
            # self.audit_model_obj.number_of_pages = int(total_page_bf)
            # Rough ETA: 26 s per page + 120 s base, rounded to 0.1 minute.
            time_per_page = 26
            base_time = 120
            no_of_pages = int(self.total_page_bf)
            formula_of_counting_pages = (time_per_page * no_of_pages) + base_time
            print("time required for auditing is :", formula_of_counting_pages)
            extimated_time = round(formula_of_counting_pages / 60, 1)
            print("extimated_time:", extimated_time)
            print("Exstimated time is updated")
            # Expected-completion timestamp in IST, e.g. "June 01, 2024 05:30 PM".
            kolkata_time = datetime.datetime.now(pytz.timezone('Asia/Kolkata'))
            print(kolkata_time)
            thirty_mins_later = kolkata_time + datetime.timedelta(minutes=extimated_time)
            formatted_time = thirty_mins_later.strftime("%B %d, %Y %I:%M %p")
            # NOTE(review): expected_duration is set here but save() only
            # happens at the end of this method -- confirm that is intended.
            self.audit_model_obj.expected_duration = formatted_time
            print(formatted_time)
        except:
            # Best-effort: ETA computation must never abort the audit.
            pass
        # self.total_line_before_audit = sf.count_the_line(output_converted_txt)
        # print("total_line_before_audit :",total_line_before_audit)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.conv_to_text 1 : before audit\n")
        ## convert to df
        sf.conv_to_csv(output_converted_txt, csv_for_pre_processing)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.conv_to_csv 2 : before audit\n")
        df = pd.read_csv(csv_for_pre_processing, encoding="utf8")
        ## direct to df.. not working as expected
        # df = pd.DataFrame()
        # df = sf.conv_to_df(output_converted_txt)
        # df.to_csv(csv_for_pre_processing,index=False)
        print("before assign weights:")
        print(df.dtypes)
        df['preassigned_weights'] = ''
        df = sf.pre_assign_wts(df)
        print(df.dtypes)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.pre_assign_wts 3 : before audit\n")
        df = df.sort_index().reset_index(drop=True)
        df.to_csv(csv_for_processing, index =False)
        df["data"].fillna("", inplace=True)
        ## make df to track audit
        audit_df = pd.DataFrame()
        df_1st = pd.DataFrame(df)
        df_1st.to_csv(self.base_file_path + "very_first_df_feed_to_create_audit_df.csv", index = False)
        audit_df = sf.create_audit_df(df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.create_audit_df 4 : before audit\n")
        audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
        print(df.dtypes)
        print(audit_df.dtypes)
        audit_df.reset_index().to_csv(self.base_file_path + "very_first_audit_df_feed_to_create_audit_df.csv", index = False)
        print("LANGUAGE IS", lang)
        ## trim intro
        # NOTE(review): all three branches below call the same English
        # trimmer; the split presumably anticipates per-language rules.
        if lang:
            if lang.upper() == "ENGLISH":
                sf_eng.trim_intro_english(df, audit_df)
                with open(self.base_file_path + "time_taken.txt", "a") as file007:
                    file007.write("after sf_eng.trim_intro_english (5) : before audit\n")
                df = self.update_is_identified(df)
            else:
                sf_eng.trim_intro_english(df, audit_df)
                with open(self.base_file_path + "time_taken.txt", "a") as file007:
                    file007.write("after sf_eng.trim_intro_english (6) : before audit\n")
                df = self.update_is_identified(df)
        else:
            # sf.trim_intro(df,audit_df)
            sf_eng.trim_intro_english(df, audit_df)
            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_intro_english (7) : before audit\n")
            df = self.update_is_identified(df)
        # Drop lines the trimmer marked removed and renumber the frame.
        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        print(lines_not_removed)
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)
        # df = df.reset_index()
        audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
        print("Trimming Appendix")
        ## trim appendix
        if lang:
            if lang.upper() == "ENGLISH":
                sf_eng.trim_appendix_english(df, audit_df)
                with open(self.base_file_path + "time_taken.txt", "a") as file007:
                    file007.write("after sf_eng.trim_appendix_english 8: before audit\n")
                df = self.update_is_identified(df)
            else:
                sf_eng.trim_appendix_english(df, audit_df)
                with open(self.base_file_path + "time_taken.txt", "a") as file007:
                    file007.write("after sf_eng.trim_appendix_english 9: before audit\n")
                df = self.update_is_identified(df)
        else:
            sf_eng.trim_appendix_english(df, audit_df)
            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_appendix_english 10 : before audit\n")
            df = self.update_is_identified(df)
        ## remove page numbers
        sf.remove_page_numbers(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.remove_page_numbers 11 : before audit\n")
        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        print(lines_not_removed)
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)
        # df = df.reset_index()
        audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
        ## prepare for audit
        df = sf.prep_for_audit(df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.prep_for_audit 12 : before audit\n")
        # sf.prep_for_audit(df)
        df.to_csv(csv_prepped_for_audit, index=False)
        ## remove extra blank lines
        sf.remove_extra_blank_lines(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.remove_extra_blank_lines 13 : before audit\n")
        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        print(lines_not_removed)
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)
        ###
        sf.remove_blank_line_after_parenthetical(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.remove_blank_line_after_parenthetical 14 : before audit\n")
        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        print(lines_not_removed)
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)
        ##
        sf.merge_broken_lines(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.merge_broken_lines 15 : before audit\n")
        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)
        ###df.to_csv(csv_after_merge, index = False)
        ##
        sf.remove_space_between_words(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.remove_space_between_words 16 : before audit\n")
        df.to_csv(self.csv_removed_space_between_words, index=False)
        print("updating audit df")
        # Snapshot both frames before the tracker update for debugging.
        df1 = pd.DataFrame(df)
        df1.to_csv(self.base_file_path + "first_df.csv", index = False)
        audit_df1 = pd.DataFrame(audit_df)
        audit_df1.to_csv(self.base_file_path + "first_audit_df.csv", index = False)
        audit_df = self.update_audit_df(df, audit_df)
        # audit_model_obj = ScriptAuditModel.objects.get(
        #     script = Script.objects.get(
        #         id = self.script_id,
        #     )
        # )
        # Persist pre_audit_run: prefer a fresh DB row; on any failure fall
        # back to the instance cached in __init__.
        try:
            audit_model_obj = ScriptAuditModel.objects.get(
                script = Script.objects.get(
                    id = self.script_id,
                )
            )
            audit_model_obj.pre_audit_run = True
            audit_model_obj.save()
            print("TRY")
        except Exception as exp:
            print(repr(exp))
            print("EXCEPT")
            self.audit_model_obj.pre_audit_run = True
            self.audit_model_obj.save()
        print("PRE AUDIT DONE")
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("before audit complete : PRE AUDIT DONE\n\n")
        return df, audit_df

    def audit(self, lang: str =
None) -> None: """ Run Audit on NeutralAudit object. _________________________________________________________________ Parameters : lang : str -> language of the provided script. default = None (language nuteral rules) _________________________________________________________________ Return : None _________________________________________________________________ """ # ---------------------------changes to save _audited.csv in media/scriptpage/script/folder # csv_parawise_status = self.audited_script_path # ---------------------------changes to save _audited.csv in media/scriptpage/script/folder print("<<<<<<<<<<<<<<<<<<<<<<<<<") print("<<<<<<<<<<<<<<<<<<<<<<<<<",self.base_file_path) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("INSIDE AUDIT (1-87): audit\n\n") csv_after_first_strict_conditions = ( self.base_file_path + "after_first_strict_conditions.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_first_strict_conditions.csv 1 : audit\n") csv_after_gen_and_sort_weights = ( self.base_file_path + "after_gen_and_sort_weights.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_gen_and_sort_weights.csv 2 : audit\n") csv_after_examined_speaker_pos = ( self.base_file_path + "after_examined_speaker_pos.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_examined_speaker_pos.csv 3 : audit\n") csv_after_examined_speaker_next_lines= ( self.base_file_path + "after_examined_speaker_next_lines.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_examined_speaker_next_lines.csv 4 : audit\n") csv_after_pnnbl_ineligible= ( self.base_file_path + "after_pnnbl_ineligible1.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_pnnbl_ineligible1 (5) : audit\n") csv_after_examine_same_content_lines= ( self.base_file_path + 
"after_examine_same_content_lines.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_examine_same_content_lines (6) : audit\n") csv_after_examined_action_pos_part1 = ( self.base_file_path + "_after_examined_action_pos_part1.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examined_action_pos_part1.csv (7) : audit\n") csv_after_pnnbl_inelgible_after_action_pos_part1=( self.base_file_path + "_after_pnnbl_inelgible_after_action_pos_part1.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_pnnbl_inelgible_after_action_pos_part1.csv (8) : audit\n") csv_after_examined_action_pos_part2 = ( self.base_file_path + "_after_examined_action_pos_part2.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examined_action_pos_part2.csv (9) : audit\n") csv_after_pnnbl_inelgible_after_action_pos_part2 = ( self.base_file_path + "_after_pnnbl_inelgible_after_action_pos_part2.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_pnnbl_inelgible_after_action_pos_part2.csv (10) : audit\n") csv_after_examined_same_indent_bunch = ( self.base_file_path + "_after_examined_same_indent_bunch.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examined_same_indent_bunch.csv (11) : audit\n") csv_after_pnnbl_inelgible_after_same_indent = ( self.base_file_path + "_after_pnnbl_inelgible_after_same_indent.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_pnnbl_inelgible_after_same_indent.csv (12) : audit\n") csv_after_examined_relative_indent_bunch = ( self.base_file_path + "_after_examined_relative_indent_bunch.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examined_relative_indent_bunch.csv (13) : audit\n") 
csv_after_examined_speaker_next_lines_after_relative_indent = ( self.base_file_path + "_after_examined_speaker_next_lines_after_relative_indent.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examined_speaker_next_lines_after_relative_indent.csv (14) : audit\n") csv_after_pnnbl_inelgible_after_relative_indent = ( self.base_file_path + "after_pnnbl_inelgible_after_relative_indent_bunch.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_pnnbl_inelgible_after_relative_indent_bunch.csv (15) : audit\n") csv_examined_speaker_using_indent = ( self.base_file_path + "after_examined_speaker_using_indent.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_examined_speaker_using_indent.csv (16) : audit\n") csv_after_examined_speaker_next_lines_after_pos_sp_indent = ( self.base_file_path + "_after_examined_speaker_next_lines_after_pos_sp_indent.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examined_speaker_next_lines_after_pos_sp_indent.csv (17) : audit\n") csv_after_pnnbl_inelgible_after_pos_sp_indent = ( self.base_file_path + "_after_pnnbl_inelgible_after_pos_sp_indent.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_pnnbl_inelgible_after_pos_sp_indent.csv (18) : audit\n") csv_examined_speaker_extension = ( self.base_file_path + "_after_examined_speaker_extension.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examined_speaker_extension.csv (19) : audit\n") csv_after_examined_speaker_next_lines_after_speaker_extension = ( self.base_file_path + "_after_examined_speaker_next_lines_after_speaker_extension.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examined_speaker_next_lines_after_speaker_extension.csv(20) : audit\n") 
csv_after_pnnbl_inelgible_after_speaker_extension = ( self.base_file_path + "_after_pnnbl_inelgible_after_speaker_extension.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_pnnbl_inelgible_after_speaker_extension.csv (21) : audit\n") csv_after_examined_action_using_top2 = ( self.base_file_path + "_after_examined_action_using_top2.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examined_action_using_top2.csv (22) : audit\n") csv_after_pnnbl_inelgible_after_action_using_top_pnnbl = ( self.base_file_path + "_after_pnnbl_inelgible_after_action_using_top_pnnbl.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_pnnbl_inelgible_after_action_using_top_pnnbl.csv (23) : audit\n") csv_after_refined_action = ( self.base_file_path + "_after_refined_action.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_refined_action.csv (24) : audit\n") csv_after_pnnbl_inelgible_after_refined_action = ( self.base_file_path + "_after_pnnbl_inelgible_after_refined_action.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_pnnbl_inelgible_after_refined_action.csv (25) : audit\n") csv_after_eligibility_using_identified_pnnbl = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl.csv (26) : audit\n") csv_after_top_identification_part1 = ( self.base_file_path + "_after_top_identification_part1.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top_identification_part1.csv (27) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two = ( self.base_file_path + "after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two.csv" ) with 
open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two.csv (28) : audit\n") csv_after_examined_speaker_pos_after_top1 = ( self.base_file_path + "_after_examined_speaker_pos_after_top1.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examined_speaker_pos_after_top1.csv (29) : audit\n") csv_after_examined_speaker_next_lines_after_top1 = ( self.base_file_path + "after_examined_speaker_next_lines_after_top1.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_examined_speaker_next_lines_after_top1.csv (30) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top_part1 = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part1.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top_part1.csv (31) : audit\n") csv_after_examine_speaker_mix_part1 = ( self.base_file_path + "_after_examine_speaker_mix_part1.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examine_speaker_mix_part1.csv (32) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1 = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1.csv (33) : audit\n") csv_after_examine_speaker_mix_part2 = ( self.base_file_path + "_after_examine_speaker_mix_part2.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examine_speaker_mix_part2.csv (34) : audit\n") csv_after_examined_speaker_pos_after_mix = ( self.base_file_path + "_after_examined_speaker_pos_after_mix.csv" ) with open(self.base_file_path + "time_taken.txt", 
"a") as file007: file007.write("_after_examined_speaker_pos_after_mix.csv(35) : audit\n") csv_after_examined_speaker_next_lines_after_mix = ( self.base_file_path + "_after_examined_speaker_next_lines_after_mix.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examined_speaker_next_lines_after_mix.csv (36) : audit\n") csv_after_pnnbl_ineligible_after_mix = ( self.base_file_path + "_after_pnnbl_ineligible_after_mix.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_pnnbl_ineligible_after_mix.csv (37) : audit\n") csv_after_top_identification_part2 = ( self.base_file_path + "_after_top_identification_part2.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top_identification_part2.csv (38) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top_part2 = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part2.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top_part2.csv (39) : audit\n") csv_after_top_identification_part2_again = ( self.base_file_path + "_after_top_identification_part2_again.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top_identification_part2_again.csv (40) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top_part2_again = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part2_again.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top_part2_again.csv (41) : audit\n") csv_after_top_identification_part2_again_again = ( self.base_file_path + "_after_top_identification_part2_again_again.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: 
file007.write("_after_top_identification_part2_again_again.csv(42) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top_part2_again_again = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part2_again_again.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top_part2_again_again.csv (43) : audit\n") csv_after_slug_identification = ( self.base_file_path + "_after_slug_identification.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_slug_identification.csv (44) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_slug_identification = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_slug_identification.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_slug_identification.csv (45) : audit\n") csv_after_top_identification_part1_again = ( self.base_file_path + "_after_top_identification_part1_again.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top_identification_part1_again.csv (46) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top_part1_again = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part1_again.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top_part1_again.csv (47) : audit\n") csv_after_top_identification_part3 = ( self.base_file_path + "_after_top_identification_part3.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top_identification_part3.csv (48) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top_part3 = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part3.csv" ) with open(self.base_file_path 
+ "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top_part3.csv (49) : audit\n") csv_after_top_identification_part4 = ( self.base_file_path + "_after_top_identification_part4.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top_identification_part4.csv (50) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top_part4 = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part4.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top_part4.csv (51) : audit\n") csv_after_top_identification_part5 = ( self.base_file_path + "_after_top_identification_part5.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top_identification_part5.csv (52) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top_part5 = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part5.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top_part5.csv (53) : audit\n") csv_after_top_identification_part6 = ( self.base_file_path + "_after_top_identification_part6.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top_identification_part6.csv (54) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top_part6 = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part6.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top_part6.csv (55) : audit\n") csv_after_top_identification_part7 = ( self.base_file_path + "_after_top_identification_part7.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top_identification_part7.csv 
(56) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top_part7 = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part7.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top_part7.csv (57) : audit\n") csv_after_top_identification_part8 = ( self.base_file_path + "_after_top_identification_part8.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top_identification_part8.csv (58) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top_part8 = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part8.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top_part8.csv (59) : audit\n") csv_after_examine_among_two = ( self.base_file_path + "_after_examine_among_two.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examine_among_two.csv (60) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_examine_among_two = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_examine_among_two.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_eligibility_using_identified_pnnbl_after_examine_among_two.csv (61) : audit\n") csv_after_examine_speaker_next_line_after_among_two = ( self.base_file_path + "_after_examine_speaker_next_line_after_among_two.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examine_speaker_next_line_after_among_two.csv (62) : audit\n") csv_after_top2_wt_diff = ( self.base_file_path + "_after_top2_wt_diff.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top2_wt_diff.csv (63) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff = ( 
self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top2_wt_diff.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top2_wt_diff.csv (64) : audit\n") csv_after_top2_wt_diff_again = ( self.base_file_path + "_after_top2_wt_diff_again.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top2_wt_diff_again.csv (65) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again.csv(66) : audit\n") csv_after_top_identification_part1_diluted = ( self.base_file_path + "_after_top_identification_part1_diluted.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_top_identification_part1_diluted.csv (67) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_top_part1_diluted = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_top_part1_diluted.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_top_part1_diluted.csv (68) : audit\n") #1.2 csv_after_examine_dial_between_action = ( self.base_file_path + "_after_examine_dial_between_action.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examine_dial_between_action.csv (69) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: 
file007.write("_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action.csv (70) : audit\n") csv_after_examine_among_two_again = ( self.base_file_path + "_after_examine_among_two_again.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_examine_among_two_again.csv (71) : audit\n") csv_after_eligibility_using_identified_pnnbl_after_examine_among_two_again = ( self.base_file_path + "_after_eligibility_using_identified_pnnbl_after_examine_among_two_again.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("_after_eligibility_using_identified_pnnbl_after_examine_among_two_again.csv (72) : audit\n") csv_after_identify_remaining_as_top = ( self.base_file_path + "after_identifying_remaining_as_top.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_identifying_remaining_as_top.csv (73) : audit\n") csv_after_prep_for_audit_after_identification = ( self.base_file_path + "after_prep_for_audit_after_identification.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_prep_for_audit_after_identification.csv (74) : audit\n") csv_after_audit1 = self.base_file_path + "after_audit1.csv" with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_audit1.csv (75) : audit\n") csv_after_wrapping = self.base_file_path + "after_wrapping.csv" with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_wrapping.csv (76) : audit\n") csv_after_prep_for_audit_after_wrapping = ( self.base_file_path + "after_prep_for_audit_after_wrapping.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_prep_for_audit_after_wrapping.csv (77) : audit\n") csv_after_audit2 = self.base_file_path + "after_audit2.csv" with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("after_audit2.csv (78) : audit\n") 
output_linewise_docx = self.base_file_path + "audited_linewise.docx" with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("audited_linewise.docx (79) : audit\n") output_linewise_txt = self.base_file_path + "audited_linewise.txt" with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("audited_linewise.txt (80) : audit\n") audit_report_tabular_docx = self.base_file_path + "audit_report_tabular.docx" with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("audit_report_tabular.docx (81) : audit\n") csv_strict_conditions = self.matrices_path + "strict_conditions_230623.csv" with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("strict_conditions_230623.csv : audit\n") csv_pos_weights = self.matrices_path + "PS_Weights_250623_2.csv" with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("PS_Weights_250623_2.csv (83) : audit\n") csv_pnbl_nnbl = self.matrices_path + "pnbl_nnbl.csv" with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("pnbl_nnbl.csv (84) : audit\n") pnbl_eligibility_matrix = ( self.matrices_path + "pnbl_eligibility_matrix_250623.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("pnbl_eligibility_matrix_250623.csv (85) : audit\n") nnbl_eligibility_matrix = ( self.matrices_path + "nnbl_eligibility_matrix_250623.csv" ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("nnbl_eligibility_matrix_250623.csv (86) : audit\n") output_template = self.matrices_path + "ScriptTemplate5.docx" with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("ScriptTemplate5.docx (87) : audit\n") with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AUDIT : audit\n\n") df, audit_df = self.before_audit(lang) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER ASSIGNING 
LOCATIONS AUDIT : audit\n\n") ####################################### sf.test_strict_conditions(df, csv_strict_conditions) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.test_strict_conditions 1 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv(csv_after_first_strict_conditions, index = False) ## gen weights for possibilties ## add preassigned weights df = sf.gen_pos_weights(df, csv_pos_weights) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.gen_pos_weights 2 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") ## language specific weights update if lang: if lang.upper() == "ENGLISH": df = sf_eng.update_pos_wts_english(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf_eng.update_pos_wts_english 3 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = sf.sort_pos_decr_wts(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.sort_pos_decr_wts 4 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") #if self.gen_int_files: df.to_csv(csv_after_gen_and_sort_weights, index = False) ## sf.prep_for_pos_elimination(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.prep_for_pos_elimination 5 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df_bex1 = pd.DataFrame(df) df_bex1.to_csv(self.base_file_path + "df_update_audit_df_b_exam_speaker_1.csv", index = False) audit_df_bex1 = pd.DataFrame(audit_df) audit_df_bex1.to_csv(self.base_file_path + "audit_df_update_audit_df_b_exam_speaker_1.csv", index = False) ## examine speaker possibilties df = sf.examine_speaker_pos(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_speaker_pos 6 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df_ex1 = pd.DataFrame(df) df_ex1.to_csv(self.base_file_path + "df_update_audit_df_exam_speaker_1.csv", index = False) audit_df_ex1 
= pd.DataFrame(audit_df) audit_df_ex1.to_csv(self.base_file_path + "audit_df_update_audit_df_exam_speaker_1.csv", index = True) if self.gen_int_files: df.to_csv(csv_after_examined_speaker_pos, index = False) print("printing info based on audit_df") # df_b1 = pd.DataFrame(df) # df_b1.to_csv(self.base_file_path + "df_update_audit_df_b1.csv", index = False) print(audit_df.head(10),audit_df.dtypes) try: audit_df = audit_df.sort_values('audited_line_no') except: audit_df['audited_line_no'] = pd.to_numeric(audit_df['audited_line_no'], errors='coerce') audit_df = audit_df.sort_values('audited_line_no') audit_df_try1 = pd.DataFrame(audit_df) audit_df_try1.to_csv(self.base_file_path + "audit_df_update_audit_df_try1.csv", index = True) print(audit_df.head()) try: audit_df = pd.merge(audit_df, df[['line_no']], on=audit_df.index, how='left') print(audit_df.head()) # Set 'line_no' as index audit_df.set_index('line_no', inplace=True) print(audit_df.head()) audit_df_try2 = pd.DataFrame(audit_df) audit_df_try2.to_csv(self.base_file_path + "audit_df_update_audit_df_try2.csv", index = True) except Exception as e: print(e, audit_df.head()) pass # try: # audit_df.reset_index(drop=True, inplace=True) # audit_df.set_index('line_no',inplace=True) # except Exception as e: # print(e) print(audit_df.head()) print(audit_df.dtypes) audit_df_b1 = pd.DataFrame(audit_df) audit_df_b1.to_csv(self.base_file_path + "audit_df_update_audit_df_b1.csv", index = True) audit_df = self.update_audit_df(df, audit_df) df_1 = pd.DataFrame(df) df_1.to_csv(self.base_file_path + "df_update_audit_df_1.csv", index = True) audit_df_1 = pd.DataFrame(audit_df) audit_df_1.to_csv(self.base_file_path + "audit_df_update_audit_df_1.csv", index = True) ### df = sf.examine_speaker_next_lines(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_speaker_next_lines 7 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: 
df.to_csv(csv_after_examined_speaker_next_lines, index = False) audit_df = self.update_audit_df(df, audit_df) audit_df_u7 = pd.DataFrame(audit_df) audit_df_u7.to_csv(self.base_file_path + "audit_df_update_audit_df_7.csv", index = True) ## do while pnnbl ineligible sf.prep_pnnbl_wts(csv_pnbl_nnbl, self.matrices_path) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.prep_pnnbl_wts 8 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = sf.do_while_pnnbl_ineligible(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_pnnbl_ineligible 9 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_pnnbl_ineligible, index = False) ## examine same content df = sf.examine_same_content_lines(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_same_content_lines 10 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_examine_same_content_lines, index = False) ### examine speaker next again df = sf.examine_speaker_next_lines(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_speaker_next_lines 11 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") ###df.to_csv(csv_after_examined_speaker_next_lines_after_same_content, index = False) audit_df = self.update_audit_df(df, audit_df) ## do while pnnbl ineligible # sf.prep_pnnbl_wts(csv_pnbl_nnbl) df = sf.do_while_pnnbl_ineligible(df) df = self.update_is_identified(df) ###df.to_csv(csv_after_pnnbl_ineligible_after_same_content, index = False) ################ df = sf.examine_action_possibilities_part1(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_action_possibilities_part1 12 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: 
df.to_csv(csv_after_examined_action_pos_part1, index = False) audit_df = self.update_audit_df(df, audit_df) ## do while pnnbl ineligible # sf.prep_pnnbl_wts(csv_pnbl_nnbl) df = sf.do_while_pnnbl_ineligible(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_pnnbl_ineligible 13 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_pnnbl_inelgible_after_action_pos_part1, index = False) ################ df = sf.examine_action_possibilities_part2(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_action_possibilities_part2 14 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv(csv_after_examined_action_pos_part2, index = False) audit_df = self.update_audit_df(df, audit_df) ## do while pnnbl ineligible # sf.prep_pnnbl_wts(csv_pnbl_nnbl) df = sf.do_while_pnnbl_ineligible(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_pnnbl_ineligible 15 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_pnnbl_inelgible_after_action_pos_part2, index = False) ################ df = sf.examine_same_indent_bunch(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_same_indent_bunch 16 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv(csv_after_examined_same_indent_bunch, index = False) audit_df = self.update_audit_df(df, audit_df) ## do while pnnbl ineligible # sf.prep_pnnbl_wts(csv_pnbl_nnbl) df = sf.do_while_pnnbl_ineligible(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_pnnbl_ineligible 17 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: 
df.to_csv(csv_after_pnnbl_inelgible_after_same_indent, index = False) ##################### ##for reorganisation # df = pd.read_csv('Script_Shatranj_pnnbl_ineligible_same_indent_bunch_new_col_2.csv') # csv_for_pos_elimination = os.path.join(self.output_dir,os.path.splitext(self.script_name)[0])+'_for_pos_elimination.csv' ######################### df = sf.examine_relative_indent(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_relative_indent 18 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv(csv_after_examined_relative_indent_bunch, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.examine_speaker_next_lines(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_speaker_next_lines 19 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv(csv_after_examined_speaker_next_lines_after_relative_indent, index = False) audit_df = self.update_audit_df(df, audit_df) ## do while pnnbl ineligible # sf.prep_pnnbl_wts(csv_pnbl_nnbl,matrices_path) df = sf.do_while_pnnbl_ineligible(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_pnnbl_ineligible 20 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) df.to_csv(csv_after_pnnbl_inelgible_after_relative_indent, index=False) ####################################### df = sf.examine_pos_sp_indent( df, self.csv_removed_space_between_words, csv_after_pnnbl_inelgible_after_relative_indent, ) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_pos_sp_indent 21 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv( csv_examined_speaker_using_indent,index =False) audit_df = self.update_audit_df(df, audit_df) df = sf.examine_speaker_next_lines(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") 
as file007: file007.write("AFTER sf.examine_speaker_next_lines 22 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv(csv_after_examined_speaker_next_lines_after_pos_sp_indent, index = False) audit_df = self.update_audit_df(df, audit_df) ## do while pnnbl ineligible # sf.prep_pnnbl_wts(csv_pnbl_nnbl) df = sf.do_while_pnnbl_ineligible(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_pnnbl_ineligible 23 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_pnnbl_inelgible_after_pos_sp_indent, index = False) ################################# df = sf.examine_speaker_extension(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_speaker_extension 24 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv( csv_examined_speaker_extension,index =False) audit_df = self.update_audit_df(df, audit_df) df = sf.examine_speaker_next_lines(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_speaker_next_lines 25 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv(csv_after_examined_speaker_next_lines_after_speaker_extension, index = False) audit_df = self.update_audit_df(df, audit_df) ## do while pnnbl ineligible print("pnnbl after speaker extension") # sf.prep_pnnbl_wts(csv_pnbl_nnbl) df = sf.do_while_pnnbl_ineligible(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_pnnbl_ineligible 26 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_pnnbl_inelgible_after_speaker_extension, index = False) ## checking # audit_df.reset_index().to_csv(audit_report_csv,index =False) ################################################# df = 
sf.examine_action_using_top2_part1(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_action_using_top2_part1 27 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv(csv_after_examined_action_using_top2, index = False) audit_df = self.update_audit_df(df, audit_df) ## do while pnnbl ineligible # sf.prep_pnnbl_wts(csv_pnbl_nnbl) df = sf.do_while_pnnbl_ineligible(df) df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_pnnbl_inelgible_after_action_using_top_pnnbl, index = False) # ######################################### df = sf.refine_action_possibilties(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.refine_action_possibilties 28 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_refined_action, index = False) audit_df = self.update_audit_df(df, audit_df) ## do while pnnbl ineligible # sf.prep_pnnbl_wts(csv_pnbl_nnbl) df = sf.do_while_pnnbl_ineligible(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_pnnbl_ineligible(df) 29 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_pnnbl_inelgible_after_refined_action, index = False) ############################## sf.prep_pnnbl_eligible_csv(pnbl_eligibility_matrix, nnbl_eligibility_matrix) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.prep_pnnbl_eligible_csv 30 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") ############################# df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 31 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: 
df.to_csv(csv_after_eligibility_using_identified_pnnbl, index = False) ################################# df = sf.start_top_identifications_part1(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.start_top_identifications_part1 32 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top_identification_part1, index = False) audit_df = self.update_audit_df(df, audit_df) ## examine speaker possibilties again after top1 df = sf.examine_speaker_pos(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_speaker_pos 33 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv(csv_after_examined_speaker_pos_after_top1, index = False) audit_df = self.update_audit_df(df, audit_df) ### df_34 = pd.DataFrame(df) df_34.to_csv(self.base_file_path + "df_export_before_34.csv", index = True) au_df_34 = pd.DataFrame(audit_df) au_df_34.to_csv(self.base_file_path + "audit_df_before_after_34.csv", index = True) df = sf.examine_speaker_next_lines(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_speaker_next_lines 34 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv(csv_after_examined_speaker_next_lines_after_top1, index=False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 35 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part1, index = False) ################################# copy_df_examine_speaker_mix_part1 = pd.DataFrame(df) copy_df_examine_speaker_mix_part1.to_csv(self.base_file_path + 
"copy_df_examine_speaker_mix_part1.csv", index = True) ########### copy_audit_df_examine_speaker_mix_part1 = pd.DataFrame(audit_df) copy_audit_df_examine_speaker_mix_part1.to_csv(self.base_file_path + "copy_audit_df_examine_speaker_mix_part1.csv", index = True) ########## df = sf.examine_speaker_mix_part1(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_speaker_mix_part1 36 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_examine_speaker_mix_part1, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 37 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1, index = False) ################################# df_38 = pd.DataFrame(df) df_38.to_csv(self.base_file_path + "df_export_after_38.csv", index = True) au_df_38 = pd.DataFrame(audit_df) au_df_38.to_csv(self.base_file_path + "audit_df_export_after_38.csv", index = True) df = sf.examine_speaker_mix_part2(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_speaker_mix_part2 38 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_examine_speaker_mix_part2, index = False) audit_df = self.update_audit_df(df, audit_df) ## examine speaker possibilties again after mix df = sf.examine_speaker_pos(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_speaker_pos 39 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv(csv_after_examined_speaker_pos_after_mix, index = 
False) audit_df = self.update_audit_df(df, audit_df) ### df = sf.examine_speaker_next_lines(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTERsf.examine_speaker_next_lines 40 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") if self.gen_int_files: df.to_csv(csv_after_examined_speaker_next_lines_after_mix, index = False) audit_df = self.update_audit_df(df, audit_df) ## do while pnnbl ineligible # sf.prep_pnnbl_wts(csv_pnbl_nnbl,matrices_path) df = sf.do_while_pnnbl_ineligible(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_pnnbl_ineligible 41 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_pnnbl_ineligible_after_mix, index = False) # df = sf.do_while_examine_using_identified_pnnbl(df) # df = update_is_identified(df) # df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part2, index = False) ################################ df = sf.start_top_identifications_part2(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.start_top_identifications_part2 42 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top_identification_part2, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 43 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part2, index = False) ################################# df = sf.start_top_identifications_part2(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.start_top_identifications_part2 44 : AFTER 
ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top_identification_part2_again, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 45 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part2_again, index = False) ################################# df = sf.start_top_identifications_part2(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.start_top_identifications_part2 46 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top_identification_part2_again_again, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 47 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part2_again_again, index = False) ################################# df = sf.start_slug_identification(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.start_slug_identification(df) 48 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_slug_identification, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl(df) 49 : 
AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_slug_identification, index = False) ################################# df = sf.start_top_identifications_part1(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.start_top_identifications_part1(df) 50 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top_identification_part1_again, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 51 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part1_again, index = False) ################################# df = sf.start_top_identifications_part3(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.start_top_identifications_part3 52 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top_identification_part3, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 53 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part3, index = False) ################################# df = sf.start_top_identifications_part4(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.start_top_identifications_part4 54 : 
AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top_identification_part4, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 55 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part4, index = False) ################################# df = sf.start_top_identifications_part5(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.start_top_identifications_part5(df) 56 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top_identification_part5, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 57 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part5, index = False) ################################# df = sf.start_top_identifications_part6(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.start_top_identifications_part6 58 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top_identification_part6, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 59 : AFTER ASSIGNING 
LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part6, index = False) ################################# df = sf.start_top_identifications_part7(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.start_top_identifications_part7 60 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top_identification_part7, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 61 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part7, index = False) ################################# df = sf.start_top_identifications_part8(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.start_top_identifications_part8 62 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top_identification_part8, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 63 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part8, index = False) ################################# df = sf.examine_among_two(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_among_two 64 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = 
self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_examine_among_two, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 65 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_among_two, index = False) ################################# df = sf.examine_speaker_next_lines(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_speaker_next_lines 66: AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_examine_speaker_next_line_after_among_two, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 67 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two, index = False) ################################# df = sf.examine_action_using_top2_wt_diff(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_action_using_top2_wt_diff 68 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top2_wt_diff, index = False) audit_df = self.update_audit_df(df, audit_df) try: df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 69 : AFTER ASSIGNING LOCATIONS AUDIT 
:audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff, index = False) except: pass ################################# try: df = sf.examine_action_using_top2_wt_diff(df) if self.gen_int_files: df.to_csv(csv_after_top2_wt_diff_again, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 70 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again, index = False) except: pass ################################# try: df = sf.start_top_identifications_part1_diluted(df) df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_top_identification_part1_diluted, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 71 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_top_part1_diluted, index = False) except: pass ################################### #################################### ##1.1 df = sf.decrease_wt_dial_between_action(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.decrease_wt_dial_between_action 72 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_examine_dial_between_action, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + 
"time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 73 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) #if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action, index = False) #################################### ################################# df = sf.examine_among_two(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.examine_among_two 74 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) if self.gen_int_files: df.to_csv(csv_after_examine_among_two_again, index = False) audit_df = self.update_audit_df(df, audit_df) df = sf.do_while_examine_using_identified_pnnbl(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.do_while_examine_using_identified_pnnbl 75 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df = self.update_is_identified(df) #if self.gen_int_files: df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_examine_among_two_again, index = False) #################################### ################################# df = sf.identify_top_as_final(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.identify_top_as_final 76 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df_76 = pd.DataFrame(df) df_76.to_csv(self.base_file_path + "df_identify_top_as_final_76.csv", index = False) au_df_76 = pd.DataFrame(audit_df) au_df_76.to_csv(self.base_file_path + "audit_df_identify_top_as_final_76.csv", index = False) df = self.update_is_identified(df) df.to_csv(csv_after_identify_remaining_as_top, index=False) audit_df = self.update_audit_df(df, audit_df) ##################################### ## prepare for audit df = sf.prep_for_audit(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.prep_for_audit 77 : AFTER ASSIGNING LOCATIONS AUDIT 
:audit\n") df.to_csv(csv_after_prep_for_audit_after_identification, index=False) ##################################### df, audit_df = sf.run_audit_on_identified(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.run_audit_on_identified 78 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df.to_csv(csv_after_audit1, index=False) audit_df = self.update_audit_df(df, audit_df) ############################################# ### run language specific audit on identified if lang: if lang.upper() == "ENGLISH": df = sf_eng.run_audit_on_identified_english(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf_eng.run_audit_on_identified_english 79 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") audit_df = self.update_audit_df(df, audit_df) ##################################### ### merge the beginning/middle/end lines # df.to_csv(self.base_file_path + "df_before_merge_line_para.csv", index = Flase) para_df = sf.merge_line_to_para(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.merge_line_to_para 80 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") try: para_df.to_csv(self.base_file_path+ "after_merge_line_para.csv", index = False) print("para_df is writtern") except: pass print("\n\n Function 80 is completed") try: script_language, dialogue_language = sf.language_detector_for_csv(para_df) print("script_language",script_language) print("dialogue_language",dialogue_language) unique_script_languages = ', '.join(set(lang[0] for lang in script_language)) unique_dialogue_languages = ', '.join(set(lang[0] for lang in dialogue_language)) except: unique_script_languages = "" unique_dialogue_languages = "" #commented as some unwanted change of . 
to comma #para_df = sf.change_dot_to_comma_inslug(para_df) print("unique_script_languages:",unique_script_languages) print("unique_dialogue_languages:",unique_dialogue_languages) # para_df.to_csv(csv_parawise_status, index=False) ## print("\n\n dot to comma changes in slug") audited_file_name = self.script_name + ".csv" req_file = ContentFile( (para_df.to_csv(index=False, path_or_buf=None)).encode("utf-8"), audited_file_name, ) File.objects.create( script=Script.objects.get(id=self.script_id), type="script-csv", file=req_file, ) print("\n\n exporting df and audit_df agter function 80") df_df = pd.DataFrame(df) df_df.to_csv(self.base_file_path + "df_export_after_80.csv", index = False) audit_df_df = pd.DataFrame(audit_df) audit_df_df.reset_index().to_csv(self.base_file_path + "audit_df_export_after_80.csv", index = False) print("\nwrapping identified lines if required\n") df = sf.wrap_text(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.wrap_text 81 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df_81 = pd.DataFrame(df) df_81.to_csv(self.base_file_path + "df_export_after_81.csv", index = False) au_df_81 = pd.DataFrame(audit_df) au_df_81.reset_index().to_csv(self.base_file_path + "audit_df_export_after_81.csv", index = False) df.to_csv(csv_after_wrapping, index=False) #audit_df['line_no'] = audit_df['line_no'].astype(float) audit_df = self.update_audit_df(df, audit_df) ##################################### ## prepare for audit again only_df = pd.DataFrame(df) only_df.to_csv(self.base_file_path + "df_before_82.csv", index = False) df = sf.prep_for_audit(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.prep_for_audit 82 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df.to_csv(csv_after_prep_for_audit_after_wrapping, index=False) ##################################### sf.run_audit_on_identified(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: 
file007.write("AFTER sf.run_audit_on_identified 83 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") df.to_csv(csv_after_audit2, index=False) audit_df = self.update_audit_df(df, audit_df) ##################################################### ### run language specific audit on identified if lang: if lang.upper() == "ENGLISH": df = sf_eng.run_audit_on_identified_english(df, audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf_eng.run_audit_on_identified_english 84 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") audit_df = self.update_audit_df(df, audit_df) #################################### sf.sa_output_to_docx(df, output_linewise_docx, output_template) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.sa_output_to_docx 85 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") sf.sa_output_to_txt(output_linewise_docx, output_linewise_txt) # print("line no: 2017",str(output_linewise_txt)) output_docx_after_audit = self.base_file_path + "audited_text.docx" pdf_audit_file_path = self.base_file_path + "audited_text.pdf" sf.convert_txt_to_docx(output_linewise_txt,output_docx_after_audit) print("converted to docx") try: # total_page_af = sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path) sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path) try: total_page_af = sf.PdfCounter(pdf_audit_file_path) print("total pages af = ", total_page_af) print("hehehehehe") except Exception as exp: print(repr(exp)) print("try except total pages didnt work") except Exception as exp: print("total_page_af : ", exp) print("the count of pageline start here") line_count_after_audit = sf.count_the_line(str(output_linewise_txt)) count_before_txt = self.base_file_path + "temp.txt" line_count_before_audit = sf.count_the_line(str(count_before_txt)) print("you are here") output_docx_from_orginal_text = self.base_file_path + "original_text.docx" pdf_file_path = 
self.base_file_path + "original_text.pdf" print("b4 txt to docx") sf.convert_txt_to_docx(count_before_txt,output_docx_from_orginal_text) print("b4 page count of pdf") print("hehe") """13-2-24""" # try: # total_page_bf = sf.countPages(output_docx_from_orginal_text,pdf_file_path,self.base_file_path) # print(total_page_bf) # except Exception as exp: # print(" total page bf",total_page_bf ) print("temp txt converted to docx") with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.sa_output_to_txt 86 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") ######################################## # sf.print_audit_report_docx(audit_df,audit_report_docx) # headers = True try: print("In the total number of pages count") file_model_objects = File.objects.filter(script=self.script_id) audit_file_object = file_model_objects.get(type="script-csv") read_df = pd.read_csv(audit_file_object.file) print("csv fetched") docx = sf.csv_to_docx(read_df) audited_docx_path = self.base_file_path + "csv_to_docx_audited.docx" # temp_file_stream = BytesIO() print("docx saved") docx.save(audited_docx_path) # temp_file_stream.seek(0) docx_file = ContentFile( open(audited_docx_path, 'rb').read(), "from_audited_csv_to_document.docx", ) # docx_file = ContentFile( # audited_docx_path.getvalue(), # "from_audited_csv_to_document.docx", # ) File.objects.create( script=Script.objects.get(id=self.script_id), type="script-docx", file=docx_file, ) print("script-docx object created") converted_audit_pdf_file_path = self.base_file_path + "csv_to_docx_audited.pdf" Final_pdf_page_count = sf.countPages(audited_docx_path,converted_audit_pdf_file_path,self.base_file_path) print("total number of pdf pages") print(int(Final_pdf_page_count)) pass except Exception as e: print("yje exception is") print(e) audit_df = self.update_audit_df_intro(df, audit_df) audit_df = self.update_audit_df_appendix(df, audit_df) audit_report_name = self.script_name + "_report.docx" print("audit_df_tabular 
1908\n\n",audit_df,"\n\n" ) copy_df = pd.DataFrame(audit_df) copy_df.reset_index().to_csv(self.base_file_path + "audit_report_export.csv", index = False) print("before print_report_tabular_docx") script_ob = Script.objects.get(id=self.script_id) screen_play_name = script_ob.screenplay.name author_name = script_ob.screenplay.author print(screen_play_name) print(author_name) print(line_count_before_audit) print(line_count_after_audit) # audit_report_buffer = sf.print_audit_report_tabular_docx(audit_df,line_count_before_audit,line_count_after_audit) #commented on 13-09-23 para_filetered_audut_df = sf.assign_para_no(audit_df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER para_filetered_audut_df\n") scriptname = str(screen_play_name) # to fetched by audit data author = str(author_name) # to fetched by audit data try: pre_audit_pagenumber = int(self.total_page_bf) except: pre_audit_pagenumber = 1 try: print("total_page_af = ", total_page_af ) postauditpagenumber = int(total_page_af) except: print("total_page_af 1") postauditpagenumber = 1 try: preaudit_line_no = int(line_count_before_audit) except: preaudit_line_no = 1 try: postaudit_line_no = int(line_count_after_audit) except: postaudit_line_no = 1 try: print("unique_script_languages",unique_script_languages) script_language = str(unique_script_languages) # to be fetched by conversin function except: script_language = "---" try: print("unique_dialogue_languages",unique_dialogue_languages) dialogue_language = str(unique_dialogue_languages) # to be fetched by conversin function except: dialogue_language = "---" print("scriptname",scriptname) print("author",author) print("pre_audit_pagenumber",pre_audit_pagenumber) print("postauditpagenumber",postauditpagenumber) print("preaudit_line_no",preaudit_line_no) print("postaudit_line_no",postaudit_line_no) ''' additiona model information ''' #self.audit_model_obj.number_of_pages = int(postauditpagenumber) # time_per_page = 30 # base time 
= 120 # no_of_pages = 10 # formula of counting pages = (time_per_page + base time) * no_of_pages try: self.audit_model_obj.screenplay_language = script_language self.audit_model_obj.dialogue_language = dialogue_language self.audit_model_obj.number_of_pages = int(postauditpagenumber) print("script language, dialogue language, post audit pagenumber is update to the audit models") except: print("page number and language insertion failed") pass audit_report_buffer = sf.print_audit_report_tabular_docx(para_filetered_audut_df,scriptname,author,pre_audit_pagenumber,postauditpagenumber,preaudit_line_no,postaudit_line_no,script_language,dialogue_language) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.print_audit_report_tabular_docx 87 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") # audit_report_path = self.base_file_path + "audit_report_doc.docx" # report_data = Document(audit_report_buffer) # report_data.save(audit_report_path) req_file = ContentFile(audit_report_buffer.read(), audit_report_name) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.print_audit_report_tabular_docx 87 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n"+str(req_file)) print("req_file") try: script = Script.objects.get(id=self.script_id) script.no_of_pages = int(Final_pdf_page_count) script.save() # user_id = script.screenplay.user.id # Now, 'user_id' contains the user.id associated with the given script_id except Exception as e: print(e) # Handle the case where the script with the given ID doesn't exist # user_id = None print("No_of_pages not insertd") # try: # update_juggernaut(user_id=user_id,service_name='audit',audit_pages = int(postauditpagenumber)) # except: # print("the update_juggernaut didnt work") # req_file = File.objects.get(script=self.script_id) # req_file.type= "audit-report" # req_file.file = file # req_file.save() File.objects.create( script=Script.objects.get(id=self.script_id), type="audit-report", 
file=req_file, ) try: end_time_count = time.time() total_duration = end_time_count - self.start_time_count hours, remainder = divmod(total_duration, 3600) minutes, seconds = divmod(remainder, 60) text_time = f"Program ran for {str(hours)} hours, {str(minutes)} minutes, and {str(seconds)} seconds. for script_id= {str(self.script_id)} which has pdf pages of {pre_audit_pagenumber}." print(str(text_time)) t_time_file = self.total_time_file + "/tail_errors.txt" with open(t_time_file, "a") as file008: file008.write(str(text_time) + "\n") with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("87 function complete \n") except: pass return True def script_meta(self): pass def audit_fdx(self): # fdx to audited csv para_df = pd.DataFrame() para_df = sf.fdx_to_audited_df(self.input_script) # save audited csv to file system audited_file_name = self.script_name + ".csv" req_file = ContentFile( (para_df.to_csv(index=False, path_or_buf=None)).encode("utf-8"), audited_file_name, ) File.objects.create( script=Script.objects.get(id=self.script_id), type="script-csv", file=req_file, ) print("csv created") try: self.audit_model_obj.isfdx = True self.audit_model_obj.save() print("isfdx True saved") except Exception as exp: print(repr(exp)) language_check_df = sf.check_and_copy_rows(para_df) try: script_language, dialogue_language = sf.language_detector_for_csv(language_check_df) print("script_language",script_language) print("dialogue_language",dialogue_language) unique_script_languages = ', '.join(set(lang[0] for lang in script_language)) unique_dialogue_languages = ', '.join(set(lang[0] for lang in dialogue_language)) print("langauage detection worked") except Exception as exp: print(repr(exp)) unique_script_languages = "" unique_dialogue_languages = "" print("Langauuge detectedion csv didnt work") try: self.audit_model_obj.screenplay_language = unique_script_languages self.audit_model_obj.dialogue_language = unique_dialogue_languages 
self.audit_model_obj.save() print("audit lang saved") except Exception as exp: print(repr(exp)) self.audit_model_obj.screenplay_language = "ENGLISH" self.audit_model_obj.dialogue_language = "ENGLISH" print("audot lang didnt save") # print("In the total number of pages count") # file_model_objects = File.objects.filter(script=self.script_id) # audit_file_object = file_model_objects.get(type="script-csv") # read_df = pd.read_csv(audit_file_object.file) # print("csv fetched") try: print(para_df) docx = sf.csv_to_docx(para_df) audited_docx_path = self.base_file_path + "csv_to_docx_audited.docx" # temp_file_stream = BytesIO() print("docx saved") docx.save(audited_docx_path) # temp_file_stream.seek(0) docx_file = ContentFile( open(audited_docx_path, 'rb').read(), "from_audited_csv_to_document.docx", ) File.objects.create( script=Script.objects.get(id=self.script_id), type="script-docx", file=docx_file, ) print("script-docx object created") # output_docx_after_audit = self.base_file_path + "audited_text.docx" pdf_audit_file_path = self.base_file_path + "csv_to_docx_audited.pdf" print("converted to docx") try: # total_page_af = sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path) sf.countPages(audited_docx_path,pdf_audit_file_path,self.base_file_path) print("fdx : docx to pdf was create at", str(pdf_audit_file_path) ) try: total_page_af = sf.PdfCounter(pdf_audit_file_path) print("total pages af = ", total_page_af) print("hehehehehe") self.audit_model_obj.number_of_pages = int(total_page_af) self.audit_model_obj.save() except Exception as exp: print(repr(exp)) print("try except total pages didnt work") except Exception as exp: print("fdx docx to pdf conversion didnt work") print("total_page_af : ", exp) except Exception as exp: print("csv to docs didnt work") print(repr(exp)) return True def quick_audit(self, lang: str = None): df, audit_df = self.before_audit(lang) ## get the indents count count_green = 0 count_amber = 0 total_count = len(df) 
all_indents = df["ssc"].value_counts() print(all_indents) all_indents = df["ssc"].value_counts().sort_index().reset_index() # print(all_indents) for index in all_indents.index: # print(all_indents['index'][index]) if str(all_indents["index"][index]) in ("15", "25", "30", "35"): count_green += all_indents["ssc"][index] elif str(all_indents["index"][index]) in ( "0", "14", "16", "24", "26", "29", "31", "34", "36", ): count_amber += all_indents["ssc"][index] elif all_indents["index"][index] > 62: count_amber += all_indents["ssc"][index] print(all_indents["index"].tolist()) print(count_green, count_amber, total_count) percent_good = ((count_green + count_amber) / total_count) * 100 if percent_good > 80: print("most lines are within prescribed indents", percent_good) quick_audit_flag = "pass" else: print("most lines are not within prescribed indents", percent_good) quick_audit_flag = "fail" with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("\nafter quick audit : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") print(quick_audit_flag) def get_character_list(self, lang: str = None): if not self.audit_model_obj.pre_audit_run: df, _ = self.before_audit(lang) else: df_path = os.path.join(self.base_file_path, "space_between_words_removed.csv") df = pd.read_csv(df_path) df_indents = df[["line_no", "data", "ssc", "parenthetical"]] df_indents.fillna("", inplace=True) for index in df_indents.index: data = df_indents["data"][index] if df_indents["parenthetical"][index] == "PartMidEnd": par_pos = re.search("\(", data).start() df_indents["data"][index] = data[0:par_pos].strip() df_indents["parenthetical"][index] = "Absent" elif data.strip(): df_indents["data"][index] = data.strip() df_indents = df_indents.loc[df_indents["parenthetical"] == "Absent", :] df_indents["ssc"].value_counts().sort_index() df_indents["ssc"].value_counts().sort_index().reset_index() all_indents = df_indents["ssc"].value_counts().sort_index().reset_index() if 35 in 
all_indents["index"].tolist(): if df_indents["ssc"].value_counts().sort_index()[35] > 3: sp_indent = 35 else: ps_sp_indents = df_indents.loc[ (df_indents["ssc"] >= 32) & (df_indents["ssc"] <= 40), : ] if not ps_sp_indents.empty: sp_indent = ( ps_sp_indents["ssc"] .value_counts() .sort_values(ascending=False) .reset_index()["index"][0] ) else: sp_indent = 35 # sp_indent = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]['index'] else: ps_sp_indents = df_indents.loc[ (df_indents["ssc"] >= 32) & (df_indents["ssc"] <= 40), : ] if not ps_sp_indents.empty: sp_indent = ( ps_sp_indents["ssc"] .value_counts() .sort_values(ascending=False) .reset_index()["index"][0] ) else: sp_indent = -1 # sp_indent = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]['index'] # third_indents = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3] try: character_list = df_indents.loc[ df_indents["ssc"] == sp_indent, "data" ].unique() except: character_list = [] with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("\nafter get_character_list : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") return character_list def get_character_subset(self, character_list): # if character_list is None: # character_list = self.get_character_list() audited_linewise_csv = os.path.join(self.base_file_path, "after_audit1.csv") # replaced by after_audit1.csv foldername = "character_subset" output_dir = os.path.join(self.base_file_path, foldername) if not os.path.exists(output_dir): os.mkdir(output_dir) df_after_audit = pd.read_csv(audited_linewise_csv) df_after_audit.fillna("", inplace=True) df_after_audit["line_no"] = df_after_audit["line_no"].astype(int) scenes = df_after_audit.loc[ (df_after_audit["Identification_Status"] == "ps1") | (df_after_audit["Identification_Status"] == "ps2"), ["line_no", "data", "Identification_Status"], ] scene_indexes = scenes.index last_index = df_after_audit.index[-1] character_scripts_dict = dict() for 
character in character_list: try: print("processing character subset for", character) except: pass output_subset_script_txt = os.path.join( output_dir, (self.script_name.rsplit(".", 1)[0] + "_" + str(character) + ".txt"), ) output_subset_script_docx = os.path.join( output_dir, (self.script_name.rsplit(".", 1)[0] + "_" + str(character) + ".docx"), ) i, j = 0, 1 character_in_scenes = [] character_lines = [] while j <= len(scene_indexes): scene_no = i + 1 start = scene_indexes[i] if j < len(scene_indexes): end = scene_indexes[j] else: end = last_index + 1 for index in range(start, end): data = df_after_audit["data"][index] if re.search(character.upper(), data.strip()): character_lines.append(start) # print(scene_no,index,data) character_in_scenes.append(scene_no) character_lines.append(index) rev_index = index - 1 rev_index_is = df_after_audit["Identification_Status"][ rev_index ] character_lines.append(rev_index) # pvs_data = df_after_audit['data'][rev_index-1] # print(rev_index,pvs_data) try: rev_index_before_is = df_after_audit[ "Identification_Status" ][rev_index - 1] except: rev_index_before_is = "" # while rev_index != start and rev_index_is != 'ps4' and rev_index_is != 'ps1' and rev_index_is != 'ps7' : while ( rev_index != start and rev_index_is != "ps4" and rev_index_is != "ps1" and not ( rev_index_is == "ps6" and rev_index_before_is == "blank" ) ): rev_index = rev_index - 1 pvs_data = df_after_audit["data"][rev_index] # print(rev_index,pvs_data) character_lines.append(rev_index) rev_index_is = df_after_audit["Identification_Status"][ rev_index ] fwd_index = index fwd_index_is = df_after_audit["Identification_Status"][ fwd_index ] while fwd_index_is != "blank" and fwd_index != "ps15": fwd_index = fwd_index + 1 character_lines.append(fwd_index) fwd_index_is = df_after_audit["Identification_Status"][ fwd_index ] i += 1 j += 1 character_in_scenes = list(set(character_in_scenes)) character_lines = list(set(character_lines)) print(character_lines) 
character_lines.sort() print(character_lines) character_df = df_after_audit[df_after_audit.index.isin(character_lines)] character_df.reset_index(drop=True, inplace=True) character_df = sf.prep_for_audit(character_df) # test_path = os.path.join(output_dir,os.path.splitext(input_filename)[0])+ '_' + str(character) + '_test1.csv' # character_df.to_csv(test_path,index= False) character_df = sf.run_audit_on_identified(character_df) # test_path = os.path.join(output_dir,os.path.splitext(input_filename)[0])+ '_' + str(character) + '_test2.csv' # character_df.to_csv(test_path,index= False) ch_para_df = sf.merge_line_to_para(character_df) # ch_para_df.to_csv(csv_parawise_status, index = False) sf.sa_wrapped_output_to_docx(ch_para_df, output_subset_script_docx) character_scripts_dict[character] = output_subset_script_docx # sf.conv_docx_to_txt(output_subset_script_docx,output_subset_script_txt) with open(output_subset_script_txt, "w", encoding="utf-8") as fout: for index in character_lines: print(df_after_audit["Identification_Status"][index]) try: if str(df_after_audit["Identification_Status"][index]) == "ps1": fout.writelines("\n") except: pass data = df_after_audit["data"][index] try: print(data) except: pass fout.writelines(str(data)) fout.writelines("\n") try: if ( df_after_audit["Identification_Status"][index] == "ps1" or df_after_audit["Identification_Status"][index] == "ps3" ): fout.writelines("\n") except: pass with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("\nafter get_character_subset : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") return character_scripts_dict def audit_in_background(self): # if os.fork() != 0: # return print("Running in background") end_time = datetime.datetime.now() try: extension = self.input_script.rsplit(".", 1)[-1] if extension == 'fdx': self.audit_fdx() else: self.audit() self.audit_model_obj.status = States.SUCCESS self.audit_model_obj.save() print("Audit Success!!!!!!!!!!!!!!!!!!!!!!!") end_time = 
datetime.datetime.now() with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("\n\n****AUDITING IS SUCCESSFUL****\n") print(end_time) except Exception as exp: self.audit_model_obj.status = States.FAILURE self.audit_model_obj.results = exp self.audit_model_obj.error_msg = "FAILED" self.audit_model_obj.save() print(end_time) if __name__ == "__main__": naudit = NeutralAudit("123", True) naudit.get_character_subset()