# NOTE(review): the original file contained stray VCS timestamp lines
# (e.g. "2024-04-27 09:33:09 +00:00") interleaved with the code; they are
# not valid Python and have been removed.  A duplicate `import datetime`
# was also dropped.
import datetime
import os
import re
import subprocess
import sys
import time
import warnings
from io import BytesIO
from pathlib import Path

import pandas as pd
import pytz
from django.core.files.base import ContentFile

from centralisedFileSystem.models import File, Script, ScreenPlay
from scriptAudit import sa_functions as sf
from scriptAudit import sa_functions_english as sf_eng
from scriptAudit.models import ScriptAuditModel, States

# Silence noisy library warnings for the duration of the audit run.
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Legacy pandas behaviour: the audit code mutates frames through chained
# indexing, so copy-on-write stays off and the SettingWithCopy warning is
# disabled.
pd.options.mode.copy_on_write = False
pd.options.mode.chained_assignment = None
class NeutralAudit:
    """Runs a formatting audit on an already-uploaded screenplay script."""

    def __init__(
        self,
        script_id: str = None,
        log: bool = False,
    ) -> None:
        """
        To audit a script that has already been uploaded.
        _________________________________________________________________
        Parameters :
            script_id : str -> id of the script to be audited
                        default = None
            log : bool -> save logs in <audit_folder>/<script_id>/_log.txt
                          and keep intermediate files
                        default = False
        _________________________________________________________________
        Return :
            None
        """
        self.start_time_count = time.time()
        print("<<<<<<<<<<<<<<<<<<<<<<<<<")
        self.matrices_path = str(Path(__file__).resolve().parent) + "/matrices/"
        self.total_time_file = str(Path(__file__).resolve().parent)
        print(script_id, "SCRIPT-ID IS HERE|| AYYA")
        self.script_id = script_id

        # All per-script artefacts live under media/audit_folder/<script_id>/.
        audit_root_dir = (
            str(Path(__file__).resolve().parent.parent) + "/media/audit_folder/"
        )
        self.script_name = str(self.script_id)
        output_dir = os.path.join(audit_root_dir, self.script_name)

        # Locate the originally uploaded script file for this script id.
        file_to_audit = File.objects.get(
            script=script_id,
            type="script-original",
        )
        self.input_script = file_to_audit.file.path

        if not os.path.exists(output_dir):
            try:
                os.makedirs(output_dir, exist_ok=True)
            except Exception as exp:
                # Fall back to shell tools when makedirs fails
                # (e.g. a permissions problem on the media folder).
                print(repr(exp))
                subprocess.run(["mkdir", output_dir])
                subprocess.run(["chmod", "777", output_dir])

        self.base_file_path = str(output_dir) + "/"
        self.csv_removed_space_between_words = (
            self.base_file_path + "space_between_words_removed.csv"
        )
        self.audit_report_csv = self.base_file_path + "audit_spreadsheet.csv"

        # BUGFIX: the original unconditionally opened os.devnull, assigned it
        # to sys.stdout, and then immediately re-assigned sys.stdout in both
        # branches below -- leaking the devnull handle every time.  Redirect
        # only when a log file is actually wanted.
        if log:
            log_file = self.base_file_path + "_log.txt"
            sys.stdout = open(log_file, "w", encoding="utf-8")
            self.gen_int_files = True
        else:
            self.gen_int_files = False
            sys.stdout = sys.__stdout__

        self.audit_model_obj = ScriptAuditModel.objects.get(
            script=Script.objects.get(
                id=self.script_id,
            )
        )

        time_file = self.base_file_path + "time_taken.txt"
        start_time = datetime.datetime.now()
        print(start_time)
        with open(time_file, "a") as file007:
            # BUGFIX: the original wrote "started\n\n" twice in a row;
            # a single marker is enough.
            file007.write("started\n\n")
    def __del__(self) -> None:
        # Restore the real stdout in case __init__ redirected it to a log
        # file and the redirect is still active when this object is
        # garbage-collected.
        sys.stdout = sys.__stdout__
def update_audit_df ( self , df , audit_df ) :
print ( " inside update audit df " )
print ( df . dtypes )
print ( audit_df . dtypes )
lines_not_removed = audit_df . loc [ audit_df [ " line_removed " ] != " Yes " ] . index . to_list ( )
audit_df . sort_index ( inplace = True )
# audit_df.reset_index().to_csv(audit_report_csv,index =False)
audit_df [ " audited_line_no " ] = " "
audited_line_no = 1
for line in lines_not_removed :
new_data = " "
try :
new_data = df . loc [ df [ " line_no " ] == line , " data " ] . values [ 0 ]
except :
pass
# print(new_data)
try :
audit_df [ " Identification_Status " ] [ line ] = df . loc [
df [ " line_no " ] == line , " Identification_Status "
] . values [ 0 ]
except :
pass
audit_df [ " scene_number " ] [ line ] = df . loc [
df [ " line_no " ] == line , " scene_number "
] . values [ 0 ]
audit_df [ " data_corrected " ] [ line ] = new_data
audit_df [ " line_removed " ] [ line ] = " No "
audit_df [ " audited_line_no " ] [ line ] = audited_line_no
audited_line_no + = 1
# print(audit_df.loc[audit_df['line_no'] == line, 'data_corrected'])
audit_df . reset_index ( ) . to_csv ( self . audit_report_csv , index = False )
return audit_df
def update_audit_df_intro ( self , df , audit_df ) :
print ( " update_audit_df_intro " )
audit_df . reset_index ( inplace = True , drop = True )
new_data = " "
for line in audit_df . index :
try :
print ( " line " , line )
if audit_df [ " introduction " ] [ line ] == " Yes " :
try :
new_data = df . loc [ df [ " line_no " ] == line , " data " ] . values [ 0 ]
except Exception as e :
print ( " Exception 174: " , e )
pass
audit_df [ " data_corrected " ] [ line ] = new_data
except Exception as e :
print ( e )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " \n \n update_audit_df_intro : complete \n " )
return audit_df
def update_audit_df_appendix ( self , df , audit_df ) :
new_data = " "
print ( audit_df . index )
for line in audit_df . index :
if audit_df [ " appendix " ] [ line ] == " Yes " :
try :
new_data = df . loc [ df [ " line_no " ] == line , " data " ] . values [ 0 ]
except :
pass
audit_df [ " data_corrected " ] [ line ] = new_data
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " \n \n update_audit_df_appendix : complete \n " )
return audit_df
def update_is_identified ( self , df ) :
print ( " Updating is Identified " )
df [ " Identification_Status " ] . fillna ( " " , inplace = True )
for index in df . index :
print ( index , df [ " Identification_Status " ] [ index ] )
try :
if df [ " Identification_Status " ] [ index ] :
line_pos = df [ " Identification_Status " ] [ index ] . split ( " ; " )
pos_count = len ( line_pos )
else :
pos_count = 0
except :
pos_count = 0
print ( pos_count )
if pos_count == 1 :
df [ " isIdentified " ] [ index ] = " Yes "
else :
df [ " isIdentified " ] [ index ] = " No "
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " \n \n Inside update_is_identified : complete \n " )
return df
    def before_audit(self, lang: str = None):
        """Pre-processing pipeline executed before the main audit.

        Converts the uploaded script to text and CSV, estimates the audit
        duration from the page count, trims introduction and appendix,
        strips page numbers and surplus blank lines, merges broken lines
        and removes stray spaces, keeping the audit tracker dataframe in
        sync after every step.

        Parameters:
            lang : str -> language of the script.  NOTE(review): every
                   branch currently routes through the English rules
                   (sf_eng.*_english), None included -- confirm whether
                   other languages are still planned.

        Returns:
            (df, audit_df) -> the cleaned script dataframe and the audit
            tracker dataframe.
        """
        # Working-file paths; everything lands in this script's audit folder.
        output_converted_txt = self.base_file_path + "temp.txt"
        output_converted_docx = self.base_file_path + "temp.docx"
        csv_for_pre_processing = self.base_file_path + "for_pre_processing.csv"
        csv_for_processing = self.base_file_path + "for_processing.csv"
        csv_prepped_for_audit = self.base_file_path + "prepped_for_audit.csv"
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("Inside before audit : (0-16)\n\n")
        ## convert pdf/docx to text
        sf.conv_to_txt(
            self.input_script,
            output_converted_docx,
            output_converted_txt
        )
        try:
            # Best-effort duration estimate: round-trip the text through
            # docx/pdf to count pages, then derive an ETA from the count.
            output_docx_from_orginal_text = self.base_file_path + "original_text1.docx"
            pdf_file_path = self.base_file_path + "original_text1.pdf"
            print("b4 txt to docx")
            sf.convert_txt_to_docx(output_converted_txt, output_docx_from_orginal_text)
            print("IN THE BEGINING OF AUDIT PDF PAGES")
            print("b4 page count of pdf")
            sf.countPages(output_docx_from_orginal_text, pdf_file_path, self.base_file_path)
            print("temp txt converted to docx")
            # Fallback page count used when the PDF counter fails below.
            self.total_page_bf = str(1)
            try:
                print("int try pdf bf")
                self.total_page_bf = sf.PdfCounter(pdf_file_path)
                print("taotal_page_bf", str(self.total_page_bf))
            except Exception as exp:
                print(repr(exp))
                print("page bf didnt work")
                pass
            # Rough ETA: ~26 s per page plus a 120 s fixed cost, in minutes.
            time_per_page = 26
            base_time = 120
            no_of_pages = int(self.total_page_bf)
            formula_of_counting_pages = (time_per_page * no_of_pages) + base_time
            print("time required for auditing is :", formula_of_counting_pages)
            extimated_time = round(formula_of_counting_pages / 60, 1)
            print("extimated_time:", extimated_time)
            print("Exstimated time is updated")
            kolkata_time = datetime.datetime.now(pytz.timezone('Asia/Kolkata'))
            print(kolkata_time)
            thirty_mins_later = kolkata_time + datetime.timedelta(minutes=extimated_time)
            formatted_time = thirty_mins_later.strftime("%B %d, %Y %I:%M %p")
            # NOTE(review): expected_duration is set on the cached model
            # instance but only persisted if the except-branch save at the
            # end of this method runs -- the try-branch re-fetches a fresh
            # object.  Confirm this is intended.
            self.audit_model_obj.expected_duration = formatted_time
            print(formatted_time)
        except:
            # Best-effort: a failed page count / ETA must not abort the audit.
            pass
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.conv_to_text 1 : before audit\n")
        ## convert to df
        sf.conv_to_csv(output_converted_txt, csv_for_pre_processing)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.conv_to_csv 2 : before audit\n")
        df = pd.read_csv(csv_for_pre_processing, encoding="utf8")
        print("before assign weights:")
        print(df.dtypes)
        # Seed the weights column before sf.pre_assign_wts fills it in.
        df['preassigned_weights'] = ''
        df = sf.pre_assign_wts(df)
        print(df.dtypes)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.pre_assign_wts 3 : before audit\n")
        df = df.sort_index().reset_index(drop=True)
        df.to_csv(csv_for_processing, index=False)
        df["data"].fillna("", inplace=True)
        ## make df to track audit
        audit_df = pd.DataFrame()
        df_1st = pd.DataFrame(df)
        df_1st.to_csv(self.base_file_path + "very_first_df_feed_to_create_audit_df.csv", index=False)
        audit_df = sf.create_audit_df(df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.create_audit_df 4 : before audit\n")
        audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
        print(df.dtypes)
        print(audit_df.dtypes)
        audit_df.reset_index().to_csv(self.base_file_path + "very_first_audit_df_feed_to_create_audit_df.csv", index=False)
        print("LANGUAGE IS", lang)
        ## trim intro
        # NOTE(review): all three branches below are identical -- the English
        # trimmer is applied regardless of lang.
        if lang:
            if lang.upper() == "ENGLISH":
                sf_eng.trim_intro_english(df, audit_df)
                with open(self.base_file_path + "time_taken.txt", "a") as file007:
                    file007.write("after sf_eng.trim_intro_english (5) : before audit\n")
                df = self.update_is_identified(df)
            else:
                sf_eng.trim_intro_english(df, audit_df)
                with open(self.base_file_path + "time_taken.txt", "a") as file007:
                    file007.write("after sf_eng.trim_intro_english (6) : before audit\n")
                df = self.update_is_identified(df)
        else:
            sf_eng.trim_intro_english(df, audit_df)
            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_intro_english (7) : before audit\n")
            df = self.update_is_identified(df)
        # Drop the lines the trimmer flagged as removed and re-number.
        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        print(lines_not_removed)
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)
        audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
        print("Trimming Appendix")
        ## trim appendix
        # NOTE(review): same duplication as the intro branches above.
        if lang:
            if lang.upper() == "ENGLISH":
                sf_eng.trim_appendix_english(df, audit_df)
                with open(self.base_file_path + "time_taken.txt", "a") as file007:
                    file007.write("after sf_eng.trim_appendix_english 8: before audit\n")
                df = self.update_is_identified(df)
            else:
                sf_eng.trim_appendix_english(df, audit_df)
                with open(self.base_file_path + "time_taken.txt", "a") as file007:
                    file007.write("after sf_eng.trim_appendix_english 9: before audit\n")
                df = self.update_is_identified(df)
        else:
            sf_eng.trim_appendix_english(df, audit_df)
            with open(self.base_file_path + "time_taken.txt", "a") as file007:
                file007.write("after sf_eng.trim_appendix_english 10 : before audit\n")
            df = self.update_is_identified(df)
        ## remove page numbers
        sf.remove_page_numbers(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.remove_page_numbers 11 : before audit\n")
        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        print(lines_not_removed)
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)
        audit_df.reset_index().to_csv(self.audit_report_csv, index=False)
        ## prepare for audit
        df = sf.prep_for_audit(df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.prep_for_audit 12 : before audit\n")
        df.to_csv(csv_prepped_for_audit, index=False)
        ## remove extra blank lines
        sf.remove_extra_blank_lines(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.remove_extra_blank_lines 13 : before audit\n")
        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        print(lines_not_removed)
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)
        ## blank lines directly after a parenthetical are noise
        sf.remove_blank_line_after_parenthetical(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.remove_blank_line_after_parenthetical 14 : before audit\n")
        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        print(lines_not_removed)
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)
        ## re-join sentences that were split across physical lines
        sf.merge_broken_lines(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.merge_broken_lines 15 : before audit\n")
        lines_not_removed = audit_df.loc[
            audit_df["line_removed"] != "Yes"
        ].index.to_list()
        df = df.loc[df["line_no"].isin(lines_not_removed), :]
        df = df.sort_index().reset_index(drop=True)
        ## collapse stray spaces inside words
        sf.remove_space_between_words(df, audit_df)
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("after sf.remove_space_between_words 16 : before audit\n")
        df.to_csv(self.csv_removed_space_between_words, index=False)
        print("updating audit df")
        # Snapshot both frames before the final tracker merge (debug aid).
        df1 = pd.DataFrame(df)
        df1.to_csv(self.base_file_path + "first_df.csv", index=False)
        audit_df1 = pd.DataFrame(audit_df)
        audit_df1.to_csv(self.base_file_path + "first_audit_df.csv", index=False)
        audit_df = self.update_audit_df(df, audit_df)
        # Mark the pre-audit phase complete on the audit model; fall back to
        # the instance cached in __init__ if the fresh fetch fails.
        try:
            audit_model_obj = ScriptAuditModel.objects.get(
                script=Script.objects.get(
                    id=self.script_id,
                )
            )
            audit_model_obj.pre_audit_run = True
            audit_model_obj.save()
            print("TRY")
        except Exception as exp:
            print(repr(exp))
            print("EXCEPT")
            self.audit_model_obj.pre_audit_run = True
            self.audit_model_obj.save()
        print("PRE AUDIT DONE")
        with open(self.base_file_path + "time_taken.txt", "a") as file007:
            file007.write("before audit complete : PRE AUDIT DONE\n\n")
        return df, audit_df
def audit ( self , lang : str = None ) - > None :
"""
Run Audit on NeutralAudit object .
_________________________________________________________________
Parameters :
lang : str - > language of the provided script .
default = None ( language nuteral rules )
_________________________________________________________________
Return :
None
_________________________________________________________________
"""
# ---------------------------changes to save _audited.csv in media/scriptpage/script/folder
# csv_parawise_status = self.audited_script_path
# ---------------------------changes to save _audited.csv in media/scriptpage/script/folder
print ( " <<<<<<<<<<<<<<<<<<<<<<<<< " )
print ( " <<<<<<<<<<<<<<<<<<<<<<<<< " , self . base_file_path )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " INSIDE AUDIT (1-87): audit \n \n " )
csv_after_first_strict_conditions = (
self . base_file_path
+ " after_first_strict_conditions.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_first_strict_conditions.csv 1 : audit \n " )
csv_after_gen_and_sort_weights = (
self . base_file_path
+ " after_gen_and_sort_weights.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_gen_and_sort_weights.csv 2 : audit \n " )
csv_after_examined_speaker_pos = (
self . base_file_path
+ " after_examined_speaker_pos.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_examined_speaker_pos.csv 3 : audit \n " )
csv_after_examined_speaker_next_lines = (
self . base_file_path
+ " after_examined_speaker_next_lines.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_examined_speaker_next_lines.csv 4 : audit \n " )
csv_after_pnnbl_ineligible = (
self . base_file_path
+ " after_pnnbl_ineligible1.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_pnnbl_ineligible1 (5) : audit \n " )
csv_after_examine_same_content_lines = (
self . base_file_path
+ " after_examine_same_content_lines.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_examine_same_content_lines (6) : audit \n " )
csv_after_examined_action_pos_part1 = (
self . base_file_path
+ " _after_examined_action_pos_part1.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examined_action_pos_part1.csv (7) : audit \n " )
csv_after_pnnbl_inelgible_after_action_pos_part1 = (
self . base_file_path
+ " _after_pnnbl_inelgible_after_action_pos_part1.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_pnnbl_inelgible_after_action_pos_part1.csv (8) : audit \n " )
csv_after_examined_action_pos_part2 = (
self . base_file_path
+ " _after_examined_action_pos_part2.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examined_action_pos_part2.csv (9) : audit \n " )
csv_after_pnnbl_inelgible_after_action_pos_part2 = (
self . base_file_path
+ " _after_pnnbl_inelgible_after_action_pos_part2.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_pnnbl_inelgible_after_action_pos_part2.csv (10) : audit \n " )
csv_after_examined_same_indent_bunch = (
self . base_file_path
+ " _after_examined_same_indent_bunch.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examined_same_indent_bunch.csv (11) : audit \n " )
csv_after_pnnbl_inelgible_after_same_indent = (
self . base_file_path
+ " _after_pnnbl_inelgible_after_same_indent.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_pnnbl_inelgible_after_same_indent.csv (12) : audit \n " )
csv_after_examined_relative_indent_bunch = (
self . base_file_path
+ " _after_examined_relative_indent_bunch.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examined_relative_indent_bunch.csv (13) : audit \n " )
csv_after_examined_speaker_next_lines_after_relative_indent = (
self . base_file_path
+ " _after_examined_speaker_next_lines_after_relative_indent.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examined_speaker_next_lines_after_relative_indent.csv (14) : audit \n " )
csv_after_pnnbl_inelgible_after_relative_indent = (
self . base_file_path
+ " after_pnnbl_inelgible_after_relative_indent_bunch.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_pnnbl_inelgible_after_relative_indent_bunch.csv (15) : audit \n " )
csv_examined_speaker_using_indent = (
self . base_file_path
+ " after_examined_speaker_using_indent.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_examined_speaker_using_indent.csv (16) : audit \n " )
csv_after_examined_speaker_next_lines_after_pos_sp_indent = (
self . base_file_path
+ " _after_examined_speaker_next_lines_after_pos_sp_indent.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examined_speaker_next_lines_after_pos_sp_indent.csv (17) : audit \n " )
csv_after_pnnbl_inelgible_after_pos_sp_indent = (
self . base_file_path
+ " _after_pnnbl_inelgible_after_pos_sp_indent.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_pnnbl_inelgible_after_pos_sp_indent.csv (18) : audit \n " )
csv_examined_speaker_extension = (
self . base_file_path
+ " _after_examined_speaker_extension.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examined_speaker_extension.csv (19) : audit \n " )
csv_after_examined_speaker_next_lines_after_speaker_extension = (
self . base_file_path
+ " _after_examined_speaker_next_lines_after_speaker_extension.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examined_speaker_next_lines_after_speaker_extension.csv(20) : audit \n " )
csv_after_pnnbl_inelgible_after_speaker_extension = (
self . base_file_path
+ " _after_pnnbl_inelgible_after_speaker_extension.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_pnnbl_inelgible_after_speaker_extension.csv (21) : audit \n " )
csv_after_examined_action_using_top2 = (
self . base_file_path
+ " _after_examined_action_using_top2.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examined_action_using_top2.csv (22) : audit \n " )
csv_after_pnnbl_inelgible_after_action_using_top_pnnbl = (
self . base_file_path
+ " _after_pnnbl_inelgible_after_action_using_top_pnnbl.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_pnnbl_inelgible_after_action_using_top_pnnbl.csv (23) : audit \n " )
csv_after_refined_action = (
self . base_file_path
+ " _after_refined_action.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_refined_action.csv (24) : audit \n " )
csv_after_pnnbl_inelgible_after_refined_action = (
self . base_file_path
+ " _after_pnnbl_inelgible_after_refined_action.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_pnnbl_inelgible_after_refined_action.csv (25) : audit \n " )
csv_after_eligibility_using_identified_pnnbl = (
self . base_file_path
+ " _after_eligibility_using_identified_pnnbl.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl.csv (26) : audit \n " )
csv_after_top_identification_part1 = (
self . base_file_path
+ " _after_top_identification_part1.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top_identification_part1.csv (27) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two = (
self . base_file_path
+ " after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two.csv (28) : audit \n " )
csv_after_examined_speaker_pos_after_top1 = (
self . base_file_path + " _after_examined_speaker_pos_after_top1.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examined_speaker_pos_after_top1.csv (29) : audit \n " )
csv_after_examined_speaker_next_lines_after_top1 = (
self . base_file_path + " after_examined_speaker_next_lines_after_top1.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_examined_speaker_next_lines_after_top1.csv (30) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top_part1 = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top_part1.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top_part1.csv (31) : audit \n " )
csv_after_examine_speaker_mix_part1 = (
self . base_file_path + " _after_examine_speaker_mix_part1.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examine_speaker_mix_part1.csv (32) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1 = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_speaker_mix_part1.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_speaker_mix_part1.csv (33) : audit \n " )
csv_after_examine_speaker_mix_part2 = (
self . base_file_path + " _after_examine_speaker_mix_part2.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examine_speaker_mix_part2.csv (34) : audit \n " )
csv_after_examined_speaker_pos_after_mix = (
self . base_file_path + " _after_examined_speaker_pos_after_mix.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examined_speaker_pos_after_mix.csv(35) : audit \n " )
csv_after_examined_speaker_next_lines_after_mix = (
self . base_file_path + " _after_examined_speaker_next_lines_after_mix.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examined_speaker_next_lines_after_mix.csv (36) : audit \n " )
csv_after_pnnbl_ineligible_after_mix = (
self . base_file_path + " _after_pnnbl_ineligible_after_mix.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_pnnbl_ineligible_after_mix.csv (37) : audit \n " )
csv_after_top_identification_part2 = (
self . base_file_path + " _after_top_identification_part2.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top_identification_part2.csv (38) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top_part2 = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top_part2.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top_part2.csv (39) : audit \n " )
csv_after_top_identification_part2_again = (
self . base_file_path + " _after_top_identification_part2_again.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top_identification_part2_again.csv (40) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top_part2_again = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top_part2_again.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top_part2_again.csv (41) : audit \n " )
csv_after_top_identification_part2_again_again = (
self . base_file_path + " _after_top_identification_part2_again_again.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top_identification_part2_again_again.csv(42) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top_part2_again_again = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top_part2_again_again.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top_part2_again_again.csv (43) : audit \n " )
csv_after_slug_identification = (
self . base_file_path + " _after_slug_identification.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_slug_identification.csv (44) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_slug_identification = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_slug_identification.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_slug_identification.csv (45) : audit \n " )
csv_after_top_identification_part1_again = (
self . base_file_path + " _after_top_identification_part1_again.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top_identification_part1_again.csv (46) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top_part1_again = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top_part1_again.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top_part1_again.csv (47) : audit \n " )
csv_after_top_identification_part3 = (
self . base_file_path + " _after_top_identification_part3.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top_identification_part3.csv (48) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top_part3 = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top_part3.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top_part3.csv (49) : audit \n " )
csv_after_top_identification_part4 = (
self . base_file_path + " _after_top_identification_part4.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top_identification_part4.csv (50) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top_part4 = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top_part4.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top_part4.csv (51) : audit \n " )
csv_after_top_identification_part5 = (
self . base_file_path + " _after_top_identification_part5.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top_identification_part5.csv (52) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top_part5 = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top_part5.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top_part5.csv (53) : audit \n " )
csv_after_top_identification_part6 = (
self . base_file_path + " _after_top_identification_part6.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top_identification_part6.csv (54) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top_part6 = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top_part6.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top_part6.csv (55) : audit \n " )
csv_after_top_identification_part7 = (
self . base_file_path + " _after_top_identification_part7.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top_identification_part7.csv (56) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top_part7 = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top_part7.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top_part7.csv (57) : audit \n " )
csv_after_top_identification_part8 = (
self . base_file_path + " _after_top_identification_part8.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top_identification_part8.csv (58) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top_part8 = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top_part8.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top_part8.csv (59) : audit \n " )
csv_after_examine_among_two = (
self . base_file_path + " _after_examine_among_two.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examine_among_two.csv (60) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_examine_among_two = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_examine_among_two.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_eligibility_using_identified_pnnbl_after_examine_among_two.csv (61) : audit \n " )
csv_after_examine_speaker_next_line_after_among_two = (
self . base_file_path + " _after_examine_speaker_next_line_after_among_two.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examine_speaker_next_line_after_among_two.csv (62) : audit \n " )
csv_after_top2_wt_diff = (
self . base_file_path + " _after_top2_wt_diff.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top2_wt_diff.csv (63) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top2_wt_diff.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top2_wt_diff.csv (64) : audit \n " )
csv_after_top2_wt_diff_again = (
self . base_file_path + " _after_top2_wt_diff_again.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top2_wt_diff_again.csv (65) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again.csv(66) : audit \n " )
csv_after_top_identification_part1_diluted = (
self . base_file_path + " _after_top_identification_part1_diluted.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_top_identification_part1_diluted.csv (67) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_top_part1_diluted = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_top_part1_diluted.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_top_part1_diluted.csv (68) : audit \n " )
#1.2
csv_after_examine_dial_between_action = (
self . base_file_path + " _after_examine_dial_between_action.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examine_dial_between_action.csv (69) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_examine_dial_between_action.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_examine_dial_between_action.csv (70) : audit \n " )
csv_after_examine_among_two_again = (
self . base_file_path + " _after_examine_among_two_again.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_examine_among_two_again.csv (71) : audit \n " )
csv_after_eligibility_using_identified_pnnbl_after_examine_among_two_again = (
self . base_file_path + " _after_eligibility_using_identified_pnnbl_after_examine_among_two_again.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " _after_eligibility_using_identified_pnnbl_after_examine_among_two_again.csv (72) : audit \n " )
csv_after_identify_remaining_as_top = (
self . base_file_path + " after_identifying_remaining_as_top.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_identifying_remaining_as_top.csv (73) : audit \n " )
csv_after_prep_for_audit_after_identification = (
self . base_file_path + " after_prep_for_audit_after_identification.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_prep_for_audit_after_identification.csv (74) : audit \n " )
csv_after_audit1 = self . base_file_path + " after_audit1.csv "
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_audit1.csv (75) : audit \n " )
csv_after_wrapping = self . base_file_path + " after_wrapping.csv "
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_wrapping.csv (76) : audit \n " )
csv_after_prep_for_audit_after_wrapping = (
self . base_file_path + " after_prep_for_audit_after_wrapping.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_prep_for_audit_after_wrapping.csv (77) : audit \n " )
csv_after_audit2 = self . base_file_path + " after_audit2.csv "
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " after_audit2.csv (78) : audit \n " )
output_linewise_docx = self . base_file_path + " audited_linewise.docx "
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " audited_linewise.docx (79) : audit \n " )
output_linewise_txt = self . base_file_path + " audited_linewise.txt "
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " audited_linewise.txt (80) : audit \n " )
audit_report_tabular_docx = self . base_file_path + " audit_report_tabular.docx "
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " audit_report_tabular.docx (81) : audit \n " )
csv_strict_conditions = self . matrices_path + " strict_conditions_230623.csv "
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " strict_conditions_230623.csv : audit \n " )
csv_pos_weights = self . matrices_path + " PS_Weights_250623_2.csv "
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " PS_Weights_250623_2.csv (83) : audit \n " )
csv_pnbl_nnbl = self . matrices_path + " pnbl_nnbl.csv "
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " pnbl_nnbl.csv (84) : audit \n " )
pnbl_eligibility_matrix = (
self . matrices_path + " pnbl_eligibility_matrix_250623.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " pnbl_eligibility_matrix_250623.csv (85) : audit \n " )
nnbl_eligibility_matrix = (
self . matrices_path + " nnbl_eligibility_matrix_250623.csv "
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " nnbl_eligibility_matrix_250623.csv (86) : audit \n " )
output_template = self . matrices_path + " ScriptTemplate5.docx "
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " ScriptTemplate5.docx (87) : audit \n " )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AUDIT : audit \n \n " )
df , audit_df = self . before_audit ( lang )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER ASSIGNING LOCATIONS AUDIT : audit \n \n " )
#######################################
sf . test_strict_conditions ( df , csv_strict_conditions )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.test_strict_conditions 1 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_first_strict_conditions , index = False )
## gen weights for possibilties ## add preassigned weights
df = sf . gen_pos_weights ( df , csv_pos_weights )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.gen_pos_weights 2 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
## language specific weights update
if lang :
if lang . upper ( ) == " ENGLISH " :
df = sf_eng . update_pos_wts_english ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf_eng.update_pos_wts_english 3 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = sf . sort_pos_decr_wts ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.sort_pos_decr_wts 4 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
#if self.gen_int_files:
df . to_csv ( csv_after_gen_and_sort_weights , index = False )
##
# (stray git-blame timestamp removed — extraction artifact, not source code)
## remove some columns
df . drop ( [ ' first_largest ' , ' second_largest ' , ' third_largest ' , ' fourth_largest ' , ' fifth_largest ' , ' sixth_largest ' , ' seventh_largest ' , ' eight_largest ' , ' ninth_largest ' , ' tenth_largest ' , ' eleventh_largest ' , ' twelth_largest ' , ' thirteenth_largest ' , ' fourteenth_largest ' , ' fifteenth_largest ' , ' sixteenth_largest ' , ' seventeenth_largest ' , ' eighteenth_largest ' , ' ninteenth_largest ' , ' tewenty_largest ' , ' tone_largest ' , ' ttwo_largest ' , ' tthree_largest ' , ' tfour_largest ' , ' tfive_largest ' , ' tsix_largest ' , ' tseven_largest ' , ' teight_largest ' ] , axis = 1 , inplace = True )
# (stray git-blame timestamp removed — extraction artifact, not source code)
sf . prep_for_pos_elimination ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.prep_for_pos_elimination 5 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df_bex1 = pd . DataFrame ( df )
df_bex1 . to_csv ( self . base_file_path + " df_update_audit_df_b_exam_speaker_1.csv " , index = False )
audit_df_bex1 = pd . DataFrame ( audit_df )
audit_df_bex1 . to_csv ( self . base_file_path + " audit_df_update_audit_df_b_exam_speaker_1.csv " , index = False )
## examine speaker possibilties
df = sf . examine_speaker_pos ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_pos 6 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df_ex1 = pd . DataFrame ( df )
df_ex1 . to_csv ( self . base_file_path + " df_update_audit_df_exam_speaker_1.csv " , index = False )
audit_df_ex1 = pd . DataFrame ( audit_df )
audit_df_ex1 . to_csv ( self . base_file_path + " audit_df_update_audit_df_exam_speaker_1.csv " , index = True )
if self . gen_int_files :
df . to_csv ( csv_after_examined_speaker_pos , index = False )
print ( " printing info based on audit_df " )
# df_b1 = pd.DataFrame(df)
# df_b1.to_csv(self.base_file_path + "df_update_audit_df_b1.csv", index = False)
print ( audit_df . head ( 10 ) , audit_df . dtypes )
try :
audit_df = audit_df . sort_values ( ' audited_line_no ' )
except :
audit_df [ ' audited_line_no ' ] = pd . to_numeric ( audit_df [ ' audited_line_no ' ] , errors = ' coerce ' )
audit_df = audit_df . sort_values ( ' audited_line_no ' )
audit_df_try1 = pd . DataFrame ( audit_df )
audit_df_try1 . to_csv ( self . base_file_path + " audit_df_update_audit_df_try1.csv " , index = True )
print ( audit_df . head ( ) )
try :
audit_df = pd . merge ( audit_df , df [ [ ' line_no ' ] ] , on = audit_df . index , how = ' left ' )
print ( audit_df . head ( ) )
# Set 'line_no' as index
audit_df . set_index ( ' line_no ' , inplace = True )
print ( audit_df . head ( ) )
audit_df_try2 = pd . DataFrame ( audit_df )
audit_df_try2 . to_csv ( self . base_file_path + " audit_df_update_audit_df_try2.csv " , index = True )
except Exception as e :
print ( e , audit_df . head ( ) )
pass
# try:
# audit_df.reset_index(drop=True, inplace=True)
# audit_df.set_index('line_no',inplace=True)
# except Exception as e:
# print(e)
print ( audit_df . head ( ) )
print ( audit_df . dtypes )
audit_df_b1 = pd . DataFrame ( audit_df )
audit_df_b1 . to_csv ( self . base_file_path + " audit_df_update_audit_df_b1.csv " , index = True )
audit_df = self . update_audit_df ( df , audit_df )
df_1 = pd . DataFrame ( df )
df_1 . to_csv ( self . base_file_path + " df_update_audit_df_1.csv " , index = True )
audit_df_1 = pd . DataFrame ( audit_df )
audit_df_1 . to_csv ( self . base_file_path + " audit_df_update_audit_df_1.csv " , index = True )
###
df = sf . examine_speaker_next_lines ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_next_lines 7 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_speaker_next_lines , index = False )
audit_df = self . update_audit_df ( df , audit_df )
audit_df_u7 = pd . DataFrame ( audit_df )
audit_df_u7 . to_csv ( self . base_file_path + " audit_df_update_audit_df_7.csv " , index = True )
## do while pnnbl ineligible
sf . prep_pnnbl_wts ( csv_pnbl_nnbl , self . matrices_path )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.prep_pnnbl_wts 8 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = sf . do_while_pnnbl_ineligible ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_pnnbl_ineligible 9 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_pnnbl_ineligible , index = False )
## examine same content
df = sf . examine_same_content_lines ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_same_content_lines 10 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_examine_same_content_lines , index = False )
### examine speaker next again
df = sf . examine_speaker_next_lines ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_next_lines 11 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
###df.to_csv(csv_after_examined_speaker_next_lines_after_same_content, index = False)
audit_df = self . update_audit_df ( df , audit_df )
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf . do_while_pnnbl_ineligible ( df )
df = self . update_is_identified ( df )
###df.to_csv(csv_after_pnnbl_ineligible_after_same_content, index = False)
################
df = sf . examine_action_possibilities_part1 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_action_possibilities_part1 12 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_action_pos_part1 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf . do_while_pnnbl_ineligible ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_pnnbl_ineligible 13 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_pnnbl_inelgible_after_action_pos_part1 , index = False )
################
df = sf . examine_action_possibilities_part2 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_action_possibilities_part2 14 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_action_pos_part2 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf . do_while_pnnbl_ineligible ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_pnnbl_ineligible 15 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_pnnbl_inelgible_after_action_pos_part2 , index = False )
################
df = sf . examine_same_indent_bunch ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_same_indent_bunch 16 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_same_indent_bunch , index = False )
audit_df = self . update_audit_df ( df , audit_df )
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf . do_while_pnnbl_ineligible ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_pnnbl_ineligible 17 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_pnnbl_inelgible_after_same_indent , index = False )
#####################
##for reorganisation
# df = pd.read_csv('Script_Shatranj_pnnbl_ineligible_same_indent_bunch_new_col_2.csv')
# csv_for_pos_elimination = os.path.join(self.output_dir,os.path.splitext(self.script_name)[0])+'_for_pos_elimination.csv'
#########################
df = sf . examine_relative_indent ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_relative_indent 18 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_relative_indent_bunch , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . examine_speaker_next_lines ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_next_lines 19 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_speaker_next_lines_after_relative_indent , index = False )
audit_df = self . update_audit_df ( df , audit_df )
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl,matrices_path)
df = sf . do_while_pnnbl_ineligible ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_pnnbl_ineligible 20 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
df . to_csv ( csv_after_pnnbl_inelgible_after_relative_indent , index = False )
#######################################
df = sf . examine_pos_sp_indent (
df ,
self . csv_removed_space_between_words ,
csv_after_pnnbl_inelgible_after_relative_indent ,
)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_pos_sp_indent 21 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_examined_speaker_using_indent , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . examine_speaker_next_lines ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_next_lines 22 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_speaker_next_lines_after_pos_sp_indent , index = False )
audit_df = self . update_audit_df ( df , audit_df )
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf . do_while_pnnbl_ineligible ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_pnnbl_ineligible 23 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_pnnbl_inelgible_after_pos_sp_indent , index = False )
#################################
df = sf . examine_speaker_extension ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_extension 24 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_examined_speaker_extension , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . examine_speaker_next_lines ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_next_lines 25 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_speaker_next_lines_after_speaker_extension , index = False )
audit_df = self . update_audit_df ( df , audit_df )
## do while pnnbl ineligible
print ( " pnnbl after speaker extension " )
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf . do_while_pnnbl_ineligible ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_pnnbl_ineligible 26 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_pnnbl_inelgible_after_speaker_extension , index = False )
## checking
# audit_df.reset_index().to_csv(audit_report_csv,index =False)
#################################################
df = sf . examine_action_using_top2_part1 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_action_using_top2_part1 27 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_action_using_top2 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf . do_while_pnnbl_ineligible ( df )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_pnnbl_inelgible_after_action_using_top_pnnbl , index = False )
# #########################################
df = sf . refine_action_possibilties ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.refine_action_possibilties 28 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_refined_action , index = False )
audit_df = self . update_audit_df ( df , audit_df )
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl)
df = sf . do_while_pnnbl_ineligible ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_pnnbl_ineligible(df) 29 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_pnnbl_inelgible_after_refined_action , index = False )
##############################
sf . prep_pnnbl_eligible_csv ( pnbl_eligibility_matrix , nnbl_eligibility_matrix )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.prep_pnnbl_eligible_csv 30 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
#############################
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 31 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl , index = False )
#################################
df = sf . start_top_identifications_part1 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.start_top_identifications_part1 32 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top_identification_part1 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
## examine speaker possibilties again after top1
df = sf . examine_speaker_pos ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_pos 33 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_speaker_pos_after_top1 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
###
df_34 = pd . DataFrame ( df )
df_34 . to_csv ( self . base_file_path + " df_export_before_34.csv " , index = True )
au_df_34 = pd . DataFrame ( audit_df )
au_df_34 . to_csv ( self . base_file_path + " audit_df_before_after_34.csv " , index = True )
df = sf . examine_speaker_next_lines ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_next_lines 34 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_speaker_next_lines_after_top1 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 35 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top_part1 , index = False )
#################################
copy_df_examine_speaker_mix_part1 = pd . DataFrame ( df )
copy_df_examine_speaker_mix_part1 . to_csv ( self . base_file_path + " copy_df_examine_speaker_mix_part1.csv " , index = True )
###########
copy_audit_df_examine_speaker_mix_part1 = pd . DataFrame ( audit_df )
copy_audit_df_examine_speaker_mix_part1 . to_csv ( self . base_file_path + " copy_audit_df_examine_speaker_mix_part1.csv " , index = True )
##########
df = sf . examine_speaker_mix_part1 ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_mix_part1 36 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_examine_speaker_mix_part1 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 37 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part1 , index = False )
#################################
df_38 = pd . DataFrame ( df )
df_38 . to_csv ( self . base_file_path + " df_export_after_38.csv " , index = True )
au_df_38 = pd . DataFrame ( audit_df )
au_df_38 . to_csv ( self . base_file_path + " audit_df_export_after_38.csv " , index = True )
df = sf . examine_speaker_mix_part2 ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_mix_part2 38 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_examine_speaker_mix_part2 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
## examine speaker possibilties again after mix
df = sf . examine_speaker_pos ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_pos 39 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_speaker_pos_after_mix , index = False )
audit_df = self . update_audit_df ( df , audit_df )
###
df = sf . examine_speaker_next_lines ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTERsf.examine_speaker_next_lines 40 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
if self . gen_int_files :
df . to_csv ( csv_after_examined_speaker_next_lines_after_mix , index = False )
audit_df = self . update_audit_df ( df , audit_df )
## do while pnnbl ineligible
# sf.prep_pnnbl_wts(csv_pnbl_nnbl,matrices_path)
df = sf . do_while_pnnbl_ineligible ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_pnnbl_ineligible 41 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_pnnbl_ineligible_after_mix , index = False )
# df = sf.do_while_examine_using_identified_pnnbl(df)
# df = update_is_identified(df)
# df.to_csv(csv_after_eligibility_using_identified_pnnbl_after_speaker_mix_part2, index = False)
################################
df = sf . start_top_identifications_part2 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.start_top_identifications_part2 42 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top_identification_part2 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 43 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top_part2 , index = False )
#################################
df = sf . start_top_identifications_part2 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.start_top_identifications_part2 44 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top_identification_part2_again , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 45 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top_part2_again , index = False )
#################################
df = sf . start_top_identifications_part2 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.start_top_identifications_part2 46 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top_identification_part2_again_again , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 47 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top_part2_again_again , index = False )
#################################
df = sf . start_slug_identification ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.start_slug_identification(df) 48 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_slug_identification , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl(df) 49 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_slug_identification , index = False )
#################################
df = sf . start_top_identifications_part1 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.start_top_identifications_part1(df) 50 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top_identification_part1_again , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 51 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top_part1_again , index = False )
#################################
df = sf . start_top_identifications_part3 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.start_top_identifications_part3 52 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top_identification_part3 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 53 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top_part3 , index = False )
#################################
df = sf . start_top_identifications_part4 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.start_top_identifications_part4 54 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top_identification_part4 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 55 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top_part4 , index = False )
#################################
df = sf . start_top_identifications_part5 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.start_top_identifications_part5(df) 56 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top_identification_part5 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 57 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top_part5 , index = False )
#################################
df = sf . start_top_identifications_part6 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.start_top_identifications_part6 58 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top_identification_part6 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 59 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top_part6 , index = False )
#################################
df = sf . start_top_identifications_part7 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.start_top_identifications_part7 60 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top_identification_part7 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 61 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top_part7 , index = False )
#################################
df = sf . start_top_identifications_part8 ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.start_top_identifications_part8 62 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top_identification_part8 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 63 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top_part8 , index = False )
#################################
df = sf . examine_among_two ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_among_two 64 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_examine_among_two , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 65 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_examine_among_two , index = False )
#################################
df = sf . examine_speaker_next_lines ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_speaker_next_lines 66: AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_examine_speaker_next_line_after_among_two , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 67 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_examine_sp_next_among_two , index = False )
#################################
df = sf . examine_action_using_top2_wt_diff ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_action_using_top2_wt_diff 68 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top2_wt_diff , index = False )
audit_df = self . update_audit_df ( df , audit_df )
try :
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 69 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff , index = False )
except :
pass
#################################
try :
df = sf . examine_action_using_top2_wt_diff ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top2_wt_diff_again , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 70 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top2_wt_diff_again , index = False )
except :
pass
#################################
try :
df = sf . start_top_identifications_part1_diluted ( df )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_top_identification_part1_diluted , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 71 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_top_part1_diluted , index = False )
except :
pass
###################################
####################################
##1.1
df = sf . decrease_wt_dial_between_action ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.decrease_wt_dial_between_action 72 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_examine_dial_between_action , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 73 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
#if self.gen_int_files:
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_examine_dial_between_action , index = False )
####################################
#################################
df = sf . examine_among_two ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.examine_among_two 74 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
if self . gen_int_files :
df . to_csv ( csv_after_examine_among_two_again , index = False )
audit_df = self . update_audit_df ( df , audit_df )
df = sf . do_while_examine_using_identified_pnnbl ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.do_while_examine_using_identified_pnnbl 75 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df = self . update_is_identified ( df )
#if self.gen_int_files:
df . to_csv ( csv_after_eligibility_using_identified_pnnbl_after_examine_among_two_again , index = False )
####################################
#################################
df = sf . identify_top_as_final ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.identify_top_as_final 76 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df_76 = pd . DataFrame ( df )
df_76 . to_csv ( self . base_file_path + " df_identify_top_as_final_76.csv " , index = False )
au_df_76 = pd . DataFrame ( audit_df )
au_df_76 . to_csv ( self . base_file_path + " audit_df_identify_top_as_final_76.csv " , index = False )
df = self . update_is_identified ( df )
df . to_csv ( csv_after_identify_remaining_as_top , index = False )
audit_df = self . update_audit_df ( df , audit_df )
#####################################
## prepare for audit
df = sf . prep_for_audit ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.prep_for_audit 77 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df . to_csv ( csv_after_prep_for_audit_after_identification , index = False )
#####################################
df , audit_df = sf . run_audit_on_identified ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.run_audit_on_identified 78 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df . to_csv ( csv_after_audit1 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
#############################################
### run language specific audit on identified
if lang :
if lang . upper ( ) == " ENGLISH " :
df = sf_eng . run_audit_on_identified_english ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf_eng.run_audit_on_identified_english 79 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
audit_df = self . update_audit_df ( df , audit_df )
#####################################
### merge the beginning/middle/end lines
# df.to_csv(self.base_file_path + "df_before_merge_line_para.csv", index = Flase)
para_df = sf . merge_line_to_para ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.merge_line_to_para 80 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
try :
para_df . to_csv ( self . base_file_path + " after_merge_line_para.csv " , index = False )
print ( " para_df is writtern " )
except :
pass
print ( " \n \n Function 80 is completed " )
try :
script_language , dialogue_language = sf . language_detector_for_csv ( para_df )
print ( " script_language " , script_language )
print ( " dialogue_language " , dialogue_language )
unique_script_languages = ' , ' . join ( set ( lang [ 0 ] for lang in script_language ) )
unique_dialogue_languages = ' , ' . join ( set ( lang [ 0 ] for lang in dialogue_language ) )
except :
unique_script_languages = " "
unique_dialogue_languages = " "
#commented as some unwanted change of . to comma
#para_df = sf.change_dot_to_comma_inslug(para_df)
print ( " unique_script_languages: " , unique_script_languages )
print ( " unique_dialogue_languages: " , unique_dialogue_languages )
# para_df.to_csv(csv_parawise_status, index=False)
##
print ( " \n \n dot to comma changes in slug " )
audited_file_name = self . script_name + " .csv "
req_file = ContentFile (
( para_df . to_csv ( index = False , path_or_buf = None ) ) . encode ( " utf-8 " ) ,
audited_file_name ,
)
File . objects . create (
script = Script . objects . get ( id = self . script_id ) ,
type = " script-csv " ,
file = req_file ,
)
print ( " \n \n exporting df and audit_df agter function 80 " )
df_df = pd . DataFrame ( df )
df_df . to_csv ( self . base_file_path + " df_export_after_80.csv " , index = False )
audit_df_df = pd . DataFrame ( audit_df )
audit_df_df . reset_index ( ) . to_csv ( self . base_file_path + " audit_df_export_after_80.csv " , index = False )
print ( " \n wrapping identified lines if required \n " )
df = sf . wrap_text ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.wrap_text 81 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df_81 = pd . DataFrame ( df )
df_81 . to_csv ( self . base_file_path + " df_export_after_81.csv " , index = False )
au_df_81 = pd . DataFrame ( audit_df )
au_df_81 . reset_index ( ) . to_csv ( self . base_file_path + " audit_df_export_after_81.csv " , index = False )
df . to_csv ( csv_after_wrapping , index = False )
#audit_df['line_no'] = audit_df['line_no'].astype(float)
audit_df = self . update_audit_df ( df , audit_df )
#####################################
## prepare for audit again
only_df = pd . DataFrame ( df )
only_df . to_csv ( self . base_file_path + " df_before_82.csv " , index = False )
df = sf . prep_for_audit ( df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.prep_for_audit 82 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df . to_csv ( csv_after_prep_for_audit_after_wrapping , index = False )
#####################################
sf . run_audit_on_identified ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.run_audit_on_identified 83 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
df . to_csv ( csv_after_audit2 , index = False )
audit_df = self . update_audit_df ( df , audit_df )
#####################################################
### run language specific audit on identified
if lang :
if lang . upper ( ) == " ENGLISH " :
df = sf_eng . run_audit_on_identified_english ( df , audit_df )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf_eng.run_audit_on_identified_english 84 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
audit_df = self . update_audit_df ( df , audit_df )
####################################
sf . sa_output_to_docx ( df , output_linewise_docx , output_template )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.sa_output_to_docx 85 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
sf . sa_output_to_txt ( output_linewise_docx , output_linewise_txt )
# print("line no: 2017",str(output_linewise_txt))
output_docx_after_audit = self . base_file_path + " audited_text.docx "
pdf_audit_file_path = self . base_file_path + " audited_text.pdf "
sf . convert_txt_to_docx ( output_linewise_txt , output_docx_after_audit )
print ( " converted to docx " )
try :
# total_page_af = sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path)
sf . countPages ( output_docx_after_audit , pdf_audit_file_path , self . base_file_path )
try :
total_page_af = sf . PdfCounter ( pdf_audit_file_path )
print ( " total pages af = " , total_page_af )
print ( " hehehehehe " )
except Exception as exp :
print ( repr ( exp ) )
print ( " try except total pages didnt work " )
except Exception as exp :
print ( " total_page_af : " , exp )
print ( " the count of pageline start here " )
line_count_after_audit = sf . count_the_line ( str ( output_linewise_txt ) )
count_before_txt = self . base_file_path + " temp.txt "
line_count_before_audit = sf . count_the_line ( str ( count_before_txt ) )
print ( " you are here " )
output_docx_from_orginal_text = self . base_file_path + " original_text.docx "
pdf_file_path = self . base_file_path + " original_text.pdf "
print ( " b4 txt to docx " )
sf . convert_txt_to_docx ( count_before_txt , output_docx_from_orginal_text )
print ( " b4 page count of pdf " )
print ( " hehe " )
""" 13-2-24 """
# try:
# total_page_bf = sf.countPages(output_docx_from_orginal_text,pdf_file_path,self.base_file_path)
# print(total_page_bf)
# except Exception as exp:
# print(" total page bf",total_page_bf )
print ( " temp txt converted to docx " )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.sa_output_to_txt 86 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
########################################
# sf.print_audit_report_docx(audit_df,audit_report_docx)
# headers = True
try :
print ( " In the total number of pages count " )
file_model_objects = File . objects . filter ( script = self . script_id )
audit_file_object = file_model_objects . get ( type = " script-csv " )
read_df = pd . read_csv ( audit_file_object . file )
print ( " csv fetched " )
docx = sf . csv_to_docx ( read_df )
audited_docx_path = self . base_file_path + " csv_to_docx_audited.docx "
# temp_file_stream = BytesIO()
print ( " docx saved " )
docx . save ( audited_docx_path )
# temp_file_stream.seek(0)
docx_file = ContentFile (
open ( audited_docx_path , ' rb ' ) . read ( ) ,
" from_audited_csv_to_document.docx " ,
)
# docx_file = ContentFile(
# audited_docx_path.getvalue(),
# "from_audited_csv_to_document.docx",
# )
File . objects . create (
script = Script . objects . get ( id = self . script_id ) ,
type = " script-docx " ,
file = docx_file ,
)
print ( " script-docx object created " )
converted_audit_pdf_file_path = self . base_file_path + " csv_to_docx_audited.pdf "
Final_pdf_page_count = sf . countPages ( audited_docx_path , converted_audit_pdf_file_path , self . base_file_path )
print ( " total number of pdf pages " )
print ( int ( Final_pdf_page_count ) )
pass
except Exception as e :
print ( " yje exception is " )
print ( e )
audit_df = self . update_audit_df_intro ( df , audit_df )
audit_df = self . update_audit_df_appendix ( df , audit_df )
audit_report_name = self . script_name + " _report.docx "
print ( " audit_df_tabular 1908 \n \n " , audit_df , " \n \n " )
copy_df = pd . DataFrame ( audit_df )
copy_df . reset_index ( ) . to_csv ( self . base_file_path + " audit_report_export.csv " , index = False )
print ( " before print_report_tabular_docx " )
script_ob = Script . objects . get ( id = self . script_id )
screen_play_name = script_ob . screenplay . name
author_name = script_ob . screenplay . author
print ( screen_play_name )
print ( author_name )
print ( line_count_before_audit )
print ( line_count_after_audit )
# audit_report_buffer = sf.print_audit_report_tabular_docx(audit_df,line_count_before_audit,line_count_after_audit) #commented on 13-09-23
para_filetered_audut_df = sf . assign_para_no ( audit_df )
# 2024-09-03 12:07:33 +00:00  (stray VCS timestamp artifact; commented out to keep the module importable)
print ( " after para assign " )
# 2024-04-27 09:33:09 +00:00  (stray VCS timestamp artifact; commented out to keep the module importable)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER para_filetered_audut_df \n " )
scriptname = str ( screen_play_name ) # to fetched by audit data
author = str ( author_name ) # to fetched by audit data
try :
pre_audit_pagenumber = int ( self . total_page_bf )
except :
pre_audit_pagenumber = 1
try :
print ( " total_page_af = " , total_page_af )
postauditpagenumber = int ( total_page_af )
except :
print ( " total_page_af 1 " )
postauditpagenumber = 1
try :
preaudit_line_no = int ( line_count_before_audit )
except :
preaudit_line_no = 1
try :
postaudit_line_no = int ( line_count_after_audit )
except :
postaudit_line_no = 1
try :
print ( " unique_script_languages " , unique_script_languages )
script_language = str ( unique_script_languages ) # to be fetched by conversin function
except :
script_language = " --- "
try :
print ( " unique_dialogue_languages " , unique_dialogue_languages )
dialogue_language = str ( unique_dialogue_languages ) # to be fetched by conversin function
except :
dialogue_language = " --- "
print ( " scriptname " , scriptname )
print ( " author " , author )
print ( " pre_audit_pagenumber " , pre_audit_pagenumber )
print ( " postauditpagenumber " , postauditpagenumber )
print ( " preaudit_line_no " , preaudit_line_no )
print ( " postaudit_line_no " , postaudit_line_no )
'''
additiona model information
'''
#self.audit_model_obj.number_of_pages = int(postauditpagenumber)
# time_per_page = 30
# base time = 120
# no_of_pages = 10
# formula of counting pages = (time_per_page + base time) * no_of_pages
try :
self . audit_model_obj . screenplay_language = script_language
self . audit_model_obj . dialogue_language = dialogue_language
self . audit_model_obj . number_of_pages = int ( postauditpagenumber )
print ( " script language, dialogue language, post audit pagenumber is update to the audit models " )
except :
print ( " page number and language insertion failed " )
pass
# 2024-09-03 12:07:33 +00:00  (stray VCS timestamp artifact; commented out to keep the module importable)
auditdf_before_table_creation = self . base_file_path + " auditbefore_table.csv "
para_filetered_audut_df . to_csv ( auditdf_before_table_creation )
print ( " B4 audit report buffer " )
# 2024-04-27 09:33:09 +00:00  (stray VCS timestamp artifact; commented out to keep the module importable)
audit_report_buffer = sf . print_audit_report_tabular_docx ( para_filetered_audut_df , scriptname , author , pre_audit_pagenumber , postauditpagenumber , preaudit_line_no , postaudit_line_no , script_language , dialogue_language )
# 2024-09-03 12:07:33 +00:00  (stray VCS timestamp artifact; commented out to keep the module importable)
print ( " after audit buffer " )
# 2024-04-27 09:33:09 +00:00  (stray VCS timestamp artifact; commented out to keep the module importable)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.print_audit_report_tabular_docx 87 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
# 2024-09-03 12:07:33 +00:00  (stray VCS timestamp artifact; commented out to keep the module importable)
print ( " going into models :- " )
try :
req_file = ContentFile ( audit_report_buffer . read ( ) , audit_report_name )
except Exception as exp :
print ( repr ( exp ) )
print ( " repoo " )
# 2024-04-27 09:33:09 +00:00  (stray VCS timestamp artifact; commented out to keep the module importable)
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " AFTER sf.print_audit_report_tabular_docx 87 : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " + str ( req_file ) )
print ( " req_file " )
try :
script = Script . objects . get ( id = self . script_id )
script . no_of_pages = int ( Final_pdf_page_count )
script . save ( )
# user_id = script.screenplay.user.id
# Now, 'user_id' contains the user.id associated with the given script_id
except Exception as e :
print ( e )
# Handle the case where the script with the given ID doesn't exist
# user_id = None
print ( " No_of_pages not insertd " )
# try:
# update_juggernaut(user_id=user_id,service_name='audit',audit_pages = int(postauditpagenumber))
# except:
# print("the update_juggernaut didnt work")
# req_file = File.objects.get(script=self.script_id)
# req_file.type= "audit-report"
# req_file.file = file
# req_file.save()
File . objects . create (
script = Script . objects . get ( id = self . script_id ) ,
type = " audit-report " ,
file = req_file ,
)
try :
end_time_count = time . time ( )
total_duration = end_time_count - self . start_time_count
hours , remainder = divmod ( total_duration , 3600 )
minutes , seconds = divmod ( remainder , 60 )
text_time = f " Program ran for { str ( hours ) } hours, { str ( minutes ) } minutes, and { str ( seconds ) } seconds. for script_id= { str ( self . script_id ) } which has pdf pages of { pre_audit_pagenumber } . "
print ( str ( text_time ) )
t_time_file = self . total_time_file + " /tail_errors.txt "
with open ( t_time_file , " a " ) as file008 :
file008 . write ( str ( text_time ) + " \n " )
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " 87 function complete \n " )
except :
pass
return True
def script_meta ( self ) :
pass
# 2024-09-03 12:07:33 +00:00  (stray VCS timestamp artifact; commented out to keep the module importable)
def audit_ai_gen_script ( self , lang : str = None ) :
df , _ = self . before_audit ( lang )
para_df = pd . DataFrame ( )
df = sf_eng . ai_gen_script_to_audited_df ( df )
para_df = sf . merge_line_to_para ( df )
try :
para_df . to_csv ( self . base_file_path + " after_merge_line_para.csv " , index = False )
print ( " para_df is written " )
except :
pass
audited_file_name = self . script_name + " .csv "
req_file = ContentFile (
( para_df . to_csv ( index = False , path_or_buf = None ) ) . encode ( " utf-8 " ) ,
audited_file_name ,
)
## for local - uncomment
print ( " \n \n the code is here \n \n " )
File . objects . create (
script = Script . objects . get ( id = self . script_id ) ,
type = " script-csv " ,
file = req_file ,
)
print ( " \n \n @@@@#$$$$$$$$ csv saved from s2s \n \n " )
# 2024-04-27 09:33:09 +00:00  (stray VCS timestamp artifact; commented out to keep the module importable)
def audit_fdx ( self ) :
# fdx to audited csv
para_df = pd . DataFrame ( )
para_df = sf . fdx_to_audited_df ( self . input_script )
# save audited csv to file system
audited_file_name = self . script_name + " .csv "
req_file = ContentFile (
( para_df . to_csv ( index = False , path_or_buf = None ) ) . encode ( " utf-8 " ) ,
audited_file_name ,
)
File . objects . create (
script = Script . objects . get ( id = self . script_id ) ,
type = " script-csv " ,
file = req_file ,
)
2024-04-30 04:59:37 +00:00
print ( " csv created " )
try :
self . audit_model_obj . isfdx = True
self . audit_model_obj . save ( )
print ( " isfdx True saved " )
except Exception as exp :
print ( repr ( exp ) )
language_check_df = sf . check_and_copy_rows ( para_df )
try :
script_language , dialogue_language = sf . language_detector_for_csv ( language_check_df )
print ( " script_language " , script_language )
print ( " dialogue_language " , dialogue_language )
unique_script_languages = ' , ' . join ( set ( lang [ 0 ] for lang in script_language ) )
unique_dialogue_languages = ' , ' . join ( set ( lang [ 0 ] for lang in dialogue_language ) )
print ( " langauage detection worked " )
except Exception as exp :
print ( repr ( exp ) )
unique_script_languages = " "
unique_dialogue_languages = " "
print ( " Langauuge detectedion csv didnt work " )
try :
self . audit_model_obj . screenplay_language = unique_script_languages
self . audit_model_obj . dialogue_language = unique_dialogue_languages
self . audit_model_obj . save ( )
print ( " audit lang saved " )
except Exception as exp :
print ( repr ( exp ) )
self . audit_model_obj . screenplay_language = " ENGLISH "
self . audit_model_obj . dialogue_language = " ENGLISH "
print ( " audot lang didnt save " )
# print("In the total number of pages count")
# file_model_objects = File.objects.filter(script=self.script_id)
# audit_file_object = file_model_objects.get(type="script-csv")
# read_df = pd.read_csv(audit_file_object.file)
# print("csv fetched")
try :
print ( para_df )
docx = sf . csv_to_docx ( para_df )
audited_docx_path = self . base_file_path + " csv_to_docx_audited.docx "
# temp_file_stream = BytesIO()
print ( " docx saved " )
docx . save ( audited_docx_path )
# temp_file_stream.seek(0)
2024-04-27 09:33:09 +00:00
2024-04-30 04:59:37 +00:00
docx_file = ContentFile (
open ( audited_docx_path , ' rb ' ) . read ( ) ,
" from_audited_csv_to_document.docx " ,
)
File . objects . create (
script = Script . objects . get ( id = self . script_id ) ,
type = " script-docx " ,
file = docx_file ,
)
print ( " script-docx object created " )
# output_docx_after_audit = self.base_file_path + "audited_text.docx"
pdf_audit_file_path = self . base_file_path + " csv_to_docx_audited.pdf "
print ( " converted to docx " )
try :
# total_page_af = sf.countPages(output_docx_after_audit,pdf_audit_file_path,self.base_file_path)
sf . countPages ( audited_docx_path , pdf_audit_file_path , self . base_file_path )
print ( " fdx : docx to pdf was create at " , str ( pdf_audit_file_path ) )
try :
total_page_af = sf . PdfCounter ( pdf_audit_file_path )
print ( " total pages af = " , total_page_af )
print ( " hehehehehe " )
self . audit_model_obj . number_of_pages = int ( total_page_af )
self . audit_model_obj . save ( )
except Exception as exp :
print ( repr ( exp ) )
print ( " try except total pages didnt work " )
except Exception as exp :
print ( " fdx docx to pdf conversion didnt work " )
print ( " total_page_af : " , exp )
except Exception as exp :
print ( " csv to docs didnt work " )
print ( repr ( exp ) )
2024-04-27 09:33:09 +00:00
return True
def quick_audit ( self , lang : str = None ) :
df , audit_df = self . before_audit ( lang )
## get the indents count
count_green = 0
count_amber = 0
total_count = len ( df )
all_indents = df [ " ssc " ] . value_counts ( )
print ( all_indents )
all_indents = df [ " ssc " ] . value_counts ( ) . sort_index ( ) . reset_index ( )
# print(all_indents)
for index in all_indents . index :
# print(all_indents['index'][index])
if str ( all_indents [ " index " ] [ index ] ) in ( " 15 " , " 25 " , " 30 " , " 35 " ) :
count_green + = all_indents [ " ssc " ] [ index ]
elif str ( all_indents [ " index " ] [ index ] ) in (
" 0 " ,
" 14 " ,
" 16 " ,
" 24 " ,
" 26 " ,
" 29 " ,
" 31 " ,
" 34 " ,
" 36 " ,
) :
count_amber + = all_indents [ " ssc " ] [ index ]
elif all_indents [ " index " ] [ index ] > 62 :
count_amber + = all_indents [ " ssc " ] [ index ]
print ( all_indents [ " index " ] . tolist ( ) )
print ( count_green , count_amber , total_count )
percent_good = ( ( count_green + count_amber ) / total_count ) * 100
if percent_good > 80 :
print ( " most lines are within prescribed indents " , percent_good )
quick_audit_flag = " pass "
else :
print ( " most lines are not within prescribed indents " , percent_good )
quick_audit_flag = " fail "
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " \n after quick audit : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
2024-09-03 12:07:33 +00:00
# print(quick_audit_flag)
2024-04-27 09:33:09 +00:00
def get_character_list ( self , lang : str = None ) :
if not self . audit_model_obj . pre_audit_run :
df , _ = self . before_audit ( lang )
else :
df_path = os . path . join ( self . base_file_path , " space_between_words_removed.csv " )
df = pd . read_csv ( df_path )
df_indents = df [ [ " line_no " , " data " , " ssc " , " parenthetical " ] ]
df_indents . fillna ( " " , inplace = True )
for index in df_indents . index :
data = df_indents [ " data " ] [ index ]
if df_indents [ " parenthetical " ] [ index ] == " PartMidEnd " :
par_pos = re . search ( " \ ( " , data ) . start ( )
df_indents [ " data " ] [ index ] = data [ 0 : par_pos ] . strip ( )
df_indents [ " parenthetical " ] [ index ] = " Absent "
elif data . strip ( ) :
df_indents [ " data " ] [ index ] = data . strip ( )
df_indents = df_indents . loc [ df_indents [ " parenthetical " ] == " Absent " , : ]
df_indents [ " ssc " ] . value_counts ( ) . sort_index ( )
df_indents [ " ssc " ] . value_counts ( ) . sort_index ( ) . reset_index ( )
all_indents = df_indents [ " ssc " ] . value_counts ( ) . sort_index ( ) . reset_index ( )
if 35 in all_indents [ " index " ] . tolist ( ) :
if df_indents [ " ssc " ] . value_counts ( ) . sort_index ( ) [ 35 ] > 3 :
sp_indent = 35
else :
ps_sp_indents = df_indents . loc [
( df_indents [ " ssc " ] > = 32 ) & ( df_indents [ " ssc " ] < = 40 ) , :
]
if not ps_sp_indents . empty :
sp_indent = (
ps_sp_indents [ " ssc " ]
. value_counts ( )
. sort_values ( ascending = False )
. reset_index ( ) [ " index " ] [ 0 ]
)
else :
sp_indent = 35
# sp_indent = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]['index']
else :
ps_sp_indents = df_indents . loc [
( df_indents [ " ssc " ] > = 32 ) & ( df_indents [ " ssc " ] < = 40 ) , :
]
if not ps_sp_indents . empty :
sp_indent = (
ps_sp_indents [ " ssc " ]
. value_counts ( )
. sort_values ( ascending = False )
. reset_index ( ) [ " index " ] [ 0 ]
)
else :
sp_indent = - 1
# sp_indent = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]['index']
# third_indents = df_indents['ssc'].value_counts().sort_index().reset_index().iloc[3]
try :
character_list = df_indents . loc [
df_indents [ " ssc " ] == sp_indent , " data "
] . unique ( )
except :
character_list = [ ]
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " \n after get_character_list : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
return character_list
def get_character_subset ( self , character_list ) :
# if character_list is None:
# character_list = self.get_character_list()
audited_linewise_csv = os . path . join ( self . base_file_path , " after_audit1.csv " ) # replaced by after_audit1.csv
foldername = " character_subset "
output_dir = os . path . join ( self . base_file_path , foldername )
if not os . path . exists ( output_dir ) :
2024-09-03 12:07:33 +00:00
os . makedirs ( output_dir , exist_ok = True )
2024-04-27 09:33:09 +00:00
df_after_audit = pd . read_csv ( audited_linewise_csv )
df_after_audit . fillna ( " " , inplace = True )
df_after_audit [ " line_no " ] = df_after_audit [ " line_no " ] . astype ( int )
scenes = df_after_audit . loc [
( df_after_audit [ " Identification_Status " ] == " ps1 " )
| ( df_after_audit [ " Identification_Status " ] == " ps2 " ) ,
[ " line_no " , " data " , " Identification_Status " ] ,
]
scene_indexes = scenes . index
last_index = df_after_audit . index [ - 1 ]
character_scripts_dict = dict ( )
for character in character_list :
try :
print ( " processing character subset for " , character )
except :
pass
output_subset_script_txt = os . path . join (
output_dir ,
( self . script_name . rsplit ( " . " , 1 ) [ 0 ] + " _ " + str ( character ) + " .txt " ) ,
)
output_subset_script_docx = os . path . join (
output_dir ,
( self . script_name . rsplit ( " . " , 1 ) [ 0 ] + " _ " + str ( character ) + " .docx " ) ,
)
i , j = 0 , 1
character_in_scenes = [ ]
character_lines = [ ]
while j < = len ( scene_indexes ) :
scene_no = i + 1
start = scene_indexes [ i ]
if j < len ( scene_indexes ) :
end = scene_indexes [ j ]
else :
end = last_index + 1
for index in range ( start , end ) :
data = df_after_audit [ " data " ] [ index ]
if re . search ( character . upper ( ) , data . strip ( ) ) :
character_lines . append ( start )
# print(scene_no,index,data)
character_in_scenes . append ( scene_no )
character_lines . append ( index )
rev_index = index - 1
rev_index_is = df_after_audit [ " Identification_Status " ] [
rev_index
]
character_lines . append ( rev_index )
# pvs_data = df_after_audit['data'][rev_index-1]
# print(rev_index,pvs_data)
try :
rev_index_before_is = df_after_audit [
" Identification_Status "
] [ rev_index - 1 ]
except :
rev_index_before_is = " "
# while rev_index != start and rev_index_is != 'ps4' and rev_index_is != 'ps1' and rev_index_is != 'ps7' :
while (
rev_index != start
and rev_index_is != " ps4 "
and rev_index_is != " ps1 "
and not (
rev_index_is == " ps6 " and rev_index_before_is == " blank "
)
) :
rev_index = rev_index - 1
pvs_data = df_after_audit [ " data " ] [ rev_index ]
# print(rev_index,pvs_data)
character_lines . append ( rev_index )
rev_index_is = df_after_audit [ " Identification_Status " ] [
rev_index
]
fwd_index = index
fwd_index_is = df_after_audit [ " Identification_Status " ] [
fwd_index
]
while fwd_index_is != " blank " and fwd_index != " ps15 " :
fwd_index = fwd_index + 1
character_lines . append ( fwd_index )
fwd_index_is = df_after_audit [ " Identification_Status " ] [
fwd_index
]
i + = 1
j + = 1
character_in_scenes = list ( set ( character_in_scenes ) )
character_lines = list ( set ( character_lines ) )
print ( character_lines )
character_lines . sort ( )
print ( character_lines )
character_df = df_after_audit [ df_after_audit . index . isin ( character_lines ) ]
character_df . reset_index ( drop = True , inplace = True )
character_df = sf . prep_for_audit ( character_df )
# test_path = os.path.join(output_dir,os.path.splitext(input_filename)[0])+ '_' + str(character) + '_test1.csv'
# character_df.to_csv(test_path,index= False)
character_df = sf . run_audit_on_identified ( character_df )
# test_path = os.path.join(output_dir,os.path.splitext(input_filename)[0])+ '_' + str(character) + '_test2.csv'
# character_df.to_csv(test_path,index= False)
ch_para_df = sf . merge_line_to_para ( character_df )
# ch_para_df.to_csv(csv_parawise_status, index = False)
sf . sa_wrapped_output_to_docx ( ch_para_df , output_subset_script_docx )
character_scripts_dict [ character ] = output_subset_script_docx
# sf.conv_docx_to_txt(output_subset_script_docx,output_subset_script_txt)
with open ( output_subset_script_txt , " w " , encoding = " utf-8 " ) as fout :
for index in character_lines :
print ( df_after_audit [ " Identification_Status " ] [ index ] )
try :
if str ( df_after_audit [ " Identification_Status " ] [ index ] ) == " ps1 " :
fout . writelines ( " \n " )
except :
pass
data = df_after_audit [ " data " ] [ index ]
try :
2024-09-03 12:07:33 +00:00
# print(data)
pass
2024-04-27 09:33:09 +00:00
except :
pass
fout . writelines ( str ( data ) )
fout . writelines ( " \n " )
try :
if (
df_after_audit [ " Identification_Status " ] [ index ] == " ps1 "
or df_after_audit [ " Identification_Status " ] [ index ] == " ps3 "
) :
fout . writelines ( " \n " )
except :
pass
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " \n after get_character_subset : AFTER ASSIGNING LOCATIONS AUDIT :audit \n " )
return character_scripts_dict
def audit_in_background ( self ) :
2024-09-03 12:07:33 +00:00
# # commenting os.fork to make code run in foreground
2024-05-03 05:53:28 +00:00
# if os.fork() != 0:
# return
2024-04-27 09:33:09 +00:00
print ( " Running in background " )
end_time = datetime . datetime . now ( )
try :
extension = self . input_script . rsplit ( " . " , 1 ) [ - 1 ]
if extension == ' fdx ' :
self . audit_fdx ( )
else :
self . audit ( )
self . audit_model_obj . status = States . SUCCESS
self . audit_model_obj . save ( )
print ( " Audit Success!!!!!!!!!!!!!!!!!!!!!!! " )
2024-09-03 12:07:33 +00:00
# end_time = datetime.datetime.now()
2024-04-27 09:33:09 +00:00
with open ( self . base_file_path + " time_taken.txt " , " a " ) as file007 :
file007 . write ( " \n \n ****AUDITING IS SUCCESSFUL**** \n " )
2024-09-03 12:07:33 +00:00
# print(end_time)
2024-04-27 09:33:09 +00:00
except Exception as exp :
self . audit_model_obj . status = States . FAILURE
self . audit_model_obj . results = exp
2024-07-02 08:24:31 +00:00
self . audit_model_obj . error_msg = " FAILED "
2024-04-27 09:33:09 +00:00
self . audit_model_obj . save ( )
2024-09-03 12:07:33 +00:00
# print(end_time)
2024-04-27 09:33:09 +00:00
if __name__ == "__main__":
    # Smoke-test entry point: audit script "123" with logging enabled.
    naudit = NeutralAudit("123", True)
    # FIX: get_character_subset() requires a character_list argument; the
    # original call with no arguments raised TypeError.
    naudit.get_character_subset(naudit.get_character_list())